## ----style, echo = FALSE, results = 'asis'--------------------------------------------------------
options(width=100)
knitr::opts_chunk$set(
    eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")))

## ----setup, echo=FALSE----------------------------------------------------------------------------
suppressPackageStartupMessages({
    library(SummarizedExperiment)
    library(GenomicAlignments)
    library(AnnotationHub)
    library(biomaRt)
    library(airway)
    library(org.Hs.eg.db)
    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
    library(RNAseqData.HNRNPC.bam.chr14)
})

## ----ranges, message=FALSE------------------------------------------------------------------------
library(GenomicRanges)
gr <- GRanges("A", IRanges(c(10, 20, 22), width=5), "+")
shift(gr, 1)                            # intra-range
range(gr)                               # inter-range
reduce(gr)                              # inter-range
snps <- GRanges("A", IRanges(c(11, 17, 24), width=1))
findOverlaps(snps, gr)                  # between-range
setdiff(range(gr), gr)                  # 'introns'

## -------------------------------------------------------------------------------------------------
library(org.Hs.eg.db)
org <- org.Hs.eg.db
select(org, "BRCA1", c("ENSEMBL", "GENENAME"), "SYMBOL")

## -------------------------------------------------------------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
promoters(txdb)

## -------------------------------------------------------------------------------------------------
library(AnnotationHub)
hub = AnnotationHub()
hub
query(hub, c("ensembl", "81.gtf"))
hub[["AH48004"]]

## ----SummarizedExperiment-------------------------------------------------------------------------
library(SummarizedExperiment)
library(airway)
data(airway)
airway
colData(airway)
airway[, airway$dex %in% "trt"]
chr14 <- as(seqinfo(airway), "GRanges")["14"]
airway[airway %over% chr14,]

## -------------------------------------------------------------------------------------------------
library(RNAseqData.HNRNPC.bam.chr14)
fls = RNAseqData.HNRNPC.bam.chr14_BAMFILES
basename(fls)

## -------------------------------------------------------------------------------------------------
library(GenomicAlignments)
bfls = BamFileList(fls)
bfl = bfls[[1]]

## -------------------------------------------------------------------------------------------------
ga = readGAlignments(bfl)
ga
table(strand(ga))

## -------------------------------------------------------------------------------------------------
tail(sort(table(cigar(ga))))
ga[cigar(ga) != "72M"]

## -------------------------------------------------------------------------------------------------
summarizeJunctions(ga)
junctions <- summarizeJunctions(ga, with.revmap=TRUE)
ga[ junctions$revmap[[1]] ]

## -------------------------------------------------------------------------------------------------
coverage(bfl)$chr14

## -------------------------------------------------------------------------------------------------
library(org.Hs.eg.db)

## -------------------------------------------------------------------------------------------------
select(org.Hs.eg.db, "HNRNPC", c("ENTREZID", "GENENAME"), "SYMBOL")
sym2eg <- mapIds(org.Hs.eg.db, "HNRNPC", "ENTREZID", "SYMBOL")

## -------------------------------------------------------------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene

## -------------------------------------------------------------------------------------------------
gns <- genes(txdb)
exonsBy(txdb, "gene")[sym2eg]

## -------------------------------------------------------------------------------------------------
library(Rsamtools)
param <- ScanBamParam(which=gns[sym2eg])
readGAlignments(bfl, param=param)

## -------------------------------------------------------------------------------------------------
library(airway)
data(airway)
airway

## -------------------------------------------------------------------------------------------------
x <- assay(airway)
class(x)
dim(x)
head(x)
colData(airway)
rowRanges(airway)

## -------------------------------------------------------------------------------------------------
symid <- mapIds(org.Hs.eg.db, rownames(airway), "SYMBOL", "ENSEMBL")

## -------------------------------------------------------------------------------------------------
mcols(rowRanges(airway))$symid <- symid

## -------------------------------------------------------------------------------------------------
cidx <- colData(airway)$dex %in% "trt"
airway[, cidx]
## shortcut
airway[, airway$dex %in% "trt"]

## -------------------------------------------------------------------------------------------------
chr14 <- as(seqinfo(rowRanges(airway)), "GRanges")["14"]
ridx <- rowRanges(airway) %over% chr14
airway[ridx,]
## shortcut
chr14 <- as(seqinfo(airway), "GRanges")["14"]
airway[airway %over% chr14,]

## -------------------------------------------------------------------------------------------------
library(AnnotationHub)
hub <- AnnotationHub()
query(hub, c("epigenome", "metadata"))
meta <- hub[["AH41830"]]

## -------------------------------------------------------------------------------------------------
table(meta$ANATOMY)
meta[meta$ANATOMY == "LIVER",]

## -------------------------------------------------------------------------------------------------
query(hub, c("E118", "mnemonic"))
E118 <- hub[["AH46971"]]
E118

## -------------------------------------------------------------------------------------------------
table(E118$name)
E118[E118$name %in% "Heterochromatin"]

## ----biomart, eval=FALSE--------------------------------------------------------------------------
#  library(biomaRt)
#  head(listMarts(), 3)                      ## list marts
#  head(listDatasets(useMart("ensembl")), 3) ## mart datasets
#  ensembl <-                                ## fully specified mart
#      useMart("ensembl", dataset = "hsapiens_gene_ensembl")
#  
#  head(listFilters(ensembl), 3)             ## filters
#  myFilter <- "chromosome_name"
#  substr(filterOptions(myFilter, ensembl), 1, 50) ## return values
#  myValues <- c("21", "22")
#  head(listAttributes(ensembl), 3)          ## attributes
#  myAttributes <- c("ensembl_gene_id","chromosome_name")
#  
#  ## assemble and query the mart
#  res <- getBM(attributes =  myAttributes, filters =  myFilter,
#               values =  myValues, mart = ensembl)

