Download data and prepare for the analysis

The data are downloaded from GEO using the getGEO function from the GEOquery package. The expression matrix (with probeset IDs), the ages of the samples, and the covariates to be included in the analysis are extracted from the resulting geo object. Please note that this tutorial only demonstrates the functionality of this package and is not a complete gene expression analysis tutorial. We therefore skip many potential QC steps as well as the probeset -> gene ID mapping.

library(GEOquery)
#> Loading required package: Biobase
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:parallel':
#> 
#>     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#>     clusterExport, clusterMap, parApply, parCapply, parLapply,
#>     parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     anyDuplicated, append, as.data.frame, basename, cbind,
#>     colnames, dirname, do.call, duplicated, eval, evalq, Filter,
#>     Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
#>     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#>     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
#>     setdiff, sort, table, tapply, union, unique, unsplit, which,
#>     which.max, which.min
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Setting options('download.file.method.GEOquery'='auto')
#> Setting options('GEOquery.inmemory.gpl'=FALSE)
# Fetch series GSE30272 from GEO, storing/reusing the downloaded files under
# ~/temp/, and keep the first element of the returned result.
geo <- getGEO(GEO = "GSE30272", destdir = "~/temp/")[[1L]]
#> Found 1 file(s)
#> GSE30272_series_matrix.txt.gz
#> Using locally cached version: ~/temp//GSE30272_series_matrix.txt.gz
#> Parsed with column specification:
#> cols(
#>   .default = col_double(),
#>   ID_REF = col_character()
#> )
#> See spec(...) for full column specifications.
#> Using locally cached version of GPL4611 found here:
#> ~/temp//GPL4611.soft
# Pull the sample annotation table and the expression matrix out of `geo`.
pd <- pData(geo)
expmat <- exprs(geo)

# Ages as a numeric vector named by GEO accession, so that samples can be
# matched by name in the steps below.
ages <- setNames(as.numeric(pd$`age:ch1`), pd$geo_accession)

# Candidate confounders, each stored as a factor named by GEO accession.
covs <- list(
  array = as.factor(setNames(pd$`array batch:ch1`, pd$geo_accession)),
  bbs = as.factor(setNames(pd$`brain bank source:ch1`, pd$geo_accession)),
  sex = as.factor(setNames(pd$`Sex:ch1`, pd$geo_accession)),
  race = as.factor(setNames(pd$`race:ch1`, pd$geo_accession))
)

# Keep only samples with a known age of at least 20 (in whatever unit the
# `age:ch1` field uses). The explicit `!is.na()` guard matters: a bare
# `ages[ages >= 20]` would keep an NA-named element for every age that failed
# to parse, and the name-based subsetting below would then fail.
ages <- ages[!is.na(ages) & ages >= 20]

# Restrict the expression matrix and covariates to the retained samples, in
# the same order as `ages`. `drop = FALSE` keeps `expmat` a matrix even if a
# single sample remains.
expmat <- expmat[, names(ages), drop = FALSE]
covs <- lapply(covs, function(x) x[names(ages)])

Result object

The resulting object is a list with several fields.

The list of samples used in the analysis:

The ages in the original format, including only the samples used in the analysis

Correlation between known potential confounders and predicted SVs

# Render the first rows of the covariate/SV correlation results as a
# formatted table.
knitr::kable(head(resx$sva_res$SV_cov_corr))
cov estimate p p.adj SV
array.1 0.0196222 0.5814825 0.9353713 1
array.10 0.0225531 0.5265514 0.9353713 1
array.11 0.0834974 0.0231217 0.1695593 1
array.12 0.0332366 0.4305371 0.9353713 1
array.13 0.1496421 0.0000623 0.0013710 1
array.14 0.0576888 0.2368201 0.8872256 1

Session Info

# Set the output width to 100 characters so the session report is not wrapped.
options(width = 100)
# Print the R version, platform details, and package versions for this run.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 3.6.1 (2019-07-05)
#>  os       macOS Catalina 10.15.2      
#>  system   x86_64, darwin15.6.0        
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_GB.UTF-8                 
#>  ctype    en_GB.UTF-8                 
#>  tz       Europe/Istanbul             
#>  date     2019-12-31                  
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────
#>  package        * version   date       lib source        
#>  annotate         1.62.0    2019-05-02 [2] Bioconductor  
#>  AnnotationDbi    1.46.0    2019-05-02 [2] Bioconductor  
#>  assertthat       0.2.1     2019-03-21 [2] CRAN (R 3.6.0)
#>  backports        1.1.5     2019-10-02 [2] CRAN (R 3.6.0)
#>  Biobase        * 2.44.0    2019-05-02 [2] Bioconductor  
#>  BiocGenerics   * 0.30.0    2019-05-02 [2] Bioconductor  
#>  BiocParallel     1.18.1    2019-08-06 [2] Bioconductor  
#>  bit              1.1-14    2018-05-29 [2] CRAN (R 3.6.0)
#>  bit64            0.9-7     2017-05-08 [2] CRAN (R 3.6.0)
#>  bitops           1.0-6     2013-08-17 [2] CRAN (R 3.6.0)
#>  blob             1.2.0     2019-07-09 [2] CRAN (R 3.6.0)
#>  broom            0.5.2     2019-04-07 [2] CRAN (R 3.6.0)
#>  cellranger       1.1.0     2016-07-27 [2] CRAN (R 3.6.0)
#>  cli              1.1.0     2019-03-19 [2] CRAN (R 3.6.0)
#>  colorspace       1.4-1     2019-03-18 [2] CRAN (R 3.6.0)
#>  crayon           1.3.4     2017-09-16 [2] CRAN (R 3.6.0)
#>  curl             4.0       2019-07-22 [2] CRAN (R 3.6.0)
#>  DBI              1.0.0     2018-05-02 [2] CRAN (R 3.6.0)
#>  desc             1.2.0     2018-05-01 [2] CRAN (R 3.6.0)
#>  digest           0.6.21    2019-09-20 [2] CRAN (R 3.6.0)
#>  dplyr          * 0.8.3     2019-07-04 [2] CRAN (R 3.6.0)
#>  evaluate         0.14      2019-05-28 [2] CRAN (R 3.6.0)
#>  fansi            0.4.0     2018-10-05 [2] CRAN (R 3.6.0)
#>  forcats        * 0.4.0     2019-02-17 [2] CRAN (R 3.6.0)
#>  fs               1.3.1     2019-05-06 [2] CRAN (R 3.6.0)
#>  genefilter       1.66.0    2019-05-02 [2] Bioconductor  
#>  generics         0.0.2     2018-11-29 [2] CRAN (R 3.6.0)
#>  GEOquery       * 2.52.0    2019-05-02 [2] Bioconductor  
#>  ggplot2        * 3.2.1     2019-08-10 [2] CRAN (R 3.6.0)
#>  glue             1.3.1     2019-03-12 [2] CRAN (R 3.6.0)
#>  gtable           0.3.0     2019-03-25 [2] CRAN (R 3.6.0)
#>  haven            2.1.1     2019-07-04 [2] CRAN (R 3.6.0)
#>  hetAge         * 0.1.0     2019-12-30 [1] local         
#>  highr            0.8       2019-03-20 [2] CRAN (R 3.6.0)
#>  hms              0.5.0     2019-07-09 [2] CRAN (R 3.6.0)
#>  htmltools        0.3.6     2017-04-28 [2] CRAN (R 3.6.0)
#>  httr             1.4.1     2019-08-05 [2] CRAN (R 3.6.0)
#>  IRanges          2.18.1    2019-05-31 [2] Bioconductor  
#>  jsonlite         1.6       2018-12-07 [2] CRAN (R 3.6.0)
#>  knitr            1.24      2019-08-08 [2] CRAN (R 3.6.0)
#>  labeling         0.3       2014-08-23 [2] CRAN (R 3.6.0)
#>  lattice          0.20-38   2018-11-04 [2] CRAN (R 3.6.1)
#>  lazyeval         0.2.2     2019-03-15 [2] CRAN (R 3.6.0)
#>  limma            3.40.6    2019-07-26 [2] Bioconductor  
#>  lubridate        1.7.4     2018-04-11 [2] CRAN (R 3.6.0)
#>  magrittr         1.5       2014-11-22 [2] CRAN (R 3.6.0)
#>  MASS             7.3-51.4  2019-03-31 [2] CRAN (R 3.6.1)
#>  Matrix           1.2-17    2019-03-22 [2] CRAN (R 3.6.1)
#>  matrixStats      0.54.0    2018-07-23 [2] CRAN (R 3.6.0)
#>  memoise          1.1.0     2017-04-21 [2] CRAN (R 3.6.0)
#>  mgcv             1.8-28    2019-03-21 [2] CRAN (R 3.6.1)
#>  modelr           0.1.5     2019-08-08 [2] CRAN (R 3.6.0)
#>  munsell          0.5.0     2018-06-12 [2] CRAN (R 3.6.0)
#>  nlme             3.1-140   2019-05-12 [2] CRAN (R 3.6.1)
#>  pillar           1.4.2     2019-06-29 [2] CRAN (R 3.6.0)
#>  pkgconfig        2.0.3     2019-09-22 [2] CRAN (R 3.6.0)
#>  pkgdown          1.4.1     2019-09-15 [2] CRAN (R 3.6.0)
#>  plyr             1.8.4     2016-06-08 [2] CRAN (R 3.6.0)
#>  preprocessCore   1.46.0    2019-05-02 [2] Bioconductor  
#>  purrr          * 0.3.2     2019-03-15 [2] CRAN (R 3.6.0)
#>  R6               2.4.0     2019-02-14 [2] CRAN (R 3.6.0)
#>  Rcpp             1.0.2     2019-07-25 [2] CRAN (R 3.6.0)
#>  RCurl            1.95-4.12 2019-03-04 [2] CRAN (R 3.6.0)
#>  readr          * 1.3.1     2018-12-21 [2] CRAN (R 3.6.0)
#>  readxl           1.3.1     2019-03-13 [2] CRAN (R 3.6.0)
#>  reshape2         1.4.3     2017-12-11 [2] CRAN (R 3.6.0)
#>  rlang            0.4.0     2019-06-25 [2] CRAN (R 3.6.0)
#>  rmarkdown        1.14      2019-07-12 [2] CRAN (R 3.6.0)
#>  rprojroot        1.3-2     2018-01-03 [2] CRAN (R 3.6.0)
#>  RSQLite          2.1.2     2019-07-24 [2] CRAN (R 3.6.0)
#>  rstudioapi       0.10      2019-03-19 [2] CRAN (R 3.6.0)
#>  rvest            0.3.4     2019-05-15 [2] CRAN (R 3.6.0)
#>  S4Vectors        0.22.0    2019-05-02 [2] Bioconductor  
#>  scales           1.0.0     2018-08-09 [2] CRAN (R 3.6.0)
#>  sessioninfo      1.1.1     2018-11-05 [2] CRAN (R 3.6.0)
#>  stringi          1.4.3     2019-03-12 [2] CRAN (R 3.6.0)
#>  stringr        * 1.4.0     2019-02-10 [2] CRAN (R 3.6.0)
#>  survival         2.44-1.1  2019-04-01 [2] CRAN (R 3.6.1)
#>  sva              3.32.1    2019-05-22 [2] Bioconductor  
#>  tibble         * 2.1.3     2019-06-06 [2] CRAN (R 3.6.0)
#>  tidyr          * 0.8.3     2019-03-01 [2] CRAN (R 3.6.0)
#>  tidyselect       0.2.5     2018-10-11 [2] CRAN (R 3.6.0)
#>  tidyverse      * 1.2.1     2017-11-14 [2] CRAN (R 3.6.0)
#>  utf8             1.1.4     2018-05-24 [2] CRAN (R 3.6.0)
#>  vctrs            0.2.0     2019-07-05 [2] CRAN (R 3.6.0)
#>  withr            2.1.2     2018-03-15 [2] CRAN (R 3.6.0)
#>  xfun             0.8       2019-06-25 [2] CRAN (R 3.6.0)
#>  XML              3.98-1.20 2019-06-06 [2] CRAN (R 3.6.0)
#>  xml2             1.2.2     2019-08-09 [2] CRAN (R 3.6.0)
#>  xtable           1.8-4     2019-04-21 [2] CRAN (R 3.6.0)
#>  yaml             2.2.0     2018-07-25 [2] CRAN (R 3.6.0)
#>  zeallot          0.1.0     2018-01-28 [2] CRAN (R 3.6.0)
#> 
#> [1] /private/var/folders/z1/nv26gvmx4_11_968lfd0n5rh0000gn/T/RtmpAH4Avp/temp_libpath729350b707e8
#> [2] /Library/Frameworks/R.framework/Versions/3.6/Resources/library