Download data and prepare for the analysis

Data is downloaded from GEO using the getGEO function from the GEOquery package. The expression matrix (with probeset IDs), the ages of the samples, and the covariates to be included in the analysis are extracted from the resulting geo object. Please note that this tutorial only demonstrates the functionality of this package and is not a proper gene expression analysis tutorial. We therefore skip many potential QC steps as well as the probeset -> gene ID mapping.

library(GEOquery)
#> Loading required package: Biobase
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:parallel':
#> 
#>     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#>     clusterExport, clusterMap, parApply, parCapply, parLapply,
#>     parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     anyDuplicated, append, as.data.frame, basename, cbind,
#>     colnames, dirname, do.call, duplicated, eval, evalq, Filter,
#>     Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
#>     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#>     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
#>     setdiff, sort, table, tapply, union, unique, unsplit, which,
#>     which.max, which.min
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Setting options('download.file.method.GEOquery'='auto')
#> Setting options('GEOquery.inmemory.gpl'=FALSE)
# Fetch the GSE30272 series from GEO, reusing any copy already cached under
# ~/temp/. getGEO() returns a list here; only its first element is kept.
geo <- getGEO(GEO = "GSE30272", destdir = "~/temp/")[[1]]
#> Found 1 file(s)
#> GSE30272_series_matrix.txt.gz
#> Using locally cached version: ~/temp//GSE30272_series_matrix.txt.gz
#> Parsed with column specification:
#> cols(
#>   .default = col_double(),
#>   ID_REF = col_character()
#> )
#> See spec(...) for full column specifications.
#> Using locally cached version of GPL4611 found here:
#> ~/temp//GPL4611.soft
# Sample annotation table and expression matrix (features in rows, samples in
# columns) extracted from the object downloaded from GEO.
pd <- Biobase::pData(geo)
expmat <- Biobase::exprs(geo)

# Ages as a numeric vector named by GEO accession, so that samples can be
# selected by name below.
ages <- setNames(as.numeric(pd$`age:ch1`), pd$geo_accession)

# Covariates as a list of factors, each named by GEO accession.
covs <- list(
  array = as.factor(setNames(pd$`array batch:ch1`, pd$geo_accession)),
  bbs = as.factor(setNames(pd$`brain bank source:ch1`, pd$geo_accession)),
  sex = as.factor(setNames(pd$`Sex:ch1`, pd$geo_accession)),
  race = as.factor(setNames(pd$`race:ch1`, pd$geo_accession))
)

# Keep only samples aged 20 or above. The explicit is.na() guard matters:
# a bare `ages >= 20` is NA for any age that failed to parse, and indexing
# with NA yields NA-named entries that make the column lookup below fail.
ages <- ages[!is.na(ages) & ages >= 20]

# Restrict the expression matrix and covariates to the retained samples, in
# the same order as `ages`. drop = FALSE keeps `expmat` a matrix even if a
# single sample remains.
expmat <- expmat[, names(ages), drop = FALSE]
covs <- lapply(covs, function(x) x[names(ages)])

Result object

The resulting object is a list with several fields.

The list of samples used in the analysis:

The ages in the original format, including only the samples used in the analysis

Covariate coefficients for each feature

# Show the first rows of the covariate coefficient table: one row per feature
# and one column per model term. NA marks terms with no reported coefficient.
head(resx$LR_res$cov_coef)
#>                      (Intercept)  covarray.1 covarray.10 covarray.11
#> HEEBO-080-HCC80N4    0.219322907  0.08750003  -0.1305402  -0.1201039
#> HEEBO-032-HCC32G15  -0.504320419  0.40085120   0.2974034   0.7171240
#> HEEBO-007-HCC7J15    0.560306672  0.26045137   0.2454188  -0.1642902
#> HEEBO-067-HCC67B11  -0.079584084  0.02868392  -0.1608617  -0.2697832
#> HEEBO-102-HCA102M19 -0.006323141 -0.11559981   0.0969940   0.2872104
#> HEEBO-022-HCC22L5   -0.271284067  0.22426268   0.1500722  -0.1117165
#>                     covarray.12 covarray.13 covarray.14 covarray.15
#> HEEBO-080-HCC80N4   -0.12963752  -0.1179102 -0.36487842          NA
#> HEEBO-032-HCC32G15   0.21191424   1.0217497  0.39830074          NA
#> HEEBO-007-HCC7J15    0.05489805  -0.3240637 -0.12649879          NA
#> HEEBO-067-HCC67B11   0.05139789  -0.6360082 -0.34283061          NA
#> HEEBO-102-HCA102M19 -0.09345170   0.6209515  0.40434827          NA
#> HEEBO-022-HCC22L5    0.11412377  -0.3845804 -0.05879186          NA
#>                      covarray.16 covarray.17 covarray.18  covarray.19
#> HEEBO-080-HCC80N4   -0.236383209 -0.01256814 -0.21471822 -0.345928155
#> HEEBO-032-HCC32G15   0.197494525  0.16805447 -0.28884289 -0.602445451
#> HEEBO-007-HCC7J15    0.005987180  0.20146058  0.72478135 -0.052421433
#> HEEBO-067-HCC67B11   0.006673755 -0.12182664  0.22822666  0.299817410
#> HEEBO-102-HCA102M19  0.140891492  0.31722961  0.10286216  0.271018330
#> HEEBO-022-HCC22L5    0.113905883 -0.01687372 -0.02721613 -0.005352384
#>                      covarray.2  covarray.3  covarray.4  covarray.5
#> HEEBO-080-HCC80N4   -0.08483412  0.32088264  0.24378589 -0.07236595
#> HEEBO-032-HCC32G15   0.10447104 -0.08272082 -0.04727094  0.08729463
#> HEEBO-007-HCC7J15    0.11950811  0.37420799  0.07832349  0.14720948
#> HEEBO-067-HCC67B11   0.32186303  0.12626691  0.19028974 -0.12329526
#> HEEBO-102-HCA102M19  0.05549022 -0.25664401  0.01754580  0.02362881
#> HEEBO-022-HCC22L5    0.03997974  0.08424111  0.03023635  0.07037596
#>                      covarray.6  covarray.7   covarray.8 covarray.9
#> HEEBO-080-HCC80N4    0.04599456  0.21514419  0.100014241         NA
#> HEEBO-032-HCC32G15   0.09855558  0.25140655  0.002797911         NA
#> HEEBO-007-HCC7J15    0.39454630  0.31502784 -0.175826731         NA
#> HEEBO-067-HCC67B11   0.10896767 -0.07666651 -0.060366446         NA
#> HEEBO-102-HCA102M19 -0.10134975 -0.11307130  0.084767005         NA
#> HEEBO-022-HCC22L5   -0.08067107 -0.06376785  0.076040328         NA
#>                     covbbs.BTB covbbs.NIMH    covsex.5    covsex.F
#> HEEBO-080-HCC80N4           NA          NA -0.20495141 -0.09298862
#> HEEBO-032-HCC32G15          NA          NA  0.03406164  0.15553867
#> HEEBO-007-HCC7J15           NA          NA -0.01774561 -0.23516077
#> HEEBO-067-HCC67B11          NA          NA  0.59727070 -0.27384718
#> HEEBO-102-HCA102M19         NA          NA -0.19375831  0.14192097
#> HEEBO-022-HCC22L5           NA          NA  0.35810534 -0.17395426
#>                     covsex.M  covrace.AA  covrace.AS covrace.CAUC
#> HEEBO-080-HCC80N4         NA  0.06545684  0.11984152  -0.02736221
#> HEEBO-032-HCC32G15        NA  0.04718792  0.33596865  -0.06211015
#> HEEBO-007-HCC7J15         NA -0.25539515 -0.32161556  -0.37811509
#> HEEBO-067-HCC67B11        NA  0.12182882 -0.04381846   0.14950935
#> HEEBO-102-HCA102M19       NA -0.01308995  0.19807991   0.15780033
#> HEEBO-022-HCC22L5         NA -0.07992426 -0.17728326  -0.15957304
#>                     covrace.HISP
#> HEEBO-080-HCC80N4             NA
#> HEEBO-032-HCC32G15            NA
#> HEEBO-007-HCC7J15             NA
#> HEEBO-067-HCC67B11            NA
#> HEEBO-102-HCA102M19           NA
#> HEEBO-022-HCC22L5             NA

P values for the covariate coefficients for each feature based on linear regression

# Show the first rows of the p-value table for the covariate coefficients.
# In the rows shown, terms whose coefficients are NA in cov_coef have no
# column here.
head(resx$LR_res$cov_p)
#>                     (Intercept) covarray.1 covarray.10  covarray.11
#> HEEBO-080-HCC80N4    0.29198469 0.62988389   0.4725447 0.5178972370
#> HEEBO-032-HCC32G15   0.01780076 0.03082354   0.1077441 0.0002084299
#> HEEBO-007-HCC7J15    0.04386085 0.28093221   0.3096206 0.5053696906
#> HEEBO-067-HCC67B11   0.71553021 0.88050523   0.3999591 0.1683725902
#> HEEBO-102-HCA102M19  0.96026606 0.29838433   0.3827267 0.0124067332
#> HEEBO-022-HCC22L5    0.15459778 0.17797060   0.3665244 0.5105269422
#>                     covarray.12  covarray.13 covarray.14 covarray.16
#> HEEBO-080-HCC80N4     0.5465101 5.232629e-01 0.143065416   0.2263659
#> HEEBO-032-HCC32G15    0.3312232 2.305996e-07 0.114890647   0.3180659
#> HEEBO-007-HCC7J15     0.8474022 1.876049e-01 0.701052952   0.9815353
#> HEEBO-067-HCC67B11    0.8199935 1.325977e-03 0.190199738   0.9740057
#> HEEBO-102-HCA102M19   0.4770681 1.908464e-07 0.008540665   0.2380117
#> HEEBO-022-HCC22L5     0.5613585 2.403765e-02 0.795439984   0.5227135
#>                     covarray.17 covarray.18 covarray.19 covarray.2
#> HEEBO-080-HCC80N4   0.948344590  0.25996072  0.22574852  0.6818626
#> HEEBO-032-HCC32G15  0.393278135  0.13553380  0.03843014  0.6184027
#> HEEBO-007-HCC7J15   0.434723766  0.00472668  0.88972586  0.6636521
#> HEEBO-067-HCC67B11  0.550553075  0.25475218  0.31748642  0.1406300
#> HEEBO-102-HCA102M19 0.008331661  0.37678167  0.12115619  0.6608818
#> HEEBO-022-HCC22L5   0.924183965  0.87554951  0.98359058  0.8325618
#>                     covarray.3 covarray.4 covarray.5 covarray.6 covarray.7
#> HEEBO-080-HCC80N4   0.08014095  0.2372337  0.6965723  0.8000927  0.2594620
#> HEEBO-032-HCC32G15  0.65432612  0.8205873  0.6425228  0.5924712  0.1938148
#> HEEBO-007-HCC7J15   0.12380799  0.7743169  0.5504018  0.1036470  0.2139261
#> HEEBO-067-HCC67B11  0.51024613  0.3795815  0.5276041  0.5684285  0.7016419
#> HEEBO-102-HCA102M19 0.02260143  0.8889990  0.8349345  0.3619898  0.3318732
#> HEEBO-022-HCC22L5   0.61322847  0.8722266  0.6782144  0.6271285  0.7139725
#>                     covarray.8  covsex.5     covsex.F covrace.AA
#> HEEBO-080-HCC80N4    0.6231264 0.5561418 0.1586798357  0.7170437
#> HEEBO-032-HCC32G15   0.9891698 0.9230422 0.0208280436  0.7965039
#> HEEBO-007-HCC7J15    0.5154375 0.9693637 0.0078805728  0.2878791
#> HEEBO-067-HCC67B11   0.7777626 0.1043588 0.0001194952  0.5213527
#> HEEBO-102-HCA102M19  0.4957549 0.3630518 0.0005608039  0.9055834
#> HEEBO-022-HCC22L5    0.6826493 0.2613828 0.0043796825  0.6283440
#>                     covrace.AS covrace.CAUC
#> HEEBO-080-HCC80N4    0.6282635    0.8831436
#> HEEBO-032-HCC32G15   0.1817446    0.7419918
#> HEEBO-007-HCC7J15    0.3284997    0.1278487
#> HEEBO-067-HCC67B11   0.8662173    0.4454020
#> HEEBO-102-HCA102M19  0.1917118    0.1670572
#> HEEBO-022-HCC22L5    0.4335958    0.3491379

Session Info

# Widen the console to 100 columns before printing the session details.
options(width = 100)
# Record the R version, platform settings and package versions used to build
# this page.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 3.6.1 (2019-07-05)
#>  os       macOS Catalina 10.15.2      
#>  system   x86_64, darwin15.6.0        
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_GB.UTF-8                 
#>  ctype    en_GB.UTF-8                 
#>  tz       Europe/Istanbul             
#>  date     2019-12-31                  
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────
#>  package        * version   date       lib source        
#>  annotate         1.62.0    2019-05-02 [2] Bioconductor  
#>  AnnotationDbi    1.46.0    2019-05-02 [2] Bioconductor  
#>  assertthat       0.2.1     2019-03-21 [2] CRAN (R 3.6.0)
#>  backports        1.1.5     2019-10-02 [2] CRAN (R 3.6.0)
#>  Biobase        * 2.44.0    2019-05-02 [2] Bioconductor  
#>  BiocGenerics   * 0.30.0    2019-05-02 [2] Bioconductor  
#>  BiocParallel     1.18.1    2019-08-06 [2] Bioconductor  
#>  bit              1.1-14    2018-05-29 [2] CRAN (R 3.6.0)
#>  bit64            0.9-7     2017-05-08 [2] CRAN (R 3.6.0)
#>  bitops           1.0-6     2013-08-17 [2] CRAN (R 3.6.0)
#>  blob             1.2.0     2019-07-09 [2] CRAN (R 3.6.0)
#>  broom            0.5.2     2019-04-07 [2] CRAN (R 3.6.0)
#>  cellranger       1.1.0     2016-07-27 [2] CRAN (R 3.6.0)
#>  cli              1.1.0     2019-03-19 [2] CRAN (R 3.6.0)
#>  colorspace       1.4-1     2019-03-18 [2] CRAN (R 3.6.0)
#>  crayon           1.3.4     2017-09-16 [2] CRAN (R 3.6.0)
#>  curl             4.0       2019-07-22 [2] CRAN (R 3.6.0)
#>  DBI              1.0.0     2018-05-02 [2] CRAN (R 3.6.0)
#>  desc             1.2.0     2018-05-01 [2] CRAN (R 3.6.0)
#>  digest           0.6.21    2019-09-20 [2] CRAN (R 3.6.0)
#>  dplyr          * 0.8.3     2019-07-04 [2] CRAN (R 3.6.0)
#>  evaluate         0.14      2019-05-28 [2] CRAN (R 3.6.0)
#>  fansi            0.4.0     2018-10-05 [2] CRAN (R 3.6.0)
#>  forcats        * 0.4.0     2019-02-17 [2] CRAN (R 3.6.0)
#>  fs               1.3.1     2019-05-06 [2] CRAN (R 3.6.0)
#>  genefilter       1.66.0    2019-05-02 [2] Bioconductor  
#>  generics         0.0.2     2018-11-29 [2] CRAN (R 3.6.0)
#>  GEOquery       * 2.52.0    2019-05-02 [2] Bioconductor  
#>  ggplot2        * 3.2.1     2019-08-10 [2] CRAN (R 3.6.0)
#>  glue             1.3.1     2019-03-12 [2] CRAN (R 3.6.0)
#>  gtable           0.3.0     2019-03-25 [2] CRAN (R 3.6.0)
#>  haven            2.1.1     2019-07-04 [2] CRAN (R 3.6.0)
#>  hetAge         * 0.1.0     2019-12-30 [1] local         
#>  hms              0.5.0     2019-07-09 [2] CRAN (R 3.6.0)
#>  htmltools        0.3.6     2017-04-28 [2] CRAN (R 3.6.0)
#>  httr             1.4.1     2019-08-05 [2] CRAN (R 3.6.0)
#>  IRanges          2.18.1    2019-05-31 [2] Bioconductor  
#>  jsonlite         1.6       2018-12-07 [2] CRAN (R 3.6.0)
#>  knitr            1.24      2019-08-08 [2] CRAN (R 3.6.0)
#>  labeling         0.3       2014-08-23 [2] CRAN (R 3.6.0)
#>  lattice          0.20-38   2018-11-04 [2] CRAN (R 3.6.1)
#>  lazyeval         0.2.2     2019-03-15 [2] CRAN (R 3.6.0)
#>  limma            3.40.6    2019-07-26 [2] Bioconductor  
#>  lubridate        1.7.4     2018-04-11 [2] CRAN (R 3.6.0)
#>  magrittr         1.5       2014-11-22 [2] CRAN (R 3.6.0)
#>  MASS             7.3-51.4  2019-03-31 [2] CRAN (R 3.6.1)
#>  Matrix           1.2-17    2019-03-22 [2] CRAN (R 3.6.1)
#>  matrixStats      0.54.0    2018-07-23 [2] CRAN (R 3.6.0)
#>  memoise          1.1.0     2017-04-21 [2] CRAN (R 3.6.0)
#>  mgcv             1.8-28    2019-03-21 [2] CRAN (R 3.6.1)
#>  modelr           0.1.5     2019-08-08 [2] CRAN (R 3.6.0)
#>  munsell          0.5.0     2018-06-12 [2] CRAN (R 3.6.0)
#>  nlme             3.1-140   2019-05-12 [2] CRAN (R 3.6.1)
#>  pillar           1.4.2     2019-06-29 [2] CRAN (R 3.6.0)
#>  pkgconfig        2.0.3     2019-09-22 [2] CRAN (R 3.6.0)
#>  pkgdown          1.4.1     2019-09-15 [2] CRAN (R 3.6.0)
#>  plyr             1.8.4     2016-06-08 [2] CRAN (R 3.6.0)
#>  preprocessCore   1.46.0    2019-05-02 [2] Bioconductor  
#>  purrr          * 0.3.2     2019-03-15 [2] CRAN (R 3.6.0)
#>  R6               2.4.0     2019-02-14 [2] CRAN (R 3.6.0)
#>  Rcpp             1.0.2     2019-07-25 [2] CRAN (R 3.6.0)
#>  RCurl            1.95-4.12 2019-03-04 [2] CRAN (R 3.6.0)
#>  readr          * 1.3.1     2018-12-21 [2] CRAN (R 3.6.0)
#>  readxl           1.3.1     2019-03-13 [2] CRAN (R 3.6.0)
#>  reshape2         1.4.3     2017-12-11 [2] CRAN (R 3.6.0)
#>  rlang            0.4.0     2019-06-25 [2] CRAN (R 3.6.0)
#>  rmarkdown        1.14      2019-07-12 [2] CRAN (R 3.6.0)
#>  rprojroot        1.3-2     2018-01-03 [2] CRAN (R 3.6.0)
#>  RSQLite          2.1.2     2019-07-24 [2] CRAN (R 3.6.0)
#>  rstudioapi       0.10      2019-03-19 [2] CRAN (R 3.6.0)
#>  rvest            0.3.4     2019-05-15 [2] CRAN (R 3.6.0)
#>  S4Vectors        0.22.0    2019-05-02 [2] Bioconductor  
#>  scales           1.0.0     2018-08-09 [2] CRAN (R 3.6.0)
#>  sessioninfo      1.1.1     2018-11-05 [2] CRAN (R 3.6.0)
#>  stringi          1.4.3     2019-03-12 [2] CRAN (R 3.6.0)
#>  stringr        * 1.4.0     2019-02-10 [2] CRAN (R 3.6.0)
#>  survival         2.44-1.1  2019-04-01 [2] CRAN (R 3.6.1)
#>  sva              3.32.1    2019-05-22 [2] Bioconductor  
#>  tibble         * 2.1.3     2019-06-06 [2] CRAN (R 3.6.0)
#>  tidyr          * 0.8.3     2019-03-01 [2] CRAN (R 3.6.0)
#>  tidyselect       0.2.5     2018-10-11 [2] CRAN (R 3.6.0)
#>  tidyverse      * 1.2.1     2017-11-14 [2] CRAN (R 3.6.0)
#>  utf8             1.1.4     2018-05-24 [2] CRAN (R 3.6.0)
#>  vctrs            0.2.0     2019-07-05 [2] CRAN (R 3.6.0)
#>  withr            2.1.2     2018-03-15 [2] CRAN (R 3.6.0)
#>  xfun             0.8       2019-06-25 [2] CRAN (R 3.6.0)
#>  XML              3.98-1.20 2019-06-06 [2] CRAN (R 3.6.0)
#>  xml2             1.2.2     2019-08-09 [2] CRAN (R 3.6.0)
#>  xtable           1.8-4     2019-04-21 [2] CRAN (R 3.6.0)
#>  yaml             2.2.0     2018-07-25 [2] CRAN (R 3.6.0)
#>  zeallot          0.1.0     2018-01-28 [2] CRAN (R 3.6.0)
#> 
#> [1] /private/var/folders/z1/nv26gvmx4_11_968lfd0n5rh0000gn/T/RtmpAH4Avp/temp_libpath729350b707e8
#> [2] /Library/Frameworks/R.framework/Versions/3.6/Resources/library