Using pavian in R

Florian P Breitwieser

2017-11-05

Most functions that are available in the Pavian shiny interface can also be used from the command line.

library(pavian)
sample_data <- pavian::read_sample_data(system.file("shinyapp","example-data","brain-biopsies",package="pavian"))
reports <- pavian::read_reports(sample_data$ReportFilePath, sample_data$Name)
## [Nov05 10:44] Reading 11 reports ...
merged_reports <- pavian::merge_reports2(reports, col_names = sample_data$Name)

pavian::summarize_reports(reports)
##        number_of_raw_reads classified_reads chordate_reads
## PT1               12022284         11951847       11245467
## PT2                8294101          8281974        8219504
## PT3               17669644         17609799       16992534
## PT4               29101779         29038413       28453071
## PT5               26919065         26715453       25903121
## PT6               27261739         27159918       26654128
## PT7               19065574         18918495       13462479
## PT8-S1            14957133         14925304       14643350
## PT8-S2            13990253         13961725       13952205
## PT9               26500914         26422494       26183265
## PT10              21319274         21244417       21027311
##        artificial_reads unclassified_reads microbial_reads bacterial_reads
## PT1              704383              70437            1790            1497
## PT2               62140              12127             304             141
## PT3              610050              59845            7099            6859
## PT4              583338              63366            1821            1094
## PT5              786873             203612           25314           16046
## PT6              502575             101821            3034            2854
## PT7             5446842             147079            9089            8781
## PT8-S1           281657              31829              58              18
## PT8-S2             9225              28528              52              22
## PT9              236228              78420            2695            2052
## PT10             215939              74857            1029             569
##        viral_reads fungal_reads protozoan_reads
## PT1              0           73              27
## PT2              0            0               0
## PT3              3           58               4
## PT4              0           15              21
## PT5           8954           50               8
## PT6             17           11               3
## PT7             74           13               4
## PT8-S1           1            6               1
## PT8-S2           1            1               0
## PT9              1          519               0
## PT10            20          180               0
tax_data <- merged_reports[[1]]
clade_reads <- merged_reports[[2]]
taxon_reads <- merged_reports[[3]]

colSums(clade_reads,na.rm = T)
##       PT1       PT2       PT3       PT4       PT5       PT6       PT7 
## 362757256 263286609 546326888 912906520 832441890 855069673 452821122 
##    PT8-S1    PT8-S2       PT9      PT10 
## 469746275 446536577 838915270 673820947
colSums(taxon_reads,na.rm = T)
##      PT1      PT2      PT3      PT4      PT5      PT6      PT7   PT8-S1 
## 12022284  8294101 17669644 29101779 26919065 27261739 19065574 14957133 
##   PT8-S2      PT9     PT10 
## 13990253 26500914 21319274
sel_rows <- pavian::filter_taxa(tax_data,
                                rm_clades = c("Chordata", "artificial sequences", "unclassified"),
                                taxRank = "S")
summary(sel_rows)
##    Mode   FALSE    TRUE 
## logical    2339    1043
filtered_clade_reads <- pavian::filter_cladeReads(clade_reads, tax_data, c("Chordata", "artificial sequences", "unclassified"))

tax_data1 <- tax_data[sel_rows,]
filtered_clade_reads1 <- filtered_clade_reads[sel_rows, ]
taxon_reads1 <- taxon_reads[sel_rows, ]

head(cbind(tax_data1[,1:3],clade_reads[sel_rows, ])[order(-apply(filtered_clade_reads1,1,max, na.rm=T)),])
##                                name taxRank  taxID PT1 PT2 PT3 PT4  PT5
## 2804                JC polyomavirus       S  10632  NA  NA  NA  NA 8944
## 406         Propionibacterium acnes       S   1747   2  15  61  87  539
## 1937        Akkermansia muciniphila       S 239935  NA  NA 121   1 1031
## 113                Escherichia coli       S    562 750  44 640 250  548
## 2148 Candidatus Nitrospira defluvii       S 330214  NA  NA   7  NA  712
## 2354 Candidatus Babela massiliensis       S 673862  NA  NA  NA  NA  659
##      PT6  PT7 PT8-S1 PT8-S2 PT9 PT10
## 2804  15   NA     NA     NA  NA   NA
## 406   63 1459      2      3 222   23
## 1937 102    7     NA     NA  14   NA
## 113  447  148     NA     NA 174  190
## 2148  12    6     NA     NA  NA    1
## 2354   7    1     NA     NA  NA   NA
normalized_clade_reads <- normalize(filtered_clade_reads1)
normalized_taxon_reads <- normalize(taxon_reads[sel_rows,], sums = colSums(filtered_clade_reads1,na.rm = T))
head(cbind(tax_data1[,1:3],max=apply(cbind(normalized_clade_reads),1,max, na.rm=T), normalized_clade_reads)[order(-apply(cbind(normalized_clade_reads),1,max, na.rm=T)),])
##                          name taxRank taxID       max         PT1
## 113          Escherichia coli       S   562 0.8134490 0.813449024
## 2804          JC polyomavirus       S 10632 0.4929997          NA
## 55   Saccharomyces cerevisiae       S  4932 0.3392857 0.003253796
## 406   Propionibacterium acnes       S  1747 0.3000000 0.002169197
## 213       Delftia acidovorans       S 80866 0.1363636 0.003253796
## 172        Pseudomonas putida       S   303 0.1000000 0.004338395
##             PT2          PT3        PT4          PT5         PT6
## 113  0.43564356 0.1236954001 0.43554007 3.020615e-02 0.295634921
## 2804         NA           NA         NA 4.929997e-01 0.009920635
## 55           NA 0.0001932741 0.00174216 5.512071e-05          NA
## 406  0.14851485 0.0117897178 0.15156794 2.971007e-02 0.041666667
## 213  0.08910891 0.0168148434         NA           NA          NA
## 172  0.02970297 0.0094704291         NA 2.204829e-04 0.005291005
##              PT7     PT8-S1 PT8-S2         PT9       PT10
## 113  0.020029774         NA     NA 0.086739781 0.37698413
## 2804          NA         NA     NA          NA         NA
## 55            NA 0.22727273    0.1 0.251744766 0.33928571
## 406  0.197455677 0.09090909    0.3 0.110667996 0.04563492
## 213  0.017323048 0.13636364     NA 0.001994018         NA
## 172  0.008526188 0.04545455    0.1 0.004486540         NA
reads_zscore <- robust_zscore(100*cbind(normalized_clade_reads,normalized_taxon_reads), 0.001)
clade_reads_zscore <- reads_zscore[,1:20]
reads_zscore_df <- cbind(tax_data1[,1:3],max=apply(clade_reads_zscore,1,max, na.rm=T), clade_reads_zscore)[order(-apply(clade_reads_zscore,1,max, na.rm=T)),]
## Calculate z-score from the clade reads