Most functions that are available in the Pavian shiny interface can also be used from the command line.
library(pavian)
sample_data <- pavian::read_sample_data(system.file("shinyapp","example-data","brain-biopsies",package="pavian"))
reports <- pavian::read_reports(sample_data$ReportFilePath, sample_data$Name)
## [Nov05 10:44] Reading 11 reports ...
merged_reports <- pavian::merge_reports2(reports, col_names = sample_data$Name)
pavian::summarize_reports(reports)
## number_of_raw_reads classified_reads chordate_reads
## PT1 12022284 11951847 11245467
## PT2 8294101 8281974 8219504
## PT3 17669644 17609799 16992534
## PT4 29101779 29038413 28453071
## PT5 26919065 26715453 25903121
## PT6 27261739 27159918 26654128
## PT7 19065574 18918495 13462479
## PT8-S1 14957133 14925304 14643350
## PT8-S2 13990253 13961725 13952205
## PT9 26500914 26422494 26183265
## PT10 21319274 21244417 21027311
## artificial_reads unclassified_reads microbial_reads bacterial_reads
## PT1 704383 70437 1790 1497
## PT2 62140 12127 304 141
## PT3 610050 59845 7099 6859
## PT4 583338 63366 1821 1094
## PT5 786873 203612 25314 16046
## PT6 502575 101821 3034 2854
## PT7 5446842 147079 9089 8781
## PT8-S1 281657 31829 58 18
## PT8-S2 9225 28528 52 22
## PT9 236228 78420 2695 2052
## PT10 215939 74857 1029 569
## viral_reads fungal_reads protozoan_reads
## PT1 0 73 27
## PT2 0 0 0
## PT3 3 58 4
## PT4 0 15 21
## PT5 8954 50 8
## PT6 17 11 3
## PT7 74 13 4
## PT8-S1 1 6 1
## PT8-S2 1 1 0
## PT9 1 519 0
## PT10 20 180 0
tax_data <- merged_reports[[1]]
clade_reads <- merged_reports[[2]]
taxon_reads <- merged_reports[[3]]
colSums(clade_reads,na.rm = T)
## PT1 PT2 PT3 PT4 PT5 PT6 PT7
## 362757256 263286609 546326888 912906520 832441890 855069673 452821122
## PT8-S1 PT8-S2 PT9 PT10
## 469746275 446536577 838915270 673820947
colSums(taxon_reads,na.rm = T)
## PT1 PT2 PT3 PT4 PT5 PT6 PT7 PT8-S1
## 12022284 8294101 17669644 29101779 26919065 27261739 19065574 14957133
## PT8-S2 PT9 PT10
## 13990253 26500914 21319274
sel_rows <- pavian::filter_taxa(tax_data,
rm_clades = c("Chordata", "artificial sequences", "unclassified"),
taxRank = "S")
summary(sel_rows)
## Mode FALSE TRUE
## logical 2339 1043
filtered_clade_reads <- pavian::filter_cladeReads(clade_reads, tax_data, c("Chordata", "artificial sequences", "unclassified"))
tax_data1 <- tax_data[sel_rows,]
filtered_clade_reads1 <- filtered_clade_reads[sel_rows, ]
taxon_reads1 <- taxon_reads[sel_rows, ]
head(cbind(tax_data1[,1:3],clade_reads[sel_rows, ])[order(-apply(filtered_clade_reads1,1,max, na.rm=T)),])
## name taxRank taxID PT1 PT2 PT3 PT4 PT5
## 2804 JC polyomavirus S 10632 NA NA NA NA 8944
## 406 Propionibacterium acnes S 1747 2 15 61 87 539
## 1937 Akkermansia muciniphila S 239935 NA NA 121 1 1031
## 113 Escherichia coli S 562 750 44 640 250 548
## 2148 Candidatus Nitrospira defluvii S 330214 NA NA 7 NA 712
## 2354 Candidatus Babela massiliensis S 673862 NA NA NA NA 659
## PT6 PT7 PT8-S1 PT8-S2 PT9 PT10
## 2804 15 NA NA NA NA NA
## 406 63 1459 2 3 222 23
## 1937 102 7 NA NA 14 NA
## 113 447 148 NA NA 174 190
## 2148 12 6 NA NA NA 1
## 2354 7 1 NA NA NA NA
normalized_clade_reads <- normalize(filtered_clade_reads1)
normalized_taxon_reads <- normalize(taxon_reads[sel_rows,], sums = colSums(filtered_clade_reads1,na.rm = T))
head(cbind(tax_data1[,1:3],max=apply(cbind(normalized_clade_reads),1,max, na.rm=T), normalized_clade_reads)[order(-apply(cbind(normalized_clade_reads),1,max, na.rm=T)),])
## name taxRank taxID max PT1
## 113 Escherichia coli S 562 0.8134490 0.813449024
## 2804 JC polyomavirus S 10632 0.4929997 NA
## 55 Saccharomyces cerevisiae S 4932 0.3392857 0.003253796
## 406 Propionibacterium acnes S 1747 0.3000000 0.002169197
## 213 Delftia acidovorans S 80866 0.1363636 0.003253796
## 172 Pseudomonas putida S 303 0.1000000 0.004338395
## PT2 PT3 PT4 PT5 PT6
## 113 0.43564356 0.1236954001 0.43554007 3.020615e-02 0.295634921
## 2804 NA NA NA 4.929997e-01 0.009920635
## 55 NA 0.0001932741 0.00174216 5.512071e-05 NA
## 406 0.14851485 0.0117897178 0.15156794 2.971007e-02 0.041666667
## 213 0.08910891 0.0168148434 NA NA NA
## 172 0.02970297 0.0094704291 NA 2.204829e-04 0.005291005
## PT7 PT8-S1 PT8-S2 PT9 PT10
## 113 0.020029774 NA NA 0.086739781 0.37698413
## 2804 NA NA NA NA NA
## 55 NA 0.22727273 0.1 0.251744766 0.33928571
## 406 0.197455677 0.09090909 0.3 0.110667996 0.04563492
## 213 0.017323048 0.13636364 NA 0.001994018 NA
## 172 0.008526188 0.04545455 0.1 0.004486540 NA
reads_zscore <- robust_zscore(100*cbind(normalized_clade_reads,normalized_taxon_reads), 0.001)
clade_reads_zscore <- reads_zscore[,1:20]
reads_zscore_df <- cbind(tax_data1[,1:3],max=apply(clade_reads_zscore,1,max, na.rm=T), clade_reads_zscore)[order(-apply(clade_reads_zscore,1,max, na.rm=T)),]
## Calculate z-score from the clade reads