We begin by showing how SignIT runs and what its resulting output looks like. We then compare this against existing methods for individual sample mutation signature decomposition. This will serve to show the approach for each method, and begin a discussion of strengths and limitations. We will use the standard COSMIC 30-signature reference mutation set. You can configure this in the code below. We will also use msimr to create simulated mutation catalogs. You can also configure the settings for the catalogs below.
The following simulation shows what happens when novel signatures are present.
normalize <- function(v) {
return(v / sum(v))
}
n_signatures = 8
n_mutations = 10000
reference_signatures <- get_reference_signatures()
mode = 'demo'
if (mode == 'random') {
chosen_signatures <- sample(paste('Signature', 1:30), size = n_signatures, replace = F)
omitted_signature <- sample(chosen_signatures, size = 1)
remaining_signatures <- names(reference_signatures)[! names(reference_signatures) == omitted_signature]
test_ref <- reference_signatures[, c('mutation_type', chosen_signatures)]
known_ref <- reference_signatures[, remaining_signatures]
exposures <- tibble(
signature = reference_signatures %>% select(-mutation_type) %>% names()
) %>%
mutate(
exposure = if_else(
signature %in% chosen_signatures,
runif(n()),
0
),
exposure = normalize(exposure) * n_mutations
)
simulated_catalog <- msimR::get_simulated_mutation_catalog(
signatures = reference_signatures,
exposures = exposures
)
} else if (mode == 'demo') {
data('missing_signature_demo_data')
attach(missing_signature_demo_data)
}
signit_exposures <- get_exposures(simulated_catalog, known_ref)
## Sampling
##
## SAMPLING FOR MODEL 'signature_model' NOW (CHAIN 1).
##
## Gradient evaluation took 0 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 400 [ 0%] (Warmup)
## Iteration: 10 / 400 [ 2%] (Warmup)
## Iteration: 20 / 400 [ 5%] (Warmup)
## Iteration: 30 / 400 [ 7%] (Warmup)
## Iteration: 40 / 400 [ 10%] (Warmup)
## Iteration: 50 / 400 [ 12%] (Warmup)
## Iteration: 60 / 400 [ 15%] (Warmup)
## Iteration: 70 / 400 [ 17%] (Warmup)
## Iteration: 80 / 400 [ 20%] (Warmup)
## Iteration: 90 / 400 [ 22%] (Warmup)
## Iteration: 100 / 400 [ 25%] (Warmup)
## Iteration: 110 / 400 [ 27%] (Warmup)
## Iteration: 120 / 400 [ 30%] (Warmup)
## Iteration: 130 / 400 [ 32%] (Warmup)
## Iteration: 140 / 400 [ 35%] (Warmup)
## Iteration: 150 / 400 [ 37%] (Warmup)
## Iteration: 160 / 400 [ 40%] (Warmup)
## Iteration: 170 / 400 [ 42%] (Warmup)
## Iteration: 180 / 400 [ 45%] (Warmup)
## Iteration: 190 / 400 [ 47%] (Warmup)
## Iteration: 200 / 400 [ 50%] (Warmup)
## Iteration: 201 / 400 [ 50%] (Sampling)
## Iteration: 210 / 400 [ 52%] (Sampling)
## Iteration: 220 / 400 [ 55%] (Sampling)
## Iteration: 230 / 400 [ 57%] (Sampling)
## Iteration: 240 / 400 [ 60%] (Sampling)
## Iteration: 250 / 400 [ 62%] (Sampling)
## Iteration: 260 / 400 [ 65%] (Sampling)
## Iteration: 270 / 400 [ 67%] (Sampling)
## Iteration: 280 / 400 [ 70%] (Sampling)
## Iteration: 290 / 400 [ 72%] (Sampling)
## Iteration: 300 / 400 [ 75%] (Sampling)
## Iteration: 310 / 400 [ 77%] (Sampling)
## Iteration: 320 / 400 [ 80%] (Sampling)
## Iteration: 330 / 400 [ 82%] (Sampling)
## Iteration: 340 / 400 [ 85%] (Sampling)
## Iteration: 350 / 400 [ 87%] (Sampling)
## Iteration: 360 / 400 [ 90%] (Sampling)
## Iteration: 370 / 400 [ 92%] (Sampling)
## Iteration: 380 / 400 [ 95%] (Sampling)
## Iteration: 390 / 400 [ 97%] (Sampling)
## Iteration: 400 / 400 [100%] (Sampling)
##
## Elapsed Time: 0.952 seconds (Warm-up)
## 0.279 seconds (Sampling)
## 1.231 seconds (Total)
##
##
## SAMPLING FOR MODEL 'signature_model' NOW (CHAIN 2).
##
## Gradient evaluation took 0 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 400 [ 0%] (Warmup)
## Iteration: 10 / 400 [ 2%] (Warmup)
## Iteration: 20 / 400 [ 5%] (Warmup)
## Iteration: 30 / 400 [ 7%] (Warmup)
## Iteration: 40 / 400 [ 10%] (Warmup)
## Iteration: 50 / 400 [ 12%] (Warmup)
## Iteration: 60 / 400 [ 15%] (Warmup)
## Iteration: 70 / 400 [ 17%] (Warmup)
## Iteration: 80 / 400 [ 20%] (Warmup)
## Iteration: 90 / 400 [ 22%] (Warmup)
## Iteration: 100 / 400 [ 25%] (Warmup)
## Iteration: 110 / 400 [ 27%] (Warmup)
## Iteration: 120 / 400 [ 30%] (Warmup)
## Iteration: 130 / 400 [ 32%] (Warmup)
## Iteration: 140 / 400 [ 35%] (Warmup)
## Iteration: 150 / 400 [ 37%] (Warmup)
## Iteration: 160 / 400 [ 40%] (Warmup)
## Iteration: 170 / 400 [ 42%] (Warmup)
## Iteration: 180 / 400 [ 45%] (Warmup)
## Iteration: 190 / 400 [ 47%] (Warmup)
## Iteration: 200 / 400 [ 50%] (Warmup)
## Iteration: 201 / 400 [ 50%] (Sampling)
## Iteration: 210 / 400 [ 52%] (Sampling)
## Iteration: 220 / 400 [ 55%] (Sampling)
## Iteration: 230 / 400 [ 57%] (Sampling)
## Iteration: 240 / 400 [ 60%] (Sampling)
## Iteration: 250 / 400 [ 62%] (Sampling)
## Iteration: 260 / 400 [ 65%] (Sampling)
## Iteration: 270 / 400 [ 67%] (Sampling)
## Iteration: 280 / 400 [ 70%] (Sampling)
## Iteration: 290 / 400 [ 72%] (Sampling)
## Iteration: 300 / 400 [ 75%] (Sampling)
## Iteration: 310 / 400 [ 77%] (Sampling)
## Iteration: 320 / 400 [ 80%] (Sampling)
## Iteration: 330 / 400 [ 82%] (Sampling)
## Iteration: 340 / 400 [ 85%] (Sampling)
## Iteration: 350 / 400 [ 87%] (Sampling)
## Iteration: 360 / 400 [ 90%] (Sampling)
## Iteration: 370 / 400 [ 92%] (Sampling)
## Iteration: 380 / 400 [ 95%] (Sampling)
## Iteration: 390 / 400 [ 97%] (Sampling)
## Iteration: 400 / 400 [100%] (Sampling)
##
## Elapsed Time: 0.862 seconds (Warm-up)
## 0.335 seconds (Sampling)
## 1.197 seconds (Total)
##
##
## SAMPLING FOR MODEL 'signature_model' NOW (CHAIN 3).
##
## Gradient evaluation took 0 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 400 [ 0%] (Warmup)
## Iteration: 10 / 400 [ 2%] (Warmup)
## Iteration: 20 / 400 [ 5%] (Warmup)
## Iteration: 30 / 400 [ 7%] (Warmup)
## Iteration: 40 / 400 [ 10%] (Warmup)
## Iteration: 50 / 400 [ 12%] (Warmup)
## Iteration: 60 / 400 [ 15%] (Warmup)
## Iteration: 70 / 400 [ 17%] (Warmup)
## Iteration: 80 / 400 [ 20%] (Warmup)
## Iteration: 90 / 400 [ 22%] (Warmup)
## Iteration: 100 / 400 [ 25%] (Warmup)
## Iteration: 110 / 400 [ 27%] (Warmup)
## Iteration: 120 / 400 [ 30%] (Warmup)
## Iteration: 130 / 400 [ 32%] (Warmup)
## Iteration: 140 / 400 [ 35%] (Warmup)
## Iteration: 150 / 400 [ 37%] (Warmup)
## Iteration: 160 / 400 [ 40%] (Warmup)
## Iteration: 170 / 400 [ 42%] (Warmup)
## Iteration: 180 / 400 [ 45%] (Warmup)
## Iteration: 190 / 400 [ 47%] (Warmup)
## Iteration: 200 / 400 [ 50%] (Warmup)
## Iteration: 201 / 400 [ 50%] (Sampling)
## Iteration: 210 / 400 [ 52%] (Sampling)
## Iteration: 220 / 400 [ 55%] (Sampling)
## Iteration: 230 / 400 [ 57%] (Sampling)
## Iteration: 240 / 400 [ 60%] (Sampling)
## Iteration: 250 / 400 [ 62%] (Sampling)
## Iteration: 260 / 400 [ 65%] (Sampling)
## Iteration: 270 / 400 [ 67%] (Sampling)
## Iteration: 280 / 400 [ 70%] (Sampling)
## Iteration: 290 / 400 [ 72%] (Sampling)
## Iteration: 300 / 400 [ 75%] (Sampling)
## Iteration: 310 / 400 [ 77%] (Sampling)
## Iteration: 320 / 400 [ 80%] (Sampling)
## Iteration: 330 / 400 [ 82%] (Sampling)
## Iteration: 340 / 400 [ 85%] (Sampling)
## Iteration: 350 / 400 [ 87%] (Sampling)
## Iteration: 360 / 400 [ 90%] (Sampling)
## Iteration: 370 / 400 [ 92%] (Sampling)
## Iteration: 380 / 400 [ 95%] (Sampling)
## Iteration: 390 / 400 [ 97%] (Sampling)
## Iteration: 400 / 400 [100%] (Sampling)
##
## Elapsed Time: 1.271 seconds (Warm-up)
## 0.26 seconds (Sampling)
## 1.531 seconds (Total)
##
##
## SAMPLING FOR MODEL 'signature_model' NOW (CHAIN 4).
##
## Gradient evaluation took 0 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 400 [ 0%] (Warmup)
## Iteration: 10 / 400 [ 2%] (Warmup)
## Iteration: 20 / 400 [ 5%] (Warmup)
## Iteration: 30 / 400 [ 7%] (Warmup)
## Iteration: 40 / 400 [ 10%] (Warmup)
## Iteration: 50 / 400 [ 12%] (Warmup)
## Iteration: 60 / 400 [ 15%] (Warmup)
## Iteration: 70 / 400 [ 17%] (Warmup)
## Iteration: 80 / 400 [ 20%] (Warmup)
## Iteration: 90 / 400 [ 22%] (Warmup)
## Iteration: 100 / 400 [ 25%] (Warmup)
## Iteration: 110 / 400 [ 27%] (Warmup)
## Iteration: 120 / 400 [ 30%] (Warmup)
## Iteration: 130 / 400 [ 32%] (Warmup)
## Iteration: 140 / 400 [ 35%] (Warmup)
## Iteration: 150 / 400 [ 37%] (Warmup)
## Iteration: 160 / 400 [ 40%] (Warmup)
## Iteration: 170 / 400 [ 42%] (Warmup)
## Iteration: 180 / 400 [ 45%] (Warmup)
## Iteration: 190 / 400 [ 47%] (Warmup)
## Iteration: 200 / 400 [ 50%] (Warmup)
## Iteration: 201 / 400 [ 50%] (Sampling)
## Iteration: 210 / 400 [ 52%] (Sampling)
## Iteration: 220 / 400 [ 55%] (Sampling)
## Iteration: 230 / 400 [ 57%] (Sampling)
## Iteration: 240 / 400 [ 60%] (Sampling)
## Iteration: 250 / 400 [ 62%] (Sampling)
## Iteration: 260 / 400 [ 65%] (Sampling)
## Iteration: 270 / 400 [ 67%] (Sampling)
## Iteration: 280 / 400 [ 70%] (Sampling)
## Iteration: 290 / 400 [ 72%] (Sampling)
## Iteration: 300 / 400 [ 75%] (Sampling)
## Iteration: 310 / 400 [ 77%] (Sampling)
## Iteration: 320 / 400 [ 80%] (Sampling)
## Iteration: 330 / 400 [ 82%] (Sampling)
## Iteration: 340 / 400 [ 85%] (Sampling)
## Iteration: 350 / 400 [ 87%] (Sampling)
## Iteration: 360 / 400 [ 90%] (Sampling)
## Iteration: 370 / 400 [ 92%] (Sampling)
## Iteration: 380 / 400 [ 95%] (Sampling)
## Iteration: 390 / 400 [ 97%] (Sampling)
## Iteration: 400 / 400 [100%] (Sampling)
##
## Elapsed Time: 0.924 seconds (Warm-up)
## 0.4 seconds (Sampling)
## 1.324 seconds (Total)
plot_exposure_posteriors_bleed(signit_exposures)
plot_nnls_solution(signit_exposures) %>% print
deconstructsigs_data <- as.data.frame(simulated_catalog) %>%
mutate(sample = 'x', count = count/sum(count)) %>%
spread(mutation_type, count) %>%
column_to_rownames('sample')
dsigs_signatures <- known_ref %>%
as.data.frame %>%
gather(signature, probability, -mutation_type) %>%
spread(mutation_type, probability) %>%
mutate(
signature = gsub(' ', '.', signature),
signature = factor(signature, levels = paste('Signature', 1:30, sep = '.'))
) %>%
arrange(signature) %>%
column_to_rownames('signature')
dsigs_exposures <- whichSignatures(deconstructsigs_data, signatures.ref = dsigs_signatures)
deconstructSigs::makePie(dsigs_exposures)
sigest_exposures <- suboptimalSigExposures(
m = simulated_catalog$count %>% as.matrix(),
P = reference_signatures_as_matrix(known_ref, simulated_catalog),
R = 1000,
) %>%
.$exposures %>%
as.data.frame() %>%
rownames_to_column('signature') %>%
gather(replicate, exposure, -signature) %>%
as_tibble()
exposures %>% filter(exposure != 0)
## # A tibble: 8 x 2
## signature exposure
## <chr> <dbl>
## 1 Signature 2 394.
## 2 Signature 3 1292.
## 3 Signature 8 734.
## 4 Signature 10 873.
## 5 Signature 18 663.
## 6 Signature 21 1607.
## 7 Signature 27 2241.
## 8 Signature 28 2195.
true_exposure_plot <- exposures %>%
mutate(
signature = gsub('Signature ', '', signature) %>% factor(levels = 1:30)
) %>%
ggplot(aes(
x = signature,
y = exposure
)) +
geom_point() +
labs(
y = 'Exposure\n(Mutations)'
) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
sigest_exposure_plot <- sigest_exposures %>%
mutate(
signature = gsub('Signature ', '', signature) %>% factor(levels = 1:30),
exposure = exposure * n_mutations
) %>%
ggplot(aes(
x = signature,
y = exposure
)) +
geom_violin() +
labs(
y = 'Exposure\n(Mutations)'
) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
dsigs_exposure_plot <- dsigs_exposures$weights %>%
as.data.frame %>%
gather('signature', 'exposure') %>%
bind_rows(tribble(
~signature, ~exposure,
'Unknown', 1 - sum(dsigs_exposures$weights)
)) %>%
mutate(
signature = gsub('Signature.', '', signature) %>% factor(levels = c(1:30, 'Unknown')),
exposure = exposure * n_mutations
) %>%
ggplot(aes(
x = signature,
y = exposure
)) +
geom_point() +
labs(
y = 'Exposure\n(Mutations)'
) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
signit_exposures$exposure_chain %>%
mutate(signature = gsub('Signature ', '', signature) %>% factor(levels = 1:30)) %>%
ggplot(aes(
x = signature,
y = exposure
)) +
geom_violin() +
scale_x_discrete(drop = F) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
signit_exposure_plot <- plot_exposure_posteriors(signit_exposures) +
scale_x_discrete(drop = F) +
labs(
y = 'Exposure\n(Mutations)'
)
plot_grid(
true_exposure_plot,
dsigs_exposure_plot,
sigest_exposure_plot,
signit_exposure_plot,
ncol = 1,
rel_heights = c(1, 1.2, 1, 1)
)
plot_signature <- function(catalog) {
catalog %>%
mutate(
context = gsub('(.)\\[(.)>(.)\\](.)', '\\1\\4', mutation_type),
base_change = gsub('(.)\\[(.)>(.)\\](.)', '\\2>\\3', mutation_type)
) %>%
ggplot(aes(
x = context,
y = count
)) +
facet_grid(. ~ base_change) +
geom_bar(stat = 'identity')
}
collapsed_exposures = collapse_signatures_by_bleed(signit_exposures)
## Max bleed was 0.777. Merging Signature 22 and Signature 25.
## Sampling
## 28 signature solution obtained.
## Max bleed was 0.471. Merging Signature 21 and Signature 26.
## Sampling
## 27 signature solution obtained.
## Max bleed was 0.456. Merging Signature 18 and Signature 29.
## Sampling
## 26 signature solution obtained.
## Max bleed was 0.461. Merging Signature 24 and Signature 18/Signature 29.
## Sampling
## 25 signature solution obtained.
## Max bleed was 0.401. Merging Signature 3 and Signature 8.
## Sampling
## 24 signature solution obtained.
## Choosing signature set with maximum WAIC
## 25 signature solution chosen
collapsed_exposures_signit_plot <- plot_exposure_posteriors_bleed(collapsed_exposures)
plot_grid(
true_exposure_plot,
collapsed_exposures_signit_plot,
ncol = 1,
rel_heights = c(1,2)
)
exposures %>%
filter(exposure > 0)
## # A tibble: 8 x 2
## signature exposure
## <chr> <dbl>
## 1 Signature 2 394.
## 2 Signature 3 1292.
## 3 Signature 8 734.
## 4 Signature 10 873.
## 5 Signature 18 663.
## 6 Signature 21 1607.
## 7 Signature 27 2241.
## 8 Signature 28 2195.
merged_signature_names = with(
collapsed_exposures,
signature_names[! signature_names %in% signit_exposures$signature_names]
)
merged_signature_table <- collapsed_exposures$reference_signatures %>%
gather(signature, probability, -mutation_type) %>%
filter(signature %in% merged_signature_names)
omitted_signature_spectrum <- reference_signatures[, c('mutation_type', omitted_signature)] %>%
gather(signature, probability, -mutation_type) %>%
mutate(signature = paste('Hidden: ', signature))
merged_signature_table %>%
bind_rows(omitted_signature_spectrum) %>%
mutate(
context = gsub('(.)\\[.>.\\](.)', '\\1\\2', mutation_type),
base_change = gsub('.\\[(.>.)\\].', '\\1', mutation_type),
signature = gsub('Signature ', '', signature)
) %>%
ggplot(aes(
x = context,
y = probability
)) +
facet_grid(signature ~ base_change) +
geom_bar(stat = 'identity') +
theme(
axis.text.x = element_text(angle = 90, size = 6, hjust = 1, vjust = 0.5)
)