This simulation study aims to compare the performance of the Pareto Frontier approach for model calibration against a distance-based summary score (selecting the 1% of inputs sets that minimise the sum of absolute differences between model outputs and calibration targets).
We conducted a simulation study, using the well-documented Sick-Sicker Model. As in the original study, three parameters were calibrated: pS1_S2, RRS1_D, and RRS2_D (see figure below and/or the drawParams
and runTrueMarkov
functions in ./R/functions.R
)
First we illustrate the methods using a single true set of parameters and assess the performance (in terms of iNMB) against a single fixed (setion 1.1) and uncertain (section 1.2) set of calibration targets.
We then expand the simulation and test the performance of the Pareto Frontier approach more systematically for any true input parameter values and across different numbers of calibration iterations and four different target sets, whereby we consider fixed (2.1) as well as stochastic target sets (2.2).
True parameters were drawn from the following distributions:
The number of test sets take the following values:
Targets:
Target sets:
# LOAD PACKAGES AND FUNCTIONS
library(rPref)
library(reshape2)
library(ggplot2)
library(foreach)
library(parallel)
source("./R/functions.R")
# META PARAMS
HORIZON <- 30
DISC_RATE <- 0.03
THRESH <- 1000
set.seed(2021)
This analysis aims to provide some intuition for the simulation analysis. It shows the computation for a single case, with a fixed set of true model parameters and corresponding calibration targets. It closely mirrors the original study by Enns et al. (2015).
# Number of candidate input sets being evaluated
n_calib <- 10000
# true markov model with fixed parameters
true_params <- drawParams(pS1_S2 = 0.02, RRS1_D = 3, RRS2_D = 15)
true_model <- runTrueMarkov(params = true_params[1,], return_targets = T)
# corresponding true inmb
true_inmb <- true_model$ce_res[4]
# Derive true caloibration targets (without uncertainty)
target_set <- true_model$targets # True targets
target_set[1:4]
## [1] 0.9662136 0.9252154 0.2331344 15.2496760
# run calibration and estimate iNMB
# draw random test params
test_params <- drawParams(
pS1_S2 = runif(n_calib, 0.01, 0.48),
RRS1_D = runif(n_calib, 1, 4.5),
RRS2_D = runif(n_calib, 1, 20)
)
# run model for all random param sets and compre to targets
calib_res <- runCalibration(
params = test_params,
targets = target_set,
RUNS = n_calib
)
# evaluate target achievement and select winning param sets
selected_sets <- evalTargets(target_diff = calib_res$res_mat)
# retrieve results over selected param sets
res_list <- evalRes(selected_sets, test_params, true_inmb)
# number of paremter sets selected
table(res_list$res_df$L1)
##
## pareto sum_pareto sum_percent
## 21 21 100
# true inmb
true_inmb
## nmb
## 23068.8
# estimated inb and abs error
res_list$inmb_df
## L1 inmb inmb_error inmb_error_abs pareto_diff
## 1 pareto 13847.91 -9220.8874 9755.700 0.000
## 2 sum_pareto 22029.38 -1039.4202 1336.645 -8419.054
## 3 sum_percent 22225.78 -843.0131 1204.512 -8551.188
# ICER plane
ggplot() +
geom_point(
data = res_list$res_df,
aes(x = incr_C, y = incr_Q, col = L1),
alpha = 0.5, size = 1
) +
geom_point(
aes(
x = true_model$ce_res["incr_C"],
y = true_model$ce_res["incr_Q"],
col = "True"
), alpha = 2
) +
xlim(c(45000, 80000)) +
ylim(c(0.35, 0.8)) +
theme_minimal()
## Warning: Removed 2 rows containing missing values (geom_point).
Same as above with micro simulation to generate stochastic calibration targets.
# Number of candidate input sets being evaluated
n_calib <- 10000
# true markov model with fixed parameters
true_params <- drawParams(pS1_S2 = 0.02, RRS1_D = 3, RRS2_D = 15)
true_model <- runTrueMarkov(params = true_params[1,], return_targets = T)
# corresponding true inmb
true_inmb <- true_model$ce_res[4]
# Derive true caloibration targets (without uncertainty)
target_set <- simStudy(n = 1000, true_params = true_params[1,]) #micro sim
target_set[1:4]
## [1] 0.9640000 0.9280000 0.2284483 2.7283555
# run calibration and estimate iNMB
# draw random test params
test_params <- drawParams(
pS1_S2 = runif(n_calib, 0.01, 0.48),
RRS1_D = runif(n_calib, 1, 4.5),
RRS2_D = runif(n_calib, 1, 20)
)
# run model for all random param sets and compre to targets
calib_res <- runCalibration(
params = test_params,
targets = target_set,
RUNS = n_calib
)
# evaluate target achievement and select winning param sets
selected_sets <- evalTargets(target_diff = calib_res$res_mat)
# retrieve results over selected param sets
res_list <- evalRes(selected_sets, test_params, true_inmb)
# number of paremter sets selected
table(res_list$res_df$L1)
##
## pareto sum_pareto sum_percent
## 974 974 100
# true inmb
true_inmb
## nmb
## 23068.8
# estimated inb and abs error
res_list$inmb_df
## L1 inmb inmb_error inmb_error_abs pareto_diff
## 1 pareto 6131.849 -16936.95 17212.01 0.00
## 2 sum_pareto -9432.476 -32501.27 32501.27 15289.27
## 3 sum_percent -9202.755 -32271.55 32271.55 15059.55
# ICER plane
ggplot() +
geom_point(
data = res_list$res_df,
aes(x = incr_C, y = incr_Q, col = L1),
alpha = 0.5, size = 1
) +
geom_point(
aes(
x = true_model$ce_res["incr_C"],
y = true_model$ce_res["incr_Q"],
col = "True"
), alpha = 2
) +
xlim(c(45000, 80000)) +
ylim(c(0.35, 0.8)) +
theme_minimal()
## Warning: Removed 22 rows containing missing values (geom_point).
Analysis steps in short:
Consider running this in parallel in the cloud - otherwise, this may take >24h to run.
# load simulation results
res_mat <- data.frame(readRDS("./output/res_fixed.RDS"))
# re-format
for(i in c(1,3:ncol(res_mat))){
res_mat[,i] <- as.numeric(res_mat[,i])
}
names(res_mat) <- c(
"run","method","inmb","inmb_error","inmb_error_abs","pareto_diff",
"pS1_S2_diff", "RRS1_D_diff", "RRS2_D_diff","run2",
"n_calib","pareto_n","n_study_size","true_inmb"
)
res_mat$target_set <- rep(c(1,2,3,4), each = 3) # forgot to specify this previously
res_mat <- res_mat[res_mat$method %in% c("pareto", "sum_percent"),]
dim(res_mat)
## [1] 480000 15
head(res_mat)
## run method inmb inmb_error inmb_error_abs pareto_diff pS1_S2_diff
## 1 1 pareto -30842.37 -8115.0848 9310.131 0.000 3.525884e-02
## 3 1 sum_percent -22551.84 175.4498 1830.621 -7479.510 -8.716814e-05
## 4 1 pareto -31552.69 -8825.4008 9584.031 0.000 3.863675e-02
## 6 1 sum_percent -23245.88 -518.5938 1581.032 -8003.000 -5.036070e-03
## 7 1 pareto -31274.04 -8546.7580 8595.584 0.000 1.915449e-02
## 9 1 sum_percent -21464.51 1262.7810 1262.781 -7332.803 -3.340701e-03
## RRS1_D_diff RRS2_D_diff run2 n_calib pareto_n n_study_size true_inmb
## 1 -0.54948140 -2.6591060 1 500 13 NA -22727.29
## 3 0.16236510 0.1944475 1 500 13 NA -22727.29
## 4 -0.45180300 -3.2627796 1 500 9 NA -22727.29
## 6 0.17680380 -0.8872276 1 500 9 NA -22727.29
## 7 -0.89001917 -4.2606355 1 500 4 NA -22727.29
## 9 0.07149088 0.6632607 1 500 4 NA -22727.29
## target_set
## 1 1
## 3 1
## 4 2
## 6 2
## 7 3
## 9 3
# relative performance of pareto vs sum
pareto_diff_abs <- res_mat$inmb_error_abs[res_mat$method == "pareto"] - res_mat$inmb_error_abs[res_mat$method == "sum_percent"]
pareto_params_diff_abs <- abs(res_mat[res_mat$method == "pareto", c("pS1_S2_diff", "RRS1_D_diff", "RRS2_D_diff")]) - abs(res_mat[res_mat$method == "sum_percent", c("pS1_S2_diff", "RRS1_D_diff", "RRS2_D_diff")])
names(pareto_params_diff_abs)<-paste0("pareto_", names(pareto_params_diff_abs))
overall_diff_df <- res_mat[res_mat$method == "pareto",]
overall_diff_df$pareto_diff_abs <- pareto_diff_abs
overall_diff_df <- cbind(overall_diff_df,pareto_params_diff_abs)
# ref: calib 50,000
ref_df <- subset(res_mat, n_calib == 50000)
# incr NMB --------
aggregate(inmb_error_abs ~ method + target_set, ref_df, aggFormat, show_range = T)
## method target_set inmb_error_abs.mean (SD) inmb_error_abs.range
## 1 pareto 1 9,702 (4,784) [776; 32,904]
## 2 sum_percent 1 3,787 (1,488) [831; 10,525]
## 3 pareto 2 4,749 (2,468) [425; 24,752]
## 4 sum_percent 2 2,940 (1,255) [738; 8,017]
## 5 pareto 3 6,470 (4,373) [299; 39,916]
## 6 sum_percent 3 2,432 (597) [1,241; 7,189]
## 7 pareto 4 7,592 (3,543) [1,000; 29,721]
## 8 sum_percent 4 2,080 (648) [985; 7,036]
aggregate(pareto_diff_abs ~ n_calib + target_set, overall_diff_df[overall_diff_df$n_calib == 50000,], aggFormat, show_range = T)
## n_calib target_set pareto_diff_abs.mean (SD) pareto_diff_abs.range
## 1 50000 1 5,915 (4,451) [-1,835; 31,930]
## 2 50000 2 1,809 (2,172) [-2,643; 23,686]
## 3 50000 3 4,037 (4,204) [-3,657; 38,081]
## 4 50000 4 5,513 (3,232) [-332; 28,425]
# by n_calib
aggregate(inmb_error_abs ~ method+n_calib + target_set, res_mat, aggFormat, show_range = T)
## method n_calib target_set inmb_error_abs.mean (SD) inmb_error_abs.range
## 1 pareto 500 1 14,838 (7,431) [905; 65,622]
## 2 sum_percent 500 1 3,959 (2,128) [268; 18,753]
## 3 pareto 1000 1 13,940 (6,956) [1,226; 63,246]
## 4 sum_percent 1000 1 3,874 (1,811) [386; 14,260]
## 5 pareto 5000 1 11,971 (5,977) [948; 54,706]
## 6 sum_percent 5000 1 3,803 (1,550) [732; 12,259]
## 7 pareto 10000 1 11,240 (5,572) [993; 47,301]
## 8 sum_percent 10000 1 3,796 (1,517) [668; 11,233]
## 9 pareto 20000 1 10,504 (5,226) [1,132; 41,723]
## 10 sum_percent 20000 1 3,789 (1,495) [780; 10,846]
## 11 pareto 50000 1 9,702 (4,784) [776; 32,904]
## 12 sum_percent 50000 1 3,787 (1,488) [831; 10,525]
## 13 pareto 500 2 9,268 (5,428) [14; 58,171]
## 14 sum_percent 500 2 3,062 (1,695) [168; 12,888]
## 15 pareto 1000 2 8,320 (4,802) [171; 49,819]
## 16 sum_percent 1000 2 3,001 (1,480) [427; 10,462]
## 17 pareto 5000 2 6,558 (3,685) [719; 41,617]
## 18 sum_percent 5000 2 2,950 (1,296) [654; 8,419]
## 19 pareto 10000 2 5,982 (3,261) [715; 36,198]
## 20 sum_percent 10000 2 2,945 (1,274) [709; 8,498]
## 21 pareto 20000 2 5,395 (2,888) [364; 30,335]
## 22 sum_percent 20000 2 2,941 (1,260) [725; 8,147]
## 23 pareto 50000 2 4,749 (2,468) [425; 24,752]
## 24 sum_percent 50000 2 2,940 (1,255) [738; 8,017]
## 25 pareto 500 3 9,922 (6,854) [21; 76,681]
## 26 sum_percent 500 3 2,590 (1,107) [218; 8,714]
## 27 pareto 1000 3 9,205 (6,399) [34; 68,441]
## 28 sum_percent 1000 3 2,506 (869) [572; 7,927]
## 29 pareto 5000 3 7,876 (5,508) [138; 53,089]
## 30 sum_percent 5000 3 2,451 (657) [1,077; 6,855]
## 31 pareto 10000 3 7,389 (5,099) [230; 45,932]
## 32 sum_percent 10000 3 2,441 (626) [1,027; 7,122]
## 33 pareto 20000 3 6,983 (4,802) [222; 40,242]
## 34 sum_percent 20000 3 2,435 (606) [1,136; 6,852]
## 35 pareto 50000 3 6,470 (4,373) [299; 39,916]
## 36 sum_percent 50000 3 2,432 (597) [1,241; 7,189]
## 37 pareto 500 4 13,316 (6,673) [983; 65,622]
## 38 sum_percent 500 4 2,191 (1,025) [205; 9,370]
## 39 pareto 1000 4 12,222 (6,047) [1,417; 64,329]
## 40 sum_percent 1000 4 2,132 (842) [416; 6,909]
## 41 pareto 5000 4 9,930 (4,848) [1,428; 48,258]
## 42 sum_percent 5000 4 2,091 (688) [765; 6,964]
## 43 pareto 10000 4 9,139 (4,374) [1,247; 39,066]
## 44 sum_percent 10000 4 2,086 (668) [877; 7,032]
## 45 pareto 20000 4 8,394 (4,002) [1,367; 37,840]
## 46 sum_percent 20000 4 2,082 (656) [927; 6,855]
## 47 pareto 50000 4 7,592 (3,543) [1,000; 29,721]
## 48 sum_percent 50000 4 2,080 (648) [985; 7,036]
aggregate(pareto_diff_abs ~ n_calib + target_set, overall_diff_df, aggFormat, show_range = T)
## n_calib target_set pareto_diff_abs.mean (SD) pareto_diff_abs.range
## 1 500 1 10,880 (7,266) [-6,848; 60,326]
## 2 1000 1 10,066 (6,740) [-1,726; 58,010]
## 3 5000 1 8,167 (5,680) [-3,404; 47,951]
## 4 10000 1 7,444 (5,250) [-2,317; 42,842]
## 5 20000 1 6,714 (4,902) [-2,868; 37,623]
## 6 50000 1 5,915 (4,451) [-1,835; 31,930]
## 7 500 2 6,206 (5,217) [-3,996; 56,953]
## 8 1000 2 5,319 (4,552) [-2,184; 43,061]
## 9 5000 2 3,608 (3,412) [-2,665; 35,797]
## 10 10000 2 3,037 (2,974) [-2,933; 32,385]
## 11 20000 2 2,455 (2,596) [-2,779; 28,425]
## 12 50000 2 1,809 (2,172) [-2,643; 23,686]
## 13 500 3 7,332 (6,671) [-3,538; 72,134]
## 14 1000 3 6,700 (6,235) [-3,079; 64,396]
## 15 5000 3 5,425 (5,336) [-3,667; 48,062]
## 16 10000 3 4,948 (4,928) [-2,906; 43,932]
## 17 20000 3 4,547 (4,631) [-3,151; 36,650]
## 18 50000 3 4,037 (4,204) [-3,657; 38,081]
## 19 500 4 11,125 (6,426) [-66; 63,461]
## 20 1000 4 10,090 (5,796) [-380; 60,339]
## 21 5000 4 7,839 (4,564) [237; 44,928]
## 22 10000 4 7,052 (4,082) [-276; 35,809]
## 23 20000 4 6,312 (3,702) [-72; 36,237]
## 24 50000 4 5,513 (3,232) [-332; 28,425]
# true incr nmb
mean(ref_df$true_inmb); sd(ref_df$true_inmb)
## [1] -33534.43
## [1] 27123.7
# plot 1: abs error for n_calib = 50,000
inmb_error_abs_means <- aggregate(inmb_error_abs ~method+target_set,ref_df,mean )
ggplot(ref_df) +
geom_density(aes(x = inmb_error_abs,col = method,fill = method), alpha = 0.5, position = "identity") +
geom_vline(data = inmb_error_abs_means, aes(xintercept = inmb_error_abs, col = method)) +
facet_wrap(~target_set) +
coord_cartesian(xlim = c(0, 20000)) +
xlab("Absolute error in Incremental Net Monetary Benefit") +
theme_minimal() +
theme(legend.position = "top")
# plot 2: abs error by n_calib
ggplot(res_mat) +
geom_boxplot(aes(y=inmb_error_abs, x = as.factor(n_calib), fill = method)) +
facet_wrap(~target_set, ncol = 1) +
ylab("Absolute error in Incremental Net Monetary Benefit") +
xlab("Number of candidate input parameter sets being evaluated") +
theme_minimal() +
coord_cartesian(ylim=c(0,40000)) +
theme(legend.position = "top")
# params abs error --------
aggregate(abs(cbind(pS1_S2_diff,RRS1_D_diff,RRS2_D_diff)) ~ method + target_set, ref_df, aggFormat, digits = 3)
## method target_set pS1_S2_diff RRS1_D_diff RRS2_D_diff
## 1 pareto 1 0.100 (0.060) 0.826 (0.509) 7.557 (4.169)
## 2 sum_percent 1 0.119 (0.070) 0.804 (0.137) 7.382 (3.980)
## 3 pareto 2 0.113 (0.066) 0.824 (0.426) 7.440 (4.026)
## 4 sum_percent 2 0.118 (0.069) 0.803 (0.331) 7.395 (4.138)
## 5 pareto 3 0.109 (0.064) 0.826 (0.410) 7.460 (4.267)
## 6 sum_percent 3 0.118 (0.069) 0.822 (0.314) 7.504 (4.833)
## 7 pareto 4 0.106 (0.062) 0.818 (0.464) 7.495 (3.867)
## 8 sum_percent 4 0.119 (0.070) 0.814 (0.329) 7.451 (4.674)
aggregate(cbind(pareto_pS1_S2_diff,pareto_RRS1_D_diff,pareto_RRS2_D_diff) ~ method + target_set, overall_diff_df[overall_diff_df$n_calib == 50000,], aggFormat, digits = 3)
## method target_set pareto_pS1_S2_diff pareto_RRS1_D_diff pareto_RRS2_D_diff
## 1 pareto 1 -0.018 (0.024) 0.022 (0.444) 0.175 (1.479)
## 2 pareto 2 -0.005 (0.010) 0.021 (0.205) 0.045 (0.880)
## 3 pareto 3 -0.010 (0.019) 0.004 (0.299) -0.044 (1.326)
## 4 pareto 4 -0.012 (0.016) 0.004 (0.232) 0.044 (1.188)
# by n_calib
aggregate(abs(cbind(pS1_S2_diff,RRS1_D_diff,RRS2_D_diff)) ~ method + n_calib + target_set, res_mat, aggFormat, digits = 3)
## method n_calib target_set pS1_S2_diff RRS1_D_diff RRS2_D_diff
## 1 pareto 500 1 0.090 (0.055) 0.814 (0.394) 7.609 (3.471)
## 2 sum_percent 500 1 0.119 (0.070) 0.812 (0.435) 7.381 (4.203)
## 3 pareto 1000 1 0.091 (0.056) 0.814 (0.411) 7.586 (3.605)
## 4 sum_percent 1000 1 0.119 (0.070) 0.800 (0.333) 7.379 (4.084)
## 5 pareto 5000 1 0.095 (0.057) 0.819 (0.455) 7.571 (3.893)
## 6 sum_percent 5000 1 0.119 (0.070) 0.806 (0.191) 7.376 (3.999)
## 7 pareto 10000 1 0.097 (0.058) 0.820 (0.471) 7.560 (3.986)
## 8 sum_percent 10000 1 0.119 (0.070) 0.805 (0.164) 7.379 (3.987)
## 9 pareto 20000 1 0.099 (0.059) 0.824 (0.489) 7.551 (4.075)
## 10 sum_percent 20000 1 0.119 (0.070) 0.805 (0.148) 7.380 (3.983)
## 11 pareto 50000 1 0.100 (0.060) 0.826 (0.509) 7.557 (4.169)
## 12 sum_percent 50000 1 0.119 (0.070) 0.804 (0.137) 7.382 (3.980)
## 13 pareto 500 2 0.105 (0.061) 0.825 (0.423) 7.452 (3.322)
## 14 sum_percent 500 2 0.118 (0.070) 0.822 (0.481) 7.414 (4.302)
## 15 pareto 1000 2 0.107 (0.062) 0.827 (0.414) 7.416 (3.417)
## 16 sum_percent 1000 2 0.118 (0.069) 0.810 (0.420) 7.395 (4.220)
## 17 pareto 5000 2 0.110 (0.064) 0.823 (0.411) 7.436 (3.688)
## 18 sum_percent 5000 2 0.118 (0.069) 0.805 (0.350) 7.397 (4.155)
## 19 pareto 10000 2 0.111 (0.064) 0.820 (0.411) 7.455 (3.805)
## 20 sum_percent 10000 2 0.118 (0.069) 0.803 (0.340) 7.400 (4.141)
## 21 pareto 20000 2 0.112 (0.065) 0.821 (0.416) 7.449 (3.910)
## 22 sum_percent 20000 2 0.118 (0.069) 0.804 (0.334) 7.394 (4.140)
## 23 pareto 50000 2 0.113 (0.066) 0.824 (0.426) 7.440 (4.026)
## 24 sum_percent 50000 2 0.118 (0.069) 0.803 (0.331) 7.395 (4.138)
## 25 pareto 500 3 0.103 (0.062) 0.826 (0.447) 7.483 (3.925)
## 26 sum_percent 500 3 0.118 (0.069) 0.835 (0.490) 7.527 (4.882)
## 27 pareto 1000 3 0.104 (0.062) 0.831 (0.439) 7.471 (3.963)
## 28 sum_percent 1000 3 0.118 (0.069) 0.823 (0.420) 7.519 (4.857)
## 29 pareto 5000 3 0.106 (0.063) 0.833 (0.421) 7.481 (4.127)
## 30 sum_percent 5000 3 0.118 (0.069) 0.825 (0.337) 7.507 (4.839)
## 31 pareto 10000 3 0.107 (0.063) 0.831 (0.419) 7.489 (4.171)
## 32 sum_percent 10000 3 0.118 (0.069) 0.823 (0.324) 7.509 (4.836)
## 33 pareto 20000 3 0.108 (0.064) 0.829 (0.414) 7.466 (4.212)
## 34 sum_percent 20000 3 0.118 (0.069) 0.823 (0.317) 7.506 (4.834)
## 35 pareto 50000 3 0.109 (0.064) 0.826 (0.410) 7.460 (4.267)
## 36 sum_percent 50000 3 0.118 (0.069) 0.822 (0.314) 7.504 (4.833)
## 37 pareto 500 4 0.093 (0.055) 0.809 (0.340) 7.566 (3.053)
## 38 sum_percent 500 4 0.119 (0.070) 0.829 (0.487) 7.472 (4.742)
## 39 pareto 1000 4 0.096 (0.056) 0.810 (0.348) 7.535 (3.185)
## 40 sum_percent 1000 4 0.119 (0.070) 0.819 (0.422) 7.455 (4.709)
## 41 pareto 5000 4 0.101 (0.059) 0.812 (0.390) 7.516 (3.507)
## 42 sum_percent 5000 4 0.119 (0.070) 0.817 (0.348) 7.455 (4.680)
## 43 pareto 10000 4 0.103 (0.060) 0.813 (0.412) 7.513 (3.629)
## 44 sum_percent 10000 4 0.119 (0.070) 0.815 (0.338) 7.457 (4.674)
## 45 pareto 20000 4 0.104 (0.061) 0.815 (0.436) 7.494 (3.744)
## 46 sum_percent 20000 4 0.119 (0.070) 0.815 (0.332) 7.452 (4.675)
## 47 pareto 50000 4 0.106 (0.062) 0.818 (0.464) 7.495 (3.867)
## 48 sum_percent 50000 4 0.119 (0.070) 0.814 (0.329) 7.451 (4.674)
aggregate(cbind(pareto_pS1_S2_diff,pareto_RRS1_D_diff,pareto_RRS2_D_diff) ~ n_calib + target_set, overall_diff_df, aggFormat, digits = 3)
## n_calib target_set pareto_pS1_S2_diff pareto_RRS1_D_diff pareto_RRS2_D_diff
## 1 500 1 -0.029 (0.039) 0.002 (0.476) 0.228 (2.178)
## 2 1000 1 -0.027 (0.036) 0.014 (0.422) 0.207 (1.854)
## 3 5000 1 -0.023 (0.031) 0.013 (0.402) 0.195 (1.518)
## 4 10000 1 -0.022 (0.028) 0.015 (0.411) 0.181 (1.464)
## 5 20000 1 -0.020 (0.026) 0.018 (0.425) 0.172 (1.461)
## 6 50000 1 -0.018 (0.024) 0.022 (0.444) 0.175 (1.479)
## 7 500 2 -0.013 (0.027) 0.002 (0.377) 0.038 (2.051)
## 8 1000 2 -0.011 (0.023) 0.017 (0.310) 0.021 (1.743)
## 9 5000 2 -0.008 (0.017) 0.018 (0.236) 0.039 (1.231)
## 10 10000 2 -0.007 (0.014) 0.017 (0.219) 0.056 (1.102)
## 11 20000 2 -0.006 (0.013) 0.018 (0.209) 0.055 (0.985)
## 12 50000 2 -0.005 (0.010) 0.021 (0.205) 0.045 (0.880)
## 13 500 3 -0.015 (0.032) -0.009 (0.422) -0.044 (2.109)
## 14 1000 3 -0.014 (0.030) 0.008 (0.384) -0.047 (1.949)
## 15 5000 3 -0.012 (0.025) 0.008 (0.335) -0.026 (1.650)
## 16 10000 3 -0.011 (0.022) 0.008 (0.327) -0.020 (1.530)
## 17 20000 3 -0.011 (0.021) 0.006 (0.312) -0.040 (1.452)
## 18 50000 3 -0.010 (0.019) 0.004 (0.299) -0.044 (1.326)
## 19 500 4 -0.025 (0.034) -0.020 (0.386) 0.094 (2.456)
## 20 1000 4 -0.023 (0.030) -0.009 (0.291) 0.081 (2.191)
## 21 5000 4 -0.018 (0.023) -0.005 (0.196) 0.061 (1.661)
## 22 10000 4 -0.016 (0.020) -0.002 (0.194) 0.056 (1.497)
## 23 20000 4 -0.014 (0.018) 0.000 (0.207) 0.041 (1.347)
## 24 50000 4 -0.012 (0.016) 0.004 (0.232) 0.044 (1.188)
ref_df_params <- data.frame(
method = ref_df$method, target_set= ref_df$target_set,
abs(cbind(pS1_S2_diff=ref_df$pS1_S2_diff,RRS1_D_diff=ref_df$RRS1_D_diff,RRS2_D_diff=ref_df$RRS2_D_diff))
)
ref_df_params_long <- melt(ref_df_params, id.vars = c("target_set","method"))
params_abs_means <- aggregate(cbind(pS1_S2_diff,RRS1_D_diff,RRS2_D_diff) ~method+target_set,ref_df_params,mean )
params_abs_means_long <- melt(params_abs_means)
## Using method as id variables
ggplot(ref_df_params_long) +
geom_density(aes(x = value,col = method,fill = method), alpha = 0.5, position = "identity") +
# geom_vline(data = inmb_error_abs_means, aes(xintercept = inmb_error_abs, col = method)) +
facet_wrap(~target_set+variable, ncol = 3, scales = "free") +
# coord_cartesian(xlim = c(0, 20000)) +
# xlab("Absolute error in Incremental Net Monetary Benefit") +
theme_minimal() +
theme(legend.position = "top")
The number of selected parameter sets varies considerably, depending on the number of test sets and by random sampling variation. With low numbers of test sets, the Pareto Frontier approach may pccasionally end up selecting just 1 parameter set.
# pareto n size -----
pareto_n_df <- subset(res_mat, method == "pareto")
pareto_n_df_agg <- aggregate(pareto_n ~ n_calib + target_set, pareto_n_df,
function(x){
y <- c("mean" = mean(x), "sd" = sd(x), quantile(x, c(0.5, 0.25, 0.75, 0, 1)))
y <- formatC(y, digits = 0, format = "f", big.mark = ",")
y <- c(
"mean (SD)" = paste0(y[1], " (",y[2],")"),
# "median (IQR)" = paste0(y[3], " (",y[4],"; ",y[5],")")#,
"range" = paste0("[",y[6], "; ",y[7],"]")
)
y})
pareto_n_df_agg <- reshape(pareto_n_df_agg, direction = "wide", timevar = "target_set", idvar = "n_calib")
colnames(pareto_n_df_agg) <- c("n_calib" , paste("Target",1:4))
pareto_n_df_agg
## n_calib Target 1.mean (SD) Target 1.range Target 2.mean (SD) Target 2.range
## 1 500 19 (6) [2; 50] 9 (3) [1; 24]
## 2 1000 23 (7) [3; 61] 11 (4) [1; 28]
## 3 5000 36 (10) [3; 82] 16 (5) [4; 40]
## 4 10000 43 (12) [8; 108] 19 (6) [3; 44]
## 5 20000 50 (13) [9; 103] 23 (7) [3; 54]
## 6 50000 60 (15) [15; 121] 28 (8) [5; 66]
## Target 3.mean (SD) Target 3.range Target 4.mean (SD) Target 4.range
## 1 6 (2) [1; 19] 23 (7) [2; 58]
## 2 7 (2) [1; 18] 29 (9) [3; 72]
## 3 9 (3) [1; 20] 50 (14) [8; 108]
## 4 9 (3) [2; 22] 61 (17) [10; 125]
## 5 10 (3) [2; 22] 74 (19) [14; 152]
## 6 11 (3) [2; 27] 92 (23) [21; 188]
Analysis steps in short:
Consider running this in parallel in the cloud - otherwise, this may take >24h to run.
res_uncertain <- data.frame(load_object("./output/res_uncertain.RData")) # couldnt save as rds
for(i in c(1,3:ncol(res_uncertain))){
res_uncertain[,i] <- as.numeric(res_uncertain[,i])
}
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
# because of the stochastic ratio target, some results fail with NA outcomes if ratio = 0 or Inf
res_uncertain <- res_uncertain[complete.cases(res_uncertain),]
names(res_uncertain) <- c(
"run","method","inmb","inmb_error","inmb_error_abs","pareto_diff",
"pS1_S2_diff", "RRS1_D_diff", "RRS2_D_diff",
"target_set","pareto_n","n_study_size","true_inmb"
)
res_uncertain <- res_uncertain[res_uncertain$method %in% c("pareto", "sum_percent"),]
dim(res_uncertain)
## [1] 799874 13
head(res_uncertain)
## run method inmb inmb_error inmb_error_abs pareto_diff pS1_S2_diff
## 1 1 pareto -62067.38 -19794.930 20056.560 0.00000 0.213622600
## 3 1 sum_percent -83189.63 -40917.180 40917.180 20860.61100 0.246363700
## 4 1 pareto -43562.41 -1289.951 4307.477 0.00000 -0.026622775
## 6 1 sum_percent -44881.45 -2608.998 4344.525 37.04723 -0.008143567
## 7 1 pareto -35032.14 7240.313 7503.171 0.00000 -0.032768660
## 9 1 sum_percent -36243.56 6028.899 6028.899 -1474.27200 -0.034196920
## RRS1_D_diff RRS2_D_diff target_set pareto_n n_study_size true_inmb
## 1 0.7626147 2.7900669 1 1303 1684 -42272.46
## 3 0.7401651 -2.8526428 1 1303 1684 -42272.46
## 4 0.5599623 -2.7415790 2 105 1684 -42272.46
## 6 0.4616303 -1.6668570 2 105 1684 -42272.46
## 7 0.6783854 0.2797807 3 13 1684 -42272.46
## 9 0.8272466 -0.5224098 3 13 1684 -42272.46
# relative performance of pareto vs sum
pareto_diff_abs <- res_uncertain$inmb_error_abs[res_uncertain$method == "pareto"] - res_uncertain$inmb_error_abs[res_uncertain$method == "sum_percent"]
pareto_params_diff_abs <- abs(res_uncertain[res_uncertain$method == "pareto", c("pS1_S2_diff", "RRS1_D_diff", "RRS2_D_diff")]) - abs(res_uncertain[res_uncertain$method == "sum_percent", c("pS1_S2_diff", "RRS1_D_diff", "RRS2_D_diff")])
names(pareto_params_diff_abs)<-paste0("pareto_", names(pareto_params_diff_abs))
overall_diff_df <- res_uncertain[res_uncertain$method == "pareto",]
overall_diff_df$pareto_diff_abs <- pareto_diff_abs
overall_diff_df <- cbind(overall_diff_df,pareto_params_diff_abs)
# incr NMB --------
aggregate(inmb_error_abs ~ method + target_set, res_uncertain, aggFormat, show_range = T)
## method target_set inmb_error_abs.mean (SD) inmb_error_abs.range
## 1 pareto 1 21,095 (8,097) [1,169; 83,336]
## 2 sum_percent 1 28,185 (14,337) [2,471; 85,421]
## 3 pareto 2 7,403 (4,151) [530; 50,891]
## 4 sum_percent 2 5,368 (4,104) [709; 50,717]
## 5 pareto 3 11,600 (7,032) [45; 74,814]
## 6 sum_percent 3 7,505 (6,381) [1,086; 76,271]
## 7 pareto 4 12,461 (5,444) [819; 61,837]
## 8 sum_percent 4 3,905 (2,859) [760; 45,646]
aggregate(pareto_diff_abs ~ target_set, overall_diff_df, aggFormat, show_range = T)
## target_set pareto_diff_abs.mean (SD) pareto_diff_abs.range
## 1 1 -7,091 (14,534) [-50,094; 73,348]
## 2 2 2,036 (2,854) [-23,531; 36,500]
## 3 3 4,095 (5,070) [-17,465; 51,774]
## 4 4 8,556 (5,514) [-17,590; 55,056]
# # true values
# mean(res_t1$true_inmb); sd(res_t1$true_inmb)
# params -----
aggregate(cbind(pS1_S2_diff,RRS1_D_diff,RRS2_D_diff) ~ method + target_set, res_uncertain, aggFormat,digits = 3)
## method target_set pS1_S2_diff RRS1_D_diff RRS2_D_diff
## 1 pareto 1 0.114 (0.118) 0.813 (0.524) 3.659 (2.922)
## 2 sum_percent 1 0.169 (0.127) 0.778 (0.199) 0.724 (3.566)
## 3 pareto 2 0.024 (0.133) 0.675 (0.637) 3.574 (4.717)
## 4 sum_percent 2 0.022 (0.136) 0.681 (0.494) 3.574 (4.425)
## 5 pareto 3 0.024 (0.126) 0.733 (0.531) 3.536 (4.501)
## 6 sum_percent 3 0.024 (0.137) 0.710 (0.433) 3.640 (5.226)
## 7 pareto 4 0.049 (0.130) 0.308 (0.486) 3.354 (2.781)
## 8 sum_percent 4 0.026 (0.137) 0.570 (0.436) 3.803 (5.022)
aggregate(cbind(pareto_pS1_S2_diff,pareto_RRS1_D_diff,pareto_RRS2_D_diff) ~ method + target_set, overall_diff_df, aggFormat,digits = 3)
## method target_set pareto_pS1_S2_diff pareto_RRS1_D_diff pareto_RRS2_D_diff
## 1 pareto 1 -0.045 (0.054) 0.096 (0.396) 1.151 (2.815)
## 2 pareto 2 -0.002 (0.031) 0.050 (0.236) 0.171 (1.888)
## 3 pareto 3 -0.010 (0.022) 0.036 (0.345) -0.587 (1.538)
## 4 pareto 4 0.002 (0.044) -0.159 (0.404) -1.401 (2.625)
# PLOT
sample_df <- overall_diff_df[sample(1:nrow(overall_diff_df),100000), ] # easier to draw
ggplot(sample_df) +
geom_point(aes(x=n_study_size, y= pareto_diff_abs), size = 0.1, alpha =0.3) +
geom_smooth(aes(x=n_study_size, y= pareto_diff_abs), se = T) +
geom_hline(yintercept = 0, col = "red", linetype = "dashed") +
facet_wrap(~target_set) +
xlab("Size of micro simulation used to generate target set") +
ylab("Abs error iNMB compared to distance-based score") +
theme_minimal()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
fin.