################################################################################### ################ NEW DATA SIMULATION WITH CONTINUOUS OUTCOME ######################## ################################################################################### library('mvtnorm') library('LaplacesDemon') Datensimulation_stetig_out = function(k, n, m, z, rho, beta0, nicht_negativ){ #k: Number of data set / simulations #n: Number of observations #m: Number of variables #z: Anzahl zus. Variablen #rho: correlation #beta0: Intercept #nicht_negativ: intercept, so that the normal distribution is in the positive range. data = list() prop = matrix(NA, nrow = n, ncol = k) treat = matrix(NA, nrow = n, ncol = k) #Simulation of the outcome mean = rep(0, z) data_outcome = list() prop_outcome = matrix(NA, nrow = n, ncol = k) outcome = matrix(NA, nrow = n, ncol = k) sigma = diag(1,m,m) sigma[sigma==0] = rho #0.5, ... #Correlation matrix, variables are correlated in pairs with korr = rho for (i in 1:k) { data[[i]] = rmvnorm(n, sigma = sigma) #Calculation of propensity scores for the simulated data for (j in 1:n) { prop[j,i] = exp(beta0 + 0.1*data[[i]][j,1] + 0.2*data[[i]][j,2] + 0.3*data[[i]][j,3] + 0.4*data[[i]][j,4] + 0.5*data[[i]][j,5] + 0.6*data[[i]][j,6]) / (1 + exp(beta0 + 0.1*data[[i]][j,1] + 0.2*data[[i]][j,2] + 0.3*data[[i]][j,3] + 0.4*data[[i]][j,4] + 0.5*data[[i]][j,5] + 0.6*data[[i]][j,6])) treat[j,i] = rbinom(1,1,prop[j,i]) } #additional variables for the outcome (they are unknown in a study, but influence the outcome anyway) #Simulate data with multinormal distribution data_outcome[[i]] = rmvnorm(n, sigma = diag(1,3,3))#uncorrelated data data_outcome[[i]] = cbind(data[[i]][,1:3], data_outcome[[i]]) #From the first simulation the variables 1-3 are taken over and 3 new variables (7-9) are additionally simulated. #simulate continuous outcome for (j in 1:n) { prop_outcome[j,i] = exp(beta0*treat[j,i] + 0.4*data_outcome[[i]][j,1] + 0.1*data_outcome[[i]][j,2] + 0.5*data_outcome[[i]][j,3] + 0.3*data_outcome[[i]][j,4] + 0.2*data_outcome[[i]][j,5] + 0.6*data_outcome[[i]][j,6]) / (1 + exp(beta0*treat[j,i] + 0.4*data_outcome[[i]][j,1] + 0.1*data_outcome[[i]][j,2] + 0.5*data_outcome[[i]][j,3] + 0.3*data_outcome[[i]][j,4] + 0.2*data_outcome[[i]][j,5] + 0.6*data_outcome[[i]][j,6])) out_klein = rnorm(1,mean = prop_outcome[j,i]+beta0*treat[j,i]+nicht_negativ, sd = prop_outcome[j,i]/2) outcome[j,i] = out_klein*5 } } simu = list("data" = data, "prop" = prop,"treat" = treat, "data_outcome" = data_outcome, "outcome" = outcome, "rho" = rho, "beta0" = beta0, "k" = k) return(simu) } ############### ANALYSIS OF SIMULATION WITH A CONSTANT OUTCOME ########################## #Load packages: library(survival) library(MatchIt) library(optmatch) library(twang) library(car) #1. Matching Code: reg.match.stet_out = function(name){ load(name) k= length(simu$data) #Initialization of important parameters zaehler_match = c() odds_ratio_match = matrix(NA, nrow = 5, ncol = k) p_wert_repl = c() for (i in 1:k){ tr = simu$treat[,i] out = simu$out[,i] ps = simu$prop[,i] dat = simu$data[[i]] dat0 = data.frame(cbind(ps, dat, tr, out)) colnames(dat0) = c("prop","x1","x2","x3","x4","x5","x6", "treat", "outcome") #Here different matching possibilities ps.1.repl = matchit(dat0$treat ~ dat0$prop, data = dat0, method = "nearest",replace = T, ratio = 1) m.data.repl = match.data(ps.1.repl, group = "all", distance = "distance", weights = "weights", subclass = "subclass") #OUTCOME ANALYSIS #for nn with replacement model.1.repl = glm(m.data.repl$outcome ~ m.data.repl$treat + m.data.repl$prop, data = m.data.repl, family = "gaussian") p_wert_repl[i] = summary(model.1.repl)$coefficients[2,'Pr(>|t|)'] #odds_ratio_match[2,i] = exp(abs(summary(model.1.repl)$coefficients[2,'Estimate'])) } zaehler_match[1] = sum(p_wert_repl<=0.05) p_wert_tab = cbind(p_wert_repl) #mean_OR_match =rowMeans(odds_ratio_match, na.rm = T) tabelle = data.frame(zaehler_match) row.names(tabelle) = c("Match REPL") colnames(tabelle) = c("Sum sig p-Values" ) p_power = list(tabelle = tabelle, p_wert_tab = p_wert_tab) return(p_power) } #STRATIFICATION reg.strat.stet_out = function(name){ load(name) k =length(simu$data) p_wert_strat10.2 = c() for (i in 1:k){ tr = simu$treat[,i] out = simu$out[,i] ps = simu$prop[,i] dat = simu$data[[i]] dat0 = data.frame(cbind(ps, dat, tr, out)) colnames(dat0) = c("prop","x1","x2","x3","x4","x5","x6", "treat", "outcome") sbcl.10 = matchit(dat0$treat ~ dat0$prop, data = dat0, method = "subclass", subclass = 10) sbcl.data.10 = match.data(sbcl.10, group = "all", distance = "distance", weights = "weights", subclass = "subclass") #deciles breakvals_10 = quantile(ps, probs = seq(0, 1, 0.1), na.rm = T) #assigns to each ps a stratum of 1-10 strat_10 = cut(ps, breaks = breakvals_10, labels=c('1', '2', '3', '4','5','6','7','8','9','10'), include.lowest=TRUE,right = TRUE) dat0 = data.frame(cbind(ps, dat, tr, out)) colnames(dat0) = c("prop","x1","x2","x3","x4","x5","x6", "treat", "outcome") #conditional logistic regression model_strat_10.2 = glm(dat0$outcome ~ dat0$treat + factor(strat_10),data = dat0, family = "gaussian") #p-Values and odds ratio p_wert_strat10.2[i] = summary(model_strat_10.2)$coef[2,'Pr(>|t|)'] #odds_ratio_strat10.2[i] = summary(model_strat_10.2)$coef[1,'exp(coef)'] } summe_strat10.2 = sum(p_wert_strat10.2<=0.05) #Specify odds ratio as the mean of all simulations #odds_ratio_strat10.2 = mean(odds_ratio_strat10.2) tabelle = rbind(summe_strat10.2) colnames(tabelle) = c("Sum sig p-Values" ) row.names(tabelle) = c("Dezile") p_werte = cbind(p_wert_strat10.2) p_power = list(tabelle = tabelle, p_werte = p_werte) return(p_power) } #IPTW reg.iptw.stet_out= function(name){ load(name) k = length(simu$data) p_wert_iptw.stab.trunc = c() p_wert_iptw.stab.trunc2 = c() #odds_ratio_iptw.stab.trunc = c() for(i in 1:k){ tr = simu$treat[,i] out = simu$outcome[,i] dat = simu$data[[i]] ps = simu$prop[,i] dat0 = data.frame(cbind(dat, ps, tr, out)) colnames(dat0) = c("x1","x2","x3","x4","x5","x6","prop", "treat", "outcome") dat0$gewichte_ate = ifelse(tr == 1,1/ps, 1/(1-ps)) dat0$gewichte_ate.stab = ifelse(dat0$treat==1, (mean(ps))/ps, (mean(1-ps))/(1-ps)) # Find 99th percentile of the weights => as a cut off criterion perz99.stab = quantile(dat0$gewichte_ate.stab, probs = 0.99, na.rm = T) perz99.5.stab = quantile(dat0$gewichte_ate.stab, probs = 0.995, na.rm = T) dat0$gewichte.stab.trunc = dat0$gewichte_ate.stab dat0$gewichte.stab.trunc[dat0$gewichte_ate.stab > perz99.stab] = perz99.stab glm.IPTW.ATE.stab.trunc = glm(dat0$outcome ~ dat0$treat + dat0$prop, data= dat0, weights = dat0$gewichte_ate.stab.trunc, family = "gaussian") #glm.IPTW.ATE.stab.trunc2 = glm(dat0$outcome ~ dat0$treat +dat0$x1 + dat0$x2 + dat0$x3 + # dat0$x4+ dat0$x5 + dat0$x6 , data= dat0, weights = dat0$gewichte_ate.stab.trunc, family = "gaussian") p_wert_iptw.stab.trunc[i] = summary(glm.IPTW.ATE.stab.trunc)$coefficients[2,'Pr(>|t|)'] #p_wert_iptw.stab.trunc2[i] = summary(glm.IPTW.ATE.stab.trunc2)$coefficients[2,'Pr(>|t|)'] # odds_ratio_iptw.stab.trunc[i] = exp(abs(summary(glm.IPTW.ATE.stab.trunc)$coefficients[2,'Estimate'])) } sum_p_werte.st.tr = sum(p_wert_iptw.stab.trunc<=0.05) #sum_p_werte.st.tr2 = sum(p_wert_iptw.stab.trunc2<=0.05) #mean_OR_iptw.st.tr = mean(odds_ratio_iptw.stab.trunc) p_werte = cbind(p_wert_iptw.stab.trunc) #tabelle = data.frame(rbind(cbind(sum_p_werte, sum_p_werte2),cbind(sum_p_werte.st, sum_p_werte.st2), cbind(sum_p_werte.st.tr,sum_p_werte.st.tr2))) tabelle = data.frame(sum_p_werte.st.tr) # colnames(tabelle) = c("Summe sig p-Werte ohne Kov.", "Summe sig p-Werte mit Kov." ) names(tabelle) = c("IPTW stab trunc") p_power = list(tabelle = tabelle, p_werte = p_werte) return(p_power) } #4. Adjustment reg.adjust.stet_out = function(name){ load(name) k=length(simu$data) #Set name p_wert_adjust = c() #odds_ratio_adjust = c() for(i in 1:k){ tr = simu$treat[,i] out = simu$outcome[,i] dat = simu$data[[i]] ps = simu$prop[,i] dat0 = data.frame(cbind(ps, dat, tr, out)) colnames(dat0) = c("prop","x1","x2","x3","x4","x5","x6", "treat", "outcome") reg = glm(dat0$outcome ~ dat0$treat + dat0$prop, data = dat0, family = "gaussian") p_wert_adjust[i] = summary(reg)$coefficients[2,"Pr(>|t|)"] # odds_ratio_adjust[i] = exp(summary(logreg)$coefficients[2,"Estimate"]) } sig_p_wert = sum(p_wert_adjust<=0.05) #OR_mean_adjust = mean(odds_ratio_adjust, na.rm = T) tabelle = data.frame(sig_p_wert) row.names(tabelle) = c("adjust") colnames(tabelle) = c("Sum sig p-Values" ) p_out = list(tabelle = tabelle, p_wert_adjust = p_wert_adjust) return(p_out) }