Power for Delayed Effect Scenarios

library(gt)
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(gsDesign)
devtools::load_all()

Overview

We consider a delayed effect scenario where

The control group time-to-event distribution is exponential with a median of 15 months.
The experimental group has a hazard ratio vs. control of 1 for 6 months and 0.6 thereafter.
Enrollment at a constant rate for 12 months.
Total study duration from 24 to 60 months, in 4-month increments.
Exponential dropout rate of 0.001 per month.

# Scenario 1 inputs.
# Enrollment: a single stratum enrolled at a constant rate for 12 months.
enrollRates <- tibble(
  Stratum = "All",
  duration = 12,
  rate = 1
)

# Failure rates: control hazard of log(2) / 15 (median of 15 months) in both
# periods; hazard ratio of 1 for the first 6 months and 0.6 for the next 100.
failRates <- tibble(
  Stratum = "All",
  duration = c(6, 100),
  failRate = log(2) / 15,
  hr = c(1, .6),
  dropoutRate = 0.001
)

# Show the enrollment assumptions as a titled table.
gt(enrollRates) %>%
  tab_header(title = "Enrollment Table of Scenario 1")

Stratum	duration	rate
Enrollment Table of Scenario 1
All	12	1

# Show the Scenario 1 failure-rate assumptions as a titled table.
gt(failRates) %>%
  tab_header(title = "Failure Table of Scenario 1")

Stratum	duration	failRate	hr	dropoutRate
Failure Table of Scenario 1
All	6	0.04620981	1.0	0.001
All	100	0.04620981	0.6	0.001

For the above scenarios, we investigate the power, sample size and events under 6 tests:

FH05: The Fleming-Harrington test with \(\rho=0, \gamma=0.5\), used to size the trial for 85% power given a 1-sided Type I error of 0.025.
FH00: The regular logrank test with \(\rho=0, \gamma=0\) under fixed study duration \(\in\{20, 24, 28, \ldots, 60\}\).
mc2_test: The Max Combo test including 2 WLR tests, i.e., \(\{(\rho=0, \gamma=0, \tau = -1), (\rho=0, \gamma=0.5, \tau = -1)\}\).
mc3_test: The Max Combo test including 3 WLR tests, i.e., \(\{(\rho=0, \gamma=0, \tau = -1), (\rho=0, \gamma=0.5, \tau = -1), (\rho=0.5, \gamma=0.5, \tau = -1)\}\).
mc4_test: The Max Combo test including 4 WLR tests, i.e., \(\{(\rho=0, \gamma=0, \tau = -1), (\rho=0, \gamma=0.5, \tau = -1), (\rho=0.5, \gamma=0.5, \tau = -1), (\rho=0.5, \gamma=0, \tau = -1)\}\).
MB6: The Magirr-Burman with \(\rho=-1, \gamma=0, \tau = 6\) test with fixed study duration \(\in\{20, 24, 28, \ldots, 60\}\).

We then compute power for the logrank test. The general summary is that the Fleming-Harrington test has a meaningful power gain relative to logrank regardless of the study durations evaluated.

# Scenario 1: for each study duration, derive a design with the FH(0, 0.5)
# test (alpha = 0.025, beta = 0.15) and then compute the power of the other
# tests using the enrollment rates returned by that design.
study_durations <- seq(24, 60, 4)

# Collect one summary row per duration and bind them once at the end rather
# than growing `tab` with rbind() on every iteration.
power_rows <- vector("list", length(study_durations))

for (i in seq_along(study_durations)) {
  trial_duration <- study_durations[i]

  # Fleming-Harrington rho=0, gamma=0.5 test
  FH05 <- gs_design_wlr(
    enrollRates = enrollRates,
    failRates = failRates,
    ratio = 1,
    alpha = 0.025, beta = 0.15,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = 0, gamma = 0.5)
    },
    upar = qnorm(.975),
    lpar = -Inf,
    analysisTimes = trial_duration
  )

  # regular logrank test
  # NOTE(review): events = .1 is presumably a nominal target so that
  # analysisTimes determines the timing of the analysis -- confirm.
  FH00 <- gs_power_wlr(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    ratio = 1,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = 0, gamma = 0)
    },
    upar = qnorm(.975),
    lpar = -Inf,
    analysisTimes = trial_duration,
    events = .1
  )

  # max combo test with 2 WLR components
  mc2_test <- data.frame(
    rho = 0, gamma = c(0, .5), tau = -1,
    test = 1:2, Analysis = 1, analysisTimes = trial_duration
  )

  MC2 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc2_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # max combo test with 3 WLR components
  mc3_test <- data.frame(
    rho = c(0, 0, .5), gamma = c(0, .5, .5), tau = -1,
    test = 1:3, Analysis = 1, analysisTimes = trial_duration
  )

  MC3 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc3_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # max combo test with 4 WLR components
  mc4_test <- data.frame(
    rho = c(0, 0, .5, .5), gamma = c(0, .5, .5, 0), tau = -1,
    test = 1:4, Analysis = 1, analysisTimes = trial_duration
  )

  MC4 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc4_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # Magirr-Burman rho=-1, gamma=0, tau = 6 test
  # tau = 6 is the value stated for the MB6 test and equals the duration of
  # the first (hr = 1) period in failRates.
  MB6 <- gs_power_wlr(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    ratio = 1,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = -1, gamma = 0, tau = 6)
    },
    upar = qnorm(.975),
    lpar = -Inf,
    analysisTimes = trial_duration,
    events = .1
  )

  # One summary row for this duration. N and Events come from the FH(0, 0.5)
  # design; `analysis` is spelled out in full so the lookup does not depend on
  # partial matching of `$`. The column order (MC2, MC4, MC3) is kept as is so
  # the displayed table layout does not change.
  power_rows[[i]] <- tibble(
    `Study duration` = trial_duration,
    N = FH05$analysis$N[1],
    Events = FH05$analysis$Events[1],
    `Events/N` = Events / N,
    # we use the AHR from regular WLR as the AHR of different max combo test
    AHR = as.numeric(FH00$analysis$AHR[1]),
    `FH(0, 0.5) power` = FH05$bounds$Probability[1],
    `FH(0, 0) power` = FH00$bounds$Probability[1],
    `MC2 power` = MC2$bounds$Probability[1],
    `MC4 power` = MC4$bounds$Probability[1],
    `MC3 power` = MC3$bounds$Probability[1],
    `MB6 power` = MB6$bounds$Probability[1]
  )
}

tab <- bind_rows(power_rows)

# Render the Scenario 1 summary table; columns are selected by name, with the
# number of decimals chosen per column group.
gt(tab) %>%
  fmt_number(columns = c("N", "Events"), decimals = 1) %>%
  fmt_number(columns = "Events/N", decimals = 2) %>%
  fmt_number(columns = "AHR", decimals = 4) %>%
  fmt_number(columns = ends_with("power"), decimals = 2)

Study duration	N	Events	Events/N	AHR	FH(0, 0.5) power	FH(0, 0) power	MC2 power	MC4 power	MC3 power	MB6 power
24	695.6	349.8	0.50	0.7688	0.85	0.69	0.82	0.81	0.82	0.81
28	521.9	296.1	0.57	0.7473	0.85	0.71	0.82	0.81	0.82	0.82
32	427.6	266.3	0.62	0.7325	0.85	0.72	0.82	0.81	0.82	0.82
36	369.4	247.5	0.67	0.7218	0.85	0.73	0.82	0.81	0.82	0.83
40	330.2	234.6	0.71	0.7138	0.85	0.74	0.82	0.80	0.82	0.83
44	302.3	225.3	0.75	0.7076	0.85	0.74	0.82	0.80	0.82	0.83
48	281.5	218.3	0.78	0.7027	0.85	0.74	0.81	0.80	0.81	0.83
52	265.5	212.8	0.80	0.6987	0.85	0.75	0.81	0.80	0.81	0.83
56	253.2	208.5	0.82	0.6955	0.85	0.75	0.81	0.80	0.81	0.83
60	243.3	205.1	0.84	0.6929	0.85	0.75	0.81	0.80	0.81	0.83

An Alternative Scenario

Now we consider an alternative scenario where the control group starts with the same median of 15 months, but its median changes to 30 months after month 16. The hazard ratio is 1 for the first 6 months, 0.6 from month 6 to month 16, and 0.85 during the late period after month 16.

# Scenario 2 inputs.
# Enrollment: a single stratum enrolled at a constant rate for 12 months.
enrollRates <- tibble(
  Stratum = "All",
  duration = 12,
  rate = 1
)

# Failure rates over three periods of 6, 10 and 100 months.
failRates <- tibble(
  Stratum = "All",
  duration = c(6, 10, 100),
  # Scenario 1 used a single value: failRate = log(2) / 15
  failRate = log(2) / c(15, 15, 30),
  # Scenario 1 used hr = c(1, .6)
  hr = c(1, .6, .85),
  dropoutRate = 0.001
)

# Show the enrollment assumptions as a titled table.
gt(enrollRates) %>%
  tab_header(title = "Enrollment Table of Scenario 2")

Stratum	duration	rate
Enrollment Table of Scenario 2
All	12	1

# Show the Scenario 2 failure-rate assumptions as a titled table.
gt(failRates) %>%
  tab_header(title = "Failure Table of Scenario 2")

Stratum	duration	failRate	hr	dropoutRate
Failure Table of Scenario 2
All	6	0.04620981	1.00	0.001
All	10	0.04620981	0.60	0.001
All	100	0.02310491	0.85	0.001

# Scenario 2: for each study duration, derive a design with the FH(0, 0.5)
# test (alpha = 0.025, beta = 0.15) and then compute the power of the other
# tests using the enrollment rates returned by that design.
study_durations <- seq(20, 60, 4)

# Collect one summary row per duration and bind them once at the end rather
# than growing `tab` with rbind() on every iteration.
power_rows <- vector("list", length(study_durations))

for (i in seq_along(study_durations)) {
  trial_duration <- study_durations[i]

  # Fleming-Harrington rho=0, gamma=0.5 test
  FH05 <- gs_design_wlr(
    enrollRates = enrollRates,
    failRates = failRates,
    ratio = 1,
    alpha = 0.025, beta = 0.15,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = 0, gamma = 0.5)
    },
    upper = gs_b,
    upar = qnorm(.975),
    lower = gs_b,
    lpar = -Inf,
    analysisTimes = trial_duration
  )

  # regular logrank test
  # NOTE(review): events = .1 is presumably a nominal target so that
  # analysisTimes determines the timing of the analysis -- confirm.
  FH00 <- gs_power_wlr(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    ratio = 1,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = 0, gamma = 0)
    },
    upper = gs_b,
    upar = qnorm(.975),
    lower = gs_b,
    lpar = -Inf,
    analysisTimes = trial_duration,
    events = .1
  )

  # max combo test with 2 WLR components
  mc2_test <- data.frame(
    rho = 0, gamma = c(0, .5), tau = -1,
    test = 1:2, Analysis = 1, analysisTimes = trial_duration
  )

  MC2 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc2_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # max combo test with 3 WLR components
  mc3_test <- data.frame(
    rho = c(0, 0, .5), gamma = c(0, .5, .5), tau = -1,
    test = 1:3, Analysis = 1, analysisTimes = trial_duration
  )

  MC3 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc3_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # max combo test with 4 WLR components
  mc4_test <- data.frame(
    rho = c(0, 0, .5, .5), gamma = c(0, .5, .5, 0), tau = -1,
    test = 1:4, Analysis = 1, analysisTimes = trial_duration
  )

  MC4 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc4_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # Magirr-Burman rho=-1, gamma=0, tau = 6 test
  MB6 <- gs_power_wlr(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    ratio = 1,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = -1, gamma = 0, tau = 6)
    },
    upar = qnorm(.975),
    lpar = -Inf,
    analysisTimes = trial_duration,
    events = .1
  )

  # One summary row for this duration. N and Events come from the FH(0, 0.5)
  # design; `analysis` is spelled out in full so the lookup does not depend on
  # partial matching of `$`. The column order (MC2, MC4, MC3) is kept as is so
  # the displayed table layout does not change.
  power_rows[[i]] <- tibble(
    `Study duration` = trial_duration,
    N = FH05$analysis$N[1],
    Events = FH05$analysis$Events[1],
    `Events/N` = Events / N,
    # we use the AHR from regular WLR as the AHR of different max combo test
    AHR = as.numeric(FH00$analysis$AHR[1]),
    `FH(0, 0.5) power` = FH05$bounds$Probability[1],
    `FH(0, 0) power` = FH00$bounds$Probability[1],
    `MC2 power` = MC2$bounds$Probability[1],
    `MC4 power` = MC4$bounds$Probability[1],
    `MC3 power` = MC3$bounds$Probability[1],
    `MB6 power` = MB6$bounds$Probability[1]
  )
}

tab <- bind_rows(power_rows)

# Render the Scenario 2 summary table; columns are selected by name, with the
# number of decimals chosen per column group.
gt(tab) %>%
  fmt_number(columns = c("N", "Events"), decimals = 1) %>%
  fmt_number(columns = "Events/N", decimals = 2) %>%
  fmt_number(columns = "AHR", decimals = 4) %>%
  fmt_number(columns = ends_with("power"), decimals = 2)

Study duration	N	Events	Events/N	AHR	FH(0, 0.5) power	FH(0, 0) power	MC2 power	MC4 power	MC3 power	MB6 power
20	2,537.2	1,072.0	0.42	0.8623	0.85	0.68	0.82	0.82	0.82	0.75
24	2,230.2	1,082.6	0.49	0.8582	0.85	0.71	0.82	0.82	0.82	0.77
28	2,126.6	1,129.9	0.53	0.8575	0.85	0.73	0.83	0.82	0.83	0.79
32	2,047.5	1,163.3	0.57	0.8568	0.85	0.75	0.83	0.82	0.83	0.80
36	1,979.7	1,191.5	0.60	0.8564	0.85	0.76	0.83	0.83	0.83	0.81
40	1,919.0	1,214.2	0.63	0.8560	0.85	0.77	0.83	0.83	0.84	0.81
44	1,863.6	1,231.6	0.66	0.8556	0.85	0.78	0.83	0.83	0.84	0.82
48	1,817.1	1,247.8	0.69	0.8554	0.85	0.79	0.83	0.83	0.84	0.82
52	1,774.6	1,260.4	0.71	0.8551	0.85	0.79	0.83	0.84	0.84	0.83
56	1,738.4	1,272.2	0.73	0.8551	0.85	0.80	0.84	0.84	0.84	0.83
60	1,704.0	1,280.7	0.75	0.8548	0.85	0.80	0.84	0.84	0.85	0.83

07 October, 2022

Overview

An Alternative Scenario