Skip to contents


We consider a delayed effect scenario where

  • The control group time-to-event distribution is exponential with a median of 15 months.
  • The experimental group has a hazard ratio vs. control of 1 for 6 months and 0.6 thereafter.
  • Enrollment at a constant rate for 12 months.
  • Total study duration from 24 to 60 months.
  • Exponential dropout rate of 0.001 per month.
# Scenario 1 inputs.
# Enrollment: one stratum, constant rate over a 12-month enrollment period.
enrollRates <- tibble(
  Stratum = "All",
  duration = 12,
  rate = 1
)
# Failure: the control hazard log(2) / 15 corresponds to a 15-month median;
# the hazard ratio is 1 for the first 6 months and 0.6 afterwards.
failRates <- tibble(
  Stratum = "All",
  duration = c(6, 100),
  failRate = log(2) / 15,
  hr = c(1, .6),
  dropoutRate = 0.001
)
# Show the scenario 1 enrollment assumptions as a titled table.
gt(enrollRates) %>%
  tab_header(title = "Enrollment Table of Scenario 1")
Enrollment Table of Scenario 1
Stratum duration rate
All 12 1
# Show the scenario 1 failure-rate assumptions as a titled table.
gt(failRates) %>%
  tab_header(title = "Failure Table of Scenario 1")
Failure Table of Scenario 1
Stratum duration failRate hr dropoutRate
All 6 0.04620981 1.0 0.001
All 100 0.04620981 0.6 0.001

For the above scenarios, we investigate the power, sample size and events under 6 tests:

  • FH05: The Fleming-Harrington with \(\rho=0, \gamma=0.5\) test to obtain power of 85% given 1-sided Type I error of 0.025.
  • FH00: The regular logrank test with \(\rho=0, \gamma=0\) under fixed study duration \(\in\{20, 24, 28, \ldots, 60\}\).
  • mc2_test: The Max Combo test including 2 WLR tests, i.e., \(\{(\rho=0, \gamma=0, \tau = -1), (\rho=0, \gamma=0.5, \tau = -1)\}\).
  • mc3_test: The Max Combo test including 3 WLR tests, i.e., \(\{(\rho=0, \gamma=0, \tau = -1), (\rho=0, \gamma=0.5, \tau = -1), (\rho=0.5, \gamma=0.5, \tau = -1)\}\).
  • mc4_test: The Max Combo test including 4 WLR tests, i.e., \(\{(\rho=0, \gamma=0, \tau = -1), (\rho=0, \gamma=0.5, \tau = -1), (\rho=0.5, \gamma=0.5, \tau = -1), (\rho=0.5, \gamma=0, \tau = -1)\}\).
  • MB6: The Magirr-Burman with \(\rho=-1, \gamma=0, \tau = 6\) test with fixed study duration \(\in\{20, 24, 28, \ldots, 60\}\).

We then compute power for the logrank test. The general summary is that the Fleming-Harrington test has a meaningful power gain relative to logrank regardless of the study durations evaluated.

# Scenario 1 power comparison.
# For each study duration the trial is sized with the FH(0, 0.5) test
# (alpha = 0.025, beta = 0.15). The enrollment rates from that design are
# then passed to every other test, so all powers refer to the same trial.
trial_durations <- seq(24, 60, 4)

# One summary row per duration; rows are bound once after the loop rather
# than growing `tab` with rbind() on every iteration.
tab_rows <- vector("list", length(trial_durations))

for (i in seq_along(trial_durations)) {
  trial_duration <- trial_durations[i]

  # Fleming-Harrington rho = 0, gamma = 0.5 test (drives the sample size)
  FH05 <- gs_design_wlr(
    enrollRates = enrollRates,
    failRates = failRates,
    ratio = 1,
    alpha = 0.025, beta = 0.15,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = 0, gamma = 0.5)
    },
    upar = qnorm(.975),
    lpar = -Inf,
    analysisTimes = trial_duration
  )

  # Regular logrank test, i.e. FH(0, 0)
  FH00 <- gs_power_wlr(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    ratio = 1,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = 0, gamma = 0)
    },
    upar = qnorm(.975),
    lpar = -Inf,
    analysisTimes = trial_duration,
    events = .1
  )

  # Max combo test with 2 component tests
  mc2_test <- data.frame(
    rho = 0, gamma = c(0, .5), tau = -1,
    test = 1:2, Analysis = 1, analysisTimes = trial_duration
  )
  MC2 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc2_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # Max combo test with 3 component tests
  mc3_test <- data.frame(
    rho = c(0, 0, .5), gamma = c(0, .5, .5), tau = -1,
    test = 1:3, Analysis = 1, analysisTimes = trial_duration
  )
  MC3 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc3_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # Max combo test with 4 component tests
  mc4_test <- data.frame(
    rho = c(0, 0, .5, .5), gamma = c(0, .5, .5, 0), tau = -1,
    test = 1:4, Analysis = 1, analysisTimes = trial_duration
  )
  MC4 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc4_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # Magirr-Burman rho = -1, gamma = 0, tau = 6 test.
  # tau is 6 so that the weight matches the "MB6" label used for this test.
  MB6 <- gs_power_wlr(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    ratio = 1,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = -1, gamma = 0, tau = 6)
    },
    upar = qnorm(.975),
    lpar = -Inf,
    analysisTimes = trial_duration,
    events = .1
  )

  tab_new <- tibble(
    `Study duration` = trial_duration,
    N = FH05$analysis$N[1],
    # spelled out in full: `analysi` only worked through `$` partial matching
    Events = FH05$analysis$Events[1],
    `Events/N` = Events / N,
    # we use the AHR from regular WLR as the AHR of different max combo test
    AHR = as.numeric(FH00$analysis$AHR[1]),
    `FH(0, 0.5) power` = FH05$bounds$Probability[1],
    `FH(0, 0) power` = FH00$bounds$Probability[1],
    `MC2 power` = MC2$bounds$Probability[1],
    `MC4 power` = MC4$bounds$Probability[1],
    `MC3 power` = MC3$bounds$Probability[1],
    `MB6 power` = MB6$bounds$Probability[1]
  )
  tab_rows[[i]] <- tab_new
}

tab <- do.call(rbind, tab_rows)

# Render the scenario 1 summary: N and Events to 1 decimal, Events/N to 2,
# AHR to 4, and the six power columns to 2.
gt(tab) %>%
  fmt_number(columns = c(2, 3), decimals = 1) %>%
  fmt_number(columns = 4, decimals = 2) %>%
  fmt_number(columns = 5, decimals = 4) %>%
  fmt_number(columns = 6:11, decimals = 2)
Study duration N Events Events/N AHR FH(0, 0.5) power FH(0, 0) power MC2 power MC4 power MC3 power MB6 power
24 695.6 349.8 0.50 0.7688 0.85 0.69 0.82 0.81 0.82 0.81
28 521.9 296.1 0.57 0.7473 0.85 0.71 0.82 0.81 0.82 0.82
32 427.6 266.3 0.62 0.7325 0.85 0.72 0.82 0.81 0.82 0.82
36 369.4 247.5 0.67 0.7218 0.85 0.73 0.82 0.81 0.82 0.83
40 330.2 234.6 0.71 0.7138 0.85 0.74 0.82 0.80 0.82 0.83
44 302.3 225.3 0.75 0.7076 0.85 0.74 0.82 0.80 0.82 0.83
48 281.5 218.3 0.78 0.7027 0.85 0.74 0.81 0.80 0.81 0.83
52 265.5 212.8 0.80 0.6987 0.85 0.75 0.81 0.80 0.81 0.83
56 253.2 208.5 0.82 0.6955 0.85 0.75 0.81 0.80 0.81 0.83
60 243.3 205.1 0.84 0.6929 0.85 0.75 0.81 0.80 0.81 0.83

An Alternative Scenario

Now we consider an alternate scenario where the placebo group starts with the same median, but then has a piecewise change to a median of 30 after 16 months and with a hazard ratio of 0.85 during that late period.

# Scenario 2 inputs.
# Enrollment is unchanged: one stratum, constant rate for 12 months.
enrollRates <- tibble(
  Stratum = "All",
  duration = 12,
  rate = 1
)
# Failure rates now have three periods (6, 10 and 100 months long).
failRates <- tibble(
  Stratum = "All",
  duration = c(6, 10, 100),
  # Scenario 1 used a single control rate, log(2) / 15; here the control
  # median moves from 15 to 30 in the last period.
  failRate = log(2) / c(15, 15, 30),
  # Scenario 1 used hr = c(1, .6); the last period adds a hazard ratio of .85.
  hr = c(1, .6, .85),
  dropoutRate = 0.001
)
# Show the scenario 2 enrollment assumptions as a titled table.
gt(enrollRates) %>%
  tab_header(title = "Enrollment Table of Scenario 2")
Enrollment Table of Scenario 2
Stratum duration rate
All 12 1
# Show the scenario 2 failure-rate assumptions as a titled table.
gt(failRates) %>%
  tab_header(title = "Failure Table of Scenario 2")
Failure Table of Scenario 2
Stratum duration failRate hr dropoutRate
All 6 0.04620981 1.00 0.001
All 10 0.04620981 0.60 0.001
All 100 0.02310491 0.85 0.001
# Scenario 2 power comparison.
# For each study duration the trial is sized with the FH(0, 0.5) test
# (alpha = 0.025, beta = 0.15). The enrollment rates from that design are
# then passed to every other test, so all powers refer to the same trial.
trial_durations <- seq(20, 60, 4)

# One summary row per duration; rows are bound once after the loop rather
# than growing `tab` with rbind() on every iteration.
tab_rows <- vector("list", length(trial_durations))

for (i in seq_along(trial_durations)) {
  trial_duration <- trial_durations[i]

  # Fleming-Harrington rho = 0, gamma = 0.5 test (drives the sample size)
  FH05 <- gs_design_wlr(
    enrollRates = enrollRates,
    failRates = failRates,
    ratio = 1,
    alpha = 0.025, beta = 0.15,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = 0, gamma = 0.5)
    },
    upper = gs_b,
    upar = qnorm(.975),
    lower = gs_b,
    lpar = -Inf,
    analysisTimes = trial_duration
  )

  # Regular logrank test, i.e. FH(0, 0)
  FH00 <- gs_power_wlr(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    ratio = 1,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = 0, gamma = 0)
    },
    upper = gs_b,
    upar = qnorm(.975),
    lower = gs_b,
    lpar = -Inf,
    analysisTimes = trial_duration,
    events = .1
  )

  # Max combo test with 2 component tests
  mc2_test <- data.frame(
    rho = 0, gamma = c(0, .5), tau = -1,
    test = 1:2, Analysis = 1, analysisTimes = trial_duration
  )
  MC2 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc2_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # Max combo test with 3 component tests
  mc3_test <- data.frame(
    rho = c(0, 0, .5), gamma = c(0, .5, .5), tau = -1,
    test = 1:3, Analysis = 1, analysisTimes = trial_duration
  )
  MC3 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc3_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # Max combo test with 4 component tests
  mc4_test <- data.frame(
    rho = c(0, 0, .5, .5), gamma = c(0, .5, .5, 0), tau = -1,
    test = 1:4, Analysis = 1, analysisTimes = trial_duration
  )
  MC4 <- gs_power_combo(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    fh_test = mc4_test,
    upper = gs_spending_combo,
    upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
    lower = gs_spending_combo,
    lpar = list(sf = gsDesign::sfLDOF, total_spend = 0.01)
  )

  # Magirr-Burman rho = -1, gamma = 0, tau = 6 test
  MB6 <- gs_power_wlr(
    enrollRates = FH05$enrollRates,
    failRates = failRates,
    ratio = 1,
    weight = function(x, arm0, arm1) {
      wlr_weight_fh(x, arm0, arm1, rho = -1, gamma = 0, tau = 6)
    },
    upar = qnorm(.975),
    lpar = -Inf,
    analysisTimes = trial_duration,
    events = .1
  )

  tab_new <- tibble(
    `Study duration` = trial_duration,
    N = FH05$analysis$N[1],
    # spelled out in full: `analysi` only worked through `$` partial matching
    Events = FH05$analysis$Events[1],
    `Events/N` = Events / N,
    # we use the AHR from regular WLR as the AHR of different max combo test
    AHR = as.numeric(FH00$analysis$AHR[1]),
    `FH(0, 0.5) power` = FH05$bounds$Probability[1],
    `FH(0, 0) power` = FH00$bounds$Probability[1],
    `MC2 power` = MC2$bounds$Probability[1],
    `MC4 power` = MC4$bounds$Probability[1],
    `MC3 power` = MC3$bounds$Probability[1],
    `MB6 power` = MB6$bounds$Probability[1]
  )
  tab_rows[[i]] <- tab_new
}

tab <- do.call(rbind, tab_rows)

# Render the scenario 2 summary: N and Events to 1 decimal, Events/N to 2,
# AHR to 4, and the six power columns to 2.
gt(tab) %>%
  fmt_number(columns = c(2, 3), decimals = 1) %>%
  fmt_number(columns = 4, decimals = 2) %>%
  fmt_number(columns = 5, decimals = 4) %>%
  fmt_number(columns = 6:11, decimals = 2)
Study duration N Events Events/N AHR FH(0, 0.5) power FH(0, 0) power MC2 power MC4 power MC3 power MB6 power
20 2,537.2 1,072.0 0.42 0.8623 0.85 0.68 0.82 0.82 0.82 0.75
24 2,230.2 1,082.6 0.49 0.8582 0.85 0.71 0.82 0.82 0.82 0.77
28 2,126.6 1,129.9 0.53 0.8575 0.85 0.73 0.83 0.82 0.83 0.79
32 2,047.5 1,163.3 0.57 0.8568 0.85 0.75 0.83 0.82 0.83 0.80
36 1,979.7 1,191.5 0.60 0.8564 0.85 0.76 0.83 0.83 0.83 0.81
40 1,919.0 1,214.2 0.63 0.8560 0.85 0.77 0.83 0.83 0.84 0.81
44 1,863.6 1,231.6 0.66 0.8556 0.85 0.78 0.83 0.83 0.84 0.82
48 1,817.1 1,247.8 0.69 0.8554 0.85 0.79 0.83 0.83 0.84 0.82
52 1,774.6 1,260.4 0.71 0.8551 0.85 0.79 0.83 0.84 0.84 0.83
56 1,738.4 1,272.2 0.73 0.8551 0.85 0.80 0.84 0.84 0.84 0.83
60 1,704.0 1,280.7 0.75 0.8548 0.85 0.80 0.84 0.84 0.85 0.83