############################################################################### # Title: Optimisation of a Possion survival model in R # Date: 2020-07-22 # # Author: Joshua P. Entrop # Website: joshua-entrop.com ############################################################################### # 1. Prefix ------------------------------------------------------------------- # Remove all files from ls rm(list = ls()) # Loading packages require(survival) require(flexsurv) require(optimx) require(numDeriv) require(dplyr) require(tibble) require(car) # 2. Loading dataset ---------------------------------------------------------- #Reading the example data set lung from the survival package lung <- as.data.frame(survival::lung) #Recode dichotomous vairables lung$female <- ifelse(lung$sex == 2, 1, 0) lung$status_n <- ifelse(lung$status == 2, 1, 0) # 3. Define log-likelihood function for Poisson regression model -------------- negll <- function(par){ #Extract guesses for alpha, beta1 and beta2 alpha <- par[1] beta1 <- par[2] beta2 <- par[3] #Define dependent and independent variables t <- lung$time d <- lung$status_n x1 <- lung$female x2 <- lung$age #Calculate lambda lambda <- exp(alpha + beta1 * x1 + beta2 * x2) #Estimate negetive log likelihood value val <- -sum(d * log(lambda) - lambda * t) return(val) } # 4. Define gradient function for Poisson regression model ------------------- negll.grad <- function(par){ #Extract guesses for alpha and beta1 alpha <- par[1] beta1 <- par[2] beta2 <- par[3] #Define dependent and independent variables t <- lung$time d <- lung$status_n x1 <- lung$female x2 <- lung$age #Create output vector n <- length(par[1]) gg <- as.vector(rep(0, n)) #Calculate pi and xb lambda <- exp(alpha + beta1 * x1 + beta2 * x2) #Calculate gradients for alpha and beta1 gg[1] <- -sum(d - lambda * t) gg[2] <- -sum(d * x1 - lambda * x1 * t) gg[3] <- -sum(d * x2 - lambda * x2 * t) return(gg) } # 4.1 Compare gradient function with numeric approximation of gradient ======== # compare gradient at 0, 0, 0 mygrad <- negll.grad(c(0, 0, 0)) numgrad <- grad(x = c(0, 0, 0), func = negll) all.equal(mygrad, numgrad) # 5. Find maximum of log-likelihood function ---------------------------------- # Passing names to the values in the par vector improves readability of results opt <- optimx(par = c(alpha = 0, beta_female = 0, beta_age = 0), fn = negll, gr = negll.grad, hessian = TRUE, control = list(trace = 0, all.methods = TRUE)) # Show results for optimisation alogrithms, that convergered (convcode == 0) summary(opt, order = "value") %>% rownames_to_column("algorithm") %>% filter(convcode == 0) %>% select(algorithm, alpha, beta_female, beta_age, value) # 6. Estimate regression coeficents using flexsurvreg ------------------------- pois_model <- flexsurvreg(Surv(time, status_n == 1) ~ female + age, data = lung, dist = "exp") # 7. Comparing results from optimx and flexsurvreg ---------------------------- pois_results <- unname(coef(pois_model)) coef_opt <- coef(opt) lapply(1:nrow(coef_opt), function(i){ opt_name <- attributes(coef_opt)$dimnames[[1]][i] mle_pois1 <- (coef_opt[i, 1] - pois_results[1]) mle_pois2 <- (coef_opt[i, 2] - pois_results[2]) mle_pois3 <- (coef_opt[i, 3] - pois_results[3]) mean_dif <- mean(mle_pois1, mle_pois2, mle_pois3, na.rm = TRUE) data.frame(opt_name, mean_dif) }) %>% bind_rows() %>% filter(!is.na(mean_dif)) %>% mutate(mean_dif = abs(mean_dif)) %>% arrange(mean_dif) # 8. Estimate the standard error ---------------------------------------------- #Extract hessian matrix for nlminb optimisation hessian_m <- attributes(opt)$details["nlminb", "nhatend"][[1]] # Estimate SE based on hession matrix fisher_info <- solve(hessian_m) prop_se <- sqrt(diag(fisher_info)) # Compare the estimated SE from our model with the one from the flexsurv model # Note use res.t to get the estimates on the reale scale without transformaiton ses <- data.frame(se_nlminb = prop_se, se_felxsurvreg = pois_model$res.t[, "se"]) %>% print() all.equal(ses[, "se_nlminb"], ses[, "se_felxsurvreg"]) # 9. Estimate 95%CIs using estimation of SE ----------------------------------- # Extracting estimates from nlminb optimisaiton coef_test <- coef(opt)["nlminb",] # Compute 95%CIs upper <- coef_test + 1.96 * prop_se lower <- coef_test - 1.96 * prop_se # Print 95%CIs data.frame(Estimate = coef_test, CI_lower = lower, CI_upper = upper, se = prop_se) # 10. Plot survival curve with 95%-CI ----------------------------------------- # 10.1 Use Delta Method to compute CIs across time of follow-up =============== # Get coefficents for nlminb optimisation nlminb_coef <- coef(opt)["nlminb", ] # Compute CIs for a 60 year old female across follow-up time surv_optim_female <- lapply(as.list(seq(0.01, 1000.01, 10)), function(t){ g <- paste("exp(-exp(alpha + beta_female + 60 * beta_age) *", t, ")") fit <- deltaMethod(nlminb_coef, g, solve(hessian_m)) data.frame(time = t, estimate = fit[, "Estimate"], ci_low = fit[, "2.5 %"], ci_up = fit[, "97.5 %"]) }) %>% bind_rows() # 10.2 Plot survival curve with CIs =========================================== plot(surv_optim_female$time, surv_optim_female$estimate, ylim = c(0, 1), type = "n", xlab = "Time in Days", ylab = "S(t)", main = "Survival after lung cancer \n for 60 year old females") polygon(c(surv_optim_female$time, rev(surv_optim_female$time)), c(surv_optim_female$ci_low, rev(surv_optim_female$ci_up)), border = NA, col = "grey") lines(surv_optim_female$time, surv_optim_female$estimate) legend(0, 0.15, fill = "grey", "95% CI") #'///////////////////////////////////////////////////////////////////////////// #'END OF R-FILE