Commit a0e1044f authored by Xuefei Zhang's avatar Xuefei Zhang
Browse files

Upload New File

parent 7f821b70
# author: Xuefei Zhang and Boang Liu
# This script fit TVC model on training dataset and validate it on test dataset
library(survival)
library(pROC)
# read in full set of training data
data0 <- read.csv('./sample_data/TVC_dataframe_all.csv')
data0$PatientID = as.character(data0$PatientID)
# split data0 into 70/30 for training testing, repeat 30 times
ids_all = unique(data0$PatientID)
n_ids = length(ids_all)
set.seed(1000)
train_index_list = vector("list", 30)
for (i in 1:30){
train_id_index = sample(1:n_ids, round(n_ids*0.7), replace = F)
train_index_list[[i]] <- train_id_index
}
data0_test = read.csv('./sample_data/TVC_dataframe_test.csv')
data_outcome_LandK = read.csv('/sample_data/dataOutcome_Kyear_outcome_Lyear_predictor.csv')
data_outcome_LandK = data_outcome_LandK[is.element(data_outcome_LandK$PatientID, unique(data0_test$PatientID)),]
# run
auc_list = rep(0, 30)
set.seed(125)
for (i in 1:30){
print(i)
t1 = proc.time()
data_train <- data0[is.element(data0$PatientID, ids_all[train_index_list[[i]]]),]
data_test <- data0_test[!is.element(data0_test$PatientID, ids_all[train_index_list[[i]]]),]
data_train[,c('DaysToOutcome', 'Outcome')] <- NULL
data_test[,c('DaysToOutcome', 'Outcome')] <- NULL
fit1 <- coxph(Surv(tstart, tstop, OutcomeTVC) ~., data=data_train[,-1])
pred1 <- predict(fit1, data_test, type='expected')
pred1_by_pid <- cbind(data_test, pred1)
pred1_result <- aggregate(exp(-pred1)~PatientID, dat = pred1_by_pid, FUN = prod)
outcome_test <- merge(pred1_result, data_outcome_LandK)
outcome_test = na.omit(outcome_test)
roc1 <- roc(outcome_test[,3], 1 - outcome_test[,2])
auc_list[i] <-roc1$auc
print(auc_list[i])
t2 = proc.time()
print(t2 - t1)
}
mean(auc_list)
sd(auc_list)
summary(auc_list)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment