# --- cross validation for fixed tau_1 and tau_2 --- # --- Aug. 16, 2005 --- tau1<-1.0; tau2<-1.0; step<-5000; increase<-0.005; # -------------------------------- z.dat<-t(read.table("norm-express.dat")); outcome.dat<-read.table("survival.dat", header=T); cluster<-read.table("cluster-info.txt", header=T); samplesize<-nrow(z.dat); num.cov<-ncol(z.dat); z<-matrix(rep(0, samplesize*num.cov), nrow=samplesize); for(i in 1:samplesize){ for(j in 1:num.cov) z[i, j]<-z.dat[i, j]; } observe<-outcome.dat$FollowUp; indicator<-outcome.dat$Status; # ----- Cross validation ------ V<-5; cv.score<-rep(0, step); delete.size<-as.integer(samplesize/V); complete.data<-data.frame(cbind(observe, indicator, z)); for(v in 1:V){ tstart <- Sys.time() cat("data preparation start time: ") print(tstart) ### --- cut data --- cross.data<- complete.data[-((delete.size*(v-1)+1):(delete.size*v)), ]; cross.size<-nrow(cross.data); cross.index<-sort(cross.data[ ,1], index.return=T)$ix; cross.time<-rep(0, cross.size); cross.indicator<-rep(0, cross.size); cross.z<-matrix(rep(0, cross.size*num.cov), nrow=cross.size); for(i in 1:cross.size){ cross.time[i]<-cross.data[cross.index[i], 1]; cross.indicator[i]<-cross.data[cross.index[i], 2]; for(j in 1:num.cov) cross.z[i, j]<-cross.data[cross.index[i], (j+2)]; } evaluate.data<- complete.data[((delete.size*(v-1)+1):(delete.size*v)), ]; evaluate.size<-nrow(evaluate.data); evaluate.index<-sort(evaluate.data[ ,1], index.return=T)$ix; evaluate.time<-rep(0, evaluate.size); evaluate.indicator<-rep(0, evaluate.size); evaluate.z<-matrix(rep(0, num.cov*evaluate.size), ncol=num.cov); for(i in 1:evaluate.size){ evaluate.time[i]<-evaluate.data[evaluate.index[i], 1]; evaluate.indicator[i]<-evaluate.data[evaluate.index[i], 2]; for(j in 1:num.cov) evaluate.z[i, j]<-evaluate.data[evaluate.index[i], (j+2)]; } # -------------------------------------- # --- now estimation --- # --- ATTN: two "layer" of threshold --- beta.est<-rep(0, num.cov); h.v<-rep(0, num.cov); f.v<-rep(0, num.cov); f.v2<-rep(0, num.cov); g.v<-rep(0, num.cov); g.v2<-rep(0, num.cov); cri.2<-rep(0, num.cov); for(s in 1:step){ # --- first compute beta*com.z --- beta.z<-cross.z%*%beta.est; exp.beta.z<-rep(0, nrow(beta.z)); for(i in 1:length(exp.beta.z)) exp.beta.z[i]<-exp(beta.z[i,1]); # --- compute gradient --- gradient<-rep(0, num.cov); for(i in 1:cross.size){ if(cross.indicator[i]==1){ # --- for event only term1<-cross.z[i, ]; upper<-rep(0, length(term1)); for(j in i:cross.size){ upper<-upper+cross.z[j, ]*exp.beta.z[j]; } term2<-upper/sum(exp.beta.z[i:cross.size]); gradient<-gradient+term1-term2; } } max.gradient<-max(abs(gradient)); write.table(cbind(v, s, max(gradient)),file="check.txt", append=T, quote=F, row.name=F, col.name=F); gradient<-gradient/max.gradient; # --- the following copied from the additive model --- # ---- now the new and tricky part -------- work.gradient.dat<- data.frame(cbind(cluster$cluster, abs(gradient))); cri.cluster<-rep(0, 20); max.grad.cluster<-rep(0, 20); for(clus in 1:20){ work.grad.clus<- work.gradient.dat[work.gradient.dat[ ,1]==clus,]; cri.cluster[clus]<-sum(work.grad.clus[ ,2]); max.grad.cluster[clus]<-max(abs(work.grad.clus[ ,2])); } pseudo.gv<-rep(0, num.cov); for(clus in 1:num.cov){ pseudo.gv[clus]<-cri.cluster[cluster$cluster[clus]]; cri.2[clus]<-max.grad.cluster[cluster$cluster[clus]]*tau2; } for(i in 1:length(g.v)) g.v[i]<-gradient[i]; for(i in 1:length(f.v2)){ if( abs(g.v[i]) >= cri.2[i] ) f.v2[i]<-1; if( abs(g.v[i]) < cri.2[i] ) f.v2[i]<-0; } cri.t<- tau1*max(pseudo.gv); for(i in 1:length(pseudo.gv)){ if(pseudo.gv[i]>=cri.t){ f.v[i]<-1; } if(pseudo.gv[i]< cri.t){ f.v[i]<-0; } } for(i in 1:length(g.v)) h.v[i]<-f.v[i]*f.v2[i]*g.v[i]; beta.est<-beta.est+increase*h.v; # --- end of estimation, prepare evaluation --- log.like<-0; eva.beta.z<-evaluate.z%*%beta.est; exp.eva.betaz<-exp(eva.beta.z); temp<-exp.eva.betaz*evaluate.indicator; for(i in 1:evaluate.size){ term1<-exp.eva.betaz[i]; term2<-sum(temp[i:evaluate.size]); if(abs(term2)< 0.000001) add<- 0; if(abs(term2)> 0.000001) add<-log(term1/term2); log.like<-log.like+add*evaluate.indicator[i]; } cv.score[s]<-cv.score[s]+log.like; } } se<-seq(from=1, to=step, by=1); write.table(cbind(se, cv.score), file="cv.txt", quote=F, append=F, row.name=F, col.name=F);