Wdataclean2 <- function (z, d, wt = rep(1,length(z)) ) { # version 1, 2001-11. Mai Zhou niceorder<-order(z,-d) sortedz<-z[niceorder] sortedd<-d[niceorder] sortedw<-wt[niceorder] n <- length(sortedd) y1 <- sortedz[-1] != sortedz[-n] y2 <- sortedd[-1] != sortedd[-n] y <- y1 | y2 ind <- c(which(y | is.na(y)), n) csumw <- cumsum(sortedw) list( value = sortedz[ind], dd = sortedd[ind], weight = diff(c(0, csumw[ind])) ) } DnR <- function(x, d, w) { # take input from Wdataclean() posi <- d == 1 uncenx <- x[posi] uncenw <- w[posi] allrisk <- rev(cumsum(rev(w))) uncenR <- allrisk[posi] list( time = uncenx, n.risk = uncenR, n.event = uncenw ) } WKM <- function(x,d,w=rep(1, length(d)) ) { temp <- Wdataclean2(x,d,w) dd <- temp$dd ww <- temp$weight dd[length(dd)] <- 1 allrisk <- rev(cumsum(rev(ww))) survP <- cumprod( 1 - (dd*ww)/allrisk ) jumps <- -diff( c(1, survP) ) list(times=temp$value, jump=jumps, surv=survP ) } #Under Construction: NA generated in cumsum...... # #Wdataclean<-function(z, d, wt = rep(1,length(z)) ) #{ # z0 <- z[d==0] # ord0 <- order(z0) # z0 <- z0[ord0] # sortwz0 <- wt[d==0] # sortwz0 <- sortwz0[ord0] # # if(length(z0) > 1) { # z0.int<-match(z0,unique(z0)) # z0ends<-c(diff(z0.int) !=0, T) # wz0 <- diff(c(0,cumsum(sortwz0)[z0ends])) # newz0<-z0[z0ends] # } else { newz0 <- z0; wz0 <- sortwz0 } # # z1 <- z[d==1] # ord1 <- order(z1) # z1 <- z1[ord1] # sortwz1 <- wt[d==1] # sortwz1 <- sortwz1[ord1] # # if(length(z1) > 1) { # z1.int<-match(z1,unique(z1)) # z1ends<-c(diff(z1.int) !=0, T) # wz1 <- diff(c(0,cumsum(sortwz1)[z1ends])) # newz1<-z1[z1ends] # } else { newz1 <- z1; wz1 <- sortwz1 } # # newz <- c(newz0, newz1) # newd <- c(rep(0, length(newz0) ),rep(1, length(newz1))) # neww <- c(wz0, wz1) # # neworder <- order(newz, -newd) # newz <- newz[neworder] # newd <- newd[neworder] # neww <- neww[neworder] # # list(z = newz, d = newd, w = neww) } KapMei<-function(z, d, w) { # This code actually computes a weighted Kaplan-Meier estimator. # Z may have many ties. The function dataclean() sort the z and remove the # tied observations and put the multiplicity in neww, the weight. # May be I should put a new input vector, weights, up there. # z can still have tie between a censored and uncensored obs. # in this case the uncensored one should come first, i.e. in d, 1 comes first. # d is of the same length of z. # The last obs may need to be redefined to uncensored if you want a true # distribution as estimator always. n <- length(z) if(n < 2) stop("times vector, z, is shorter then 2!") if(length(d) != n) stop("two inputs must be of same length") if(any((d!=0)&(d!=1))) stop("d must be 0/1's for censor/not-censor") # How can I test if z have no NA's ....only the real numbers?? # is.numeric(z) if(length(which.na(z))) stop("there are NA's in z") temp <- Wdataclean(z, d) d <- temp$d n <- length(d) w <- temp$w sur <- rep(1, n) jum <- rep(0, n) var <- rep(0, n) risk <- rev(cumsum(rev(w))) if(d[1] == 1) { var[1] <- w[1]/(risk[1]*risk[2]) sur[1] <- 1 - w[1]/risk[1] jum[1] <- 1 - sur[1] } if(n < 2) stop("warning: only one distinct time in z?") for(i in 2:n) if(d[i] == 0) { var[i] <- var[i - 1] sur[i] <- sur[i - 1] } else { ttt <- w[i]/risk[i] var[i] <- var[i - 1] + ttt/risk[(i+1)] sur[i] <- sur[i - 1] * (1 - ttt) jum[i] <- sur[i - 1] - sur[i] } sigma <- sur*sqrt(var) list(time = temp$z, surv = sur, jump = jum, st.error = sigma ) }