# The function emlik2() calculates the jumps of A(t) and the -2log # empirical likelihood ratio for right censored data under the constraint # int g(t, theta)dA(t) = K where A(t) is the cumulative hazard. K is a # constant. This can be used as an implicit function for theta or explicit # function for K. Ref: Pan & Zhou (1998) U of Kentucky Tech report 36?. # # It uses the Splus function survfit(), so R users need to load survival # package first. (or write your own survfit() !) # Written by Mai Zhou (mai@ms.uky.edu) 2/97 version 1.0 3/98 V1.1 # # INPUTS: # x Observed (possibly right censored) times: x=min(t,C). # d Indicator of censoring: d=1 not censor, d=0 censor. # K the constant that the integrated hazard is = to under H0. # For median, take g(t, theta)=I[t<=theta] and K=log2. # fun (Weight) fuction of 2 variables, g(t, theta), to compute # weighted hazard. It has to be able to take a vector input # in the first variable and a scalar (parameter) in the second. # tola Optional tolerance value in solving the lambda = 0 equation. # theta Value of the implicit parameter constraint, passed to fun g(). # # OUTPUTS: # "-2logemLR" -2Log empirical likelihood ratio (ALR or Poisson version) # lambda Lagrange multiplier # times Location of the weights # wts Jump size of the new "Nelson-Aalen" estimate with constraint # nits Number of iterations used (for R, change "nf" to "iter") # message message from uniroot (for R, change "message" to "estim.prec") # # Examples of the input function fun is: (the indicator function) # fun2 <- function(x, theta) { as.numeric(x <= theta) } # # Bug: If data (x,d) has tied un-censored observations, the calculation # may be un-reliable. Tie of censored with uncensored obs. is OK, # tie between censored obs. is also OK. A temperary solution is to # break the tie by adding an epsilon to centain observations. # Also theta value has to be in certain range for this to work # properly, so a bit of searching may be needed. # # example: emlik2( c(1,2,3,4,5), c(1,1,0,1,1), 0.5, fun2, theta=3.5) # should return a list including # $"-2logemLR": # [1] 0.0226988 # etc. etc.... # this also work (define fun2 on the fly): #emlik2(c(1,2,3,4,5),c(1,1,0,1,1),0.5, # fun2<-function(x,theta){as.numeric(x<=theta)}, theta=3.5) ##################################################################### emlik2 <- function(x, d, K, fun, tola = .Machine$double.eps^.25,...) { if(!is.numeric(x)) stop("x must be numeric values -- observed times") n <- length(x) if( n <= 2 ) stop("Need more observations than two") if( length(d) != n ) stop("length of x and d must agree") if(any((d!=0)&(d!=1))) stop("d must be 0/1's for censor/not-censor") temp <- summary(survfit(Surv(x,d),se.fit=F,type="fleming",conf.type="none")) Dtime <- temp$time # only uncensored time? Yes. risk <- temp$n.risk jump <- (temp$n.event)/risk funtime <- fun(Dtime,...) funh <- (n/risk) * funtime # that is Zi funtimeTjump <- funtime * jump if(jump[length(jump)] >= 1) funh[length(jump)] <- 0 #for inthaz and weights inthaz <- function(x, ftj, fh, Konst){ sum(ftj/(1 + x * fh)) - Konst } diff <- inthaz(0, funtimeTjump, funh, K) if( diff == 0 ) { lam <- 0 } else { step <- 0.5/sqrt(n) if(abs(diff) > 99*log(n)*step ) ##why 99*log(n)? no reason, you stop("given theta value is too far away from theta0") # need something. mini<-0 maxi<-0 if(diff > 0) { maxi <- step while(inthaz(maxi, funtimeTjump, funh, K) > 0 && maxi < 50*log(n)*step) maxi <- maxi+step } else { mini <- -step while(inthaz(mini, funtimeTjump, funh, K) < 0 && mini > - 50*log(n)*step) mini <- mini - step } if(inthaz(mini,funtimeTjump,funh,K)*inthaz(maxi,funtimeTjump,funh,K)>0) stop("given theta is too far away from theta0") temp2 <- uniroot(inthaz,c(mini,maxi), tol = tola, ftj=funtimeTjump, fh=funh, Konst=K) lam <- temp2$root } onepluslamh<- 1 + lam * funh # this is 1 + lam Zi in Ref. weights <- jump/onepluslamh #need to change last jump to 1? NO. see above loglik <- 2*(sum(log(onepluslamh)) - sum((onepluslamh-1)/onepluslamh) ) #?is that right? YES see (3.2) in Ref. above. This is ALR, or Poisson LR. last <- length(jump) #to compute loglik2, we need to drop last jump if (jump[last] == 1) { risk1 <- risk[-last] jump1 <- jump[-last] weights1 <- weights[-last] } else { risk1 <- risk jump1 <- jump weights1 <- weights } loglik2 <- 2*( sum(log(onepluslamh)) + sum( (risk1 -1)*log((1-jump1)/(1- weights1) ) ) ) list( "-2logemLR"=loglik, "-2logemLRv2"=loglik2, lambda=lam, times=Dtime, wts=weights, nits=temp2$nf, message=temp2$message ) } # what should be the fun() and K if I want to perform a (1-sample) # log-rank test? # fun3 <- function(t1, z1) { psum( t( outer(z1, t1, FUN=">=") ) ) } # this is similar to the function in LogRank2() function. Need psum/2/3. # And K = int R(t) dH(t) = sum( H(z1) ) For example if H() is # exponential(0.3) then H(t) = 0.3*t, i.e. K <- sum(0.3* z1) # so finally a call may look like # # Assume z1 and d1 are the inputs: # emlik2(z1, d1, sum(0.3* z1), # fun3 <- function(t1,z){psum(t(outer(z,t1,FUN=">=") ) ) }, z=z1) # # Now use z1<-c(1,2,3,4,5) and d1<-c(1,1,0,1,1) we get # emlik2(z1, d1, sum(0.25* z1), # fun3 <- function(t1,z){psum(t(outer(z,t1,FUN=">=") ) ) }, z=z1) # # with output include this (and more) # $ "-2logemLR": # [1] 0.02204689 #This test if the (censored) obs. z1 is from exp(0.25)