Taking the data of the Beetle Mortality example from Agresti's book. [So far we only have the point estimator computed. To get a confidence interval, maybe we can use the empirical likelihood approach? (input from Mi-Ok Kim)]

> beetleDat <- data.frame(
+     lDose = c(1.691, 1.724, 1.755, 1.784,
+               1.811, 1.837, 1.861, 1.884),
+     n = c(59, 60, 62, 56, 63, 59, 62, 60),
+     x = c(6, 13, 18, 28, 52, 53, 61, 60)
+ )
> beetleDat
  lDose  n  x
1 1.691 59  6
2 1.724 60 13
3 1.755 62 18
4 1.784 56 28
5 1.811 63 52
6 1.837 59 53
7 1.861 62 61
8 1.884 60 60

My code wants the data in long format, i.e., one beetle per row. So I expand the data into a matrix of two columns and 481 rows (one way to build it is sketched just before the code listing below). Here are the first 10 rows:

> Longbeetle[1:10,]
       [,1] [,2]
 [1,] 1.691    1
 [2,] 1.691    1
 [3,] 1.691    1
 [4,] 1.691    1
 [5,] 1.691    1
 [6,] 1.691    1
 [7,] 1.691    0
 [8,] 1.691    0
 [9,] 1.691    0
[10,] 1.691    0

> rle( Longbeetle[,1] )
Run Length Encoding
  lengths: int [1:8] 59 60 62 56 63 59 62 60
  values : num [1:8] 1.691 1.724 1.755 1.784 1.811 1.837 1.861 1.884
> rle( Longbeetle[,2] )
Run Length Encoding
  lengths: int [1:15] 6 53 13 47 18 44 28 28 52 11 ...
  values : num [1:15] 1 0 1 0 1 0 1 0 1 0 ...
> rle( Longbeetle[,2] )$length
 [1]  6 53 13 47 18 44 28 28 52 11 53  6 61  1 60

The fit:

> junk <- bibj(x=cbind(rep(1, 481), Longbeetle[,1]), delta = 1 - Longbeetle[,2])
> junk
$est
[1]  1.0000000 -0.6212231

$iterN
[1] 3

$distFx
 [1] -1.94951166  0.05048834  0.07098870  0.09024662  0.10826209  0.12503511  0.14118692  0.15609627  0.17038440
[10]  2.17038440

$distFy
 [1] 0.0000000 0.1016949 0.2166667 0.2903226 0.5000000 0.8253968 0.8983051 0.9838710 1.0000000 1.0000000

Since the error distribution is arbitrary, there is a free scale parameter, so only the direction of the beta vector is identifiable. We therefore look only at the ratio of the two parameters, beta2/beta1:

> glm(I(cbind(x, n - x)) ~ lDose, family = binomial(link = logit), data = beetleDat)$coef
(Intercept)       lDose
  -60.74013    34.28593
> 34.28593/-60.74013
[1] -0.5644692

So this ratio is -0.5644692 under the logistic fit and -0.6212231 under bibj.

To see the shape of the estimated error distribution:

> plot(junk$distFx, junk$distFy, xlim=c(0, 0.2))

Try more iterations with a smaller error tolerance:

> bibj(x=cbind(rep(1, 481), Longbeetle[,1]), delta = 1 - Longbeetle[,2], error=0.0000001)
$est
[1]  1.0000000 -0.6211079

$iterN
[1] 4

$distFx
 [1] -1.94970660  0.05029340  0.07078996  0.09004430  0.10805643  0.12482634  0.14097515  0.15588173  0.17016722
[10]  2.17016722

$distFy
 [1] 0.0000000 0.1016949 0.2166667 0.2903226 0.5000000 0.8253968 0.8983051 0.9838710 1.0000000 1.0000000

Aug. 2012

The original code was written in 1992 and ran in S-Plus on an HP Unix workstation. We have since stopped paying for the S-Plus license and use R nowadays. I dug up the code, made minimal changes, and tried it on R version 2.14.1; it runs. The original code has two versions of the isot( ) function; one calls C code and is thus a lot faster, but making the C code work in R would take more time. So this is just the R version (and a bit slower). :-(

Example run:

set.seed(1234)
x <- matrix(c(rnorm(250), rnorm(250, mean=1)), ncol=2, byrow=FALSE)
z <- x[,1] + x[,2] + rnorm(250)
delta <- rep(1, 250)
delta[z < 0] <- -1

> bibj(x=x, delta=delta, error=0.000001)
[1] 1.0000000 0.8107448 9.0000000

So in 9 iterations the estimate is (1.0, 0.8107). Because of the standardization, beta1 is always taken to be 1.

Aug. 20, 2012

I made a small change so that the function also outputs the estimated distribution F( ) of epsilon.
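The expansion from beetleDat to Longbeetle is not shown above. Here is a minimal sketch of one way to do it (the construction is an assumption; only the name Longbeetle and the layout of its two columns are taken from the printout above). It reproduces the run-length structure reported by rle( ):

## Assumed construction of Longbeetle: one row per beetle, column 1 = log dose,
## column 2 = 1 for the x killed beetles in a group and 0 for the remaining n - x.
Longbeetle <- do.call(rbind, lapply(seq_len(nrow(beetleDat)), function(i) {
    with(beetleDat[i, ], cbind(rep(lDose, n), rep(c(1, 0), times = c(x, n - x))))
}))
dim(Longbeetle)                  # 481 rows, 2 columns
rle(Longbeetle[, 2])$lengths     # should match the 6 53 13 47 ... pattern above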
=================================================
Now the code itself

bibj <- function(x, delta, maxiter = 20, error = 0.0001) {
    ### x is a matrix of N rows; delta is a vector of length N.
    ### delta = 1 means the actual obs (y) is > 0; delta = -1 means y < 0.
    ### Output: est = beta, a vector of length = no. of columns of x (scaled so est[1] = 1),
    ###         plus iterN and the estimated error CDF (distFx, distFy).
    dimofX <- dim(x)
    if (dimofX[1] != length(delta)) stop("check dim of x and/or delta")
    newtemp <- matrix(NA, ncol = dimofX[2], nrow = 4)
    newtemp[1, ] <- lsfit(x, delta, intercept = FALSE)$coef
    newtemp[1, ] <- newtemp[1, ]/newtemp[1, 1]
    for (i in 2:4) {
        newtemp[i, ] <- iter(x, delta, newtemp[i - 1, ])
        newtemp[i, ] <- newtemp[i, ]/newtemp[i, 1]
    }
    num <- 3
    ### (the convergence loop and the returned list below were garbled in the note;
    ###  they are reconstructed to be consistent with the printed $est, $iterN, $distFx, $distFy)
    while (num <= maxiter && error < max(abs(newtemp[4, ] - newtemp[3, ]))) {
        newtemp[3, ] <- newtemp[4, ]
        newtemp[4, ] <- iter(x, delta, newtemp[3, ])
        newtemp[4, ] <- newtemp[4, ]/newtemp[4, 1]
        num <- num + 1
    }
    ### estimated error CDF at the final beta (same isot call as inside iter)
    dd <- delta
    dd[delta < 0.5] <- 0
    temp <- isot(- x %*% newtemp[4, ], 1 - dd, 2, 2)
    return(list(est = newtemp[4, ], iterN = num, distFx = temp$x, distFy = temp$y))
}

iter <- function(x, delta, beta) {
    ### one iteration step: impute y from the sign information, then refit by least squares.
    ### x is a matrix of N rows; delta is a vector of length N; delta = 1 means y > 0.
    ### beta is a vector of length = no. of columns of x.
    N <- length(delta)
    k <- length(as.vector(beta))
    if (dim(x)[1] != N) stop("check dim of x")
    if (dim(x)[2] != k) stop("check length of beta and dim of x")
    u <- - x %*% beta
    ystar <- - u                     # should I just let ystar <- delta ?
    dd <- delta
    dd[delta < 0.5] <- 0
    temp <- isot(u, 1 - dd, 2, 2)    # current NPMLE of the error CDF
    tx <- temp$x
    ty <- temp$y
    TN <- length(tx)
    for (i in 1:N) {
        m <- length(tx[tx <= u[i]])
        if (delta[i] > 0.5 && ty[m] < 1) {
            ### imputed y_i = x_i'beta + E[eps | eps > u_i] under the current F
            ystar[i] <- - u[i] +
                0.5*sum((tx[(m+1):TN] + tx[m:(TN-1)])*diff(ty[m:TN])) / (1 - ty[m])
        } else if (ty[m] > 0) {
            ### imputed y_i = x_i'beta + E[eps | eps <= u_i] under the current F
            ystar[i] <- - u[i] +
                0.5*sum((tx[2:m] + tx[1:(m-1)])*diff(ty[1:m])) / ty[m]
        }
    }
    return(lsfit(x, ystar, intercept = FALSE)$coef)
}

isot <- function(x, y, a, b) {
    ### The parameters a, b augment the output F( ) so that
    ### F(x[1] - a) = 0 and F(x[n] + b) = 1 always.
    ### y are 0's and 1's.
    ### The output is the NPMLE of the CDF of the latent variable U that generates y (here y = I[U <= x]).
    if (length(x) != length(y)) stop("check length of x and/or y")
    xsort <- unique(sort(x))
    m <- length(xsort)
    ysort <- rep(NA, m)
    nsort <- rep(NA, m)
    for (i in 1:m) {
        ysort[i] <- sum(y[x == xsort[i]])
        nsort[i] <- sum(x == xsort[i])
    }
    y <- ysort
    n <- nsort
    ms <- 1
    ys <- y[1]
    ns <- n[1]
    for (i in 2:length(y)) {
        ys <- c(ys, y[i])
        ns <- c(ns, n[i])
        ms <- c(ms, 1)
        ps <- ys/ns
        j <- length(ys)
        ### pool adjacent violators so the fitted proportions are nondecreasing
        while (length(ps) > 1 && ps[j-1] > ps[j]) {
            ys[j-1] <- ys[j-1] + ys[j]; ys <- ys[-j]
            ns[j-1] <- ns[j-1] + ns[j]; ns <- ns[-j]
            ms[j-1] <- ms[j-1] + ms[j]; ms <- ms[-j]
            ps <- ys/ns
            j <- j - 1
        }
    }
    return(list(x = c(xsort[1] - a, xsort, xsort[m] + b), y = c(0, rep(ps, ms), 1)))
}
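To see what isot( ) does on its own, here is a small stand-alone check (a toy example of mine, not from the analysis above). The raw proportions of 1's at x = 1, 2, 3 are 0/1, 2/2 and 1/3, which are not monotone, so the pool-adjacent-violators step should merge the last two groups into 3/5:

xx <- c(1, 2, 2, 3, 3, 3)        # three distinct x values
yy <- c(0, 1, 1, 1, 0, 0)        # proportions of 1's: 0/1, 2/2, 1/3 (not monotone)
isot(xx, yy, a = 1, b = 1)
## $x should be 0 1 2 3 4          (x[1]-a and x[3]+b appended)
## $y should be 0.0 0.0 0.6 0.6 1.0  (groups at x = 2 and x = 3 pooled to 3/5)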