######################################################################
### dataclean2: sort the pairs (z, d) and optionally collapse ties.
###
### Arguments:
###   z          vector of observed values.
###   d          vector parallel to z; used (descending) to break ties
###              in z.  The original notes mention codes 2, 1, 0 --
###              presumably status indicators; confirm against callers.
###   identical  flags parallel to z (a scalar is recycled).  A nonzero
###              flag means "never merge this observation into the tied
###              observation before it".  The default (all 1) therefore
###              collapses nothing.
###              If you want the duplicate values removed, use
###                 identical = rep(0, length(z))
###
### Value: list(z, d, w) where z and d are sorted by z increasing
###   (ties: d decreasing) with merged duplicates dropped, and w[i] is
###   the number of input observations represented by the i-th entry.
###   When length(d) <= 1 the inputs are returned unchanged and
###   w = length(d).
######################################################################
dataclean2 <- function(z, d, identical = rep(1, length(z))) {
  # With at most one observation there is nothing to sort or merge.
  if (length(d) <= 1) {
    return(list(z = z, d = d, w = length(d)))
  }

  # Order by z increasing; within tied z, larger d comes first.
  niceorder <- order(z, -d)
  sortz <- z[niceorder]
  sortd <- d[niceorder]

  # The flags are given in input order, so they must be permuted the same
  # way as z and d before being applied to the sorted data.  rep_len()
  # recycles a scalar flag and guards against a flag vector whose length
  # differs from z (which would otherwise extend the index vector below).
  keep_apart <- rep_len(identical != 0, length(z))[niceorder]

  # An entry is a duplicate only if it repeats an earlier z AND has the
  # same d as the entry immediately before it (tied z with different d
  # is not a tie), and it was not flagged to be kept apart.
  dupsortz <- duplicated(sortz)
  dupsortz[c(1, diff(sortd)) != 0] <- FALSE
  dupsortz[keep_apart] <- FALSE

  retained <- !dupsortz
  sortz <- sortz[retained]
  sortd <- sortd[retained]

  # Weight of each retained entry = distance to the next retained entry
  # (or to one past the end), i.e. itself plus the duplicates folded in.
  first_pos <- which(retained)
  weight <- diff(c(first_pos, length(dupsortz) + 1))

  list(z = sortz, d = sortd, w = weight)
}
### a possible improvement: already have weight(i) in the input