PRESS2 <- function(lm) { hats <- lm.influence(lm)$hat press <- sum((residuals(lm)/(1 - hats))^2) c(length(coefficients(lm)), press) } ############################################################ # This is for Splus. ############################################################ mylm.influence <- function(lm, xmat) { GET <- function(x, what) { # eventually, x[[what, exact=T]] if(is.na(n <- match(what, names(x)))) NULL else x[[n]] } wt <- GET(lm, "weights") # should really test for < 1/BIG if machine pars available e <- lm$residuals n <- length(e) if(!is.null(wt)) e <- e * sqrt(wt) beta <- lm$coef if(is.matrix(beta)) { beta <- beta[, 1] e <- e[, 1] warning("multivariate response, only first y variable used") } na <- is.na(beta) beta <- beta[!na] p <- GET(lm, "rank") if(is.null(p)) p <- sum(!na) R <- lm$R if(p < max(dim(R))) R <- R[1:p, 1:p] qr <- GET(lm, "qr") if(is.null(qr)) { # lm.x <- GET(lm, "x") # if(is.null(lm.x)) # lm.x <- model.matrix(lm$terms, model.frame(lm), # contrasts = lm$contrasts) ####lm.x <- model.matrix(lm) lm.x <- xmat if(!is.null(wt)) lm.x <- lm.x * sqrt(wt) if(any(na)) lm.x <- lm.x[, !na, drop = F] Q <- left.solve(R, lm.x) } else { if(!is.null(wt) && any(zero <- wt == 0)) { Q <- matrix(0, n, p) dimnames(Q) <- list(names(e), names(beta)) Q[!zero, ] <- qr.Q(qr) } else Q <- qr.Q(qr) } h <- as.vector((Q^2 %*% array(1, c(p, 1)))) z <- e/(1 - h) press <- sum(z^2) list(hat = h, PRESS = press) } drop1PRESS.lm <- function(object, scope, scale, keep, all.cols = T) { if(!is.null(object$na.action)) object$na.action <- NULL b <- coef(object) cnames <- labels(b) singular <- attr(b, "singular") p <- length(b) x <- model.matrix(object) iswt <- !is.null(wt <- object$weights) if(iswt) x <- x * sqrt(wt) n <- dim(x)[[1]] asgn <- attr(x, "assign") tl <- attr(object$terms, "term.labels") if(missing(scope)) scope <- drop.scope(object) else { if(!is.character(scope)) scope <- attr(terms(update.formula(object, scope)), "term.labels") if(!all(match(scope, tl, F))) stop("scope is not a subset of term labels") } asgn <- asgn[scope] k <- length(scope) rdf <- object$df.resid chisq <- deviance.lm(object) if(missing(scale)) scale <- chisq/rdf if(!missing(keep)) { max.keep <- c("coefficients", "fitted", "residuals", "x.residuals", "effects", "R") if(is.logical(keep) && keep) keep <- max.keep else { if(!all(match(keep, max.keep, F))) stop(paste("Can only keep one or more of: \"", paste(max.keep, collapse = "\", \""), "\"", sep = "")) } fv <- predict(object) y <- object$residuals + fv if(iswt) { if(any(wt == 0)) stop("\"keep\" not allowed when some of the weights are zero") wt <- sqrt(wt) } } else keep <- character(0) xr <- match("x.residuals", keep, F) value <- array(vector("list", 6 * k), c(k, 6), list(scope, c("coefficients", "fitted", "residuals", "x.residuals", "effects", "R"))) if(length(ef <- object$effects) < n) stop("function only currently defined for methods that compute effects") dfs <- double(k) chis <- double(k) press <- double(k) if(!singular) { y <- (object$residuals + predict(object)) na.coef <- (1:length(object$coefficients))[!is.na(object$coefficients)] if(iswt) { if(!length(keep)) wt <- sqrt(wt) y <- y * wt } rank <- object$rank if(k < 125) { for(i in 1:k) { ii <- asgn[[i]] #brute force method if(all.cols) jj <- setdiff(seq(ncol(x)), ii) else jj <- setdiff(na.coef, ii) z <- lm.fit.qr(x[, jj, drop = F], y, singular = F, qr = xr) ##singular = T or F?## press[i] <- mylm.influence(z, xmat = x[, jj, drop = F])$PRESS cat("PRESS=", format(round(press[i], 2)), "\n\n") ######## sum( (residuals(z)/(1-hat(x[, jj, drop = F])))^2 ) ######## #efi <- z$effects #dfs[i] <- rank - (ranki <- z$rank) #chis[i] <- sum(efi[ - seq(ranki)]^2) #if(length(keep)) { # fvi <- z$fitted # res <- z$residuals # if(iswt) { # fvi <- fvi/wt # res <- res/wt # } # value[i, -4] <- list(z$coef, fvi, res, efi, z$R) # if(xr) { # xres <- qr.resid(z$qr, x[, ii, drop = F]) # if(iswt) # xres <- xres/wt # value[[i, 4]] <- xres # } #} } } else { for(i in 1:120) { ii <- asgn[[i]] #brute force method if(all.cols) jj <- setdiff(seq(ncol(x)), ii) else jj <- setdiff(na.coef, ii) z <- lm.fit.qr(x[, jj, drop = F], y, singular = T, qr = xr) ######## press[i] <- mylm.influence(z, xmat = x[, jj, drop = F])$PRESS cat("PRESS=", format(round(press[i], 2)), "\n\n") } for(i in 121:k) { ii <- asgn[[i]] #brute force method if(all.cols) jj <- setdiff(seq(ncol(x)), ii) else jj <- setdiff(na.coef, ii) z <- lm.fit.qr(x[, jj, drop = F], y, singular = T, qr = xr) ######## press[i] <- mylm.influence(z, xmat = x[, jj, drop = F])$PRESS cat("PRESS=", format(round(press[i], 2)), "\n\n") } } } else { R <- object$R R <- array(R, dim(R), dimnames(R)) if(xr) { xk <- array(0, dim(x), dimnames(x)) xk[1:p, ] <- R } else xk <- array(ef, c(n, 1), list(dimnames(x)[[1]], NULL)) for(i in 1:k) { ii <- asgn[[i]] pii <- length(ii) dfs[i] <- pii if(xr) { xi <- xk[, c(1, ii)] xi[, 1] <- ef } else xi <- xk r <- R pi <- 1:(p - pii) pp <- - p for(j in rev(ii)) { z <- delcol(r, xi, j) r <- z[[1]][pp, ] xi <- z[[2]] pp <- pp + 1 } efi <- xi[, 1] chis[i] <- sum(efi[ - pi]^2) if(length(keep)) { # compute it all, even though all may not be reqd bi <- as.matrix(backsolve(r, xi[pi, ])) dimnames(bi)[[1]] <- cnames[ - ii] fvi <- x[, - ii, drop = F] %*% bi if(iswt) fvi <- fvi/wt names(efi)[] <- "" names(efi)[pi] <- cnames[ - ii] value[i, -4] <- list(bi[, 1], fvi[, 1], y - fvi[, 1], efi, r) if(xr) { xres <- x[, ii] - fvi[, -1] if(iswt) xres <- xres/wt value[[i, 4]] <- xres } } } } scope <- c("", scope) dfs <- c(0, dfs) chis <- c(chisq, chis) aics <- chis + 2 * (n - rdf - dfs) * scale dfs[1] <- NA fullmod <- sum((residuals(object)/(1 - lm.influence(object)$hat))^2) press <- c(fullmod, press) aod <- data.frame(Df = dfs, "Sum of Sq" = c(NA, chis[-1] - chis[1]), RSS = chis, Cp = aics, PRESS = press, row.names = scope, check.names = F) head <- c("Single term deletions", "\nModel:", deparse(as.vector(formula(object)))) if(!missing(scale)) head <- c(head, paste("\nscale: ", format(scale), "\n")) class(aod) <- c("anova", "data.frame") attr(aod, "heading") <- head if(length(keep)) list(anova = aod, keep = structure(value[, keep, drop = F], class = "matrix")) else aod } mystepPRESS1 <- function(object, scope, scale = 0, direction = c("both", "backward", "forward"), trace = 1, keep = NULL, steps = 1000, use.start = F, k = 2, ...) { cut.string <- function(string) { if(length(string) > 1) string[-1] <- paste("\n", string[-1], sep = "") string } if(missing(scope)) { fdrop <- numeric(0) fadd <- NULL } else { if(is.list(scope)) { fdrop <- if(!is.null(fdrop <- scope$lower)) attr(terms(update.formula( object, fdrop)), "factors") else numeric(0) fadd <- if(!is.null(fadd <- scope$upper)) attr(terms(update.formula(object, fadd)), "factors") } else { fadd <- if(!is.null(fadd <- scope)) attr(terms(update.formula(object, scope )), "factors") fdrop <- numeric(0) } } if(is.null(fadd)) { backward <- T forward <- F } fit <- object bAIC <- PRESS2(fit) edf <- bAIC[1] bAIC <- bAIC[2] Terms <- fit$terms if(trace) cat("Start: PRESS=", format(round(bAIC, 2)), "\n", cut.string(deparse(as.vector( formula(fit)))), "\n\n") while(steps > 0) { steps <- steps - 1 ####AIC <- bAIC bfit <- fit ffac <- attr(Terms, "factors") scope <- factor.scope(ffac, list(add = fadd, drop = fdrop)) aod <- NULL change <- NULL ####if(backward && length(scope$drop)) { if(length(scope$drop)) { aod <- drop1PRESS.lm(fit, scale = scale) ########## rn <- row.names(aod) row.names(aod) <- c(rn[1], paste("-", rn[-1], sep = " ")) } if(is.null(aod) || ncol(aod) == 0) break pressvec <- aod$PRESS[-1] porder <- order(pressvec) bAIC[2] <- pressvec[porder[1]] if(aod$PRESS[1] < (bAIC[2] - k)) break changenames <- row.names(aod)[-1] change <- changenames[porder[1]] fit <- update(fit, paste("~ .", change)) Terms <- fit$terms edf <- bAIC[1] bAIC <- bAIC[2] if(trace) cat("Step: PRESS=", format(round(bAIC, 2)), "\n", cut.string(deparse(as.vector(formula(Terms)))), "\n\n") ##if(bAIC >= AIC) break } } ################################################################### # Currently only backward is implemented. #######################################################################