Is it possible to query an R function for the default values of its parameters?

In most cases, the default values of a function's parameters are given in the documentation. However, in some cases a default value is computed from other parameters (including the data itself), so it cannot be stated explicitly in the documentation.

For example, how can I find out the default lambda grid used by the glmnet function in the glmnet library? According to the documentation, by default lambda is computed based on nlambda , which defaults to 100, and lambda.min.ratio , which appears to be a data-derived value.

When I run this function on a given dataset, I would like to know the lambda values that it used. This is especially useful when using cv.glmnet , because I want to know which lambda grid it chooses when I don't specify one.

Input Example:

 library(glmnet) set.seed(1) x=rnorm(100) eps=rnorm(100) y = 1 + x + x^2 + x^3 + eps xmat=model.matrix(y~poly(x,10,raw=T),data=data.frame(x=x)) cv.out=cv.glmnet(xmat, y,alpha=0) # What is the lambda used here? bestlam=cv.out$lambda.min print(bestlam) # When a grid is specified, the result is very different and sometimes worse. grid=10^seq(10,-2,length=100) cv.out=cv.glmnet(xmat, y,alpha=0, lambda=grid) bestlam=cv.out$lambda.min print(bestlam) 

Example output (note that they are very different):

 0.3619167 0.04037017 
+6
source share
3 answers

If the default values depend on the values of other arguments, I see no other solution than to step into the function in debug mode when it is called. You can use debugonce , for example:

 > debugonce(cv.glmnet) > > cv.out=cv.glmnet(xmat, y,alpha=0) # What is the lambda used here? debugging in: cv.glmnet(xmat, y, alpha = 0) [...] Browse[2]> ls() # [1] "foldid" "grouped" "keep" "lambda" "nfolds" "offset" # [7] "parallel" "type.measure" "weights" "x" "y" Browse[2]> lambda NULL Browse[2]> c > 

So, for this first call, lambda is NULL . However, if you repeat this approach for the second call to cv.glmnet , you will see that in that case lambda is a numeric vector of length 100.

+4
source

I am surprised that nobody has posted these yet, but the obvious functions are args and formals :

args shows only the "top" of a function (its signature) without the body, as opposed to typing cv.glmnet at the prompt, which prints the whole function:

 > args(cv.glmnet) function (x, y, weights, offset = NULL, lambda = NULL, type.measure = c("mse", "deviance", "class", "auc", "mae"), nfolds = 10, foldid, grouped = TRUE, keep = FALSE, parallel = FALSE, ...) NULL 

formals gives these arguments as a list:

 > formals(cv.glmnet) $x $y $weights $offset NULL $lambda NULL $type.measure c("mse", "deviance", "class", "auc", "mae") $nfolds [1] 10 $foldid $grouped [1] TRUE $keep [1] FALSE $parallel [1] FALSE $... 
+3
source

You can always simply enter the name of the function and press Enter to get the source code of the function. In your example, lambda defaults to NULL.

 cv.glmnet ## function (x, y, weights, offset = NULL, lambda = NULL, type.measure = c("mse", ## "deviance", "class", "auc", "mae"), nfolds = 10, foldid, ## grouped = TRUE, keep = FALSE, parallel = FALSE, ...) ## { ## if (missing(type.measure)) ## type.measure = "default" ## else type.measure = match.arg(type.measure) ## if (!is.null(lambda) && length(lambda) < 2) ## stop("Need more than one value of lambda for cv.glmnet") ## N = nrow(x) ## if (missing(weights)) ## weights = rep(1, N) ## else weights = as.double(weights) ## y = drop(y) ## glmnet.call = match.call(expand.dots = TRUE) ## which = match(c("type.measure", "nfolds", "foldid", "grouped", ## "keep"), names(glmnet.call), F) ## if (any(which)) ## glmnet.call = glmnet.call[-which] ## glmnet.call[[1]] = as.name("glmnet") ## glmnet.object = glmnet(x, y, weights = weights, offset = offset, ## lambda = lambda, ...) ## glmnet.object$call = glmnet.call ## is.offset = glmnet.object$offset ## lambda = glmnet.object$lambda ## if (inherits(glmnet.object, "multnet")) { ## nz = predict(glmnet.object, type = "nonzero") ## nz = sapply(nz, function(x) sapply(x, length)) ## nz = ceiling(apply(nz, 1, median)) ## } ## else nz = sapply(predict(glmnet.object, type = "nonzero"), ## length) ## if (missing(foldid)) ## foldid = sample(rep(seq(nfolds), length = N)) ## else nfolds = max(foldid) ## if (nfolds < 3) ## stop("nfolds must be bigger than 3; nfolds=10 recommended") ## outlist = as.list(seq(nfolds)) ## if (parallel && require(foreach)) { ## outlist = foreach(i = seq(nfolds), .packages = c("glmnet")) %dopar% ## { ## which = foldid == i ## if (is.matrix(y)) ## y_sub = y[!which, ] ## else y_sub = y[!which] ## if (is.offset) ## offset_sub = as.matrix(offset)[!which, ] ## else offset_sub = NULL ## glmnet(x[!which, , drop = FALSE], y_sub, lambda = lambda, ## offset = offset_sub, weights = weights[!which], ## ...) 
## } ## } ## else { ## for (i in seq(nfolds)) { ## which = foldid == i ## if (is.matrix(y)) ## y_sub = y[!which, ] ## else y_sub = y[!which] ## if (is.offset) ## offset_sub = as.matrix(offset)[!which, ] ## else offset_sub = NULL ## outlist[[i]] = glmnet(x[!which, , drop = FALSE], ## y_sub, lambda = lambda, offset = offset_sub, ## weights = weights[!which], ...) ## } ## } ## fun = paste("cv", class(glmnet.object)[[1]], sep = ".") ## cvstuff = do.call(fun, list(outlist, lambda, x, y, weights, ## offset, foldid, type.measure, grouped, keep)) ## cvm = cvstuff$cvm ## cvsd = cvstuff$cvsd ## cvname = cvstuff$name ## out = list(lambda = lambda, cvm = cvm, cvsd = cvsd, cvup = cvm + ## cvsd, cvlo = cvm - cvsd, nzero = nz, name = cvname, glmnet.fit = glmnet.object) ## if (keep) ## out = c(out, list(fit.preval = cvstuff$fit.preval, foldid = foldid)) ## lamin = if (type.measure == "auc") ## getmin(lambda, -cvm, cvsd) ## else getmin(lambda, cvm, cvsd) ## obj = c(out, as.list(lamin)) ## class(obj) = "cv.glmnet" ## obj ## } ## <environment: namespace:glmnet> 
0
source

Source: https://habr.com/ru/post/957471/


All Articles