How can I generate marginal effects for the logit model when using checkweights?

I usually generate marginal effects of the logical model using the mfx package and the logitmfx function. However, the current survey I'm using has weight (which greatly affects the proportion of DV in the sample due to over-sampling in some populations), and logitmfx seems to have no way to include weighting factors.

I installed the model using svyglm as follows:

library(survey) survey.design <- svydesign(ids = combined.survey$id, weights = combined.survey$weight, data = combined.survey) vote.pred.1 <- svyglm(formula = turnout ~ gender + age.group + education + income, design = survey.design) summary(vote.pred.1) 

How can I generate marginal effects from these results?

+5
source share
1 answer

I had the same question. Below, I changed the function from the mfx package to calculate marginal effects using data organized as a subject. I did not do much, basically replacing "mean ()" and similar commands that are designed to work on data without polling with poll equivalents. After the modified mfx code, there is a code that runs the example.

Background

Details of the mfx package from Alan Fernihough: https://cran.r-project.org/web/packages/mfx/mfx.pdf

Code for the mfx package on github (the files I changed are probitmfxest.r and probitmfx.r): https://github.com/cran/mfx/tree/master/R

In the mfx calculator, I commented on the great flexibility built into the original functions, which handled various assumptions about clusters and reliable SEs. I could be wrong, but I think that these problems have already been addressed with the help of the regression evaluation command from the svyglm () survey package.

Marginal effects calculator

  library(survey) probitMfxEstSurv <- function(formula, design, atmean = TRUE, robust = FALSE, clustervar1 = NULL, clustervar2 = NULL, start = NULL # control = list() # this option is found in the original mfx package ){ if(is.null(formula)){ stop("model formula is missing") } for( i in 1:length(class(design))){ if(!((class(design)[i] %in% "survey.design2") | (class(design)[i] %in% "survey.design"))){ stop("design arguement must contain survey object") } } # from Fernihough original mfx function # I dont think this is needed because the # regression computed by the survey package should # take care of stratification and robust SEs # from the survey info # # # cluster sort part # if(is.null(clustervar1) & !is.null(clustervar2)){ # stop("use clustervar1 arguement before clustervar2 arguement") # } # if(!is.null(clustervar1)){ # if(is.null(clustervar2)){ # if(!(clustervar1 %in% names(data))){ # stop("clustervar1 not in data.frame object") # } # data = data.frame(model.frame(formula, data, na.action=NULL),data[,clustervar1]) # names(data)[dim(data)[2]] = clustervar1 # data=na.omit(data) # } # if(!is.null(clustervar2)){ # if(!(clustervar1 %in% names(data))){ # stop("clustervar1 not in data.frame object") # } # if(!(clustervar2 %in% names(data))){ # stop("clustervar2 not in data.frame object") # } # data = data.frame(model.frame(formula, data, na.action=NULL), # data[,c(clustervar1,clustervar2)]) # names(data)[c(dim(data)[2]-1):dim(data)[2]] = c(clustervar1,clustervar2) # data=na.omit(data) # } # } # fit the probit regression fit = svyglm(formula, design=design, family = quasibinomial(link = "probit"), x=T ) # TS: summary(fit) # terms needed x1 = model.matrix(fit) if (any(alias <- is.na(coef(fit)))) { # this conditional removes any vars with a NA coefficient x1 <- x1[, !alias, drop = FALSE] } xm = as.matrix(svymean(x1,design)) # calculate means of x variables be = as.matrix(na.omit(coef(fit))) # collect coefficients: be as in beta k1 = length(na.omit(coef(fit))) # collect number of coefficients or x variables xb = t(xm) %*% be # get the matrix product of xMean and beta, which is the model prediction at the mean fxb = ifelse(atmean==TRUE, dnorm(xb), mean(dnorm(x1 %*% be))) # collect either the overall predicted mean, or the average of every observation predictions # get variances vcv = vcov(fit) # from Fernihough original mfx function # I dont think this is needed because the # regression computed by the survey package should # take care of stratification and robust SEs # from the survey info # # if(robust){ # if(is.null(clustervar1)){ # # white correction # vcv = vcovHC(fit, type = "HC0") # } else { # if(is.null(clustervar2)){ # vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL) # } else { # vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2) # } # } # } # # if(robust==FALSE & is.null(clustervar1)==FALSE){ # if(is.null(clustervar2)){ # vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=NULL) # } else { # vcv = clusterVCV(data=data, fm=fit, cluster1=clustervar1,cluster2=clustervar2) # } # } # set mfx equal to predicted mean (or other value) multiplied by beta mfx = data.frame(mfx=fxb*be, se=NA) # get standard errors if(atmean){# fxb * id matrix - avg model prediction * (beta X xmean) gr = as.numeric(fxb)*(diag(k1) - as.numeric(xb) *(be %*% t(xm))) mfx$se = sqrt(diag(gr %*% vcv %*% t(gr))) } else { gr = apply(x1, 1, function(x){ as.numeric(as.numeric(dnorm(x %*% be))*(diag(k1) - as.numeric(x %*% be)*(be %*% t(x)))) }) gr = matrix(apply(gr,1,mean),nrow=k1) mfx$se = sqrt(diag(gr %*% vcv %*% t(gr))) } # pick out constant and remove from mfx table temp1 = apply(x1,2,function(x)length(table(x))==1) const = names(temp1[temp1==TRUE]) mfx = mfx[row.names(mfx)!=const,] # pick out discrete change variables temp1 = apply(x1,2,function(x)length(table(x))==2) disch = names(temp1[temp1==TRUE]) # calculate the disctrete change marginal effects and standard errors if(length(disch)!=0){ for(i in 1:length(disch)){ if(atmean){ disx0 = disx1 = xm disx1[disch[i],] = max(x1[,disch[i]]) disx0[disch[i],] = min(x1[,disch[i]]) # mfx equal to prediction @ x=1 minus prediction @ x=0 mfx[disch[i],1] = pnorm(t(be) %*% disx1) - pnorm(t(be) %*% disx0) # standard errors gr = dnorm(t(be) %*% disx1) %*% t(disx1) - dnorm(t(be) %*% disx0) %*% t(disx0) mfx[disch[i],2] = sqrt(gr %*% vcv %*% t(gr)) } else { disx0 = disx1 = x1 disx1[,disch[i]] = max(x1[,disch[i]]) disx0[,disch[i]] = min(x1[,disch[i]]) mfx[disch[i],1] = mean(pnorm(disx1 %*% be) - pnorm(disx0 %*% be)) # standard errors gr = as.numeric(dnorm(disx1 %*% be)) * disx1 - as.numeric(dnorm(disx0 %*% be)) * disx0 avegr = as.matrix(colMeans(gr)) mfx[disch[i],2] = sqrt(t(avegr) %*% vcv %*% avegr) } } } mfx$discretechgvar = ifelse(rownames(mfx) %in% disch, 1, 0) output = list(fit=fit, mfx=mfx) return(output) } probitMfxSurv <- function(formula, design, atmean = TRUE, robust = FALSE, clustervar1 = NULL, clustervar2 = NULL, start = NULL # control = list() # this option is found in original mfx package ) { # res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start, control) res = probitMfxEstSurv(formula, design, atmean, robust, clustervar1, clustervar2, start) est = NULL est$mfxest = cbind(dFdx = res$mfx$mfx, StdErr = res$mfx$se, z.value = res$mfx$mfx/res$mfx$se, p.value = 2*pt(-abs(res$mfx$mfx/res$mfx$se), df = Inf)) colnames(est$mfxest) = c("dF/dx","Std. Err.","z","P>|z|") rownames(est$mfxest) = rownames(res$mfx) est$fit = res$fit est$dcvar = rownames(res$mfx[res$mfx$discretechgvar==1,]) est$call = match.call() class(est) = "probitmfx" est } 

Example

  # initialize sample data nObs = 100 x1 = rbinom(nObs,1,.5) x2 = rbinom(nObs,1,.3) #x3 = rbinom(100,1,.9) x3 = runif(nObs,0,.9) id = 1:nObs w1 = sample(c(10,50,100),nObs,replace=TRUE) # dependnt variables ystar = x1 + x2 - x3 + rnorm(nObs) y = ifelse(ystar>0,1,0) # set up data frame data = data.frame(id, w1, x1, x2, x3, ystar, y) # initialize survey survey.design <- svydesign(ids = data$id, weights = data$w1, data = data) mean(data$x2) sd(data$x2)/(length(data$x2))^0.5 svymean(x=x2,design=survey.design) probit = svyglm(y~x1 + x2 + x3, design=survey.design, family=quasibinomial(link='probit')) summary(probit) probitMfxSurv(formula = y~x1 + x2 + x3, design = survey.design) 
+4
source

Source: https://habr.com/ru/post/1205102/


All Articles