r - Automatically compare nested models from mice's glm.mids -
i have multiply-imputed model r's mice
bundle in there lots of factor variables. example:
library(mice) library(hmisc) # turn variables factors false = nhanes fake$age = as.factor(nhanes$age) fake$bmi = cut2(nhanes$bmi, g=3) fake$chl = cut2(nhanes$chl, g=3) head(fake) age bmi hyp chl 1 1 <na> na <na> 2 2 [20.4,25.5) 1 [187,206) 3 1 <na> 1 [187,206) 4 3 <na> na <na> 5 1 [20.4,25.5) 1 [113,187) 6 3 <na> na [113,187) imput = mice(nhanes) # big model fit1 = glm.mids((hyp==2) ~ age + bmi + chl, data=imput, family = binomial)
i want test significance of each entire factor variable in model (not indicator variables each level) testing total model against each possible nested model drops 1 variable @ time. manually, can do:
# little model (no chl) fit2 = glm.mids((hyp==2) ~ age + bmi, data=imput, family = binomial) # extract p-value pool.compare pool.compare(fit1, fit2)$pvalue
how can automatically factor variables in model? helpful function drop1
suggested me a previous question -- want except mice
case.
possibly helpful note: annoying feature of pool.compare
appears want "extra" variables in larger model placed after ones shared smaller model.
you can utilize loop iterate through different combinations of predictors, after arranging them in order required pool.compare
.
so using fake
info above - tweaked number of categories
library(mice) library(hmisc) # turn variables factors # turn variables factors false <- nhanes fake$age <- as.factor(nhanes$age) fake$bmi <- cut2(nhanes$bmi, g=2) fake$chl <- cut2(nhanes$chl, g=2) # impute imput <- mice(fake, seed=1) # create models # - reduced models 1 variable removed # - total models variables @ end of look vars <- c("age", "bmi", "chl") reddish <- combn(vars, length(vars)-1 , simplify=false) diffs <- lapply(red, function(i) setdiff(vars, i) ) (full <- lapply(1:length(red), function(i) paste(c(red[[i]], diffs[[i]]), collapse=" + "))) #[[1]] #[1] "age + bmi + chl" #[[2]] #[1] "age + chl + bmi" #[[3]] #[1] "bmi + chl + age" (red <- combn(vars, length(vars)-1 , fun=paste, collapse=" + ")) #[1] "age + bmi" "age + chl" "bmi + chl"
the models in right order pass glm
call. i've replaced glm.mids
method has been replaced with.mids
- see ?glm.mids
out <- vector("list", length(red)) for( in 1:length(red)) { redmod <- with(imput, glm(formula(paste("(hyp==2) ~ ", red[[i]])), family = binomial)) fullmod <- with(imput, glm(formula(paste("(hyp==2) ~ ", full[[i]])), family = binomial)) out[[i]] <- list(predictors = diffs[[i]], pval = c(pool.compare(fullmod, redmod)$pvalue)) } do.call(rbind.data.frame, out) # predictors pval #2 chl 0.9976629 #21 bmi 0.9985028 #3 age 0.9815831 # check manually leaving out chl mod1 <- with(imput, glm((hyp==2) ~ age + bmi + chl , family = binomial)) mod2 <- with(imput, glm((hyp==2) ~ age + bmi , family = binomial)) pool.compare(mod1, mod2)$pvalue # [,1] #[1,] 0.9976629
you lot of warnings using dataset
edit
you wrap in function
impglmdrop1 <- function(vars, outcome, data=imput, family="binomial") { reddish <- combn(vars, length(vars)-1 , simplify=false) diffs <- lapply(red, function(i) setdiff(vars, i)) total <- lapply(1:length(red), function(i) paste(c(red[[i]], diffs[[i]]), collapse=" + ")) reddish <- combn(vars, length(vars)-1 , fun=paste, collapse=" + ") out <- vector("list", length(red)) for( in 1:length(red)) { redmod <- with(data, glm(formula(paste(outcome, red[[i]], sep="~")), family = family)) fullmod <- with(data, glm(formula(paste(outcome, full[[i]], sep="~")), family = family)) out[[i]] <- list(predictors = diffs[[i]], pval = c(pool.compare(fullmod, redmod)$pvalue) ) } do.call(rbind.data.frame, out) } # run impglmdrop1(c("age", "bmi", "chl"), "(hyp==2)")
r anova categorical-data r-mice
No comments:
Post a Comment