I'm doing something with the mlr framework that causes iml's FeatureImp
to return an importance of 1 for every feature, and I can't put my finger on what. Here's an example:
library(caret)
#> Loading required package: lattice
#> Loading required package: ggplot2
library(mlr)
#> Loading required package: ParamHelpers
#>
#> Attaching package: 'mlr'
#> The following object is masked from 'package:caret':
#>
#> train
library(iml)
data("iris")
iris = iris[iris$Species != 'setosa',]
iris$Species = ifelse(iris$Species == 'virginica', 1, 0)
iris$Species = as.factor(iris$Species)
ind = createDataPartition(iris$Species, times = 1, p = 0.8, list = FALSE)
train = iris[ind, ]
test = iris[-ind, ]
remove(ind)
train.task = makeClassifTask(data = train, target = 'Species', positive = '1')
test.task = makeClassifTask(data = test, target = 'Species', positive = '1')
learner = list(
  xgboost = makeLearner("classif.xgboost", predict.type = "prob"),
  ksvm = makeLearner("classif.ksvm", predict.type = "prob"),
  nnet = makeLearner("classif.nnet", predict.type = "prob"),
  randomForest = makeLearner("classif.randomForest", predict.type = "prob")
)
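# All base learners return probabilities (predict.type = "prob"), which the
# stacked super learner below consumes as its input features.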
model = lapply(learner, function(x) train(x, train.task))
#> # weights: 19
#> initial value 57.506055
#> iter 10 value 52.109027
#> iter 20 value 7.798098
#> iter 30 value 5.401193
#> iter 40 value 4.707935
#> iter 50 value 4.702049
#> final value 4.701710
#> converged
prediction = lapply(model, function(x) predict(x, test.task))
ensemble = makeStackedLearner(learner, super.learner = 'classif.randomForest',
                              predict.type = 'prob', method = "stack.cv",
                              use.feat = FALSE)
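# Note: method = "stack.cv" re-trains each base learner on cross-validation
# folds to build the level-one data, which is why the nnet training trace
# below is printed several times.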
model$ensemble = train(ensemble, train.task)
#> # weights: 19
#> initial value 43.712841
#> iter 10 value 5.444287
#> iter 20 value 4.536990
#> iter 30 value 4.527489
#> iter 40 value 4.481401
#> iter 50 value 4.481221
#> iter 50 value 4.481221
#> iter 50 value 4.481221
#> final value 4.481221
#> converged
#> # weights: 19
#> initial value 52.864011
#> iter 10 value 33.347827
#> iter 20 value 2.926847
#> iter 30 value 0.011104
#> final value 0.000055
#> converged
#> # weights: 19
#> initial value 44.627604
#> iter 10 value 31.360597
#> iter 20 value 5.798769
#> iter 30 value 4.290623
#> iter 40 value 3.751202
#> iter 50 value 3.547856
#> iter 60 value 3.469366
#> iter 70 value 3.373487
#> iter 80 value 3.317680
#> iter 90 value 3.310354
#> iter 100 value 3.301115
#> final value 3.301115
#> stopped after 100 iterations
#> # weights: 19
#> initial value 46.410266
#> iter 10 value 29.975896
#> iter 20 value 1.266423
#> iter 30 value 0.004667
#> final value 0.000052
#> converged
#> # weights: 19
#> initial value 52.665930
#> final value 44.361399
#> converged
#> # weights: 19
#> initial value 60.471973
#> iter 10 value 50.475349
#> iter 20 value 7.580138
#> iter 30 value 4.828646
#> iter 40 value 4.543112
#> iter 50 value 2.995374
#> iter 60 value 2.636710
#> iter 70 value 2.539857
#> iter 80 value 2.497281
#> iter 90 value 2.427158
#> iter 100 value 2.370383
#> final value 2.370383
#> stopped after 100 iterations
prediction$ensemble = predict(model$ensemble, test.task)
predictor = Predictor$new(model$ensemble,
                          data = train.task$env$data[which(names(train.task$env$data) != "Species")],
                          y = as.numeric(train.task$env$data$Species) - 1)
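# Diagnostic sketch (my addition, not part of the original run): inspect what
# the Predictor actually returns. With predict.type = "prob", iml should see
# one probability column per class; Predictor$new() also accepts a `class`
# argument to select a single class's probability column if the columns don't
# line up with `y`.
head(predictor$predict(test[names(test) != "Species"]))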
imp = FeatureImp$new(predictor, loss = "ce")
imp$results
#> feature importance.05 importance importance.95 permutation.error
#> 1 Sepal.Length 1 1 1 1
#> 2 Sepal.Width 1 1 1 1
#> 3 Petal.Length 1 1 1 1
#> 4 Petal.Width 1 1 1 1
Created on 2020-01-23 by the reprex package (v0.3.0)
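For reference, two checks I would add here (a sketch, output not shown): mlr's `performance()` with the built-in `acc`/`mmce` measures, to confirm the ensemble is genuinely predictive, and FeatureImp with an alternative loss. Since importance is the ratio of permutation error to original error, a value of exactly 1 for every feature means permuting a feature never changes the error at all.
performance(prediction$ensemble, measures = list(acc, mmce))
imp.f1 = FeatureImp$new(predictor, loss = "f1")  # "f1" is another loss iml supports for binary classification
imp.f1$results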