Quantcast
Channel: Active questions tagged r - Stack Overflow
Viewing all articles
Browse latest Browse all 204742

Getting ROC curve from benchmark results

$
0
0

I have used the mlr and batchtools packages to benchmark OpenML datasets with two learners, namely rpart and logistic regression.

# Session setup for the OpenML benchmark.
#
# NOTE: `rm(list = ls())` was removed. It only deletes objects in the global
# environment (attached packages and options survive), so it does not provide
# a clean session; restart R instead when a fresh state is needed.

# detectCores() lives in the 'parallel' package, which is not attached by any
# of the libraries below, so it is called with an explicit namespace.
parallel::detectCores(all.tests = FALSE, logical = TRUE)
library(mlr)
library(batchtools)
library(OpenML)
library(randomForest)
# "classif.rpart" is an mlr learner id, not a package; the learner is backed
# by the 'rpart' package.
library(rpart)

# Read the OpenML API key from the environment rather than hard-coding a
# secret in the script (set OPENML_APIKEY, e.g. in ~/.Renviron).
openml_apikey <- Sys.getenv("OPENML_APIKEY")
if (!nzchar(openml_apikey)) {
  stop("Environment variable OPENML_APIKEY is not set.", call. = FALSE)
}
saveOMLConfig(apikey = openml_apikey, arff.reader = "RWeka", overwrite = TRUE)


#' Set up a batchtools experiment that benchmarks OpenML datasets with mlr
#'
#' Deletes any existing registry directory at `nameExperiment`, creates a new
#' experiment registry, registers one problem per OpenML dataset id and one
#' algorithm ("eval") that benchmarks logistic regression against rpart with
#' stratified 10x repeated 5-fold cross-validation.
#'
#' The registry itself is not returned. NOTE(review): it is expected to be
#' the batchtools default registry afterwards (see `getDefaultRegistry()`),
#' or it can be re-opened with `loadRegistry(nameExperiment)`.
#'
#' @param seed Seed passed to `makeExperimentRegistry()`.
#' @param ncpus Number of CPUs for `makeClusterFunctionsSocket()`.
#' @param clas_used Object with a `data.id` element holding the OpenML
#'   dataset ids to benchmark (e.g. a data frame with a `data.id` column).
#' @param nameExperiment Directory of the registry. An existing directory at
#'   this path is removed first.
#' @param keep.pred Passed to `mlr::benchmark()`. Set to `TRUE` to store the
#'   predictions in each job result; this is required if ROC curves are to be
#'   produced later with `generateThreshVsPerfData()` / `plotROCCurves()`.
#'   The default `FALSE` keeps results small and matches the old behavior.
#' @return The value of `getStatus()` for the newly filled registry.
setBatchtoolsExperiment <- function(
    seed = 1, ncpus = 3,
    clas_used,
    nameExperiment = "Data/Results/Batchtools/batchtool_experiment",
    keep.pred = FALSE) {

  stopifnot(is.logical(keep.pred), length(keep.pred) == 1L, !is.na(keep.pred))

  # which subset of datasets
  omldatasets <- clas_used$data.id
  if (is.null(omldatasets)) {
    stop("`clas_used` must contain a `data.id` element.", call. = FALSE)
  }

  # Start from a fresh registry and make sure the directories that the
  # registry is created in actually exist.
  work_dir <- "Data/Results/Batchtools"
  unlink(nameExperiment, recursive = TRUE)
  dir.create(dirname(nameExperiment), recursive = TRUE, showWarnings = FALSE)
  dir.create(work_dir, recursive = TRUE, showWarnings = FALSE)

  regis <- makeExperimentRegistry(
    nameExperiment,
    seed = seed,
    packages = c("mlr", "OpenML", "methods"),
    work.dir = work_dir
  )
  regis$cluster.functions <- makeClusterFunctionsSocket(ncpus = ncpus)

  # add selected OML datasets as problems; only the id is stored, the data
  # itself is downloaded inside the job
  for (did in omldatasets) {
    addProblem(name = as.character(did), data = list(did = did))
  }

  # add one generic 'algo' that compares logistic regression and rpart
  addAlgorithm("eval", fun = function(job, data, instance,
                                      keep.pred = FALSE, ...) {
    # get the dataset
    omldataset <- getOMLDataSet(data$did)
    if (identical(omldataset$target.features, character(0))) {
      # no target declared on OpenML: fall back to a column named "Class"
      omldataset$target.features <- "Class"
      omldataset$desc$default.target.attribute <- "Class"
    }
    task <- convertOMLDataSetToMlr(omldataset)

    # learners
    lrn.list <- list(
      makeLearner("classif.logreg", predict.type = "prob",
                  fix.factors.prediction = TRUE),
      makeLearner("classif.rpart", predict.type = "prob",
                  fix.factors.prediction = TRUE)
    )

    # measures and resampling strategy
    measures <- list(acc, brier, auc, timetrain, fpr, fnr)
    rdesc <- makeResampleDesc("RepCV", folds = 5, reps = 10, stratify = TRUE)
    configureMlr(on.learner.error = "warn", show.learner.output = TRUE)

    # the BenchmarkResult is the job result; predictions are only part of it
    # when keep.pred is TRUE
    benchmark(lrn.list, task, rdesc, measures,
              keep.pred = keep.pred, models = FALSE, show.info = TRUE)
  })

  # finalize experiment: a single algorithm configuration whose only
  # parameter is forwarded to the `keep.pred` argument of "eval"
  ades <- data.frame(keep.pred = keep.pred)
  addExperiments(algo.designs = list(eval = ades))
  summarizeExperiments()
  getStatus()
}

This produced the registry object regis. I saw that in order to generate ROC curves for my predictions I have to call generateThreshVsPerfData() and then plotROCCurves() on the benchmark result (a BenchmarkResult). generateThreshVsPerfData() must be given either a benchmark result (i.e. bmr) or a prediction. Because I cannot retrieve bmr directly from the regis object, I passed the output of reduceResultsList() to generateThreshVsPerfData():

#retrieve benchmark result
result = reduceResultsList(ids = c(c(1:284), c(286:318)), reg = regis, missing.val = NA)
Reducing [===================================================================================================>] 100% eta:  0s> 
> df = generateThreshVsPerfData(result, measures = list(fpr, tpr, mmce))
Error in generateThreshVsPerfData.list(result, measures = list(fpr, tpr,  : 
  Assertion on 'obj' failed: May only contain the following types: Prediction,ResampleResult.
#method 2
> # Extract predictions
> preds = getBMRPredictions(result, drop = TRUE)
Error in getBMRPredictions(result, drop = TRUE) : 
  Assertion on 'bmr' failed: Must inherit from class 'BenchmarkResult', but has class 'list'.
> 
> # Change the class attribute
> preds2 = lapply(preds, function(x) {class(x) = "Prediction"; return(x)})
Error in lapply(preds, function(x) { : object 'preds' not found
> 
> # Draw ROC curves
> df = generateThreshVsPerfData(preds2, measures = list(fpr, tpr, mmce))
Error in generateThreshVsPerfData(preds2, measures = list(fpr, tpr, mmce)) : 
  object 'preds2' not found
> plotROCCurves(df)
Error in plotROCCurves(df) : 
  Assertion on 'obj' failed: Must inherit from class 'ThreshVsPerfData', but has class 'function'.

Can anyone recommend how I can generate ROC curves for the benchmarked results? I have already run the benchmark, but the results are stored in the regis object, which I cannot pass to generateThreshVsPerfData(). Hence, I have to use prediction results. In order to generate the predictions, can I train the same models on the same datasets after benchmarking and call predict() on the fitted models? Is it advisable to do that?


Viewing all articles
Browse latest Browse all 204742

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>