Quantcast
Channel: Active questions tagged r - Stack Overflow
Viewing all articles
Browse latest Browse all 201867

Error in `[.default`(data, , pos) : subscript out of bounds using predict function in R

$
0
0

I am banging my head against the wall with this one. I have made predictions from a decision tree, and my predictor variables have exactly the same dimensions in both the prediction object and my training dataset, yet when I try to construct a confusion matrix I get the error "Error in `[.default`(data, , pos) : subscript out of bounds". I cannot work out what is causing it.

# Split df_clean into 75% training / 25% test rows.
# sample.split() must be given the label vector: handed the whole data frame
# it returns one flag per *column*, which is then silently recycled across
# the rows. The flag vector is not called `sample`, so base::sample() stays
# unmasked.
set.seed(123)
is_train <- sample.split(df_clean$my_label, SplitRatio = 0.75)
train1 <- df_clean[is_train, , drop = FALSE]
test1 <- df_clean[!is_train, , drop = FALSE]
dim(train1)
dim(test1)

# Train the classification tree; seeded separately so the fit is reproducible.
# NOTE(review): the dput(head(df_clean)) sample in this post lists transient7,
# transient9, transient10 and harmonicDelta1/7/9 -- confirm that transient8,
# harmonicDelta2 and harmonicDelta8 really exist in df_clean.
set.seed(456)
dt <- rpart(
  my_label ~ activePower + activePowerDelta + reactivePower + voltage +
    phase + transient8 + transient10 + harmonicDelta1 + harmonicDelta2 +
    harmonicDelta8,
  data = train1,
  method = "class"
)

# Predict a class label for every test row and compare with the true labels.
# NOTE(review): my_label has an empty-string level (""). R cannot match ""
# when indexing by name, so confusionMatrix() stops with
# "subscript out of bounds" as soon as it looks that class up in the table.
# Give that level a real name on df_clean before splitting (for example
# "none"), or drop it with droplevels() if it is unused.
predictions_dt <- predict(dt, test1, type = "class")
confusionMatrix(data = predictions_dt, reference = test1$my_label)

Both predictions_dt and test1$my_label have the same format: 24,020 entries, stored as factors with the same number of levels, e.g. "+fridge", "+fridge+microwave", "+fridge+oven".

Thanks for your help!

adding output from checking levels:

# Show how the label column is stored, coerce it to a factor, then print
# all of its levels followed by the first level on its own.
str(df_clean[["my_label"]])
df_clean[["my_label"]] <- as.factor(df_clean[["my_label"]])
levels(df_clean[["my_label"]])
levels(df_clean[["my_label"]])[1L]

levels(df_clean$my_label)
 [1] ""  "+fridge"
 [3] "+fridge+kettle"  "+fridge+kettle+microwave"
 [5] "+fridge+kettle+tumble_dryer+washer_dryer"  "+fridge+kettle+tumble_dryer+washer_dryer+microwave"
 [7] "+fridge+kettle+washer_dryer"  "+fridge+kettle+washing_machine+washer_dryer"
 [9] "+fridge+microwave"  "+fridge+shower"
[11] "+fridge+shower+kettle"  "+fridge+shower+tumble_dryer+washer_dryer"
[13] "+fridge+shower+washer_dryer"  "+fridge+shower+washing_machine+washer_dryer"
[15] "+fridge+tumble_dryer+washer_dryer"  "+fridge+tumble_dryer+washer_dryer+microwave"
[17] "+fridge+vacuum"  "+fridge+vacuum+tumble_dryer+washer_dryer"
[19] "+fridge+vacuum+washer_dryer"  "+fridge+vacuum+washing_machine+washer_dryer"
[21] "+fridge+washer_dryer"  "+fridge+washer_dryer+microwave"
[23] "+fridge+washing_machine+washer_dryer"  "+fridge+washing_machine+washer_dryer+microwave"
[25] "+kettle"  "+shower"
[27] "+tumble_dryer+washer_dryer"  "+washer_dryer"

Adding output from dput(head(df_clean))

dput(head(df_clean))
structure(list(id = c(74589930L, 74589012L, 74588101L, 74587582L, 
74587236L, 74586372L), type = c(5L, 5L, 1L, 2L, 5L, 5L), activePower = c(78L, 
80L, 77L, 43L, 143L, 146L), activePowerDelta = c(-2L, 1L, 32L, 
-100L, -3L, -7L), reactivePower = c(-38L, -38L, -37L, -22L, 143L, 
142L), voltage = c(223.389, 224.258, 225.127, 224.258, 223.389, 
223.389), phase = c(-25.6, -25.3, -25.6, -27, 44.6, 43.9), transient7 = c(0.567, 
0.562, 0.584, 0.282, 0.914, 0.924), transient9 = c(0.567, 0.562, 
0.57, 0.29, 0.914, 0.924), transient10 = c(0.567, 0.562, 0.572, 
0.282, 0.914, 0.924), harmonicDelta1 = c(90L, 21L, 235L, 1183L, 
82L, 128L), harmonicDelta7 = c(127L, 64L, 77L, 14L, 39L, 36L), 
    harmonicDelta9 = c(148L, 85L, 62L, 4L, 41L, 42L), timestamp = c("2018-01-21 23:58:08+00:00", 
    "2018-01-21 23:55:28+00:00", "2018-01-21 23:52:46+00:00", 
    "2018-01-21 23:51:03+00:00", "2018-01-21 23:49:59+00:00", 
    "2018-01-21 23:47:19+00:00"), my_label = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L), .Label = c("", "+fridge", "+fridge+kettle", 
    "+fridge+kettle+microwave", "+fridge+kettle+tumble_dryer+washer_dryer", 
    "+fridge+kettle+tumble_dryer+washer_dryer+microwave", "+fridge+kettle+washer_dryer", 
    "+fridge+kettle+washing_machine+washer_dryer", "+fridge+microwave", 
    "+fridge+shower", "+fridge+shower+kettle", "+fridge+shower+tumble_dryer+washer_dryer", 
    "+fridge+shower+washer_dryer", "+fridge+shower+washing_machine+washer_dryer", 
    "+fridge+tumble_dryer+washer_dryer", "+fridge+tumble_dryer+washer_dryer+microwave", 
    "+fridge+vacuum", "+fridge+vacuum+tumble_dryer+washer_dryer", 
    "+fridge+vacuum+washer_dryer", "+fridge+vacuum+washing_machine+washer_dryer", 
    "+fridge+washer_dryer", "+fridge+washer_dryer+microwave", 
    "+fridge+washing_machine+washer_dryer", "+fridge+washing_machine+washer_dryer+microwave", 
    "+kettle", "+shower", "+tumble_dryer+washer_dryer", "+washer_dryer"
    ), class = "factor")), row.names = c(NA, 6L), class = "data.frame")

Viewing all articles
Browse latest Browse all 201867

Trending Articles