Hey, I would like to compare the results of my k-means clustering with the true labels, which are in column one (is_not_malignant) and can take the values 0 and 1.
Sorry for the basic question, but I am only a beginner.
library(readr)
# Cluster the cancer data with k-means (k = 2) and compare the resulting
# clusters with the known is_not_malignant labels.

# Load the raw export and drop its first column (the id column).
cancer_export <- read_csv("Marketing Research/cancer.export.csv")
cancer_export <- cancer_export[-1]

# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable in batch mode (e.g. via Rscript).
if (interactive()) {
  View(cancer_export)
}

# Distribution of the true labels. useNA = "ifany" adds an NA count only when
# missing labels are present.
table(cancer_export$is_not_malignant, useNA = "ifany") # No missing values
prop.table(table(cancer_export$is_not_malignant)) # 37% have a malignant tumor

# Columns 2 to 31 are used as clustering features; column 1 is left out
# (presumably the is_not_malignant label -- confirm against the file layout).
cancer <- cancer_export[2:31]

# Z-score every feature (scale() centers and divides by the standard
# deviation by default) so that no feature dominates the Euclidean distances
# k-means relies on purely because of its units.
cancer_z <- as.data.frame(lapply(cancer, scale))

# Fix the RNG seed so the random starting centers are reproducible.
# nstart = 25 runs 25 random initialisations and keeps the one with the lowest
# total within-cluster sum of squares, instead of trusting a single start that
# may end in a poor local optimum.
set.seed(2345)
cancer_clusters <- kmeans(cancer_z, centers = 2, nstart = 25)

# Compare the clusters with the true labels. Cluster numbers (1/2) are
# arbitrary, so judge the result by which cluster captures most of each class,
# not by whether cluster 1 happens to line up with label 0 or 1.
cluster_vs_truth <- table(
  cluster = cancer_clusters$cluster,
  is_not_malignant = cancer_export$is_not_malignant,
  useNA = "ifany"
)
print(cluster_vs_truth)

# Share of each true label within each cluster (rows sum to 1).
print(prop.table(cluster_vs_truth, margin = 1))