The rating matrix I build (users × jokes) has a lot of NA values. I need to sort the matrix so that the user with the most ratings is at the top of the matrix and the user with the fewest ratings is at the bottom. My code:
# Build a user x joke rating matrix (Mat_ratings) from a long-format CSV.
# The script reads the columns user_id, jokes and rating; cells for
# (user, joke) pairs that have no row in the file stay NA.
csv_table <- read.csv(
  "Jester5k.csv",
  header = TRUE, sep = ",", quote = "\"", dec = "."
)

# Sanity check (printed): are the user ids exactly 1..N with no gaps?
Row_num <- max(csv_table$user_id)
Num_Unique_usersid <- length(unique(csv_table$user_id))
Row_num == Num_Unique_usersid
Unique_users <- unique(csv_table$user_id)

# Same sanity check (printed) for the joke ids.
Col_num <- max(csv_table$jokes)
Num_Unique_jokes <- length(unique(csv_table$jokes))
Col_num == Num_Unique_jokes
Unique_jokes <- unique(csv_table$jokes)

# NOTE: the original script set rownames/colnames of Mat_ratings here, before
# the matrix existed, which fails in a fresh session. The dimnames are assigned
# once, right after the matrix is created below.

# Sorted id vectors define the row/column order of the matrix. The printed
# checks show whether the ids already appear in sorted order in the file.
Unique_users_sorted <- sort(unique(csv_table$user_id))
identical(Unique_users_sorted, Unique_users)
Unique_items_sorted <- sort(unique(csv_table$jokes))
identical(Unique_items_sorted, Unique_jokes)

# One row per distinct user and one column per distinct joke. Sizing by the
# number of distinct ids (not by max(user_id)) keeps the dimensions in step
# with the dimnames even when the ids have gaps.
Mat_ratings <- matrix(
  NA,
  nrow = length(Unique_users_sorted),
  ncol = length(Unique_items_sorted)
)
rownames(Mat_ratings) <- paste0("user_", Unique_users_sorted)
colnames(Mat_ratings) <- paste0("item_", Unique_items_sorted)

# Vectorised fill: translate every (user_id, jokes) pair to its (row, column)
# position and assign all ratings in one step via two-column matrix indexing.
# With duplicate pairs the last rating in the file wins, as in a row-by-row
# loop.
row_idx <- match(csv_table$user_id, Unique_users_sorted)
col_idx <- match(csv_table$jokes, Unique_items_sorted)
# Rows with a missing id have no position in the matrix; skip them, because
# NA subscripts are not allowed in this kind of assignment.
has_position <- !is.na(row_idx) & !is.na(col_idx)
Mat_ratings[cbind(row_idx[has_position], col_idx[has_position])] <-
  csv_table$rating[has_position]
At first I tried to sort it this way, but I realized that I am not really doing anything with the values. Is there a way to do this?