is there any efficient way to get rid of colored emojis from vectors and make them to their standard form? Please see two output for instance, I maybe not using appropriate terms. Currently I am doing like this:
library(rjson)
library(stringi)
library(stringr)
# this function gets name from emojis one at a time
emoji_json_file <- "https://raw.githubusercontent.com/ToadHanks/emojisLib_json/master/emojis.json"
json_data <- rjson::fromJSON(paste(readLines(emoji_json_file), collapse = "")) #read line by line make
# gets the name i.e. get_name_from_emoji("😋") output should be "yum"
get_name_from_emoji <- function(emoji_unicode, emoji_data = json_data) {
emoji_evaluated <- stringi::stri_unescape_unicode(emoji_unicode)
vector_of_emoji_names_and_characters <- unlist(
lapply(json_data, function(x){
x$char
})
)
name_of_emoji <- attr(
which(vector_of_emoji_names_and_characters == emoji_evaluated)[1],
"names"
)
return(name_of_emoji)
}
# Fill an empty vector with names
emoji_pouch_copy <- c("🤫","👇🏾","👉🏿","🖕🏻","🏿","🏿") #we can't render U+1F3FB (light-skin graft), U+1F3FF (dark-skin graft) here that's why "?"
emoji_keywords_pouch <- c()
for(i in 1: length(emoji_pouch_copy)){
emoji_keywords_pouch <- c(emoji_keywords_pouch, get_name_from_emoji(emoji_pouch_copy[i]))
}
emoji_keywords_pouch #output: "shushing","point_down_fairly_dark","point_right_dark","fu_light","dark_skin_tone","light_skin_tone"
#Function to remove the skin tones
remove_all_skins <- function(string, pattern) {
str_replace_all(string, pattern, "000")
}
#remove these and their nativ renders at a positions
skin_tones <- c("medium_skin_tone", "fairly_dark_skin_tone", "dark_skin_tone", "fairly_light_skin_tone", "light_skin_tone", "_light","_dark","_medium","_fairly")
emoji_keywords_pouch <- remove_all_skins(emoji_keywords_pouch, skin_tones[1])
emoji_keywords_pouch <- remove_all_skins(emoji_keywords_pouch, skin_tones[2])
emoji_keywords_pouch <- remove_all_skins(emoji_keywords_pouch, skin_tones[3])
emoji_keywords_pouch <- remove_all_skins(emoji_keywords_pouch, skin_tones[4])
emoji_keywords_pouch <- remove_all_skins(emoji_keywords_pouch, skin_tones[5])
emoji_keywords_pouch <- emoji_keywords_pouch[emoji_keywords_pouch != "000"] #free the memory
#It has to be this order, otherwise good strings will go bad in the variable containing keywords
emoji_keywords_pouch <- stringr::str_remove_all(emoji_keywords_pouch, skin_tones[6])
emoji_keywords_pouch <- stringr::str_remove_all(emoji_keywords_pouch, skin_tones[7])
emoji_keywords_pouch <- stringr::str_remove_all(emoji_keywords_pouch, skin_tones[8])
emoji_keywords_pouch <- stringr::str_remove_all(emoji_keywords_pouch, skin_tones[9])
#Reverse the function get_name... to get_emoji and rebuild the emoji_pouch
#i.e. get_emoji_from_name("yum") output should be "😋"
get_emoji_from_name <- function(emoji_name, emoji_data = json_data) {
vector_of_emoji_names_and_characters <- unlist(
lapply(json_data, function(x){
x$char
})
)
emoji_character <- unname(
vector_of_emoji_names_and_characters[
names(vector_of_emoji_names_and_characters) == emoji_name
]
)
return(emoji_character)
}
#reset the original emoji_...copy to include standard tones
emoji_pouch_copy <- c()
for(i in 1: length(emoji_keywords_pouch)){
# Sys.sleep(1)
emoji_pouch_copy <- c(emoji_pouch_copy, get_emoji_from_name(emoji_keywords_pouch[i]))
}
#All of the skin tones are removed, because there are no standad skin tones
emoji_pouch_copy #output: "🤫""👇""👉""🖕"
#Finished
In a nutshell I am going from emojis to their names. Then clean their names by removing skin conditions, and then reverting back to their emoji form. I have close to 1000 emojis, and for loop is causing like 5 seconds delay. So I was wondering if anyone know of some package that gets this work done much better than I can?
Any help or in fact feedback on this will be appreciated! Thank you.