faceclick/data/makejs.rb

#!/usr/bin/env ruby
require 'json'
require 'set'

# Builds the Faceclick emoji data file.
#
# Reads a JSON array of Emojibase-formatted entries (see https://emojibase.dev)
# from the files named in ARGV, or from STDIN, and prints a JavaScript
# assignment to FC.data on STDOUT. Each entry is read for its "emoji",
# "label", "group" and (optionally) "tags" keys.
# Also see customizer.rb in this directory.
#
# Environment overrides:
#   MIN_WORD_USAGE_COUNT - minimum number of entries a word must appear in
#   MIN_WORD_LENGTH      - minimum length of a word
# for the word to be moved into the shared word list.
#
# Shape of the output (values are illustrative):
#
#   FC.data = {
#   groups: [
#    {"title":"People","emoji":"😀","range":[0,2]},
#    {"title":"Natural","emoji":"🌴","range":[3,5]}
#   ],
#   words: 'face bear'
#   +' animal',
#   emoji: [
#   ['😀','grinning $0','smile happy'],['🐻','$1','$2 teddy'],
#   ] // End of FC.data.emoji
#   }; // End of FC.data
#
# "$N" inside a label or tag string refers to the Nth (0-based) entry of the
# space-separated words string.

# Escapes backslashes and single quotes so +text+ can be placed inside a
# single-quoted JavaScript string literal. Text containing neither character
# comes back unchanged.
def js_escape(text)
  # Block form on purpose: in a replacement *string*, \' is a back-reference.
  text.to_s.gsub(/[\\']/) { |ch| "\\#{ch}" }
end

my_emoji = JSON.parse(ARGF.read)

# Begin!
puts 'FC.data = {'

# Make group list. For screen real estate, I've combined some of the groups
# together (from_groups). These groups become selectable "tab" filters in
# the final interface.
my_groups = [
  # Official group names:
  # 0 Smileys & Emotion
  # 1 People & Body
  # 2 Components
  # 3 Animals & Nature
  # 4 Food & Drink
  # 5 Travel & Places
  # 6 Activities
  # 7 Objects
  # 8 Symbols
  # 9 Flags
  { title: 'People', emoji: '😀', from_groups: [0, 1], range: [nil, 0] },
  { title: 'Natural', emoji: '🌴', from_groups: [3, 4], range: [nil, 0] },
  { title: 'Activity', emoji: '🧭', from_groups: [5, 6], range: [nil, 0] },
  { title: 'Things', emoji: '📻️', from_groups: [7, 8], range: [nil, 0] }
]

# Find first and last (range) emoji index for each combined group.
# Entries whose group is not listed in any from_groups are skipped.
my_emoji.each_with_index do |entry, i|
  group = my_groups.find { |g| g[:from_groups].include?(entry['group']) }
  next unless group

  range = group[:range]
  range[0] = i if range[0].nil? # first!
  range[1] = i if i > range[1]  # maybe last
end

# Print groups (not just turning the whole thing over to JSON.generate
# because I want explicit control over the pretty-printing as a compactness
# vs. readability balance). Since this output is really JS, not strict JSON,
# trailing commas and the like are fine, which simplifies things quite a bit.
# from_groups is build-time bookkeeping only and is left out of the output.
group_lines = my_groups.map do |g|
  " #{JSON.generate(g.reject { |key, _| key == :from_groups })}"
end
puts "groups: [\n#{group_lines.join(",\n")}\n],"

# Remove tags that can be found in an entry's label (substring match).
# An entry without a "tags" key is treated as having no tags.
my_emoji.each do |entry|
  entry['tags'] = Array(entry['tags']).reject { |tag| entry['label'].include?(tag) }
end

# Count, for every word, how many entries use it in their tags or label.
# A word is counted at most once per entry.
word_usage = my_emoji.each_with_object(Hash.new(0)) do |entry, counts|
  entry_words = Set.new(entry['tags']).merge(entry['label'].split(' '))
  entry_words.each { |word| counts[word] += 1 }
end

# Word parameters to adjust for best results.
# Both of these will work with 1 or higher.
min_word_usage_count = ENV.fetch('MIN_WORD_USAGE_COUNT', 4).to_i
min_word_length = ENV.fetch('MIN_WORD_LENGTH', 4).to_i

# TODO allow input of the above in ARGV as well, so I can automate the 25 or
# so permutations and get the output bytes for comparison to see which one is
# smallest. THEN change the defaults to match!

# Keep only the words that satisfy both parameters.
frequent_words = word_usage.select do |word, count|
  count >= min_word_usage_count && word.length >= min_word_length
end.to_a

# Sort by usage count, highest first, so more frequent words have lower index
# numbers (literally just for the savings of a shorter number of digits),
# then keep just the words.
my_words = frequent_words.sort_by { |_, count| count }.reverse.map(&:first)

# Print the word list as one space-separated JS string, continued with
# '...'\n+'...' concatenation whenever a line would grow past 70 word
# characters (separating spaces are not counted).
line_len = 0
print "words: '"
my_words.each_with_index do |word, idx|
  text = js_escape(word)
  if line_len + text.length > 70
    # don't add to a long line, start a new one
    print "'\n+'"
    line_len = 0
  end
  print ' ' unless idx.zero?
  print text
  line_len += text.length
end
puts "',"

# Word -> position in my_words, so each lookup is a hash hit rather than a
# scan of the whole list.
word_index = my_words.each_with_index.to_h

# Replaces a word from the list with its reference (e.g. $15, $256); any
# other word is passed through verbatim.
#
# Surprisingly, there are NO tags or labels with '$' in them
# (see check_for_dollar_tags.rb)
encode = ->(token) { word_index.key?(token) ? "$#{word_index[token]}" : token }

# One encoded label string per entry.
my_labels = my_emoji.map do |entry|
  entry['label'].split(' ').map(&encode).join(' ')
end

# One encoded tag string per entry. Tags found in the label were already
# removed above, so every remaining tag is emitted.
my_tags = my_emoji.map do |entry|
  entry['tags'].map(&encode).join(' ')
end

# Print emoji
# Collate in the labels and tags.
# As an array of arrays in this index order
#   0: emoji glyph
#   1: label string
#   2: tag string
# Example: ['X','winking $0','$2 fart $17']
#
line_len = 0
puts 'emoji: ['
my_emoji.zip(my_labels, my_tags).each do |entry, label, tags|
  str = "['#{js_escape(entry['emoji'])}','#{js_escape(label)}','#{js_escape(tags)}'],"
  if line_len + str.length > 80
    # don't add to a long line, start a new one
    puts
    line_len = 0
  end
  print str
  line_len += str.length
end

puts
puts '] // End of FC.data.emoji'
puts '}; // End of FC.data'