1 #!/usr/bin/env ruby
     2 require 'json'
     3 require 'set'
     4 
     5 # Input format (https://emojibase.dev/) is:
     6 #
     7 # [
     8 #   {
     9 #     "label": "greedy skull farmer",
    10 #     "hexcode": "000000",
    11 #     "emoji": "X",
    12 #     "text": "",
    13 #     "type": 1,
#     "version": 0,
    15 #     "tags": ["greed","skull","farming",...]
    16 #     ...
    17 #   },
    18 #   ...
    19 # ]
    20 #
    21 # This script removes many Emoji based on a variety of criteria:
    22 #
    23 #   * Partial matching label names
    24 #   * Groups
    25 #   * Unicode versions
    26 #
    27 # Customize to your needs.
    28 
    29 json_in = ARGF.read
    30 
    31 if json_in.length < 1
    32   puts "Oops, need JSON data. Pipe in or supply filename."
    33   exit 1
    34 end
    35 
    36 list = JSON.parse(json_in)
    37 
    38 # For the curious:
    39 # This script often makes a 'newlist' between steps. This makes sense when you
    40 # realize that these steps used to be a series of *individual* scripts - so
    41 # this is still many times more efficient than spawning separate processes and
    42 # serializing/deserializing JSON between steps!
    43 
    44 # =============================================================================
    45 # Group stuff
    46 #
    47 # Official group names:
    48 #     0  Smileys & Emotion
    49 #     1  People & Body
    50 #     2  Components
    51 #     3  Animals & Nature
    52 #     4  Food & Drink
    53 #     5  Travel & Places
    54 #     6  Activities
    55 #     7  Objects
    56 #     8  Symbols
    57 #     9  Flags
    58 
    59 
    60 # Remove regional indicator letters. They're not meant to be used
    61 # stand-alone and I've seen first hand that they are not widely
    62 # supported as stand-alone characters (at least not yet).
    63 newlist = []
    64 list.each do |e|
    65   if e['label'].match(/^regional indicator/)
    66     next
    67   end
    68   newlist.push e
    69 end
    70 list = newlist
    71 
    72 # Previously:
    73 # Put regional indicator letters with the symbols group and re-sort.
    74 # Add "letter" to the tags list
    75 #list.each do |e|
    76 #  if e['label'].match(/^regional indicator/)
    77 #    e['group'] = 8 # "Symbols"
    78 #    if e['tags']
    79 #      e['tags'].push 'letter'
    80 #    else
    81 #      e['tags'] = ['letter']
    82 #    end
    83 #  end
    84 #end
    85 #newlist = list.sort_by! { |l| l["group"] }
    86 #list = newlist
    87 
    88 # Remove "facing right" variants
    89 newlist = []
    90 list.each do |e|
    91   if e['label'].match(/facing right/)
    92     next
    93   end
    94   newlist.push e
    95 end
    96 list = newlist
    97 
    98 # Remove keycaps
    99 newlist = []
   100 list.each do |e|
   101   if e['label'].match(/keycap:/)
   102     next
   103   end
   104   newlist.push e
   105 end
   106 list = newlist
   107 
   108 # Remove families (there's so many and I've never found a use for these!)
   109 newlist = []
   110 list.each do |e|
   111   if e['label'].match(/family:/)
   112     next
   113   end
   114   newlist.push e
   115 end
   116 list = newlist
   117 
   118 # Remove genders (your tasteful joke goes here)
   119 newlist = []
   120 list.each do |e|
   121   if e['label'].match(/(person|(wo)?man):? /)
   122     next
   123   end
   124   newlist.push e
   125 end
   126 list = newlist
   127 
   128 # Remove Japanese language elements (my audience doesn't speak it)
   129 newlist = []
   130 list.each do |e|
   131   if e['label'].match(/^Japanese/)
   132     next
   133   end
   134   newlist.push e
   135 end
   136 list = newlist
   137 
   138 # Delete group 2 (Components)
   139 newlist = []
   140 list.each do |e|
   141   if e["group"] == 2
   142     next
   143   end
   144   newlist.push e
   145 end
   146 list = newlist
   147 
   148 # Delete group 9 (Flags) - they're cool, but there's so many!
   149 newlist = []
   150 list.each do |e|
   151   if e["group"] == 9
   152     next
   153   end
   154   newlist.push e
   155 end
   156 list = newlist
   157 
   158 # Sadly, I'm also going to remove versions greater than 13, even
   159 # though it gets rid of some great emoji. My use case doesn't support
   160 # yet.
   161 newlist = []
   162 list.each do |e|
   163   if e["version"] > 13
   164     next
   165   end
   166   newlist.push e
   167 end
   168 list = newlist
   169 
# There are only 49 unique uppercase words, so I don't think downcasing
# is worth the loss of readability for the data savings.
   172 #
   173 # Downcase labels
   174 #list.each do |e|
   175 #  e["label"].downcase!
   176 #end
   177 #
   178 # Downcase (and de-dupe) tags
   179 #list.each do |e|
   180 #  e["tags"] = e["tags"].map(&:downcase).to_set.to_a
   181 #end
   182 
   183 newlist = []
   184 list.each do |e|
   185   newlist.push({
   186     'label': e["label"],
   187     'emoji': e["emoji"],
   188     'group': e["group"],
   189     'tags': e["tags"]
   190   })
   191 end
   192 list = newlist
   193 
   194 # Write out final list
   195 #puts JSON.pretty_generate(list)
   196 puts JSON.generate(list)