/* jsmin.c
   2019-10-30

Copyright (C) 2002 Douglas Crockford  (www.crockford.com)

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

The Software shall be used for Good, not Evil.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
    
#include <stdlib.h>
#include <stdio.h>

/* File-scope scanner state.

   look_ahead   -- single-character pushback slot. peek() fills it and get()
                   empties it; EOF means the slot is empty.
   the_x, the_y -- the two values most recently returned by next(): the_x is
                   the newest, the_y the one before it.
   the_a, the_b -- the pair of characters jsmin() dispatches on. action()
                   copies the_b into the_a and refills the_b from next().
                   Both start at 0 (static storage, no initializer).
*/

static int look_ahead = EOF;

static int the_x = EOF;
static int the_y = EOF;

static int the_a;
static int the_b;
    
/* error -- write "JSMIN Error: ", the given message and a linefeed to stderr,
        then terminate the program with exit status 1. Does not return.
        The message is only read, so it is taken as a pointer to const;
        callers pass string literals.
*/

static void error(const char *string) {
    fprintf(stderr, "JSMIN Error: %s\n", string);
    exit(1);
}
    
/* is_alphanum -- return 1 if the code unit is an ASCII letter, a digit, an
        underscore, a dollar sign, a backslash, or any value above 126 (that
        is, DEL and every non-ASCII byte); return 0 otherwise. Negative
        values such as EOF yield 0.
*/

static int is_alphanum(int codeunit) {
    if (codeunit > 126) {
        return 1;
    }
    if (codeunit >= '0' && codeunit <= '9') {
        return 1;
    }
    if (codeunit >= 'A' && codeunit <= 'Z') {
        return 1;
    }
    if (codeunit >= 'a' && codeunit <= 'z') {
        return 1;
    }
    switch (codeunit) {
    case '_':
    case '$':
    case '\\':
        return 1;
    default:
        return 0;
    }
}
    
59 
    60 
    61 /* get -- return the next character from stdin. Watch out for lookahead. If
    
62         the character is a control character, translate it to a space or
    
63         linefeed.
    
64 */
    
65 
    66 static int get() {
    
67     int codeunit = look_ahead;
    
68     look_ahead = EOF;
    
69     if (codeunit == EOF) {
    
70         codeunit = getc(stdin);
    
71     }
    
72     if (codeunit >= ' ' || codeunit == '\n' || codeunit == EOF) {
    
73         return codeunit;
    
74     }
    
75     if (codeunit == '\r') {
    
76         return '\n';
    
77     }
    
78     return ' ';
    
79 }
    
80 
    81 
    82 /* peek -- get the next character without advancing.
    
83 */
    
84 
    85 static int peek() {
    
86     look_ahead = get();
    
87     return look_ahead;
    
88 }
    
89 
    90 
    91 /* next -- get the next character, excluding comments. peek() is used to see
    
92         if a '/' is followed by a '/' or '*'.
    
93 */
    
94 
    95 static int next() {
    
96     int codeunit = get();
    
97     if  (codeunit == '/') {
    
98         switch (peek()) {
    
99         case '/':
   
100             for (;;) {
   
101                 codeunit = get();
   
102                 if (codeunit <= '\n') {
   
103                     break;
   
104                 }
   
105             }
   
106             break;
   
107         case '*':
   
108             get();
   
109             while (codeunit != ' ') {
   
110                 switch (get()) {
   
111                 case '*':
   
112                     if (peek() == '/') {
   
113                         get();
   
114                         codeunit = ' ';
   
115                     }
   
116                     break;
   
117                 case EOF:
   
118                     error("Unterminated comment.");
   
119                 }
   
120             }
   
121             break;
   
122         }
   
123     }
   
124     the_y = the_x;
   
125     the_x = codeunit;
   
126     return codeunit;
   
127 }
   
128 
   129 
   130 /* action -- do something! What you do is determined by the argument:
   
131         1   Output A. Copy B to A. Get the next B.
   
132         2   Copy B to A. Get the next B. (Delete A).
   
133         3   Get the next B. (Delete B).
   
134    action treats a string as a single character.
   
135    action recognizes a regular expression if it is preceded by the likes of
   
136    '(' or ',' or '='.
   
137 */
   
138 
   139 static void action(int determined) {
   
140     switch (determined) {
   
141     case 1:
   
142         putc(the_a, stdout);
   
143         if (
   
144             (the_y == '\n' || the_y == ' ')
   
145             && (the_a == '+' || the_a == '-' || the_a == '*' || the_a == '/')
   
146             && (the_b == '+' || the_b == '-' || the_b == '*' || the_b == '/')
   
147         ) {
   
148             putc(the_y, stdout);
   
149         }
   
150     case 2:
   
151         the_a = the_b;
   
152         if (the_a == '\'' || the_a == '"' || the_a == '`') {
   
153             for (;;) {
   
154                 putc(the_a, stdout);
   
155                 the_a = get();
   
156                 if (the_a == the_b) {
   
157                     break;
   
158                 }
   
159                 if (the_a == '\\') {
   
160                     putc(the_a, stdout);
   
161                     the_a = get();
   
162                 }
   
163                 if (the_a == EOF) {
   
164                     error("Unterminated string literal.");
   
165                 }
   
166             }
   
167         }
   
168     case 3:
   
169         the_b = next();
   
170         if (the_b == '/' && (
   
171             the_a == '(' || the_a == ',' || the_a == '=' || the_a == ':'
   
172             || the_a == '[' || the_a == '!' || the_a == '&' || the_a == '|'
   
173             || the_a == '?' || the_a == '+' || the_a == '-' || the_a == '~'
   
174             || the_a == '*' || the_a == '/' || the_a == '{' || the_a == '}'
   
175             || the_a == ';'
   
176         )) {
   
177             putc(the_a, stdout);
   
178             if (the_a == '/' || the_a == '*') {
   
179                 putc(' ', stdout);
   
180             }
   
181             putc(the_b, stdout);
   
182             for (;;) {
   
183                 the_a = get();
   
184                 if (the_a == '[') {
   
185                     for (;;) {
   
186                         putc(the_a, stdout);
   
187                         the_a = get();
   
188                         if (the_a == ']') {
   
189                             break;
   
190                         }
   
191                         if (the_a == '\\') {
   
192                             putc(the_a, stdout);
   
193                             the_a = get();
   
194                         }
   
195                         if (the_a == EOF) {
   
196                             error(
   
197                                 "Unterminated set in Regular Expression literal."
   
198                             );
   
199                         }
   
200                     }
   
201                 } else if (the_a == '/') {
   
202                     switch (peek()) {
   
203                     case '/':
   
204                     case '*':
   
205                         error(
   
206                             "Unterminated set in Regular Expression literal."
   
207                         );
   
208                     }
   
209                     break;
   
210                 } else if (the_a =='\\') {
   
211                     putc(the_a, stdout);
   
212                     the_a = get();
   
213                 }
   
214                 if (the_a == EOF) {
   
215                     error("Unterminated Regular Expression literal.");
   
216                 }
   
217                 putc(the_a, stdout);
   
218             }
   
219             the_b = next();
   
220         }
   
221     }
   
222 }
   
223 
   224 
   225 /* jsmin -- Copy the input to the output, deleting the characters which are
   
226         insignificant to JavaScript. Comments will be removed. Tabs will be
   
227         replaced with spaces. Carriage returns will be replaced with linefeeds.
   
228         Most spaces and linefeeds will be removed.
   
229 */
   
230 
   231 static void jsmin() {
   
232     if (peek() == 0xEF) {
   
233         get();
   
234         get();
   
235         get();
   
236     }
   
237     the_a = '\n';
   
238     action(3);
   
239     while (the_a != EOF) {
   
240         switch (the_a) {
   
241         case ' ':
   
242             action(
   
243                 is_alphanum(the_b)
   
244                 ? 1
   
245                 : 2
   
246             );
   
247             break;
   
248         case '\n':
   
249             switch (the_b) {
   
250             case '{':
   
251             case '[':
   
252             case '(':
   
253             case '+':
   
254             case '-':
   
255             case '!':
   
256             case '~':
   
257                 action(1);
   
258                 break;
   
259             case ' ':
   
260                 action(3);
   
261                 break;
   
262             default:
   
263                 action(
   
264                     is_alphanum(the_b)
   
265                     ? 1
   
266                     : 2
   
267                 );
   
268             }
   
269             break;
   
270         default:
   
271             switch (the_b) {
   
272             case ' ':
   
273                 action(
   
274                     is_alphanum(the_a)
   
275                     ? 1
   
276                     : 3
   
277                 );
   
278                 break;
   
279             case '\n':
   
280                 switch (the_a) {
   
281                 case '}':
   
282                 case ']':
   
283                 case ')':
   
284                 case '+':
   
285                 case '-':
   
286                 case '"':
   
287                 case '\'':
   
288                 case '`':
   
289                     action(1);
   
290                     break;
   
291                 default:
   
292                     action(
   
293                         is_alphanum(the_a)
   
294                         ? 1
   
295                         : 3
   
296                     );
   
297                 }
   
298                 break;
   
299             default:
   
300                 action(1);
   
301                 break;
   
302             }
   
303         }
   
304     }
   
305 }
   
/* main -- Write each command line argument to stdout as a line comment (two
        slashes, a space, the argument, a linefeed), then minify stdin to
        stdout. Always returns 0; fatal input errors exit through error().
*/

extern int main(int argc, char* argv[]) {
    int index = 1;

    while (index < argc) {
        fprintf(stdout, "// %s\n", argv[index]);
        index += 1;
    }
    jsmin();
    return 0;
}