1 ; +----------------------------------------------------------------------------+
     
2 ; | Dave's NASM port of JONESFORTH                                             |
     
3 ; +----------------------------------------------------------------------------+
     
4 ;
     
5 ; This port will have explanitory comments in my own words.
     
6 ;
     
7 ; For the full "JONESFORTH experience", read the original source files which
     
8 ; you should find in this repo at:
     
9 ;
    
10 ;     jonesforth/jonesforth.S
    
11 ;     jonesforth/jonesforth.f
    
12 ; 
    
13 %assign NASMJF_VERSION 48 ; One more than JONESFORTH
    
14 
    15 ; Guide to register use:
    
16 ;     esi - Next Forth word address pointer
    
17 ;     ebp - Return stack pointer ("RSP")
    
18 ;     esp - THE STACK (aka "parameter stack") pointer
    
19 ;
    
20 ; A Forth system is composed of words. Words are like functions: they contain
    
21 ; a series of instructions. Those instructions are actually just a list of
    
22 ; pointers to other words. So it's words all the way down.
    
23 ;
    
24 ; Well, actually, nothing really useful happens until you get to one of the
    
25 ; base words that is written in machine code. These "code words" are the low
    
26 ; level primitives that provide the "bootstrapping" fuctionality for all of
    
27 ; the other words built on top of them.
    
28 ;
    
29 ; Whether a word is a regular word or a code word, it has the same basic
    
30 ; structure, starting with a header:
    
31 ;
    
32 ; Here's one called "FOO":
    
33 ;
    
34 ;        Word Header      (For *all* word types)
    
35 ;    +------------------+
    
36 ; <----0x8C5FCD8        | Pointer to the previous word
    
37 ;    +------------------+
    
38 ;    | 3                | Name length (3 for "FOO") & flags (none)
    
39 ;    +------------------+
    
40 ;    | 'F'              |
    
41 ;    +------------------+
    
42 ;    | 'O'              |
    
43 ;    +------------------+
    
44 ;    | 'O'              |
    
45 ;    +------------------+
    
46 ;
    
47 ; After the header, a code word looks like this:
    
48 ;
    
49 ;          ...            <--- Header ends here, word body begins
    
50 ;        Code Word
    
51 ;    +------------------+
    
52 ;  +---0x80490A3        | Pointer to the machine code that follows!
    
53 ;  | +------------------+
    
54 ;  +-->D4 88 F8 02      | Machine code!
    
55 ;    +------------------+
    
56 ;    | A8 0F 98 C3      | More machine code...
    
57 ;    +------------------+
    
58 ;      ...
    
59 ;    +------------------+
    
60 ;    | NEXT             |
    
61 ;    +------------------+
    
62 ;
    
63 ;
    
64 ; Which seems weird and pointless (why not just start executing the machine
    
65 ; code directly?) until we look at a regular word that is not a code word
    
66 ; made of machine code. A regular word looks like this:
    
67 ;
    
68 ;          ...            <--- Header ends here, word body begins
    
69 ;       Regular Word
    
70 ;    +------------------+
    
71 ; <----0x80490A3        | Pointer to special *interpreter code word*
    
72 ;    +------------------+
    
73 ; <----0x804A2F0        | Address of _another_ word's code word
    
74 ;    +------------------+
    
75 ; <----0x804A2F0        | And another...
    
76 ;    +------------------+
    
77 ;      ...
    
78 ;    +------------------+
    
79 ;    | EXIT             |
    
80 ;    +------------------+
    
81 ;
    
82 ; Regular words use the "EXIT" word, a return stack, and the ebp register to
    
83 ; do the same thing. And here's the fun part: EXIT ends with NEXT!
    
84 ;
    
85 ; What the regular words and "code words" have in common is that they both
    
86 ; start (after the header) with a pointer that points to machine code to be
    
87 ; executed. Also known as the *interpreter code word*. For many regular words,
    
88 ; the interpreter code word will be:
    
89 ;
    
90 ;            DOCOL
    
91 ;
    
92 ; The interpreter code word executes the rest of the current word by
    
93 ; incrementing the instruction pointer (esi) and calling the NEXT macro.
    
94 ;
    
95 ; This is called "indirect threaded code" because of the second level of
    
96 ; pointer indirection.
    
97 ;
    
98 ; It may be helpful to summarize at this point:
    
99 ;
   
100 ;     |==============|=============|============|===========================|
   
101 ;     | Type of word | Starts with | Ends with  | Which uses                |
   
102 ;     |--------------|-------------|------------|---------------------------|
   
103 ;     | Regular word | Ptr to code | EXIT ptr   | esi, main data memory     |
   
104 ;     | Codeword     | Ptr to self | NEXT macro | ebp ("RSP"), return stack |
   
105 ;     |==============|=============|============|===========================|
   
106 ;
   
107 ; Also, let's visualize the layout of a code word and regular word side-by side:
   
108 ;
   
109 ;         Code Word                    Regular Word
   
110 ;    +------------------+          +------------------+
   
111 ;    | Link pointer     |          | Link pointer     |
   
112 ;    +------------------+          +------------------+
   
113 ;    | Name/flags       |          | Name/flags       |
   
114 ;    +------------------+          +------------------+      +------------+
   
115 ;    | Pointer to code  | ---+     | Pointer to DOCOL | ---> | DOCOL      |
   
116 ;    +------------------+    |     +------------------+      +------------+
   
117 ;    | <machine code>   |<---+     | Pointer to word  | <--- | NEXT       |
   
118 ;    +------------------+          +------------------+      +------------+
   
119 ;    | <machine code>   |          | Pointer to EXIT  |   
   
120 ;    +------------------+          +------------------+      +------------+
   
121 ;    | NEXT             |          | Pointer to EXIT  | ---> + EXIT       |
   
122 ;    +------------------+          +------------------+      +------------+
   
123 ;                                                            | NEXT       |
   
124 ;                                                            +------------+
   
125 ;
   
126 ; Without further ado, here's the next macro:
   
127 ;
   
128 ; +----------------------------------------------------------------------------+
   
129 ; | The NEXT Macro                                                             |
   
130 ; +----------------------------------------------------------------------------+
   
131 ; Register esi is the instruction pointer. NEXT puts the pointer it's pointing
   
132 ; to into register eax and advances esi 
   
133 ;
   
134 ;    +------------------+                NEXT:
   
135 ; <----0x8000000        | <-- esi        * eax = 0x8000000
   
136 ;    +------------------+           +--- * esi points to next pointer
   
137 ; <----0x8AAAAAA        | <---------+    * jump to address in eax
   
138 ;    +------------------+
   
139 ;
   
140 ; The only thing that keeps this whole thing moving is the fact that *every*
   
141 ; word ends in NEXT. There is no other mechanism propelling this threaded
   
142 ; interpreter forward.
   
143 %macro NEXT 0
   
144     lodsd     ; NEXT: Load from memory into eax, inc esi to point to next word.
   
145     jmp [eax] ; Jump to whatever code we're now pointing at.
   
146 %endmacro
   
147  
   148 ; That's a lot of stuff pointing at stuff.
   
149  
   150 ; By the way, the thing that makes Forth so hard to understand isn't all the
   
151 ; little details. It's the fact that *none of it makes sense in pieces*. Only
   
152 ; with the entire puzzle together in your head can you comprehend the machine.
   
153 
   154 ; +----------------------------------------------------------------------------+
   
155 ; | Return stack PUSH/POP macros                                               |
   
156 ; +----------------------------------------------------------------------------+
   
157 ; The ebp register will be the return stack pointer ("RSP")
   
158 ; The PUSHRSP and POPRSP macros handle pushing registers onto stack memory.
   
159 ; The return stack is used to 
   
160 ; (NASM macros use placeholders %1, %2, etc. as sequential params to substitute
   
161 ; into the machine code verbatim.)
   
162 %macro PUSHRSP 1
   
163     lea ebp, [ebp-4]   ; "load effective address" of next stack position
   
164     mov [ebp], %1      ; "push" the register value to the address at ebp
   
165 %endmacro
   
166 %macro POPRSP 1
   
167     mov %1, [ebp]
   
168     lea ebp, [ebp+4]
   
169 %endmacro
   
170 
   171 ; +----------------------------------------------------------------------------+
   
172 ; | System Call Numbers                                                        |
   
173 ; +----------------------------------------------------------------------------+
   
174 ; JONESFORTH uses an external include file which you may not have. I'm just
   
175 ; gonna hardcode them here. I can't imagine these changing often.
   
176 ; (I found them in Linux source in file arch/x86/include/asm/unistd_32.h)
   
177 %assign __NR_exit  1
   
178 %assign __NR_open  5
   
179 %assign __NR_close 6
   
180 %assign __NR_read  3
   
181 %assign __NR_write 4
   
182 %assign __NR_creat 8
   
183 %assign __NR_brk   45
   
184 
   185 ; +----------------------------------------------------------------------------+
   
186 ; | Return stack and main memory - initial memory allocations                  |
   
187 ; +----------------------------------------------------------------------------+
   
188 ; The BSS section is for uninitialized storage space. We'll reserve bytes (resb)
   
189 ; and make labels so we can refer to these addresses later. The following are
   
190 ; reserved:
   
191 ;  * buffer - storage for user input
   
192 ;  * return stack - addresses of words so EXIT can return to them
   
193 ;  * emit scratch - just a 4-byte bit of memory to store characters to print
   
194 SECTION .bss
   
195 %define buffer_size 4096
   
196 return_stack: resb 8192
   
197 return_stack_top: resb 4
   
198 buffer:       resb buffer_size
   
199 emit_scratch: resb 4 ; (note: JF had this in .data as .space 1)
   
200 
   201 ; +----------------------------------------------------------------------------+
   
202 ; | A label used as a pointer to the first word that will be executed          |
   
203 ; +----------------------------------------------------------------------------+
   
204 ; We need to be able to indirectly address the first word because that's how
   
205 ; NEXT works - it can only jump to an address _in memory_. So we'll use this
   
206 ; cold_start label pointing to memory containing the address of the first
   
207 ; codeword by putting it in the esi register and then calling NEXT. And yes,
   
208 ; that's right, in standard Forth convention, we start with "QUIT"!
   
209 SECTION .data
   
210 cold_start: dd QUIT  ; we need a way to indirectly address the first word
   
211 
   212 ; +----------------------------------------------------------------------------+
   
213 ; | "LOADJF"                                                                   |
   
214 ; +----------------------------------------------------------------------------+
   
215 ; I load the rest of the interpreter - a Forth source file - upon startup.
   
216 ;
   
217 ; This is a major difference with my port. Search for 'LOADJF' in this file to
   
218 ; see all of the places where I made changes or additions to support this.
   
219 ;
   
220 ; This path is relative by default to make it easy to run 'nasmjf' from
   
221 ; the repo dir. But you can set it to an absolute path to allow running
   
222 ; from any location.
   
223 jfsource:     db "jonesforth/jonesforth.f", 0h ; LOADJF path, null-terminated
   
224 jfsource_end: db 0h                            ; LOADJF null-terminated string
   
225 
   226 
   227 ; +----------------------------------------------------------------------------+
   
228 ; | Program entry point - start the interpreter!                               |
   
229 ; +----------------------------------------------------------------------------+
   
230 ; Now begins the real program. There's some housekeeping to do and almost all
   
231 ; of it is setting up memory and pointers to memory.
   
232 SECTION .text
   
233 global _start
   
234 
   235 _start:
   
236     ; Clear the "direction flag" which means the string instructions
   
237     ; (such as LODSD) work in increment order instead of decrement.
   
238     cld
   
239 
   240     ; Save the current value of the stack pointer to S0. This is the first
   
241     ; variable we've seen that is available in Forth. You can examine and change
   
242     ; the value of this variable in the interpreter!
   
243     mov [var_SZ], esp
   
244 
   245     ; We will use ebp to keep track of the return stack, used by EXIT
   
246     ; to return to the previous word when the current one is finished.
   
247     mov ebp, return_stack_top
   
248 
   249     ; Now allocate main memory for Forth dictionary and data!
   
250     ; First, we get the start address of the "break", which is where
   
251     ; the data segment starts. Then we request a break that is at a new
   
252     ; address N bytes larger. The OS does it and now we've got more
   
253     ; memory available to us!
   
254     ;
   
255     ; Note that brk returns the current break address on failure, so
   
256     ; the first call we make with 0 in ebx is a way of making brk fail
   
257     ; on purpose! Most examples on the web scrupulously avoid explaining
   
258     ; this.
   
259     ;
   
260     ; We store the start, end, and current "position" in this main
   
261     ; memory in variables. HERE is particularly important!
   
262     xor ebx, ebx
   
263     mov eax, __NR_brk         ; syscall brk
   
264     int 0x80
   
265     mov [var_HERE], eax       ; eax has start addr of data segment
   
266     mov [var_CSTART], eax     ; store info: start address of data segment
   
267     add eax, 0x16000          ; add our desired number of bytes to break addr
   
268     mov ebx, eax              ; reserve memory by setting this new break addr
   
269     mov [var_CEND], eax       ; store info: end address of data segment
   
270     mov eax, __NR_brk         ; syscall brk again
   
271     int 0x80
   
272 
   273     ; "LOADJF" Process jonesforth.f upon startup. Open the file.
   
274     ; Then store the file descriptor (fd) so we can make the interpreter
   
275     ; read from the file rather than from STDIN.
   
276     mov ecx, 0                ; LOADJF read only flag for open
   
277     mov ebx, jfsource         ; LOADJF address of string path for open
   
278     mov eax, __NR_open        ; LOADJF open syscall
   
279     int 80h                   ; LOADJF fd now in eax
   
280     cmp eax, 0                ; LOADJF fd < 0 is an error!
   
281     jl .loadjf_open_fail
   
282     mov [read_from_fd], eax   ; LOADJF store fd and tell KEY to read from this
   
283 
   284     ; Now "prime the pump" for the NEXT macro by sticking an indirect
   
285     ; address in esi. NEXT will jump to whatever's stored there.
   
286     ; Housekeeping stuff is over. The interpreter will start running now.
   
287     mov esi, cold_start
   
288     NEXT ; Start Forthing!
   
289 
   290     ; Handle failure of LOADJF!
   
291     ; I could have avoided a lot of code if I just exited when opening
   
292     ; the jonesforth.f file fails. But I thought it was important to make
   
293     ; a proper error message that was as helpful as possible. Because
   
294     ; if it fails, it's going to be _very_ confusing. I'm sure it will
   
295     ; happen to me years from now when I revisit this. And I don't want
   
296     ; to be confused!
   
297 .loadjf_open_fail:             ; LOADJF
   
298     ; For each of these write syscalls:
   
299     ;     ebx = stderr fd
   
300     ;     ecx = start address of string
   
301     ;     edx = length of string
   
302     ; Print first half of error message
   
303     mov ebx, 2                 ; LOADJF
   
304     mov ecx, loadjf_fail_msg   ; LOADJF
   
305     mov edx, (loadjf_fail_msg_end - loadjf_fail_msg)
   
306     mov eax, __NR_write        ; LOADJF
   
307     int 80h                    ; LOADJF
   
308     ; Print jonesforth source path
   
309     mov ebx, 2                 ; LOADJF
   
310     mov ecx, jfsource          ; LOADJF
   
311     mov edx, (jfsource_end - jfsource)
   
312     mov eax, __NR_write        ; LOADJF
   
313     int 80h                    ; LOADJF
   
314     ; Print second half of error message
   
315     mov ebx, 2                 ; LOADJF
   
316     mov ecx, loadjf_fail_msg2  ; LOADJF
   
317     mov edx, (loadjf_fail_msg_end2 - loadjf_fail_msg2)
   
318     mov eax, __NR_write        ; LOADJF
   
319     int 80h                    ; LOADJF
   
320     mov ebx, 1                 ; LOADJF exit code and fall through to exit
   
321 
   322     ; Exit program.
   
323     ; I define this here so the above LOADJF failure can fall through into it.
   
324     ; But it is also called when the user ends input (Ctrl+d) in the normal use
   
325     ; of the interpreter.
   
326 exit_with_grace_and_beauty: ; (don't forget to set ebx to exit code)
   
327     mov eax,__NR_exit       ; syscall: exit
   
328     int 0x80                ; invoke syscall
   
329 
   330 ; +----------------------------------------------------------------------------+
   
331 ; |                                                                            |
   
332 ; |                           Part Two: Words!                                 |
   
333 ; |                                                                            |
   
334 ; +----------------------------------------------------------------------------+
   
335 ; Everything from here on out is Forth bootstrapping itself as a series of word
   
336 ; definitions - first in machine code (written in assembly), then as words
   
337 ; defined as lists of addresses of other words. Lastly, as text in the Forth
   
338 ; language!
   
339 ;
   
340 ; +----------------------------------------------------------------------------+
   
341 ; | Forth DOCOL implementation                                                 |
   
342 ; +----------------------------------------------------------------------------+
   
343 ; This is the "interpreter" word - it is used at the beginning of "normal" Forth
   
344 ; words (composed of other words, not machine code). All DOCOL does is gets the
   
345 ; esi register pointed at the first word address and starts the NEXT macro.
   
346 ; (See my ASCII art boxes at the top of this document. Search for "DOCOL".)
   
347 ;
   
348 ; Note that esi doesn't contain the address of the next word to run. Instead,
   
349 ; it contains the next address that will *point to* the next word to run!
   
350 DOCOL:
   
351     PUSHRSP esi     ; push esi on to the "RSP" return stack
   
352     add eax, 4      ; eax currently points to DOCOL (me!), point to next addr
   
353     mov esi, eax    ; Load the next word pointer into esi
   
354     NEXT
   
355 
   356 ; +----------------------------------------------------------------------------+
   
357 ; | Word header flags                                                          |
   
358 ; +----------------------------------------------------------------------------+
   
359 ; These are bits that can be ANDed together to indicate special properties of
   
360 ; the word: 
   
361 ;  * IMMED - an "immediate" word runs in compile mode
   
362 ;  * HIDDEN - a word is usually hidden while it's being compiled!
   
363 ;  * LENMASK - to save space (!), the word name length is combined with flags
   
364 %assign F_IMMED 0x80
   
365 %assign F_HIDDEN 0x20
   
366 %assign F_LENMASK 0x1f
   
367 
   368 ; Link holds address of last word defined (to make linked list)
   
369 ; (NASM Note: Must be %define rather than %assign or we'll run afoul
   
370 ; assigning the name_label address below.)
   
371 %define link 0           ; null link - beginning of the linked list
   
372 
   373 ; +----------------------------------------------------------------------------+
   
374 ; | DEFWORD and DEFCODE macros                                                 |
   
375 ; +----------------------------------------------------------------------------+
   
376 ; As mentioned in the beginning, there are two kinds of words in Forth: 
   
377 ;
   
378 ;   1. Code words are pure machine language
   
379 ;   2. Regular words are defined as a series of pointers to other words
   
380 ;
   
381 ; Both start with a header: link, name length + flags, name.
   
382 ;
   
383 ; After the header, both start with an address: a pointer to code to be
   
384 ; executed immediatly when we run the word.
   
385 ;
   
386 ; The big difference is that a code word points to its own instructions, while
   
387 ; a regular word points to DOCOL, the "interpreter word", which sets the esi
   
388 ; register and uses NEXT to execute the rest of the word definition.
   
389 ;
   
390 ; Refer again to the ASCII art boxes a the top of this file to see how the two
   
391 ; types of words are laid out in memory.
   
392 ;
   
393 ; The following assembler macros help us create both types of words from
   
394 ; within assembly.
   
395 ;
   
396 ; The two macros are very similar. But notice how DEFWORD begins the body
   
397 ; of the word after the header with the address of DOCOL.
   
398 ;
   
399 ; Define a regular word. Create header from name and flags, then start the
   
400 ; word body with the address of DOCOL.
   
401 %macro DEFWORD 3 ; 1=name 2=label 3=flags
   
402     %strlen namelen %1 ; NASM calculates this for us!
   
403     SECTION .data
   
404     align 4            ; Everything is aligned on 4 byte boundaries.
   
405 
   406     ; Start of the word header
   
407     ; ------------------------
   
408     global name_%2     ; name_<label> for use in assembly
   
409     name_%2:
   
410         dd link                ; link the previous word's addr
   
411         %define link name_%2   ; store *my* link addr for next time
   
412         db %3 + namelen        ; flags + namelen (packed into byte)
   
413         db %1                  ; name string ("FOO")
   
414         align 4
   
415 
   416     ; Start of the word body
   
417     ; ----------------------
   
418      global %2    ; <label> for use in assembly
   
419      %2:
   
420          dd DOCOL ; Pointer to DOCOL code word that will execute the
   
421                   ; word pointer that will follow the use of this macro.
   
422 %endmacro
   
423 
   424 ; Define a code word. Create header from name and flags, then start the
   
425 ; word body with the next address after itself. See comments in DEFWORD
   
426 ; above for explanation of the header portion.
   
427 %macro DEFCODE 3 ; 1=name 2=label 3=flags
   
428     %strlen namelen %1
   
429     SECTION .data
   
430     align 4
   
431 
   432     ; Start of the word header
   
433     ; ------------------------
   
434     global name_%2
   
435     name_%2:
   
436         dd link
   
437         %define link name_%2   ; store this link addr for next time
   
438         db %3 + namelen        ; flags + namelen 
   
439         db %1                  ; name string
   
440         align 4
   
441 
   442     ; Start of the word body
   
443     ; ----------------------
   
444     global %2
   
445     %2:
   
446         dd code_%2 ; The address of the label that follows...
   
447         align 4
   
448 
   449     SECTION .text  ; Assembly intructions (machine code) will follow.
   
450     global code_%2
   
451     code_%2:
   
452         ; Whatever follows the use of this macro is the machine code
   
453         ; definition of the code word. We can execute this word directly
   
454         ; in assembly by jumping to this label. We can "compile it" into
   
455         ; a regular word with the body label (%2). And like all words,
   
456         ; we can execute it in Forth using it's string name.
   
457 %endmacro
   
458 
   459 ; +----------------------------------------------------------------------------+
   
460 ; +----------------------------------------------------------------------------+
   
461 ; What follow are 9 regular words and 130 code words. Only some fraction of the
   
462 ; code words are really be required here. They're just more efficient if they're
   
463 ; implemented in assembly rather than Forth itself.
   
464 
   465 ; +----------------------------------------------------------------------------+
   
466 ; | QUIT: the "outer interpreter"                                              |
   
467 ; +----------------------------------------------------------------------------+
   
468 ; At the top of this file, I describe the shape of word definitions and the
   
469 ; "interpreter word" and the NEXT and EXIT mechanisms. Now we can take a gander
   
470 ; at the outer main loop that really holds all of this together and makes this
   
471 ; Forth an actual interpreter in the sense most of us expect.
   
472 ;
   
473 ; You'll notice that QUIT contains neither a NEXT nor EXIT. This is the outer
   
474 ; loop ; and a NEXT will eventually bring us back here, where an unconditional
   
475 ; loop ; will keep looking for input and executing it.
   
476 ;
   
477 ; (And yes, "QUIT" is a bizarre name for this word.)
   
478 ;
   
479 ; I think this might be a helpful way to think of the nested nature of QUIT
   
480 ; and the two types of words:
   
481 ;
   
482 ; QUIT (INTERPRET)
   
483 ;     * regular word
   
484 ;         DOCOL
   
485 ;         NEXT
   
486 ;         * regular word
   
487 ;             DOCOL (codeword
   
488 ;             NEXT
   
489 ;             * code word
   
490 ;                 <machine code>
   
491 ;             NEXT
   
492 ;             * code word
   
493 ;                 <machine code>
   
494 ;             NEXT
   
495 ;         EXIT
   
496 ;         NEXT
   
497 ;    EXIT
   
498 ;    NEXT
   
499 ; QUIT (BRANCH -8 back to INTERPRET for more)
   
500 ;   
   
501 ; Notice how every code word ends in a NEXT and every regular word ends in
   
502 ; an EXIT, which also has a NEXT to go to back up the call stack.
   
503 ;
   
504 ; And when those words are done, the next address to execute happens to
   
505 ; be the unconditional branch in QUIT that starts the "outer interpreter"
   
506 ; loop all over again.
   
507 ;
   
508 ; Here's the definition of QUIT. Notice how much easier it is to write than
   
509 ; to describe!
   
510 DEFWORD "QUIT",QUIT,0
   
511     dd R0           ; push R0 (addr of top of return stack)
   
512     dd RSPSTORE     ; store R0 in return stack pointer (ebp)
   
513     dd INTERPRET    ; interpret the next word
   
514     dd BRANCH,-8    ; and loop (indefinitely)
   
515 
   516 ; +----------------------------------------------------------------------------+
   
517 ; | EXIT                                                                       |
   
518 ; +----------------------------------------------------------------------------+
   
519 ; And here's EXIT. Look at how tiny this is! This ends every regular word by
   
520 ; popping the "return address" pushed by DOCOL when the word began.
   
521 DEFCODE "EXIT",EXIT,0
   
522     POPRSP esi            ; pop return stack into esi
   
523 NEXT
   
524 
   525 ; +----------------------------------------------------------------------------+
   
526 ; | The Forth Interpreter words                                                |
   
527 ; +----------------------------------------------------------------------------+
   
528 ; The following three words contain some pretty beefy assembly code. They get
   
529 ; input, split it into words, find the word definitions, and execute them:
   
530 ;
   
531 ; KEY        - Buffers input from STDIN (or a file)
   
532 ; WORD       - Calls KEY, gets a whitespace-delimited "word" of text
   
533 ; INTERPRET  - Calls WORD, looks up words in dictionary, attempts to
   
534 ;              handle literal number values, and executes the results.
   
535 ;
   
536 ; Now, here they are in the opposite order:
   
537 ;
   
538 ; +----------------------------------------------------------------------------+
   
539 ; | INTERPRET                                                                  |
   
540 ; +----------------------------------------------------------------------------+
   
541 ; Get's "word" of input (that term is overloaded here) and determines what to
   
542 ; do with it.
   
543 DEFCODE "INTERPRET",INTERPRET,0
   
544     call _WORD              ; Returns %ecx = length, %edi = pointer to word.
   
545 
   546     ; Is it in the dictionary?
   
547     xor eax,eax             ; back from _WORD...zero eax
   
548     mov [interpret_is_lit], eax ; 0 means not a literal number (yet)
   
549     call _FIND              ; Returns %eax = pointer to header or 0 if not found.
   
550     test eax,eax            ; Found?
   
551     jz .try_literal
   
552 
   553     ; In the dictionary.  Is it an IMMEDIATE codeword?
   
554     mov edi,eax             ; edi = dictionary entry YES WE HAVE MATCHED A WORD!!!
   
555     mov al,[edi+4]          ; Get name+flags.
   
556     push ax                 ; Just save it for now.
   
557     call _TCFA              ; Convert dictionary entry (in %edi) to codeword pointer.
   
558     pop ax
   
559     and al,F_IMMED          ; is IMMED flag set?
   
560     mov eax,edi
   
561     jnz .execute            ; If IMMED, jump straight to executing.
   
562     jmp .check_state
   
563 
   564 .try_literal:      ; (1) Not in the dictionary (not a word) so assume it's a literal number.
   
565     inc byte [interpret_is_lit] ; DID NOT MATCH a word, trying literal number
   
566     call _NUMBER            ; Returns the parsed number in %eax, %ecx > 0 if error
   
567     test ecx,ecx
   
568     jnz .parse_error
   
569     mov ebx,eax
   
570     mov eax,LIT             ; The word is now LIT
   
571 
   572 .check_state:      ; (2) Are we compiling or executing?
   
573     mov edx,[var_STATE]
   
574     test edx,edx
   
575     jz .execute             ; Jump if executing.
   
576 
   577     ; Compiling - just append the word to the current dictionary definition.
   
578     call _COMMA
   
579     mov ecx,[interpret_is_lit] ; Was it a literal?
   
580     test ecx,ecx
   
581     jz .go_next             ; nope, done
   
582     mov eax,ebx             ; Yes, so LIT is followed by a number.
   
583     call _COMMA
   
584 .go_next: ; (3)
   
585     NEXT
   
586 
   587 .execute:         ; (4) Executing - run it!
   
588     mov ecx,[interpret_is_lit] ; Literal?
   
589     test ecx,ecx               ; Literal?
   
590     jnz .do_literal
   
591 
   592     ; Not a literal, execute it now.  This never returns, but the codeword will
   
593     ; eventually call NEXT which will reenter the loop in QUIT.
   
594     jmp [eax]
   
595 
   596 .do_literal:      ; (5) Executing a literal, which means push it on the stack.
   
597     push ebx
   
598     NEXT
   
599 
   600 .parse_error:     ; (6) Parse error (not a known word or a number in the current BASE).
   
601     ; Print an error message followed by up to 40 characters of context.
   
602     mov ebx,2               ; 1st param: stderr
   
603     mov ecx,errmsg          ; 2nd param: error message
   
604     mov edx,(errmsgend - errmsg) ; 3rd param: length of string
   
605     mov eax,__NR_write      ; write syscall
   
606     int 80h
   
607 
   608     mov ecx,[currkey]       ; the error occurred just before currkey position
   
609     mov edx,ecx
   
610     sub edx,buffer          ; edx = currkey - buffer (length in buffer before currkey)
   
611     cmp edx,40              ; if >= 40, then print only 40 characters
   
612     jle .print_error
   
613     mov edx,40
   
614 .print_error:     ; (7)
   
615     sub ecx,edx             ; ecx = start of area to print, edx = length
   
616     mov eax,__NR_write      ; write syscall
   
617     int 80h
   
618 
   619     mov ecx,errmsgnl      ; newline
   
620     mov edx,1               ; 1 char long
   
621     mov eax,__NR_write    ; write syscall
   
622     int 80h
   
623 NEXT
   
624 
   625 ; +----------------------------------------------------------------------------+
   
626 ; | WORD                                                                       |
   
627 ; +----------------------------------------------------------------------------+
   
628 ; Return a Forth string: an address and length (unlike C strings, we don't end
   
629 ; with a sentinel NUL.) This should perhaps be called "token".
   
630 DEFCODE "WORD",FWORD,0  ; Note changed nasm reserved keyword WORD to FWORD!
   
631     call _WORD
   
632     push edi                ; push base address
   
633     push ecx                ; push length
   
634 NEXT
   
635 _WORD:
   
636     ; Search for first non-blank character.  Also skip \ comments.
   
637 .skip_non_words:
   
638     call _KEY               ; get next key, returned in %eax
   
639     cmp al,'\'              ; start of a comment?
   
640     je .skip_comment        ; if so, skip the comment
   
641     cmp al,' '              ; compare to ASCII space (0x20)
   
642     jbe .skip_non_words     ; Is space or lower, keep scanning
   
643 
   644         ; now we've reached a word - start storing the chars
   
645     mov edi,word_buffer     ; put addr to word return buffer in edi (used by stosb)
   
646 .collect_word:
   
647     stosb                   ; add character to return buffer (8 bits from al)
   
648     call _KEY               ; get next key, returned in %al
   
649     cmp al,' '              ; compare to ASCII space (0x20)
   
650     ja .collect_word        ; Is higher than space, keep collecting
   
651 
   652         ; return word buffer addr and length...
   
653     sub edi, word_buffer    ; calculate the length of the word
   
654     mov ecx, edi            ; return it
   
655     mov edi, word_buffer    ; return start address of the word
   
656     ret
   
657 .skip_comment: ; skip \ comment to end of current line
   
658     call _KEY
   
659     cmp al,`\n`             ; eol? (escapes okay in backtick strings in nasm)
   
660     jne .skip_comment
   
661     jmp .skip_non_words
   
662 
   663 SECTION .data
   
664 word_buffer:
   
665     times 32 db 0x0 ; 32 bytes of buffer for word names
   
666 SECTION .text
   
667 
   668 ; +----------------------------------------------------------------------------+
   
669 ; | KEY                                                                        |
   
670 ; +----------------------------------------------------------------------------+
   
671 ; This should really be called "char" because it gets a character of input, not
   
672 ; a "key". It's easy to imagine the historical implementation fitting the name.
   
673 DEFCODE "KEY",KEY,0
   
674     call _KEY
   
675     push eax        ; push return value on stack
   
676     NEXT
   
677 _KEY:
   
678     mov ebx, [currkey]
   
679     cmp ebx, [bufftop]
   
680     jge .get_more_input
   
681     xor eax, eax
   
682     mov al, [ebx]               ; get next key from input buffer
   
683 
   684 .continue_with_key:
   
685     inc ebx
   
686     mov [currkey], ebx        ; increment currkey
   
687     ret
   
688 
   689 .get_more_input:  ; Use read(2) to fetch more input
   
690     mov ebx, [read_from_fd] ; LOADJF 1st param: input file (STDIN when getting user input)
   
691     ;xor ebx,ebx             ; 1st param: stdin
   
692     mov ecx,buffer          ; 2nd param: buffer
   
693     mov [currkey],ecx
   
694     mov edx,buffer_size     ; 3rd param: max length
   
695     mov eax,__NR_read       ; syscall: read
   
696     int 0x80                ; syscall!
   
697     test eax,eax            ; If %eax <= 0, then exit.
   
698     jbe .eof 
   
699     add ecx,eax             ; buffer+%eax = bufftop
   
700     mov [bufftop],ecx
   
701     jmp _KEY
   
702 
   703 .eof: ; Error or end of input
   
704     cmp dword [read_from_fd], 0 ; LOADJF If we were reading from STDIN (0)...
   
705     je .eof_stdin               ; LOADJF ...then exit the program normally.
   
706     mov ebx, [read_from_fd]     ; LOADJF Otherwise, close the file.
   
707     mov eax, __NR_close         ; LOADJF
   
708     int 80h
   
709     mov dword [read_from_fd], 0 ; LOADJF Change the read-from fd to STDIN.
   
710     jmp .get_more_input         ; LOADJF And continue reading!
   
711 .eof_stdin: ; Exit peacefully!
   
712     xor ebx,ebx             ; set ebx to exit with no error (0)
   
713     jmp exit_with_grace_and_beauty
   
714 
   715 
   716 ; +----------------------------------------------------------------------------+
   
717 ; | Some Forth primitives                                                      |
   
718 ; +----------------------------------------------------------------------------+
   
719 ; TICK (or single quote: ') gets the address of the word
   
720 ; that matches the next word of input text. Uses the same
   
721 ; lodsd trick as LIT to grab the next word of input without
   
722 ; executing it. Only works while in compile state. (: ... ;)
   
723 ; It's not an immediate word, so it executes at run time,
   
724 ; which is why we end up with the address of the next word
   
725 ; (which was matched at compile time) to put on the stack!
   
726 DEFCODE "'",TICK,0
   
727     lodsd                   ; Moves value at esi to eax, esi++
   
728     push eax                ; Push address on the stack
   
729 NEXT
   
730 
   731 ; BRANCH is the simplest possible way to loop - it always
   
732 ; moves the word pointer by the amount in the next value
   
733 ; pointed to by esi! It's helpful to see how LIT works because
   
734 ; it's a similar premise - the value after BRANCH isn't a
   
735 ; word address, it's the amount to add to esi.
   
736 ; To branch/loop back to a previous instruction, you provide
   
737 ; a negative offset.
   
738 ; esi currently points at the offset number.
   
739 DEFCODE "BRANCH",BRANCH,0
   
740     add esi, [esi]          ; add the offset to the instruction pointer
   
741 NEXT
   
742 
   743 ; 0BRANCH is the same thing, but with a condition: it only
   
744 ; jumps if the top of the stack is zero.
   
745 DEFCODE "0BRANCH",ZBRANCH,0
   
746     pop eax
   
747     test eax, eax           ; top of stack is zero?
   
748     jz code_BRANCH          ; if so, jump back to BRANCH
   
749     lodsd                   ; or skip the offset (esi to eax, esi++)
   
750 NEXT
   
751 
   752 ; Another primitive - this one is used to implement the string
   
753 ; words in Forth (." and S"). I'll just port it for now, then
   
754 ; test it later.
   
755 ; The lodsd "trick" (see also LIT) to load the next 4 bytes of
   
756 ; memory from the address at the current instruction pointer
   
757 ; (esi) into eax and then increment esi to skip over it so
   
758 ; NEXT doesnt try to execute it.
   
759 DEFCODE "LITSTRING",LITSTRING,0
   
760     lodsd                   ; get the length of the string into eax
   
761     push esi                ; push the address of the start of the string
   
762     push eax                ; push it on the stack
   
763     add esi, eax            ; skip past the string
   
764     add esi, 3              ; but round up to next 4 byte boundary
   
765     and esi, ~3
   
766 NEXT
   
767 
   768 ; Same deal here - another primitive. This one uses a Linux syscall
   
769 ; to print a string.
   
770 DEFCODE "TELL",TELL,0
   
771     mov ebx, 1        ; 1st param: stdout
   
772     pop edx        ; 3rd param: length of string
   
773     pop ecx        ; 2nd param: address of string
   
774     mov eax,__NR_write      ; write syscall
   
775     int 80h
   
776 NEXT
   
777 
   778 ; Turn a dictionary pointer into a codeword pointer.
   
779 ; This is where we use the stored length of the word name
   
780 ; to skip to the beginning of the code.
   
781 DEFCODE ">CFA",TCFA,0
   
782     pop edi
   
783     call _TCFA
   
784     push edi
   
785 NEXT
   
786 _TCFA:
   
787     xor eax,eax
   
788     add edi,4               ; Skip link pointer.
   
789     mov al,[edi]            ; Load flags+len into %al.
   
790     inc edi                 ; Skip flags+len byte.
   
791     and al,F_LENMASK        ; Just the length, not the flags.
   
792     add edi,eax             ; Skip the name.
   
793     add edi,3               ; The codeword is 4-byte aligned:
   
794     and edi,~3              ;   Add ...00000011 and mask ...11111100.
   
795     ret                     ;   For more, see log06.txt in this repo.
   
796 
   797 ; Turn a dictionary pointer into a "data" pointer.
   
798 ; Data simply being the word addresses immediately
   
799 ; following the codeword (4 bytes later).
   
800 DEFWORD ">DFA",TDFA,0
   
801     dd TCFA                 ; get codeword address
   
802     dd INCR4                ; advance 4 bytes
   
803 dd EXIT                 ; return from this word
   
804 
   805 ; parse numeric literal from input using BASE as radix
   
806 DEFCODE "NUMBER",NUMBER,0
   
807     pop ecx                 ; length of string
   
808     pop edi                 ; start address of string
   
809     call _NUMBER
   
810     push eax                ; parsed number
   
811     push ecx                ; number of unparsed characters (0 = no error)
   
812 NEXT
   
813 _NUMBER:
   
814     xor eax,eax
   
815     xor ebx,ebx
   
816 
   817     test ecx,ecx            ; trying to parse a zero-length string is an error, but returns 0
   
818     jz .return
   
819 
   820     mov edx, [var_BASE]    ; get BASE (in dl)
   
821 
   822     ; Check if first character is '-'.
   
823     mov bl,[edi]            ; bl = first character in string
   
824     inc edi
   
825     push eax                ; push 0 on stack
   
826     cmp bl,'-'              ; negative number?
   
827     jnz .convert_char
   
828     pop eax
   
829     push ebx                ; push non-0 on stack, indicating negative
   
830     dec ecx
   
831     jnz .next_char
   
832     pop ebx                 ; error: string is only '-'.
   
833     mov ecx,1
   
834     ret
   
835 
   836 .next_char:        ; (1) Loop reading digits.
   
837     imul eax,edx           ; eax *= BASE
   
838     mov bl,[edi]           ; bl = next character in string
   
839     inc edi
   
840 
   841 .convert_char:   ; (2) Convert 0-9, A-Z to a number 0-35.
   
842     sub bl,'0'              ; < '0'?
   
843     jb .negate
   
844     cmp bl,10        ; <= '9'?
   
845     jb .compare_base
   
846     sub bl,17              ; < 'A'? (17 is 'A'-'0')
   
847     jb .negate
   
848     add bl,10
   
849 
   850 .compare_base:   ; (3)
   
851         cmp bl,dl               ; >= BASE?
   
852     jge .negate
   
853 
   854     ; add it to eax and loop.
   
855     add eax,ebx
   
856     dec ecx
   
857     jnz .next_char
   
858 
   859 .negate:       ; (4) Negate the result if first character was '-' (saved on the stack).
   
860     pop ebx
   
861     test ebx,ebx
   
862     jz .return
   
863     neg eax
   
864 
   865 .return: ;(5)
   
866     ret
   
867 
   868 ; esi always points to the next thing. Usually this is
   
869 ; the next word. But in this case, it's the literal value
   
870 ; to push onto the stack.
   
871 DEFCODE "LIT",LIT,0
   
872     lodsd                   ; loads the value at esi into eax, increments esi
   
873     push eax                ; push the literal number on to stack
   
874 NEXT
   
875 
   876 ; Before this, we'll have called _WORD which pushed (returned):
   
877 ;     ecx = length
   
878 ;     edi = start of word (addr)
   
879 DEFCODE "FIND",FIND,0
   
880     pop ecx                 ; length of word
   
881     pop edi                 ; buffer with word
   
882     call _FIND
   
883     push eax                ; push address of dict entry (or null) as return val
   
884 NEXT
   
885 _FIND:
   
886     push esi                ; _FIND! Save esi, we'll use this reg for string comparison
   
887 
   888     ; Now we start searching backwards through the dictionary for this word.
   
889     mov edx,[var_LATEST]    ; LATEST points to name header of the latest word in the dictionary
   
890 .test_word:
   
891     test edx,edx            ; NULL pointer?  (end of the linked list)
   
892     je .not_found
   
893 
   894     ; First compare the length expected and the length of the word.
   
895     ; Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
   
896     ; this won't pick the word (the length will appear to be wrong).
   
897     xor eax,eax
   
898     mov al, [edx+4]           ; al = flags+length field
   
899     and al,(F_HIDDEN|F_LENMASK) ; al = length, but including hidden bit!
   
900     cmp cl,al        ; Length is the same?
   
901     jne .prev_word          ; nope, try prev
   
902 
   903     ; Compare the strings in detail.
   
904     push ecx                ; Save the length
   
905     push edi                ; Save the address (repe cmpsb will move this pointer)
   
906     lea esi,[edx+5]         ; Dictionary string we are checking against.
   
907     repe cmpsb              ; Compare the strings.
   
908     pop edi
   
909     pop ecx
   
910     jne .prev_word          ; nope, try prev
   
911 
   912     ; The strings are the same - return the header pointer in eax
   
913     pop esi
   
914     mov eax,edx
   
915     ret                     ; Found!
   
916 
   917 .prev_word:
   
918     mov edx,[edx]           ; Move back through the link field to the previous word
   
919     jmp .test_word          ; loop, test prev word
   
920 
   921 .not_found:
   
922     pop esi
   
923     xor eax,eax             ; Return zero to indicate not found (aka null ptr)
   
924     ret
   
925 
   926 ; CREATE makes words! Specifically, the header portion of words.
   
927 DEFCODE "CREATE",CREATE,0
   
928     pop ecx                   ; length of word name
   
929     pop ebx                   ; address of word name
   
930 
   931     ; link pointer
   
932     mov edi, [var_HERE]       ; the address of the header
   
933     mov eax, [var_LATEST]     ; get link pointer
   
934     stosd                     ; and store it in the header.
   
935 
   936     ; Length byte and the word itself.
   
937     mov al, cl                ; Get the length.
   
938     stosb                     ; Store the length/flags byte.
   
939     push esi
   
940     mov esi, ebx              ; esi = word
   
941     rep movsb                 ; Copy the word
   
942     pop esi
   
943     add edi, 3                ; Align to next 4 byte boundary. See TCFA
   
944     and edi, ~3
   
945 
   946     ; Update LATEST and HERE.
   
947     mov eax, [var_HERE]
   
948     mov [var_LATEST], eax
   
949     mov [var_HERE], edi
   
950 NEXT
   
951 
   952 ; COMMA (,) 
   
953 ; This is a super primitive word used to compile words. It puts the
   
954 ; currently-pushed value from the stack to the position pointed to
   
955 ; by HERE and increments HERE to the next 4 bytes.
   
956 DEFCODE ",",COMMA,0
   
957     pop eax                ; Code pointer to store.
   
958     call _COMMA
   
959 NEXT
   
960     _COMMA:
   
961     mov edi, [var_HERE]
   
962     cmp edi, [var_CSTART]
   
963     jl .oops
   
964     cmp edi, [var_CEND]
   
965     jg .oops
   
966     stosd                  ; puts the value in eax at edi, increments edi
   
967     mov [var_HERE], edi
   
968     ret
   
969 .oops:
   
970     nop
   
971 
   972 ; LBRAC and RBRAC ([ and ])
   
973 ; Simply toggle the STATE variable (0=immediate, 1=compile)
   
974 ; So:
   
975 ;       <compile mode> [ <immediate mode> ] <compile mode>
   
976 ;
   
977 ; Note that LBRAC has the immediate flag set because otherwise
   
978 ; it would get compiled rather than switch modes then and there.
   
979 DEFCODE "[",LBRAC,F_IMMED
   
980     xor eax, eax
   
981     mov [var_STATE], eax      ; Set STATE to 0 (immediate)
   
982 NEXT
   
983 
   984 DEFCODE "]",RBRAC,0
   
985     mov [var_STATE], word 1   ; Set STATE to 1 (compile)
   
986 NEXT
   
987 
   988 ; HIDDEN toggles the hidden flag for the dictionary entry
   
989 ; at the address on the stack
   
990 DEFCODE "HIDDEN",HIDDEN,0
   
991     pop edi                 ; Dictionary entry, first byte is link
   
992     add edi, 4              ; Move to name/flags byte.
   
993     xor [edi], word F_HIDDEN  ; Toggle the HIDDEN bit in place.
   
994 NEXT
   
995 
   996 
   997 ; +----------------------------------------------------------------------------+
   
998 ; | COLON and SEMICOLON: The Compiler!                                         |
   
999 ; +----------------------------------------------------------------------------+
  
1000 ; COLON (:) creates the new word header and starts compile mode
  
1001 ; It also sets the new definition to hidden so the word isn't
  
1002 ; discovered while it is being compiled.
  
1003 DEFWORD ":",COLON,0
  
1004     dd FWORD                 ; Get the name of the new word
  
1005     dd CREATE               ; CREATE the dictionary entry / header
  
1006     dd LIT, DOCOL, COMMA    ; Append DOCOL  (the codeword).
  
1007     dd LATEST, FETCH, HIDDEN ; Make the word hidden while it's being compiled.
  
1008     ;dd LATEST, HIDDEN ; Make the word hidden while it's being compiled.
  
1009     dd RBRAC                ; Go into compile mode.
  
1010 dd EXIT                 ; Return from the function.
  
1011 
  1012 ; SEMICOLON (;) is an immediate word (F_IMMED) and it ends compile
  
1013 ; mode and unhides the word entry being compiled.
  
1014 DEFWORD ";",SEMICOLON,F_IMMED
  
1015     dd LIT, EXIT, COMMA     ; Append EXIT (so the word will return).
  
1016     dd LATEST, FETCH, HIDDEN ; Unhide word now that it's been compiled.
  
1017     ;dd LATEST, HIDDEN ; Unhide word now that it's been compiled.
  
1018     dd LBRAC                ; Go back to IMMEDIATE mode.
  
1019 dd EXIT                 ; Return from the function.
  
1020 
  1021 ; EMIT just displays a character of output from the stack.
  
1022 ; It doesnt attempt to be efficient at all (no buffering, etc.)
  
1023 DEFCODE "EMIT",EMIT,0
  
1024     pop eax
  
1025     call _EMIT
  
1026 NEXT
  
1027 _EMIT:
  
1028     mov [emit_scratch], al  ; put char to print at scratch space
  
1029     mov ebx, 1              ; syscall param 1: stdout
  
1030     mov ecx, emit_scratch   ; syscall param 2: address to print
  
1031     mov edx, 1              ; syscall param 3: length in bytes to print
  
1032     mov eax, __NR_write     ; syscall 'write'
  
1033     int 0x80                ; request syscall!
  
1034     ret
  
1035 
  1036     ; DOT (temporary definiion) displays ascii decimal represention
  
1037     ; of numbers. Based on "echoi" proc written as part of asmtutor.com
  
1038     ; The real dot will be written as pure Forth later.
  
1039     DEFCODE ".",DOT,0
  
1040     pop eax
  
1041     call _DOT
  
1042     NEXT
  
1043 _DOT:
  
1044     push esi ; preserve
  
1045     mov ecx, 0 ; counter of digits to print at the end
  
1046 .divideloop:
  
1047     inc ecx
  
1048     mov edx, 0
  
1049     mov esi, 10
  
1050     idiv esi   ; divide eax by this
  
1051     add edx, 48 ; convert remainder to ascii digit
  
1052     push edx   ; push on stack to be echoed later (for correct order)
  
1053                 ; what's clever about pushing the ascii digits onto the
  
1054                 ; stack is that we can use the stack memory as our
  
1055                 ; buffer by using the stack pointer at esp in our
  
1056                 ; syscall to print the digits
  
1057     cmp eax, 0 ; are we done?
  
1058     jnz .divideloop
  
1059     mov esi, ecx ; printing... we use ecx for syscall, so count down with esi
  
1060 .printloop:
  
1061     ; arguably, I should be making use of EMIT...but this is all temporary
  
1062     ; anyway so I'm just going to inline the syscall to print the digits...
  
1063     dec esi
  
1064     mov ebx, 1              ; syscall param 1: stdout
  
1065     mov ecx, esp            ; syscall param 2: address to print
  
1066     mov edx, 1              ; syscall param 3: length in bytes to print
  
1067     mov eax, __NR_write     ; syscall 'write'
  
1068     int 0x80                ; request syscall!
  
1069     pop eax      ; next digit
  
1070     cmp esi, 0   ; are we done?
  
1071     jnz .printloop
  
1072     pop esi      ;restore our word address pointer
  
1073     ret
  
1074 
  1075     ; PRINTWORD
  
1076     ; Super killer debugging word! Prints the name of the word pointed to
  
1077     ; on the stack. Example: LATEST PRINTWORD
  
1078     DEFCODE "PRINTWORD",PRINTWORD,0
  
1079     pop eax
  
1080     call _PRINTWORD
  
1081     NEXT
  
1082 _PRINTWORD:
  
1083     mov edx,eax             ; stack had addr of header of dictionary word
  
1084     xor eax,eax             ; zero out all of eax
  
1085     mov al, [edx+4]         ; al = flags+length field
  
1086     and al, F_LENMASK       ; al = just length of name
  
1087     add edx,5               ; move pointer to name string
  
1088     mov ebx,1               ; 1st param: stdout
  
1089     mov ecx,edx             ; 2nd param: address to print
  
1090     mov edx,eax             ; 3rd param: length of string
  
1091     mov eax,__NR_write      ; write syscall
  
1092     int 80h
  
1093 
  1094 ; +----------------------------------------------------------------------------+
  
1095 ; | Stack manipulation words                                                   |
  
1096 ; +----------------------------------------------------------------------------+
  
1097 
  1098 ; drop top of stack
  
1099 DEFCODE "DROP",DROP,0
  
1100     pop eax
  
1101 NEXT
  
1102 
  1103 ; swap top two elements
  
1104 DEFCODE "SWAP",SWAP,0
  
1105     pop eax
  
1106     pop ebx
  
1107     push eax
  
1108     push ebx
  
1109 NEXT
  
1110 
  1111 ; duplicate element on top of stack
  
1112 DEFCODE "DUP",DUP,0
  
1113     mov eax, [esp]
  
1114     push eax
  
1115 NEXT
  
1116 
  1117 ; duplicate second element of stack to top
  
1118 DEFCODE "OVER",OVER,0
  
1119     mov eax, [esp+4]
  
1120     push eax
  
1121 NEXT
  
1122 
  1123 ; rotate the top three items on stack (ABC -> BCA)
  
1124 DEFCODE "ROT",ROT,0
  
1125     pop eax
  
1126     pop ebx
  
1127     pop ecx
  
1128     push ebx
  
1129     push eax
  
1130     push ecx
  
1131 NEXT
  
1132 
  1133 ; reverse rotate top three items on stack (ABC -> CAB)
  
1134 DEFCODE "-ROT",NROT,0
  
1135     pop eax
  
1136     pop ebx
  
1137     pop ecx
  
1138     push eax
  
1139     push ecx
  
1140     push ebx
  
1141 NEXT
  
1142 
  1143 ; drop top two elements from stack
  
1144 DEFCODE "2DROP",TWODROP,0
  
1145     pop eax
  
1146     pop eax
  
1147 NEXT
  
1148 
  1149 ; duplicate top two elements on stack
  
1150 DEFCODE "2DUP",TWODUP,0
  
1151     mov eax, [esp]
  
1152     mov ebx, [esp + 4]
  
1153     push ebx
  
1154     push eax
  
1155 NEXT
  
1156 
  1157 ; swap top two pairs (ABCD -> CDAB)
  
1158 DEFCODE "2SWAP",TWOSWAP,0
  
1159     pop eax
  
1160     pop ebx
  
1161     pop ecx
  
1162     pop edx
  
1163     push ebx
  
1164     push eax
  
1165     push edx
  
1166     push ecx
  
1167 NEXT
  
1168 
  1169 ; duplicate top element on stack if it's non-zero
  
1170 DEFCODE "?DUP",QDUP,0
  
1171     mov eax, [esp]
  
1172     test eax, eax
  
1173     jz .skip
  
1174     push eax
  
1175 .skip:
  
1176 NEXT
  
1177 
  1178 
  1179 ; +----------------------------------------------------------------------------+
  
1180 ; | Math words                                                                 |
  
1181 ; +----------------------------------------------------------------------------+
  
1182 
  1183 DEFCODE "1+",INCR,0
  
1184     inc dword [esp]       ; increment top of stack
  
1185 NEXT
  
1186 
  1187 DEFCODE "1-",DECR,0
  
1188     dec dword [esp]       ; decrement top of stack
  
1189 NEXT
  
1190 
  1191 DEFCODE "4+",INCR4,0
  
1192     add dword [esp], 4    ; add 4 to top of stack
  
1193 NEXT
  
1194 
  1195 DEFCODE "4-",DECR4,0
  
1196     sub dword [esp], 4   ; subtract 4 from top of stack
  
1197 NEXT
  
1198 
  1199 DEFCODE "+",ADD,0
  
1200     pop eax       ; get top of stack
  
1201     add [esp], eax  ; and add it to next word on stack
  
1202 NEXT
  
1203 
  1204 DEFCODE "-",SUB,0
  
1205     pop eax         ; get top of stack
  
1206     sub [esp], eax  ; and subtract it from next word on stack
  
1207 NEXT
  
1208 
  1209 DEFCODE "*",MUL,0
  
1210     pop eax
  
1211     pop ebx
  
1212     imul eax, ebx
  
1213     push eax        ; ignore overflow
  
1214 NEXT
  
1215 
  1216 ; In JonesFORTH, /MOD is defined in asm. / and MOD will
  
1217 ; be defined later in FORTH. This is because i386 idiv
  
1218 ; gives us both the quotient and remainder.
  
1219 DEFCODE "/MOD",DIVMOD,0
  
1220     xor edx, edx
  
1221     pop ebx
  
1222     pop eax
  
1223     idiv ebx
  
1224     push edx        ; push remainder
  
1225     push eax        ; push quotient
  
1226 NEXT
  
1227 
  1228 ; +----------------------------------------------------------------------------+
  
1229 ; | Comparison/conditional words                                               |
  
1230 ; +----------------------------------------------------------------------------+
  
1231 
  1232 DEFCODE "=",EQU,0      ;  top two values are equal?
  
1233     pop eax
  
1234     pop ebx
  
1235     cmp eax, ebx
  
1236     sete al          ; sete sets operand (al) to 1 if cmp was true
  
1237     movzx eax, al    ; movzx moves the value, then fills in zeros
  
1238     push eax         ; push answer on stack
  
1239 NEXT
  
1240  
  1241 DEFCODE "<>",NEQU,0    ; top two words are not equal?
  
1242     pop eax
  
1243     pop ebx
  
1244     cmp eax, ebx
  
1245     setne al
  
1246     movzx eax, al
  
1247     push eax
  
1248 NEXT
  
1249  
  1250 DEFCODE "<",LT,0
  
1251     pop eax
  
1252     pop ebx
  
1253     cmp ebx, eax
  
1254     setl al
  
1255     movzx eax, al
  
1256     push eax
  
1257 NEXT
  
1258  
  1259 DEFCODE ">",GT,0
  
1260     pop eax
  
1261     pop ebx
  
1262     cmp ebx, eax
  
1263     setg al
  
1264     movzx eax, al
  
1265     push eax
  
1266 NEXT
  
1267  
  1268 DEFCODE "<=",LE,0
  
1269     pop eax
  
1270     pop ebx
  
1271     cmp ebx, eax
  
1272     setle al
  
1273     movzx eax, al
  
1274     push eax
  
1275 NEXT
  
1276  
  1277 DEFCODE ">=",GE,0
  
1278     pop eax
  
1279     pop ebx
  
1280     cmp ebx, eax
  
1281     setge al
  
1282     movzx eax, al
  
1283     push eax
  
1284 NEXT
  
1285  
  1286 DEFCODE "0=",ZEQU,0    ; top of stack equals 0?
  
1287     pop eax
  
1288     test eax,eax
  
1289     setz al
  
1290     movzx eax, al
  
1291     push eax
  
1292 NEXT
  
1293  
  1294 DEFCODE "0<>",ZNEQU,0    ; top of stack not 0?
  
1295     pop eax
  
1296     test eax,eax
  
1297     setnz al
  
1298     movzx eax, al
  
1299     push eax
  
1300 NEXT
  
1301  
  1302 DEFCODE "0<",ZLT,0    ; greater than zero
  
1303     pop eax
  
1304     test eax,eax
  
1305     setl al
  
1306     movzx eax, al
  
1307     push eax
  
1308 NEXT
  
1309  
  1310 DEFCODE "0>",ZGT,0   ; less than zero
  
1311     pop eax
  
1312     test eax,eax
  
1313     setg al
  
1314     movzx eax, al
  
1315     push eax
  
1316 NEXT
  
1317  
  1318 DEFCODE "0<=",ZLE,0
  
1319     pop eax
  
1320     test eax,eax
  
1321     setle al
  
1322     movzx eax,al
  
1323     push eax
  
1324 NEXT
  
1325  
  1326 DEFCODE "0>=",ZGE,0
  
1327     pop eax
  
1328     test eax,eax
  
1329     setge al
  
1330     movzx eax,al
  
1331     push eax
  
1332 NEXT
  
1333 
  1334 ; +----------------------------------------------------------------------------+
  
1335 ; | Bitwise logic words                                                        |
  
1336 ; +----------------------------------------------------------------------------+
  
1337 
  1338 DEFCODE "AND",AND,0
  
1339     pop eax
  
1340     and [esp],eax
  
1341 NEXT
  
1342 
  1343 DEFCODE "OR",OR,0
  
1344     pop eax
  
1345     or [esp],eax
  
1346 NEXT
  
1347 
  1348 DEFCODE "XOR",XOR,0
  
1349     pop eax
  
1350     xor [esp], eax
  
1351 NEXT
  
1352 
  1353 DEFCODE "INVERT",INVERT,0
  
1354     not dword [esp]
  
1355 NEXT
  
1356 
  1357 ; +----------------------------------------------------------------------------+
  
1358 ; | Primitive memory words                                                     |
  
1359 ; +----------------------------------------------------------------------------+
  
1360 
  1361 DEFCODE "!",STORE,0
  
1362     pop ebx           ; address to store at
  
1363     pop eax           ; data to store there
  
1364     mov [ebx], eax
  
1365 NEXT
  
1366 
  1367 DEFCODE "@",FETCH,0
  
1368     pop ebx                 ; address to fetch
  
1369     mov eax, [ebx]          ; fetch it
  
1370     push eax                ; push value onto stack
  
1371 NEXT
  
1372 
  1373 DEFCODE "+!",ADDSTORE,0
  
1374     pop ebx                ; address
  
1375     pop eax                ; the amount to add
  
1376     add [ebx], eax
  
1377 NEXT
  
1378 
  1379 DEFCODE "-!",SUBSTORE,0
  
1380     pop ebx                ; address
  
1381     pop eax                ; the amount to subtract
  
1382     sub [ebx], eax
  
1383 NEXT
  
1384 
  1385 ; Primitive byte-oriented operations are like the above 32-bit
  
1386 ; operations, but work on 8 bits. x86 has instructions for this
  
1387 ; so we can define these.
  
1388 DEFCODE "C!",STOREBYTE,0
  
1389     pop ebx                ; address to store at
  
1390     pop eax                ; data to store there
  
1391     mov [ebx], al
  
1392 NEXT
  
1393 
  1394 DEFCODE "C@",FETCHBYTE,0
  
1395     pop ebx               ; address to fetch
  
1396     xor eax, eax          ; clear the register
  
1397     mov al, [ebx]         ; grab a byte
  
1398     push eax
  
1399 NEXT
  
1400 
  1401 DEFCODE "C@C!",CCOPY,0 ; byte copy
  
1402     mov ebx, [esp+4]      ; source address
  
1403     mov al, [ebx]         ; source byte
  
1404     pop edi               ; destination address
  
1405     stosb                 ; copy to destination
  
1406     push edi              ; increment destination address
  
1407     inc byte [esp+4]      ; increment source address
  
1408 NEXT
  
1409 
  1410 DEFCODE "CMOVE",CMOVE,0 ; copy n bytes
  
1411     mov edx, esi          ; preserve esi
  
1412     pop ecx               ; length
  
1413     pop edi               ; destination address
  
1414     pop esi               ; source address
  
1415     rep movsb             ; copy source to destination
  
1416     mov esi, edx          ; restore esi
  
1417 NEXT
  
1418 
  1419 ; +----------------------------------------------------------------------------+
  
1420 ; | Return stack manipulation words                                            |
  
1421 ; +----------------------------------------------------------------------------+
  
1422 ; ebp is the return stack pointer (RSP)
  
1423 ; In traditional Forth implementations, you're encouraged to put temporary
  
1424 ; values on the return stack (and you'd better not forget to clean up after
  
1425 ; yourself! Can you imagine proposing that to someone today? You'd be burned
  
1426 ; at the stake as a heretic!
  
1427 
  1428 DEFCODE ">R",TOR,0  ; move value from param stack to return stack
  
1429     pop eax
  
1430     PUSHRSP eax
  
1431 NEXT
  
1432 
  1433 DEFCODE "R>",FROMR,0 ; move value from return stack to param stack
  
1434     POPRSP eax
  
1435     push eax
  
1436 NEXT
  
1437 
  1438 DEFCODE "RSP@",RSPFETCH,0 ; get the actual address RSP points to
  
1439     push ebp
  
1440 NEXT
  
1441 
  1442 DEFCODE "RSP!",RSPSTORE,0 ; set the address RSP points to
  
1443     pop ebp
  
1444 NEXT
  
1445 
  1446 DEFCODE "RDROP",RDROP,0 ; move RSP to "pop" value and throw it away
  
1447     add ebp, 4
  
1448 NEXT
  
1449 
  1450 ; +----------------------------------------------------------------------------+
  
1451 ; | Param stack manipulation words                                             |
  
1452 ; +----------------------------------------------------------------------------+
  
1453 ; esp is the param (or "data" or "main") stack pointer (DSP)
  
1454 
  1455 DEFCODE "DSP@",DSPFETCH,0
  
1456     mov eax, esp
  
1457     push eax
  
1458 NEXT
  
1459 
  1460 DEFCODE "DSP!",DSPSTORE,0
  
1461     pop esp
  
1462 NEXT
  
1463 
  1464 ; +----------------------------------------------------------------------------+
  
1465 ; | Misc words needed for interpreter/compiler                                 |
  
1466 ; +----------------------------------------------------------------------------+
  
1467 
  1468 DEFCODE "IMMEDIATE",IMMEDIATE,F_IMMED ; makes latest word immediate
  
1469     mov edi, [var_LATEST]     ; addr of LATEST word.
  
1470     add edi, 4                ; Point to name/flags byte.
  
1471     xor byte [edi], F_IMMED   ; Toggle the IMMED bit.
  
1472 NEXT
  
1473 
  1474 DEFWORD "HIDE",HIDE,0
  
1475     dd FWORD        ; Get the word (after HIDE).
  
1476     dd FIND        ; Look up in the dictionary.
  
1477     dd HIDDEN      ; Set F_HIDDEN flag.
  
1478 dd EXIT        ; Return.
  
1479 
  1480 DEFCODE "CHAR",CHAR,0
  
1481     call _WORD              ; Returns %ecx = length, %edi = pointer to word.
  
1482     xor eax,eax
  
1483     mov al,[edi]            ; Get the first character of the word.
  
1484     push eax                ; Push it onto the stack.
  
1485 NEXT
  
1486 
  1487 DEFCODE "EXECUTE",EXECUTE,0
  
1488     pop eax                ; Get xt into %eax
  
1489     jmp [eax]              ; and jump to it. After xt runs its NEXT will
  
1490                            ; continue executing the current word.
  
1491 
  1492 DEFCODE "SYSCALL3",SYSCALL3,0
  
1493     pop eax                ; System call number (see <asm/unistd.h>)
  
1494     pop ebx                ; First parameter.
  
1495     pop ecx                ; Second parameter
  
1496     pop edx                ; Third parameter
  
1497     int 80h
  
1498     push eax               ; Result (negative for -errno)
  
1499 NEXT
  
1500 
  1501 DEFCODE "SYSCALL2",SYSCALL2,0
  
1502     pop eax                ; System call number (see <asm/unistd.h>)
  
1503     pop ebx                ; First parameter.
  
1504     pop ecx                ; Second parameter
  
1505     int 80h
  
1506     push eax               ; Result (negative for -errno)
  
1507 NEXT
  
1508 
  1509 DEFCODE "SYSCALL1",SYSCALL1,0
  
1510     pop eax                ; System call number (see <asm/unistd.h>)
  
1511     pop ebx                ; First parameter.
  
1512     int 80h
  
1513     push eax               ; Result (negative for -errno)
  
1514 NEXT
  
1515 
  1516 DEFCODE "SYSCALL0",SYSCALL0,0
  
1517     pop eax                ; System call number (see <asm/unistd.h>)
  
1518     int 80h
  
1519     push eax               ; Result (negative for -errno)
  
1520 NEXT
  
1521 
  1522 ; +----------------------------------------------------------------------------+
  
1523 ; | Forth constants                                                            |
  
1524 ; +----------------------------------------------------------------------------+
  
1525 ;
  
1526 ;  VERSION      Is the current version of this FORTH.
  
1527 ;  R0           The address of the top of the return stack.
  
1528 ;  DOCOL        Pointer to DOCOL.
  
1529 ;  F_IMMED      The IMMEDIATE flag's actual value.
  
1530 ;  F_HIDDEN     The HIDDEN flag's actual value.
  
1531 ;  F_LENMASK    The length mask in the flags/len byte.
  
1532 ;  SYS_*        and the numeric codes of various Linux syscalls
  
1533 ;
  
1534 ; Check it out! A const is just a word that pushes a value!
  
1535 %macro DEFCONST 4 ; 1=name 2=label 3=flags 4=value
  
1536     DEFCODE %1,%2,%3
  
1537     push %4
  
1538     NEXT
  
1539 %endmacro
  
1540 
  1541 DEFCONST "VERSION",VERSION,0,NASMJF_VERSION
  
1542 DEFCONST "R0",R0,0,return_stack_top
  
1543 DEFCONST "DOCOL",__DOCOL,0,DOCOL
  
1544 DEFCONST "F_IMMED",__F_IMMED,0,F_IMMED
  
1545 DEFCONST "F_HIDDEN",__F_HIDDEN,0,F_HIDDEN
  
1546 DEFCONST "F_LENMASK",__F_LENMASK,0,F_LENMASK
  
1547 
  1548 DEFCONST "SYS_EXIT",SYS_EXIT,0,__NR_exit
  
1549 DEFCONST "SYS_OPEN",SYS_OPEN,0,__NR_open
  
1550 DEFCONST "SYS_CLOSE",SYS_CLOSE,0,__NR_close
  
1551 DEFCONST "SYS_READ",SYS_READ,0,__NR_read
  
1552 DEFCONST "SYS_WRITE",SYS_WRITE,0,__NR_write
  
1553 DEFCONST "SYS_CREAT",SYS_CREAT,0,__NR_creat
  
1554 DEFCONST "SYS_BRK",SYS_BRK,0,__NR_brk
  
1555 
  1556 DEFCONST "O_RDONLY",__O_RDONLY,0,0
  
1557 DEFCONST "O_WRONLY",__O_WRONLY,0,1
  
1558 DEFCONST "O_RDWR",__O_RDWR,0,2
  
1559 DEFCONST "O_CREAT",__O_CREAT,0,0100
  
1560 DEFCONST "O_EXCL",__O_EXCL,0,0200
  
1561 DEFCONST "O_TRUNC",__O_TRUNC,0,01000
  
1562 DEFCONST "O_APPEND",__O_APPEND,0,02000
  
1563 DEFCONST "O_NONBLOCK",__O_NONBLOCK,0,04000
  
1564 
  1565 ; +----------------------------------------------------------------------------+
  
1566 ; | Forth built-in variables                                                   |
  
1567 ; +----------------------------------------------------------------------------+
  
1568 ;
  
1569 ;   STATE   Is the interpreter executing code (0) or compiling a word (non-zero)?
  
1570 ;   LATEST  Points to the latest (most recently defined) word in the dictionary.
  
1571 ;   HERE    Points to the next free byte of memory.  When compiling, compiled words go here.
  
1572 ;   S0      Stores the address of the top of the parameter stack.
  
1573 ;   BASE    The current base for printing and reading numbers.
  
1574 ;  
  
1575 ; A variable is a word that leaves its *address* on the stack. Use '@' and '!' to
  
1576 ; read or write a *value* at that address.
  
1577 %macro DEFVAR 4 ; 1=name 2=label 3=flags 4=value
  
1578         DEFCODE %1,%2,%3
  
1579         push dword var_%2
  
1580     NEXT
  
1581     section .data
  
1582         align 4
  
1583     var_%2:   ; Give it an asm label. Example: var_SZ for 'S0'
  
1584         dd %4 ; note dd to reserve a "double" (4b)
  
1585 %endmacro
  
1586 
  1587 DEFVAR "STATE",STATE,0,0
  
1588 DEFVAR "HERE",HERE,0,0
  
1589 DEFVAR "S0",SZ,0,0
  
1590 DEFVAR "BASE",BASE,0,10
  
1591 DEFVAR "CSTART",CSTART,0,0
  
1592 DEFVAR "CEND",CEND,0,0
  
1593 DEFVAR "READFROM",READFROM,0,read_from_fd ; LOADJF - make available to Forth???
  
1594 DEFVAR "LATEST",LATEST,0,name_LATEST      ; points to last word defined...which will just
  
1595                                           ; happen to be self. We'll see if this works.
  
1596 
  1597 ; +----------------------------------------------------------------------------+
  
1598 ; | Data section - reserve memory for interpreter use                          |
  
1599 ; +----------------------------------------------------------------------------+
  
1600 ;
  
1601 ; db - "define byte(s)"
  
1602 ; dd - "define double"   (4 bytes)
  
1603 ;
  
1604 SECTION    .data
  
1605     align 4
  
1606 currkey: dd 0          ; Current place in input buffer (next character to read).
  
1607 bufftop: dd 0          ; Last valid data in input buffer + 1.
  
1608 interpret_is_lit: dd 0 ; 1 means "reading a literal"
  
1609 read_from_fd: dd 0     ; 0=STDIN, etc.
  
1610 errmsg: db "PARSE ERROR: "
  
1611 errmsgend:
  
1612 errmsgnl: db `\n`
  
1613 loadjf_fail_msg: db "ERROR Could not open '" ; LOADJF
  
1614 loadjf_fail_msg_end:                         ; LOADJF
  
1615 loadjf_fail_msg2: db "'."                    ; LOADJF
  
1616 db `\n`                                      ; LOADJF
  
1617 loadjf_fail_msg_end2:                        ; LOADJF