1 /*	A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*-
     
2 	By Richard W.M. Jones <rich@annexia.org> http://annexia.org/forth
     
3 	This is PUBLIC DOMAIN (see public domain release statement below).
     
4 	$Id: jonesforth.S,v 1.47 2009-09-11 08:33:13 rich Exp $
     
5 
     6 	gcc -m32 -nostdlib -static -Wl,-Ttext,0 -Wl,--build-id=none -o jonesforth jonesforth.S
     
7 */
     
8 	.set JONES_VERSION,47
     
9 /*
    
10 	INTRODUCTION ----------------------------------------------------------------------
    
11 
    12 	FORTH is one of those alien languages which most working programmers regard in the same
    
13 	way as Haskell, LISP, and so on.  Something so strange that they'd rather any thoughts
    
14 	of it just go away so they can get on with writing this paying code.  But that's wrong
    
15 	and if you care at all about programming then you should at least understand all these
    
16 	languages, even if you will never use them.
    
17 
    18 	LISP is the ultimate high-level language, and features from LISP are being added every
    
19 	decade to the more common languages.  But FORTH is in some ways the ultimate in low level
    
20 	programming.  Out of the box it lacks features like dynamic memory management and even
    
21 	strings.  In fact, at its primitive level it lacks even basic concepts like IF-statements
    
22 	and loops.
    
23 
    24 	Why then would you want to learn FORTH?  There are several very good reasons.  First
    
25 	and foremost, FORTH is minimal.  You really can write a complete FORTH in, say, 2000
    
26 	lines of code.  I don't just mean a FORTH program, I mean a complete FORTH operating
    
27 	system, environment and language.  You could boot such a FORTH on a bare PC and it would
    
28 	come up with a prompt where you could start doing useful work.  The FORTH you have here
    
29 	isn't minimal and uses a Linux process as its 'base PC' (both for the purposes of making
    
30 	it a good tutorial). It's possible to completely understand the system.  Who can say they
    
31 	completely understand how Linux works, or gcc?
    
32 
    33 	Secondly FORTH has a peculiar bootstrapping property.  By that I mean that after writing
    
34 	a little bit of assembly to talk to the hardware and implement a few primitives, all the
    
35 	rest of the language and compiler is written in FORTH itself.  Remember I said before
    
36 	that FORTH lacked IF-statements and loops?  Well of course it doesn't really because
    
37 	such a lanuage would be useless, but my point was rather that IF-statements and loops are
    
38 	written in FORTH itself.
    
39 
    40 	Now of course this is common in other languages as well, and in those languages we call
    
41 	them 'libraries'.  For example in C, 'printf' is a library function written in C.  But
    
42 	in FORTH this goes way beyond mere libraries.  Can you imagine writing C's 'if' in C?
    
43 	And that brings me to my third reason: If you can write 'if' in FORTH, then why restrict
    
44 	yourself to the usual if/while/for/switch constructs?  You want a construct that iterates
    
45 	over every other element in a list of numbers?  You can add it to the language.  What
    
46 	about an operator which pulls in variables directly from a configuration file and makes
    
47 	them available as FORTH variables?  Or how about adding Makefile-like dependencies to
    
48 	the language?  No problem in FORTH.  How about modifying the FORTH compiler to allow
    
49 	complex inlining strategies -- simple.  This concept isn't common in programming languages,
    
50 	but it has a name (in fact two names): "macros" (by which I mean LISP-style macros, not
    
51 	the lame C preprocessor) and "domain specific languages" (DSLs).
    
52 
    53 	This tutorial isn't about learning FORTH as the language.  I'll point you to some references
    
54 	you should read if you're not familiar with using FORTH.  This tutorial is about how to
    
55 	write FORTH.  In fact, until you understand how FORTH is written, you'll have only a very
    
56 	superficial understanding of how to use it.
    
57 
    58 	So if you're not familiar with FORTH or want to refresh your memory here are some online
    
59 	references to read:
    
60 
    61 	http://en.wikipedia.org/wiki/Forth_%28programming_language%29
    
62 
    63 	http://galileo.phys.virginia.edu/classes/551.jvn.fall01/primer.htm
    
64 
    65 	http://wiki.laptop.org/go/Forth_Lessons
    
66 
    67 	http://www.albany.net/~hello/simple.htm
    
68 
    69 	Here is another "Why FORTH?" essay: http://www.jwdt.com/~paysan/why-forth.html
    
70 
    71 	Discussion and criticism of this FORTH here: http://lambda-the-ultimate.org/node/2452
    
72 
    73 	ACKNOWLEDGEMENTS ----------------------------------------------------------------------
    
74 
    75 	This code draws heavily on the design of LINA FORTH (http://home.hccnet.nl/a.w.m.van.der.horst/lina.html)
    
76 	by Albert van der Horst.  Any similarities in the code are probably not accidental.
    
77 
    78 	Some parts of this FORTH are also based on this IOCCC entry from 1992:
    
79 	http://ftp.funet.fi/pub/doc/IOCCC/1992/buzzard.2.design.
    
80 	I was very proud when Sean Barrett, the original author of the IOCCC entry, commented in the LtU thread
    
81 	http://lambda-the-ultimate.org/node/2452#comment-36818 about this FORTH.
    
82 
    83 	And finally I'd like to acknowledge the (possibly forgotten?) authors of ARTIC FORTH because their
    
84 	original program which I still have on original cassette tape kept nagging away at me all these years.
    
85 	http://en.wikipedia.org/wiki/Artic_Software
    
86 
    87 	PUBLIC DOMAIN ----------------------------------------------------------------------
    
88 
    89 	I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.
    
90 
    91 	In case this is not legally possible, I grant any entity the right to use this work for any purpose,
    
92 	without any conditions, unless such conditions are required by law.
    
93 
    94 	SETTING UP ----------------------------------------------------------------------
    
95 
    96 	Let's get a few housekeeping things out of the way.  Firstly because I need to draw lots of
    
97 	ASCII-art diagrams to explain concepts, the best way to look at this is using a window which
    
98 	uses a fixed width font and is at least this wide:
    
99 
   100  <------------------------------------------------------------------------------------------------------------------------>
   
101 
   102 	Secondly make sure TABS are set to 8 characters.  The following should be a vertical
   
103 	line.  If not, sort out your tabs.
   
104 
   105 		|
   
106 	        |
   
107 	    	|
   
108 
   109 	Thirdly I assume that your screen is at least 50 characters high.
   
110 
   111 	ASSEMBLING ----------------------------------------------------------------------
   
112 
   113 	If you want to actually run this FORTH, rather than just read it, you will need Linux on an
   
114 	i386.  Linux because instead of programming directly to the hardware on a bare PC which I
   
115 	could have done, I went for a simpler tutorial by assuming that the 'hardware' is a Linux
   
116 	process with a few basic system calls (read, write and exit and that's about all).  i386
   
117 	is needed because I had to write the assembly for a processor, and i386 is by far the most
   
118 	common.  (Of course when I say 'i386', any 32- or 64-bit x86 processor will do.  I'm compiling
   
119 	this on a 64 bit AMD Opteron).
   
120 
   121 	Again, to assemble this you will need gcc and gas (the GNU assembler).  The commands to
   
122 	assemble and run the code (save this file as 'jonesforth.S') are:
   
123 
   124 	gcc -m32 -nostdlib -static -Wl,-Ttext,0 -Wl,--build-id=none -o jonesforth jonesforth.S
   
125 	cat jonesforth.f - | ./jonesforth
   
126 
   127 	If you want to run your own FORTH programs you can do:
   
128 
   129 	cat jonesforth.f myprog.f | ./jonesforth
   
130 
   131 	If you want to load your own FORTH code and then continue reading user commands, you can do:
   
132 
   133 	cat jonesforth.f myfunctions.f - | ./jonesforth
   
134 
   135 	ASSEMBLER ----------------------------------------------------------------------
   
136 
   137 	(You can just skip to the next section -- you don't need to be able to read assembler to
   
138 	follow this tutorial).
   
139 
   140 	However if you do want to read the assembly code here are a few notes about gas (the GNU assembler):
   
141 
   142 	(1) Register names are prefixed with '%', so %eax is the 32 bit i386 accumulator.  The registers
   
143 	    available on i386 are: %eax, %ebx, %ecx, %edx, %esi, %edi, %ebp and %esp, and most of them
   
144 	    have special purposes.
   
145 
   146 	(2) Add, mov, etc. take arguments in the form SRC,DEST.  So mov %eax,%ecx moves %eax -> %ecx
   
147 
   148 	(3) Constants are prefixed with '$', and you mustn't forget it!  If you forget it then it
   
149 	    causes a read from memory instead, so:
   
150 	    mov $2,%eax		moves number 2 into %eax
   
151 	    mov 2,%eax		reads the 32 bit word from address 2 into %eax (ie. most likely a mistake)
   
152 
   153 	(4) gas has a funky syntax for local labels, where '1f' (etc.) means label '1:' "forwards"
   
154 	    and '1b' (etc.) means label '1:' "backwards".  Notice that these labels might be mistaken
   
155 	    for hex numbers (eg. you might confuse 1b with $0x1b).
   
156 
   157 	(5) 'ja' is "jump if above", 'jb' for "jump if below", 'je' "jump if equal" etc.
   
158 
   159 	(6) gas has a reasonably nice .macro syntax, and I use them a lot to make the code shorter and
   
160 	    less repetitive.
   
161 
   162 	For more help reading the assembler, do "info gas" at the Linux prompt.
   
163 
   164 	Now the tutorial starts in earnest.
   
165 
   166 	THE DICTIONARY ----------------------------------------------------------------------
   
167 
   168 	In FORTH as you will know, functions are called "words", and just as in other languages they
   
169 	have a name and a definition.  Here are two FORTH words:
   
170 
   171 	: DOUBLE DUP + ;		\ name is "DOUBLE", definition is "DUP +"
   
172 	: QUADRUPLE DOUBLE DOUBLE ;	\ name is "QUADRUPLE", definition is "DOUBLE DOUBLE"
   
173 
   174 	Words, both built-in ones and ones which the programmer defines later, are stored in a dictionary
   
175 	which is just a linked list of dictionary entries.
   
176 
   177 	<--- DICTIONARY ENTRY (HEADER) ----------------------->
   
178 	+------------------------+--------+---------- - - - - +----------- - - - -
   
179 	| LINK POINTER           | LENGTH/| NAME	      | DEFINITION
   
180 	|			 | FLAGS  |     	      |
   
181 	+--- (4 bytes) ----------+- byte -+- n bytes  - - - - +----------- - - - -
   
182 
   183 	I'll come to the definition of the word later.  For now just look at the header.  The first
   
184 	4 bytes are the link pointer.  This points back to the previous word in the dictionary, or, for
   
185 	the first word in the dictionary it is just a NULL pointer.  Then comes a length/flags byte.
   
186 	The length of the word can be up to 31 characters (5 bits used) and the top three bits are used
   
187 	for various flags which I'll come to later.  This is followed by the name itself, and in this
   
188 	implementation the name is rounded up to a multiple of 4 bytes by padding it with zero bytes.
   
189 	That's just to ensure that the definition starts on a 32 bit boundary.
   
190 
   191 	A FORTH variable called LATEST contains a pointer to the most recently defined word, in
   
192 	other words, the head of this linked list.
   
193 
   194 	DOUBLE and QUADRUPLE might look like this:
   
195 
   196 	  pointer to previous word
   
197 	   ^
   
198 	   |
   
199 	+--|------+---+---+---+---+---+---+---+---+------------- - - - -
   
200 	| LINK    | 6 | D | O | U | B | L | E | 0 | (definition ...)
   
201 	+---------+---+---+---+---+---+---+---+---+------------- - - - -
   
202            ^       len                         padding
   
203 	   |
   
204 	+--|------+---+---+---+---+---+---+---+---+---+---+---+---+------------- - - - -
   
205 	| LINK    | 9 | Q | U | A | D | R | U | P | L | E | 0 | 0 | (definition ...)
   
206 	+---------+---+---+---+---+---+---+---+---+---+---+---+---+------------- - - - -
   
207            ^       len                                     padding
   
208            |
   
209            |
   
210 	  LATEST
   
211 
   212 	You should be able to see from this how you might implement functions to find a word in
   
213 	the dictionary (just walk along the dictionary entries starting at LATEST and matching
   
214 	the names until you either find a match or hit the NULL pointer at the end of the dictionary);
   
215 	and add a word to the dictionary (create a new definition, set its LINK to LATEST, and set
   
216 	LATEST to point to the new word).  We'll see precisely these functions implemented in
   
217 	assembly code later on.
   
218 
   219 	One interesting consequence of using a linked list is that you can redefine words, and
   
220 	a newer definition of a word overrides an older one.  This is an important concept in
   
221 	FORTH because it means that any word (even "built-in" or "standard" words) can be
   
222 	overridden with a new definition, either to enhance it, to make it faster or even to
   
223 	disable it.  However because of the way that FORTH words get compiled, which you'll
   
224 	understand below, words defined using the old definition of a word continue to use
   
225 	the old definition.  Only words defined after the new definition use the new definition.
   
226 
   227 	DIRECT THREADED CODE ----------------------------------------------------------------------
   
228 
   229 	Now we'll get to the really crucial bit in understanding FORTH, so go and get a cup of tea
   
230 	or coffee and settle down.  It's fair to say that if you don't understand this section, then you
   
231 	won't "get" how FORTH works, and that would be a failure on my part for not explaining it well.
   
232 	So if after reading this section a few times you don't understand it, please email me
   
233 	(rich@annexia.org).
   
234 
   235 	Let's talk first about what "threaded code" means.  Imagine a peculiar version of C where
   
236 	you are only allowed to call functions without arguments.  (Don't worry for now that such a
   
237 	language would be completely useless!)  So in our peculiar C, code would look like this:
   
238 
   239 	f ()
   
240 	{
   
241 	  a ();
   
242 	  b ();
   
243 	  c ();
   
244 	}
   
245 
   246 	and so on.  How would a function, say 'f' above, be compiled by a standard C compiler?
   
247 	Probably into assembly code like this.  On the right hand side I've written the actual
   
248 	i386 machine code.
   
249 
   250 	f:
   
251 	  CALL a			E8 08 00 00 00
   
252 	  CALL b			E8 1C 00 00 00
   
253 	  CALL c			E8 2C 00 00 00
   
254 	  ; ignore the return from the function for now
   
255 
   256 	"E8" is the x86 machine code to "CALL" a function.  In the first 20 years of computing
   
257 	memory was hideously expensive and we might have worried about the wasted space being used
   
258 	by the repeated "E8" bytes.  We can save 20% in code size (and therefore, in expensive memory)
   
259 	by compressing this into just:
   
260 
   261 	08 00 00 00		Just the function addresses, without
   
262 	1C 00 00 00		the CALL prefix.
   
263 	2C 00 00 00
   
264 
   265 	On a 16-bit machine like the ones which originally ran FORTH the savings are even greater - 33%.
   
266 
   267 	[Historical note: If the execution model that FORTH uses looks strange from the following
   
268 	paragraphs, then it was motivated entirely by the need to save memory on early computers.
   
269 	This code compression isn't so important now when our machines have more memory in their L1
   
270 	caches than those early computers had in total, but the execution model still has some
   
271 	useful properties].
   
272 
   273 	Of course this code won't run directly on the CPU any more.  Instead we need to write an
   
274 	interpreter which takes each set of bytes and calls it.
   
275 
   276 	On an i386 machine it turns out that we can write this interpreter rather easily, in just
   
277 	two assembly instructions which turn into just 3 bytes of machine code.  Let's store the
   
278 	pointer to the next word to execute in the %esi register:
   
279 
   280 		08 00 00 00	<- We're executing this one now.  %esi is the _next_ one to execute.
   
281 	%esi -> 1C 00 00 00
   
282 		2C 00 00 00
   
283 
   284 	The all-important i386 instruction is called LODSL (or in Intel manuals, LODSW).  It does
   
285 	two things.  Firstly it reads the memory at %esi into the accumulator (%eax).  Secondly it
   
286 	increments %esi by 4 bytes.  So after LODSL, the situation now looks like this:
   
287 
   288 		08 00 00 00	<- We're still executing this one
   
289 		1C 00 00 00	<- %eax now contains this address (0x0000001C)
   
290 	%esi -> 2C 00 00 00
   
291 
   292 	Now we just need to jump to the address in %eax.  This is again just a single x86 instruction
   
293 	written JMP *(%eax).  And after doing the jump, the situation looks like:
   
294 
   295 		08 00 00 00
   
296 		1C 00 00 00	<- Now we're executing this subroutine.
   
297 	%esi -> 2C 00 00 00
   
298 
   299 	To make this work, each subroutine is followed by the two instructions 'LODSL; JMP *(%eax)'
   
300 	which literally make the jump to the next subroutine.
   
301 
   302 	And that brings us to our first piece of actual code!  Well, it's a macro.
   
303 */
   
304 
   305 /* NEXT macro. */
   
306 	.macro NEXT
   
307 	lodsl
   
308 	jmp *(%eax)
   
309 	.endm
   
310 
   311 /*	The macro is called NEXT.  That's a FORTH-ism.  It expands to those two instructions.
   
312 
   313 	Every FORTH primitive that we write has to be ended by NEXT.  Think of it kind of like
   
314 	a return.
   
315 
   316 	The above describes what is known as direct threaded code.
   
317 
   318 	To sum up: We compress our function calls down to a list of addresses and use a somewhat
   
319 	magical macro to act as a "jump to next function in the list".  We also use one register (%esi)
   
320 	to act as a kind of instruction pointer, pointing to the next function in the list.
   
321 
   322 	I'll just give you a hint of what is to come by saying that a FORTH definition such as:
   
323 
   324 	: QUADRUPLE DOUBLE DOUBLE ;
   
325 
   326 	actually compiles (almost, not precisely but we'll see why in a moment) to a list of
   
327 	function addresses for DOUBLE, DOUBLE and a special function called EXIT to finish off.
   
328 
   329 	At this point, REALLY EAGLE-EYED ASSEMBLY EXPERTS are saying "JONES, YOU'VE MADE A MISTAKE!".
   
330 
   331 	I lied about JMP *(%eax).  
   
332 
   333 	INDIRECT THREADED CODE ----------------------------------------------------------------------
   
334 
   335 	It turns out that direct threaded code is interesting but only if you want to just execute
   
336 	a list of functions written in assembly language.  So QUADRUPLE would work only if DOUBLE
   
337 	was an assembly language function.  In the direct threaded code, QUADRUPLE would look like:
   
338 
   339 		+------------------+
   
340 		| addr of DOUBLE  --------------------> (assembly code to do the double)
   
341 		+------------------+                    NEXT
   
342 	%esi ->	| addr of DOUBLE   |
   
343 		+------------------+
   
344 
   345 	We can add an extra indirection to allow us to run both words written in assembly language
   
346 	(primitives written for speed) and words written in FORTH themselves as lists of addresses.
   
347 
   348 	The extra indirection is the reason for the brackets in JMP *(%eax).
   
349 
   350 	Let's have a look at how QUADRUPLE and DOUBLE really look in FORTH:
   
351 
   352 	        : QUADRUPLE DOUBLE DOUBLE ;
   
353 
   354 		+------------------+
   
355 		| codeword         |		   : DOUBLE DUP + ;
   
356 		+------------------+
   
357 		| addr of DOUBLE  ---------------> +------------------+
   
358 		+------------------+               | codeword         |
   
359 		| addr of DOUBLE   |		   +------------------+
   
360 		+------------------+	   	   | addr of DUP   --------------> +------------------+
   
361 		| addr of EXIT	   |		   +------------------+            | codeword      -------+
   
362 		+------------------+	   %esi -> | addr of +     --------+	   +------------------+   |
   
363 						   +------------------+	   |	   | assembly to    <-----+
   
364 						   | addr of EXIT     |    |       | implement DUP    |
   
365 						   +------------------+	   |	   |	..	      |
   
366 									   |	   |    ..            |
   
367 									   |	   | NEXT             |
   
368 									   |	   +------------------+
   
369 									   |
   
370 									   +-----> +------------------+
   
371 										   | codeword      -------+
   
372 										   +------------------+   |
   
373 										   | assembly to   <------+
   
374 										   | implement +      |
   
375 										   | 	..            |
   
376 										   | 	..            |
   
377 										   | NEXT      	      |
   
378 										   +------------------+
   
379 
   380 	This is the part where you may need an extra cup of tea/coffee/favourite caffeinated
   
381 	beverage.  What has changed is that I've added an extra pointer to the beginning of
   
382 	the definitions.  In FORTH this is sometimes called the "codeword".  The codeword is
   
383 	a pointer to the interpreter to run the function.  For primitives written in
   
384 	assembly language, the "interpreter" just points to the actual assembly code itself.
   
385 	They don't need interpreting, they just run.
   
386 
   387 	In words written in FORTH (like QUADRUPLE and DOUBLE), the codeword points to an interpreter
   
388 	function.
   
389 
   390 	I'll show you the interpreter function shortly, but let's recall our indirect
   
391 	JMP *(%eax) with the "extra" brackets.  Take the case where we're executing DOUBLE
   
392 	as shown, and DUP has been called.  Note that %esi is pointing to the address of +
   
393 
   394 	The assembly code for DUP eventually does a NEXT.  That:
   
395 
   396 	(1) reads the address of + into %eax		%eax points to the codeword of +
   
397 	(2) increments %esi by 4
   
398 	(3) jumps to the indirect %eax			jumps to the address in the codeword of +,
   
399 							ie. the assembly code to implement +
   
400 
   401 		+------------------+
   
402 		| codeword         |
   
403 		+------------------+
   
404 		| addr of DOUBLE  ---------------> +------------------+
   
405 		+------------------+               | codeword         |
   
406 		| addr of DOUBLE   |		   +------------------+
   
407 		+------------------+	   	   | addr of DUP   --------------> +------------------+
   
408 		| addr of EXIT	   |		   +------------------+            | codeword      -------+
   
409 		+------------------+	   	   | addr of +     --------+	   +------------------+   |
   
410 						   +------------------+	   |	   | assembly to    <-----+
   
411 					   %esi -> | addr of EXIT     |    |       | implement DUP    |
   
412 						   +------------------+	   |	   |	..	      |
   
413 									   |	   |    ..            |
   
414 									   |	   | NEXT             |
   
415 									   |	   +------------------+
   
416 									   |
   
417 									   +-----> +------------------+
   
418 										   | codeword      -------+
   
419 										   +------------------+   |
   
420 									now we're  | assembly to    <-----+
   
421 									executing  | implement +      |
   
422 									this	   | 	..            |
   
423 									function   | 	..            |
   
424 										   | NEXT      	      |
   
425 										   +------------------+
   
426 
   427 	So I hope that I've convinced you that NEXT does roughly what you'd expect.  This is
   
428 	indirect threaded code.
   
429 
   430 	I've glossed over four things.  I wonder if you can guess without reading on what they are?
   
431 
   432 	.
   
433 	.
   
434 	.
   
435 
   436 	My list of four things are: (1) What does "EXIT" do?  (2) which is related to (1) is how do
   
437 	you call into a function, ie. how does %esi start off pointing at part of QUADRUPLE, but
   
438 	then point at part of DOUBLE.  (3) What goes in the codeword for the words which are written
   
439 	in FORTH?  (4) How do you compile a function which does anything except call other functions
   
440 	ie. a function which contains a number like : DOUBLE 2 * ; ?
   
441 
   442 	THE INTERPRETER AND RETURN STACK ------------------------------------------------------------
   
443 
   444 	Going at these in no particular order, let's talk about issues (3) and (2), the interpreter
   
445 	and the return stack.
   
446 
   447 	Words which are defined in FORTH need a codeword which points to a little bit of code to
   
448 	give them a "helping hand" in life.  They don't need much, but they do need what is known
   
449 	as an "interpreter", although it doesn't really "interpret" in the same way that, say,
   
450 	Java bytecode used to be interpreted (ie. slowly).  This interpreter just sets up a few
   
451 	machine registers so that the word can then execute at full speed using the indirect
   
452 	threaded model above.
   
453 
   454 	One of the things that needs to happen when QUADRUPLE calls DOUBLE is that we save the old
   
455 	%esi ("instruction pointer") and create a new one pointing to the first word in DOUBLE.
   
456 	Because we will need to restore the old %esi at the end of DOUBLE (this is, after all, like
   
457 	a function call), we will need a stack to store these "return addresses" (old values of %esi).
   
458 
   459 	As you will have seen in the background documentation, FORTH has two stacks, an ordinary
   
460 	stack for parameters, and a return stack which is a bit more mysterious.  But our return
   
461 	stack is just the stack I talked about in the previous paragraph, used to save %esi when
   
462 	calling from a FORTH word into another FORTH word.
   
463 
   464 	In this FORTH, we are using the normal stack pointer (%esp) for the parameter stack.
   
465 	We will use the i386's "other" stack pointer (%ebp, usually called the "frame pointer")
   
466 	for our return stack.
   
467 
   468 	I've got two macros which just wrap up the details of using %ebp for the return stack.
   
469 	You use them as for example "PUSHRSP %eax" (push %eax on the return stack) or "POPRSP %ebx"
   
470 	(pop top of return stack into %ebx).
   
471 */
   
472 
   473 /* Macros to deal with the return stack. */
   
474 	.macro PUSHRSP reg
   
475 	lea -4(%ebp),%ebp	// push reg on to return stack
   
476 	movl \reg,(%ebp)
   
477 	.endm
   
478 
   479 	.macro POPRSP reg
   
480 	mov (%ebp),\reg		// pop top of return stack to reg
   
481 	lea 4(%ebp),%ebp
   
482 	.endm
   
483 
   484 /*
   
485 	And with that we can now talk about the interpreter.
   
486 
   487 	In FORTH the interpreter function is often called DOCOL (I think it means "DO COLON" because
   
488 	all FORTH definitions start with a colon, as in : DOUBLE DUP + ;
   
489 
   490 	The "interpreter" (it's not really "interpreting") just needs to push the old %esi on the
   
491 	stack and set %esi to the first word in the definition.  Remember that we jumped to the
   
492 	function using JMP *(%eax)?  Well a consequence of that is that conveniently %eax contains
   
493 	the address of this codeword, so just by adding 4 to it we get the address of the first
   
494 	data word.  Finally after setting up %esi, it just does NEXT which causes that first word
   
495 	to run.
   
496 */
   
497 
   498 /* DOCOL - the interpreter! */
   
499 	.text
   
500 	.align 4
   
501 DOCOL:
   
502 	PUSHRSP %esi		// push %esi on to the return stack
   
503 	addl $4,%eax		// %eax points to codeword, so make
   
504 	movl %eax,%esi		// %esi point to first data word
   
505 	NEXT
   
506 
   507 /*
   
508 	Just to make this absolutely clear, let's see how DOCOL works when jumping from QUADRUPLE
   
509 	into DOUBLE:
   
510 
   511 		QUADRUPLE:
   
512 		+------------------+
   
513 		| codeword         |
   
514 		+------------------+		   DOUBLE:
   
515 		| addr of DOUBLE  ---------------> +------------------+
   
516 		+------------------+       %eax -> | addr of DOCOL    |
   
517 	%esi ->	| addr of DOUBLE   |		   +------------------+
   
518 		+------------------+	   	   | addr of DUP      |
   
519 		| addr of EXIT	   |		   +------------------+
   
520 		+------------------+               | etc.             |
   
521 
   522 	First, the call to DOUBLE calls DOCOL (the codeword of DOUBLE).  DOCOL does this:  It
   
523 	pushes the old %esi on the return stack.  %eax points to the codeword of DOUBLE, so we
   
524 	just add 4 on to it to get our new %esi:
   
525 
   526 		QUADRUPLE:
   
527 		+------------------+
   
528 		| codeword         |
   
529 		+------------------+		   DOUBLE:
   
530 		| addr of DOUBLE  ---------------> +------------------+
   
531 top of return	+------------------+       %eax -> | addr of DOCOL    |
   
532 stack points ->	| addr of DOUBLE   |	   + 4 =   +------------------+
   
533 		+------------------+	   %esi -> | addr of DUP      |
   
534 		| addr of EXIT	   |		   +------------------+
   
535 		+------------------+               | etc.             |
   
536 
   537 	Then we do NEXT, and because of the magic of threaded code that increments %esi again
   
538 	and calls DUP.
   
539 
   540 	Well, it seems to work.
   
541 
   542 	One minor point here.  Because DOCOL is the first bit of assembly actually to be defined
   
543 	in this file (the others were just macros), and because I usually compile this code with the
   
544 	text segment starting at address 0, DOCOL has address 0.  So if you are disassembling the
   
545 	code and see a word with a codeword of 0, you will immediately know that the word is
   
546 	written in FORTH (it's not an assembler primitive) and so uses DOCOL as the interpreter.
   
547 
   548 	STARTING UP ----------------------------------------------------------------------
   
549 
   550 	Now let's get down to nuts and bolts.  When we start the program we need to set up
   
551 	a few things like the return stack.  But as soon as we can, we want to jump into FORTH
   
552 	code (albeit much of the "early" FORTH code will still need to be written as
   
553 	assembly language primitives).
   
554 
   555 	This is what the set up code does.  Does a tiny bit of house-keeping, sets up the
   
556 	separate return stack (NB: Linux gives us the ordinary parameter stack already), then
   
557 	immediately jumps to a FORTH word called QUIT.  Despite its name, QUIT doesn't quit
   
558 	anything.  It resets some internal state and starts reading and interpreting commands.
   
559 	(The reason it is called QUIT is because you can call QUIT from your own FORTH code
   
560 	to "quit" your program and go back to interpreting).
   
561 */
   
562 
   563 /* Assembler entry point. */
   
564 	.text
   
565 	.globl _start
   
566 _start:
   
567 	cld
   
568 	mov %esp,var_S0		// Save the initial data stack pointer in FORTH variable S0.
   
569 	mov $return_stack_top,%ebp // Initialise the return stack.
   
570 	call set_up_data_segment
   
571 
   572 	mov $cold_start,%esi	// Initialise interpreter.
   
573 	NEXT			// Run interpreter!
   
574 
   575 	.section .rodata
   
576 cold_start:			// High-level code without a codeword.
   
577 	.int QUIT
   
578 
   579 /*
   
580 	BUILT-IN WORDS ----------------------------------------------------------------------
   
581 
   582 	Remember our dictionary entries (headers)?  Let's bring those together with the codeword
   
583 	and data words to see how : DOUBLE DUP + ; really looks in memory.
   
584 
   585 	  pointer to previous word
   
586 	   ^
   
587 	   |
   
588 	+--|------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
   
589 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
   
590 	+---------+---+---+---+---+---+---+---+---+------------+--|---------+------------+------------+
   
591            ^       len                         pad  codeword      |
   
592 	   |							  V
   
593 	  LINK in next word				points to codeword of DUP
   
594 	
   595 	Initially we can't just write ": DOUBLE DUP + ;" (ie. that literal string) here because we
   
596 	don't yet have anything to read the string, break it up at spaces, parse each word, etc. etc.
   
597 	So instead we will have to define built-in words using the GNU assembler data constructors
   
598 	(like .int, .byte, .string, .ascii and so on -- look them up in the gas info page if you are
   
599 	unsure of them).
   
600 
   601 	The long way would be:
   
602 
   603 	.int <link to previous word>
   
604 	.byte 6			// len
   
605 	.ascii "DOUBLE"		// string
   
606 	.byte 0			// padding
   
607 DOUBLE: .int DOCOL		// codeword
   
608 	.int DUP		// pointer to codeword of DUP
   
609 	.int PLUS		// pointer to codeword of +
   
610 	.int EXIT		// pointer to codeword of EXIT
   
611 
   612 	That's going to get quite tedious rather quickly, so here I define an assembler macro
   
613 	so that I can just write:
   
614 
   615 	defword "DOUBLE",6,,DOUBLE
   
616 	.int DUP,PLUS,EXIT
   
617 
   618 	and I'll get exactly the same effect.
   
619 
   620 	Don't worry too much about the exact implementation details of this macro - it's complicated!
   
621 */
   
622 
   623 /* Flags - these are discussed later. */
   
624 	.set F_IMMED,0x80
   
625 	.set F_HIDDEN,0x20
   
626 	.set F_LENMASK,0x1f	// length mask
   
627 
   628 	// Store the chain of links.
   
629 	.set link,0
   
630 
   631 	.macro defword name, namelen, flags=0, label
   
632 	.section .rodata
   
633 	.align 4
   
634 	.globl name_\label
   
635 name_\label :
   
636 	.int link		// link
   
637 	.set link,name_\label
   
638 	.byte \flags+\namelen	// flags + length byte
   
639 	.ascii "\name"		// the name
   
640 	.align 4		// padding to next 4 byte boundary
   
641 	.globl \label
   
642 \label :
   
643 	.int DOCOL		// codeword - the interpreter
   
644 	// list of word pointers follow
   
645 	.endm
   
646 
   647 /*
   
648 	Similarly I want a way to write words written in assembly language.  There will quite a few
   
649 	of these to start with because, well, everything has to start in assembly before there's
   
650 	enough "infrastructure" to be able to start writing FORTH words, but also I want to define
   
651 	some common FORTH words in assembly language for speed, even though I could write them in FORTH.
   
652 
   653 	This is what DUP looks like in memory:
   
654 
   655 	  pointer to previous word
   
656 	   ^
   
657 	   |
   
658 	+--|------+---+---+---+---+------------+
   
659 	| LINK    | 3 | D | U | P | code_DUP ---------------------> points to the assembly
   
660 	+---------+---+---+---+---+------------+		    code used to write DUP,
   
661            ^       len              codeword			    which ends with NEXT.
   
662 	   |
   
663 	  LINK in next word
   
664 
   665 	Again, for brevity in writing the header I'm going to write an assembler macro called defcode.
   
666 	As with defword above, don't worry about the complicated details of the macro.
   
667 */
   
668 
   669 	.macro defcode name, namelen, flags=0, label
   
670 	.section .rodata
   
671 	.align 4
   
672 	.globl name_\label
   
673 name_\label :
   
674 	.int link		// link
   
675 	.set link,name_\label
   
676 	.byte \flags+\namelen	// flags + length byte
   
677 	.ascii "\name"		// the name
   
678 	.align 4		// padding to next 4 byte boundary
   
679 	.globl \label
   
680 \label :
   
681 	.int code_\label	// codeword
   
682 	.text
   
683 	//.align 4
   
684 	.globl code_\label
   
685 code_\label :			// assembler code follows
   
686 	.endm
   
687 
   688 /*
   
689 	Now some easy FORTH primitives.  These are written in assembly for speed.  If you understand
   
690 	i386 assembly language then it is worth reading these.  However if you don't understand assembly
   
691 	you can skip the details.
   
692 */
   
693 
   694 	defcode "DROP",4,,DROP
   
695 	pop %eax		// drop top of stack
   
696 	NEXT
   
697 
   698 	defcode "SWAP",4,,SWAP
   
699 	pop %eax		// swap top two elements on stack
   
700 	pop %ebx
   
701 	push %eax
   
702 	push %ebx
   
703 	NEXT
   
704 
   705 	defcode "DUP",3,,DUP
   
706 	mov (%esp),%eax		// duplicate top of stack
   
707 	push %eax
   
708 	NEXT
   
709 
   710 	defcode "OVER",4,,OVER
   
711 	mov 4(%esp),%eax	// get the second element of stack
   
712 	push %eax		// and push it on top
   
713 	NEXT
   
714 
   715 	defcode "ROT",3,,ROT
   
716 	pop %eax
   
717 	pop %ebx
   
718 	pop %ecx
   
719 	push %ebx
   
720 	push %eax
   
721 	push %ecx
   
722 	NEXT
   
723 
   724 	defcode "-ROT",4,,NROT
   
725 	pop %eax
   
726 	pop %ebx
   
727 	pop %ecx
   
728 	push %eax
   
729 	push %ecx
   
730 	push %ebx
   
731 	NEXT
   
732 
   733 	defcode "2DROP",5,,TWODROP // drop top two elements of stack
   
734 	pop %eax
   
735 	pop %eax
   
736 	NEXT
   
737 
   738 	defcode "2DUP",4,,TWODUP // duplicate top two elements of stack
   
739 	mov (%esp),%eax
   
740 	mov 4(%esp),%ebx
   
741 	push %ebx
   
742 	push %eax
   
743 	NEXT
   
744 
   745 	defcode "2SWAP",5,,TWOSWAP // swap top two pairs of elements of stack
   
746 	pop %eax
   
747 	pop %ebx
   
748 	pop %ecx
   
749 	pop %edx
   
750 	push %ebx
   
751 	push %eax
   
752 	push %edx
   
753 	push %ecx
   
754 	NEXT
   
755 
   756 	defcode "?DUP",4,,QDUP	// duplicate top of stack if non-zero
   
757 	movl (%esp),%eax
   
758 	test %eax,%eax
   
759 	jz 1f
   
760 	push %eax
   
761 1:	NEXT
   
762 
   763 
   764 	defcode "1+",2,,INCR
   
765 	incl (%esp)		// increment top of stack
   
766 	NEXT
   
767 
   768 	defcode "1-",2,,DECR
   
769 	decl (%esp)		// decrement top of stack
   
770 	NEXT
   
771 
   772 	defcode "4+",2,,INCR4
   
773 	addl $4,(%esp)		// add 4 to top of stack
   
774 	NEXT
   
775 
   776 	defcode "4-",2,,DECR4
   
777 	subl $4,(%esp)		// subtract 4 from top of stack
   
778 	NEXT
   
779 
   780 	defcode "+",1,,ADD
   
781 	pop %eax		// get top of stack
   
782 	addl %eax,(%esp)	// and add it to next word on stack
   
783 	NEXT
   
784 
   785 	defcode "-",1,,SUB
   
786 	pop %eax		// get top of stack
   
787 	subl %eax,(%esp)	// and subtract it from next word on stack
   
788 	NEXT
   
789 
   790 	defcode "*",1,,MUL
   
791 	pop %eax
   
792 	pop %ebx
   
793 	imull %ebx,%eax
   
794 	push %eax		// ignore overflow
   
795 	NEXT
   
796 
   797 /*
   
798 	In this FORTH, only /MOD is primitive.  Later we will define the / and MOD words in
   
799 	terms of the primitive /MOD.  The design of the i386 assembly instruction idiv which
   
800 	leaves both quotient and remainder makes this the obvious choice.
   
801 */
   
802 
   803 	defcode "/MOD",4,,DIVMOD
   
804 	xor %edx,%edx
   
805 	pop %ebx
   
806 	pop %eax
   
807 	idivl %ebx
   
808 	push %edx		// push remainder
   
809 	push %eax		// push quotient
   
810 	NEXT
   
811 
   812 /*
   
813 	Lots of comparison operations like =, <, >, etc..
   
814 
   815 	ANS FORTH says that the comparison words should return all (binary) 1's for
   
816 	TRUE and all 0's for FALSE.  However this is a bit of a strange convention
   
817 	so this FORTH breaks it and returns the more normal (for C programmers ...)
   
818 	1 meaning TRUE and 0 meaning FALSE.
   
819 */
   
820 
   821 	defcode "=",1,,EQU	// top two words are equal?
   
822 	pop %eax
   
823 	pop %ebx
   
824 	cmp %ebx,%eax
   
825 	sete %al
   
826 	movzbl %al,%eax
   
827 	pushl %eax
   
828 	NEXT
   
829 
   830 	defcode "<>",2,,NEQU	// top two words are not equal?
   
831 	pop %eax
   
832 	pop %ebx
   
833 	cmp %ebx,%eax
   
834 	setne %al
   
835 	movzbl %al,%eax
   
836 	pushl %eax
   
837 	NEXT
   
838 
   839 	defcode "<",1,,LT
   
840 	pop %eax
   
841 	pop %ebx
   
842 	cmp %eax,%ebx
   
843 	setl %al
   
844 	movzbl %al,%eax
   
845 	pushl %eax
   
846 	NEXT
   
847 
   848 	defcode ">",1,,GT
   
849 	pop %eax
   
850 	pop %ebx
   
851 	cmp %eax,%ebx
   
852 	setg %al
   
853 	movzbl %al,%eax
   
854 	pushl %eax
   
855 	NEXT
   
856 
   857 	defcode "<=",2,,LE
   
858 	pop %eax
   
859 	pop %ebx
   
860 	cmp %eax,%ebx
   
861 	setle %al
   
862 	movzbl %al,%eax
   
863 	pushl %eax
   
864 	NEXT
   
865 
   866 	defcode ">=",2,,GE
   
867 	pop %eax
   
868 	pop %ebx
   
869 	cmp %eax,%ebx
   
870 	setge %al
   
871 	movzbl %al,%eax
   
872 	pushl %eax
   
873 	NEXT
   
874 
   875 	defcode "0=",2,,ZEQU	// top of stack equals 0?
   
876 	pop %eax
   
877 	test %eax,%eax
   
878 	setz %al
   
879 	movzbl %al,%eax
   
880 	pushl %eax
   
881 	NEXT
   
882 
   883 	defcode "0<>",3,,ZNEQU	// top of stack not 0?
   
884 	pop %eax
   
885 	test %eax,%eax
   
886 	setnz %al
   
887 	movzbl %al,%eax
   
888 	pushl %eax
   
889 	NEXT
   
890 
   891 	defcode "0<",2,,ZLT	// comparisons with 0
   
892 	pop %eax
   
893 	test %eax,%eax
   
894 	setl %al
   
895 	movzbl %al,%eax
   
896 	pushl %eax
   
897 	NEXT
   
898 
   899 	defcode "0>",2,,ZGT
   
900 	pop %eax
   
901 	test %eax,%eax
   
902 	setg %al
   
903 	movzbl %al,%eax
   
904 	pushl %eax
   
905 	NEXT
   
906 
   907 	defcode "0<=",3,,ZLE
   
908 	pop %eax
   
909 	test %eax,%eax
   
910 	setle %al
   
911 	movzbl %al,%eax
   
912 	pushl %eax
   
913 	NEXT
   
914 
   915 	defcode "0>=",3,,ZGE
   
916 	pop %eax
   
917 	test %eax,%eax
   
918 	setge %al
   
919 	movzbl %al,%eax
   
920 	pushl %eax
   
921 	NEXT
   
922 
   923 	defcode "AND",3,,AND	// bitwise AND
   
924 	pop %eax
   
925 	andl %eax,(%esp)
   
926 	NEXT
   
927 
   928 	defcode "OR",2,,OR	// bitwise OR
   
929 	pop %eax
   
930 	orl %eax,(%esp)
   
931 	NEXT
   
932 
   933 	defcode "XOR",3,,XOR	// bitwise XOR
   
934 	pop %eax
   
935 	xorl %eax,(%esp)
   
936 	NEXT
   
937 
   938 	defcode "INVERT",6,,INVERT // this is the FORTH bitwise "NOT" function (cf. NEGATE and NOT)
   
939 	notl (%esp)
   
940 	NEXT
   
941 
   942 /*
   
943 	RETURNING FROM FORTH WORDS ----------------------------------------------------------------------
   
944 
   945 	Time to talk about what happens when we EXIT a function.  In this diagram QUADRUPLE has called
   
946 	DOUBLE, and DOUBLE is about to exit (look at where %esi is pointing):
   
947 
   948 		QUADRUPLE
   
949 		+------------------+
   
950 		| codeword         |
   
951 		+------------------+		   DOUBLE
   
952 		| addr of DOUBLE  ---------------> +------------------+
   
953 		+------------------+               | codeword         |
   
954 		| addr of DOUBLE   |		   +------------------+
   
955 		+------------------+	   	   | addr of DUP      |
   
956 		| addr of EXIT	   |		   +------------------+
   
957 		+------------------+	   	   | addr of +        |
   
958 						   +------------------+
   
959 					   %esi -> | addr of EXIT     |
   
960 						   +------------------+
   
961 
   962 	What happens when the + function does NEXT?  Well, the following code is executed.
   
963 */
   
964 
   965 	defcode "EXIT",4,,EXIT
   
966 	POPRSP %esi		// pop return stack into %esi
   
967 	NEXT
   
968 
   969 /*
   
970 	EXIT gets the old %esi which we saved from before on the return stack, and puts it in %esi.
   
971 	So after this (but just before NEXT) we get:
   
972 
   973 		QUADRUPLE
   
974 		+------------------+
   
975 		| codeword         |
   
976 		+------------------+		   DOUBLE
   
977 		| addr of DOUBLE  ---------------> +------------------+
   
978 		+------------------+               | codeword         |
   
979 	%esi ->	| addr of DOUBLE   |		   +------------------+
   
980 		+------------------+	   	   | addr of DUP      |
   
981 		| addr of EXIT	   |		   +------------------+
   
982 		+------------------+	   	   | addr of +        |
   
983 						   +------------------+
   
984 						   | addr of EXIT     |
   
985 						   +------------------+
   
986 
   987 	And NEXT just completes the job by, well, in this case just by calling DOUBLE again :-)
   
988 
   989 	LITERALS ----------------------------------------------------------------------
   
990 
   991 	The final point I "glossed over" before was how to deal with functions that do anything
   
992 	apart from calling other functions.  For example, suppose that DOUBLE was defined like this:
   
993 
   994 	: DOUBLE 2 * ;
   
995 
   996 	It does the same thing, but how do we compile it since it contains the literal 2?  One way
   
997 	would be to have a function called "2" (which you'd have to write in assembler), but you'd need
   
998 	a function for every single literal that you wanted to use.
   
999 
  1000 	FORTH solves this by compiling the function using a special word called LIT:
  
1001 
  1002 	+---------------------------+-------+-------+-------+-------+-------+
  
1003 	| (usual header of DOUBLE)  | DOCOL | LIT   | 2     | *     | EXIT  |
  
1004 	+---------------------------+-------+-------+-------+-------+-------+
  
1005 
  1006 	LIT is executed in the normal way, but what it does next is definitely not normal.  It
  
1007 	looks at %esi (which now points to the number 2), grabs it, pushes it on the stack, then
  
1008 	manipulates %esi in order to skip the number as if it had never been there.
  
1009 
  1010 	What's neat is that the whole grab/manipulate can be done using a single byte single
  
1011 	i386 instruction, our old friend LODSL.  Rather than me drawing more ASCII-art diagrams,
  
1012 	see if you can find out how LIT works:
  
1013 */
  
1014 
  1015 	defcode "LIT",3,,LIT
  
1016 	// %esi points to the next command, but in this case it points to the next
  
1017 	// literal 32 bit integer.  Get that literal into %eax and increment %esi.
  
1018 	// On x86, it's a convenient single byte instruction!  (cf. NEXT macro)
  
1019 	lodsl
  
1020 	push %eax		// push the literal number on to stack
  
1021 	NEXT
  
1022 
  1023 /*
  
1024 	MEMORY ----------------------------------------------------------------------
  
1025 
  1026 	As important point about FORTH is that it gives you direct access to the lowest levels
  
1027 	of the machine.  Manipulating memory directly is done frequently in FORTH, and these are
  
1028 	the primitive words for doing it.
  
1029 */
  
1030 
  1031 	defcode "!",1,,STORE
  
1032 	pop %ebx		// address to store at
  
1033 	pop %eax		// data to store there
  
1034 	mov %eax,(%ebx)		// store it
  
1035 	NEXT
  
1036 
  1037 	defcode "@",1,,FETCH
  
1038 	pop %ebx		// address to fetch
  
1039 	mov (%ebx),%eax		// fetch it
  
1040 	push %eax		// push value onto stack
  
1041 	NEXT
  
1042 
  1043 	defcode "+!",2,,ADDSTORE
  
1044 	pop %ebx		// address
  
1045 	pop %eax		// the amount to add
  
1046 	addl %eax,(%ebx)	// add it
  
1047 	NEXT
  
1048 
  1049 	defcode "-!",2,,SUBSTORE
  
1050 	pop %ebx		// address
  
1051 	pop %eax		// the amount to subtract
  
1052 	subl %eax,(%ebx)	// add it
  
1053 	NEXT
  
1054 
  1055 /*
  
1056 	! and @ (STORE and FETCH) store 32-bit words.  It's also useful to be able to read and write bytes
  
1057 	so we also define standard words C@ and C!.
  
1058 
  1059 	Byte-oriented operations only work on architectures which permit them (i386 is one of those).
  
1060  */
  
1061 
  1062 	defcode "C!",2,,STOREBYTE
  
1063 	pop %ebx		// address to store at
  
1064 	pop %eax		// data to store there
  
1065 	movb %al,(%ebx)		// store it
  
1066 	NEXT
  
1067 
  1068 	defcode "C@",2,,FETCHBYTE
  
1069 	pop %ebx		// address to fetch
  
1070 	xor %eax,%eax
  
1071 	movb (%ebx),%al		// fetch it
  
1072 	push %eax		// push value onto stack
  
1073 	NEXT
  
1074 
  1075 /* C@C! is a useful byte copy primitive. */
  
1076 	defcode "C@C!",4,,CCOPY
  
1077 	movl 4(%esp),%ebx	// source address
  
1078 	movb (%ebx),%al		// get source character
  
1079 	pop %edi		// destination address
  
1080 	stosb			// copy to destination
  
1081 	push %edi		// increment destination address
  
1082 	incl 4(%esp)		// increment source address
  
1083 	NEXT
  
1084 
  1085 /* and CMOVE is a block copy operation. */
  
1086 	defcode "CMOVE",5,,CMOVE
  
1087 	mov %esi,%edx		// preserve %esi
  
1088 	pop %ecx		// length
  
1089 	pop %edi		// destination address
  
1090 	pop %esi		// source address
  
1091 	rep movsb		// copy source to destination
  
1092 	mov %edx,%esi		// restore %esi
  
1093 	NEXT
  
1094 
  1095 
  1096 /*
  
1097 	BUILT-IN VARIABLES ----------------------------------------------------------------------
  
1098 
  1099 	These are some built-in variables and related standard FORTH words.  Of these, the only one that we
  
1100 	have discussed so far was LATEST, which points to the last (most recently defined) word in the
  
1101 	FORTH dictionary.  LATEST is also a FORTH word which pushes the address of LATEST (the variable)
  
1102 	on to the stack, so you can read or write it using @ and ! operators.  For example, to print
  
1103 	the current value of LATEST (and this can apply to any FORTH variable) you would do:
  
1104 
  1105 	LATEST @ . CR
  
1106 
  1107 	To make defining variables shorter, I'm using a macro called defvar, similar to defword and
  
1108 	defcode above.  (In fact the defvar macro uses defcode to do the dictionary header).
  
1109 */
  
1110 
  1111 	.macro defvar name, namelen, flags=0, label, initial=0
  
1112 	defcode \name,\namelen,\flags,\label
  
1113 	push $var_\name
  
1114 	NEXT
  
1115 	.data
  
1116 	.align 4
  
1117 var_\name :
  
1118 	.int \initial
  
1119 	.endm
  
1120 
  1121 /*
  
1122 	The built-in variables are:
  
1123 
  1124 	STATE		Is the interpreter executing code (0) or compiling a word (non-zero)?
  
1125 	LATEST		Points to the latest (most recently defined) word in the dictionary.
  
1126 	HERE		Points to the next free byte of memory.  When compiling, compiled words go here.
  
1127 	S0		Stores the address of the top of the parameter stack.
  
1128 	BASE		The current base for printing and reading numbers.
  
1129 
  1130 */
  
1131 	defvar "STATE",5,,STATE
  
1132 	defvar "HERE",4,,HERE
  
1133 	defvar "LATEST",6,,LATEST,name_SYSCALL0 // SYSCALL0 must be last in built-in dictionary
  
1134 	defvar "S0",2,,SZ
  
1135 	defvar "BASE",4,,BASE,10
  
1136 
  1137 /*
  
1138 	BUILT-IN CONSTANTS ----------------------------------------------------------------------
  
1139 
  1140 	It's also useful to expose a few constants to FORTH.  When the word is executed it pushes a
  
1141 	constant value on the stack.
  
1142 
  1143 	The built-in constants are:
  
1144 
  1145 	VERSION		Is the current version of this FORTH.
  
1146 	R0		The address of the top of the return stack.
  
1147 	DOCOL		Pointer to DOCOL.
  
1148 	F_IMMED		The IMMEDIATE flag's actual value.
  
1149 	F_HIDDEN	The HIDDEN flag's actual value.
  
1150 	F_LENMASK	The length mask in the flags/len byte.
  
1151 
  1152 	SYS_*		and the numeric codes of various Linux syscalls (from <asm/unistd.h>)
  
1153 */
  
1154 
  1155 //#include <asm-i386/unistd.h>	// you might need this instead
  
1156 #include <asm/unistd.h>
  
1157 
  1158 	.macro defconst name, namelen, flags=0, label, value
  
1159 	defcode \name,\namelen,\flags,\label
  
1160 	push $\value
  
1161 	NEXT
  
1162 	.endm
  
1163 
  1164 	defconst "VERSION",7,,VERSION,JONES_VERSION
  
1165 	defconst "R0",2,,RZ,return_stack_top
  
1166 	defconst "DOCOL",5,,__DOCOL,DOCOL
  
1167 	defconst "F_IMMED",7,,__F_IMMED,F_IMMED
  
1168 	defconst "F_HIDDEN",8,,__F_HIDDEN,F_HIDDEN
  
1169 	defconst "F_LENMASK",9,,__F_LENMASK,F_LENMASK
  
1170 
  1171 	defconst "SYS_EXIT",8,,SYS_EXIT,__NR_exit
  
1172 	defconst "SYS_OPEN",8,,SYS_OPEN,__NR_open
  
1173 	defconst "SYS_CLOSE",9,,SYS_CLOSE,__NR_close
  
1174 	defconst "SYS_READ",8,,SYS_READ,__NR_read
  
1175 	defconst "SYS_WRITE",9,,SYS_WRITE,__NR_write
  
1176 	defconst "SYS_CREAT",9,,SYS_CREAT,__NR_creat
  
1177 	defconst "SYS_BRK",7,,SYS_BRK,__NR_brk
  
1178 
  1179 	defconst "O_RDONLY",8,,__O_RDONLY,0
  
1180 	defconst "O_WRONLY",8,,__O_WRONLY,1
  
1181 	defconst "O_RDWR",6,,__O_RDWR,2
  
1182 	defconst "O_CREAT",7,,__O_CREAT,0100
  
1183 	defconst "O_EXCL",6,,__O_EXCL,0200
  
1184 	defconst "O_TRUNC",7,,__O_TRUNC,01000
  
1185 	defconst "O_APPEND",8,,__O_APPEND,02000
  
1186 	defconst "O_NONBLOCK",10,,__O_NONBLOCK,04000
  
1187 
  1188 /*
  
1189 	RETURN STACK ----------------------------------------------------------------------
  
1190 
  1191 	These words allow you to access the return stack.  Recall that the register %ebp always points to
  
1192 	the top of the return stack.
  
1193 */
  
1194 
  1195 	defcode ">R",2,,TOR
  
1196 	pop %eax		// pop parameter stack into %eax
  
1197 	PUSHRSP %eax		// push it on to the return stack
  
1198 	NEXT
  
1199 
  1200 	defcode "R>",2,,FROMR
  
1201 	POPRSP %eax		// pop return stack on to %eax
  
1202 	push %eax		// and push on to parameter stack
  
1203 	NEXT
  
1204 
  1205 	defcode "RSP@",4,,RSPFETCH
  
1206 	push %ebp
  
1207 	NEXT
  
1208 
  1209 	defcode "RSP!",4,,RSPSTORE
  
1210 	pop %ebp
  
1211 	NEXT
  
1212 
  1213 	defcode "RDROP",5,,RDROP
  
1214 	addl $4,%ebp		// pop return stack and throw away
  
1215 	NEXT
  
1216 
  1217 /*
  
1218 	PARAMETER (DATA) STACK ----------------------------------------------------------------------
  
1219 
  1220 	These functions allow you to manipulate the parameter stack.  Recall that Linux sets up the parameter
  
1221 	stack for us, and it is accessed through %esp.
  
1222 */
  
1223 
  1224 	defcode "DSP@",4,,DSPFETCH
  
1225 	mov %esp,%eax
  
1226 	push %eax
  
1227 	NEXT
  
1228 
  1229 	defcode "DSP!",4,,DSPSTORE
  
1230 	pop %esp
  
1231 	NEXT
  
1232 
  1233 /*
  
1234 	INPUT AND OUTPUT ----------------------------------------------------------------------
  
1235 
  1236 	These are our first really meaty/complicated FORTH primitives.  I have chosen to write them in
  
1237 	assembler, but surprisingly in "real" FORTH implementations these are often written in terms
  
1238 	of more fundamental FORTH primitives.  I chose to avoid that because I think that just obscures
  
1239 	the implementation.  After all, you may not understand assembler but you can just think of it
  
1240 	as an opaque block of code that does what it says.
  
1241 
  1242 	Let's discuss input first.
  
1243 
  1244 	The FORTH word KEY reads the next byte from stdin (and pushes it on the parameter stack).
  
1245 	So if KEY is called and someone hits the space key, then the number 32 (ASCII code of space)
  
1246 	is pushed on the stack.
  
1247 
  1248 	In FORTH there is no distinction between reading code and reading input.  We might be reading
  
1249 	and compiling code, we might be reading words to execute, we might be asking for the user
  
1250 	to type their name -- ultimately it all comes in through KEY.
  
1251 
  1252 	The implementation of KEY uses an input buffer of a certain size (defined at the end of this
  
1253 	file).  It calls the Linux read(2) system call to fill this buffer and tracks its position
  
1254 	in the buffer using a couple of variables, and if it runs out of input buffer then it refills
  
1255 	it automatically.  The other thing that KEY does is if it detects that stdin has closed, it
  
1256 	exits the program, which is why when you hit ^D the FORTH system cleanly exits.
  
1257 
  1258      buffer			      bufftop
  
1259 	|				 |
  
1260 	V				 V
  
1261 	+-------------------------------+--------------------------------------+
  
1262 	| INPUT READ FROM STDIN ....... | unused part of the buffer            |
  
1263 	+-------------------------------+--------------------------------------+
  
1264 	                  ^
  
1265 			  |
  
1266 		       currkey (next character to read)
  
1267 
  1268 	<---------------------- BUFFER_SIZE (4096 bytes) ---------------------->
  
1269 */
  
1270 
  1271 	defcode "KEY",3,,KEY
  
1272 	call _KEY
  
1273 	push %eax		// push return value on stack
  
1274 	NEXT
  
1275 _KEY:
  
1276 	mov (currkey),%ebx
  
1277 	cmp (bufftop),%ebx
  
1278 	jge 1f			// exhausted the input buffer?
  
1279 	xor %eax,%eax
  
1280 	mov (%ebx),%al		// get next key from input buffer
  
1281 	inc %ebx
  
1282 	mov %ebx,(currkey)	// increment currkey
  
1283 	ret
  
1284 
  1285 1:	// Out of input; use read(2) to fetch more input from stdin.
  
1286 	xor %ebx,%ebx		// 1st param: stdin
  
1287 	mov $buffer,%ecx	// 2nd param: buffer
  
1288 	mov %ecx,currkey
  
1289 	mov $BUFFER_SIZE,%edx	// 3rd param: max length
  
1290 	mov $__NR_read,%eax	// syscall: read
  
1291 	int $0x80
  
1292 	test %eax,%eax		// If %eax <= 0, then exit.
  
1293 	jbe 2f
  
1294 	addl %eax,%ecx		// buffer+%eax = bufftop
  
1295 	mov %ecx,bufftop
  
1296 	jmp _KEY
  
1297 
  1298 2:	// Error or end of input: exit the program.
  
1299 	xor %ebx,%ebx
  
1300 	mov $__NR_exit,%eax	// syscall: exit
  
1301 	int $0x80
  
1302 
  1303 	.data
  
1304 	.align 4
  
1305 currkey:
  
1306 	.int buffer		// Current place in input buffer (next character to read).
  
1307 bufftop:
  
1308 	.int buffer		// Last valid data in input buffer + 1.
  
1309 
  1310 /*
  
1311 	By contrast, output is much simpler.  The FORTH word EMIT writes out a single byte to stdout.
  
1312 	This implementation just uses the write system call.  No attempt is made to buffer output, but
  
1313 	it would be a good exercise to add it.
  
1314 */
  
1315 
  1316 	defcode "EMIT",4,,EMIT
  
1317 	pop %eax
  
1318 	call _EMIT
  
1319 	NEXT
  
1320 _EMIT:
  
1321 	mov $1,%ebx		// 1st param: stdout
  
1322 
  1323 	// write needs the address of the byte to write
  
1324 	mov %al,emit_scratch
  
1325 	mov $emit_scratch,%ecx	// 2nd param: address
  
1326 
  1327 	mov $1,%edx		// 3rd param: nbytes = 1
  
1328 
  1329 	mov $__NR_write,%eax	// write syscall
  
1330 	int $0x80
  
1331 	ret
  
1332 
  1333 	.data			// NB: easier to fit in the .data section
  
1334 emit_scratch:
  
1335 	.space 1		// scratch used by EMIT
  
1336 
  1337 /*
  
1338 	Back to input, WORD is a FORTH word which reads the next full word of input.
  
1339 
  1340 	What it does in detail is that it first skips any blanks (spaces, tabs, newlines and so on).
  
1341 	Then it calls KEY to read characters into an internal buffer until it hits a blank.  Then it
  
1342 	calculates the length of the word it read and returns the address and the length as
  
1343 	two words on the stack (with the length at the top of stack).
  
1344 
  1345 	Notice that WORD has a single internal buffer which it overwrites each time (rather like
  
1346 	a static C string).  Also notice that WORD's internal buffer is just 32 bytes long and
  
1347 	there is NO checking for overflow.  31 bytes happens to be the maximum length of a
  
1348 	FORTH word that we support, and that is what WORD is used for: to read FORTH words when
  
1349 	we are compiling and executing code.  The returned strings are not NUL-terminated.
  
1350 
  1351 	Start address+length is the normal way to represent strings in FORTH (not ending in an
  
1352 	ASCII NUL character as in C), and so FORTH strings can contain any character including NULs
  
1353 	and can be any length.
  
1354 
  1355 	WORD is not suitable for just reading strings (eg. user input) because of all the above
  
1356 	peculiarities and limitations.
  
1357 
  1358 	Note that when executing, you'll see:
  
1359 	WORD FOO
  
1360 	which puts "FOO" and length 3 on the stack, but when compiling:
  
1361 	: BAR WORD FOO ;
  
1362 	is an error (or at least it doesn't do what you might expect).  Later we'll talk about compiling
  
1363 	and immediate mode, and you'll understand why.
  
1364 */
  
1365 
  1366 	defcode "WORD",4,,WORD
  
1367 	call _WORD
  
1368 	push %edi		// push base address
  
1369 	push %ecx		// push length
  
1370 	NEXT
  
1371 
  1372 _WORD:
  
1373 	/* Search for first non-blank character.  Also skip \ comments. */
  
1374 1:
  
1375 	call _KEY		// get next key, returned in %eax
  
1376 	cmpb $'\\',%al		// start of a comment?
  
1377 	je 3f			// if so, skip the comment
  
1378 	cmpb $' ',%al
  
1379 	jbe 1b			// if so, keep looking
  
1380 
  1381 	/* Search for the end of the word, storing chars as we go. */
  
1382 	mov $word_buffer,%edi	// pointer to return buffer
  
1383 2:
  
1384 	stosb			// add character to return buffer
  
1385 	call _KEY		// get next key, returned in %al
  
1386 	cmpb $' ',%al		// is blank?
  
1387 	ja 2b			// if not, keep looping
  
1388 
  1389 	/* Return the word (well, the static buffer) and length. */
  
1390 	sub $word_buffer,%edi
  
1391 	mov %edi,%ecx		// return length of the word
  
1392 	mov $word_buffer,%edi	// return address of the word
  
1393 	ret
  
1394 
  1395 	/* Code to skip \ comments to end of the current line. */
  
1396 3:
  
1397 	call _KEY
  
1398 	cmpb $'\n',%al		// end of line yet?
  
1399 	jne 3b
  
1400 	jmp 1b
  
1401 
  1402 	.data			// NB: easier to fit in the .data section
  
1403 	// A static buffer where WORD returns.  Subsequent calls
  
1404 	// overwrite this buffer.  Maximum word length is 32 chars.
  
1405 word_buffer:
  
1406 	.space 32
  
1407 
  1408 /*
  
1409 	As well as reading in words we'll need to read in numbers and for that we are using a function
  
1410 	called NUMBER.  This parses a numeric string such as one returned by WORD and pushes the
  
1411 	number on the parameter stack.
  
1412 
  1413 	The function uses the variable BASE as the base (radix) for conversion, so for example if
  
1414 	BASE is 2 then we expect a binary number.  Normally BASE is 10.
  
1415 
  1416 	If the word starts with a '-' character then the returned value is negative.
  
1417 
  1418 	If the string can't be parsed as a number (or contains characters outside the current BASE)
  
1419 	then we need to return an error indication.  So NUMBER actually returns two items on the stack.
  
1420 	At the top of stack we return the number of unconverted characters (ie. if 0 then all characters
  
1421 	were converted, so there is no error).  Second from top of stack is the parsed number or a
  
1422 	partial value if there was an error.
  
1423 */
  
1424 	defcode "NUMBER",6,,NUMBER
  
1425 	pop %ecx		// length of string
  
1426 	pop %edi		// start address of string
  
1427 	call _NUMBER
  
1428 	push %eax		// parsed number
  
1429 	push %ecx		// number of unparsed characters (0 = no error)
  
1430 	NEXT
  
1431 
  1432 _NUMBER:
  
1433 	xor %eax,%eax
  
1434 	xor %ebx,%ebx
  
1435 
  1436 	test %ecx,%ecx		// trying to parse a zero-length string is an error, but will return 0.
  
1437 	jz 5f
  
1438 
  1439 	movl var_BASE,%edx	// get BASE (in %dl)
  
1440 
  1441 	// Check if first character is '-'.
  
1442 	movb (%edi),%bl		// %bl = first character in string
  
1443 	inc %edi
  
1444 	push %eax		// push 0 on stack
  
1445 	cmpb $'-',%bl		// negative number?
  
1446 	jnz 2f
  
1447 	pop %eax
  
1448 	push %ebx		// push <> 0 on stack, indicating negative
  
1449 	dec %ecx
  
1450 	jnz 1f
  
1451 	pop %ebx		// error: string is only '-'.
  
1452 	movl $1,%ecx
  
1453 	ret
  
1454 
  1455 	// Loop reading digits.
  
1456 1:	imull %edx,%eax		// %eax *= BASE
  
1457 	movb (%edi),%bl		// %bl = next character in string
  
1458 	inc %edi
  
1459 
  1460 	// Convert 0-9, A-Z to a number 0-35.
  
1461 2:	subb $'0',%bl		// < '0'?
  
1462 	jb 4f
  
1463 	cmp $10,%bl		// <= '9'?
  
1464 	jb 3f
  
1465 	subb $17,%bl		// < 'A'? (17 is 'A'-'0')
  
1466 	jb 4f
  
1467 	addb $10,%bl
  
1468 
  1469 3:	cmp %dl,%bl		// >= BASE?
  
1470 	jge 4f
  
1471 
  1472 	// OK, so add it to %eax and loop.
  
1473 	add %ebx,%eax
  
1474 	dec %ecx
  
1475 	jnz 1b
  
1476 
  1477 	// Negate the result if first character was '-' (saved on the stack).
  
1478 4:	pop %ebx
  
1479 	test %ebx,%ebx
  
1480 	jz 5f
  
1481 	neg %eax
  
1482 
  1483 5:	ret
  
1484 
  1485 /*
  
1486 	DICTIONARY LOOK UPS ----------------------------------------------------------------------
  
1487 
  1488 	We're building up to our prelude on how FORTH code is compiled, but first we need yet more infrastructure.
  
1489 
  1490 	The FORTH word FIND takes a string (a word as parsed by WORD -- see above) and looks it up in the
  
1491 	dictionary.  What it actually returns is the address of the dictionary header, if it finds it,
  
1492 	or 0 if it didn't.
  
1493 
  1494 	So if DOUBLE is defined in the dictionary, then WORD DOUBLE FIND returns the following pointer:
  
1495 
  1496     pointer to this
  
1497 	|
  
1498 	|
  
1499 	V
  
1500 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
  
1501 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
  
1502 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
  
1503 
  1504 	See also >CFA and >DFA.
  
1505 
  1506 	FIND doesn't find dictionary entries which are flagged as HIDDEN.  See below for why.
  
1507 */
  
1508 
  1509 	defcode "FIND",4,,FIND
  
1510 	pop %ecx		// %ecx = length
  
1511 	pop %edi		// %edi = address
  
1512 	call _FIND
  
1513 	push %eax		// %eax = address of dictionary entry (or NULL)
  
1514 	NEXT
  
1515 
  1516 _FIND:
  
1517 	push %esi		// Save %esi so we can use it in string comparison.
  
1518 
  1519 	// Now we start searching backwards through the dictionary for this word.
  
1520 	mov var_LATEST,%edx	// LATEST points to name header of the latest word in the dictionary
  
1521 1:	test %edx,%edx		// NULL pointer?  (end of the linked list)
  
1522 	je 4f
  
1523 
  1524 	// Compare the length expected and the length of the word.
  
1525 	// Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
  
1526 	// this won't pick the word (the length will appear to be wrong).
  
1527 	xor %eax,%eax
  
1528 	movb 4(%edx),%al	// %al = flags+length field
  
1529 	andb $(F_HIDDEN|F_LENMASK),%al // %al = name length
  
1530 	cmpb %cl,%al		// Length is the same?
  
1531 	jne 2f
  
1532 
  1533 	// Compare the strings in detail.
  
1534 	push %ecx		// Save the length
  
1535 	push %edi		// Save the address (repe cmpsb will move this pointer)
  
1536 	lea 5(%edx),%esi	// Dictionary string we are checking against.
  
1537 	repe cmpsb		// Compare the strings.
  
1538 	pop %edi
  
1539 	pop %ecx
  
1540 	jne 2f			// Not the same.
  
1541 
  1542 	// The strings are the same - return the header pointer in %eax
  
1543 	pop %esi
  
1544 	mov %edx,%eax
  
1545 	ret
  
1546 
  1547 2:	mov (%edx),%edx		// Move back through the link field to the previous word
  
1548 	jmp 1b			// .. and loop.
  
1549 
  1550 4:	// Not found.
  
1551 	pop %esi
  
1552 	xor %eax,%eax		// Return zero to indicate not found.
  
1553 	ret
  
1554 
  1555 /*
  
1556 	FIND returns the dictionary pointer, but when compiling we need the codeword pointer (recall
  
1557 	that FORTH definitions are compiled into lists of codeword pointers).  The standard FORTH
  
1558 	word >CFA turns a dictionary pointer into a codeword pointer.
  
1559 
  1560 	The example below shows the result of:
  
1561 
  1562 		WORD DOUBLE FIND >CFA
  
1563 
  1564 	FIND returns a pointer to this
  
1565 	|				>CFA converts it to a pointer to this
  
1566 	|					   |
  
1567 	V					   V
  
1568 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
  
1569 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
  
1570 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
  
1571 						   codeword
  
1572 
  1573 	Notes:
  
1574 
  1575 	Because names vary in length, this isn't just a simple increment.
  
1576 
  1577 	In this FORTH you cannot easily turn a codeword pointer back into a dictionary entry pointer, but
  
1578 	that is not true in most FORTH implementations where they store a back pointer in the definition
  
1579 	(with an obvious memory/complexity cost).  The reason they do this is that it is useful to be
  
1580 	able to go backwards (codeword -> dictionary entry) in order to decompile FORTH definitions
  
1581 	quickly.
  
1582 
  1583 	What does CFA stand for?  My best guess is "Code Field Address".
  
1584 */
  
1585 
  1586 	defcode ">CFA",4,,TCFA
  
1587 	pop %edi
  
1588 	call _TCFA
  
1589 	push %edi
  
1590 	NEXT
  
1591 _TCFA:
  
1592 	xor %eax,%eax
  
1593 	add $4,%edi		// Skip link pointer.
  
1594 	movb (%edi),%al		// Load flags+len into %al.
  
1595 	inc %edi		// Skip flags+len byte.
  
1596 	andb $F_LENMASK,%al	// Just the length, not the flags.
  
1597 	add %eax,%edi		// Skip the name.
  
1598 	addl $3,%edi		// The codeword is 4-byte aligned.
  
1599 	andl $~3,%edi
  
1600 	ret
  
1601 
  1602 /*
  
1603 	Related to >CFA is >DFA which takes a dictionary entry address as returned by FIND and
  
1604 	returns a pointer to the first data field.
  
1605 
  1606 	FIND returns a pointer to this
  
1607 	|				>CFA converts it to a pointer to this
  
1608 	|					   |
  
1609 	|					   |	>DFA converts it to a pointer to this
  
1610 	|					   |		 |
  
1611 	V					   V		 V
  
1612 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
  
1613 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
  
1614 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
  
1615 						   codeword
  
1616 
  1617 	(Note to those following the source of FIG-FORTH / ciforth: My >DFA definition is
  
1618 	different from theirs, because they have an extra indirection).
  
1619 
  1620 	You can see that >DFA is easily defined in FORTH just by adding 4 to the result of >CFA.
  
1621 */
  
1622 
  1623 	defword ">DFA",4,,TDFA
  
1624 	.int TCFA		// >CFA		(get code field address)
  
1625 	.int INCR4		// 4+		(add 4 to it to get to next word)
  
1626 	.int EXIT		// EXIT		(return from FORTH word)
  
1627 
  1628 /*
  
1629 	COMPILING ----------------------------------------------------------------------
  
1630 
  1631 	Now we'll talk about how FORTH compiles words.  Recall that a word definition looks like this:
  
1632 
  1633 		: DOUBLE DUP + ;
  
1634 
  1635 	and we have to turn this into:
  
1636 
  1637 	  pointer to previous word
  
1638 	   ^
  
1639 	   |
  
1640 	+--|------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
  
1641 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
  
1642 	+---------+---+---+---+---+---+---+---+---+------------+--|---------+------------+------------+
  
1643            ^       len                         pad  codeword      |
  
1644 	   |							  V
  
1645 	  LATEST points here				points to codeword of DUP
  
1646 
  1647 	There are several problems to solve.  Where to put the new word?  How do we read words?  How
  
1648 	do we define the words : (COLON) and ; (SEMICOLON)?
  
1649 
  1650 	FORTH solves this rather elegantly and as you might expect in a very low-level way which
  
1651 	allows you to change how the compiler works on your own code.
  
1652 
  1653 	FORTH has an INTERPRET function (a true interpreter this time, not DOCOL) which runs in a
  
1654 	loop, reading words (using WORD), looking them up (using FIND), turning them into codeword
  
1655 	pointers (using >CFA) and deciding what to do with them.
  
1656 
  1657 	What it does depends on the mode of the interpreter (in variable STATE).
  
1658 
  1659 	When STATE is zero, the interpreter just runs each word as it looks them up.  This is known as
  
1660 	immediate mode.
  
1661 
  1662 	The interesting stuff happens when STATE is non-zero -- compiling mode.  In this mode the
  
1663 	interpreter appends the codeword pointer to user memory (the HERE variable points to the next
  
1664 	free byte of user memory -- see DATA SEGMENT section below).
  
1665 
  1666 	So you may be able to see how we could define : (COLON).  The general plan is:
  
1667 
  1668 	(1) Use WORD to read the name of the function being defined.
  
1669 
  1670 	(2) Construct the dictionary entry -- just the header part -- in user memory:
  
1671 
  1672     pointer to previous word (from LATEST)			+-- Afterwards, HERE points here, where
  
1673 	   ^							|   the interpreter will start appending
  
1674 	   |							V   codewords.
  
1675 	+--|------+---+---+---+---+---+---+---+---+------------+
  
1676 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      |
  
1677 	+---------+---+---+---+---+---+---+---+---+------------+
  
1678                    len                         pad  codeword
  
1679 
  1680 	(3) Set LATEST to point to the newly defined word, ...
  
1681 
  1682 	(4) .. and most importantly leave HERE pointing just after the new codeword.  This is where
  
1683 	    the interpreter will append codewords.
  
1684 
  1685 	(5) Set STATE to 1.  This goes into compile mode so the interpreter starts appending codewords to
  
1686 	    our partially-formed header.
  
1687 
  1688 	After : has run, our input is here:
  
1689 
  1690 	: DOUBLE DUP + ;
  
1691 	         ^
  
1692 		 |
  
1693 		Next byte returned by KEY will be the 'D' character of DUP
  
1694 
  1695 	so the interpreter (now it's in compile mode, so I guess it's really the compiler) reads "DUP",
  
1696 	looks it up in the dictionary, gets its codeword pointer, and appends it:
  
1697 
  1698 									     +-- HERE updated to point here.
  
1699 									     |
  
1700 									     V
  
1701 	+---------+---+---+---+---+---+---+---+---+------------+------------+
  
1702 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        |
  
1703 	+---------+---+---+---+---+---+---+---+---+------------+------------+
  
1704                    len                         pad  codeword
  
1705 
  1706 	Next we read +, get the codeword pointer, and append it:
  
1707 
  1708 											  +-- HERE updated to point here.
  
1709 											  |
  
1710 											  V
  
1711 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+
  
1712 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          |
  
1713 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+
  
1714                    len                         pad  codeword
  
1715 
  1716 	The issue is what happens next.  Obviously what we _don't_ want to happen is that we
  
1717 	read ";" and compile it and go on compiling everything afterwards.
  
1718 
  1719 	At this point, FORTH uses a trick.  Remember the length byte in the dictionary definition
  
1720 	isn't just a plain length byte, but can also contain flags.  One flag is called the
  
1721 	IMMEDIATE flag (F_IMMED in this code).  If a word in the dictionary is flagged as
  
1722 	IMMEDIATE then the interpreter runs it immediately _even if it's in compile mode_.
  
1723 
  1724 	This is how the word ; (SEMICOLON) works -- as a word flagged in the dictionary as IMMEDIATE.
  
1725 
  1726 	And all it does is append the codeword for EXIT on to the current definition and switch
  
1727 	back to immediate mode (set STATE back to 0).  Shortly we'll see the actual definition
  
1728 	of ; and we'll see that it's really a very simple definition, declared IMMEDIATE.
  
1729 
  1730 	After the interpreter reads ; and executes it 'immediately', we get this:
  
1731 
  1732 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
  
1733 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
  
1734 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
  
1735                    len                         pad  codeword					       ^
  
1736 												       |
  
1737 												    
  1738 	STATE is set to 0.
  
1739 
  1740 	And that's it, job done, our new definition is compiled, and we're back in immediate mode
  
1741 	just reading and executing words, perhaps including a call to test our new word DOUBLE.
  
1742 
  1743 	The only last wrinkle in this is that while our word was being compiled, it was in a
  
1744 	half-finished state.  We certainly wouldn't want DOUBLE to be called somehow during
  
1745 	this time.  There are several ways to stop this from happening, but in FORTH what we
  
1746 	do is flag the word with the HIDDEN flag (F_HIDDEN in this code) just while it is
  
1747 	being compiled.  This prevents FIND from finding it, and thus in theory stops any
  
1748 	chance of it being called.
  
1749 
  1750 	The above explains how compiling, : (COLON) and ; (SEMICOLON) works and in a moment I'm
  
1751 	going to define them.  The : (COLON) function can be made a little bit more general by writing
  
1752 	it in two parts.  The first part, called CREATE, makes just the header:
  
1753 
  1754 						   +-- Afterwards, HERE points here.
  
1755 						   |
  
1756 						   V
  
1757 	+---------+---+---+---+---+---+---+---+---+
  
1758 	| LINK    | 6 | D | O | U | B | L | E | 0 |
  
1759 	+---------+---+---+---+---+---+---+---+---+
  
1760                    len                         pad
  
1761 
  1762 	and the second part, the actual definition of : (COLON), calls CREATE and appends the
  
1763 	DOCOL codeword, so leaving:
  
1764 
  1765 								+-- Afterwards, HERE points here.
  
1766 								|
  
1767 								V
  
1768 	+---------+---+---+---+---+---+---+---+---+------------+
  
1769 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      |
  
1770 	+---------+---+---+---+---+---+---+---+---+------------+
  
1771                    len                         pad  codeword
  
1772 
  1773 	CREATE is a standard FORTH word and the advantage of this split is that we can reuse it to
  
1774 	create other types of words (not just ones which contain code, but words which contain variables,
  
1775 	constants and other data).
  
1776 */
  
1777 
  1778 	defcode "CREATE",6,,CREATE
  
1779 
  1780 	// Get the name length and address.
  
1781 	pop %ecx		// %ecx = length
  
1782 	pop %ebx		// %ebx = address of name
  
1783 
  1784 	// Link pointer.
  
1785 	movl var_HERE,%edi	// %edi is the address of the header
  
1786 	movl var_LATEST,%eax	// Get link pointer
  
1787 	stosl			// and store it in the header.
  
1788 
  1789 	// Length byte and the word itself.
  
1790 	mov %cl,%al		// Get the length.
  
1791 	stosb			// Store the length/flags byte.
  
1792 	push %esi
  
1793 	mov %ebx,%esi		// %esi = word
  
1794 	rep movsb		// Copy the word
  
1795 	pop %esi
  
1796 	addl $3,%edi		// Align to next 4 byte boundary.
  
1797 	andl $~3,%edi
  
1798 
  1799 	// Update LATEST and HERE.
  
1800 	movl var_HERE,%eax
  
1801 	movl %eax,var_LATEST
  
1802 	movl %edi,var_HERE
  
1803 	NEXT
  
1804 
  1805 /*
  
1806 	Because I want to define : (COLON) in FORTH, not assembler, we need a few more FORTH words
  
1807 	to use.
  
1808 
  1809 	The first is , (COMMA) which is a standard FORTH word which appends a 32 bit integer to the user
  
1810 	memory pointed to by HERE, and adds 4 to HERE.  So the action of , (COMMA) is:
  
1811 
  1812 							previous value of HERE
  
1813 								 |
  
1814 								 V
  
1815 	+---------+---+---+---+---+---+---+---+---+-- - - - - --+------------+
  
1816 	| LINK    | 6 | D | O | U | B | L | E | 0 |             |  <data>    |
  
1817 	+---------+---+---+---+---+---+---+---+---+-- - - - - --+------------+
  
1818                    len                         pad		              ^
  
1819 									      |
  
1820 									new value of HERE
  
1821 
  1822 	and <data> is whatever 32 bit integer was at the top of the stack.
  
1823 
  1824 	, (COMMA) is quite a fundamental operation when compiling.  It is used to append codewords
  
1825 	to the current word that is being compiled.
  
1826 */
  
1827 
  1828 	defcode ",",1,,COMMA
  
1829 	pop %eax		// Code pointer to store.
  
1830 	call _COMMA
  
1831 	NEXT
  
1832 _COMMA:
  
1833 	movl var_HERE,%edi	// HERE
  
1834 	stosl			// Store it.
  
1835 	movl %edi,var_HERE	// Update HERE (incremented)
  
1836 	ret
  
1837 
  1838 /*
  
1839 	Our definitions of : (COLON) and ; (SEMICOLON) will need to switch to and from compile mode.
  
1840 
  1841 	Immediate mode vs. compile mode is stored in the global variable STATE, and by updating this
  
1842 	variable we can switch between the two modes.
  
1843 
  1844 	For various reasons which may become apparent later, FORTH defines two standard words called
  
1845 	[ and ] (LBRAC and RBRAC) which switch between modes:
  
1846 
  1847 	Word	Assembler	Action		Effect
  
1848 	[	LBRAC		STATE := 0	Switch to immediate mode.
  
1849 	]	RBRAC		STATE := 1	Switch to compile mode.
  
1850 
  1851 	[ (LBRAC) is an IMMEDIATE word.  The reason is as follows: If we are in compile mode and the
  
1852 	interpreter saw [ then it would compile it rather than running it.  We would never be able to
  
1853 	switch back to immediate mode!  So we flag the word as IMMEDIATE so that even in compile mode
  
1854 	the word runs immediately, switching us back to immediate mode.
  
1855 */
  
1856 
  1857 	defcode "[",1,F_IMMED,LBRAC
  
1858 	xor %eax,%eax
  
1859 	movl %eax,var_STATE	// Set STATE to 0.
  
1860 	NEXT
  
1861 
  1862 	defcode "]",1,,RBRAC
  
1863 	movl $1,var_STATE	// Set STATE to 1.
  
1864 	NEXT
  
1865 
  1866 /*
  
1867 	Now we can define : (COLON) using CREATE.  It just calls CREATE, appends DOCOL (the codeword), sets
  
1868 	the word HIDDEN and goes into compile mode.
  
1869 */
  
1870 
  1871 	defword ":",1,,COLON
  
1872 	.int WORD		// Get the name of the new word
  
1873 	.int CREATE		// CREATE the dictionary entry / header
  
1874 	.int LIT, DOCOL, COMMA	// Append DOCOL  (the codeword).
  
1875 	.int LATEST, FETCH, HIDDEN // Make the word hidden (see below for definition).
  
1876 	.int RBRAC		// Go into compile mode.
  
1877 	.int EXIT		// Return from the function.
  
1878 
  1879 /*
  
1880 	; (SEMICOLON) is also elegantly simple.  Notice the F_IMMED flag.
  
1881 */
  
1882 
  1883 	defword ";",1,F_IMMED,SEMICOLON
  
1884 	.int LIT, EXIT, COMMA	// Append EXIT (so the word will return).
  
1885 	.int LATEST, FETCH, HIDDEN // Toggle hidden flag -- unhide the word (see below for definition).
  
1886 	.int LBRAC		// Go back to IMMEDIATE mode.
  
1887 	.int EXIT		// Return from the function.
  
1888 
  1889 /*
  
1890 	EXTENDING THE COMPILER ----------------------------------------------------------------------
  
1891 
  1892 	Words flagged with IMMEDIATE (F_IMMED) aren't just for the FORTH compiler to use.  You can define
  
1893 	your own IMMEDIATE words too, and this is a crucial aspect when extending basic FORTH, because
  
1894 	it allows you in effect to extend the compiler itself.  Does gcc let you do that?
  
1895 
  1896 	Standard FORTH words like IF, WHILE, ." and so on are all written as extensions to the basic
  
1897 	compiler, and are all IMMEDIATE words.
  
1898 
  1899 	The IMMEDIATE word toggles the F_IMMED (IMMEDIATE flag) on the most recently defined word,
  
1900 	or on the current word if you call it in the middle of a definition.
  
1901 
  1902 	Typical usage is:
  
1903 
  1904 	: MYIMMEDWORD IMMEDIATE
  
1905 		...definition...
  
1906 	;
  
1907 
  1908 	but some FORTH programmers write this instead:
  
1909 
  1910 	: MYIMMEDWORD
  
1911 		...definition...
  
1912 	; IMMEDIATE
  
1913 
  1914 	The two usages are equivalent, to a first approximation.
  
1915 */
  
1916 
  1917 	defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
  
1918 	movl var_LATEST,%edi	// LATEST word.
  
1919 	addl $4,%edi		// Point to name/flags byte.
  
1920 	xorb $F_IMMED,(%edi)	// Toggle the IMMED bit.
  
1921 	NEXT
  
1922 
  1923 /*
  
1924 	'addr HIDDEN' toggles the hidden flag (F_HIDDEN) of the word defined at addr.  To hide the
  
1925 	most recently defined word (used above in : and ; definitions) you would do:
  
1926 
  1927 		LATEST @ HIDDEN
  
1928 
  1929 	'HIDE word' toggles the flag on a named 'word'.
  
1930 
  1931 	Setting this flag stops the word from being found by FIND, and so can be used to make 'private'
  
1932 	words.  For example, to break up a large word into smaller parts you might do:
  
1933 
  1934 		: SUB1 ... subword ... ;
  
1935 		: SUB2 ... subword ... ;
  
1936 		: SUB3 ... subword ... ;
  
1937 		: MAIN ... defined in terms of SUB1, SUB2, SUB3 ... ;
  
1938 		HIDE SUB1
  
1939 		HIDE SUB2
  
1940 		HIDE SUB3
  
1941 
  1942 	After this, only MAIN is 'exported' or seen by the rest of the program.
  
1943 */
  
1944 
  1945 	defcode "HIDDEN",6,,HIDDEN
  
1946 	pop %edi		// Dictionary entry.
  
1947 	addl $4,%edi		// Point to name/flags byte.
  
1948 	xorb $F_HIDDEN,(%edi)	// Toggle the HIDDEN bit.
  
1949 	NEXT
  
1950 
  1951 	defword "HIDE",4,,HIDE
  
1952 	.int WORD		// Get the word (after HIDE).
  
1953 	.int FIND		// Look up in the dictionary.
  
1954 	.int HIDDEN		// Set F_HIDDEN flag.
  
1955 	.int EXIT		// Return.
  
1956 
  1957 /*
  
1958 	' (TICK) is a standard FORTH word which returns the codeword pointer of the next word.
  
1959 
  1960 	The common usage is:
  
1961 
  1962 	' FOO ,
  
1963 
  1964 	which appends the codeword of FOO to the current word we are defining (this only works in compiled code).
  
1965 
  1966 	You tend to use ' in IMMEDIATE words.  For example an alternate (and rather useless) way to define
  
1967 	a literal 2 might be:
  
1968 
  1969 	: LIT2 IMMEDIATE
  
1970 		' LIT ,		\ Appends LIT to the currently-being-defined word
  
1971 		2 ,		\ Appends the number 2 to the currently-being-defined word
  
1972 	;
  
1973 
  1974 	So you could do:
  
1975 
  1976 	: DOUBLE LIT2 * ;
  
1977 
  1978 	(If you don't understand how LIT2 works, then you should review the material about compiling words
  
1979 	and immediate mode).
  
1980 
  1981 	This definition of ' uses a cheat which I copied from buzzard92.  As a result it only works in
  
1982 	compiled code.  It is possible to write a version of ' based on WORD, FIND, >CFA which works in
  
1983 	immediate mode too.
  
1984 */
  
1985 	defcode "'",1,,TICK
  
1986 	lodsl			// Get the address of the next word and skip it.
  
1987 	pushl %eax		// Push it on the stack.
  
1988 	NEXT
  
1989 
  1990 /*
  
1991 	BRANCHING ----------------------------------------------------------------------
  
1992 
  1993 	It turns out that all you need in order to define looping constructs, IF-statements, etc.
  
1994 	are two primitives.
  
1995 
  1996 	BRANCH is an unconditional branch. 0BRANCH is a conditional branch (it only branches if the
  
1997 	top of stack is zero).
  
1998 
  1999 	The diagram below shows how BRANCH works in some imaginary compiled word.  When BRANCH executes,
  
2000 	%esi starts by pointing to the offset field (compare to LIT above):
  
2001 
  2002 	+---------------------+-------+---- - - ---+------------+------------+---- - - - ----+------------+
  
2003 	| (Dictionary header) | DOCOL |            | BRANCH     | offset     | (skipped)     | word       |
  
2004 	+---------------------+-------+---- - - ---+------------+-----|------+---- - - - ----+------------+
  
2005 								   ^  |			      ^
  
2006 								   |  |			      |
  
2007 								   |  +-----------------------+
  
2008 								  %esi added to offset
  
2009 
  2010 	The offset is added to %esi to make the new %esi, and the result is that when NEXT runs, execution
  
2011 	continues at the branch target.  Negative offsets work as expected.
  
2012 
  2013 	0BRANCH is the same except the branch happens conditionally.
  
2014 
  2015 	Now standard FORTH words such as IF, THEN, ELSE, WHILE, REPEAT, etc. can be implemented entirely
  
2016 	in FORTH.  They are IMMEDIATE words which append various combinations of BRANCH or 0BRANCH
  
2017 	into the word currently being compiled.
  
2018 
  2019 	As an example, code written like this:
  
2020 
  2021 		condition-code IF true-part THEN rest-code
  
2022 
  2023 	compiles to:
  
2024 
  2025 		condition-code 0BRANCH OFFSET true-part rest-code
  
2026 					  |		^
  
2027 					  |		|
  
2028 					  +-------------+
  
2029 */
  
2030 
  2031 	defcode "BRANCH",6,,BRANCH
  
2032 	add (%esi),%esi		// add the offset to the instruction pointer
  
2033 	NEXT
  
2034 
  2035 	defcode "0BRANCH",7,,ZBRANCH
  
2036 	pop %eax
  
2037 	test %eax,%eax		// top of stack is zero?
  
2038 	jz code_BRANCH		// if so, jump back to the branch function above
  
2039 	lodsl			// otherwise we need to skip the offset
  
2040 	NEXT
  
2041 
  2042 /*
  
2043 	LITERAL STRINGS ----------------------------------------------------------------------
  
2044 
  2045 	LITSTRING is a primitive used to implement the ." and S" operators (which are written in
  
2046 	FORTH).  See the definition of those operators later.
  
2047 
  2048 	TELL just prints a string.  It's more efficient to define this in assembly because we
  
2049 	can make it a single Linux syscall.
  
2050 */
  
2051 
  2052 	defcode "LITSTRING",9,,LITSTRING
  
2053 	lodsl			// get the length of the string
  
2054 	push %esi		// push the address of the start of the string
  
2055 	push %eax		// push it on the stack
  
2056 	addl %eax,%esi		// skip past the string
  
2057  	addl $3,%esi		// but round up to next 4 byte boundary
  
2058 	andl $~3,%esi
  
2059 	NEXT
  
2060 
  2061 	defcode "TELL",4,,TELL
  
2062 	mov $1,%ebx		// 1st param: stdout
  
2063 	pop %edx		// 3rd param: length of string
  
2064 	pop %ecx		// 2nd param: address of string
  
2065 	mov $__NR_write,%eax	// write syscall
  
2066 	int $0x80
  
2067 	NEXT
  
2068 
  2069 /*
  
2070 	QUIT AND INTERPRET ----------------------------------------------------------------------
  
2071 
  2072 	QUIT is the first FORTH function called, almost immediately after the FORTH system "boots".
  
2073 	As explained before, QUIT doesn't "quit" anything.  It does some initialisation (in particular
  
2074 	it clears the return stack) and it calls INTERPRET in a loop to interpret commands.  The
  
2075 	reason it is called QUIT is because you can call it from your own FORTH words in order to
  
2076 	"quit" your program and start again at the user prompt.
  
2077 
  2078 	INTERPRET is the FORTH interpreter ("toploop", "toplevel" or "REPL" might be a more accurate
  
2079 	description -- see: http://en.wikipedia.org/wiki/REPL).
  
2080 */
  
2081 
  2082 	// QUIT must not return (ie. must not call EXIT).
  
2083 	defword "QUIT",4,,QUIT
  
2084 	.int RZ,RSPSTORE	// R0 RSP!, clear the return stack
  
2085 	.int INTERPRET		// interpret the next word
  
2086 	.int BRANCH,-8		// and loop (indefinitely)
  
2087 
  2088 /*
  
2089 	This interpreter is pretty simple, but remember that in FORTH you can always override
  
2090 	it later with a more powerful one!
  
2091  */
  
2092 	defcode "INTERPRET",9,,INTERPRET
  
2093 	call _WORD		// Returns %ecx = length, %edi = pointer to word.
  
2094 
  2095 	// Is it in the dictionary?
  
2096 	xor %eax,%eax
  
2097 	movl %eax,interpret_is_lit // Not a literal number (not yet anyway ...)
  
2098 	call _FIND		// Returns %eax = pointer to header or 0 if not found.
  
2099 	test %eax,%eax		// Found?
  
2100 	jz 1f
  
2101 
  2102 	// In the dictionary.  Is it an IMMEDIATE codeword?
  
2103 	mov %eax,%edi		// %edi = dictionary entry
  
2104 	movb 4(%edi),%al	// Get name+flags.
  
2105 	push %ax		// Just save it for now.
  
2106 	call _TCFA		// Convert dictionary entry (in %edi) to codeword pointer.
  
2107 	pop %ax
  
2108 	andb $F_IMMED,%al	// Is IMMED flag set?
  
2109 	mov %edi,%eax
  
2110 	jnz 4f			// If IMMED, jump straight to executing.
  
2111 
  2112 	jmp 2f
  
2113 
  2114 1:	// Not in the dictionary (not a word) so assume it's a literal number.
  
2115 	incl interpret_is_lit
  
2116 	call _NUMBER		// Returns the parsed number in %eax, %ecx > 0 if error
  
2117 	test %ecx,%ecx
  
2118 	jnz 6f
  
2119 	mov %eax,%ebx
  
2120 	mov $LIT,%eax		// The word is LIT
  
2121 
  2122 2:	// Are we compiling or executing?
  
2123 	movl var_STATE,%edx
  
2124 	test %edx,%edx
  
2125 	jz 4f			// Jump if executing.
  
2126 
  2127 	// Compiling - just append the word to the current dictionary definition.
  
2128 	call _COMMA
  
2129 	mov interpret_is_lit,%ecx // Was it a literal?
  
2130 	test %ecx,%ecx
  
2131 	jz 3f
  
2132 	mov %ebx,%eax		// Yes, so LIT is followed by a number.
  
2133 	call _COMMA
  
2134 3:	NEXT
  
2135 
  2136 4:	// Executing - run it!
  
2137 	mov interpret_is_lit,%ecx // Literal?
  
2138 	test %ecx,%ecx		// Literal?
  
2139 	jnz 5f
  
2140 
  2141 	// Not a literal, execute it now.  This never returns, but the codeword will
  
2142 	// eventually call NEXT which will reenter the loop in QUIT.
  
2143 	jmp *(%eax)
  
2144 
  2145 5:	// Executing a literal, which means push it on the stack.
  
2146 	push %ebx
  
2147 	NEXT
  
2148 
  2149 6:	// Parse error (not a known word or a number in the current BASE).
  
2150 	// Print an error message followed by up to 40 characters of context.
  
2151 	mov $2,%ebx		// 1st param: stderr
  
2152 	mov $errmsg,%ecx	// 2nd param: error message
  
2153 	mov $errmsgend-errmsg,%edx // 3rd param: length of string
  
2154 	mov $__NR_write,%eax	// write syscall
  
2155 	int $0x80
  
2156 
  2157 	mov (currkey),%ecx	// the error occurred just before currkey position
  
2158 	mov %ecx,%edx
  
2159 	sub $buffer,%edx	// %edx = currkey - buffer (length in buffer before currkey)
  
2160 	cmp $40,%edx		// if > 40, then print only 40 characters
  
2161 	jle 7f
  
2162 	mov $40,%edx
  
2163 7:	sub %edx,%ecx		// %ecx = start of area to print, %edx = length
  
2164 	mov $__NR_write,%eax	// write syscall
  
2165 	int $0x80
  
2166 
  2167 	mov $errmsgnl,%ecx	// newline
  
2168 	mov $1,%edx
  
2169 	mov $__NR_write,%eax	// write syscall
  
2170 	int $0x80
  
2171 
  2172 	NEXT
  
2173 
  2174 	.section .rodata
  
2175 errmsg: .ascii "PARSE ERROR: "
  
2176 errmsgend:
  
2177 errmsgnl: .ascii "\n"
  
2178 
  2179 	.data			// NB: easier to fit in the .data section
  
2180 	.align 4
  
2181 interpret_is_lit:
  
2182 	.int 0			// Flag used to record if reading a literal
  
2183 
  2184 /*
  
2185 	ODDS AND ENDS ----------------------------------------------------------------------
  
2186 
  2187 	CHAR puts the ASCII code of the first character of the following word on the stack.  For example
  
2188 	CHAR A puts 65 on the stack.
  
2189 
  2190 	EXECUTE is used to run execution tokens.  See the discussion of execution tokens in the
  
2191 	FORTH code for more details.
  
2192 
  2193 	SYSCALL0, SYSCALL1, SYSCALL2, SYSCALL3 make a standard Linux system call.  (See <asm/unistd.h>
  
2194 	for a list of system call numbers).  As their name suggests these forms take between 0 and 3
  
2195 	syscall parameters, plus the system call number.
  
2196 
  2197 	In this FORTH, SYSCALL0 must be the last word in the built-in (assembler) dictionary because we
  
2198 	initialise the LATEST variable to point to it.  This means that if you want to extend the assembler
  
2199 	part, you must put new words before SYSCALL0, or else change how LATEST is initialised.
  
2200 */
  
2201 
  2202 	defcode "CHAR",4,,CHAR
  
2203 	call _WORD		// Returns %ecx = length, %edi = pointer to word.
  
2204 	xor %eax,%eax
  
2205 	movb (%edi),%al		// Get the first character of the word.
  
2206 	push %eax		// Push it onto the stack.
  
2207 	NEXT
  
2208 
  2209 	defcode "EXECUTE",7,,EXECUTE
  
2210 	pop %eax		// Get xt into %eax
  
2211 	jmp *(%eax)		// and jump to it.
  
2212 				// After xt runs its NEXT will continue executing the current word.
  
2213 
  2214 	defcode "SYSCALL3",8,,SYSCALL3
  
2215 	pop %eax		// System call number (see <asm/unistd.h>)
  
2216 	pop %ebx		// First parameter.
  
2217 	pop %ecx		// Second parameter
  
2218 	pop %edx		// Third parameter
  
2219 	int $0x80
  
2220 	push %eax		// Result (negative for -errno)
  
2221 	NEXT
  
2222 
  2223 	defcode "SYSCALL2",8,,SYSCALL2
  
2224 	pop %eax		// System call number (see <asm/unistd.h>)
  
2225 	pop %ebx		// First parameter.
  
2226 	pop %ecx		// Second parameter
  
2227 	int $0x80
  
2228 	push %eax		// Result (negative for -errno)
  
2229 	NEXT
  
2230 
  2231 	defcode "SYSCALL1",8,,SYSCALL1
  
2232 	pop %eax		// System call number (see <asm/unistd.h>)
  
2233 	pop %ebx		// First parameter.
  
2234 	int $0x80
  
2235 	push %eax		// Result (negative for -errno)
  
2236 	NEXT
  
2237 
  2238 	defcode "SYSCALL0",8,,SYSCALL0
  
2239 	pop %eax		// System call number (see <asm/unistd.h>)
  
2240 	int $0x80
  
2241 	push %eax		// Result (negative for -errno)
  
2242 	NEXT
  
2243 
  2244 /*
  
2245 	DATA SEGMENT ----------------------------------------------------------------------
  
2246 
  2247 	Here we set up the Linux data segment, used for user definitions and variously known as just
  
2248 	the 'data segment', 'user memory' or 'user definitions area'.  It is an area of memory which
  
2249 	grows upwards and stores both newly-defined FORTH words and global variables of various
  
2250 	sorts.
  
2251 
  2252 	It is completely analogous to the C heap, except there is no generalised 'malloc' and 'free'
  
2253 	(but as with everything in FORTH, writing such functions would just be a Simple Matter
  
2254 	Of Programming).  Instead in normal use the data segment just grows upwards as new FORTH
  
2255 	words are defined/appended to it.
  
2256 
  2257 	There are various "features" of the GNU toolchain which make setting up the data segment
  
2258 	more complicated than it really needs to be.  One is the GNU linker which inserts a random
  
2259 	"build ID" segment.  Another is Address Space Randomization which means we can't tell
  
2260 	where the kernel will choose to place the data segment (or the stack for that matter).
  
2261 
  2262 	Therefore writing this set_up_data_segment assembler routine is a little more complicated
  
2263 	than it really needs to be.  We ask the Linux kernel where it thinks the data segment starts
  
2264 	using the brk(2) system call, then ask it to reserve some initial space (also using brk(2)).
  
2265 
  2266 	You don't need to worry about this code.
  
2267 */
  
2268 	.text
  
2269 	.set INITIAL_DATA_SEGMENT_SIZE,65536
  
2270 set_up_data_segment:
  
2271 	xor %ebx,%ebx		// Call brk(0)
  
2272 	movl $__NR_brk,%eax
  
2273 	int $0x80
  
2274 	movl %eax,var_HERE	// Initialise HERE to point at beginning of data segment.
  
2275 	addl $INITIAL_DATA_SEGMENT_SIZE,%eax	// Reserve nn bytes of memory for initial data segment.
  
2276 	movl %eax,%ebx		// Call brk(HERE+INITIAL_DATA_SEGMENT_SIZE)
  
2277 	movl $__NR_brk,%eax
  
2278 	int $0x80
  
2279 	ret
  
2280 
  2281 /*
  
2282 	We allocate static buffers for the return static and input buffer (used when
  
2283 	reading in files and text that the user types in).
  
2284 */
  
2285 	.set RETURN_STACK_SIZE,8192
  
2286 	.set BUFFER_SIZE,4096
  
2287 
  2288 	.bss
  
2289 /* FORTH return stack. */
  
2290 	.align 4096
  
2291 return_stack:
  
2292 	.space RETURN_STACK_SIZE
  
2293 return_stack_top:		// Initial top of return stack.
  
2294 
  2295 /* This is used as a temporary input buffer when reading from files or the terminal. */
  
2296 	.align 4096
  
2297 buffer:
  
2298 	.space BUFFER_SIZE
  
2299 
  2300 /*
  
2301 	START OF FORTH CODE ----------------------------------------------------------------------
  
2302 
  2303 	We've now reached the stage where the FORTH system is running and self-hosting.  All further
  
2304 	words can be written as FORTH itself, including words like IF, THEN, .", etc which in most
  
2305 	languages would be considered rather fundamental.
  
2306 
  2307 	I used to append this here in the assembly file, but I got sick of fighting against gas's
  
2308 	crack-smoking (lack of) multiline string syntax.  So now that is in a separate file called
  
2309 	jonesforth.f
  
2310 
  2311 	If you don't already have that file, download it from http://annexia.org/forth in order
  
2312 	to continue the tutorial.
  
2313 */
  
2314 
  2315 /* END OF jonesforth.S */