Increase MAX_STATIC_DATA
[bazic.git] / tokeniser.h
1 include "token-table.h";
! Token codes for everything the tokeniser recognises that is not a keyword.

! Line terminator and literal/identifier tokens.
Constant TOKEN__EOL = 0;
Constant TOKEN__NUMBER = 1;
Constant TOKEN__STRING = 2;
Constant TOKEN__VAR = 3;
Constant TOKEN__SPACE = 4;

! Single-character operators and punctuation.
Constant TOKEN__PLUS = 5;
Constant TOKEN__MINUS = 6;
Constant TOKEN__STAR = 7;
Constant TOKEN__SLASH = 8;
Constant TOKEN__COLON = 9;
Constant TOKEN__EQUALS = 10;
Constant TOKEN__COMMA = 11;
Constant TOKEN__LPAREN = 12;
Constant TOKEN__RPAREN = 13;
Constant TOKEN__LARROW = 14;
Constant TOKEN__RARROW = 15;
Constant TOKEN__SEMICOLON = 16;

! Two-character comparison operators.
Constant TOKEN__GEQUAL = 17;
Constant TOKEN__LEQUAL = 18;
Constant TOKEN__NEQUAL = 19;
22
! Report whether c lies outside the lower-case range 'a' to 'z'.
! Returns true for any such character and false for a lower-case letter.

[ token_invalidchar c;
        if ((c >= 'a') && (c <= 'z'))
                rfalse;
        rtrue;
];
28
! Report whether c may appear in a variable name: a letter of either
! case, a decimal digit, or one of the characters _ % $.
! Returns true if so and false otherwise.

[ token_validvarnamechar c;
        if ((c >= 'a') && (c <= 'z')) rtrue;
        if ((c >= 'A') && (c <= 'Z')) rtrue;
        if ((c >= '0') && (c <= '9')) rtrue;
        if (c == '_' or '%' or '$') rtrue;
        rfalse;
];
37
! Report whether c is a decimal digit, '0' to '9'.
! Returns true if so and false otherwise.

[ token_isnumber c;
        if ((c < '0') || (c > '9'))
                rfalse;
        rtrue;
];
43
! Report whether c is white space: character code 32 (space) or 9 (tab).
! Returns true if so and false otherwise.

[ token_isspace c;
        if (c == 32 or 9)
                rtrue;
        rfalse;
];
49
! Tokenise an input stream.
!
! in  - zero-terminated byte string holding the text to tokenise.
! out - byte buffer that receives the tokenised line.
!
! The input and output pointers must point to different regions of memory.
!
! The output starts with a length byte, followed by the tokens, followed
! by TOKEN__EOL. The length byte counts every byte written, including
! itself and the TOKEN__EOL.
!
! Returns -1 on success. If a character cannot be tokenised, returns its
! offset within in; in that case out->0 is still 0 and no TOKEN__EOL has
! been written.
!
! token_encode is defined elsewhere. From its use here it appears to
! return the number of input characters it consumed (0 if it matched
! nothing) and to write a single byte of output -- confirm against
! token-table.h.

[ tokenise_stream in out  incount outcount i j k;
        ! Reserve the length byte; it is filled in once the line is complete.

        out->0 = 0;
        outcount = 1;
        incount = 0;

        while (in->incount)
        {
                i = token_encode(in+incount, out+outcount);
                if (i ~= 0)
                {
                        incount = incount + i;
                        outcount = outcount + 1;
                }
                else
                {
                        ! Not a recognised token. We test against all the
                        ! other things we recognise. Note the order! This
                        ! is important: digits are claimed as numbers before
                        ! the variable name test, which also accepts digits.

                        i = in->incount;

                        ! Is it white space? A whole run of white space
                        ! collapses into a single TOKEN__SPACE.

                        if (token_isspace(i))
                        {
                                while (token_isspace(in->(incount)))
                                        incount++;
                                out->(outcount++) = TOKEN__SPACE;
                                continue;
                        }

                        ! Is it a number? Accumulate the decimal value and
                        ! store it as a word after the opcode. No overflow
                        ! check is made on the accumulated value.

                        if (token_isnumber(i))
                        {
                                out->(outcount++) = TOKEN__NUMBER;
                                i = 0;
                                do {
                                        i = i*10 + (in->incount - '0');
                                        incount++;
                                } until (token_isnumber(in->incount) == 0);
                                (out+outcount)-->0 = i;
                                outcount = outcount + 2;
                                continue;
                        }

                        ! Is it a string?

                        if (i == '"')
                        {
                                ! Work out the size of the string. The scan
                                ! stops just past the closing quote, or just
                                ! past the terminating zero if the closing
                                ! quote is missing. Either way j is the number
                                ! of characters between the delimiters.

                                incount++;
                                i = incount;
                                do {
                                        k = in->(incount++);
                                } until ((k == '"') || (k == 0));
                                j = incount-i-1;

                                ! An unterminated string ran into the end of
                                ! the input. Step back onto the zero so that
                                ! the main loop sees it and stops, instead of
                                ! reading whatever follows the input buffer.

                                if (k == 0)
                                        incount--;

                                ! Emit the opcode and the length. The length
                                ! is stored in a single byte.

                                out->(outcount++) = TOKEN__STRING;
                                out->(outcount++) = j;

                                ! And now emit the string itself.

                                memcpy(out+outcount, in+i, j);
                                outcount = outcount + j;
                                continue;
                        }

                        ! Is it an operator?

                        switch (i)
                        {
                                '+':    out->(outcount++) = TOKEN__PLUS;
                                        incount++;
                                        continue;

                                '-':    out->(outcount++) = TOKEN__MINUS;
                                        incount++;
                                        continue;

                                '*':    out->(outcount++) = TOKEN__STAR;
                                        incount++;
                                        continue;

                                '/':    out->(outcount++) = TOKEN__SLASH;
                                        incount++;
                                        continue;

                                ':':    out->(outcount++) = TOKEN__COLON;
                                        incount++;
                                        continue;

                                '=':    out->(outcount++) = TOKEN__EQUALS;
                                        incount++;
                                        continue;

                                ',':    out->(outcount++) = TOKEN__COMMA;
                                        incount++;
                                        continue;

                                '(':    out->(outcount++) = TOKEN__LPAREN;
                                        incount++;
                                        continue;

                                ')':    out->(outcount++) = TOKEN__RPAREN;
                                        incount++;
                                        continue;

                                ! For < and > the following character decides
                                ! between the one- and two-character forms.
                                ! The first character is consumed by the
                                ! pre-increment; the second only if it matched.

                                '<':    switch (in->(++incount))
                                        {
                                                '>':    out->(outcount++) = TOKEN__NEQUAL;
                                                        incount++;
                                                        break;

                                                '=':    out->(outcount++) = TOKEN__LEQUAL;
                                                        incount++;
                                                        break;

                                                default: out->(outcount++) = TOKEN__LARROW;
                                                         break;
                                        }
                                        continue;

                                '>':    switch (in->(++incount))
                                        {
                                                '=':    out->(outcount++) = TOKEN__GEQUAL;
                                                        incount++;
                                                        break;

                                                default: out->(outcount++) = TOKEN__RARROW;
                                                        break;
                                        }
                                        continue;

                                ';':    out->(outcount++) = TOKEN__SEMICOLON;
                                        incount++;
                                        continue;
                        }

                        ! Is it a variable name? The name is copied verbatim
                        ! after the opcode and terminated with a zero byte.

                        if (token_validvarnamechar(i))
                        {
                                out->(outcount++) = TOKEN__VAR;
                                do {
                                        out->(outcount++) = in->(incount++);
                                } until (token_validvarnamechar(in->incount) == 0);
                                out->(outcount++) = 0;
                                continue;
                        }

                        ! Nothing matched: report where tokenising failed.

                        return incount;
                }
        }

        ! Terminate the line and patch up the line length.

        out->outcount = TOKEN__EOL;
        out->0 = outcount + 1;

        return -1;
];
224
! Detokenise a stream.
!
! in - address of the first token byte to print.
!
! Prints tokens one after another until TOKEN__EOL is reached, at which
! point a newline is printed and the routine returns. Any byte that is
! not one of the TOKEN__ codes handled below is passed to token_decode
! (defined elsewhere) and the result is printed as a string.

[ detokenise_stream in  tok;
        while (true)
        {
                ! Fetch the next token code and step past it.

                tok = in->0;
                in++;

                ! Cases do not fall through, so no case needs an explicit
                ! exit.

                switch (tok)
                {
                        TOKEN__EOL:
                                print "^";
                                return;

                        TOKEN__VAR:
                                ! The name follows as a zero-terminated run
                                ! of characters; the zero is consumed too.
                                tok = in->0;
                                in++;
                                while (tok ~= 0)
                                {
                                        print (char) tok;
                                        tok = in->0;
                                        in++;
                                }

                        TOKEN__NUMBER:
                                ! The value follows as a word.
                                print in-->0;
                                in = in + 2;

                        TOKEN__SPACE:
                                print " ";

                        TOKEN__STRING:
                                ! A length byte, then that many characters,
                                ! printed between double quotes.
                                tok = in->0;
                                in++;
                                print "~";
                                while (tok > 0)
                                {
                                        print (char) in->0;
                                        in++;
                                        tok--;
                                }
                                print "~";

                        TOKEN__PLUS:            print "+";
                        TOKEN__MINUS:           print "-";
                        TOKEN__STAR:            print "*";
                        TOKEN__SLASH:           print "/";
                        TOKEN__COLON:           print ":";
                        TOKEN__EQUALS:          print "=";
                        TOKEN__COMMA:           print ",";
                        TOKEN__LPAREN:          print "(";
                        TOKEN__RPAREN:          print ")";
                        TOKEN__LARROW:          print "<";
                        TOKEN__RARROW:          print ">";
                        TOKEN__SEMICOLON:       print ";";
                        TOKEN__GEQUAL:          print ">=";
                        TOKEN__LEQUAL:          print "<=";
                        TOKEN__NEQUAL:          print "<>";

                        default:
                                print (string) token_decode(tok);
                }
        }
];
323