db_lex.c revision 1.2 1 /*
2 * Mach Operating System
3 * Copyright (c) 1991,1990 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie the
24 * rights to redistribute these changes.
25 */
26 /*
27 * $Id: db_lex.c,v 1.2 1993/05/20 03:39:16 cgd Exp $
28 *
29 * HISTORY
30 * $Log: db_lex.c,v $
31 * Revision 1.2 1993/05/20 03:39:16 cgd
32 * add explicit rcs id
33 *
34 * Revision 1.1.1.1 1993/03/21 09:46:26 cgd
35 * initial import of 386bsd-0.1 sources
36 *
37 * Revision 1.1 1992/03/25 21:45:13 pace
38 * Initial revision
39 *
40 * Revision 2.3 91/02/05 17:06:36 mrt
41 * Changed to new Mach copyright
42 * [91/01/31 16:18:20 mrt]
43 *
44 * Revision 2.2 90/08/27 21:51:10 dbg
45 * Add 'dotdot' token.
46 * [90/08/22 dbg]
47 *
48 * Allow backslash to quote any character into an identifier.
49 * Allow colon in identifier for symbol table qualification.
50 * [90/08/16 dbg]
51 * Reduce lint.
52 * [90/08/07 dbg]
53 * Created.
54 * [90/07/25 dbg]
55 *
56 */
57 /*
58 * Author: David B. Golub, Carnegie Mellon University
59 * Date: 7/90
60 */
61 /*
62 * Lexical analyzer.
63 */
64 #include <ddb/db_lex.h>
65
/*
 * Input line state shared by the lexer.
 *
 * db_line   - buffer that db_read_line() fills through db_readline().
 * db_lp     - next character db_read_char() will hand out.
 * db_endlp  - one past the last valid character in db_line; when
 *             db_lp reaches it, db_read_char() reports end of input.
 */
char	db_line[120];
char	*db_lp;
char	*db_endlp;
68
69 int
70 db_read_line()
71 {
72 int i;
73
74 i = db_readline(db_line, sizeof(db_line));
75 if (i == 0)
76 return (0); /* EOI */
77 db_lp = db_line;
78 db_endlp = db_lp + i;
79 return (i);
80 }
81
82 void
83 db_flush_line()
84 {
85 db_lp = db_line;
86 db_endlp = db_line;
87 }
88
89 int db_look_char = 0;
90
91 int
92 db_read_char()
93 {
94 int c;
95
96 if (db_look_char != 0) {
97 c = db_look_char;
98 db_look_char = 0;
99 }
100 else if (db_lp >= db_endlp)
101 c = -1;
102 else
103 c = *db_lp++;
104 return (c);
105 }
106
107 void
108 db_unread_char(c)
109 {
110 db_look_char = c;
111 }
112
/*
 * One-token push-back slot.  Zero means "empty"; db_read_token()
 * drains it before asking db_lex() for a new token.
 */
int	db_look_token = 0;

/*
 * Push token t back so that the next db_read_token() returns it.
 * Only one token is remembered; a later call replaces an earlier one.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
	return;
}
121
122 int
123 db_read_token()
124 {
125 int t;
126
127 if (db_look_token) {
128 t = db_look_token;
129 db_look_token = 0;
130 }
131 else
132 t = db_lex();
133 return (t);
134 }
135
136 int db_tok_number;
137 char db_tok_string[TOK_STRING_SIZE];
138
139 int db_radix = 16;
140
141 void
142 db_flush_lex()
143 {
144 db_flush_line();
145 db_look_char = 0;
146 db_look_token = 0;
147 }
148
149 int
150 db_lex()
151 {
152 int c;
153
154 c = db_read_char();
155 while (c <= ' ' || c > '~') {
156 if (c == '\n' || c == -1)
157 return (tEOL);
158 c = db_read_char();
159 }
160
161 if (c >= '0' && c <= '9') {
162 /* number */
163 int r, digit;
164
165 if (c > '0')
166 r = db_radix;
167 else {
168 c = db_read_char();
169 if (c == 'O' || c == 'o')
170 r = 8;
171 else if (c == 'T' || c == 't')
172 r = 10;
173 else if (c == 'X' || c == 'x')
174 r = 16;
175 else {
176 r = db_radix;
177 db_unread_char(c);
178 }
179 c = db_read_char();
180 }
181 db_tok_number = 0;
182 for (;;) {
183 if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
184 digit = c - '0';
185 else if (r == 16 && ((c >= 'A' && c <= 'F') ||
186 (c >= 'a' && c <= 'f'))) {
187 if (c >= 'a')
188 digit = c - 'a' + 10;
189 else if (c >= 'A')
190 digit = c - 'A' + 10;
191 }
192 else
193 break;
194 db_tok_number = db_tok_number * r + digit;
195 c = db_read_char();
196 }
197 if ((c >= '0' && c <= '9') ||
198 (c >= 'A' && c <= 'Z') ||
199 (c >= 'a' && c <= 'z') ||
200 (c == '_'))
201 {
202 db_error("Bad character in number\n");
203 db_flush_lex();
204 return (tEOF);
205 }
206 db_unread_char(c);
207 return (tNUMBER);
208 }
209 if ((c >= 'A' && c <= 'Z') ||
210 (c >= 'a' && c <= 'z') ||
211 c == '_' || c == '\\')
212 {
213 /* string */
214 char *cp;
215
216 cp = db_tok_string;
217 if (c == '\\') {
218 c = db_read_char();
219 if (c == '\n' || c == -1)
220 db_error("Bad escape\n");
221 }
222 *cp++ = c;
223 while (1) {
224 c = db_read_char();
225 if ((c >= 'A' && c <= 'Z') ||
226 (c >= 'a' && c <= 'z') ||
227 (c >= '0' && c <= '9') ||
228 c == '_' || c == '\\' || c == ':')
229 {
230 if (c == '\\') {
231 c = db_read_char();
232 if (c == '\n' || c == -1)
233 db_error("Bad escape\n");
234 }
235 *cp++ = c;
236 if (cp == db_tok_string+sizeof(db_tok_string)) {
237 db_error("String too long\n");
238 db_flush_lex();
239 return (tEOF);
240 }
241 continue;
242 }
243 else {
244 *cp = '\0';
245 break;
246 }
247 }
248 db_unread_char(c);
249 return (tIDENT);
250 }
251
252 switch (c) {
253 case '+':
254 return (tPLUS);
255 case '-':
256 return (tMINUS);
257 case '.':
258 c = db_read_char();
259 if (c == '.')
260 return (tDOTDOT);
261 db_unread_char(c);
262 return (tDOT);
263 case '*':
264 return (tSTAR);
265 case '/':
266 return (tSLASH);
267 case '=':
268 return (tEQ);
269 case '%':
270 return (tPCT);
271 case '#':
272 return (tHASH);
273 case '(':
274 return (tLPAREN);
275 case ')':
276 return (tRPAREN);
277 case ',':
278 return (tCOMMA);
279 case '"':
280 return (tDITTO);
281 case '$':
282 return (tDOLLAR);
283 case '!':
284 return (tEXCL);
285 case '<':
286 c = db_read_char();
287 if (c == '<')
288 return (tSHIFT_L);
289 db_unread_char(c);
290 break;
291 case '>':
292 c = db_read_char();
293 if (c == '>')
294 return (tSHIFT_R);
295 db_unread_char(c);
296 break;
297 case -1:
298 return (tEOF);
299 }
300 db_printf("Bad character\n");
301 db_flush_lex();
302 return (tEOF);
303 }
304