/[svn]/parser/wlexer.ml
ViewVC logotype

Contents of /parser/wlexer.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 162 - (show annotations)
Tue Jul 10 17:11:42 2007 UTC (5 years, 10 months ago) by abate
File size: 11741 byte(s)
[r2002-12-01 20:46:51 by cvscast] Empty log message

Original author: cvscast
Date: 2002-12-01 20:46:51+00:00
1 let eof = 0
2 let encoding_error = 1
3 let xml_char = 2
4 let blank = 3
5 let lowercase = 4
6 let uppercase = 5
7 let ascii_digit = 6
8 let char_5f = 7
9 let char_3c = 8
10 let char_3e = 9
11 let char_3d = 10
12 let char_2e = 11
13 let char_2c = 12
14 let char_3a = 13
15 let char_3b = 14
16 let char_2b = 15
17 let char_2d = 16
18 let char_2a = 17
19 let char_2f = 18
20 let char_40 = 19
21 let char_26 = 20
22 let char_7b = 21
23 let char_7d = 22
24 let char_5b = 23
25 let char_5d = 24
26 let char_28 = 25
27 let char_29 = 26
28 let char_7c = 27
29 let char_3f = 28
30 let char_60 = 29
31 let char_22 = 30
32 let char_5c = 31
33 let char_27 = 32
34 let char_21 = 33
35
36 let one_char_classes = [
37 (0x5f, 07);
38 (0x3c, 08);
39 (0x3e, 09);
40 (0x3d, 10);
41 (0x2e, 11);
42 (0x2c, 12);
43 (0x3a, 13);
44 (0x3b, 14);
45 (0x2b, 15);
46 (0x2d, 16);
47 (0x2a, 17);
48 (0x2f, 18);
49 (0x40, 19);
50 (0x26, 20);
51 (0x7b, 21);
52 (0x7d, 22);
53 (0x5b, 23);
54 (0x5d, 24);
55 (0x28, 25);
56 (0x29, 26);
57 (0x7c, 27);
58 (0x3f, 28);
59 (0x60, 29);
60 (0x22, 30);
61 (0x5c, 31);
62 (0x27, 32);
63 (0x21, 33);
64 ]
65
66 let nb_classes = 34
67
68 # 12 "parser/wlexer.mll"
69
70 let keywords = Hashtbl.create 17
71
72 let error i j exn = raise (Location.Location ((i,j),exn))
73 exception Illegal_character of char
74 exception Unterminated_comment
75 exception Unterminated_string
76 exception Unterminated_string_in_comment
77
78 (* Buffer for string literals *)
79
80 let string_buff = Buffer.create 1024
81 let store_char = Buffer.add_char string_buff
82 let get_stored_string () =
83 let s = Buffer.contents string_buff in
84 Buffer.clear string_buff;
85 s
86
87 let string_start_pos = ref 0;;
88 let comment_start_pos : int list ref = ref [];;
89
90 let char_for_decimal_code s =
91 let s = String.sub s 1 (String.length s - 1) in
92 let c = int_of_string s in
93 assert ( c < 256 ); (* TODO: handle Unicode *)
94 Char.chr c
95
96 let rec tag_of_tag s i =
97 match s.[i] with
98 | '\008' | '\009' | '\010' | '\013' | '\032' -> tag_of_tag s (i+1)
99 | _ -> String.sub s i (String.length s - i)
100
101 let lex_tables = {
102 Lexing.lex_base =
103 "\000\000\009\000\012\000\018\000\252\255\251\255\004\000\255\255\
104 \005\000\254\255\014\000\013\000\003\000\005\000\253\255\255\255\
105 \247\255\246\255\020\000\047\000\051\000\018\000\043\000\250\255\
106 \027\000\017\000\005\000\050\000\011\000\044\000\040\000\249\255\
107 \250\255\248\255\064\000\067\000\071\000\080\000\057\000\084\000\
108 \100\000\104\000\114\000\118\000\124\000\062\000";
109 Lexing.lex_backtrk =
110 "\255\255\255\255\255\255\255\255\255\255\255\255\004\000\255\255\
111 \002\000\255\255\004\000\002\000\004\000\004\000\255\255\255\255\
112 \255\255\255\255\000\000\001\000\002\000\003\000\005\000\255\255\
113 \005\000\005\000\005\000\005\000\005\000\005\000\005\000\255\255\
114 \255\255\255\255\255\255\004\000\255\255\004\000\003\000\002\000\
115 \255\255\002\000\001\000\255\255\001\000\000\000";
116 Lexing.lex_default =
117 "\023\000\005\000\005\000\005\000\000\000\000\000\255\255\000\000\
118 \255\255\000\000\255\255\255\255\255\255\255\255\000\000\000\000\
119 \000\000\000\000\255\255\255\255\255\255\255\255\255\255\000\000\
120 \255\255\255\255\255\255\255\255\255\255\255\255\255\255\000\000\
121 \000\000\000\000\255\255\255\255\255\255\255\255\255\255\255\255\
122 \255\255\255\255\255\255\255\255\255\255\255\255";
123 Lexing.lex_trans =
124 "\016\000\017\000\017\000\018\000\019\000\020\000\021\000\019\000\
125 \022\000\004\000\008\000\008\000\004\000\024\000\025\000\026\000\
126 \027\000\026\000\004\000\011\000\011\000\028\000\015\000\045\000\
127 \038\000\029\000\012\000\030\000\026\000\009\000\031\000\032\000\
128 \031\000\032\000\013\000\009\000\009\000\032\000\032\000\014\000\
129 \032\000\014\000\007\000\010\000\009\000\009\000\034\000\035\000\
130 \035\000\006\000\007\000\042\000\042\000\042\000\042\000\039\000\
131 \039\000\039\000\039\000\032\000\043\000\033\000\032\000\038\000\
132 \040\000\045\000\032\000\034\000\035\000\035\000\000\000\035\000\
133 \035\000\035\000\035\000\037\000\037\000\037\000\037\000\042\000\
134 \036\000\000\000\000\000\039\000\037\000\037\000\037\000\037\000\
135 \039\000\039\000\039\000\039\000\000\000\036\000\000\000\000\000\
136 \000\000\040\000\000\000\035\000\000\000\000\000\000\000\037\000\
137 \041\000\041\000\041\000\041\000\041\000\041\000\041\000\041\000\
138 \037\000\000\000\000\000\000\000\039\000\040\000\042\000\042\000\
139 \042\000\042\000\044\000\044\000\044\000\044\000\000\000\043\000\
140 \044\000\044\000\044\000\044\000\041\000\000\000\000\000\000\000\
141 \041\000\043\000\000\000\000\000\000\000\000\000\000\000\000\000\
142 \000\000\000\000\042\000\000\000\000\000\000\000\044\000\000\000\
143 \000\000\000\000\000\000\000\000\044\000\000\000";
144 Lexing.lex_check =
145 "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
146 \000\000\001\000\006\000\008\000\002\000\000\000\000\000\000\000\
147 \000\000\000\000\003\000\011\000\010\000\000\000\013\000\018\000\
148 \021\000\000\000\001\000\000\000\000\000\012\000\000\000\025\000\
149 \000\000\026\000\001\000\006\000\006\000\024\000\028\000\001\000\
150 \024\000\001\000\002\000\002\000\010\000\010\000\022\000\022\000\
151 \022\000\003\000\003\000\019\000\019\000\019\000\019\000\020\000\
152 \020\000\020\000\020\000\027\000\019\000\029\000\030\000\038\000\
153 \020\000\045\000\027\000\034\000\034\000\034\000\255\255\035\000\
154 \035\000\035\000\035\000\036\000\036\000\036\000\036\000\019\000\
155 \035\000\255\255\255\255\020\000\037\000\037\000\037\000\037\000\
156 \039\000\039\000\039\000\039\000\255\255\037\000\255\255\255\255\
157 \255\255\039\000\255\255\035\000\255\255\255\255\255\255\036\000\
158 \040\000\040\000\040\000\040\000\041\000\041\000\041\000\041\000\
159 \037\000\255\255\255\255\255\255\039\000\041\000\042\000\042\000\
160 \042\000\042\000\043\000\043\000\043\000\043\000\255\255\042\000\
161 \044\000\044\000\044\000\044\000\040\000\255\255\255\255\255\255\
162 \041\000\044\000\255\255\255\255\255\255\255\255\255\255\255\255\
163 \255\255\255\255\042\000\255\255\255\255\255\255\043\000\255\255\
164 \255\255\255\255\255\255\255\255\044\000\255\255"
165 }
166
167 let rec token engine lexbuf =
168 match engine lex_tables 0 lexbuf with
169 0 -> (
170 # 51 "parser/wlexer.mll"
171 token engine lexbuf )
172 | 1 -> (
173 # 52 "parser/wlexer.mll"
174
175 let s = Lexing.lexeme lexbuf in
176 if Hashtbl.mem keywords s then "",s else "LIDENT",s
177 )
178 | 2 -> (
179 # 56 "parser/wlexer.mll"
180 "UIDENT",Lexing.lexeme lexbuf )
181 | 3 -> (
182 # 57 "parser/wlexer.mll"
183 "INT",Lexing.lexeme lexbuf )
184 | 4 -> (
185 # 58 "parser/wlexer.mll"
186
187 let s = Lexing.lexeme lexbuf in
188 "TAG", tag_of_tag s 1
189 )
190 | 5 -> (
191 # 66 "parser/wlexer.mll"
192 "",Lexing.lexeme lexbuf )
193 | 6 -> (
194 # 69 "parser/wlexer.mll"
195 let string_start = Lexing.lexeme_start lexbuf in
196 string_start_pos := string_start;
197 let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
198 if double_quote then string2 engine lexbuf else string1 engine lexbuf;
199 lexbuf.Lexing.lex_start_pos <-
200 string_start - lexbuf.Lexing.lex_abs_pos;
201 (if double_quote then "STRING2" else "STRING1"),
202 (get_stored_string()) )
203 | 7 -> (
204 # 79 "parser/wlexer.mll"
205 comment_start_pos := [Lexing.lexeme_start lexbuf];
206 comment engine lexbuf;
207 token engine lexbuf )
208 | 8 -> (
209 # 84 "parser/wlexer.mll"
210 "EOI","" )
211 | 9 -> (
212 # 86 "parser/wlexer.mll"
213 error
214 (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
215 (Illegal_character ((Lexing.lexeme lexbuf).[0])) )
216 | _ -> failwith "lexing: empty token [token]"
217
218 and comment engine lexbuf =
219 match engine lex_tables 1 lexbuf with
220 0 -> (
221 # 92 "parser/wlexer.mll"
222 comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
223 comment engine lexbuf;
224 )
225 | 1 -> (
226 # 96 "parser/wlexer.mll"
227 comment_start_pos := List.tl !comment_start_pos;
228 if !comment_start_pos <> [] then comment engine lexbuf;
229 )
230 | 2 -> (
231 # 100 "parser/wlexer.mll"
232 string_start_pos := Lexing.lexeme_start lexbuf;
233 let string =
234 if Lexing.lexeme_char lexbuf 0 = '"' then string2 else string1 in
235 (try string engine lexbuf
236 with Location.Location (_,Unterminated_string) ->
237 let st = List.hd !comment_start_pos in
238 error st (st+2) Unterminated_string_in_comment);
239 Buffer.clear string_buff;
240 comment engine lexbuf )
241 | 3 -> (
242 # 110 "parser/wlexer.mll"
243 let st = List.hd !comment_start_pos in
244 error st (st+2) Unterminated_comment
245 )
246 | 4 -> (
247 # 114 "parser/wlexer.mll"
248 comment engine lexbuf )
249 | _ -> failwith "lexing: empty token [comment]"
250
251 and string2 engine lexbuf =
252 match engine lex_tables 2 lexbuf with
253 0 -> (
254 # 118 "parser/wlexer.mll"
255 () )
256 | 1 -> (
257 # 120 "parser/wlexer.mll"
258 store_char (Lexing.lexeme_char lexbuf 1);
259 string2 engine lexbuf )
260 | 2 -> (
261 # 123 "parser/wlexer.mll"
262 store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
263 string2 engine lexbuf )
264 | 3 -> (
265 # 126 "parser/wlexer.mll"
266 error !string_start_pos (!string_start_pos+1) Unterminated_string )
267 | 4 -> (
268 # 128 "parser/wlexer.mll"
269 store_char (Lexing.lexeme_char lexbuf 0);
270 (* TODO: Unicode *)
271 string2 engine lexbuf )
272 | _ -> failwith "lexing: empty token [string2]"
273
274 and string1 engine lexbuf =
275 match engine lex_tables 3 lexbuf with
276 0 -> (
277 # 134 "parser/wlexer.mll"
278 () )
279 | 1 -> (
280 # 136 "parser/wlexer.mll"
281 store_char (Lexing.lexeme_char lexbuf 1);
282 string1 engine lexbuf )
283 | 2 -> (
284 # 139 "parser/wlexer.mll"
285 store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
286 string1 engine lexbuf )
287 | 3 -> (
288 # 142 "parser/wlexer.mll"
289 error !string_start_pos (!string_start_pos+1) Unterminated_string )
290 | 4 -> (
291 # 144 "parser/wlexer.mll"
292 store_char (Lexing.lexeme_char lexbuf 0);
293 string1 engine lexbuf )
294 | _ -> failwith "lexing: empty token [string1]"
295
296 ;;
297
298 # 147 "parser/wlexer.mll"
299
300
301 let lexer_func_of_wlex lexfun lexengine cs =
302 let lb =
303 Lexing.from_function
304 (fun s n ->
305 try s.[0] <- Stream.next cs; 1 with Stream.Failure -> 0)
306 in
307 let next () =
308 let tok = lexfun lexengine lb in
309 let loc = (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in
310 (tok, loc)
311 in
312 Token.make_stream_and_location next
313
314 let register_kw (s1,s2) =
315 if s1 = "" then
316 match s2.[0] with
317 | 'a' .. 'z' when not (Hashtbl.mem keywords s2) ->
318 Hashtbl.add keywords s2 ()
319 | _ -> ()
320
321
322 let lexer lexfun lexengine =
323 {
324 Token.tok_func = lexer_func_of_wlex lexfun lexengine;
325 Token.tok_using = register_kw;
326 Token.tok_removing = (fun _ -> ());
327 Token.tok_match = Token.default_match;
328 Token.tok_text = Token.lexer_text
329 }
330
331 let classes =
332 let c i = (i,i) in
333 let i ch1 ch2 = (Char.code ch1, Char.code ch2) in
334 [ (ascii_digit, [i '0' '9']);
335 (lowercase, [i 'a' 'z']);
336 (uppercase, [i 'A' 'Z']);
337 (blank, [c 8; c 9; c 10; c 13; c 32]);
338 ]
339
340 let table =
341 assert(nb_classes <= 256);
342 let v = String.make 256 (Char.chr encoding_error) in
343 let fill_int c (i, j) = String.fill v i (j-i+1) c in
344 let fill_class (c, l) = List.iter (fill_int (Char.chr c)) l in
345 let fill_char (ch, cl) = v.[ch] <- Char.chr cl in
346 List.iter fill_class classes;
347 List.iter fill_char one_char_classes;
348 v
349
350 let utf8_engine = Lex_engines.engine_tiny_utf8 table
351 (fun c ->
352 if c>=0x10000 && c < 0x11000 then xml_char
353 else encoding_error)
354
355 let latin1_engine = Lex_engines.engine_tiny_8bit table

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5