/[svn]/parser/wlexer.ml
ViewVC logotype

Contents of /parser/wlexer.ml

Parent Directory | Revision Log


Revision 553 - (show annotations)
Tue Jul 10 17:44:06 2007 UTC (5 years, 10 months ago) by abate
File size: 18795 byte(s)
[r2003-07-02 09:24:27 by cvscast] reinit_ns directive

Original author: cvscast
Date: 2003-07-02 09:24:28+00:00
(* Character class identifiers.  Each constant is the small integer that the
   class table ([table]) or the fallback classifier of [utf8_engine]
   associates with a code point.  A [char_XX] constant is the class of the
   single character whose hexadecimal code is XX (see [one_char_classes]). *)
1 let eof = 0
2 let encoding_error = 1
3 let xml_char = 2
4 let blank = 3
5 let lowercase = 4
6 let uppercase = 5
7 let ascii_digit = 6
8 let char_23 = 7
9 let char_5f = 8
10 let char_3c = 9
11 let char_3e = 10
12 let char_3d = 11
13 let char_2e = 12
14 let char_2c = 13
15 let char_3a = 14
16 let char_3b = 15
17 let char_2b = 16
18 let char_2d = 17
19 let char_2a = 18
20 let char_2f = 19
21 let char_40 = 20
22 let char_26 = 21
23 let char_7b = 22
24 let char_7d = 23
25 let char_5b = 24
26 let char_5d = 25
27 let char_28 = 26
28 let char_29 = 27
29 let char_7c = 28
30 let char_3f = 29
31 let char_60 = 30
32 let char_22 = 31
33 let char_5c = 32
34 let char_27 = 33
35 let char_21 = 34
36 let unicode_base_char = 35
37 let unicode_ideographic = 36
38 let unicode_combining_char = 37
39 let unicode_digit = 38
40 let unicode_extender = 39
41
(* (code point, class id) pairs for the characters that each get a class of
   their own.  The ids are written as literals but equal the corresponding
   [char_XX] constants (e.g. 0x23 -> 7 = [char_23]). *)
42 let one_char_classes = [
43 (0x23, 07);
44 (0x5f, 08);
45 (0x3c, 09);
46 (0x3e, 10);
47 (0x3d, 11);
48 (0x2e, 12);
49 (0x2c, 13);
50 (0x3a, 14);
51 (0x3b, 15);
52 (0x2b, 16);
53 (0x2d, 17);
54 (0x2a, 18);
55 (0x2f, 19);
56 (0x40, 20);
57 (0x26, 21);
58 (0x7b, 22);
59 (0x7d, 23);
60 (0x5b, 24);
61 (0x5d, 25);
62 (0x28, 26);
63 (0x29, 27);
64 (0x7c, 28);
65 (0x3f, 29);
66 (0x60, 30);
67 (0x22, 31);
68 (0x5c, 32);
69 (0x27, 33);
70 (0x21, 34);
71 ]
72
(* Total number of character classes: ids run from 0 ([eof]) to 39
   ([unicode_extender]).  [table] asserts that this fits in one byte. *)
73 let nb_classes = 40
74
75 # 17 "parser/wlexer.mll"
76
(* Set of keywords (keys only, values are unit).  Filled by [register_kw] and
   consulted by [token] to tell a keyword from an ordinary LIDENT. *)
77 let keywords = Hashtbl.create 17
78
(* Set to [true] by [token] while it is skipping a comment, [false] otherwise. *)
79 let in_comment = ref false
80
(* Shorthand for [Location.raise_loc]; used below as
   [error start_offset end_offset exn]. *)
81 let error = Location.raise_loc
(* Lexical errors reported through [error]. *)
82 exception Illegal_character of char
83 exception Unterminated_comment
84 exception Unterminated_string
85 exception Unterminated_string_in_comment
86
87
(* String literals are accumulated in one shared buffer, always in UTF-8. *)

let string_buff = Buffer.create 1024

(* Append to the buffer: a single byte, an already-encoded string, or a code
   point (handed to [Encodings.Utf8.store]). *)
let store_ascii ch = Buffer.add_char string_buff ch
let store_char str = Buffer.add_string string_buff str
let store_code = Encodings.Utf8.store string_buff

(* Return everything accumulated so far and leave the buffer empty. *)
let get_stored_string () =
  let contents = Buffer.contents string_buff in
  Buffer.clear string_buff;
  contents
(* Store the character denoted by a one-letter backslash escape:
   'n' -> newline, 'r' -> carriage return, 't' -> tab.
   Raises [Illegal_character '\\'] (without location) for any other letter. *)
let store_special escape =
  let ch =
    match escape with
    | 'n' -> '\n'
    | 'r' -> '\r'
    | 't' -> '\t'
    | _ -> raise (Illegal_character '\\')
  in
  store_ascii ch
104
(* Start offset of the string literal being lexed; used to locate
   [Unterminated_string] errors. *)
105 let string_start_pos = ref 0;;
(* Stack of start offsets of the currently open comments, innermost first:
   [comment] pushes on a nested opening and pops on a closing. *)
106 let comment_start_pos : int list ref = ref [];;
107
(* Decode a decimal character escape: the first and the last character of the
   lexeme are dropped and the rest is read with [int_of_string].
   Raises [Invalid_argument] if the lexeme has fewer than two characters and
   [Failure] if the middle part is not a valid integer literal. *)
let decimal_char s =
  let digits_len = String.length s - 2 in
  let digits = String.sub s 1 digits_len in
  int_of_string digits
110
111
(* Value (0..15) of one hexadecimal digit.  Both lowercase and uppercase
   letters are accepted ('a'..'f' and 'A'..'F').
   Raises [Failure "Invalid hexadecimal digit"] for any other character. *)
let hexa_digit = function
  | '0'..'9' as c -> Char.code c - Char.code '0'
  | 'a'..'f' as c -> Char.code c - Char.code 'a' + 10
  | 'A'..'F' as c -> Char.code c - Char.code 'A' + 10
  | _ -> failwith "Invalid hexadecimal digit" (* TODO: error loc *)
116
117
(* Decode a hexadecimal character escape: every character of [s] except the
   last one is read as a hexadecimal digit (see [hexa_digit]), most
   significant digit first. *)
let hexa_char s =
  (* Index of the trailing character, which is not part of the number. *)
  let stop = String.length s - 1 in
  let rec go pos acc =
    if pos = stop then acc
    else go (pos + 1) ((acc * 16) + hexa_digit s.[pos])
  in
  go 0 0
124
(* Transition tables of the lexing automaton, in the [Lexing.lex_tables]
   record format.  The surrounding [# n "parser/wlexer.mll"] line directives
   indicate that this file is generated from wlexer.mll, so these strings
   should presumably be regenerated rather than edited by hand.  The entry
   points [token], [comment], [string] and [parse_hexa_char] run the automaton
   from start states 0, 1, 2 and 3 respectively. *)
125 let lex_tables = {
126 Lexing.lex_base =
127 "\000\000\023\000\011\000\015\000\254\255\042\000\046\000\255\255\
128 \250\255\249\255\255\255\041\000\253\255\019\000\252\255\252\255\
129 \251\255\000\000\002\000\253\255\246\255\245\255\010\000\054\000\
130 \018\000\059\000\021\000\059\000\250\255\026\000\064\000\036\000\
131 \067\000\070\000\024\000\027\000\047\000\054\000\248\255\250\255\
132 \247\255\073\000\063\000\093\000\103\000\098\000\139\000\079\000\
133 ";
134 Lexing.lex_backtrk =
135 "\255\255\255\255\255\255\255\255\255\255\001\000\255\255\255\255\
136 \255\255\255\255\255\255\004\000\255\255\255\255\255\255\255\255\
137 \255\255\004\000\004\000\255\255\255\255\255\255\000\000\001\000\
138 \004\000\005\000\005\000\005\000\255\255\005\000\005\000\005\000\
139 \005\000\005\000\005\000\005\000\005\000\005\000\255\255\255\255\
140 \255\255\004\000\255\255\006\000\001\000\255\255\001\000\000\000\
141 ";
142 Lexing.lex_default =
143 "\028\000\016\000\009\000\004\000\000\000\255\255\255\255\000\000\
144 \000\000\000\000\000\000\255\255\000\000\255\255\000\000\000\000\
145 \000\000\255\255\255\255\000\000\000\000\000\000\255\255\255\255\
146 \255\255\255\255\255\255\255\255\000\000\255\255\255\255\255\255\
147 \255\255\255\255\255\255\255\255\255\255\255\255\000\000\000\000\
148 \000\000\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
149 ";
150 Lexing.lex_trans =
151 "\020\000\021\000\021\000\022\000\023\000\023\000\024\000\025\000\
152 \023\000\026\000\027\000\008\000\029\000\047\000\030\000\031\000\
153 \032\000\033\000\034\000\005\000\007\000\005\000\035\000\015\000\
154 \041\000\013\000\036\000\004\000\037\000\034\000\039\000\038\000\
155 \039\000\038\000\014\000\023\000\023\000\021\000\021\000\021\000\
156 \042\000\017\000\010\000\011\000\010\000\012\000\006\000\013\000\
157 \006\000\018\000\006\000\039\000\006\000\039\000\019\000\039\000\
158 \019\000\007\000\044\000\044\000\044\000\007\000\044\000\043\000\
159 \043\000\040\000\044\000\043\000\045\000\039\000\039\000\044\000\
160 \004\000\004\000\004\000\039\000\041\000\039\000\039\000\041\000\
161 \039\000\014\000\047\000\039\000\000\000\000\000\000\000\039\000\
162 \000\000\044\000\044\000\044\000\044\000\044\000\043\000\043\000\
163 \039\000\043\000\043\000\043\000\000\000\043\000\046\000\046\000\
164 \000\000\043\000\046\000\044\000\044\000\044\000\043\000\044\000\
165 \000\000\000\000\000\000\044\000\012\000\045\000\000\000\000\000\
166 \044\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
167 \043\000\043\000\043\000\043\000\043\000\046\000\046\000\000\000\
168 \000\000\000\000\044\000\044\000\044\000\044\000\044\000\046\000\
169 \046\000\046\000\000\000\046\000\000\000\000\000\000\000\046\000\
170 \000\000\000\000\000\000\000\000\046\000\000\000\000\000\000\000\
171 \000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
172 \000\000\000\000\000\000\000\000\000\000\000\000\046\000\046\000\
173 \046\000\046\000\046\000";
174 Lexing.lex_check =
175 "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
176 \000\000\000\000\000\000\002\000\000\000\022\000\000\000\000\000\
177 \000\000\000\000\000\000\003\000\018\000\003\000\000\000\001\000\
178 \024\000\013\000\000\000\017\000\000\000\000\000\026\000\000\000\
179 \026\000\000\000\013\000\000\000\000\000\000\000\000\000\000\000\
180 \029\000\001\000\002\000\002\000\002\000\011\000\005\000\011\000\
181 \005\000\001\000\006\000\031\000\006\000\034\000\001\000\035\000\
182 \001\000\005\000\023\000\023\000\023\000\006\000\023\000\025\000\
183 \025\000\036\000\023\000\025\000\023\000\027\000\027\000\023\000\
184 \011\000\011\000\011\000\030\000\033\000\037\000\030\000\041\000\
185 \033\000\042\000\047\000\032\000\255\255\255\255\255\255\033\000\
186 \255\255\023\000\023\000\023\000\023\000\023\000\025\000\025\000\
187 \032\000\043\000\043\000\043\000\255\255\043\000\045\000\045\000\
188 \255\255\043\000\045\000\044\000\044\000\044\000\043\000\044\000\
189 \255\255\255\255\255\255\044\000\045\000\044\000\255\255\255\255\
190 \044\000\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
191 \043\000\043\000\043\000\043\000\043\000\045\000\045\000\255\255\
192 \255\255\255\255\044\000\044\000\044\000\044\000\044\000\046\000\
193 \046\000\046\000\255\255\046\000\255\255\255\255\255\255\046\000\
194 \255\255\255\255\255\255\255\255\046\000\255\255\255\255\255\255\
195 \255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\
196 \255\255\255\255\255\255\255\255\255\255\255\255\046\000\046\000\
197 \046\000\046\000\046\000"
198 }
199
(* Main lexer entry point.  Runs [engine] on the automaton (start state 0)
   and returns a token as a (kind, text) pair of strings.  Actions, by rule
   number returned by the engine:
     0  nothing is produced; lexing simply restarts (presumably blanks --
        the rule text lives in wlexer.mll);
     1  identifier: "UIDENT" if its first character is in 'A'..'Z', a
        keyword token ("", s) if [s] is in [keywords], "LIDENT" otherwise;
     2  "ANY_IN_NS" carrying the lexeme minus its last two characters;
     3  "ANY_IN_NS" with empty text;
     4  "INT";
     5  ("", lexeme), the same shape as a keyword token;
     6  "DIRECTIVE";
     7  string literal, lexed by [string]: "STRING2" when it starts with a
        double quote, "STRING1" otherwise;
     8  comment, skipped by [comment], then lexing restarts;
     9  "EOI";
     10 [Illegal_character] error on the first character of the lexeme. *)
200 let rec token engine lexbuf =
201 match engine lex_tables 0 lexbuf with
202 0 -> (
203 # 78 "parser/wlexer.mll"
204 token engine lexbuf )
205 | 1 -> (
206 # 80 "parser/wlexer.mll"
207
208 let s = Lexing.lexeme lexbuf in
209 if (s.[0] >= 'A') && (s.[0] <= 'Z')
210 then "UIDENT",s
211 else if Hashtbl.mem keywords s then "",s else "LIDENT",s
212 )
213 | 2 -> (
214 # 87 "parser/wlexer.mll"
215
216 let s = Lexing.lexeme lexbuf in
217 let s = String.sub s 0 (String.length s - 2) in
218 "ANY_IN_NS", s
219 )
220 | 3 -> (
221 # 92 "parser/wlexer.mll"
222 "ANY_IN_NS", "" )
223 | 4 -> (
224 # 94 "parser/wlexer.mll"
225 "INT",Lexing.lexeme lexbuf )
226 | 5 -> (
227 # 99 "parser/wlexer.mll"
228 "",Lexing.lexeme lexbuf )
229 | 6 -> (
230 # 100 "parser/wlexer.mll"
231 "DIRECTIVE",Lexing.lexeme lexbuf )
232 | 7 -> (
233 # 102 "parser/wlexer.mll"
(* The opening quote lexeme is passed to [string] as the expected closing
   delimiter.  Afterwards the start position of [lexbuf] is moved back to the
   opening quote so that the token location covers the whole literal.
   NOTE(review): unlike rule 2 of [comment], the buffer is not cleared before
   calling [string]; if a previous literal failed with an exception its
   partial contents may still be in [string_buff] -- confirm. *)
234 let string_start = Lexing.lexeme_start lexbuf in
235 string_start_pos := string_start;
236 let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
237 string (Lexing.lexeme lexbuf) engine lexbuf;
238 lexbuf.Lexing.lex_start_pos <-
239 string_start - lexbuf.Lexing.lex_abs_pos;
240 (if double_quote then "STRING2" else "STRING1"),
241 (get_stored_string()) )
242 | 8 -> (
243 # 112 "parser/wlexer.mll"
244 comment_start_pos := [Lexing.lexeme_start lexbuf];
245 in_comment := true;
246 comment engine lexbuf;
247 in_comment := false;
248 token engine lexbuf )
249 | 9 -> (
250 # 119 "parser/wlexer.mll"
251 "EOI","" )
252 | 10 -> (
253 # 121 "parser/wlexer.mll"
254 error
255 (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
256 (Illegal_character ((Lexing.lexeme lexbuf).[0])) )
257 | _ -> failwith "lexing: empty token [token]"
258
(* Skip the body of a (possibly nested) comment, using automaton start
   state 1.  [comment_start_pos] holds the start offsets of the open comments.
   Actions, by rule number:
     0  nested opening: push its start offset and go on;
     1  closing: pop one level; go on only while some comment is still open;
     2  string literal inside the comment: lexed with [string] and thrown
        away; an [Unterminated_string] is re-reported as
        [Unterminated_string_in_comment], located on the innermost comment;
     3  [Unterminated_comment] error, located on the innermost comment
        (presumably triggered by end of input);
     4  anything else is skipped. *)
259 and comment engine lexbuf =
260 match engine lex_tables 1 lexbuf with
261 0 -> (
262 # 127 "parser/wlexer.mll"
263 comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
264 comment engine lexbuf;
265 )
266 | 1 -> (
267 # 131 "parser/wlexer.mll"
268 comment_start_pos := List.tl !comment_start_pos;
269 if !comment_start_pos <> [] then comment engine lexbuf;
270 )
271 | 2 -> (
272 # 135 "parser/wlexer.mll"
273 string_start_pos := Lexing.lexeme_start lexbuf;
274 Buffer.clear string_buff;
275 let ender = Lexing.lexeme lexbuf in
276 (try string ender engine lexbuf
277 with Location.Location (_,_,Unterminated_string) ->
278 let st = List.hd !comment_start_pos in
279 error st (st+2) Unterminated_string_in_comment);
280 Buffer.clear string_buff;
281 comment engine lexbuf )
282 | 3 -> (
283 # 145 "parser/wlexer.mll"
284 let st = List.hd !comment_start_pos in
285 error st (st+2) Unterminated_comment
286 )
287 | 4 -> (
288 # 149 "parser/wlexer.mll"
289 comment engine lexbuf )
290 | _ -> failwith "lexing: empty token [comment]"
291
(* Lex the body of a string literal into [string_buff], using automaton start
   state 2.  [ender] is the lexeme that closes the literal (callers pass the
   opening quote lexeme).  Actions, by rule number:
     0  a lexeme compared against [ender]: equal means the literal is
        finished; otherwise it is stored verbatim and lexing goes on;
     1  two-character lexeme whose second character is stored as is
        (presumably a backslash-escaped quote or backslash);
     2  backslash followed by a letter: 'x' starts a hexadecimal escape
        (see [parse_hexa_char]), other letters go through [store_special];
     3  decimal escape, decoded by [decimal_char];
     4  [Illegal_character '\\'] error on the current lexeme;
     5  [Unterminated_string] error, located at [string_start_pos];
     6  any other character: stored as the code point equal to its byte
        value. *)
292 and string ender engine lexbuf =
293 match engine lex_tables 2 lexbuf with
294 0 -> (
295 # 153 "parser/wlexer.mll"
296 let c = Lexing.lexeme lexbuf in
297 if c = ender then ()
298 else (store_char (Lexing.lexeme lexbuf);
299 string ender engine lexbuf) )
300 | 1 -> (
301 # 158 "parser/wlexer.mll"
302 store_ascii (Lexing.lexeme_char lexbuf 1);
303 string ender engine lexbuf )
304 | 2 -> (
305 # 161 "parser/wlexer.mll"
(* The if/else below ends at the ';': [string] is resumed in both cases. *)
306 let c = Lexing.lexeme_char lexbuf 1 in
307 if c = 'x'
308 then parse_hexa_char engine lexbuf
309 else store_special c;
310 string ender engine lexbuf )
311 | 3 -> (
312 # 167 "parser/wlexer.mll"
313 store_code (decimal_char (Lexing.lexeme lexbuf));
314 string ender engine lexbuf )
315 | 4 -> (
316 # 170 "parser/wlexer.mll"
317 error
318 (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
319 (Illegal_character '\\') )
320 | 5 -> (
321 # 174 "parser/wlexer.mll"
322 error !string_start_pos (!string_start_pos+1) Unterminated_string )
323 | 6 -> (
324 # 176 "parser/wlexer.mll"
325 store_code (Char.code (Lexing.lexeme_char lexbuf 0));
326 (* Adapt when source is UTF8 *)
327 string ender engine lexbuf )
328 | _ -> failwith "lexing: empty token [string ender]"
329
(* Lex the remainder of a hexadecimal escape (automaton start state 3), called
   by [string] after a backslash followed by 'x'.
     0  the lexeme is decoded by [hexa_char] and the resulting code point is
        stored in [string_buff];
     1  [Illegal_character '\\'] error on the current lexeme. *)
330 and parse_hexa_char engine lexbuf =
331 match engine lex_tables 3 lexbuf with
332 0 -> (
333 # 182 "parser/wlexer.mll"
334 store_code (hexa_char (Lexing.lexeme lexbuf)) )
335 | 1 -> (
336 # 184 "parser/wlexer.mll"
337 error
338 (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
339 (Illegal_character '\\') )
340 | _ -> failwith "lexing: empty token [parse_hexa_char]"
341
342 ;;
343
344 # 190 "parser/wlexer.mll"
345
346
(* Offset added to the token locations of the next lexer created by
   [lexer_func_of_wlex]; that function reads it once and resets it to 0. *)
347 let delta_loc = ref 0
348 let set_delta_loc dl = delta_loc := dl
349
350 (* For synchronization on errors in the toplevel ... *)
351 (* Issue: file inclusion *)
(* [lexbuf] is the lexing buffer of the most recently created lexer ([None]
   before any is created); [last_tok] is the last token it produced. *)
352 let lexbuf = ref None
353 let last_tok = ref ("","")
354
(* Turn a lexing function into a token stream over the character stream [cs].
   [lexfun] is called as [lexfun lexengine lexbuf] for every token (e.g.
   [token] with one of the engines).  The current [delta_loc] is consumed
   (reset to 0) and added to both ends of every token location.  The lexing
   buffer is published in [lexbuf] and each token in [last_tok]. *)
let lexer_func_of_wlex lexfun lexengine cs =
  let shift = !delta_loc in
  delta_loc := 0;
  (* Feed the lexing buffer one character at a time; 0 signals end of input. *)
  let refill buf _len =
    try
      buf.[0] <- Stream.next cs;
      1
    with Stream.Failure -> 0
  in
  let lb = Lexing.from_function refill in
  lexbuf := Some lb;
  let next () =
    let tok = lexfun lexengine lb in
    last_tok := tok;
    let first = Lexing.lexeme_start lb + shift in
    let last = Lexing.lexeme_end lb + shift in
    (tok, (first, last))
  in
  Token.make_stream_and_location next
373
(* Record a grammar token as a keyword.  Only tokens of the form ("", text)
   whose text starts with a lowercase ASCII letter are registered in
   [keywords]; everything else is ignored.  An empty text is ignored too
   (indexing it used to raise [Invalid_argument]). *)
let register_kw (s1, s2) =
  if s1 = "" && s2 <> "" then
    match s2.[0] with
    | 'a' .. 'z' ->
      (* [replace] keeps a single binding per keyword, like the former
         mem-then-add sequence. *)
      Hashtbl.replace keywords s2 ()
    | _ -> ()
380
381
(* Package a lexing function and an engine as a [Token] lexer record:
   tokens come from [lexer_func_of_wlex], keywords announced through
   [tok_using] are recorded by [register_kw], removal is a no-op, and matching
   and text rendering use the defaults provided by [Token]. *)
382 let lexer lexfun lexengine =
383 {
384 Token.tok_func = lexer_func_of_wlex lexfun lexengine;
385 Token.tok_using = register_kw;
386 Token.tok_removing = (fun _ -> ());
387 Token.tok_match = Token.default_match;
388 Token.tok_text = Token.lexer_text
389 }
390
(* Character classes given as (class id, list of inclusive code point ranges).
   [c i] is the one-element range (i, i); [i ch1 ch2] is the range between the
   codes of two characters.  Only code points below 0x312d are listed, which is
   the size of [table]; the two large ranges left in comments (0xAC00-0xD7A3
   and 0x4E00-0x9FA5) are handled by the fallback function of [utf8_engine].
   The Unicode ranges appear to follow the character class productions of the
   XML 1.0 specification (BaseChar, Ideographic, CombiningChar, Digit,
   Extender) -- verify against the specification before editing.
   NOTE(review): [blank] contains code 8 (backspace) next to tab, line feed,
   carriage return and space -- confirm this is intended. *)
391 let classes =
392 let c i = (i,i) in
393 let i ch1 ch2 = (Char.code ch1, Char.code ch2) in
394 [ unicode_base_char,
395 [ 0x00C0,0x00D6; 0x00D8,0x00F6;
396 0x00F8,0x00FF; 0x0100,0x0131; 0x0134,0x013E; 0x0141,0x0148;
397 0x014A,0x017E; 0x0180,0x01C3; 0x01CD,0x01F0; 0x01F4,0x01F5;
398 0x01FA,0x0217; 0x0250,0x02A8; 0x02BB,0x02C1; 0x0386,0x0386;
399 0x0388,0x038A; 0x038C,0x038C; 0x038E,0x03A1; 0x03A3,0x03CE;
400 0x03D0,0x03D6; 0x03DA,0x03DA; 0x03DC,0x03DC; 0x03DE,0x03DE;
401 0x03E0,0x03E0; 0x03E2,0x03F3;
402 0x0401,0x040C; 0x040E,0x044F; 0x0451,0x045C; 0x045E,0x0481;
403 0x0490,0x04C4; 0x04C7,0x04C8; 0x04CB,0x04CC; 0x04D0,0x04EB;
404 0x04EE,0x04F5; 0x04F8,0x04F9; 0x0531,0x0556; 0x0559,0x0559;
405 0x0561,0x0586; 0x05D0,0x05EA; 0x05F0,0x05F2; 0x0621,0x063A;
406 0x0641,0x064A; 0x0671,0x06B7; 0x06BA,0x06BE; 0x06C0,0x06CE;
407 0x06D0,0x06D3; 0x06D5,0x06D5; 0x06E5,0x06E6; 0x0905,0x0939;
408 0x093D,0x093D;
409 0x0958,0x0961; 0x0985,0x098C; 0x098F,0x0990; 0x0993,0x09A8;
410 0x09AA,0x09B0; 0x09B2,0x09B2; 0x09B6,0x09B9; 0x09DC,0x09DD;
411 0x09DF,0x09E1; 0x09F0,0x09F1; 0x0A05,0x0A0A; 0x0A0F,0x0A10;
412 0x0A13,0x0A28; 0x0A2A,0x0A30; 0x0A32,0x0A33; 0x0A35,0x0A36;
413 0x0A38,0x0A39; 0x0A59,0x0A5C; 0x0A5E,0x0A5E; 0x0A72,0x0A74;
414 0x0A85,0x0A8B; 0x0A8D,0x0A8D; 0x0A8F,0x0A91; 0x0A93,0x0AA8;
415 0x0AAA,0x0AB0; 0x0AB2,0x0AB3; 0x0AB5,0x0AB9; 0x0ABD,0x0ABD;
416 0x0AE0,0x0AE0;
417 0x0B05,0x0B0C; 0x0B0F,0x0B10; 0x0B13,0x0B28; 0x0B2A,0x0B30;
418 0x0B32,0x0B33; 0x0B36,0x0B39; 0x0B3D,0x0B3D; 0x0B5C,0x0B5D;
419 0x0B5F,0x0B61; 0x0B85,0x0B8A; 0x0B8E,0x0B90; 0x0B92,0x0B95;
420 0x0B99,0x0B9A; 0x0B9C,0x0B9C; 0x0B9E,0x0B9F; 0x0BA3,0x0BA4;
421 0x0BA8,0x0BAA; 0x0BAE,0x0BB5; 0x0BB7,0x0BB9; 0x0C05,0x0C0C;
422 0x0C0E,0x0C10; 0x0C12,0x0C28; 0x0C2A,0x0C33; 0x0C35,0x0C39;
423 0x0C60,0x0C61; 0x0C85,0x0C8C; 0x0C8E,0x0C90; 0x0C92,0x0CA8;
424 0x0CAA,0x0CB3; 0x0CB5,0x0CB9; 0x0CDE,0x0CDE; 0x0CE0,0x0CE1;
425 0x0D05,0x0D0C; 0x0D0E,0x0D10; 0x0D12,0x0D28; 0x0D2A,0x0D39;
426 0x0D60,0x0D61; 0x0E01,0x0E2E; 0x0E30,0x0E30; 0x0E32,0x0E33;
427 0x0E40,0x0E45; 0x0E81,0x0E82; 0x0E84,0x0E84; 0x0E87,0x0E88;
428 0x0E8A,0x0E8A;
429 0x0E8D,0x0E8D; 0x0E94,0x0E97; 0x0E99,0x0E9F; 0x0EA1,0x0EA3;
430 0x0EA5,0x0EA5;
431 0x0EA7,0x0EA7; 0x0EAA,0x0EAB; 0x0EAD,0x0EAE; 0x0EB0,0x0EB0;
432 0x0EB2,0x0EB3;
433 0x0EBD,0x0EBD; 0x0EC0,0x0EC4; 0x0F40,0x0F47; 0x0F49,0x0F69;
434 0x10A0,0x10C5; 0x10D0,0x10F6; 0x1100,0x1100; 0x1102,0x1103;
435 0x1105,0x1107; 0x1109,0x1109; 0x110B,0x110C; 0x110E,0x1112;
436 0x113C,0x113C;
437 0x113E,0x113E; 0x1140,0x1140; 0x114C,0x114C; 0x114E,0x114E;
438 0x1150,0x1150; 0x1154,0x1155; 0x1159,0x1159;
439 0x115F,0x1161; 0x1163,0x1163; 0x1165,0x1165; 0x1167,0x1167;
440 0x1169,0x1169; 0x116D,0x116E;
441 0x1172,0x1173; 0x1175,0x1175; 0x119E,0x119E; 0x11A8,0x11A8;
442 0x11AB,0x11AB; 0x11AE,0x11AF;
443 0x11B7,0x11B8; 0x11BA,0x11BA; 0x11BC,0x11C2; 0x11EB,0x11EB;
444 0x11F0,0x11F0; 0x11F9,0x11F9;
445 0x1E00,0x1E9B; 0x1EA0,0x1EF9; 0x1F00,0x1F15; 0x1F18,0x1F1D;
446 0x1F20,0x1F45; 0x1F48,0x1F4D; 0x1F50,0x1F57; 0x1F59,0x1F59;
447 0x1F5B,0x1F5B;
448 0x1F5D,0x1F5D; 0x1F5F,0x1F7D; 0x1F80,0x1FB4; 0x1FB6,0x1FBC;
449 0x1FBE,0x1FBE;
450 0x1FC2,0x1FC4; 0x1FC6,0x1FCC; 0x1FD0,0x1FD3; 0x1FD6,0x1FDB;
451 0x1FE0,0x1FEC; 0x1FF2,0x1FF4; 0x1FF6,0x1FFC; 0x2126,0x2126;
452 0x212A,0x212B; 0x212E,0x212E; 0x2180,0x2182; 0x3041,0x3094;
453 0x30A1,0x30FA; 0x3105,0x312C; (* 0xAC00,0xD7A3 *) ];
454
455 unicode_ideographic,
456 [ 0x3007,0x3007; 0x3021,0x3029 (* 0x4E00-0x9FA5 *) ];
457
458 unicode_combining_char,
459 [ 0x0300,0x0345; 0x0360,0x0361; 0x0483,0x0486; 0x0591,0x05A1;
460 0x05A3,0x05B9; 0x05BB,0x05BD; 0x05BF,0x05BF; 0x05C1,0x05C2;
461 0x05C4,0x05C4; 0x064B,0x0652; 0x0670,0x0670; 0x06D6,0x06DC;
462 0x06DD,0x06DF; 0x06E0,0x06E4; 0x06E7,0x06E8; 0x06EA,0x06ED;
463 0x0901,0x0903; 0x093C,0x093C; 0x093E,0x094C; 0x094D,0x094D;
464 0x0951,0x0954; 0x0962,0x0963; 0x0981,0x0983; 0x09BC,0x09BC;
465 0x09BE,0x09BE; 0x09BF,0x09BF; 0x09C0,0x09C4; 0x09C7,0x09C8;
466 0x09CB,0x09CD; 0x09D7,0x09D7; 0x09E2,0x09E3; 0x0A02,0x0A02;
467 0x0A3C,0x0A3C; 0x0A3E,0x0A3E; 0x0A3F,0x0A3F; 0x0A40,0x0A42;
468 0x0A47,0x0A48; 0x0A4B,0x0A4D; 0x0A70,0x0A71; 0x0A81,0x0A83;
469 0x0ABC,0x0ABC; 0x0ABE,0x0AC5; 0x0AC7,0x0AC9; 0x0ACB,0x0ACD;
470 0x0B01,0x0B03; 0x0B3C,0x0B3C; 0x0B3E,0x0B43; 0x0B47,0x0B48;
471 0x0B4B,0x0B4D; 0x0B56,0x0B57; 0x0B82,0x0B83; 0x0BBE,0x0BC2;
472 0x0BC6,0x0BC8; 0x0BCA,0x0BCD; 0x0BD7,0x0BD7; 0x0C01,0x0C03;
473 0x0C3E,0x0C44; 0x0C46,0x0C48; 0x0C4A,0x0C4D; 0x0C55,0x0C56;
474 0x0C82,0x0C83; 0x0CBE,0x0CC4; 0x0CC6,0x0CC8; 0x0CCA,0x0CCD;
475 0x0CD5,0x0CD6; 0x0D02,0x0D03; 0x0D3E,0x0D43; 0x0D46,0x0D48;
476 0x0D4A,0x0D4D; 0x0D57,0x0D57; 0x0E31,0x0E31; 0x0E34,0x0E3A;
477 0x0E47,0x0E4E; 0x0EB1,0x0EB1; 0x0EB4,0x0EB9; 0x0EBB,0x0EBC;
478 0x0EC8,0x0ECD; 0x0F18,0x0F19; 0x0F35,0x0F35; 0x0F37,0x0F37;
479 0x0F39,0x0F39; 0x0F3E,0x0F3E; 0x0F3F,0x0F3F; 0x0F71,0x0F84;
480 0x0F86,0x0F8B; 0x0F90,0x0F95; 0x0F97,0x0F97; 0x0F99,0x0FAD;
481 0x0FB1,0x0FB7; 0x0FB9,0x0FB9; 0x20D0,0x20DC; 0x20E1,0x20E1;
482 0x302A,0x302F; 0x3099,0x3099; 0x309A,0x309A ];
483
484 unicode_digit,
485 [ 0x0660,0x0669; 0x06F0,0x06F9; 0x0966,0x096F; 0x09E6,0x09EF;
486 0x0A66,0x0A6F; 0x0AE6,0x0AEF; 0x0B66,0x0B6F; 0x0BE7,0x0BEF;
487 0x0C66,0x0C6F; 0x0CE6,0x0CEF; 0x0D66,0x0D6F; 0x0E50,0x0E59;
488 0x0ED0,0x0ED9; 0x0F20,0x0F29 ];
489
490
491 unicode_extender,
492 [ 0x00B7,0x00B7; 0x02D0,0x02D1; 0x0387,0x0387; 0x0640,0x0640;
493 0x0E46,0x0E46; 0x0EC6,0x0EC6; 0x3005,0x3005; 0x3031,0x3035;
494 0x309D,0x309E; 0x30FC,0x30FE ];
495
496 ascii_digit,
497 [ i '0' '9'];
498
499 lowercase,
500 [i 'a' 'z'];
501
502 uppercase,
503 [i 'A' 'Z'];
504
505 blank,
506 [c 8; c 9; c 10; c 13; c 32]
507 ]
508
(* Class lookup table: byte [k] is the class id of code point [k], for
   0 <= k < 0x312d.  The ranges of [classes] are written first, then the
   individual characters of [one_char_classes]; a later write overrides an
   earlier one.  Code points covered by neither keep [encoding_error].
   NOTE(review): that default means e.g. '$' or '%' are classified as
   [encoding_error] rather than [xml_char] -- confirm this is intended. *)
let table =
  (* A class id must fit in one byte of the table. *)
  assert (nb_classes <= 256);
  let tbl = String.make 0x312d (Char.chr encoding_error) in
  List.iter
    (fun (cls, ranges) ->
       let tag = Char.chr cls in
       List.iter (fun (lo, hi) -> String.fill tbl lo (hi - lo + 1) tag) ranges)
    classes;
  List.iter (fun (code, cls) -> tbl.[code] <- Char.chr cls) one_char_classes;
  tbl
518
(* Lexing engine for UTF-8 input, built by [Lex_engines.engine_tiny_utf8] from
   [table] and a function giving the class of a code point from its numeric
   value (presumably consulted for code points that [table] does not cover --
   confirm against [Lex_engines]). *)
let utf8_engine =
  let classify code =
    if 0x4E00 <= code && code <= 0x9FA5 then unicode_ideographic
    else if 0xAC00 <= code && code <= 0xD7A3 then unicode_base_char
    else if
      code <= 0xD7FF
      || (0xE000 <= code && code <= 0xFFFD)
      || (0x10000 <= code && code <= 0x10FFFF)
    then xml_char
    else encoding_error
  in
  Lex_engines.engine_tiny_utf8 table classify
530
(* Lexing engine built by [Lex_engines.engine_tiny_8bit] from the same class
   [table] as [utf8_engine] (for Latin-1 input, judging by the name). *)
531 let latin1_engine = Lex_engines.engine_tiny_8bit table

CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5