| 1 |
let eof = 0
|
| 2 |
let encoding_error = 1
|
| 3 |
let xml_char = 2
|
| 4 |
let blank = 3
|
| 5 |
let lowercase = 4
|
| 6 |
let uppercase = 5
|
| 7 |
let ascii_digit = 6
|
| 8 |
let char_5f = 7
|
| 9 |
let char_3c = 8
|
| 10 |
let char_3e = 9
|
| 11 |
let char_3d = 10
|
| 12 |
let char_2e = 11
|
| 13 |
let char_2c = 12
|
| 14 |
let char_3a = 13
|
| 15 |
let char_3b = 14
|
| 16 |
let char_2b = 15
|
| 17 |
let char_2d = 16
|
| 18 |
let char_2a = 17
|
| 19 |
let char_2f = 18
|
| 20 |
let char_40 = 19
|
| 21 |
let char_26 = 20
|
| 22 |
let char_7b = 21
|
| 23 |
let char_7d = 22
|
| 24 |
let char_5b = 23
|
| 25 |
let char_5d = 24
|
| 26 |
let char_28 = 25
|
| 27 |
let char_29 = 26
|
| 28 |
let char_7c = 27
|
| 29 |
let char_3f = 28
|
| 30 |
let char_60 = 29
|
| 31 |
let char_22 = 30
|
| 32 |
let char_5c = 31
|
| 33 |
let char_27 = 32
|
| 34 |
let char_21 = 33
|
| 35 |
|
| 36 |
let one_char_classes = [
|
| 37 |
(0x5f, 07);
|
| 38 |
(0x3c, 08);
|
| 39 |
(0x3e, 09);
|
| 40 |
(0x3d, 10);
|
| 41 |
(0x2e, 11);
|
| 42 |
(0x2c, 12);
|
| 43 |
(0x3a, 13);
|
| 44 |
(0x3b, 14);
|
| 45 |
(0x2b, 15);
|
| 46 |
(0x2d, 16);
|
| 47 |
(0x2a, 17);
|
| 48 |
(0x2f, 18);
|
| 49 |
(0x40, 19);
|
| 50 |
(0x26, 20);
|
| 51 |
(0x7b, 21);
|
| 52 |
(0x7d, 22);
|
| 53 |
(0x5b, 23);
|
| 54 |
(0x5d, 24);
|
| 55 |
(0x28, 25);
|
| 56 |
(0x29, 26);
|
| 57 |
(0x7c, 27);
|
| 58 |
(0x3f, 28);
|
| 59 |
(0x60, 29);
|
| 60 |
(0x22, 30);
|
| 61 |
(0x5c, 31);
|
| 62 |
(0x27, 32);
|
| 63 |
(0x21, 33);
|
| 64 |
]
|
| 65 |
|
| 66 |
let nb_classes = 34
|
| 67 |
|
| 68 |
# 12 "parser/wlexer.mll"
|
| 69 |
|
| 70 |
let keywords = Hashtbl.create 17
|
| 71 |
|
| 72 |
let error i j exn = raise (Location.Location ((i,j),exn))
|
| 73 |
exception Illegal_character of char
|
| 74 |
exception Unterminated_comment
|
| 75 |
exception Unterminated_string
|
| 76 |
exception Unterminated_string_in_comment
|
| 77 |
|
| 78 |
(* Buffer for string literals *)
|
| 79 |
|
| 80 |
let string_buff = Buffer.create 1024
|
| 81 |
let store_char = Buffer.add_char string_buff
|
| 82 |
let get_stored_string () =
|
| 83 |
let s = Buffer.contents string_buff in
|
| 84 |
Buffer.clear string_buff;
|
| 85 |
s
|
| 86 |
|
| 87 |
let string_start_pos = ref 0;;
|
| 88 |
let comment_start_pos : int list ref = ref [];;
|
| 89 |
|
| 90 |
let char_for_decimal_code s =
|
| 91 |
let s = String.sub s 1 (String.length s - 1) in
|
| 92 |
let c = int_of_string s in
|
| 93 |
assert ( c < 256 ); (* TODO: handle Unicode *)
|
| 94 |
Char.chr c
|
| 95 |
|
| 96 |
let rec tag_of_tag s i =
|
| 97 |
match s.[i] with
|
| 98 |
| '\008' | '\009' | '\010' | '\013' | '\032' -> tag_of_tag s (i+1)
|
| 99 |
| _ -> String.sub s i (String.length s - i)
|
| 100 |
|
| 101 |
let lex_tables = {
|
| 102 |
Lexing.lex_base =
|
| 103 |
"\000\000\009\000\012\000\018\000\252\255\251\255\004\000\255\255\
|
| 104 |
\005\000\254\255\014\000\013\000\003\000\005\000\253\255\255\255\
|
| 105 |
\247\255\246\255\020\000\047\000\051\000\018\000\043\000\250\255\
|
| 106 |
\027\000\017\000\005\000\050\000\011\000\044\000\040\000\249\255\
|
| 107 |
\250\255\248\255\064\000\067\000\071\000\080\000\057\000\084\000\
|
| 108 |
\100\000\104\000\114\000\118\000\124\000\062\000";
|
| 109 |
Lexing.lex_backtrk =
|
| 110 |
"\255\255\255\255\255\255\255\255\255\255\255\255\004\000\255\255\
|
| 111 |
\002\000\255\255\004\000\002\000\004\000\004\000\255\255\255\255\
|
| 112 |
\255\255\255\255\000\000\001\000\002\000\003\000\005\000\255\255\
|
| 113 |
\005\000\005\000\005\000\005\000\005\000\005\000\005\000\255\255\
|
| 114 |
\255\255\255\255\255\255\004\000\255\255\004\000\003\000\002\000\
|
| 115 |
\255\255\002\000\001\000\255\255\001\000\000\000";
|
| 116 |
Lexing.lex_default =
|
| 117 |
"\023\000\005\000\005\000\005\000\000\000\000\000\255\255\000\000\
|
| 118 |
\255\255\000\000\255\255\255\255\255\255\255\255\000\000\000\000\
|
| 119 |
\000\000\000\000\255\255\255\255\255\255\255\255\255\255\000\000\
|
| 120 |
\255\255\255\255\255\255\255\255\255\255\255\255\255\255\000\000\
|
| 121 |
\000\000\000\000\255\255\255\255\255\255\255\255\255\255\255\255\
|
| 122 |
\255\255\255\255\255\255\255\255\255\255\255\255";
|
| 123 |
Lexing.lex_trans =
|
| 124 |
"\016\000\017\000\017\000\018\000\019\000\020\000\021\000\019\000\
|
| 125 |
\022\000\004\000\008\000\008\000\004\000\024\000\025\000\026\000\
|
| 126 |
\027\000\026\000\004\000\011\000\011\000\028\000\015\000\045\000\
|
| 127 |
\038\000\029\000\012\000\030\000\026\000\009\000\031\000\032\000\
|
| 128 |
\031\000\032\000\013\000\009\000\009\000\032\000\032\000\014\000\
|
| 129 |
\032\000\014\000\007\000\010\000\009\000\009\000\034\000\035\000\
|
| 130 |
\035\000\006\000\007\000\042\000\042\000\042\000\042\000\039\000\
|
| 131 |
\039\000\039\000\039\000\032\000\043\000\033\000\032\000\038\000\
|
| 132 |
\040\000\045\000\032\000\034\000\035\000\035\000\000\000\035\000\
|
| 133 |
\035\000\035\000\035\000\037\000\037\000\037\000\037\000\042\000\
|
| 134 |
\036\000\000\000\000\000\039\000\037\000\037\000\037\000\037\000\
|
| 135 |
\039\000\039\000\039\000\039\000\000\000\036\000\000\000\000\000\
|
| 136 |
\000\000\040\000\000\000\035\000\000\000\000\000\000\000\037\000\
|
| 137 |
\041\000\041\000\041\000\041\000\041\000\041\000\041\000\041\000\
|
| 138 |
\037\000\000\000\000\000\000\000\039\000\040\000\042\000\042\000\
|
| 139 |
\042\000\042\000\044\000\044\000\044\000\044\000\000\000\043\000\
|
| 140 |
\044\000\044\000\044\000\044\000\041\000\000\000\000\000\000\000\
|
| 141 |
\041\000\043\000\000\000\000\000\000\000\000\000\000\000\000\000\
|
| 142 |
\000\000\000\000\042\000\000\000\000\000\000\000\044\000\000\000\
|
| 143 |
\000\000\000\000\000\000\000\000\044\000\000\000";
|
| 144 |
Lexing.lex_check =
|
| 145 |
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\
|
| 146 |
\000\000\001\000\006\000\008\000\002\000\000\000\000\000\000\000\
|
| 147 |
\000\000\000\000\003\000\011\000\010\000\000\000\013\000\018\000\
|
| 148 |
\021\000\000\000\001\000\000\000\000\000\012\000\000\000\025\000\
|
| 149 |
\000\000\026\000\001\000\006\000\006\000\024\000\028\000\001\000\
|
| 150 |
\024\000\001\000\002\000\002\000\010\000\010\000\022\000\022\000\
|
| 151 |
\022\000\003\000\003\000\019\000\019\000\019\000\019\000\020\000\
|
| 152 |
\020\000\020\000\020\000\027\000\019\000\029\000\030\000\038\000\
|
| 153 |
\020\000\045\000\027\000\034\000\034\000\034\000\255\255\035\000\
|
| 154 |
\035\000\035\000\035\000\036\000\036\000\036\000\036\000\019\000\
|
| 155 |
\035\000\255\255\255\255\020\000\037\000\037\000\037\000\037\000\
|
| 156 |
\039\000\039\000\039\000\039\000\255\255\037\000\255\255\255\255\
|
| 157 |
\255\255\039\000\255\255\035\000\255\255\255\255\255\255\036\000\
|
| 158 |
\040\000\040\000\040\000\040\000\041\000\041\000\041\000\041\000\
|
| 159 |
\037\000\255\255\255\255\255\255\039\000\041\000\042\000\042\000\
|
| 160 |
\042\000\042\000\043\000\043\000\043\000\043\000\255\255\042\000\
|
| 161 |
\044\000\044\000\044\000\044\000\040\000\255\255\255\255\255\255\
|
| 162 |
\041\000\044\000\255\255\255\255\255\255\255\255\255\255\255\255\
|
| 163 |
\255\255\255\255\042\000\255\255\255\255\255\255\043\000\255\255\
|
| 164 |
\255\255\255\255\255\255\255\255\044\000\255\255"
|
| 165 |
}
|
| 166 |
|
| 167 |
let rec token engine lexbuf =
|
| 168 |
match engine lex_tables 0 lexbuf with
|
| 169 |
0 -> (
|
| 170 |
# 51 "parser/wlexer.mll"
|
| 171 |
token engine lexbuf )
|
| 172 |
| 1 -> (
|
| 173 |
# 52 "parser/wlexer.mll"
|
| 174 |
|
| 175 |
let s = Lexing.lexeme lexbuf in
|
| 176 |
if Hashtbl.mem keywords s then "",s else "LIDENT",s
|
| 177 |
)
|
| 178 |
| 2 -> (
|
| 179 |
# 56 "parser/wlexer.mll"
|
| 180 |
"UIDENT",Lexing.lexeme lexbuf )
|
| 181 |
| 3 -> (
|
| 182 |
# 57 "parser/wlexer.mll"
|
| 183 |
"INT",Lexing.lexeme lexbuf )
|
| 184 |
| 4 -> (
|
| 185 |
# 58 "parser/wlexer.mll"
|
| 186 |
|
| 187 |
let s = Lexing.lexeme lexbuf in
|
| 188 |
"TAG", tag_of_tag s 1
|
| 189 |
)
|
| 190 |
| 5 -> (
|
| 191 |
# 66 "parser/wlexer.mll"
|
| 192 |
"",Lexing.lexeme lexbuf )
|
| 193 |
| 6 -> (
|
| 194 |
# 69 "parser/wlexer.mll"
|
| 195 |
let string_start = Lexing.lexeme_start lexbuf in
|
| 196 |
string_start_pos := string_start;
|
| 197 |
let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
|
| 198 |
if double_quote then string2 engine lexbuf else string1 engine lexbuf;
|
| 199 |
lexbuf.Lexing.lex_start_pos <-
|
| 200 |
string_start - lexbuf.Lexing.lex_abs_pos;
|
| 201 |
(if double_quote then "STRING2" else "STRING1"),
|
| 202 |
(get_stored_string()) )
|
| 203 |
| 7 -> (
|
| 204 |
# 79 "parser/wlexer.mll"
|
| 205 |
comment_start_pos := [Lexing.lexeme_start lexbuf];
|
| 206 |
comment engine lexbuf;
|
| 207 |
token engine lexbuf )
|
| 208 |
| 8 -> (
|
| 209 |
# 84 "parser/wlexer.mll"
|
| 210 |
"EOI","" )
|
| 211 |
| 9 -> (
|
| 212 |
# 86 "parser/wlexer.mll"
|
| 213 |
error
|
| 214 |
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
|
| 215 |
(Illegal_character ((Lexing.lexeme lexbuf).[0])) )
|
| 216 |
| _ -> failwith "lexing: empty token [token]"
|
| 217 |
|
| 218 |
and comment engine lexbuf =
|
| 219 |
match engine lex_tables 1 lexbuf with
|
| 220 |
0 -> (
|
| 221 |
# 92 "parser/wlexer.mll"
|
| 222 |
comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
|
| 223 |
comment engine lexbuf;
|
| 224 |
)
|
| 225 |
| 1 -> (
|
| 226 |
# 96 "parser/wlexer.mll"
|
| 227 |
comment_start_pos := List.tl !comment_start_pos;
|
| 228 |
if !comment_start_pos <> [] then comment engine lexbuf;
|
| 229 |
)
|
| 230 |
| 2 -> (
|
| 231 |
# 100 "parser/wlexer.mll"
|
| 232 |
string_start_pos := Lexing.lexeme_start lexbuf;
|
| 233 |
let string =
|
| 234 |
if Lexing.lexeme_char lexbuf 0 = '"' then string2 else string1 in
|
| 235 |
(try string engine lexbuf
|
| 236 |
with Location.Location (_,Unterminated_string) ->
|
| 237 |
let st = List.hd !comment_start_pos in
|
| 238 |
error st (st+2) Unterminated_string_in_comment);
|
| 239 |
Buffer.clear string_buff;
|
| 240 |
comment engine lexbuf )
|
| 241 |
| 3 -> (
|
| 242 |
# 110 "parser/wlexer.mll"
|
| 243 |
let st = List.hd !comment_start_pos in
|
| 244 |
error st (st+2) Unterminated_comment
|
| 245 |
)
|
| 246 |
| 4 -> (
|
| 247 |
# 114 "parser/wlexer.mll"
|
| 248 |
comment engine lexbuf )
|
| 249 |
| _ -> failwith "lexing: empty token [comment]"
|
| 250 |
|
| 251 |
and string2 engine lexbuf =
|
| 252 |
match engine lex_tables 2 lexbuf with
|
| 253 |
0 -> (
|
| 254 |
# 118 "parser/wlexer.mll"
|
| 255 |
() )
|
| 256 |
| 1 -> (
|
| 257 |
# 120 "parser/wlexer.mll"
|
| 258 |
store_char (Lexing.lexeme_char lexbuf 1);
|
| 259 |
string2 engine lexbuf )
|
| 260 |
| 2 -> (
|
| 261 |
# 123 "parser/wlexer.mll"
|
| 262 |
store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
|
| 263 |
string2 engine lexbuf )
|
| 264 |
| 3 -> (
|
| 265 |
# 126 "parser/wlexer.mll"
|
| 266 |
error !string_start_pos (!string_start_pos+1) Unterminated_string )
|
| 267 |
| 4 -> (
|
| 268 |
# 128 "parser/wlexer.mll"
|
| 269 |
store_char (Lexing.lexeme_char lexbuf 0);
|
| 270 |
(* TODO: Unicode *)
|
| 271 |
string2 engine lexbuf )
|
| 272 |
| _ -> failwith "lexing: empty token [string2]"
|
| 273 |
|
| 274 |
and string1 engine lexbuf =
|
| 275 |
match engine lex_tables 3 lexbuf with
|
| 276 |
0 -> (
|
| 277 |
# 134 "parser/wlexer.mll"
|
| 278 |
() )
|
| 279 |
| 1 -> (
|
| 280 |
# 136 "parser/wlexer.mll"
|
| 281 |
store_char (Lexing.lexeme_char lexbuf 1);
|
| 282 |
string1 engine lexbuf )
|
| 283 |
| 2 -> (
|
| 284 |
# 139 "parser/wlexer.mll"
|
| 285 |
store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
|
| 286 |
string1 engine lexbuf )
|
| 287 |
| 3 -> (
|
| 288 |
# 142 "parser/wlexer.mll"
|
| 289 |
error !string_start_pos (!string_start_pos+1) Unterminated_string )
|
| 290 |
| 4 -> (
|
| 291 |
# 144 "parser/wlexer.mll"
|
| 292 |
store_char (Lexing.lexeme_char lexbuf 0);
|
| 293 |
string1 engine lexbuf )
|
| 294 |
| _ -> failwith "lexing: empty token [string1]"
|
| 295 |
|
| 296 |
;;
|
| 297 |
|
| 298 |
# 147 "parser/wlexer.mll"
|
| 299 |
|
| 300 |
|
| 301 |
let lexer_func_of_wlex lexfun lexengine cs =
|
| 302 |
let lb =
|
| 303 |
Lexing.from_function
|
| 304 |
(fun s n ->
|
| 305 |
try s.[0] <- Stream.next cs; 1 with Stream.Failure -> 0)
|
| 306 |
in
|
| 307 |
let next () =
|
| 308 |
let tok = lexfun lexengine lb in
|
| 309 |
let loc = (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in
|
| 310 |
(tok, loc)
|
| 311 |
in
|
| 312 |
Token.make_stream_and_location next
|
| 313 |
|
| 314 |
let register_kw (s1,s2) =
|
| 315 |
if s1 = "" then
|
| 316 |
match s2.[0] with
|
| 317 |
| 'a' .. 'z' when not (Hashtbl.mem keywords s2) ->
|
| 318 |
Hashtbl.add keywords s2 ()
|
| 319 |
| _ -> ()
|
| 320 |
|
| 321 |
|
| 322 |
let lexer lexfun lexengine =
|
| 323 |
{
|
| 324 |
Token.tok_func = lexer_func_of_wlex lexfun lexengine;
|
| 325 |
Token.tok_using = register_kw;
|
| 326 |
Token.tok_removing = (fun _ -> ());
|
| 327 |
Token.tok_match = Token.default_match;
|
| 328 |
Token.tok_text = Token.lexer_text
|
| 329 |
}
|
| 330 |
|
| 331 |
let classes =
|
| 332 |
let c i = (i,i) in
|
| 333 |
let i ch1 ch2 = (Char.code ch1, Char.code ch2) in
|
| 334 |
[ (ascii_digit, [i '0' '9']);
|
| 335 |
(lowercase, [i 'a' 'z']);
|
| 336 |
(uppercase, [i 'A' 'Z']);
|
| 337 |
(blank, [c 8; c 9; c 10; c 13; c 32]);
|
| 338 |
]
|
| 339 |
|
| 340 |
let table =
|
| 341 |
assert(nb_classes <= 256);
|
| 342 |
let v = String.make 256 (Char.chr encoding_error) in
|
| 343 |
let fill_int c (i, j) = String.fill v i (j-i+1) c in
|
| 344 |
let fill_class (c, l) = List.iter (fill_int (Char.chr c)) l in
|
| 345 |
let fill_char (ch, cl) = v.[ch] <- Char.chr cl in
|
| 346 |
List.iter fill_class classes;
|
| 347 |
List.iter fill_char one_char_classes;
|
| 348 |
v
|
| 349 |
|
| 350 |
let utf8_engine = Lex_engines.engine_tiny_utf8 table
|
| 351 |
(fun c ->
|
| 352 |
if c>=0x10000 && c < 0x11000 then xml_char
|
| 353 |
else encoding_error)
|
| 354 |
|
| 355 |
let latin1_engine = Lex_engines.engine_tiny_8bit table
|