| 1 |
(* Modified from Camlp4 Plexer module *)
|
| 2 |
|
| 3 |
open Stdpp
|
| 4 |
open Token
|
| 5 |
|
| 6 |
let no_quotations = ref false
|
| 7 |
|
| 8 |
(* The string buffering machinery *)
|
| 9 |
|
| 10 |
let buff = ref (String.create 80)
|
| 11 |
let store len x =
|
| 12 |
if len >= String.length !buff then
|
| 13 |
buff := !buff ^ String.create (String.length !buff);
|
| 14 |
!buff.[len] <- x;
|
| 15 |
succ len
|
| 16 |
let mstore len s =
|
| 17 |
let rec add_rec len i =
|
| 18 |
if i == String.length s then len else add_rec (store len s.[i]) (succ i)
|
| 19 |
in
|
| 20 |
add_rec len 0
|
| 21 |
let get_buff len = String.sub !buff 0 len
|
| 22 |
|
| 23 |
(* The lexer *)
|
| 24 |
|
| 25 |
let rec ident len =
|
| 26 |
parser
|
| 27 |
[< ''A'..'Z' | 'a'..'z' | '\192'..'\214' | '\216'..'\246' |
|
| 28 |
'\248'..'\255' | '0'..'9' | '_' | '\'' as c;
|
| 29 |
s >] ->
|
| 30 |
ident (store len c) s
|
| 31 |
| [< >] -> len
|
| 32 |
and ident2 len =
|
| 33 |
parser
|
| 34 |
[< ''!' | '?' | '~' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' |
|
| 35 |
'%' | '.' | ':' | '<' | '>' | '|' | '$' as c;
|
| 36 |
s >] ->
|
| 37 |
ident2 (store len c) s
|
| 38 |
| [< >] -> len
|
| 39 |
and ident3 len =
|
| 40 |
parser
|
| 41 |
[< ''0'..'9' | 'A'..'Z' | 'a'..'z' | '\192'..'\214' | '\216'..'\246' |
|
| 42 |
'\248'..'\255' | '_' | '!' | '%' | '&' | '*' | '+' | '-' | '.' | '/' |
|
| 43 |
':' | '<' | '=' | '>' | '?' | '@' | '^' | '|' | '~' | '\'' | '$' as c;
|
| 44 |
s >] ->
|
| 45 |
ident3 (store len c) s
|
| 46 |
| [< >] -> len
|
| 47 |
and base_number len =
|
| 48 |
parser
|
| 49 |
[< ''o' | 'O'; s >] -> octal_digits (store len 'o') s
|
| 50 |
| [< ''x' | 'X'; s >] -> hexa_digits (store len 'x') s
|
| 51 |
| [< ''b' | 'B'; s >] -> binary_digits (store len 'b') s
|
| 52 |
| [< a = number len >] -> a
|
| 53 |
and octal_digits len =
|
| 54 |
parser
|
| 55 |
[< ''0'..'7' as d; s >] -> octal_digits (store len d) s
|
| 56 |
| [< >] -> "INT", get_buff len
|
| 57 |
and hexa_digits len =
|
| 58 |
parser
|
| 59 |
[< ''0'..'9' | 'a'..'f' | 'A'..'F' as d; s >] ->
|
| 60 |
hexa_digits (store len d) s
|
| 61 |
| [< >] -> "INT", get_buff len
|
| 62 |
and binary_digits len =
|
| 63 |
parser
|
| 64 |
[< ''0'..'1' as d; s >] -> binary_digits (store len d) s
|
| 65 |
| [< >] -> "INT", get_buff len
|
| 66 |
and number len =
|
| 67 |
parser
|
| 68 |
[< ''0'..'9' as c; s >] -> number (store len c) s
|
| 69 |
| [< ''.'; s >] -> decimal_part (store len '.') s
|
| 70 |
| [< ''e' | 'E'; s >] -> exponent_part (store len 'E') s
|
| 71 |
| [< >] -> "INT", get_buff len
|
| 72 |
and decimal_part len =
|
| 73 |
parser
|
| 74 |
[< ''0'..'9' as c; s >] -> decimal_part (store len c) s
|
| 75 |
| [< ''e' | 'E'; s >] -> exponent_part (store len 'E') s
|
| 76 |
| [< >] -> "FLOAT", get_buff len
|
| 77 |
and exponent_part len =
|
| 78 |
parser
|
| 79 |
[< ''+' | '-' as c; s >] -> end_exponent_part (store len c) s
|
| 80 |
| [< a = end_exponent_part len >] -> a
|
| 81 |
and end_exponent_part len =
|
| 82 |
parser
|
| 83 |
[< ''0'..'9' as c; s >] -> end_exponent_part (store len c) s
|
| 84 |
| [< >] -> "FLOAT", get_buff len
|
| 85 |
|
| 86 |
let rec skip_spaces =
|
| 87 |
parser
|
| 88 |
[< '' ' | '\010' | '\013' | '\t' | '\026' | '\012'; s >] -> skip_spaces s
|
| 89 |
| [< >] -> ()
|
| 90 |
|
| 91 |
let error_on_unknown_keywords = ref false
|
| 92 |
let err loc msg = raise_with_loc loc (Token.Error msg)
|
| 93 |
|
| 94 |
let next_token_fun dfa find_kwd =
|
| 95 |
let keyword_or_error loc s =
|
| 96 |
try ("", find_kwd s), loc with
|
| 97 |
Not_found ->
|
| 98 |
if !error_on_unknown_keywords then err loc ("illegal token: " ^ s)
|
| 99 |
else ("", s), loc
|
| 100 |
in
|
| 101 |
let rec next_token =
|
| 102 |
parser bp
|
| 103 |
[< '' ' | '\010' | '\013' | '\t' | '\026' | '\012'; s >] -> next_token s
|
| 104 |
| [< ''('; s >] -> left_paren bp s
|
| 105 |
| [< ''#'; s >] -> spaces_tabs s; linenum bp s
|
| 106 |
| [< ''A'..'Z' | '\192'..'\214' | '\216'..'\222' as c; s >] ->
|
| 107 |
let id = get_buff (ident (store 0 c) s) in
|
| 108 |
let loc = bp, Stream.count s in
|
| 109 |
(try "", find_kwd id with
|
| 110 |
Not_found -> "UIDENT", id),
|
| 111 |
loc
|
| 112 |
| [< ''a'..'z' | '\223'..'\246' | '\248'..'\255' | '_' as c; s >] ->
|
| 113 |
let id = get_buff (ident (store 0 c) s) in
|
| 114 |
let loc = bp, Stream.count s in
|
| 115 |
(try "", find_kwd id with
|
| 116 |
Not_found -> "LIDENT", id),
|
| 117 |
loc
|
| 118 |
| [< ''1'..'9' as c; s >] ->
|
| 119 |
let tok = number (store 0 c) s in
|
| 120 |
let loc = bp, Stream.count s in tok, loc
|
| 121 |
| [< ''0'; s >] ->
|
| 122 |
let tok = base_number (store 0 '0') s in
|
| 123 |
let loc = bp, Stream.count s in tok, loc
|
| 124 |
| [< ''\''; s >] ->
|
| 125 |
(* begin match Stream.npeek 2 s with
|
| 126 |
[_; '\''] | ['\\'; _] -> *)
|
| 127 |
let tok = "CHAR", get_buff (char bp 0 s) in
|
| 128 |
let loc = bp, Stream.count s in tok, loc
|
| 129 |
(* | _ -> keyword_or_error (bp, Stream.count s) "'"
|
| 130 |
end *)
|
| 131 |
| [< ''\"'; s >] ->
|
| 132 |
let tok = "STRING", get_buff (string bp 0 s) in
|
| 133 |
let loc = bp, Stream.count s in tok, loc
|
| 134 |
| [< ''$'; s >] ->
|
| 135 |
let tok = dollar bp 0 s in let loc = bp, Stream.count s in tok, loc
|
| 136 |
| [< ''!' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' | '%' as c;
|
| 137 |
s >] ->
|
| 138 |
let id = get_buff (ident2 (store 0 c) s) in
|
| 139 |
keyword_or_error (bp, Stream.count s) id
|
| 140 |
| [< ''~' as c;
|
| 141 |
a =
|
| 142 |
parser
|
| 143 |
[< ''a'..'z' as c; len = ident (store 0 c) >] ep ->
|
| 144 |
("TILDEIDENT", get_buff len), (bp, ep)
|
| 145 |
| [< s >] ->
|
| 146 |
let id = get_buff (ident2 (store 0 c) s) in
|
| 147 |
keyword_or_error (bp, Stream.count s) id >] ->
|
| 148 |
a
|
| 149 |
| [< ''?' as c;
|
| 150 |
a =
|
| 151 |
parser
|
| 152 |
[< ''a'..'z' as c; len = ident (store 0 c) >] ep ->
|
| 153 |
("QUESTIONIDENT", get_buff len), (bp, ep)
|
| 154 |
| [< s >] ->
|
| 155 |
let id = get_buff (ident2 (store 0 c) s) in
|
| 156 |
keyword_or_error (bp, Stream.count s) id >] ->
|
| 157 |
a
|
| 158 |
| [< ''<'; s >] -> less bp s
|
| 159 |
| [< '':' as c1;
|
| 160 |
len =
|
| 161 |
parser
|
| 162 |
[< '']' | ':' | '=' | '>' as c2 >] -> store (store 0 c1) c2
|
| 163 |
| [< >] -> store 0 c1 >] ep ->
|
| 164 |
let id = get_buff len in keyword_or_error (bp, ep) id
|
| 165 |
| [< ''>' | '|' as c1;
|
| 166 |
len =
|
| 167 |
parser
|
| 168 |
[< '']' | '}' as c2 >] -> store (store 0 c1) c2
|
| 169 |
| [< a = ident2 (store 0 c1) >] -> a >] ep ->
|
| 170 |
let id = get_buff len in keyword_or_error (bp, ep) id
|
| 171 |
| [< ''[' | '{' as c1; s >] ->
|
| 172 |
let len =
|
| 173 |
match Stream.npeek 2 s with
|
| 174 |
['<'; '<' | ':'] -> store 0 c1
|
| 175 |
| _ ->
|
| 176 |
match s with parser
|
| 177 |
[< ''|' | '<' | ':' as c2 >] -> store (store 0 c1) c2
|
| 178 |
| [< >] -> store 0 c1
|
| 179 |
in
|
| 180 |
let ep = Stream.count s in
|
| 181 |
let id = get_buff len in keyword_or_error (bp, ep) id
|
| 182 |
| [< ''.';
|
| 183 |
id =
|
| 184 |
parser
|
| 185 |
[< ''.' >] -> ".."
|
| 186 |
| [< >] -> "." >] ep ->
|
| 187 |
keyword_or_error (bp, ep) id
|
| 188 |
| [< '';';
|
| 189 |
id =
|
| 190 |
parser
|
| 191 |
[< '';' >] -> ";;"
|
| 192 |
| [< >] -> ";" >] ep ->
|
| 193 |
keyword_or_error (bp, ep) id
|
| 194 |
| [< ''\\'; s >] ep -> ("LIDENT", get_buff (ident3 0 s)), (bp, ep)
|
| 195 |
| [< 'c >] ep -> keyword_or_error (bp, ep) (String.make 1 c)
|
| 196 |
| [< _ = Stream.empty >] -> ("EOI", ""), (bp, succ bp)
|
| 197 |
and less bp strm =
|
| 198 |
if !no_quotations then
|
| 199 |
match strm with parser
|
| 200 |
[< len = ident2 (store 0 '<') >] ep ->
|
| 201 |
let id = get_buff len in keyword_or_error (bp, ep) id
|
| 202 |
else
|
| 203 |
match strm with parser
|
| 204 |
[< ''<'; len = quotation bp 0 >] ep ->
|
| 205 |
("QUOTATION", ":" ^ get_buff len), (bp, ep)
|
| 206 |
| [< '':';
|
| 207 |
i =
|
| 208 |
(parser
|
| 209 |
[< len = ident 0 >] -> get_buff len);
|
| 210 |
''<' ?? "character '<' expected"; len = quotation bp 0 >] ep ->
|
| 211 |
("QUOTATION", i ^ ":" ^ get_buff len), (bp, ep)
|
| 212 |
| [< len = ident2 (store 0 '<') >] ep ->
|
| 213 |
let id = get_buff len in keyword_or_error (bp, ep) id
|
| 214 |
and string bp len =
|
| 215 |
parser
|
| 216 |
[< ''\"' >] -> len
|
| 217 |
| [< ''\\'; 'c; s >] -> string bp (store (store len '\\') c) s
|
| 218 |
| [< 'c; s >] -> string bp (store len c) s
|
| 219 |
| [< >] ep -> err (bp, ep) "string not terminated"
|
| 220 |
and char bp len =
|
| 221 |
parser
|
| 222 |
[< ''\''; s >] -> if len = 0 then char bp (store len '\'') s else len
|
| 223 |
| [< ''\\'; 'c; s >] -> char bp (store (store len '\\') c) s
|
| 224 |
| [< 'c; s >] -> char bp (store len c) s
|
| 225 |
| [< >] ep -> err (bp, ep) "char not terminated"
|
| 226 |
and dollar bp len =
|
| 227 |
parser
|
| 228 |
[< ''$' >] -> "ANTIQUOT", ":" ^ get_buff len
|
| 229 |
| [< ''a'..'z' | 'A'..'Z' as c; s >] -> antiquot bp (store len c) s
|
| 230 |
| [< ''0'..'9' as c; s >] -> maybe_locate bp (store len c) s
|
| 231 |
| [< '':'; s >] ->
|
| 232 |
let k = get_buff len in
|
| 233 |
"ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s
|
| 234 |
| [< ''\\'; 'c; s >] ->
|
| 235 |
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
|
| 236 |
| [< s >] ->
|
| 237 |
if dfa then
|
| 238 |
match s with parser
|
| 239 |
[< 'c >] ->
|
| 240 |
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
|
| 241 |
| [< >] ep -> err (bp, ep) "antiquotation not terminated"
|
| 242 |
else "", get_buff (ident2 (store 0 '$') s)
|
| 243 |
and maybe_locate bp len =
|
| 244 |
parser
|
| 245 |
[< ''$' >] -> "ANTIQUOT", ":" ^ get_buff len
|
| 246 |
| [< ''0'..'9' as c; s >] -> maybe_locate bp (store len c) s
|
| 247 |
| [< '':'; s >] ->
|
| 248 |
"LOCATE", get_buff len ^ ":" ^ locate_or_antiquot_rest bp 0 s
|
| 249 |
| [< ''\\'; 'c; s >] ->
|
| 250 |
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
|
| 251 |
| [< 'c; s >] ->
|
| 252 |
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
|
| 253 |
| [< >] ep -> err (bp, ep) "antiquotation not terminated"
|
| 254 |
and antiquot bp len =
|
| 255 |
parser
|
| 256 |
[< ''$' >] -> "ANTIQUOT", ":" ^ get_buff len
|
| 257 |
| [< ''a'..'z' | 'A'..'Z' | '0'..'9' as c; s >] ->
|
| 258 |
antiquot bp (store len c) s
|
| 259 |
| [< '':'; s >] ->
|
| 260 |
let k = get_buff len in
|
| 261 |
"ANTIQUOT", k ^ ":" ^ locate_or_antiquot_rest bp 0 s
|
| 262 |
| [< ''\\'; 'c; s >] ->
|
| 263 |
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
|
| 264 |
| [< 'c; s >] ->
|
| 265 |
"ANTIQUOT", ":" ^ locate_or_antiquot_rest bp (store len c) s
|
| 266 |
| [< >] ep -> err (bp, ep) "antiquotation not terminated"
|
| 267 |
and locate_or_antiquot_rest bp len =
|
| 268 |
parser
|
| 269 |
[< ''$' >] -> get_buff len
|
| 270 |
| [< ''\\'; 'c; s >] -> locate_or_antiquot_rest bp (store len c) s
|
| 271 |
| [< 'c; s >] -> locate_or_antiquot_rest bp (store len c) s
|
| 272 |
| [< >] ep -> err (bp, ep) "antiquotation not terminated"
|
| 273 |
and quotation bp len =
|
| 274 |
parser
|
| 275 |
[< ''>'; s >] -> maybe_end_quotation bp len s
|
| 276 |
| [< ''<'; s >] ->
|
| 277 |
quotation bp (maybe_nested_quotation bp (store len '<') strm__) s
|
| 278 |
| [< ''\\';
|
| 279 |
len =
|
| 280 |
(parser
|
| 281 |
[< ''>' | '<' | '\\' as c >] -> store len c
|
| 282 |
| [< >] -> store len '\\');
|
| 283 |
s >] ->
|
| 284 |
quotation bp len s
|
| 285 |
| [< 'c; s >] -> quotation bp (store len c) s
|
| 286 |
| [< >] ep -> err (bp, ep) "quotation not terminated"
|
| 287 |
and maybe_nested_quotation bp len =
|
| 288 |
parser
|
| 289 |
[< ''<'; s >] -> mstore (quotation bp (store len '<') s) ">>"
|
| 290 |
| [< '':'; len = ident (store len ':');
|
| 291 |
a =
|
| 292 |
parser
|
| 293 |
[< ''<'; s >] -> mstore (quotation bp (store len '<') s) ">>"
|
| 294 |
| [< >] -> len >] ->
|
| 295 |
a
|
| 296 |
| [< >] -> len
|
| 297 |
and maybe_end_quotation bp len =
|
| 298 |
parser
|
| 299 |
[< ''>' >] -> len
|
| 300 |
| [< a = quotation bp (store len '>') >] -> a
|
| 301 |
and left_paren bp =
|
| 302 |
parser
|
| 303 |
[< ''*'; _ = comment bp; a = next_token >] -> a
|
| 304 |
| [< >] ep -> keyword_or_error (bp, ep) "("
|
| 305 |
and comment bp =
|
| 306 |
parser
|
| 307 |
[< ''('; s >] -> left_paren_in_comment bp s
|
| 308 |
| [< ''*'; s >] -> star_in_comment bp s
|
| 309 |
| [< ''\"'; _ = string bp 0; s >] -> comment bp s
|
| 310 |
| [< ''\''; s >] -> quote_in_comment bp s
|
| 311 |
| [< 'c; s >] -> comment bp s
|
| 312 |
| [< >] ep -> err (bp, ep) "comment not terminated"
|
| 313 |
and quote_in_comment bp =
|
| 314 |
parser
|
| 315 |
[< ''\''; s >] -> comment bp s
|
| 316 |
| [< ''\\'; s >] -> quote_antislash_in_comment bp 0 s
|
| 317 |
| [< '_; s >] -> quote_any_in_comment bp s
|
| 318 |
| [< a = comment bp >] -> a
|
| 319 |
and quote_any_in_comment bp =
|
| 320 |
parser
|
| 321 |
[< ''\''; s >] -> comment bp s
|
| 322 |
| [< a = comment bp >] -> a
|
| 323 |
and quote_antislash_in_comment bp len =
|
| 324 |
parser
|
| 325 |
[< ''\''; s >] -> comment bp s
|
| 326 |
| [< ''\\' | '\"' | 'n' | 't' | 'b' | 'r'; s >] ->
|
| 327 |
quote_any_in_comment bp s
|
| 328 |
| [< ''0'..'9'; s >] -> quote_antislash_digit_in_comment bp s
|
| 329 |
| [< a = comment bp >] -> a
|
| 330 |
and quote_antislash_digit_in_comment bp =
|
| 331 |
parser
|
| 332 |
[< ''0'..'9'; s >] -> quote_antislash_digit2_in_comment bp s
|
| 333 |
| [< a = comment bp >] -> a
|
| 334 |
and quote_antislash_digit2_in_comment bp =
|
| 335 |
parser
|
| 336 |
[< ''0'..'9'; s >] -> quote_any_in_comment bp s
|
| 337 |
| [< a = comment bp >] -> a
|
| 338 |
and left_paren_in_comment bp =
|
| 339 |
parser
|
| 340 |
[< ''*'; s >] -> comment bp s; comment bp s
|
| 341 |
| [< a = comment bp >] -> a
|
| 342 |
and star_in_comment bp =
|
| 343 |
parser
|
| 344 |
[< '')' >] -> ()
|
| 345 |
| [< a = comment bp >] -> a
|
| 346 |
and linenum bp =
|
| 347 |
parser
|
| 348 |
[< ''0'..'9'; _ = digits; _ = spaces_tabs; ''\"'; _ = any_to_nl; s >] ->
|
| 349 |
next_token s
|
| 350 |
| [< >] -> keyword_or_error (bp, bp + 1) "#"
|
| 351 |
and spaces_tabs =
|
| 352 |
parser
|
| 353 |
[< '' ' | '\t'; s >] -> spaces_tabs s
|
| 354 |
| [< >] -> ()
|
| 355 |
and digits =
|
| 356 |
parser
|
| 357 |
[< ''0'..'9'; s >] -> digits s
|
| 358 |
| [< >] -> ()
|
| 359 |
and any_to_nl =
|
| 360 |
parser
|
| 361 |
[< ''\013' | '\010' >] -> ()
|
| 362 |
| [< '_; s >] -> any_to_nl s
|
| 363 |
| [< >] -> ()
|
| 364 |
in
|
| 365 |
fun cstrm ->
|
| 366 |
try next_token cstrm with
|
| 367 |
Stream.Error str -> err (Stream.count cstrm, Stream.count cstrm + 1) str
|
| 368 |
|
| 369 |
let dollar_for_antiquotation = ref true
|
| 370 |
|
| 371 |
let func kwd_table =
|
| 372 |
let find = Hashtbl.find kwd_table in
|
| 373 |
let dfa = !dollar_for_antiquotation in
|
| 374 |
Token.lexer_func_of_parser (next_token_fun dfa find)
|
| 375 |
|
| 376 |
let rec check_keyword_stream =
|
| 377 |
parser
|
| 378 |
[< _ = check; _ = Stream.empty >] -> true
|
| 379 |
and check =
|
| 380 |
parser
|
| 381 |
[< ''A'..'Z' | 'a'..'z' | '\192'..'\214' | '\216'..'\246' |
|
| 382 |
'\248'..'\255';
|
| 383 |
s >] ->
|
| 384 |
check_ident s
|
| 385 |
| [< ''!' | '?' | '~' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' |
|
| 386 |
'%' | '.';
|
| 387 |
s >] ->
|
| 388 |
check_ident2 s
|
| 389 |
| [< ''<'; s >] ->
|
| 390 |
begin match Stream.npeek 1 s with
|
| 391 |
[':' | '<'] -> ()
|
| 392 |
| _ -> check_ident2 s
|
| 393 |
end
|
| 394 |
| [< '':';
|
| 395 |
_ =
|
| 396 |
parser
|
| 397 |
[< '']' | ':' | '=' | '>' >] -> ()
|
| 398 |
| [< >] -> () >] ep ->
|
| 399 |
()
|
| 400 |
| [< ''>' | '|';
|
| 401 |
_ =
|
| 402 |
parser
|
| 403 |
[< '']' | '}' >] -> ()
|
| 404 |
| [< a = check_ident2 >] -> a >] ->
|
| 405 |
()
|
| 406 |
| [< ''[' | '{'; s >] ->
|
| 407 |
begin match Stream.npeek 2 s with
|
| 408 |
['<'; '<' | ':'] -> ()
|
| 409 |
| _ ->
|
| 410 |
match s with parser
|
| 411 |
[< ''|' | '<' | ':' >] -> () | [< >] -> ()
|
| 412 |
end
|
| 413 |
| [< '';';
|
| 414 |
_ =
|
| 415 |
parser
|
| 416 |
[< '';' >] -> ()
|
| 417 |
| [< >] -> () >] ->
|
| 418 |
()
|
| 419 |
| [< '_ >] -> ()
|
| 420 |
and check_ident =
|
| 421 |
parser
|
| 422 |
[< ''A'..'Z' | 'a'..'z' | '\192'..'\214' | '\216'..'\246' |
|
| 423 |
'\248'..'\255' | '0'..'9' | '_' | '\'';
|
| 424 |
s >] ->
|
| 425 |
check_ident s
|
| 426 |
| [< >] -> ()
|
| 427 |
and check_ident2 =
|
| 428 |
parser
|
| 429 |
[< ''!' | '?' | '~' | '=' | '@' | '^' | '&' | '+' | '-' | '*' | '/' |
|
| 430 |
'%' | '.' | ':' | '<' | '>' | '|';
|
| 431 |
s >] ->
|
| 432 |
check_ident2 s
|
| 433 |
| [< >] -> ()
|
| 434 |
|
| 435 |
let check_keyword s =
|
| 436 |
try check_keyword_stream (Stream.of_string s) with
|
| 437 |
_ -> false
|
| 438 |
|
| 439 |
let error_no_respect_rules p_con p_prm =
|
| 440 |
raise
|
| 441 |
(Token.Error
|
| 442 |
("the token " ^
|
| 443 |
(if p_con = "" then "\"" ^ p_prm ^ "\""
|
| 444 |
else if p_prm = "" then p_con
|
| 445 |
else p_con ^ " \"" ^ p_prm ^ "\"") ^
|
| 446 |
" does not respect Plexer rules"))
|
| 447 |
|
| 448 |
let error_ident_and_keyword p_con p_prm =
|
| 449 |
raise
|
| 450 |
(Token.Error
|
| 451 |
("the token \"" ^ p_prm ^ "\" is used as " ^ p_con ^
|
| 452 |
" and as keyword"))
|
| 453 |
|
| 454 |
let using_token kwd_table ident_table (p_con, p_prm) =
|
| 455 |
match p_con with
|
| 456 |
"" ->
|
| 457 |
if not (Hashtbl.mem kwd_table p_prm) then
|
| 458 |
if check_keyword p_prm then
|
| 459 |
if Hashtbl.mem ident_table p_prm then
|
| 460 |
error_ident_and_keyword (Hashtbl.find ident_table p_prm) p_prm
|
| 461 |
else Hashtbl.add kwd_table p_prm p_prm
|
| 462 |
else error_no_respect_rules p_con p_prm
|
| 463 |
| "LIDENT" ->
|
| 464 |
if p_prm = "" then ()
|
| 465 |
else
|
| 466 |
begin match p_prm.[0] with
|
| 467 |
'A'..'Z' -> error_no_respect_rules p_con p_prm
|
| 468 |
| _ ->
|
| 469 |
if Hashtbl.mem kwd_table p_prm then
|
| 470 |
error_ident_and_keyword p_con p_prm
|
| 471 |
else Hashtbl.add ident_table p_prm p_con
|
| 472 |
end
|
| 473 |
| "UIDENT" ->
|
| 474 |
if p_prm = "" then ()
|
| 475 |
else
|
| 476 |
begin match p_prm.[0] with
|
| 477 |
'a'..'z' -> error_no_respect_rules p_con p_prm
|
| 478 |
| _ ->
|
| 479 |
if Hashtbl.mem kwd_table p_prm then
|
| 480 |
error_ident_and_keyword p_con p_prm
|
| 481 |
else Hashtbl.add ident_table p_prm p_con
|
| 482 |
end
|
| 483 |
| "TILDEIDENT" | "QUESTIONIDENT" | "INT" | "FLOAT" | "CHAR" | "STRING" |
|
| 484 |
"QUOTATION" | "ANTIQUOT" | "LOCATE" | "EOI" ->
|
| 485 |
()
|
| 486 |
| _ ->
|
| 487 |
raise
|
| 488 |
(Token.Error
|
| 489 |
("the constructor \"" ^ p_con ^ "\" is not recognized by Plexer"))
|
| 490 |
|
| 491 |
let removing_token kwd_table ident_table (p_con, p_prm) =
|
| 492 |
match p_con with
|
| 493 |
"" -> Hashtbl.remove kwd_table p_prm
|
| 494 |
| "LIDENT" | "UIDENT" ->
|
| 495 |
if p_prm <> "" then Hashtbl.remove ident_table p_prm
|
| 496 |
| _ -> ()
|
| 497 |
|
| 498 |
let text =
|
| 499 |
function
|
| 500 |
"", t -> "'" ^ t ^ "'"
|
| 501 |
| "LIDENT", "" -> "lowercase identifier"
|
| 502 |
| "LIDENT", t -> "'" ^ t ^ "'"
|
| 503 |
| "UIDENT", "" -> "uppercase identifier"
|
| 504 |
| "UIDENT", t -> "'" ^ t ^ "'"
|
| 505 |
| "INT", "" -> "integer"
|
| 506 |
| "INT", s -> "'" ^ s ^ "'"
|
| 507 |
| "FLOAT", "" -> "float"
|
| 508 |
| "STRING", "" -> "string"
|
| 509 |
| "CHAR", "" -> "char"
|
| 510 |
| "QUOTATION", "" -> "quotation"
|
| 511 |
| "ANTIQUOT", k -> "antiquot \"" ^ k ^ "\""
|
| 512 |
| "LOCATE", "" -> "locate"
|
| 513 |
| "EOI", "" -> "end of input"
|
| 514 |
| con, "" -> con
|
| 515 |
| con, prm -> con ^ " \"" ^ prm ^ "\""
|
| 516 |
|
| 517 |
let eq_before_colon p e =
|
| 518 |
let rec loop i =
|
| 519 |
if i == String.length e then
|
| 520 |
failwith "Internal error in Plexer: incorrect ANTIQUOT"
|
| 521 |
else if i == String.length p then e.[i] == ':'
|
| 522 |
else if p.[i] == e.[i] then loop (i + 1)
|
| 523 |
else false
|
| 524 |
in
|
| 525 |
loop 0
|
| 526 |
|
| 527 |
let after_colon e =
|
| 528 |
try
|
| 529 |
let i = String.index e ':' in
|
| 530 |
String.sub e (i + 1) (String.length e - i - 1)
|
| 531 |
with
|
| 532 |
Not_found -> ""
|
| 533 |
|
| 534 |
let tok_match =
|
| 535 |
function
|
| 536 |
"ANTIQUOT", p_prm ->
|
| 537 |
begin function
|
| 538 |
"ANTIQUOT", prm when eq_before_colon p_prm prm -> after_colon prm
|
| 539 |
| _ -> raise Stream.Failure
|
| 540 |
end
|
| 541 |
| tok -> Token.default_match tok
|
| 542 |
|
| 543 |
let gmake () =
|
| 544 |
let kwd_table = Hashtbl.create 301 in
|
| 545 |
let id_table = Hashtbl.create 301 in
|
| 546 |
{tok_func = func kwd_table; tok_using = using_token kwd_table id_table;
|
| 547 |
tok_removing = removing_token kwd_table id_table; tok_match = tok_match;
|
| 548 |
tok_text = text}
|
| 549 |
|
| 550 |
let tparse =
|
| 551 |
function
|
| 552 |
"ANTIQUOT", p_prm ->
|
| 553 |
let p =
|
| 554 |
parser
|
| 555 |
[< '"ANTIQUOT", prm when eq_before_colon p_prm prm >] ->
|
| 556 |
after_colon prm
|
| 557 |
in
|
| 558 |
Some p
|
| 559 |
| _ -> None
|
| 560 |
|
| 561 |
let make () =
|
| 562 |
let kwd_table = Hashtbl.create 301 in
|
| 563 |
let id_table = Hashtbl.create 301 in
|
| 564 |
{func = func kwd_table; using = using_token kwd_table id_table;
|
| 565 |
removing = removing_token kwd_table id_table; tparse = tparse; text = text}
|