/[svn]/parser/wlexer.mll
ViewVC logotype

Contents of /parser/wlexer.mll

Parent Directory Parent Directory | Revision Log Revision Log


Revision 99 - (hide annotations)
Tue Jul 10 17:06:11 2007 UTC (5 years, 10 months ago) by abate
File size: 5717 byte(s)
[r2002-11-10 12:49:51 by cvscast] Empty log message

Original author: cvscast
Date: 2002-11-10 12:49:51+00:00
1 abate 81 (* File to be processed by wlex, not ocamllex ! *)
2     (* Loosely inspired from OCaml lexer.mll *)
3    
4     classes
5     encoding_error
6     xml_char
7     blank
8     lowercase uppercase ascii_digit
9     "_<>=.,:;+-*/@&{}[]()|?`\"\\\'!"
10    
11    
12     {
13     let keywords = Hashtbl.create 17
14    
15     let error i j exn = raise (Location.Location ((i,j),exn))
16     exception Illegal_character of char
17     exception Unterminated_comment
18     exception Unterminated_string
19     exception Unterminated_string_in_comment
20    
21     (* Buffer for string literals *)
22    
23     let string_buff = Buffer.create 1024
24     let store_char = Buffer.add_char string_buff
25     let get_stored_string () =
26     let s = Buffer.contents string_buff in
27     Buffer.clear string_buff;
28     s
29    
30     let string_start_pos = ref 0;;
31     let comment_start_pos : int list ref = ref [];;
32    
33     let char_for_decimal_code s =
34     let s = String.sub s 1 (String.length s - 1) in
35     let c = int_of_string s in
36     assert ( c < 256 ); (* TODO: handle Unicode *)
37     Char.chr c
38     }
39    
40    
41     let identchar = lowercase | uppercase | ascii_digit | '_'
42    
43     rule token = parse
44     blank+ { token engine lexbuf }
45     | (lowercase | '_') identchar* {
46     let s = Lexing.lexeme lexbuf in
47     if Hashtbl.mem keywords s then "",s else "LIDENT",s
48     }
49     | uppercase identchar* { "UIDENT",Lexing.lexeme lexbuf }
50     | ascii_digit+ { "INT",Lexing.lexeme lexbuf }
51     | "<" blank* (lowercase | uppercase) identchar* {
52     let s = Lexing.lexeme lexbuf in
53     "TAG", String.sub s 1 (String.length s - 1)
54     }
55     | [ "<>=.,:;+-*/@&{}[]()|?`!" ]
56 abate 99 | "->" | "::" | ";;" | "--" | ":=" | "\\"
57 abate 81 | ["?+*"] "?"
58     { "",Lexing.lexeme lexbuf }
59    
60     | '"' | "'"
61     { let string_start = Lexing.lexeme_start lexbuf in
62     string_start_pos := string_start;
63     let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
64     if double_quote then string2 engine lexbuf else string1 engine lexbuf;
65     lexbuf.Lexing.lex_start_pos <-
66     string_start - lexbuf.Lexing.lex_abs_pos;
67     (if double_quote then "STRING2" else "STRING1"),
68     (get_stored_string()) }
69    
70     | "(*"
71     { comment_start_pos := [Lexing.lexeme_start lexbuf];
72     comment engine lexbuf;
73     token engine lexbuf }
74    
75     | eof
76     { "EOI","" }
77     | _
78     { error
79     (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
80     (Illegal_character ((Lexing.lexeme lexbuf).[0])) }
81    
82     and comment = parse
83     "(*"
84     { comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
85     comment engine lexbuf;
86     }
87     | "*)"
88     { comment_start_pos := List.tl !comment_start_pos;
89     if !comment_start_pos <> [] then comment engine lexbuf;
90     }
91     | '"' | "'"
92     { string_start_pos := Lexing.lexeme_start lexbuf;
93     let string =
94     if Lexing.lexeme_char lexbuf 0 = '"' then string2 else string1 in
95     (try string engine lexbuf
96     with Location.Location (_,Unterminated_string) ->
97     let st = List.hd !comment_start_pos in
98     error st (st+2) Unterminated_string_in_comment);
99     Buffer.clear string_buff;
100     comment engine lexbuf }
101     | eof
102     { let st = List.hd !comment_start_pos in
103     error st (st+2) Unterminated_comment
104     }
105     | _
106     { comment engine lexbuf }
107    
108     and string2 = parse
109     '"'
110     { () }
111     | '\\' ['\\' '"']
112     { store_char (Lexing.lexeme_char lexbuf 1);
113     string2 engine lexbuf }
114     | '\\' ascii_digit+
115     { store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
116     string2 engine lexbuf }
117     | eof
118     { error !string_start_pos (!string_start_pos+1) Unterminated_string }
119     | _
120     { store_char (Lexing.lexeme_char lexbuf 0);
121     (* TODO: Unicode *)
122     string2 engine lexbuf }
123    
124     and string1 = parse
125     "'"
126     { () }
127     | '\\' ['\\' '\'']
128     { store_char (Lexing.lexeme_char lexbuf 1);
129     string1 engine lexbuf }
130     | '\\' ascii_digit+
131     { store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
132     string1 engine lexbuf }
133     | eof
134     { error !string_start_pos (!string_start_pos+1) Unterminated_string }
135     | _
136     { store_char (Lexing.lexeme_char lexbuf 0);
137     string1 engine lexbuf }
138    
139     {
140    
141     let lexer_func_of_wlex lexfun lexengine cs =
142     let lb =
143     Lexing.from_function
144     (fun s n ->
145     try s.[0] <- Stream.next cs; 1 with Stream.Failure -> 0)
146     in
147     let next () =
148     let tok = lexfun lexengine lb in
149     let loc = (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in
150     (tok, loc)
151     in
152     Token.make_stream_and_location next
153    
154     let register_kw (s1,s2) =
155     if s1 = "" then
156     match s2.[0] with
157     | 'a' .. 'z' when not (Hashtbl.mem keywords s2) ->
158     Hashtbl.add keywords s2 ()
159     | _ -> ()
160    
161    
162     let lexer lexfun lexengine =
163     {
164     Token.tok_func = lexer_func_of_wlex lexfun lexengine;
165     Token.tok_using = register_kw;
166     Token.tok_removing = (fun _ -> ());
167     Token.tok_match = Token.default_match;
168     Token.tok_text = Token.lexer_text
169     }
170    
171     let classes =
172     let c i = (i,i) in
173     let i ch1 ch2 = (Char.code ch1, Char.code ch2) in
174     [ (ascii_digit, [i '0' '9']);
175     (lowercase, [i 'a' 'z']);
176     (uppercase, [i 'A' 'Z']);
177     (blank, [c 8; c 9; c 10; c 13; c 32]);
178     ]
179    
180     let table =
181     assert(nb_classes <= 256);
182     let v = String.make 256 (Char.chr encoding_error) in
183     let fill_int c (i, j) = String.fill v i (j-i+1) c in
184     let fill_class (c, l) = List.iter (fill_int (Char.chr c)) l in
185     let fill_char (ch, cl) = v.[ch] <- Char.chr cl in
186     List.iter fill_class classes;
187     List.iter fill_char one_char_classes;
188     v
189    
190     let utf8_engine = Lex_engines.engine_tiny_utf8 table
191     (fun c ->
192     if c>=0x10000 && c < 0x11000 then xml_char
193     else encoding_error)
194    
195     let latin1_engine = Lex_engines.engine_tiny_8bit table
196     }

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5