/[svn]/parser/wlexer.mll
ViewVC logotype

Contents of /parser/wlexer.mll

Parent Directory Parent Directory | Revision Log Revision Log


Revision 237 - (show annotations)
Tue Jul 10 17:18:04 2007 UTC (5 years, 11 months ago) by abate
File size: 6332 byte(s)
[r2003-03-14 16:14:17 by cvscast] Empty log message

Original author: cvscast
Date: 2003-03-14 16:15:14+00:00
1 (* File to be processed by wlex, not ocamllex ! *)
2 (* Loosely inspired from OCaml lexer.mll *)
3
4 classes
5 encoding_error
6 xml_char
7 blank
8 lowercase uppercase ascii_digit
9 "_<>=.,:;+-*/@&{}[]()|?`\"\\\'!"
10
11
12 {
13 let keywords = Hashtbl.create 17
14
15 let error i j exn = raise (Location.Location ((i,j),exn))
16 exception Illegal_character of char
17 exception Unterminated_comment
18 exception Unterminated_string
19 exception Unterminated_string_in_comment
20
21
22 (* Buffer for string literals *)
23
24 let string_buff = Buffer.create 1024
25 let store_char = Buffer.add_char string_buff
26 let get_stored_string () =
27 let s = Buffer.contents string_buff in
28 Buffer.clear string_buff;
29 s
30 let store_special = function
31 | 'n' -> store_char '\n'
32 | 'r' -> store_char '\r'
33 | 't' -> store_char '\t'
34 | c -> raise (Illegal_character '\\')
35
36 let string_start_pos = ref 0;;
37 let comment_start_pos : int list ref = ref [];;
38
39 let char_for_decimal_code s =
40 let s = String.sub s 1 (String.length s - 1) in
41 let c = int_of_string s in
42 assert ( c < 256 ); (* TODO: handle Unicode *)
43 Char.chr c
44
45 let rec tag_of_tag s i =
46 match s.[i] with
47 | '\008' | '\009' | '\010' | '\013' | '\032' -> tag_of_tag s (i+1)
48 | _ -> String.sub s i (String.length s - i)
49
50 }
51
52
53 let identchar = lowercase | uppercase | ascii_digit | '_' | '\'' | '-'
54 let ident = identchar* ( ':' identchar+)*
55
56 rule token = parse
57 blank+ { token engine lexbuf }
58 | (lowercase | '_') ident {
59 let s = Lexing.lexeme lexbuf in
60 if Hashtbl.mem keywords s then "",s else "LIDENT",s
61 }
62 | uppercase ident { "UIDENT",Lexing.lexeme lexbuf }
63 | ascii_digit+ { "INT",Lexing.lexeme lexbuf }
64 | "<" blank* (lowercase | uppercase) ident {
65 let s = Lexing.lexeme lexbuf in
66 "TAG", tag_of_tag s 1
67 }
68 | [ "<>=.,:;+-*/@&{}[]()|?`!" ]
69 | "->" | "::" | ";;" | "--" | ":=" | "\\" | "++"
70 | "{|" | "|}" | "<=" | ">=" | "<<" | ">>"
71 | ["?+*"] "?"
72 { "",Lexing.lexeme lexbuf }
73
74 | '"' | "'"
75 { let string_start = Lexing.lexeme_start lexbuf in
76 string_start_pos := string_start;
77 let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
78 if double_quote then string2 engine lexbuf else string1 engine lexbuf;
79 lexbuf.Lexing.lex_start_pos <-
80 string_start - lexbuf.Lexing.lex_abs_pos;
81 (if double_quote then "STRING2" else "STRING1"),
82 (get_stored_string()) }
83
84 | "(*"
85 { comment_start_pos := [Lexing.lexeme_start lexbuf];
86 comment engine lexbuf;
87 token engine lexbuf }
88
89 | eof
90 { "EOI","" }
91 | _
92 { error
93 (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
94 (Illegal_character ((Lexing.lexeme lexbuf).[0])) }
95
96 and comment = parse
97 "(*"
98 { comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
99 comment engine lexbuf;
100 }
101 | "*)"
102 { comment_start_pos := List.tl !comment_start_pos;
103 if !comment_start_pos <> [] then comment engine lexbuf;
104 }
105 | '"' | "'"
106 { string_start_pos := Lexing.lexeme_start lexbuf;
107 let string =
108 if Lexing.lexeme_char lexbuf 0 = '"' then string2 else string1 in
109 (try string engine lexbuf
110 with Location.Location (_,Unterminated_string) ->
111 let st = List.hd !comment_start_pos in
112 error st (st+2) Unterminated_string_in_comment);
113 Buffer.clear string_buff;
114 comment engine lexbuf }
115 | eof
116 { let st = List.hd !comment_start_pos in
117 error st (st+2) Unterminated_comment
118 }
119 | _
120 { comment engine lexbuf }
121
122 and string2 = parse
123 '"'
124 { () }
125 | '\\' ['\\' '"']
126 { store_char (Lexing.lexeme_char lexbuf 1);
127 string2 engine lexbuf }
128 | '\\' lowercase {
129 store_special (Lexing.lexeme_char lexbuf 1);
130 string1 engine lexbuf }
131 | '\\' ascii_digit+
132 { store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
133 string2 engine lexbuf }
134 | eof
135 { error !string_start_pos (!string_start_pos+1) Unterminated_string }
136 | _
137 { store_char (Lexing.lexeme_char lexbuf 0);
138 (* TODO: Unicode *)
139 string2 engine lexbuf }
140
141 and string1 = parse
142 "'"
143 { () }
144 | '\\' ['\\' '\'']
145 { store_char (Lexing.lexeme_char lexbuf 1);
146 string1 engine lexbuf }
147 | '\\' lowercase {
148 store_special (Lexing.lexeme_char lexbuf 1);
149 string1 engine lexbuf }
150 | '\\' ascii_digit+
151 { store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
152 string1 engine lexbuf }
153 | eof
154 { error !string_start_pos (!string_start_pos+1) Unterminated_string }
155 | _
156 { store_char (Lexing.lexeme_char lexbuf 0);
157 string1 engine lexbuf }
158
159 {
160
161 let lexer_func_of_wlex lexfun lexengine cs =
162 let lb =
163 Lexing.from_function
164 (fun s n ->
165 try s.[0] <- Stream.next cs; 1 with Stream.Failure -> 0)
166 in
167 let next () =
168 let tok = lexfun lexengine lb in
169 let loc = (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in
170 (tok, loc)
171 in
172 Token.make_stream_and_location next
173
174 let register_kw (s1,s2) =
175 if s1 = "" then
176 match s2.[0] with
177 | 'a' .. 'z' when not (Hashtbl.mem keywords s2) ->
178 Hashtbl.add keywords s2 ()
179 | _ -> ()
180
181
182 let lexer lexfun lexengine =
183 {
184 Token.tok_func = lexer_func_of_wlex lexfun lexengine;
185 Token.tok_using = register_kw;
186 Token.tok_removing = (fun _ -> ());
187 Token.tok_match = Token.default_match;
188 Token.tok_text = Token.lexer_text
189 }
190
191 let classes =
192 let c i = (i,i) in
193 let i ch1 ch2 = (Char.code ch1, Char.code ch2) in
194 [ (ascii_digit, [i '0' '9']);
195 (lowercase, [i 'a' 'z']);
196 (uppercase, [i 'A' 'Z']);
197 (blank, [c 8; c 9; c 10; c 13; c 32]);
198 ]
199
200 let table =
201 assert(nb_classes <= 256);
202 let v = String.make 256 (Char.chr encoding_error) in
203 let fill_int c (i, j) = String.fill v i (j-i+1) c in
204 let fill_class (c, l) = List.iter (fill_int (Char.chr c)) l in
205 let fill_char (ch, cl) = v.[ch] <- Char.chr cl in
206 List.iter fill_class classes;
207 List.iter fill_char one_char_classes;
208 v
209
210 let utf8_engine = Lex_engines.engine_tiny_utf8 table
211 (fun c ->
212 if c>=0x10000 && c < 0x11000 then xml_char
213 else encoding_error)
214
215 let latin1_engine = Lex_engines.engine_tiny_8bit table
216 }

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5