/[svn]/parser/wlexer.mll
ViewVC logotype

Contents of /parser/wlexer.mll

Parent Directory Parent Directory | Revision Log Revision Log


Revision 162 - (show annotations)
Tue Jul 10 17:11:42 2007 UTC (5 years, 10 months ago) by abate
File size: 5918 byte(s)
[r2002-12-01 20:46:51 by cvscast] Empty log message

Original author: cvscast
Date: 2002-12-01 20:46:51+00:00
1 (* File to be processed by wlex, not ocamllex ! *)
2 (* Loosely inspired from OCaml lexer.mll *)
3
4 classes
5 encoding_error
6 xml_char
7 blank
8 lowercase uppercase ascii_digit
9 "_<>=.,:;+-*/@&{}[]()|?`\"\\\'!"
10
11
12 {
13 let keywords = Hashtbl.create 17
14
15 let error i j exn = raise (Location.Location ((i,j),exn))
16 exception Illegal_character of char
17 exception Unterminated_comment
18 exception Unterminated_string
19 exception Unterminated_string_in_comment
20
21 (* Buffer for string literals *)
22
23 let string_buff = Buffer.create 1024
24 let store_char = Buffer.add_char string_buff
25 let get_stored_string () =
26 let s = Buffer.contents string_buff in
27 Buffer.clear string_buff;
28 s
29
30 let string_start_pos = ref 0;;
31 let comment_start_pos : int list ref = ref [];;
32
33 let char_for_decimal_code s =
34 let s = String.sub s 1 (String.length s - 1) in
35 let c = int_of_string s in
36 assert ( c < 256 ); (* TODO: handle Unicode *)
37 Char.chr c
38
39 let rec tag_of_tag s i =
40 match s.[i] with
41 | '\008' | '\009' | '\010' | '\013' | '\032' -> tag_of_tag s (i+1)
42 | _ -> String.sub s i (String.length s - i)
43
44 }
45
46
47 let identchar = lowercase | uppercase | ascii_digit | '_' | '\''
48 let ident = identchar* ( ':' identchar+)*
49
50 rule token = parse
51 blank+ { token engine lexbuf }
52 | (lowercase | '_') ident {
53 let s = Lexing.lexeme lexbuf in
54 if Hashtbl.mem keywords s then "",s else "LIDENT",s
55 }
56 | uppercase ident { "UIDENT",Lexing.lexeme lexbuf }
57 | ascii_digit+ { "INT",Lexing.lexeme lexbuf }
58 | "<" blank* (lowercase | uppercase) ident {
59 let s = Lexing.lexeme lexbuf in
60 "TAG", tag_of_tag s 1
61 }
62 | [ "<>=.,:;+-*/@&{}[]()|?`!" ]
63 | "->" | "::" | ";;" | "--" | ":=" | "\\"
64 | "{|" | "|}"
65 | ["?+*"] "?"
66 { "",Lexing.lexeme lexbuf }
67
68 | '"' | "'"
69 { let string_start = Lexing.lexeme_start lexbuf in
70 string_start_pos := string_start;
71 let double_quote = Lexing.lexeme_char lexbuf 0 = '"' in
72 if double_quote then string2 engine lexbuf else string1 engine lexbuf;
73 lexbuf.Lexing.lex_start_pos <-
74 string_start - lexbuf.Lexing.lex_abs_pos;
75 (if double_quote then "STRING2" else "STRING1"),
76 (get_stored_string()) }
77
78 | "(*"
79 { comment_start_pos := [Lexing.lexeme_start lexbuf];
80 comment engine lexbuf;
81 token engine lexbuf }
82
83 | eof
84 { "EOI","" }
85 | _
86 { error
87 (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
88 (Illegal_character ((Lexing.lexeme lexbuf).[0])) }
89
90 and comment = parse
91 "(*"
92 { comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
93 comment engine lexbuf;
94 }
95 | "*)"
96 { comment_start_pos := List.tl !comment_start_pos;
97 if !comment_start_pos <> [] then comment engine lexbuf;
98 }
99 | '"' | "'"
100 { string_start_pos := Lexing.lexeme_start lexbuf;
101 let string =
102 if Lexing.lexeme_char lexbuf 0 = '"' then string2 else string1 in
103 (try string engine lexbuf
104 with Location.Location (_,Unterminated_string) ->
105 let st = List.hd !comment_start_pos in
106 error st (st+2) Unterminated_string_in_comment);
107 Buffer.clear string_buff;
108 comment engine lexbuf }
109 | eof
110 { let st = List.hd !comment_start_pos in
111 error st (st+2) Unterminated_comment
112 }
113 | _
114 { comment engine lexbuf }
115
116 and string2 = parse
117 '"'
118 { () }
119 | '\\' ['\\' '"']
120 { store_char (Lexing.lexeme_char lexbuf 1);
121 string2 engine lexbuf }
122 | '\\' ascii_digit+
123 { store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
124 string2 engine lexbuf }
125 | eof
126 { error !string_start_pos (!string_start_pos+1) Unterminated_string }
127 | _
128 { store_char (Lexing.lexeme_char lexbuf 0);
129 (* TODO: Unicode *)
130 string2 engine lexbuf }
131
132 and string1 = parse
133 "'"
134 { () }
135 | '\\' ['\\' '\'']
136 { store_char (Lexing.lexeme_char lexbuf 1);
137 string1 engine lexbuf }
138 | '\\' ascii_digit+
139 { store_char (char_for_decimal_code (Lexing.lexeme lexbuf));
140 string1 engine lexbuf }
141 | eof
142 { error !string_start_pos (!string_start_pos+1) Unterminated_string }
143 | _
144 { store_char (Lexing.lexeme_char lexbuf 0);
145 string1 engine lexbuf }
146
147 {
148
149 let lexer_func_of_wlex lexfun lexengine cs =
150 let lb =
151 Lexing.from_function
152 (fun s n ->
153 try s.[0] <- Stream.next cs; 1 with Stream.Failure -> 0)
154 in
155 let next () =
156 let tok = lexfun lexengine lb in
157 let loc = (Lexing.lexeme_start lb, Lexing.lexeme_end lb) in
158 (tok, loc)
159 in
160 Token.make_stream_and_location next
161
162 let register_kw (s1,s2) =
163 if s1 = "" then
164 match s2.[0] with
165 | 'a' .. 'z' when not (Hashtbl.mem keywords s2) ->
166 Hashtbl.add keywords s2 ()
167 | _ -> ()
168
169
170 let lexer lexfun lexengine =
171 {
172 Token.tok_func = lexer_func_of_wlex lexfun lexengine;
173 Token.tok_using = register_kw;
174 Token.tok_removing = (fun _ -> ());
175 Token.tok_match = Token.default_match;
176 Token.tok_text = Token.lexer_text
177 }
178
179 let classes =
180 let c i = (i,i) in
181 let i ch1 ch2 = (Char.code ch1, Char.code ch2) in
182 [ (ascii_digit, [i '0' '9']);
183 (lowercase, [i 'a' 'z']);
184 (uppercase, [i 'A' 'Z']);
185 (blank, [c 8; c 9; c 10; c 13; c 32]);
186 ]
187
188 let table =
189 assert(nb_classes <= 256);
190 let v = String.make 256 (Char.chr encoding_error) in
191 let fill_int c (i, j) = String.fill v i (j-i+1) c in
192 let fill_class (c, l) = List.iter (fill_int (Char.chr c)) l in
193 let fill_char (ch, cl) = v.[ch] <- Char.chr cl in
194 List.iter fill_class classes;
195 List.iter fill_char one_char_classes;
196 v
197
198 let utf8_engine = Lex_engines.engine_tiny_utf8 table
199 (fun c ->
200 if c>=0x10000 && c < 0x11000 then xml_char
201 else encoding_error)
202
203 let latin1_engine = Lex_engines.engine_tiny_8bit table
204 }

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5