/[svn]/parser/parser.ml
ViewVC logotype

Contents of /parser/parser.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 448 - (show annotations)
Tue Jul 10 17:35:38 2007 UTC (5 years, 10 months ago) by abate
File size: 14794 byte(s)
[r2003-05-26 20:59:50 by cvscast] bug for file inclusion

Original author: cvscast
Date: 2003-05-26 20:59:50+00:00
1 open Location
2 open Ast
3 open Ident
4
5 (*
6 let () = Grammar.error_verbose := true
7 *)
8
9 let gram = Grammar.gcreate (Wlexer.lexer Wlexer.token Wlexer.latin1_engine)
10
11 let true_atom = Atoms.mk_ascii "true"
12 let false_atom = Atoms.mk_ascii "false"
13 let true_type = Types.atom (Atoms.atom true_atom)
14 let false_type = Types.atom (Atoms.atom false_atom)
15
16 let parse_ident = Encodings.Utf8.mk_latin1
17
18 let id_dummy = ident (U.mk "$$$")
19 let atom s = Atoms.mk (parse_ident s)
20 let label s = LabelPool.mk (parse_ident s)
21 let ident s = ident (parse_ident s)
22
23 let prog = Grammar.Entry.create gram "prog"
24 let top_phrases = Grammar.Entry.create gram "toplevel phrases"
25 let expr = Grammar.Entry.create gram "expression"
26 let pat = Grammar.Entry.create gram "type/pattern expression"
27 let regexp = Grammar.Entry.create gram "type/pattern regexp"
28 let const = Grammar.Entry.create gram "scalar constant"
29
30 let exp pos e = LocatedExpr (loc_of_pos pos,e)
31
32 let rec multi_prod loc = function
33 | [ x ] -> x
34 | x :: l -> mk loc (Prod (x, multi_prod loc l))
35 | [] -> assert false
36
37 let rec tuple = function
38 | [ x ] -> x
39 | x :: l -> Pair (x, tuple l)
40 | [] -> assert false
41
42 let tuple_queue =
43 List.fold_right (fun x q -> Pair (x, q))
44
45
46 let char = mknoloc (Internal (Types.char Chars.any))
47 let string_regexp = Star (Elem char)
48
49 let cst_nil = Cst (Types.Atom Sequence.nil_atom)
50
51 let seq_of_string pos s =
52 let s = Encodings.Utf8.mk s in
53 let (pos,_) = pos in
54 let rec aux pos i j =
55 if Encodings.Utf8.equal_index i j then []
56 else
57 let (len,i) = Encodings.Utf8.next s i in
58 let (c,i) = Encodings.Utf8.next s i in
59 ((pos,pos+len),c) :: (aux (pos + len) i j)
60 in
61 aux (pos+1) (Encodings.Utf8.start_index s) (Encodings.Utf8.end_index s)
62
63 let get_string s =
64 let s = Encodings.Utf8.mk s in
65 let b = Buffer.create 32 in
66 let rec aux i j =
67 if Encodings.Utf8.equal_index i j then ()
68 else
69 let (len,i) = Encodings.Utf8.next s i in
70 let (c,i) = Encodings.Utf8.next s i in
71 Encodings.Utf8.store b c;
72 aux i j
73 in
74 aux (Encodings.Utf8.start_index s) (Encodings.Utf8.end_index s);
75 Buffer.contents b
76
77 exception Error of string
78 let error (i,j) s = Location.raise_loc i j (Error s)
79
80 let make_record loc r =
81 LabelMap.from_list (fun _ _ -> error loc "Duplicated record field") r
82
83 let parse_char loc s =
84 let s = seq_of_string loc s in
85 match s with
86 | [ loc,c ] -> c
87 | _ -> error loc "Character litteral must have length 1"
88
89 let char_list loc s =
90 let s = seq_of_string loc s in
91 List.map (fun (loc,c) -> exp loc (Cst (Types.Char (Chars.mk_int c)))) s
92
93
94 let include_stack = ref []
95
96 let protect_exn f g =
97 try let x = f () in g (); x
98 with e -> g (); raise e
99
100 EXTEND
101 GLOBAL: top_phrases prog expr pat regexp const;
102
103 top_phrases: [
104 [ l = LIST0 phrase; ";;" -> List.flatten l ]
105 ];
106
107 prog: [
108 [ l = LIST0 [ p = phrase ; OPT ";;" -> p ]; EOI -> List.flatten l ]
109 ];
110
111 phrase: [
112 [ (f,p,e) = let_binding ->
113 if f then [ mk loc (FunDecl e) ] else
114 [ mk loc (LetDecl (p,e)) ]
115 | (_,p,e1) = let_binding; "in"; e2 = expr LEVEL "top"->
116 [ mk loc (EvalStatement (exp loc (Match (e1,[p,e2])))) ]
117 | "type"; x = UIDENT; "="; t = pat -> [ mk loc (TypeDecl (x,t)) ]
118 | "type"; x = LIDENT -> error loc "Type identifiers must be capitalized"
119 | "debug"; d = debug_directive -> [ mk loc (Debug d) ]
120 | DIRECTIVE "#quit" -> [ mk loc (Directive `Quit) ]
121 | DIRECTIVE "#env" -> [ mk loc (Directive `Env) ]
122 | "include"; s = STRING2 ->
123 let s = get_string s in
124 protect_op "File inclusion";
125 (* avoid looping; should issue an error ? *)
126 if List.mem s !include_stack then []
127 else (
128 include_stack := s :: !include_stack;
129 Location.push_source (`File s);
130 protect_exn
131 (fun () ->
132 let chan = open_in s in
133 let input = Stream.of_channel chan in
134 let l = Grammar.Entry.parse prog input in
135 close_in chan;
136 l)
137 (fun () ->
138 Location.pop_source ();
139 include_stack := List.tl !include_stack)
140 )
141 ] |
142 [ e = expr -> [ mk loc (EvalStatement e) ]
143 ]
144 ];
145
146 debug_directive: [
147 [ LIDENT "filter"; t = pat; p = pat -> `Filter(t,p)
148 | LIDENT "accept"; p = pat -> `Accept p
149 | LIDENT "compile"; t = pat; p = LIST1 pat -> `Compile (t,p)
150 | LIDENT "sample"; t = pat -> `Sample t
151 | LIDENT "subtype"; t1 = pat; t2 = pat -> `Subtype (t1,t2)
152 ]
153 ];
154
155 keyword: [
156 [ a =
157 [ "map" | "match" | "with" | "try" | "xtransform"
158 | "if" | "then" | "else"
159 | "transform" | "fun" | "in"
160 | "let" | "type" | "debug" | "include"
161 ]
162 -> a
163 ]
164 ];
165
166 expr: [
167 "top" RIGHTA
168 [ "match"; e = SELF; "with"; b = branches ->
169 exp loc (Match (e,b))
170 | "try"; e = SELF; "with"; b = branches ->
171 exp loc (Try (e,b))
172 | "map"; e = SELF; "with"; b = branches ->
173 exp loc (Map (e,b))
174 | "xtransform"; e = SELF; "with"; b = branches ->
175 exp loc (Xtrans (e,b))
176 | "if"; e = SELF; "then"; e1 = SELF; "else"; e2 = SELF ->
177 let p1 = mk loc (Internal true_type)
178 and p2 = mk loc (Internal false_type) in
179 exp loc (Match (e, [p1,e1; p2,e2]))
180 | "transform"; e = SELF; "with"; b = branches ->
181 exp loc (Transform (e,b))
182 | "fun"; (f,a,b) = fun_decl ->
183 exp loc (Abstraction { fun_name = f; fun_iface = a; fun_body = b })
184 | (_,p,e1) = let_binding; "in"; e2 = expr LEVEL "top"->
185 exp loc (Match (e1,[p,e2]))
186 | e = expr; ":"; p = pat ->
187 exp loc (Forget (e,p))
188 ]
189
190
191 |
192 [ e1 = expr; op = ["=" | "<=" | "<<" | ">>" | ">=" ]; e2 = expr ->
193 let op = match op with
194 | "<<" -> "<"
195 | ">>" -> ">"
196 | s -> s in
197 exp loc (Op (op,[e1;e2]))
198 ]
199
200 |
201 [ e1 = expr; op = ["+" | "-" | "@" ]; e2 = expr ->
202 exp loc (Op (op,[e1;e2]))
203 | e = expr; "\\"; l = [LIDENT | UIDENT | keyword ] ->
204 exp loc (RemoveField (e, label l))
205 ]
206 |
207 [ e1 = expr; op = ["*"]; e2 = expr -> exp loc (Op (op,[e1;e2]))
208 | e = expr; op = "/"; p = pat ->
209
210 let tag = mk loc (Internal (Types.atom (Atoms.any))) in
211 let att = mk loc (Internal Types.Record.any) in
212 let any = mk loc (Internal (Types.any)) in
213 let re = Star(Alt(SeqCapture(id_dummy,Elem p), Elem any)) in
214 let ct = mk loc (Regexp (re,any)) in
215 let p = mk loc (XmlT (tag, multi_prod loc [att;ct])) in
216 let b = (p, Var id_dummy) in
217 exp loc (Transform (e,[b]))
218 ]
219 |
220 [ e = expr; "."; l = [LIDENT | UIDENT | keyword ] ->
221 exp loc (Dot (e, label l))
222 ]
223
224 |
225 [ op = [ LIDENT "flatten"
226 | LIDENT "load_xml"
227 | LIDENT "load_file" | LIDENT "load_file_utf8"
228 | LIDENT "load_html"
229 | LIDENT "print_xml" | LIDENT "print_xml_utf8"
230 | LIDENT "print"
231 | LIDENT "int_of"
232 | LIDENT "string_of"
233 | LIDENT "atom_of"
234 | LIDENT "raise"
235 ];
236 e = expr -> exp loc (Op (op,[e]))
237 | op = [ LIDENT "dump_to_file" | LIDENT "dump_to_file_utf8" ];
238 e1 = expr LEVEL "no_appl"; e2 = expr -> exp loc (Op (op, [e1;e2]))
239 | e1 = SELF; LIDENT "div"; e2 = expr -> exp loc (Op ("/", [e1;e2]))
240 | e1 = SELF; LIDENT "mod"; e2 = expr -> exp loc (Op ("mod", [e1;e2]))
241 | e1 = SELF; e2 = expr -> exp loc (Apply (e1,e2))
242 ]
243
244 | "no_appl"
245 [ c = const -> exp loc (Cst c)
246 | "("; l = LIST1 expr SEP ","; ")" -> exp loc (tuple l)
247 | "["; l = LIST0 seq_elem; e = OPT [ ";"; e = expr -> e ]; "]" ->
248 let e = match e with Some e -> e | None -> cst_nil in
249 let l = List.fold_right
250 (fun x q ->
251 match x with
252 | `Elems l -> tuple_queue l q
253 | `Explode x -> Op ("@",[x;q])
254 ) l e
255 in
256 exp loc l
257 | "<"; t = [ "("; e = expr; ")" -> e
258 | a = [ LIDENT | UIDENT | keyword ] ->
259 exp loc (Cst (Types.Atom (atom a))) ];
260 a = expr_attrib_spec; ">"; c = expr ->
261 exp loc (Xml (t, Pair (a,c)))
262 | "{"; r = [ expr_record_spec | -> exp loc (RecordLitt LabelMap.empty) ]; "}" -> r
263 | s = STRING2 ->
264 exp loc (tuple (char_list loc s @ [cst_nil]))
265 | a = LIDENT -> exp loc (Var (ident a))
266 ]
267
268 ];
269
270 seq_elem: [
271 [ x = STRING1 -> `Elems (char_list loc x)
272 | e = expr LEVEL "no_appl" -> `Elems [e]
273 | "!"; e = expr LEVEL "no_appl" -> `Explode e
274 ]
275 ];
276
277 let_binding: [
278 [ "let"; p = pat; "="; e = expr -> (false,p,e)
279 | "let"; p = pat; ":"; t = pat; "="; e = expr -> (false,p, Forget (e,t))
280 | "let"; "fun"; (f,a,b) = fun_decl ->
281 let p = match f with
282 | Some x -> mk loc (Capture x)
283 | _ -> failwith "Function name mandatory in let fun declarations"
284 in
285 let abst = { fun_name = f; fun_iface = a; fun_body = b } in
286 let e = exp loc (Abstraction abst) in
287 (true,p,e)
288 ]
289 ];
290
291 fun_decl: [
292 (* need an hack to do this, because both productions would
293 match [ OPT LIDENT; "("; pat ] .... *)
294 [ f = OPT [ x = LIDENT -> ident x]; "("; p1 = pat LEVEL "no_arrow";
295 res = [ "->"; p2 = pat;
296 a = [ ";"; a = LIST0 arrow SEP ";" -> a | -> [] ];
297 ")"; b = branches -> `Classic (p2,a,b)
298 | ":"; targ1 = pat;
299 args = LIST0 [ ","; arg = pat; ":"; targ = pat -> (arg,targ) ];
300 ")"; ":"; tres = pat ;
301 "="; body = expr ->
302 `Compact (targ1,args,tres,body)
303 ] ->
304 match res with
305 | `Classic (p2,a,b) -> f,(p1,p2)::a,b
306 | `Compact (targ1,args,tres,body) ->
307 let args = (p1,targ1) :: args in
308 let targ = multi_prod nopos (List.map snd args) in
309 let arg = multi_prod nopos (List.map fst args) in
310 let b = [arg, body] in
311 let a = [targ,tres] in
312 (f,a,b)
313 ]
314 ];
315
316 arrow: [
317 [ t1 = pat LEVEL "no_arrow"; "->"; t2 = pat -> (t1,t2)]
318 ];
319
320 branches: [
321 [ OPT "|"; l = LIST1 branch SEP "|" -> l ]
322 ];
323
324 branch: [
325 [ p = pat LEVEL "no_arrow"; "->"; e = expr -> (p,e) ]
326 ];
327
328
329 regexp: [
330 [ x = regexp; "|"; y = regexp ->
331 match (x,y) with
332 | Elem x, Elem y -> Elem (mk loc (Or (x,y)))
333 | _ -> Alt (x,y)
334 ]
335 | [ x = regexp; y = regexp -> Seq (x,y) ]
336 | [ a = LIDENT; "::"; x = regexp -> SeqCapture (ident a,x) ]
337 | [ x = regexp; "*" -> Star x
338 | x = regexp; "*?" -> WeakStar x
339 | x = regexp; "+" -> Seq (x, Star x)
340 | x = regexp; "+?" -> Seq (x, WeakStar x)
341 | x = regexp; "?" -> Alt (x, Epsilon)
342 | x = regexp; "??" -> Alt (Epsilon, x) ]
343 | [ "("; x = regexp; ")" -> x
344 | "("; a = LIDENT; ":="; c = const; ")" ->
345 Elem (mk loc (Constant ((ident a,c))))
346 | UIDENT "PCDATA" -> string_regexp
347 | i = STRING1; "--"; j = STRING1 ->
348 let i = Chars.mk_int (parse_char loc i)
349 and j = Chars.mk_int (parse_char loc j) in
350 Elem (mk loc (Internal (Types.char (Chars.char_class i j))))
351 | s = STRING1 ->
352 let s = seq_of_string loc s in
353 List.fold_right
354 (fun (loc,c) accu ->
355 let c = Chars.mk_int c in
356 let c = Chars.atom c in
357 Seq (Elem (mk loc (Internal (Types.char c))), accu))
358 s
359 Epsilon
360 | e = pat LEVEL "simple" -> Elem e
361 ]
362 ];
363
364 pat: [
365 [ x = pat; LIDENT "where";
366 b = LIST1 [ a = UIDENT; "="; y = pat -> (a,y)
367 | LIDENT -> error loc "Type/pattern identifiers must be capitalized"
368 ] SEP "and"
369 -> mk loc (Recurs (x,b)) ]
370 | RIGHTA [ x = pat; "->"; y = pat -> mk loc (Arrow (x,y)) ]
371 | "no_arrow" [ x = pat; "|"; y = pat -> mk loc (Or (x,y)) ]
372 | "simple" [ x = pat; "&"; y = pat -> mk loc (And (x,y))
373 | x = pat; "\\"; y = pat -> mk loc (Diff (x,y)) ]
374 |
375 [ "{"; r = record_spec; "}" -> mk loc (Record (true,r))
376 | "{|"; r = record_spec; "|}" -> mk loc (Record (false,r))
377 | LIDENT "_" -> mk loc (Internal Types.any)
378 | a = LIDENT -> mk loc (Capture (ident a))
379 | "("; a = LIDENT; ":="; c = const; ")" ->
380 mk loc (Constant (ident a,c))
381 | a = UIDENT -> mk loc (PatVar a)
382 | i = INT ; "--"; j = INT ->
383 let i = Intervals.mk i
384 and j = Intervals.mk j in
385 mk loc (Internal (Types.interval (Intervals.bounded i j)))
386 | i = INT ->
387 let i = Intervals.mk i in
388 mk loc (Internal (Types.interval (Intervals.atom i)))
389 | "*"; "--"; j = INT ->
390 let j = Intervals.mk j in
391 mk loc (Internal (Types.interval (Intervals.left j)))
392 | i = INT; "--"; "*" ->
393 let i = Intervals.mk i in
394 mk loc (Internal (Types.interval (Intervals.right i)))
395 | i = char ->
396 mk loc (Internal (Types.char (Chars.char_class i i)))
397 | i = char ; "--"; j = char ->
398 mk loc (Internal (Types.char (Chars.char_class i j)))
399 | c = const -> mk loc (Internal (Types.constant c))
400 | "("; l = LIST1 pat SEP ","; ")" -> multi_prod loc l
401 | "["; r = [ r = regexp -> r | -> Epsilon ];
402 q = [ ";"; q = pat -> q
403 | -> mknoloc (Internal (Sequence.nil_type)) ];
404 "]" -> mk loc (Regexp (r,q))
405 | "<"; t =
406 [ x = [ LIDENT | UIDENT | keyword ] ->
407 let a = if x = "_" then Atoms.any else Atoms.atom (atom x) in
408 mk loc (Internal (Types.atom a))
409 | "("; t = pat; ")" -> t ];
410 a = attrib_spec; ">"; c = pat ->
411 mk loc (XmlT (t, multi_prod loc [a;c]))
412 | s = STRING2 ->
413 let s = seq_of_string loc s in
414 let s = List.map
415 (fun (loc,c) ->
416 mk loc (Internal
417 (Types.char
418 (Chars.atom
419 (Chars.mk_int c))))) s in
420 let s = s @ [mk loc (Internal (Sequence.nil_type))] in
421 multi_prod loc s
422 ]
423
424 ];
425
426 record_spec:
427 [ [ r = LIST0 [ l = [LIDENT | UIDENT | keyword ]; "=";
428 o = [ "?" -> true | -> false];
429 x = pat ->
430 let x = if o then mk loc (Optional x) else x in
431 (label l, x)
432 ] SEP ";" ->
433 make_record loc r
434 ] ];
435
436 char:
437 [
438 [ c = STRING1 -> Chars.mk_int (parse_char loc c) ]
439 ];
440
441
442 const:
443 [
444 [ i = INT -> Types.Integer (Intervals.mk i)
445 | "`"; a = [LIDENT | UIDENT | keyword ] -> Types.Atom (atom a)
446 | c = char -> Types.Char c ]
447 ];
448
449
450 attrib_spec:
451 [ [ r = record_spec -> mk loc (Record (true,r))
452 | "("; t = pat; ")" -> t
453 | "{"; r = record_spec; "}" -> mk loc (Record (true,r))
454 | "{|"; r = record_spec; "|}" -> mk loc (Record (false,r))
455 ] ];
456
457 expr_record_spec:
458 [ [ r = LIST1
459 [ l = [LIDENT | UIDENT | keyword ]; "="; x = expr ->
460 (label l,x) ]
461 SEP ";" ->
462 exp loc (RecordLitt (make_record loc r))
463 ] ];
464
465 expr_attrib_spec:
466 [ [ r = expr_record_spec -> r ]
467 | [ e = expr LEVEL "no_appl" -> e
468 | -> exp loc (RecordLitt (LabelMap.empty))
469 ]
470 ];
471 END
472
473 let pat' = Grammar.Entry.create gram "type/pattern expression"
474 EXTEND GLOBAL: pat pat';
475 pat': [ [ p = pat; EOI -> p ] ];
476 END
477
478 let pat = Grammar.Entry.parse pat
479 and expr = Grammar.Entry.parse expr
480 and prog = Grammar.Entry.parse prog
481 and top_phrases = Grammar.Entry.parse top_phrases
482
483 let rec sync s =
484 match Stream.next s with
485 | ';' ->
486 (match Stream.next s with
487 | ';' -> ()
488 | _ -> sync s)
489 | _ -> sync s

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5