/[svn]/parser/parser.ml
ViewVC logotype

Contents of /parser/parser.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 448 - (hide annotations)
Tue Jul 10 17:35:38 2007 UTC (5 years, 11 months ago) by abate
File size: 14794 byte(s)
[r2003-05-26 20:59:50 by cvscast] bug for file inclusion

Original author: cvscast
Date: 2003-05-26 20:59:50+00:00
1 abate 4 open Location
2     open Ast
3 abate 225 open Ident
4 abate 4
5 abate 161 (*
6 abate 151 let () = Grammar.error_verbose := true
7 abate 161 *)
8 abate 81
9     let gram = Grammar.gcreate (Wlexer.lexer Wlexer.token Wlexer.latin1_engine)
10    
11 abate 421 let true_atom = Atoms.mk_ascii "true"
12     let false_atom = Atoms.mk_ascii "false"
13     let true_type = Types.atom (Atoms.atom true_atom)
14     let false_type = Types.atom (Atoms.atom false_atom)
15 abate 81
16 abate 374 let parse_ident = Encodings.Utf8.mk_latin1
17    
18 abate 375 let id_dummy = ident (U.mk "$$$")
19     let atom s = Atoms.mk (parse_ident s)
20 abate 374 let label s = LabelPool.mk (parse_ident s)
21 abate 375 let ident s = ident (parse_ident s)
22 abate 374
23 abate 38 let prog = Grammar.Entry.create gram "prog"
24 abate 431 let top_phrases = Grammar.Entry.create gram "toplevel phrases"
25 abate 38 let expr = Grammar.Entry.create gram "expression"
26     let pat = Grammar.Entry.create gram "type/pattern expression"
27     let regexp = Grammar.Entry.create gram "type/pattern regexp"
28     let const = Grammar.Entry.create gram "scalar constant"
29    
30 abate 316 let exp pos e = LocatedExpr (loc_of_pos pos,e)
31    
32 abate 38 let rec multi_prod loc = function
33     | [ x ] -> x
34     | x :: l -> mk loc (Prod (x, multi_prod loc l))
35     | [] -> assert false
36    
37 abate 316 let rec tuple = function
38 abate 38 | [ x ] -> x
39 abate 316 | x :: l -> Pair (x, tuple l)
40 abate 38 | [] -> assert false
41 abate 89
42     let tuple_queue =
43 abate 316 List.fold_right (fun x q -> Pair (x, q))
44 abate 89
45 abate 233
46 abate 249 let char = mknoloc (Internal (Types.char Chars.any))
47 abate 66 let string_regexp = Star (Elem char)
48 abate 38
49 abate 316 let cst_nil = Cst (Types.Atom Sequence.nil_atom)
50 abate 38
51 abate 66 let seq_of_string pos s =
52 abate 310 let s = Encodings.Utf8.mk s in
53 abate 66 let (pos,_) = pos in
54 abate 310 let rec aux pos i j =
55     if Encodings.Utf8.equal_index i j then []
56     else
57 abate 325 let (len,i) = Encodings.Utf8.next s i in
58 abate 310 let (c,i) = Encodings.Utf8.next s i in
59 abate 325 ((pos,pos+len),c) :: (aux (pos + len) i j)
60 abate 310 in
61 abate 325 aux (pos+1) (Encodings.Utf8.start_index s) (Encodings.Utf8.end_index s)
62 abate 4
63 abate 326 let get_string s =
64     let s = Encodings.Utf8.mk s in
65     let b = Buffer.create 32 in
66     let rec aux i j =
67     if Encodings.Utf8.equal_index i j then ()
68     else
69     let (len,i) = Encodings.Utf8.next s i in
70     let (c,i) = Encodings.Utf8.next s i in
71     Encodings.Utf8.store b c;
72     aux i j
73     in
74     aux (Encodings.Utf8.start_index s) (Encodings.Utf8.end_index s);
75     Buffer.contents b
76    
77 abate 81 exception Error of string
78 abate 249 let error (i,j) s = Location.raise_loc i j (Error s)
79 abate 18
80 abate 233 let make_record loc r =
81     LabelMap.from_list (fun _ _ -> error loc "Duplicated record field") r
82    
83 abate 81 let parse_char loc s =
84 abate 310 let s = seq_of_string loc s in
85     match s with
86 abate 325 | [ loc,c ] -> c
87 abate 310 | _ -> error loc "Character litteral must have length 1"
88 abate 81
89 abate 325 let char_list loc s =
90     let s = seq_of_string loc s in
91 abate 316 List.map (fun (loc,c) -> exp loc (Cst (Types.Char (Chars.mk_int c)))) s
92 abate 18
93    
94 abate 249 let include_stack = ref []
95    
96 abate 448 let protect_exn f g =
97     try let x = f () in g (); x
98     with e -> g (); raise e
99    
100 abate 38 EXTEND
101 abate 431 GLOBAL: top_phrases prog expr pat regexp const;
102 abate 4
103 abate 431 top_phrases: [
104 abate 446 [ l = LIST0 phrase; ";;" -> List.flatten l ]
105 abate 431 ];
106    
107 abate 10 prog: [
108 abate 431 [ l = LIST0 [ p = phrase ; OPT ";;" -> p ]; EOI -> List.flatten l ]
109 abate 10 ];
110    
111 abate 13 phrase: [
112 abate 431 [ (f,p,e) = let_binding ->
113     if f then [ mk loc (FunDecl e) ] else
114     [ mk loc (LetDecl (p,e)) ]
115     | (_,p,e1) = let_binding; "in"; e2 = expr LEVEL "top"->
116 abate 316 [ mk loc (EvalStatement (exp loc (Match (e1,[p,e2])))) ]
117 abate 431 | "type"; x = UIDENT; "="; t = pat -> [ mk loc (TypeDecl (x,t)) ]
118     | "type"; x = LIDENT -> error loc "Type identifiers must be capitalized"
119     | "debug"; d = debug_directive -> [ mk loc (Debug d) ]
120 abate 446 | DIRECTIVE "#quit" -> [ mk loc (Directive `Quit) ]
121     | DIRECTIVE "#env" -> [ mk loc (Directive `Env) ]
122 abate 431 | "include"; s = STRING2 ->
123 abate 326 let s = get_string s in
124 abate 249 protect_op "File inclusion";
125     (* avoid looping; should issue an error ? *)
126     if List.mem s !include_stack then []
127     else (
128     include_stack := s :: !include_stack;
129     Location.push_source (`File s);
130 abate 448 protect_exn
131     (fun () ->
132     let chan = open_in s in
133     let input = Stream.of_channel chan in
134     let l = Grammar.Entry.parse prog input in
135     close_in chan;
136     l)
137     (fun () ->
138     Location.pop_source ();
139     include_stack := List.tl !include_stack)
140 abate 249 )
141 abate 66 ] |
142 abate 249 [ e = expr -> [ mk loc (EvalStatement e) ]
143 abate 43 ]
144 abate 13 ];
145    
146 abate 43 debug_directive: [
147     [ LIDENT "filter"; t = pat; p = pat -> `Filter(t,p)
148 abate 75 | LIDENT "accept"; p = pat -> `Accept p
149 abate 43 | LIDENT "compile"; t = pat; p = LIST1 pat -> `Compile (t,p)
150 abate 407 | LIDENT "sample"; t = pat -> `Sample t
151 abate 224 | LIDENT "subtype"; t1 = pat; t2 = pat -> `Subtype (t1,t2)
152 abate 43 ]
153     ];
154    
155 abate 332 keyword: [
156     [ a =
157     [ "map" | "match" | "with" | "try" | "xtransform"
158     | "if" | "then" | "else"
159     | "transform" | "fun" | "in"
160 abate 431 | "let" | "type" | "debug" | "include"
161 abate 332 ]
162     -> a
163     ]
164     ];
165    
166 abate 4 expr: [
167     "top" RIGHTA
168 abate 421 [ "match"; e = SELF; "with"; b = branches ->
169     exp loc (Match (e,b))
170 abate 64 | "try"; e = SELF; "with"; b = branches ->
171 abate 425 exp loc (Try (e,b))
172 abate 421 | "map"; e = SELF; "with"; b = branches ->
173     exp loc (Map (e,b))
174     | "xtransform"; e = SELF; "with"; b = branches ->
175     exp loc (Xtrans (e,b))
176 abate 237 | "if"; e = SELF; "then"; e1 = SELF; "else"; e2 = SELF ->
177 abate 421 let p1 = mk loc (Internal true_type)
178     and p2 = mk loc (Internal false_type) in
179 abate 316 exp loc (Match (e, [p1,e1; p2,e2]))
180 abate 17 | "transform"; e = SELF; "with"; b = branches ->
181 abate 421 exp loc (Transform (e,b))
182 abate 48 | "fun"; (f,a,b) = fun_decl ->
183 abate 316 exp loc (Abstraction { fun_name = f; fun_iface = a; fun_body = b })
184 abate 431 | (_,p,e1) = let_binding; "in"; e2 = expr LEVEL "top"->
185 abate 316 exp loc (Match (e1,[p,e2]))
186 abate 54 | e = expr; ":"; p = pat ->
187 abate 316 exp loc (Forget (e,p))
188 abate 4 ]
189    
190    
191 abate 15 |
192 abate 237 [ e1 = expr; op = ["=" | "<=" | "<<" | ">>" | ">=" ]; e2 = expr ->
193     let op = match op with
194     | "<<" -> "<"
195     | ">>" -> ">"
196     | s -> s in
197 abate 316 exp loc (Op (op,[e1;e2]))
198 abate 237 ]
199    
200     |
201 abate 242 [ e1 = expr; op = ["+" | "-" | "@" ]; e2 = expr ->
202 abate 316 exp loc (Op (op,[e1;e2]))
203 abate 339 | e = expr; "\\"; l = [LIDENT | UIDENT | keyword ] ->
204 abate 374 exp loc (RemoveField (e, label l))
205 abate 51 ]
206 abate 16 |
207 abate 316 [ e1 = expr; op = ["*"]; e2 = expr -> exp loc (Op (op,[e1;e2]))
208 abate 151 | e = expr; op = "/"; p = pat ->
209    
210     let tag = mk loc (Internal (Types.atom (Atoms.any))) in
211     let att = mk loc (Internal Types.Record.any) in
212     let any = mk loc (Internal (Types.any)) in
213 abate 375 let re = Star(Alt(SeqCapture(id_dummy,Elem p), Elem any)) in
214 abate 151 let ct = mk loc (Regexp (re,any)) in
215     let p = mk loc (XmlT (tag, multi_prod loc [att;ct])) in
216 abate 375 let b = (p, Var id_dummy) in
217 abate 421 exp loc (Transform (e,[b]))
218 abate 51 ]
219 abate 26 |
220 abate 339 [ e = expr; "."; l = [LIDENT | UIDENT | keyword ] ->
221 abate 374 exp loc (Dot (e, label l))
222 abate 51 ]
223 abate 26
224 abate 52 |
225 abate 66 [ op = [ LIDENT "flatten"
226     | LIDENT "load_xml"
227 abate 374 | LIDENT "load_file" | LIDENT "load_file_utf8"
228 abate 188 | LIDENT "load_html"
229 abate 374 | LIDENT "print_xml" | LIDENT "print_xml_utf8"
230 abate 124 | LIDENT "print"
231 abate 66 | LIDENT "int_of"
232 abate 133 | LIDENT "string_of"
233 abate 329 | LIDENT "atom_of"
234 abate 421 | LIDENT "raise"
235 abate 66 ];
236 abate 316 e = expr -> exp loc (Op (op,[e]))
237 abate 374 | op = [ LIDENT "dump_to_file" | LIDENT "dump_to_file_utf8" ];
238 abate 316 e1 = expr LEVEL "no_appl"; e2 = expr -> exp loc (Op (op, [e1;e2]))
239     | e1 = SELF; LIDENT "div"; e2 = expr -> exp loc (Op ("/", [e1;e2]))
240     | e1 = SELF; LIDENT "mod"; e2 = expr -> exp loc (Op ("mod", [e1;e2]))
241     | e1 = SELF; e2 = expr -> exp loc (Apply (e1,e2))
242 abate 52 ]
243    
244 abate 4 | "no_appl"
245 abate 316 [ c = const -> exp loc (Cst c)
246     | "("; l = LIST1 expr SEP ","; ")" -> exp loc (tuple l)
247 abate 18 | "["; l = LIST0 seq_elem; e = OPT [ ";"; e = expr -> e ]; "]" ->
248     let e = match e with Some e -> e | None -> cst_nil in
249 abate 321 let l = List.fold_right
250     (fun x q ->
251     match x with
252     | `Elems l -> tuple_queue l q
253     | `Explode x -> Op ("@",[x;q])
254     ) l e
255     in
256     exp loc l
257 abate 332 | "<"; t = [ "("; e = expr; ")" -> e
258     | a = [ LIDENT | UIDENT | keyword ] ->
259 abate 375 exp loc (Cst (Types.Atom (atom a))) ];
260 abate 81 a = expr_attrib_spec; ">"; c = expr ->
261 abate 316 exp loc (Xml (t, Pair (a,c)))
262     | "{"; r = [ expr_record_spec | -> exp loc (RecordLitt LabelMap.empty) ]; "}" -> r
263 abate 81 | s = STRING2 ->
264 abate 316 exp loc (tuple (char_list loc s @ [cst_nil]))
265 abate 375 | a = LIDENT -> exp loc (Var (ident a))
266 abate 4 ]
267    
268     ];
269 abate 18
270     seq_elem: [
271 abate 89 [ x = STRING1 -> `Elems (char_list loc x)
272     | e = expr LEVEL "no_appl" -> `Elems [e]
273     | "!"; e = expr LEVEL "no_appl" -> `Explode e
274 abate 18 ]
275     ];
276 abate 4
277     let_binding: [
278 abate 431 [ "let"; p = pat; "="; e = expr -> (false,p,e)
279     | "let"; p = pat; ":"; t = pat; "="; e = expr -> (false,p, Forget (e,t))
280 abate 48 | "let"; "fun"; (f,a,b) = fun_decl ->
281     let p = match f with
282     | Some x -> mk loc (Capture x)
283     | _ -> failwith "Function name mandatory in let fun declarations"
284     in
285     let abst = { fun_name = f; fun_iface = a; fun_body = b } in
286 abate 316 let e = exp loc (Abstraction abst) in
287 abate 431 (true,p,e)
288 abate 4 ]
289     ];
290    
291 abate 48 fun_decl: [
292 abate 85 (* need an hack to do this, because both productions would
293     match [ OPT LIDENT; "("; pat ] .... *)
294 abate 375 [ f = OPT [ x = LIDENT -> ident x]; "("; p1 = pat LEVEL "no_arrow";
295 abate 85 res = [ "->"; p2 = pat;
296     a = [ ";"; a = LIST0 arrow SEP ";" -> a | -> [] ];
297     ")"; b = branches -> `Classic (p2,a,b)
298     | ":"; targ1 = pat;
299     args = LIST0 [ ","; arg = pat; ":"; targ = pat -> (arg,targ) ];
300     ")"; ":"; tres = pat ;
301     "="; body = expr ->
302     `Compact (targ1,args,tres,body)
303     ] ->
304     match res with
305     | `Classic (p2,a,b) -> f,(p1,p2)::a,b
306     | `Compact (targ1,args,tres,body) ->
307     let args = (p1,targ1) :: args in
308 abate 249 let targ = multi_prod nopos (List.map snd args) in
309     let arg = multi_prod nopos (List.map fst args) in
310 abate 85 let b = [arg, body] in
311     let a = [targ,tres] in
312     (f,a,b)
313 abate 48 ]
314     ];
315    
316 abate 85 arrow: [
317 abate 9 [ t1 = pat LEVEL "no_arrow"; "->"; t2 = pat -> (t1,t2)]
318 abate 4 ];
319    
320     branches: [
321 abate 81 [ OPT "|"; l = LIST1 branch SEP "|" -> l ]
322 abate 4 ];
323    
324     branch: [
325 abate 6 [ p = pat LEVEL "no_arrow"; "->"; e = expr -> (p,e) ]
326 abate 4 ];
327    
328    
329     regexp: [
330 abate 236 [ x = regexp; "|"; y = regexp ->
331     match (x,y) with
332     | Elem x, Elem y -> Elem (mk loc (Or (x,y)))
333     | _ -> Alt (x,y)
334     ]
335 abate 4 | [ x = regexp; y = regexp -> Seq (x,y) ]
336 abate 375 | [ a = LIDENT; "::"; x = regexp -> SeqCapture (ident a,x) ]
337 abate 4 | [ x = regexp; "*" -> Star x
338     | x = regexp; "*?" -> WeakStar x
339     | x = regexp; "+" -> Seq (x, Star x)
340     | x = regexp; "+?" -> Seq (x, WeakStar x)
341 abate 71 | x = regexp; "?" -> Alt (x, Epsilon)
342 abate 4 | x = regexp; "??" -> Alt (Epsilon, x) ]
343     | [ "("; x = regexp; ")" -> x
344 abate 225 | "("; a = LIDENT; ":="; c = const; ")" ->
345 abate 375 Elem (mk loc (Constant ((ident a,c))))
346 abate 66 | UIDENT "PCDATA" -> string_regexp
347 abate 81 | i = STRING1; "--"; j = STRING1 ->
348 abate 310 let i = Chars.mk_int (parse_char loc i)
349     and j = Chars.mk_int (parse_char loc j) in
350 abate 66 Elem (mk loc (Internal (Types.char (Chars.char_class i j))))
351 abate 81 | s = STRING1 ->
352     let s = seq_of_string loc s in
353 abate 18 List.fold_right
354 abate 66 (fun (loc,c) accu ->
355 abate 310 let c = Chars.mk_int c in
356 abate 18 let c = Chars.atom c in
357     Seq (Elem (mk loc (Internal (Types.char c))), accu))
358     s
359 abate 66 Epsilon
360 abate 4 | e = pat LEVEL "simple" -> Elem e
361     ]
362     ];
363    
364     pat: [
365 abate 332 [ x = pat; LIDENT "where";
366 abate 338 b = LIST1 [ a = UIDENT; "="; y = pat -> (a,y)
367     | LIDENT -> error loc "Type/pattern identifiers must be capitalized"
368     ] SEP "and"
369 abate 4 -> mk loc (Recurs (x,b)) ]
370     | RIGHTA [ x = pat; "->"; y = pat -> mk loc (Arrow (x,y)) ]
371 abate 6 | "no_arrow" [ x = pat; "|"; y = pat -> mk loc (Or (x,y)) ]
372 abate 121 | "simple" [ x = pat; "&"; y = pat -> mk loc (And (x,y))
373 abate 99 | x = pat; "\\"; y = pat -> mk loc (Diff (x,y)) ]
374 abate 4 |
375 abate 159 [ "{"; r = record_spec; "}" -> mk loc (Record (true,r))
376     | "{|"; r = record_spec; "|}" -> mk loc (Record (false,r))
377 abate 4 | LIDENT "_" -> mk loc (Internal Types.any)
378 abate 375 | a = LIDENT -> mk loc (Capture (ident a))
379 abate 225 | "("; a = LIDENT; ":="; c = const; ")" ->
380 abate 375 mk loc (Constant (ident a,c))
381 abate 4 | a = UIDENT -> mk loc (PatVar a)
382     | i = INT ; "--"; j = INT ->
383 abate 222 let i = Intervals.mk i
384     and j = Intervals.mk j in
385 abate 52 mk loc (Internal (Types.interval (Intervals.bounded i j)))
386     | i = INT ->
387 abate 222 let i = Intervals.mk i in
388 abate 52 mk loc (Internal (Types.interval (Intervals.atom i)))
389 abate 81 | "*"; "--"; j = INT ->
390 abate 222 let j = Intervals.mk j in
391 abate 52 mk loc (Internal (Types.interval (Intervals.left j)))
392 abate 81 | i = INT; "--"; "*" ->
393 abate 222 let i = Intervals.mk i in
394 abate 52 mk loc (Internal (Types.interval (Intervals.right i)))
395 abate 19 | i = char ->
396     mk loc (Internal (Types.char (Chars.char_class i i)))
397 abate 13 | i = char ; "--"; j = char ->
398 abate 18 mk loc (Internal (Types.char (Chars.char_class i j)))
399 abate 4 | c = const -> mk loc (Internal (Types.constant c))
400     | "("; l = LIST1 pat SEP ","; ")" -> multi_prod loc l
401     | "["; r = [ r = regexp -> r | -> Epsilon ];
402     q = [ ";"; q = pat -> q
403 abate 249 | -> mknoloc (Internal (Sequence.nil_type)) ];
404 abate 4 "]" -> mk loc (Regexp (r,q))
405 abate 332 | "<"; t =
406     [ x = [ LIDENT | UIDENT | keyword ] ->
407 abate 375 let a = if x = "_" then Atoms.any else Atoms.atom (atom x) in
408 abate 332 mk loc (Internal (Types.atom a))
409     | "("; t = pat; ")" -> t ];
410 abate 81 a = attrib_spec; ">"; c = pat ->
411 abate 110 mk loc (XmlT (t, multi_prod loc [a;c]))
412 abate 81 | s = STRING2 ->
413     let s = seq_of_string loc s in
414 abate 63 let s = List.map
415 abate 66 (fun (loc,c) ->
416 abate 63 mk loc (Internal
417     (Types.char
418     (Chars.atom
419 abate 310 (Chars.mk_int c))))) s in
420 abate 63 let s = s @ [mk loc (Internal (Sequence.nil_type))] in
421     multi_prod loc s
422 abate 4 ]
423    
424     ];
425    
426     record_spec:
427 abate 339 [ [ r = LIST0 [ l = [LIDENT | UIDENT | keyword ]; "=";
428 abate 81 o = [ "?" -> true | -> false];
429 abate 229 x = pat ->
430     let x = if o then mk loc (Optional x) else x in
431 abate 374 (label l, x)
432 abate 4 ] SEP ";" ->
433 abate 233 make_record loc r
434 abate 4 ] ];
435    
436 abate 13 char:
437     [
438 abate 310 [ c = STRING1 -> Chars.mk_int (parse_char loc c) ]
439 abate 13 ];
440    
441    
442 abate 4 const:
443     [
444 abate 222 [ i = INT -> Types.Integer (Intervals.mk i)
445 abate 375 | "`"; a = [LIDENT | UIDENT | keyword ] -> Types.Atom (atom a)
446 abate 13 | c = char -> Types.Char c ]
447 abate 4 ];
448    
449    
450     attrib_spec:
451 abate 159 [ [ r = record_spec -> mk loc (Record (true,r))
452 abate 252 | "("; t = pat; ")" -> t
453     | "{"; r = record_spec; "}" -> mk loc (Record (true,r))
454     | "{|"; r = record_spec; "|}" -> mk loc (Record (false,r))
455     ] ];
456 abate 4
457     expr_record_spec:
458     [ [ r = LIST1
459 abate 339 [ l = [LIDENT | UIDENT | keyword ]; "="; x = expr ->
460 abate 374 (label l,x) ]
461 abate 4 SEP ";" ->
462 abate 316 exp loc (RecordLitt (make_record loc r))
463 abate 4 ] ];
464    
465     expr_attrib_spec:
466     [ [ r = expr_record_spec -> r ]
467     | [ e = expr LEVEL "no_appl" -> e
468 abate 316 | -> exp loc (RecordLitt (LabelMap.empty))
469 abate 4 ]
470     ];
471     END
472    
473 abate 61 let pat' = Grammar.Entry.create gram "type/pattern expression"
474     EXTEND GLOBAL: pat pat';
475     pat': [ [ p = pat; EOI -> p ] ];
476     END
477    
478 abate 10 let pat = Grammar.Entry.parse pat
479 abate 81 and expr = Grammar.Entry.parse expr
480     and prog = Grammar.Entry.parse prog
481 abate 431 and top_phrases = Grammar.Entry.parse top_phrases
482 abate 10
483 abate 431 let rec sync s =
484     match Stream.next s with
485     | ';' ->
486     (match Stream.next s with
487     | ';' -> ()
488     | _ -> sync s)
489     | _ -> sync s

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5