/[svn]/misc/pretty.ml
ViewVC logotype

Contents of /misc/pretty.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1686 - (show annotations)
Tue Jul 10 19:14:31 2007 UTC (5 years, 10 months ago) by abate
File size: 14031 byte(s)
[r2005-05-24 12:26:59 by afrisch] Empty log message

Original author: afrisch
Date: 2005-05-24 12:26:59+00:00
1 type 'a regexp =
2 | Empty
3 | Epsilon
4 | Seq of 'a regexp * 'a regexp
5 | Alt of 'a regexp * 'a regexp
6 | Star of 'a regexp
7 | Plus of 'a regexp
8 | Trans of 'a
9
10 module type TABLE = sig
11 type key
12 type 'a t
13 val create: int -> 'a t
14 val add: 'a t -> key -> 'a -> unit
15 val find: 'a t -> key -> 'a
16 end
17
18 module type S = sig
19 type t
20 val equal: t -> t -> bool
21 val compare: t -> t -> int
22 val hash: t -> int
23 end
24
25 module Decompile(H : TABLE)(S : S) = struct
26
27 (* Now attempt to simplify regexp. Does not work.... disabled *)
28 module A = struct
29 type atom =
30 | AStar of trie
31 | APlus of trie
32 | ATrans of S.t
33 and trie =
34 | AEmpty
35 | AEps
36 | ABranch of atom list * trie * trie * bool * int * int
37 (* Branching atom, left, right,
38 nullable,
39 hash,
40 uid *)
41
42
43 type re = trie
44
45
46
47 let empty = AEmpty
48 let epsilon = AEps
49
50 let nullable = function
51 | AEmpty -> false
52 | AEps -> true
53 | ABranch (_,_,_,n,_,_) -> n
54
55 let nullable_atom = function
56 | AStar _ -> true
57 | APlus t -> assert(not (nullable t)); false
58 | ATrans _ -> false
59 let nullable_atom_list = List.exists nullable_atom
60
61 (*
62 let size = function
63 | AEmpty -> 0
64 | AEps -> 0
65 | ABranch (_,_,_,_,_,_,sz) -> sz
66 *)
67
68 let compare_trie t1 t2 = match t1,t2 with
69 | AEmpty, AEmpty | AEps, AEps -> 0
70 | AEmpty, _ -> -1 | _,AEmpty -> 1
71 | AEps, _ -> -1 | _, AEps -> 1
72 | ABranch (_,_,_,_,_,id1), ABranch (_,_,_,_,_,id2) -> id1 - id2
73
74 let equal_atom a1 a2 = match a1,a2 with
75 | AStar t1, AStar t2 | APlus t1, APlus t2 -> t1 == t2
76 | ATrans t1, ATrans t2 -> S.equal t1 t2
77 | _ -> false
78
79 let rec equal_atom_list a1 a2 = match a1,a2 with
80 | [],[] -> true
81 | hd1::tl1,hd2::tl2 -> equal_atom hd1 hd2 && equal_atom_list tl1 tl2
82 | _ -> false
83
84 let compare_atom a1 a2 = match a1,a2 with
85 | AStar t1, AStar t2 | APlus t1, APlus t2 -> compare_trie t1 t2
86 | AStar _, _ -> -1 | _, AStar _ -> 1
87 | APlus _, _ -> -1 | _, APlus _ -> 1
88 | ATrans t1, ATrans t2 -> S.compare t1 t2
89
90
91 let hash_trie = function
92 | AEmpty -> 0
93 | AEps -> 1
94 | ABranch (_,_,_,_,h,_) -> h
95
96 let hash_atom = function
97 | AStar t -> 17 * (hash_trie t)
98 | APlus t -> 1 + 17 * (hash_trie t)
99 | ATrans t -> 2 + 17 * (S.hash t)
100
101 let rec hash_atom_list = function
102 | hd::tl -> hash_atom hd + 17 * (hash_atom_list tl)
103 | [] -> 0
104
105 module T = struct
106 type t = atom list * trie * trie * int
107
108 let equal (a,ay,an,_) (b,by,bn,_) =
109 (equal_atom_list a b) && (ay == by) && (an == bn)
110 let hash (a,ay,an,h) =
111 h
112 end
113
114 module HT = Hashtbl.Make(T)
115
116 let branches = HT.create 17
117 let uid = ref 0
118
119 let branch0 a ay an =
120 let h = hash_atom_list a + 17 * (hash_trie ay) + 257 * (hash_trie an) in
121 let b = (a,ay,an,h) in
122 try HT.find branches b
123 with Not_found ->
124 let h = T.hash b in
125 incr uid;
126 let nullable =
127 nullable an || ((nullable ay) && (nullable_atom_list a)) in
128 let x = ABranch (a,ay,an,nullable,h,!uid) in
129 HT.add branches b x;
130 x
131
132 let branch a ay an =
133 (* assert (List.length a > 0);
134 match ay,an with
135 | ABranch (b,by,bn,_,_,_), AEmpty -> branch0 (a @ b) by bn
136 | AEmpty, AEmpty -> AEmpty
137 | _ -> *) branch0 a ay an
138
139 let rec opt = function
140 | ABranch (a,ay,an,_,_,_) -> branch0 a ay (opt an)
141 | AEmpty -> AEps
142 | t -> t
143
144 let rec factor accu ctx x y = match x,y with
145 | hd1::tl1, hd2::tl2 when equal_atom hd1 hd2 ->
146 factor (hd1::accu) (hd1::ctx) tl1 tl2
147 | _ -> List.rev accu, ctx,x,y
148
149 let rec get_seq accu = function
150 | ABranch (a,AEps,AEmpty,_,_,_) -> Some a
151 | AEps -> Some []
152 | _ -> None
153
154 let get_seq = get_seq []
155
156 let apply_factor f r =
157 branch0 f r AEmpty
158
159 let apply_ctx ctx r =
160 List.fold_right (fun a r -> branch0 a r AEmpty) ctx r
161
162 let star x r = match x with
163 | AEmpty | AEps -> AEps
164 | t -> branch0 [ AStar t ] r AEmpty
165
166 let plus x =
167 if nullable x then AStar x else APlus x
168
169 (* (AB)*A ==> A(BA)*
170 BA(BA)* ==> (BA)+ *)
171 let rec create_plus ctx = function
172 | AStar x :: follow ->
173 (match get_seq x with
174 | Some s ->
175 let (accu,ctx,s,follow) = factor [] ctx s follow in
176 let s = s @ accu in
177 let rec aux accu = function
178 | ctx,[] ->
179 create_plus
180 (plus (apply_factor accu AEps) :: ctx)
181 follow
182 | a::b,c::d when equal_atom a c -> aux (a::accu) (b,d)
183 | _ -> create_plus (AStar x :: ctx) follow
184 in
185 aux [] (ctx,s)
186 | None -> create_plus (AStar x :: ctx) follow)
187 | x :: follow -> create_plus (x :: ctx) follow
188 | [] -> List.rev ctx
189
190
191 let rec size = function
192 | AEps -> 1
193 | AEmpty -> 0
194 | ABranch (a,ay,an,_,_,_) ->
195 if (ay == an) then 1 + (size ay)
196 else 3 + (size ay) + (size an)
197
198 let choose u v =
199 if size u > size v then v else u
200
201
202 let rec alt t1 t2 = match t1,t2 with
203 | AEmpty,t | t,AEmpty -> t
204 | AEps,t | t,AEps -> opt t
205 | ABranch (_,_,_,_,_,id1), ABranch (_,_,_,_,_,id2) when id1 = id2 -> t1
206 | ABranch (al,ay,an,_,_,_), ABranch (bl,by,bn,_,_,_) ->
207 (* br al ay (alt an t2) *)
208 let (accu,_,al,bl) = factor [] [] al bl in
209 match accu with
210 | [] ->
211 (* let u = br al ay (alt an t2)
212 and v = br bl by (alt bn t1) in
213 choose u v *)
214 branch al ay (alt an t2)
215 | _ ->
216 let t1 = br al ay AEps in
217 let t2 = br bl by AEps in
218 branch accu (alt t1 t2) (alt an bn)
219
220
221 and br a ay an =
222 (* match a with
223 | [] -> alt ay an
224 | l -> *) branch a ay an
225
226 and seq t1 t2 = match t1,t2 with
227 | AEmpty,_|_,AEmpty -> AEmpty
228 | AEps,t | t,AEps -> t
229 | ABranch (a,ay,an,_,_,_), t2 ->
230 (* (alt
231 (branch a (seq ay t2) AEmpty)
232 (seq an t2) )
233 *)
234 (branch a (seq ay t2) (seq an t2))
235
236 let rtrans t = branch [ATrans t] AEps AEmpty
237 let star = function
238 | AEmpty | AEps -> AEps
239 | t -> branch [AStar t] AEps AEmpty
240
241 let rseq r1 r2 = match r1,r2 with
242 | Epsilon, z | z, Epsilon -> z
243 | Empty, _ | _, Empty -> Empty
244 | x,y -> Seq (x,y)
245 let ralt r1 r2 = match r1,r2 with
246 | Empty, z | z, Empty -> z
247 | x,y -> Alt (x,y)
248
249 let rec minim = function
250 | AEmpty -> AEmpty
251 | AEps -> AEps
252 | ABranch (a,ay,(ABranch (b,by,bn,_,_,_) as an),_,_,_) as br
253 when ay != an ->
254 choose (branch b (minim by) (branch a (minim ay) bn)) br
255 | br -> br
256
257 let rec minim_trie r =
258 let r' = minim r in
259 if (size r' < size r) then minim_trie r' else r
260
261 let rec regexp r =
262 (* let r = minim_trie r in *)
263 match r with
264 | AEmpty -> Empty
265 | AEps -> Epsilon
266 | ABranch (a,ay,an,_,_,_) when ay == an ->
267 (* let a = create_plus [] a in *)
268 rseq (ralt (regexp_atom_list a) Epsilon) (regexp ay)
269 | ABranch (a,ay,an,_,_,_) ->
270 (* let a = create_plus [] a in *)
271 ralt (rseq (regexp_atom_list a) (regexp ay)) (regexp an)
272
273 and regexp_atom_list = function
274 | hd::tl -> rseq (regexp_atom hd) (regexp_atom_list tl)
275 | [] -> Epsilon
276 and regexp_atom = function
277 | AStar t -> Star (regexp t)
278 | APlus t -> Plus (regexp t)
279 | ATrans t -> Trans t
280
281 let () = () and (* Hack to avoid "let regexp ..." (ulex construction) *)
282 regexp r =
283 (* Need to clear hashtable because S.t objects might have different
284 meaning across calls *)
285 let re = regexp r in
286 HT.clear branches;
287 re
288
289 end
290
291 module B = struct
292 type re =
293 | RSeq of re list
294 | RAlt of re list
295 | RTrans of S.t
296 | RStar of re
297 | RPlus of re
298
299 let rec compare s1 s2 =
300 if s1 == s2 then 0
301 else match (s1,s2) with
302 | RSeq x, RSeq y | RAlt x, RAlt y -> compare_list x y
303 | RSeq _, _ -> -1 | _, RSeq _ -> 1
304 | RAlt _, _ -> -1 | _, RAlt _ -> 1
305 | RTrans x, RTrans y -> S.compare x y
306 | RTrans _, _ -> -1 | _, RTrans _ -> 1
307 | RStar x, RStar y | RPlus x, RPlus y -> compare x y
308 | RStar _, _ -> -1 | _, RStar _ -> 1
309 and compare_list l1 l2 = match (l1,l2) with
310 | x1::y1, x2::y2 ->
311 let c = compare x1 x2 in if c = 0 then compare_list y1 y2 else c
312 | [], [] -> 0
313 | [], _ -> -1 | _, [] -> 1
314
315 let rec dump ppf = function
316 | RSeq l -> Format.fprintf ppf "Seq(%a)" dump_list l
317 | RAlt l -> Format.fprintf ppf "Alt(%a)" dump_list l
318 | RStar r -> Format.fprintf ppf "Star(%a)" dump r
319 | RPlus r -> Format.fprintf ppf "Plus(%a)" dump r
320 | RTrans x -> Format.fprintf ppf "Trans"
321 and dump_list ppf = function
322 | [] -> ()
323 | [h] -> Format.fprintf ppf "%a" dump h
324 | h::t -> Format.fprintf ppf "%a,%a" dump h dump_list t
325
326 let rec factor accu l1 l2 = match (l1,l2) with
327 | (x1::y1,x2::y2) when compare x1 x2 = 0 -> factor (x1::accu) y1 y2
328 | (l1,l2) -> (accu,l1,l2)
329
330
331 let rec regexp = function
332 | RSeq l ->
333 let rec aux = function
334 | [h] -> regexp h
335 | h::t -> Seq (regexp h,aux t)
336 | [] -> Epsilon in
337 aux l
338 | RAlt l ->
339 let rec aux = function
340 | [h] -> regexp h
341 | h::t -> Alt (regexp h,aux t)
342 | [] -> Empty in
343 aux l
344 | RTrans x -> Trans x
345 | RStar r -> Star (regexp r)
346 | RPlus r -> Plus (regexp r)
347
348 let epsilon = RSeq []
349 let empty = RAlt []
350 let rtrans t = RTrans t
351
352 let rec nullable = function
353 | RAlt l -> List.exists nullable l
354 | RSeq l -> List.for_all nullable l
355 | RPlus r -> nullable r
356 | RStar _ -> true
357 | RTrans _ -> false
358
359 let has_epsilon =
360 List.exists (function RSeq [] -> true | _ -> false)
361
362 let remove_epsilon =
363 List.filter (function RSeq [] -> false | _ -> true)
364
365 let rec merge l1 l2 = match (l1,l2) with
366 | x1::y1, x2::y2 ->
367 let c = compare x1 x2 in
368 if c = 0 then x1::(merge y1 y2)
369 else if c < 0 then x1::(merge y1 l2)
370 else x2::(merge l1 y2)
371 | [], l | l,[] -> l
372
373 let sort l =
374 let rec initlist = function
375 | [] -> []
376 | e::rest -> [e] :: initlist rest in
377 let rec merge2 = function
378 | l1::l2::rest -> merge l1 l2 :: merge2 rest
379 | x -> x in
380 let rec mergeall = function
381 | [] -> []
382 | [l] -> l
383 | llist -> mergeall (merge2 llist) in
384 mergeall (initlist l)
385
386 let rec sub l1 l2 =
387 (compare l1 l2 = 0) ||
388 match (l1,l2) with
389 | RSeq [x], y -> sub x y
390 | RPlus x, (RStar y | RPlus y) -> sub x y
391 | RSeq (x::y), (RPlus z | RStar z) ->
392 (sub x z) && (sub (RSeq y) (RStar z))
393 | x, (RStar y | RPlus y) -> sub x y
394 | _ -> false
395
396
397 let rec absorb_epsilon = function
398 | RPlus r :: l -> RStar r :: l
399 | (r :: _) as l when nullable r -> l
400 | r :: l -> r :: (absorb_epsilon l)
401 | [] -> [ epsilon ]
402
403 let rec simplify_alt accu = function
404 | [] -> List.rev accu
405 | x::rest ->
406 if (List.exists (sub x) accu) || (List.exists (sub x) rest)
407 then simplify_alt accu rest
408 else simplify_alt (x::accu) rest
409
410 let alt s1 s2 =
411 let s1 = match s1 with RAlt x -> x | x -> [x] in
412 let s2 = match s2 with RAlt x -> x | x -> [x] in
413 let l = merge s1 s2 in
414 let l =
415 if has_epsilon l
416 then absorb_epsilon (remove_epsilon l)
417 else l in
418 let l = simplify_alt [] l in
419 match l with
420 | [x] -> x
421 | l -> RAlt l
422
423 let rec simplify_seq = function
424 | RStar x :: ((RStar y | RPlus y) :: _ as rest)
425 when compare x y = 0 ->
426 simplify_seq rest
427 | RPlus x :: (RPlus y :: _ as rest)
428 when compare x y = 0 ->
429 simplify_seq (x :: rest)
430 | RPlus x :: (RStar y :: rest) when compare x y = 0 ->
431 simplify_seq (RPlus y :: rest)
432 | x :: rest -> x :: (simplify_seq rest)
433 | [] -> []
434
435 let rec seq s1 s2 =
436 match (s1,s2) with
437 | RAlt [], _ | _, RAlt [] -> epsilon
438 | RSeq [], x | x, RSeq [] -> x
439 | _ ->
440 let s1 = match s1 with RSeq x -> x | x -> [x] in
441 let s2 = match s2 with RSeq x -> x | x -> [x] in
442 find_plus [] (s1 @ s2)
443 and find_plus before = function
444 | [] ->
445 (match before with [h] -> h | l -> RSeq (simplify_seq (List.rev l)))
446 | (RStar s)::after ->
447 let star = match s with RSeq x -> x | x -> [x] in
448 let (right,star',after') = factor [] star after in
449 let (left,star'',before') = factor [] (List.rev star') before in
450 (match star'' with
451 | [] ->
452 let s = find_plus [] (left @ (List.rev right)) in
453 find_plus ((RPlus s)::before') after'
454 | _ ->
455 find_plus ((RStar s)::before) after)
456 | x::after -> find_plus (x::before) after
457
458 let star = function
459 | RAlt [] | RSeq [] -> epsilon
460 | RStar _ as s -> s
461 | RPlus s -> RStar s
462 | s -> RStar s
463 end
464
465 open B
466
467 type slot = {
468 mutable weight : int;
469 mutable outg : (slot * re) list;
470 mutable inc : (slot * re) list;
471 mutable self : re;
472 mutable ok : bool
473 }
474 let alloc_slot () =
475 { weight = 0; outg = []; inc = []; self = empty; ok = false }
476
477 let decompile trans n0 =
478 let slot_table = H.create 121 in
479 let slots = ref [] in
480 let slot n =
481 try H.find slot_table n
482 with Not_found ->
483 let s = alloc_slot () in
484 H.add slot_table n s;
485 slots := s :: !slots;
486 s in
487
488 let add_trans s1 s2 t =
489 if s1 == s2
490 then s1.self <- alt s1.self t
491 else (s1.outg <- (s2,t) :: s1.outg; s2.inc <- (s1,t) :: s2.inc) in
492
493 let final = alloc_slot () in
494 let initial = alloc_slot () in
495
496 let rec conv n =
497 let s = slot n in
498 if not s.ok then (
499 s.ok <- true;
500 let (tr,f) = trans n in
501 if f then add_trans s final epsilon;
502 List.iter (fun (l,dst) -> add_trans s (conv dst) (rtrans l)) tr;
503 );
504 s in
505
506 let elim s =
507 s.weight <- (-1);
508 let loop = star s.self in
509 List.iter
510 (fun (s1,t1) -> if s1.weight >= 0 then
511 List.iter
512 (fun (s2,t2) -> if s2.weight >= 0 then
513 add_trans s1 s2 (seq t1 (seq loop t2)))
514 s.outg
515 ) s.inc in
516
517 add_trans initial (conv n0) epsilon;
518 List.iter
519 (fun s -> s.weight <- List.length s.inc * List.length s.outg)
520 !slots;
521 let slots =
522 List.sort (fun s1 s2 -> Pervasives.compare s1.weight s2.weight) !slots in
523 List.iter elim slots;
524 let r =
525 List.fold_left
526 (fun accu (s,t) ->
527 if s == final then alt accu t else accu)
528 empty
529 initial.outg in
530 regexp r
531 end

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5