| 1 |
|
open Ident |
| 2 |
|
open Encodings |
| 3 |
|
|
| 4 |
type t = |
type t = |
| 5 |
| Pair of t * t |
| Pair of t * t |
| 6 |
| Record of (Types.label,t) SortedMap.t |
| Xml of t * t |
| 7 |
| Atom of Types.atom |
| Record of t label_map |
| 8 |
| Integer of Big_int.big_int |
| Atom of Atoms.v |
| 9 |
| Char of Chars.Unichar.t |
| Integer of Intervals.v |
| 10 |
|
| Char of Chars.v |
| 11 |
| Abstraction of (Types.descr * Types.descr) list * (t -> t) |
| Abstraction of (Types.descr * Types.descr) list * (t -> t) |
| 12 |
| String of int * int * string * t |
| String_latin1 of int * int * string * t |
| 13 |
|
| String_utf8 of Utf8.uindex * Utf8.uindex * Utf8.t * t |
| 14 |
|
| Absent |
| 15 |
|
|
| 16 |
|
|
| 17 |
exception CDuceExn of t |
exception CDuceExn of t |
| 18 |
|
|
| 19 |
let nil = Atom Sequence.nil_atom |
let nil = Atom Sequence.nil_atom |
| 20 |
let string s = String (0,String.length s, s, nil) |
let string_latin1 s = String_latin1 (0,String.length s, s, nil) |
| 21 |
|
let string_utf8 s = String_utf8 (Utf8.start_index s,Utf8.end_index s, s, nil) |
| 22 |
|
let vtrue = Atom Builtin.true_atom |
| 23 |
|
let vfalse = Atom Builtin.false_atom |
| 24 |
|
let vbool x = if x then vtrue else vfalse |
| 25 |
|
|
| 26 |
|
let rec sequence = function |
| 27 |
|
| [] -> nil |
| 28 |
|
| h::t -> Pair (h, sequence t) |
| 29 |
|
|
| 30 |
let const = function |
let const = function |
| 31 |
| Types.Integer i -> Integer i |
| Types.Integer i -> Integer i |
| 32 |
| Types.Atom a -> Atom a |
| Types.Atom a -> Atom a |
| 33 |
| Types.Char c -> Char c |
| Types.Char c -> Char c |
| 34 |
|
|
| 35 |
let get_string e = |
let buf = Buffer.create 100 |
| 36 |
let rec compute_len accu = function |
|
| 37 |
| Pair (_,y) -> compute_len (accu + 1) y |
let rec add_buf_utf8_to_latin1 src i j = |
| 38 |
| String (i,j,_,y) -> compute_len (accu + j - i) y |
if Utf8.equal_index i j then () |
| 39 |
| _ -> accu in |
else |
| 40 |
let rec fill pos s = function |
let (c,i) = Utf8.next src i in |
| 41 |
| Pair (Char x,y) -> s.[pos] <- Chars.Unichar.to_char x; fill (pos + 1) s y |
if (c > 255) then failwith "get_string_latin1"; |
| 42 |
| String (i,j,src,y) -> |
Buffer.add_char buf (Char.chr c); |
| 43 |
String.blit src i s pos (j - i); fill (pos + j - i) s y |
add_buf_utf8_to_latin1 src i j |
| 44 |
| _ -> s in |
|
| 45 |
fill 0 (String.create (compute_len 0 e)) e |
let rec add_buf_latin1_to_utf8_to_latin1 src i j = |
| 46 |
|
for k = i to j do |
| 47 |
|
Utf8.store buf (Char.code src.[i]) |
| 48 |
|
done |
| 49 |
|
|
| 50 |
|
let get_string_latin1 e = |
| 51 |
|
let rec aux = function |
| 52 |
|
| Pair (Char x,y) -> Buffer.add_char buf (Chars.to_char x); aux y |
| 53 |
|
| String_latin1 (i,j,src,y) -> Buffer.add_substring buf src i (j - i); aux y |
| 54 |
|
| String_utf8 (i,j,src,y) -> add_buf_utf8_to_latin1 src i j; aux y |
| 55 |
|
| _ -> () in |
| 56 |
|
aux e; |
| 57 |
|
let s = Buffer.contents buf in |
| 58 |
|
Buffer.clear buf; |
| 59 |
|
s |
| 60 |
|
|
| 61 |
|
let get_string_utf8 e = |
| 62 |
|
let rec aux = function |
| 63 |
|
| Pair (Char x,y) -> Utf8.store buf (Chars.to_int x); aux y |
| 64 |
|
| String_latin1 (i,j,src,y) -> add_buf_latin1_to_utf8_to_latin1 src i j; aux y |
| 65 |
|
| String_utf8 (i,j,src,y) -> Utf8.copy buf src i j; aux y |
| 66 |
|
| q -> q in |
| 67 |
|
let q = aux e in |
| 68 |
|
let s = Buffer.contents buf in |
| 69 |
|
Buffer.clear buf; |
| 70 |
|
(Utf8.mk s, q) |
| 71 |
|
|
| 72 |
|
|
| 73 |
let rec is_seq = function |
let rec is_seq = function |
| 74 |
| Pair (_, y) when is_seq y -> true |
| Pair (_, y) when is_seq y -> true |
| 75 |
| Atom a when a = Sequence.nil_atom -> true |
| Atom a when a = Sequence.nil_atom -> true |
| 76 |
| String (_,_,_,y) when is_seq y -> true |
| String_latin1 (_,_,_,y) | String_utf8 (_,_,_,y) when is_seq y -> true |
| 77 |
| _ -> false |
| _ -> false |
| 78 |
|
|
| 79 |
let is_xml = function |
let is_xml = function |
| 83 |
let rec is_str = function |
let rec is_str = function |
| 84 |
| Pair (Char _, y) -> is_str y |
| Pair (Char _, y) -> is_str y |
| 85 |
| Atom a when a = Sequence.nil_atom -> true |
| Atom a when a = Sequence.nil_atom -> true |
| 86 |
| String(_,_,_,q) -> is_str q |
| String_latin1 (_,_,_,q) | String_utf8(_,_,_,q) -> is_str q |
| 87 |
| _ -> false |
| _ -> false |
| 88 |
|
|
| 89 |
let rec print ppf v = |
let rec print ppf v = |
| 90 |
if is_str v then Format.fprintf ppf "\"%a\"" print_quoted_str v |
if is_str v then |
| 91 |
else if is_xml v then print_xml ppf v |
(Format.fprintf ppf "\""; |
| 92 |
else if is_seq v then Format.fprintf ppf "[ %a]" print_seq v |
print_quoted_str ppf v; |
| 93 |
|
Format.fprintf ppf "\"") |
| 94 |
|
else if is_seq v then Format.fprintf ppf "[ @[<hv>%a@]]" print_seq v |
| 95 |
else match v with |
else match v with |
| 96 |
| Pair (x,y) -> Format.fprintf ppf "(%a,%a)" print x print y |
| Pair (x,y) -> Format.fprintf ppf "(%a,%a)" print x print y |
| 97 |
| Record l -> Format.fprintf ppf "{%a }" print_record l |
| Xml (x,y) -> print_xml ppf (x,y) |
| 98 |
| Atom a -> Format.fprintf ppf "`%s" (Types.AtomPool.value a) |
| Record l -> Format.fprintf ppf "{%a }" print_record (LabelMap.get l) |
| 99 |
| Integer i -> Format.fprintf ppf "%s" (Big_int.string_of_big_int i) |
| Atom a -> Atoms.print_v ppf a |
| 100 |
| Char c -> Chars.Unichar.print ppf c |
| Integer i -> Intervals.print_v ppf i |
| 101 |
|
| Char c -> Chars.print_v ppf c |
| 102 |
| Abstraction _ -> Format.fprintf ppf "<fun>" |
| Abstraction _ -> Format.fprintf ppf "<fun>" |
| 103 |
| String (i,j,s,q) -> |
| String_latin1 (i,j,s,q) -> |
| 104 |
Format.fprintf ppf "<string:%i-%i,%S,%a>" i j s print q |
Format.fprintf ppf "<string_latin1:%i-%i,%S,%a>" i j s print q |
| 105 |
|
| String_utf8 (i,j,s,q) -> |
| 106 |
|
Format.fprintf ppf "<string_utf8:%i-%i,%S,%a>" |
| 107 |
|
(Utf8.get_idx i) (Utf8.get_idx j) (Utf8.get_str s) print q |
| 108 |
|
| Absent -> |
| 109 |
|
Format.fprintf ppf "<[absent]>" |
| 110 |
and print_quoted_str ppf = function |
and print_quoted_str ppf = function |
| 111 |
| Pair (Char c, q) -> |
| Pair (Char c, q) -> |
| 112 |
Chars.Unichar.print_in_string ppf c; |
Chars.print_v_in_string ppf c; |
| 113 |
|
print_quoted_str ppf q |
| 114 |
|
| String_latin1 (i,j,s, q) -> |
| 115 |
|
for k = i to j - 1 do |
| 116 |
|
Chars.print_v_in_string ppf (Chars.mk_char s.[k]) |
| 117 |
|
done; |
| 118 |
print_quoted_str ppf q |
print_quoted_str ppf q |
| 119 |
| String (i,j,s, q) -> |
| String_utf8 (i,j,s, q) -> |
| 120 |
Format.fprintf ppf "%s" (String.escaped (String.sub s i (j-i))); |
(* Format.fprintf ppf "UTF8:{"; *) |
| 121 |
|
let rec aux i = |
| 122 |
|
if Utf8.equal_index i j then q |
| 123 |
|
else |
| 124 |
|
let (c,i) =Utf8.next s i in |
| 125 |
|
Chars.print_v_in_string ppf (Chars.mk_int c); |
| 126 |
|
aux i |
| 127 |
|
in |
| 128 |
|
let q = aux i in |
| 129 |
|
(* Format.fprintf ppf "}"; *) |
| 130 |
print_quoted_str ppf q |
print_quoted_str ppf q |
| 131 |
| _ -> () |
| q -> q |
| 132 |
and print_seq ppf = function |
and print_seq ppf = function |
| 133 |
| Pair (Char _, _) as s -> Format.fprintf ppf "'%a" print_str s |
| (Pair(Char _, _)|String_latin1 (_,_,_,_)|String_utf8 (_,_,_,_)) as s -> |
|
| Pair (x,y) -> Format.fprintf ppf "@[%a@]@ %a" print x print_seq y |
|
|
| String (i,j,s,y) -> |
|
| 134 |
Format.fprintf ppf "'"; |
Format.fprintf ppf "'"; |
| 135 |
for k = i to j - 1 do |
let q = print_quoted_str ppf s in |
| 136 |
Format.fprintf ppf "%s" (Char.escaped s.[k]) |
Format.fprintf ppf "'@ "; |
| 137 |
done; |
print_seq ppf q |
| 138 |
Format.fprintf ppf "' %a" print_seq y |
| Pair (x,y) -> |
| 139 |
|
Format.fprintf ppf "@[%a@]@ " print x; |
| 140 |
|
print_seq ppf y |
| 141 |
| _ -> () |
| _ -> () |
|
and print_str ppf = function |
|
|
| Pair (Char c,y) -> |
|
|
let c = Chars.Unichar.to_char c in |
|
|
Format.fprintf ppf "%s" (Char.escaped c); |
|
|
print_str ppf y |
|
|
| v -> |
|
|
Format.fprintf ppf "\' "; |
|
|
print_seq ppf v |
|
| 142 |
|
|
| 143 |
and print_xml ppf = function |
and print_xml ppf = function |
| 144 |
| Pair(Atom tag, Pair (Record attr,content)) -> |
| (Atom tag, Pair (Record attr,content)) -> |
| 145 |
Format.fprintf ppf "@[<hv2><%s%a>[@ %a@]]" |
Format.fprintf ppf "@[<hv2><%a%a>[@ %a@]]" |
| 146 |
(Types.AtomPool.value tag) |
Utf8.print (Atoms.value tag) |
| 147 |
print_record attr |
print_record (LabelMap.get attr) |
| 148 |
print_seq content |
print_seq content |
| 149 |
| _ -> assert false |
| _ -> assert false |
| 150 |
|
|
| 154 |
| f :: rem -> Format.fprintf ppf " %a;%a" print_field f print_record rem |
| f :: rem -> Format.fprintf ppf " %a;%a" print_field f print_record rem |
| 155 |
|
|
| 156 |
and print_field ppf (l,v) = |
and print_field ppf (l,v) = |
| 157 |
Format.fprintf ppf "%s=%a" (Types.LabelPool.value l) print v |
Format.fprintf ppf "%a=%a" U.print (LabelPool.value l) print v |
| 158 |
|
|
| 159 |
|
|
| 160 |
let normalize = function |
let normalize_string_latin1 i j s q = |
|
| String (i,j,s,q) -> |
|
| 161 |
if i = j then q else |
if i = j then q else |
| 162 |
Pair (Char (Chars.Unichar.from_char s.[i]), |
Pair (Char (Chars.mk_char (String.unsafe_get s i)), String_latin1 (succ i,j,s,q)) |
| 163 |
String (succ i,j,s,q)) |
|
| 164 |
|
let normalize_string_utf8 i j s q = |
| 165 |
|
if Utf8.equal_index i j then q |
| 166 |
|
else |
| 167 |
|
let (c,i) = Utf8.next s i in |
| 168 |
|
Pair (Char (Chars.mk_int c), String_utf8 (i,j,s,q)) |
| 169 |
|
|
| 170 |
|
let normalize = function |
| 171 |
|
| String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q |
| 172 |
|
| String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q |
| 173 |
| v -> assert false |
| v -> assert false |
| 174 |
|
|
| 175 |
|
let rec compare x y = |
| 176 |
|
if (x == y) then 0 |
| 177 |
|
else |
| 178 |
|
match (x,y) with |
| 179 |
|
| Pair (x1,x2), Pair (y1,y2) | Xml (x1,x2), Xml (y1,y2) -> |
| 180 |
|
let c = compare x1 y1 in if c <> 0 then c |
| 181 |
|
else compare x2 y2 |
| 182 |
|
| Record rx, Record ry -> LabelMap.compare compare rx ry |
| 183 |
|
| Atom x, Atom y -> Atoms.vcompare x y |
| 184 |
|
| Integer x, Integer y -> Intervals.vcompare x y |
| 185 |
|
| Char x, Char y -> Chars.vcompare x y |
| 186 |
|
| Abstraction (_,_), Abstraction (_,_) -> |
| 187 |
|
raise (CDuceExn (string_latin1 "comparing functional values")) |
| 188 |
|
| Absent,_ | _,Absent -> assert false |
| 189 |
|
| String_latin1 (ix,jx,sx,qx), String_latin1 (iy,jy,sy,qy) -> |
| 190 |
|
if (sx == sy) && (ix = iy) && (jx = jy) then compare qx qy |
| 191 |
|
else |
| 192 |
|
(* Note: we would like to compare first jx-ix and jy-iy, |
| 193 |
|
but this is not compatible with the equivalence of values *) |
| 194 |
|
let rec aux ix iy = |
| 195 |
|
if (ix = jx) then |
| 196 |
|
if (iy = jy) then compare qx qy |
| 197 |
|
else compare qx (normalize_string_latin1 iy jy sy qy) |
| 198 |
|
else |
| 199 |
|
if (iy = jy) then compare (normalize_string_latin1 ix jx sx qx) qy |
| 200 |
|
else |
| 201 |
|
let c1 = String.unsafe_get sx ix |
| 202 |
|
and c2 = String.unsafe_get sy iy in |
| 203 |
|
if c1 < c2 then -1 else |
| 204 |
|
if c1 > c2 then 1 else aux (ix + 1) (iy + 1) |
| 205 |
|
in |
| 206 |
|
aux ix iy |
| 207 |
|
| String_utf8 (ix,jx,sx,qx), String_utf8 (iy,jy,sy,qy) -> |
| 208 |
|
if (sx == sy) && (Utf8.equal_index ix iy) && (Utf8.equal_index jx jy) then compare qx qy |
| 209 |
|
else |
| 210 |
|
let rec aux ix iy = |
| 211 |
|
if (Utf8.equal_index ix jx) then |
| 212 |
|
if (Utf8.equal_index iy jy) then compare qx qy |
| 213 |
|
else compare qx (normalize_string_utf8 iy jy sy qy) |
| 214 |
|
else |
| 215 |
|
if (Utf8.equal_index iy jy) then compare (normalize_string_utf8 ix jx sx qx) qy |
| 216 |
|
else |
| 217 |
|
let (c1,ix) = Utf8.next sx ix in |
| 218 |
|
let (c2,iy) = Utf8.next sy iy in |
| 219 |
|
if c1 < c2 then -1 else |
| 220 |
|
if c1 > c2 then 1 else aux ix iy |
| 221 |
|
in |
| 222 |
|
aux ix iy |
| 223 |
|
| String_latin1 (i,j,s,q), _ -> compare (normalize_string_latin1 i j s q) y |
| 224 |
|
| _, String_latin1 (i,j,s,q) -> compare x (normalize_string_latin1 i j s q) |
| 225 |
|
| String_utf8 (i,j,s,q), _ -> compare (normalize_string_utf8 i j s q) y |
| 226 |
|
| _, String_utf8 (i,j,s,q) -> compare x (normalize_string_utf8 i j s q) |
| 227 |
|
| _,_ -> Obj.tag (Obj.repr x) - Obj.tag (Obj.repr y) |
| 228 |
|
(* TODO: rewrite this case *) |
| 229 |
|
|
| 230 |
|
|