| 1 |
abate |
233 |
open Ident
|
| 2 |
abate |
310 |
open Encodings
|
| 3 |
abate |
233 |
|
| 4 |
abate |
48 |
type t =
|
| 5 |
|
|
| Pair of t * t
|
| 6 |
abate |
405 |
| Xml of t * t * t
|
| 7 |
abate |
233 |
| Record of t label_map
|
| 8 |
abate |
222 |
| Atom of Atoms.v
|
| 9 |
|
|
| Integer of Intervals.v
|
| 10 |
|
|
| Char of Chars.v
|
| 11 |
abate |
69 |
| Abstraction of (Types.descr * Types.descr) list * (t -> t)
|
| 12 |
abate |
310 |
| String_latin1 of int * int * string * t
|
| 13 |
abate |
374 |
| String_utf8 of Utf8.uindex * Utf8.uindex * Utf8.t * t
|
| 14 |
abate |
229 |
| Absent
|
| 15 |
abate |
48 |
|
| 16 |
abate |
431 |
| Delayed of t ref
|
| 17 |
abate |
69 |
|
| 18 |
abate |
431 |
|
| 19 |
abate |
64 |
exception CDuceExn of t
|
| 20 |
|
|
|
| 21 |
abate |
66 |
let nil = Atom Sequence.nil_atom
|
| 22 |
abate |
310 |
let string_latin1 s = String_latin1 (0,String.length s, s, nil)
|
| 23 |
abate |
374 |
let string_utf8 s = String_utf8 (Utf8.start_index s,Utf8.end_index s, s, nil)
|
| 24 |
abate |
421 |
let vtrue = Atom (Atoms.mk_ascii "true")
|
| 25 |
|
|
let vfalse = Atom (Atoms.mk_ascii "false")
|
| 26 |
abate |
237 |
let vbool x = if x then vtrue else vfalse
|
| 27 |
abate |
66 |
|
| 28 |
abate |
501 |
let vrecord l =
|
| 29 |
|
|
let l = List.map (fun (l,v) -> LabelPool.mk (U.mk l), v) l in
|
| 30 |
|
|
Record (LabelMap.from_list (fun _ _ -> assert false) l)
|
| 31 |
|
|
|
| 32 |
|
|
let get_fields = function
|
| 33 |
|
|
| Record map ->
|
| 34 |
|
|
LabelMap.mapi_to_list
|
| 35 |
|
|
(fun k v -> Utf8.to_string (LabelPool.value k), v)
|
| 36 |
|
|
map
|
| 37 |
|
|
| _ -> assert false
|
| 38 |
|
|
|
| 39 |
abate |
368 |
let rec sequence = function
|
| 40 |
|
|
| [] -> nil
|
| 41 |
|
|
| h::t -> Pair (h, sequence t)
|
| 42 |
abate |
237 |
|
| 43 |
abate |
421 |
let rec concat l1 l2 = match l1 with
|
| 44 |
|
|
| Pair (x,y) -> Pair (x, concat y l2)
|
| 45 |
|
|
| String_latin1 (s,i,j,q) -> String_latin1 (s,i,j, concat q l2)
|
| 46 |
|
|
| String_utf8 (s,i,j,q) -> String_utf8 (s,i,j, concat q l2)
|
| 47 |
|
|
| q -> l2
|
| 48 |
|
|
|
| 49 |
|
|
let rec flatten = function
|
| 50 |
|
|
| Pair (x,y) -> concat x (flatten y)
|
| 51 |
|
|
| q -> q
|
| 52 |
|
|
|
| 53 |
abate |
70 |
let const = function
|
| 54 |
|
|
| Types.Integer i -> Integer i
|
| 55 |
|
|
| Types.Atom a -> Atom a
|
| 56 |
|
|
| Types.Char c -> Char c
|
| 57 |
|
|
|
| 58 |
abate |
310 |
let buf = Buffer.create 100
|
| 59 |
abate |
71 |
|
| 60 |
abate |
310 |
let rec add_buf_utf8_to_latin1 src i j =
|
| 61 |
|
|
if Utf8.equal_index i j then ()
|
| 62 |
|
|
else
|
| 63 |
|
|
let (c,i) = Utf8.next src i in
|
| 64 |
|
|
if (c > 255) then failwith "get_string_latin1";
|
| 65 |
|
|
Buffer.add_char buf (Char.chr c);
|
| 66 |
|
|
add_buf_utf8_to_latin1 src i j
|
| 67 |
|
|
|
| 68 |
abate |
386 |
let rec add_buf_latin1_to_utf8 src i j =
|
| 69 |
|
|
for k = i to j - 1 do
|
| 70 |
|
|
Utf8.store buf (Char.code src.[k])
|
| 71 |
abate |
310 |
done
|
| 72 |
|
|
|
| 73 |
|
|
let get_string_latin1 e =
|
| 74 |
|
|
let rec aux = function
|
| 75 |
|
|
| Pair (Char x,y) -> Buffer.add_char buf (Chars.to_char x); aux y
|
| 76 |
|
|
| String_latin1 (i,j,src,y) -> Buffer.add_substring buf src i (j - i); aux y
|
| 77 |
|
|
| String_utf8 (i,j,src,y) -> add_buf_utf8_to_latin1 src i j; aux y
|
| 78 |
|
|
| _ -> () in
|
| 79 |
|
|
aux e;
|
| 80 |
|
|
let s = Buffer.contents buf in
|
| 81 |
|
|
Buffer.clear buf;
|
| 82 |
|
|
s
|
| 83 |
|
|
|
| 84 |
|
|
let get_string_utf8 e =
|
| 85 |
|
|
let rec aux = function
|
| 86 |
|
|
| Pair (Char x,y) -> Utf8.store buf (Chars.to_int x); aux y
|
| 87 |
abate |
386 |
| String_latin1 (i,j,src,y) -> add_buf_latin1_to_utf8 src i j; aux y
|
| 88 |
abate |
310 |
| String_utf8 (i,j,src,y) -> Utf8.copy buf src i j; aux y
|
| 89 |
abate |
374 |
| q -> q in
|
| 90 |
|
|
let q = aux e in
|
| 91 |
abate |
310 |
let s = Buffer.contents buf in
|
| 92 |
|
|
Buffer.clear buf;
|
| 93 |
abate |
374 |
(Utf8.mk s, q)
|
| 94 |
abate |
310 |
|
| 95 |
abate |
501 |
let get_int = function
|
| 96 |
|
|
| Integer i when Intervals.is_int i -> Intervals.get_int i
|
| 97 |
|
|
| _ -> raise (Invalid_argument "Value.get_int")
|
| 98 |
abate |
310 |
|
| 99 |
abate |
52 |
let rec is_seq = function
|
| 100 |
|
|
| Pair (_, y) when is_seq y -> true
|
| 101 |
|
|
| Atom a when a = Sequence.nil_atom -> true
|
| 102 |
abate |
310 |
| String_latin1 (_,_,_,y) | String_utf8 (_,_,_,y) when is_seq y -> true
|
| 103 |
abate |
52 |
| _ -> false
|
| 104 |
abate |
48 |
|
| 105 |
abate |
52 |
let is_xml = function
|
| 106 |
|
|
| Pair (Atom _, Pair (Record _, s)) when is_seq s -> true
|
| 107 |
|
|
| _ -> false
|
| 108 |
|
|
|
| 109 |
|
|
let rec is_str = function
|
| 110 |
abate |
76 |
| Pair (Char _, y) -> is_str y
|
| 111 |
abate |
52 |
| Atom a when a = Sequence.nil_atom -> true
|
| 112 |
abate |
310 |
| String_latin1 (_,_,_,q) | String_utf8(_,_,_,q) -> is_str q
|
| 113 |
abate |
52 |
| _ -> false
|
| 114 |
|
|
|
| 115 |
|
|
let rec print ppf v =
|
| 116 |
abate |
374 |
if is_str v then
|
| 117 |
|
|
(Format.fprintf ppf "\"";
|
| 118 |
|
|
print_quoted_str ppf v;
|
| 119 |
|
|
Format.fprintf ppf "\"")
|
| 120 |
|
|
else if is_seq v then Format.fprintf ppf "[ @[<hv>%a@]]" print_seq v
|
| 121 |
|
|
else match v with
|
| 122 |
abate |
52 |
| Pair (x,y) -> Format.fprintf ppf "(%a,%a)" print x print y
|
| 123 |
abate |
405 |
| Xml (x,y,z) -> print_xml ppf x y z
|
| 124 |
abate |
233 |
| Record l -> Format.fprintf ppf "{%a }" print_record (LabelMap.get l)
|
| 125 |
abate |
222 |
| Atom a -> Atoms.print_v ppf a
|
| 126 |
|
|
| Integer i -> Intervals.print_v ppf i
|
| 127 |
|
|
| Char c -> Chars.print_v ppf c
|
| 128 |
abate |
69 |
| Abstraction _ -> Format.fprintf ppf "<fun>"
|
| 129 |
abate |
310 |
| String_latin1 (i,j,s,q) ->
|
| 130 |
|
|
Format.fprintf ppf "<string_latin1:%i-%i,%S,%a>" i j s print q
|
| 131 |
|
|
| String_utf8 (i,j,s,q) ->
|
| 132 |
|
|
Format.fprintf ppf "<string_utf8:%i-%i,%S,%a>"
|
| 133 |
|
|
(Utf8.get_idx i) (Utf8.get_idx j) (Utf8.get_str s) print q
|
| 134 |
abate |
229 |
| Absent ->
|
| 135 |
|
|
Format.fprintf ppf "<[absent]>"
|
| 136 |
abate |
431 |
| Delayed x ->
|
| 137 |
|
|
Format.fprintf ppf "<[delayed]>"
|
| 138 |
abate |
52 |
and print_quoted_str ppf = function
|
| 139 |
abate |
76 |
| Pair (Char c, q) ->
|
| 140 |
abate |
222 |
Chars.print_v_in_string ppf c;
|
| 141 |
abate |
76 |
print_quoted_str ppf q
|
| 142 |
abate |
310 |
| String_latin1 (i,j,s, q) ->
|
| 143 |
abate |
312 |
for k = i to j - 1 do
|
| 144 |
abate |
311 |
Chars.print_v_in_string ppf (Chars.mk_char s.[k])
|
| 145 |
|
|
done;
|
| 146 |
abate |
76 |
print_quoted_str ppf q
|
| 147 |
abate |
310 |
| String_utf8 (i,j,s, q) ->
|
| 148 |
abate |
374 |
(* Format.fprintf ppf "UTF8:{"; *)
|
| 149 |
|
|
let rec aux i =
|
| 150 |
|
|
if Utf8.equal_index i j then q
|
| 151 |
|
|
else
|
| 152 |
|
|
let (c,i) =Utf8.next s i in
|
| 153 |
|
|
Chars.print_v_in_string ppf (Chars.mk_int c);
|
| 154 |
|
|
aux i
|
| 155 |
|
|
in
|
| 156 |
|
|
let q = aux i in
|
| 157 |
|
|
(* Format.fprintf ppf "}"; *)
|
| 158 |
|
|
print_quoted_str ppf q
|
| 159 |
|
|
| q -> q
|
| 160 |
abate |
52 |
and print_seq ppf = function
|
| 161 |
abate |
374 |
| (Pair(Char _, _)|String_latin1 (_,_,_,_)|String_utf8 (_,_,_,_)) as s ->
|
| 162 |
abate |
76 |
Format.fprintf ppf "'";
|
| 163 |
abate |
374 |
let q = print_quoted_str ppf s in
|
| 164 |
|
|
Format.fprintf ppf "'@ ";
|
| 165 |
|
|
print_seq ppf q
|
| 166 |
|
|
| Pair (x,y) ->
|
| 167 |
|
|
Format.fprintf ppf "@[%a@]@ " print x;
|
| 168 |
|
|
print_seq ppf y
|
| 169 |
abate |
52 |
| _ -> ()
|
| 170 |
|
|
|
| 171 |
abate |
405 |
and print_xml ppf tag attr content =
|
| 172 |
abate |
501 |
if is_seq content then
|
| 173 |
|
|
Format.fprintf ppf "@[<hv2><%a%a>[@ %a@]]"
|
| 174 |
|
|
print_tag tag
|
| 175 |
|
|
print_attr attr
|
| 176 |
|
|
print_seq content
|
| 177 |
|
|
else
|
| 178 |
|
|
Format.fprintf ppf "@[<hv2><%a%a>@ %a@]"
|
| 179 |
|
|
print_tag tag
|
| 180 |
|
|
print_attr attr
|
| 181 |
|
|
print content
|
| 182 |
abate |
392 |
and print_tag ppf = function
|
| 183 |
|
|
| Atom tag -> Utf8.print ppf (Atoms.value tag)
|
| 184 |
|
|
| tag -> Format.fprintf ppf "(%a)" print tag
|
| 185 |
|
|
and print_attr ppf = function
|
| 186 |
|
|
| Record attr -> print_record ppf (LabelMap.get attr)
|
| 187 |
|
|
| attr -> Format.fprintf ppf "(%a)" print attr
|
| 188 |
abate |
52 |
|
| 189 |
abate |
48 |
and print_record ppf = function
|
| 190 |
|
|
| [] -> ()
|
| 191 |
abate |
52 |
| [f] -> Format.fprintf ppf " %a" print_field f
|
| 192 |
|
|
| f :: rem -> Format.fprintf ppf " %a;%a" print_field f print_record rem
|
| 193 |
abate |
48 |
|
| 194 |
|
|
and print_field ppf (l,v) =
|
| 195 |
abate |
374 |
Format.fprintf ppf "%a=%a" U.print (LabelPool.value l) print v
|
| 196 |
abate |
48 |
|
| 197 |
abate |
310 |
let normalize_string_latin1 i j s q =
|
| 198 |
abate |
237 |
if i = j then q else
|
| 199 |
abate |
310 |
Pair (Char (Chars.mk_char (String.unsafe_get s i)), String_latin1 (succ i,j,s,q))
|
| 200 |
abate |
237 |
|
| 201 |
abate |
310 |
let normalize_string_utf8 i j s q =
|
| 202 |
|
|
if Utf8.equal_index i j then q
|
| 203 |
|
|
else
|
| 204 |
|
|
let (c,i) = Utf8.next s i in
|
| 205 |
|
|
Pair (Char (Chars.mk_int c), String_utf8 (i,j,s,q))
|
| 206 |
|
|
|
| 207 |
abate |
70 |
let normalize = function
|
| 208 |
abate |
310 |
| String_latin1 (i,j,s,q) -> normalize_string_latin1 i j s q
|
| 209 |
|
|
| String_utf8 (i,j,s,q) -> normalize_string_utf8 i j s q
|
| 210 |
abate |
71 |
| v -> assert false
|
| 211 |
abate |
237 |
|
| 212 |
|
|
let rec compare x y =
|
| 213 |
|
|
if (x == y) then 0
|
| 214 |
|
|
else
|
| 215 |
|
|
match (x,y) with
|
| 216 |
abate |
405 |
| Pair (x1,x2), Pair (y1,y2) ->
|
| 217 |
abate |
237 |
let c = compare x1 y1 in if c <> 0 then c
|
| 218 |
|
|
else compare x2 y2
|
| 219 |
abate |
405 |
| Xml (x1,x2,x3), Xml (y1,y2,y3) ->
|
| 220 |
|
|
let c = compare x1 y1 in if c <> 0 then c
|
| 221 |
abate |
512 |
else let c = compare x2 y2 in if c <> 0 then c
|
| 222 |
abate |
405 |
else compare x3 y3
|
| 223 |
abate |
237 |
| Record rx, Record ry -> LabelMap.compare compare rx ry
|
| 224 |
|
|
| Atom x, Atom y -> Atoms.vcompare x y
|
| 225 |
|
|
| Integer x, Integer y -> Intervals.vcompare x y
|
| 226 |
|
|
| Char x, Char y -> Chars.vcompare x y
|
| 227 |
abate |
424 |
| Abstraction (_,_), _
|
| 228 |
|
|
| _, Abstraction (_,_) ->
|
| 229 |
abate |
310 |
raise (CDuceExn (string_latin1 "comparing functional values"))
|
| 230 |
abate |
431 |
| Absent,_ | _,Absent
|
| 231 |
|
|
| Delayed _, _ | _, Delayed _ -> assert false
|
| 232 |
abate |
310 |
| String_latin1 (ix,jx,sx,qx), String_latin1 (iy,jy,sy,qy) ->
|
| 233 |
abate |
237 |
if (sx == sy) && (ix = iy) && (jx = jy) then compare qx qy
|
| 234 |
|
|
else
|
| 235 |
|
|
(* Note: we would like to compare first jx-ix and jy-iy,
|
| 236 |
|
|
but this is not compatible with the equivalence of values *)
|
| 237 |
|
|
let rec aux ix iy =
|
| 238 |
|
|
if (ix = jx) then
|
| 239 |
|
|
if (iy = jy) then compare qx qy
|
| 240 |
abate |
310 |
else compare qx (normalize_string_latin1 iy jy sy qy)
|
| 241 |
abate |
237 |
else
|
| 242 |
abate |
310 |
if (iy = jy) then compare (normalize_string_latin1 ix jx sx qx) qy
|
| 243 |
abate |
237 |
else
|
| 244 |
|
|
let c1 = String.unsafe_get sx ix
|
| 245 |
|
|
and c2 = String.unsafe_get sy iy in
|
| 246 |
|
|
if c1 < c2 then -1 else
|
| 247 |
|
|
if c1 > c2 then 1 else aux (ix + 1) (iy + 1)
|
| 248 |
|
|
in
|
| 249 |
|
|
aux ix iy
|
| 250 |
abate |
310 |
| String_utf8 (ix,jx,sx,qx), String_utf8 (iy,jy,sy,qy) ->
|
| 251 |
|
|
if (sx == sy) && (Utf8.equal_index ix iy) && (Utf8.equal_index jx jy) then compare qx qy
|
| 252 |
|
|
else
|
| 253 |
|
|
let rec aux ix iy =
|
| 254 |
|
|
if (Utf8.equal_index ix jx) then
|
| 255 |
|
|
if (Utf8.equal_index iy jy) then compare qx qy
|
| 256 |
|
|
else compare qx (normalize_string_utf8 iy jy sy qy)
|
| 257 |
|
|
else
|
| 258 |
|
|
if (Utf8.equal_index iy jy) then compare (normalize_string_utf8 ix jx sx qx) qy
|
| 259 |
|
|
else
|
| 260 |
|
|
let (c1,ix) = Utf8.next sx ix in
|
| 261 |
|
|
let (c2,iy) = Utf8.next sy iy in
|
| 262 |
|
|
if c1 < c2 then -1 else
|
| 263 |
|
|
if c1 > c2 then 1 else aux ix iy
|
| 264 |
|
|
in
|
| 265 |
|
|
aux ix iy
|
| 266 |
|
|
| String_latin1 (i,j,s,q), _ -> compare (normalize_string_latin1 i j s q) y
|
| 267 |
|
|
| _, String_latin1 (i,j,s,q) -> compare x (normalize_string_latin1 i j s q)
|
| 268 |
|
|
| String_utf8 (i,j,s,q), _ -> compare (normalize_string_utf8 i j s q) y
|
| 269 |
|
|
| _, String_utf8 (i,j,s,q) -> compare x (normalize_string_utf8 i j s q)
|
| 270 |
abate |
237 |
|
| 271 |
abate |
424 |
| Pair (_,_), _ -> -1 | _, Pair(_,_) -> 1
|
| 272 |
|
|
| Xml (_,_,_),_ -> -1 | _, Xml(_,_,_) -> 1
|
| 273 |
|
|
| Record _,_ -> -1 | _, Record _ -> 1
|
| 274 |
|
|
| Atom _,_ -> -1 | _, Atom _ -> 1
|
| 275 |
|
|
| Integer _,_ -> -1 | _, Integer _ -> 1
|
| 276 |
abate |
237 |
|
| 277 |
abate |
424 |
|
| 278 |
abate |
506 |
(* (* BUGGY *)
|
| 279 |
|
|
let explode_rev s =
|
| 280 |
|
|
let rec aux acc = function
|
| 281 |
|
|
| v when v = nil -> acc
|
| 282 |
|
|
| Pair (v, seq) -> aux (v::acc) seq
|
| 283 |
|
|
| v -> [v]
|
| 284 |
|
|
in
|
| 285 |
|
|
aux [] s
|
| 286 |
|
|
*)
|
| 287 |
abate |
424 |
|
| 288 |
abate |
506 |
let iter_xml pcdata_callback other_callback =
|
| 289 |
|
|
let rec aux = function
|
| 290 |
|
|
| v when compare v nil = 0 -> ()
|
| 291 |
abate |
513 |
| Pair (Char c, tl) ->
|
| 292 |
|
|
pcdata_callback (U.mk_char (Chars.to_int c));
|
| 293 |
abate |
506 |
aux tl
|
| 294 |
abate |
513 |
| String_latin1 (i,j,s,tl) ->
|
| 295 |
|
|
pcdata_callback (U.mk_latin1 (String.sub s i j));
|
| 296 |
abate |
506 |
aux tl
|
| 297 |
abate |
513 |
| String_utf8 (i,j,s,tl) ->
|
| 298 |
abate |
506 |
pcdata_callback (U.mk (U.get_substr s i j));
|
| 299 |
|
|
aux tl
|
| 300 |
|
|
| Pair (hd, tl) ->
|
| 301 |
|
|
other_callback hd;
|
| 302 |
|
|
aux tl
|
| 303 |
abate |
513 |
| v -> raise (Invalid_argument "Value.iter_xml")
|
| 304 |
abate |
506 |
in
|
| 305 |
|
|
function
|
| 306 |
|
|
| Xml (_,_,cont) -> aux cont
|
| 307 |
|
|
| _ -> raise (Invalid_argument "Value.iter_xml")
|
| 308 |
|
|
;;
|
| 309 |
abate |
424 |
|