| 1 |
|
| 2 |
open Printf
|
| 3 |
|
| 4 |
open Encodings
|
| 5 |
open Encodings.Utf8.Pcre
|
| 6 |
open Schema_common
|
| 7 |
open Schema_types
|
| 8 |
|
| 9 |
(* TODO dates: boundary checks (e.g. 95/26/2003) *)
|
| 10 |
(* TODO a lot of almost cut-and-paste code, expecially in gFoo types validation
|
| 11 |
*)
|
| 12 |
|
| 13 |
(** {2 Aux/Misc stuff} *)
|
| 14 |
|
| 15 |
let add_xsd_prefix s = Schema_xml.add_xsd_prefix (Utf8.mk s)
|
| 16 |
|
| 17 |
let unsupported =
|
| 18 |
List.map (fun s -> add_xsd_prefix s)
|
| 19 |
[ "decimal"; "float"; "double"; "NOTATION"; "QName" ]
|
| 20 |
|
| 21 |
let is_empty s = Utf8.equal s (Utf8.mk "")
|
| 22 |
|
| 23 |
let zero = Intervals.V.zero
|
| 24 |
let one = (Intervals.V.succ Intervals.V.zero)
|
| 25 |
let minus_one = (Intervals.V.pred Intervals.V.zero)
|
| 26 |
let long_l = (Intervals.V.mk "-9223372036854775808")
|
| 27 |
let long_r = (Intervals.V.mk "9223372036854775807")
|
| 28 |
let int_l = (Intervals.V.mk "-2147483648")
|
| 29 |
let int_r = (Intervals.V.mk "2147483647")
|
| 30 |
let short_l = (Intervals.V.mk "-32768")
|
| 31 |
let short_r = (Intervals.V.mk "32767")
|
| 32 |
let byte_l = (Intervals.V.mk "-128")
|
| 33 |
let byte_r = (Intervals.V.mk "127")
|
| 34 |
|
| 35 |
let xml_S_RE = pcre_regexp "[ \\t\\r\\n]+"
|
| 36 |
(* split a string at XML recommendation "S" production boundaries *)
|
| 37 |
let split_xml_S s = pcre_split ~rex:xml_S_RE s
|
| 38 |
let norm_RE = pcre_regexp "[\\t\\r\\n]"
|
| 39 |
|
| 40 |
let char_of_hex =
|
| 41 |
let int_of_hex_char = function
|
| 42 |
| '0' -> 0 | '1' -> 1 | '2' -> 2 | '3' -> 3 | '4' -> 4 | '5' -> 5 | '6' -> 6
|
| 43 |
| '7' -> 7 | '8' -> 8 | '9' -> 9 | 'a' | 'A' -> 10 | 'b' | 'B' -> 11
|
| 44 |
| 'c' | 'C' -> 12 | 'd' | 'D' -> 13 | 'e' | 'E' -> 14 | 'f' | 'F' -> 15
|
| 45 |
| _ -> assert false
|
| 46 |
in
|
| 47 |
(* most significative, least significative *)
|
| 48 |
fun ms ls -> Char.unsafe_chr (int_of_hex_char ms * 16 + int_of_hex_char ls)
|
| 49 |
|
| 50 |
let strip_parens s = Pcre.replace ~pat:"[()]" s
|
| 51 |
let add_limits s = "^" ^ s ^ "$"
|
| 52 |
|
| 53 |
exception Schema_builtin_error of Utf8.t
|
| 54 |
let simple_type_error name = raise (Schema_builtin_error (add_xsd_prefix name))
|
| 55 |
|
| 56 |
let qualify s = (Ns.empty, Encodings.Utf8.mk s)
|
| 57 |
|
| 58 |
(* regular expressions used to validate built-in types *)
|
| 59 |
|
| 60 |
let timezone_RE_raw = "(Z)|(([+-])?(\\d{2}):(\\d{2}))"
|
| 61 |
let date_RE_raw = "(\\d{4,})-(\\d{2})-(\\d{2})"
|
| 62 |
let time_RE_raw = "(\\d{2}):(\\d{2}):(\\d{2})"
|
| 63 |
|
| 64 |
let gYearMonth_RE_raw = sprintf "(-)?(\\d{4,})-(\\d{2})(%s)?" timezone_RE_raw
|
| 65 |
let gYear_RE_raw = sprintf "(-)?(\\d{4,})(%s)?" timezone_RE_raw
|
| 66 |
let gMonthDay_RE_raw = sprintf "--(\\d{2})-(\\d{2})(%s)?" timezone_RE_raw
|
| 67 |
let gDay_RE_raw = sprintf "---(\\d{2})(%s)?" timezone_RE_raw
|
| 68 |
let gMonth_RE_raw = "--(\\d{2})--(%s)?"
|
| 69 |
|
| 70 |
(** {2 CDuce types} *)
|
| 71 |
|
| 72 |
let positive_field = false, qualify "positive", Builtin_defs.bool
|
| 73 |
let year_field = false, qualify "year", Builtin_defs.int
|
| 74 |
let month_field = false, qualify "month", Builtin_defs.int
|
| 75 |
let day_field = false, qualify "day", Builtin_defs.int
|
| 76 |
let hour_field = false, qualify "hour", Builtin_defs.int
|
| 77 |
let minute_field = false, qualify "minute", Builtin_defs.int
|
| 78 |
let second_field = false, qualify "second", Builtin_defs.int
|
| 79 |
(* TODO this should be a decimal *)
|
| 80 |
let time_type_fields = [ hour_field; minute_field; second_field ]
|
| 81 |
let date_type_fields = [ year_field; month_field; day_field ]
|
| 82 |
|
| 83 |
let time_kind_field = false, qualify "time_kind", Builtin_defs.time_kind
|
| 84 |
let time_kind kind = (qualify "time_kind", Value.Atom (Atoms.V.mk_ascii kind))
|
| 85 |
|
| 86 |
(* TODO the constraint that at least one part should be present isn't easily
|
| 87 |
expressible with CDuce types *)
|
| 88 |
let duration_type = Types.rec_of_list' [
|
| 89 |
time_kind_field;
|
| 90 |
positive_field;
|
| 91 |
true, qualify "year", Builtin_defs.int;
|
| 92 |
true, qualify "month", Builtin_defs.int;
|
| 93 |
true, qualify "day", Builtin_defs.int;
|
| 94 |
true, qualify "hour", Builtin_defs.int;
|
| 95 |
true, qualify "minute", Builtin_defs.int;
|
| 96 |
true, qualify "second", Builtin_defs.int; (* TODO this should be a decimal *)
|
| 97 |
]
|
| 98 |
let timezone_type = Types.rec_of_list' [
|
| 99 |
positive_field;
|
| 100 |
hour_field; minute_field
|
| 101 |
]
|
| 102 |
let timezone_type_fields = [ true, qualify "timezone", timezone_type ]
|
| 103 |
let time_type = Types.rec_of_list' (time_kind_field :: time_type_fields @ timezone_type_fields)
|
| 104 |
let date_type = Types.rec_of_list' (time_kind_field :: positive_field :: date_type_fields)
|
| 105 |
let dateTime_type =
|
| 106 |
Types.rec_of_list' (time_kind_field :: positive_field ::
|
| 107 |
(date_type_fields @ time_type_fields @ timezone_type_fields))
|
| 108 |
let gYearMonth_type = Types.rec_of_list' [
|
| 109 |
positive_field; time_kind_field; year_field; month_field
|
| 110 |
]
|
| 111 |
let gYear_type = Types.rec_of_list' [ time_kind_field; positive_field; year_field ]
|
| 112 |
let gMonthDay_type = Types.rec_of_list' [ time_kind_field; month_field; day_field ]
|
| 113 |
let gDay_type = Types.rec_of_list' [ time_kind_field; day_field ]
|
| 114 |
let gMonth_type = Types.rec_of_list' [ time_kind_field; month_field ]
|
| 115 |
|
| 116 |
let nonPositiveInteger_type = Builtin_defs.non_pos_int
|
| 117 |
let negativeInteger_type = Builtin_defs.neg_int
|
| 118 |
let nonNegativeInteger_type = Builtin_defs.non_neg_int
|
| 119 |
let positiveInteger_type = Builtin_defs.pos_int
|
| 120 |
let long_type = Builtin_defs.long_int
|
| 121 |
let int_type = Builtin_defs.int_int
|
| 122 |
let short_type = Builtin_defs.short_int
|
| 123 |
let byte_type = Builtin_defs.byte_int
|
| 124 |
|
| 125 |
let string_list_type = Sequence.star Builtin_defs.string
|
| 126 |
|
| 127 |
(** {2 Validation functions (string -> Value.t)} *)
|
| 128 |
|
| 129 |
let parse_sign s =
|
| 130 |
if Utf8.equal s (Utf8.mk "+") || Utf8.equal s (Utf8.mk "") then
|
| 131 |
Value.vtrue
|
| 132 |
else
|
| 133 |
Value.vfalse
|
| 134 |
|
| 135 |
let validate_integer s =
|
| 136 |
try
|
| 137 |
Value.Integer (Intervals.V.mk (Utf8.get_str s))
|
| 138 |
with Failure _ -> simple_type_error "integer"
|
| 139 |
|
| 140 |
let strip_decimal_RE = Pcre.regexp "\\..*$"
|
| 141 |
|
| 142 |
let parse_date =
|
| 143 |
let rex = Pcre.regexp (add_limits date_RE_raw) in
|
| 144 |
fun s ->
|
| 145 |
let abort () = simple_type_error "date" in
|
| 146 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 147 |
[ qualify "year", validate_integer subs.(1);
|
| 148 |
qualify "month", validate_integer subs.(2);
|
| 149 |
qualify "day", validate_integer subs.(3) ]
|
| 150 |
|
| 151 |
let parse_time =
|
| 152 |
let rex = Pcre.regexp (add_limits time_RE_raw) in
|
| 153 |
fun s ->
|
| 154 |
let abort () = simple_type_error "time" in
|
| 155 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 156 |
[ qualify "hour", validate_integer subs.(1);
|
| 157 |
qualify "minute", validate_integer subs.(2);
|
| 158 |
qualify "second", validate_integer subs.(3) ]
|
| 159 |
|
| 160 |
let parse_timezone =
|
| 161 |
let rex = Pcre.regexp (add_limits timezone_RE_raw) in
|
| 162 |
fun s ->
|
| 163 |
let abort () = simple_type_error "timezone" in
|
| 164 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 165 |
if Utf8.equal subs.(1) (Utf8.mk "Z") then
|
| 166 |
[qualify "positive", Value.vtrue;
|
| 167 |
qualify "hour", validate_integer (Utf8.mk "0");
|
| 168 |
qualify "minute", validate_integer (Utf8.mk "0")]
|
| 169 |
else
|
| 170 |
[qualify "positive", parse_sign subs.(3);
|
| 171 |
qualify "hour", validate_integer subs.(4);
|
| 172 |
qualify "minute", validate_integer subs.(5)]
|
| 173 |
(* parse a timezone from a string, if it's empty return the empty list,
|
| 174 |
otherwise return a list containing a pair <"timezone", timezone value> *)
|
| 175 |
let parse_timezone' s =
|
| 176 |
if is_empty s then
|
| 177 |
[]
|
| 178 |
else
|
| 179 |
[ qualify "timezone", Value.vrecord (parse_timezone s) ]
|
| 180 |
|
| 181 |
let validate_string s = Value.string_utf8 s
|
| 182 |
let validate_normalizedString s =
|
| 183 |
validate_string (normalize_white_space `Replace s)
|
| 184 |
let validate_token s =
|
| 185 |
validate_string (normalize_white_space `Collapse s)
|
| 186 |
let validate_token_list s =
|
| 187 |
Value.sequence (List.map validate_token (split_xml_S s))
|
| 188 |
|
| 189 |
let validate_interval interval type_name s =
|
| 190 |
let integer =
|
| 191 |
try
|
| 192 |
Intervals.V.mk (Utf8.get_str s)
|
| 193 |
with Failure _ -> simple_type_error type_name
|
| 194 |
in
|
| 195 |
if Intervals.contains integer interval then
|
| 196 |
Value.Integer integer
|
| 197 |
else
|
| 198 |
simple_type_error type_name
|
| 199 |
let validate_nonPositiveInteger =
|
| 200 |
validate_interval (Intervals.left Intervals.V.zero) "nonPositiveInteger"
|
| 201 |
let validate_negativeInteger =
|
| 202 |
validate_interval (Intervals.left Intervals.V.minus_one) "negativeInteger"
|
| 203 |
let validate_nonNegativeInteger =
|
| 204 |
validate_interval (Intervals.right Intervals.V.zero) "nonNegativeInteger"
|
| 205 |
let validate_positiveInteger =
|
| 206 |
validate_interval (Intervals.right Intervals.V.one) "positiveInteger"
|
| 207 |
let validate_long = validate_interval (Intervals.bounded long_l long_r) "long"
|
| 208 |
let validate_int = validate_interval (Intervals.bounded int_l int_r) "int"
|
| 209 |
let validate_short =
|
| 210 |
validate_interval (Intervals.bounded short_l short_r) "short"
|
| 211 |
let validate_byte = validate_interval (Intervals.bounded byte_l byte_r) "byte"
|
| 212 |
|
| 213 |
let validate_bool s =
|
| 214 |
if Utf8.equal s (Utf8.mk "true") || Utf8.equal s (Utf8.mk "1") then
|
| 215 |
Value.vtrue
|
| 216 |
else if Utf8.equal s (Utf8.mk "false") || Utf8.equal s (Utf8.mk "0") then
|
| 217 |
Value.vfalse
|
| 218 |
else
|
| 219 |
simple_type_error "boolean"
|
| 220 |
|
| 221 |
let validate_duration =
|
| 222 |
let rex = pcre_regexp
|
| 223 |
"^([+-])?P((\\d+)Y)?((\\d+)M)?((\\d+)D)?(T((\\d+)H)?((\\d+)M)?((\\d+)S)?)?$"
|
| 224 |
in
|
| 225 |
fun s ->
|
| 226 |
let abort () = simple_type_error "duration" in
|
| 227 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 228 |
try
|
| 229 |
let fields =
|
| 230 |
time_kind "duration" ::
|
| 231 |
[qualify "positive", parse_sign subs.(1) ] @
|
| 232 |
(if is_empty subs.(3) then []
|
| 233 |
else [qualify "year", validate_integer subs.(3)]) @
|
| 234 |
(if is_empty subs.(5) then []
|
| 235 |
else [qualify "month", validate_integer subs.(5)]) @
|
| 236 |
(if is_empty subs.(7) then []
|
| 237 |
else [qualify "day", validate_integer subs.(7)]) @
|
| 238 |
(if is_empty subs.(10) then []
|
| 239 |
else [qualify "hour", validate_integer subs.(10)]) @
|
| 240 |
(if is_empty subs.(12) then []
|
| 241 |
else [qualify "minute", validate_integer subs.(12)]) @
|
| 242 |
(if is_empty subs.(14) then []
|
| 243 |
else [qualify "second", validate_integer subs.(14)])
|
| 244 |
in
|
| 245 |
Value.vrecord fields
|
| 246 |
with Schema_builtin_error _ -> abort ()
|
| 247 |
|
| 248 |
let validate_dateTime =
|
| 249 |
let rex = Pcre.regexp (sprintf "^([+-])?(%s)T(%s)(%s)?$"
|
| 250 |
(strip_parens date_RE_raw) (strip_parens time_RE_raw)
|
| 251 |
(strip_parens timezone_RE_raw))
|
| 252 |
in
|
| 253 |
fun s ->
|
| 254 |
let abort () = simple_type_error "dateTime" in
|
| 255 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 256 |
try
|
| 257 |
let fields =
|
| 258 |
time_kind "dateTime" ::
|
| 259 |
[ qualify "positive", parse_sign subs.(1) ] @
|
| 260 |
parse_date subs.(2) @
|
| 261 |
parse_time subs.(3) @
|
| 262 |
parse_timezone' subs.(4)
|
| 263 |
in
|
| 264 |
Value.vrecord fields
|
| 265 |
with Schema_builtin_error _ -> abort ()
|
| 266 |
|
| 267 |
let validate_gYearMonth =
|
| 268 |
let rex = Pcre.regexp (add_limits gYearMonth_RE_raw) in
|
| 269 |
fun s ->
|
| 270 |
let abort () = simple_type_error "gYearMonth" in
|
| 271 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 272 |
try
|
| 273 |
let fields = [
|
| 274 |
time_kind "gYearMonth";
|
| 275 |
qualify "positive", parse_sign subs.(1);
|
| 276 |
qualify "year", validate_integer subs.(2);
|
| 277 |
qualify "month", validate_integer subs.(3)
|
| 278 |
] @ parse_timezone' subs.(4)
|
| 279 |
in
|
| 280 |
Value.vrecord fields
|
| 281 |
with Schema_builtin_error _ -> abort ()
|
| 282 |
|
| 283 |
let validate_gYear =
|
| 284 |
let rex = Pcre.regexp (add_limits gYear_RE_raw) in
|
| 285 |
fun s ->
|
| 286 |
let abort () = simple_type_error "gYear" in
|
| 287 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 288 |
try
|
| 289 |
let fields = [
|
| 290 |
time_kind "gYear";
|
| 291 |
qualify "positive", parse_sign subs.(1);
|
| 292 |
qualify "year", validate_integer subs.(2);
|
| 293 |
] @ parse_timezone' subs.(3)
|
| 294 |
in
|
| 295 |
Value.vrecord fields
|
| 296 |
with Schema_builtin_error _ -> abort ()
|
| 297 |
|
| 298 |
let validate_gMonthDay =
|
| 299 |
let rex = Pcre.regexp (add_limits gMonthDay_RE_raw) in
|
| 300 |
fun s ->
|
| 301 |
let abort () = simple_type_error "gMonthDay" in
|
| 302 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 303 |
try
|
| 304 |
let fields = [
|
| 305 |
time_kind "gMonthDay";
|
| 306 |
qualify "month", validate_integer subs.(1);
|
| 307 |
qualify "day", validate_integer subs.(2);
|
| 308 |
] @ parse_timezone' subs.(3)
|
| 309 |
in
|
| 310 |
Value.vrecord fields
|
| 311 |
with Schema_builtin_error _ -> abort ()
|
| 312 |
|
| 313 |
let validate_gDay =
|
| 314 |
let rex = Pcre.regexp (add_limits gDay_RE_raw) in
|
| 315 |
fun s ->
|
| 316 |
let abort () = simple_type_error "gDay" in
|
| 317 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 318 |
try
|
| 319 |
let fields =
|
| 320 |
time_kind "gDay" ::
|
| 321 |
(qualify "day", validate_integer subs.(1)) ::
|
| 322 |
(parse_timezone' subs.(2))
|
| 323 |
in
|
| 324 |
Value.vrecord fields
|
| 325 |
with Schema_builtin_error _ -> abort ()
|
| 326 |
|
| 327 |
let validate_gMonth =
|
| 328 |
let rex = Pcre.regexp (add_limits gMonth_RE_raw) in
|
| 329 |
fun s ->
|
| 330 |
let abort () = simple_type_error "gMonth" in
|
| 331 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 332 |
try
|
| 333 |
let fields =
|
| 334 |
time_kind "gMonth" ::
|
| 335 |
(qualify "month", validate_integer subs.(1)) ::
|
| 336 |
(parse_timezone' subs.(2))
|
| 337 |
in
|
| 338 |
Value.vrecord fields
|
| 339 |
with Schema_builtin_error _ -> abort ()
|
| 340 |
|
| 341 |
let validate_time =
|
| 342 |
let rex = Pcre.regexp (sprintf "^(%s)(%s)?$" (strip_parens time_RE_raw)
|
| 343 |
(strip_parens timezone_RE_raw))
|
| 344 |
in
|
| 345 |
fun s ->
|
| 346 |
let abort () = simple_type_error "time" in
|
| 347 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 348 |
try
|
| 349 |
let fields =
|
| 350 |
time_kind "time" ::
|
| 351 |
parse_time subs.(1) @
|
| 352 |
(if is_empty subs.(2) then []
|
| 353 |
else [ qualify "timezone", Value.vrecord (parse_timezone subs.(2)) ])
|
| 354 |
in
|
| 355 |
Value.vrecord fields
|
| 356 |
with Schema_builtin_error _ -> abort ()
|
| 357 |
|
| 358 |
let validate_date =
|
| 359 |
let rex = Pcre.regexp (sprintf "^(-)?(%s)(%s)?$" (strip_parens date_RE_raw)
|
| 360 |
(strip_parens timezone_RE_raw))
|
| 361 |
in
|
| 362 |
fun s ->
|
| 363 |
let abort () = simple_type_error "date" in
|
| 364 |
let subs = try pcre_extract ~rex s with Not_found -> abort () in
|
| 365 |
try
|
| 366 |
let fields =
|
| 367 |
time_kind "date" ::
|
| 368 |
[ qualify "positive", parse_sign subs.(1) ] @
|
| 369 |
parse_date subs.(2) @
|
| 370 |
(if is_empty subs.(3) then []
|
| 371 |
else [ qualify "timezone", Value.vrecord (parse_timezone subs.(3)) ])
|
| 372 |
in
|
| 373 |
Value.vrecord fields
|
| 374 |
with Schema_builtin_error _ -> abort ()
|
| 375 |
|
| 376 |
let validate_hexBinary s =
|
| 377 |
let s = Utf8.get_str s in
|
| 378 |
let len = String.length s in
|
| 379 |
if len mod 2 <> 0 then
|
| 380 |
simple_type_error "hexBinary";
|
| 381 |
let res = String.create (len / 2) in
|
| 382 |
let rec aux idx =
|
| 383 |
if idx < len then begin
|
| 384 |
String.unsafe_set res (idx / 2)
|
| 385 |
(char_of_hex (String.unsafe_get s idx) (String.unsafe_get s (idx + 1)));
|
| 386 |
aux (idx + 2)
|
| 387 |
end
|
| 388 |
in
|
| 389 |
aux 0;
|
| 390 |
validate_string (Utf8.mk res)
|
| 391 |
|
| 392 |
let validate_base64Binary s =
|
| 393 |
let s = Utf8.get_str s in
|
| 394 |
validate_string (Utf8.mk (Netencoding.Base64.decode s))
|
| 395 |
|
| 396 |
let validate_anyURI s =
|
| 397 |
let s = Utf8.get_str s in
|
| 398 |
try
|
| 399 |
validate_string (Utf8.mk (Neturl.string_of_url (Neturl.url_of_string
|
| 400 |
Neturl.ip_url_syntax s)))
|
| 401 |
with Neturl.Malformed_URL -> simple_type_error "anyURI"
|
| 402 |
|
| 403 |
(** {2 API backend} *)
|
| 404 |
|
| 405 |
let builtins = Hashtbl.create 50
|
| 406 |
let reg name spec = Hashtbl.add builtins (add_xsd_prefix name) spec
|
| 407 |
let alias alias name =
|
| 408 |
let (alias, name) = (add_xsd_prefix alias, add_xsd_prefix name) in
|
| 409 |
Hashtbl.add builtins alias
|
| 410 |
(let (st_def, descr, validator) = Hashtbl.find builtins name in
|
| 411 |
let new_def =
|
| 412 |
match st_def with
|
| 413 |
| Primitive _ -> Primitive alias
|
| 414 |
| Derived (_, variety, facets, base) ->
|
| 415 |
Derived (Some alias, variety, facets, base)
|
| 416 |
in
|
| 417 |
(new_def, descr, validator))
|
| 418 |
let restrict' name basename new_facets =
|
| 419 |
let (name, basename) = (add_xsd_prefix name, add_xsd_prefix basename) in
|
| 420 |
let (base, _, _) = Hashtbl.find builtins basename in
|
| 421 |
let variety = variety_of_simple_type_definition base in
|
| 422 |
let facets =
|
| 423 |
merge_facets (facets_of_simple_type_definition base) new_facets
|
| 424 |
in
|
| 425 |
Derived (Some name, variety, facets, base)
|
| 426 |
let list' name itemname =
|
| 427 |
let (name, itemname) = (add_xsd_prefix name, add_xsd_prefix itemname) in
|
| 428 |
let (base, _, _) = Hashtbl.find builtins itemname in
|
| 429 |
Derived (Some name, List base, no_facets, base)
|
| 430 |
|
| 431 |
let fill () = (* fill "builtins" hashtbl *)
|
| 432 |
let primitive name = Primitive (add_xsd_prefix name) in
|
| 433 |
|
| 434 |
(* primitive builtins *)
|
| 435 |
|
| 436 |
reg "anySimpleType"
|
| 437 |
(primitive "anySimpleType", Builtin_defs.string, validate_string);
|
| 438 |
alias "anyType" "anySimpleType"; (* TODO BUG HERE *)
|
| 439 |
reg "string"
|
| 440 |
(primitive "string", Builtin_defs.string, validate_string);
|
| 441 |
|
| 442 |
(* TODO following types not yet supported (see "unsupported" above) *)
|
| 443 |
alias "decimal" "string";
|
| 444 |
alias "float" "string";
|
| 445 |
alias "double" "string";
|
| 446 |
alias "NOTATION" "string";
|
| 447 |
alias "QName" "string";
|
| 448 |
|
| 449 |
reg "boolean"
|
| 450 |
(primitive "boolean", Builtin_defs.bool, validate_bool);
|
| 451 |
reg "hexBinary"
|
| 452 |
(primitive "hexBinary", Builtin_defs.string, validate_hexBinary);
|
| 453 |
reg "base64Binary"
|
| 454 |
(primitive "base64Binary", Builtin_defs.string, validate_base64Binary);
|
| 455 |
reg "anyURI"
|
| 456 |
(primitive "anyURI", Builtin_defs.string, validate_anyURI);
|
| 457 |
reg "duration"
|
| 458 |
(primitive "duration", duration_type, validate_duration);
|
| 459 |
reg "dateTime"
|
| 460 |
(primitive "dateTime", dateTime_type, validate_dateTime);
|
| 461 |
reg "time"
|
| 462 |
(primitive "time", time_type, validate_time);
|
| 463 |
reg "date"
|
| 464 |
(primitive "date", date_type, validate_date);
|
| 465 |
reg "gYearMonth"
|
| 466 |
(primitive "gYearMonth", gYearMonth_type, validate_gYearMonth);
|
| 467 |
reg "gYear"
|
| 468 |
(primitive "gYear", gYear_type, validate_gYear);
|
| 469 |
reg "gMonthDay"
|
| 470 |
(primitive "gMonthDay", gMonthDay_type, validate_gMonthDay);
|
| 471 |
reg "gDay"
|
| 472 |
(primitive "gDay", gDay_type, validate_gDay);
|
| 473 |
reg "gMonth"
|
| 474 |
(primitive "gMonth", gMonth_type, validate_gMonth);
|
| 475 |
|
| 476 |
(* derived builtins *)
|
| 477 |
|
| 478 |
reg "integer"
|
| 479 |
(restrict' "integer" "decimal" no_facets, (* fake restriction *)
|
| 480 |
Builtin_defs.int, validate_integer);
|
| 481 |
reg "nonPositiveInteger"
|
| 482 |
(restrict' "nonPositiveInteger" "integer"
|
| 483 |
{ no_facets with maxInclusive = Some (Value.Integer zero, false) },
|
| 484 |
nonPositiveInteger_type, validate_nonPositiveInteger);
|
| 485 |
reg "negativeInteger"
|
| 486 |
(restrict' "negativeInteger" "nonPositiveInteger"
|
| 487 |
{ no_facets with maxInclusive = Some (Value.Integer minus_one, false) },
|
| 488 |
negativeInteger_type, validate_negativeInteger);
|
| 489 |
reg "nonNegativeInteger"
|
| 490 |
(restrict' "nonNegativeInteger" "integer"
|
| 491 |
{ no_facets with minInclusive = Some (Value.Integer zero, false) },
|
| 492 |
nonNegativeInteger_type, validate_nonNegativeInteger);
|
| 493 |
reg "positiveInteger"
|
| 494 |
(restrict' "positiveInteger" "nonNegativeInteger"
|
| 495 |
{ no_facets with minInclusive = Some (Value.Integer one, false) },
|
| 496 |
positiveInteger_type, validate_positiveInteger);
|
| 497 |
reg "long"
|
| 498 |
(restrict' "long" "integer"
|
| 499 |
{ no_facets with
|
| 500 |
minInclusive = Some (Value.Integer long_l, false);
|
| 501 |
maxInclusive = Some (Value.Integer long_r, false)},
|
| 502 |
long_type, validate_long);
|
| 503 |
reg "int"
|
| 504 |
(restrict' "int" "long"
|
| 505 |
{ no_facets with
|
| 506 |
minInclusive = Some (Value.Integer int_l, false);
|
| 507 |
maxInclusive = Some (Value.Integer int_r, false)},
|
| 508 |
int_type, validate_int);
|
| 509 |
reg "short"
|
| 510 |
(restrict' "short" "int"
|
| 511 |
{ no_facets with
|
| 512 |
minInclusive = Some (Value.Integer short_l, false);
|
| 513 |
maxInclusive = Some (Value.Integer short_r, false)},
|
| 514 |
short_type, validate_short);
|
| 515 |
reg "byte"
|
| 516 |
(restrict' "byte" "short"
|
| 517 |
{ no_facets with
|
| 518 |
minInclusive = Some (Value.Integer byte_l, false);
|
| 519 |
maxInclusive = Some (Value.Integer byte_r, false)},
|
| 520 |
byte_type, validate_short);
|
| 521 |
reg "normalizedString"
|
| 522 |
(restrict' "normalizedString" "string"
|
| 523 |
{ no_facets with whiteSpace = `Replace, false },
|
| 524 |
Builtin_defs.string, validate_normalizedString);
|
| 525 |
reg "token"
|
| 526 |
(restrict' "token" "normalizedString"
|
| 527 |
{ no_facets with whiteSpace = `Collapse, false },
|
| 528 |
Builtin_defs.string, validate_token);
|
| 529 |
alias "language" "token";
|
| 530 |
alias "Name" "token";
|
| 531 |
alias "NMTOKEN" "token";
|
| 532 |
alias "NCName" "token";
|
| 533 |
alias "ID" "token";
|
| 534 |
alias "IDREF" "token";
|
| 535 |
alias "ENTITY" "token";
|
| 536 |
reg "NMTOKENS"
|
| 537 |
(list' "NMTOKENS" "token",
|
| 538 |
string_list_type, validate_token_list);
|
| 539 |
alias "IDREFS" "NMTOKENS";
|
| 540 |
alias "ENTITIES" "NMTOKENS"
|
| 541 |
|
| 542 |
let _ = try fill () with Not_found -> assert false
|
| 543 |
|
| 544 |
(** {2 Printing} *)
|
| 545 |
|
| 546 |
open Big_int
|
| 547 |
|
| 548 |
type kind =
|
| 549 |
Duration | DateTime | Time | Date | GYearMonth | GYear | GMonthDay | GDay |
|
| 550 |
GMonth
|
| 551 |
type timezone = bool * Intervals.V.t * Intervals.V.t
|
| 552 |
(* positive, hour, minute *)
|
| 553 |
type time_value = {
|
| 554 |
kind: kind option; positive: bool option; year: Intervals.V.t option;
|
| 555 |
month: Intervals.V.t option; day: Intervals.V.t option;
|
| 556 |
hour: Intervals.V.t option; minute: Intervals.V.t option;
|
| 557 |
second: Intervals.V.t option; timezone: timezone option
|
| 558 |
}
|
| 559 |
let null_value = {
|
| 560 |
kind = None; positive = None; year = None; month = None; day = None;
|
| 561 |
hour = None; minute = None; second = None; timezone = None
|
| 562 |
}
|
| 563 |
|
| 564 |
let string_of_time_type fields =
|
| 565 |
let fail () = raise (Schema_builtin_error (Utf8.mk "")) in
|
| 566 |
let parse_int = function Value.Integer i -> i | _ -> fail () in
|
| 567 |
let parse_timezone v =
|
| 568 |
let fields =
|
| 569 |
try
|
| 570 |
Value.get_fields v
|
| 571 |
with Invalid_argument _ -> fail ()
|
| 572 |
in
|
| 573 |
let (positive, hour, minute) = (ref true, ref zero, ref zero) in
|
| 574 |
List.iter
|
| 575 |
(fun ((ns, name), value) ->
|
| 576 |
if ns <> Ns.empty then fail ();
|
| 577 |
(match Utf8.get_str name with
|
| 578 |
| "positive" -> positive := (Value.equal value Value.vtrue)
|
| 579 |
| "hour" -> hour := parse_int value
|
| 580 |
| "minute" -> minute := parse_int value
|
| 581 |
| _ -> fail ()))
|
| 582 |
fields;
|
| 583 |
!positive, !hour, !minute
|
| 584 |
in
|
| 585 |
let parse_time_kind = function
|
| 586 |
| Value.Atom a ->
|
| 587 |
(match Utf8.get_str (snd (Atoms.V.value a)) with
|
| 588 |
| "duration" -> Duration | "dateTime" -> DateTime | "time" -> Time
|
| 589 |
| "date" -> Date | "gYearMonth" -> GYearMonth | "gYear" -> GYear
|
| 590 |
| "gMonthDay" -> GMonthDay | "gDay" -> GDay | "gMonth" -> GMonth
|
| 591 |
| _ -> fail ())
|
| 592 |
| _ -> fail ()
|
| 593 |
in
|
| 594 |
let parse_positive = function
|
| 595 |
| v when Value.equal v Value.vfalse -> false
|
| 596 |
| _ -> true
|
| 597 |
in
|
| 598 |
let string_of_positive v =
|
| 599 |
match v.positive with Some false -> "-" | _ -> ""
|
| 600 |
in
|
| 601 |
let string_of_year v =
|
| 602 |
match v.year with None -> fail () | Some i -> Intervals.V.to_string i
|
| 603 |
in
|
| 604 |
let string_of_month v =
|
| 605 |
match v.month with None -> fail () | Some i -> Intervals.V.to_string i
|
| 606 |
in
|
| 607 |
let string_of_day v =
|
| 608 |
match v.day with None -> fail () | Some i -> Intervals.V.to_string i
|
| 609 |
in
|
| 610 |
let string_of_hour v =
|
| 611 |
match v.hour with None -> fail () | Some i -> Intervals.V.to_string i
|
| 612 |
in
|
| 613 |
let string_of_minute v =
|
| 614 |
match v.minute with None -> fail () | Some i -> Intervals.V.to_string i
|
| 615 |
in
|
| 616 |
let string_of_second v =
|
| 617 |
match v.second with None -> fail () | Some i -> Intervals.V.to_string i
|
| 618 |
in
|
| 619 |
let string_of_date v =
|
| 620 |
sprintf "%s-%s-%s" (string_of_year v) (string_of_month v) (string_of_day v)
|
| 621 |
in
|
| 622 |
let string_of_timezone v =
|
| 623 |
match v.timezone with
|
| 624 |
| Some (positive, hour, minute) ->
|
| 625 |
sprintf "Z%s%s:%s" (if not positive then "-" else "")
|
| 626 |
(Intervals.V.to_string hour) (Intervals.V.to_string minute)
|
| 627 |
| None -> ""
|
| 628 |
in
|
| 629 |
let string_of_time v =
|
| 630 |
sprintf "%s:%s:%s" (string_of_hour v) (string_of_minute v)
|
| 631 |
(string_of_second v)
|
| 632 |
in
|
| 633 |
let v =
|
| 634 |
List.fold_left
|
| 635 |
(fun acc ((ns, name), value) ->
|
| 636 |
if ns <> Ns.empty then fail ();
|
| 637 |
(match Utf8.get_str name with
|
| 638 |
| "year" -> { acc with year = Some (parse_int value) }
|
| 639 |
| "month" -> { acc with month = Some (parse_int value) }
|
| 640 |
| "day" -> { acc with day = Some (parse_int value) }
|
| 641 |
| "hour" -> { acc with hour = Some (parse_int value) }
|
| 642 |
| "minute" -> { acc with minute = Some (parse_int value) }
|
| 643 |
| "second" -> { acc with second = Some (parse_int value) }
|
| 644 |
| "timezone" -> { acc with timezone = Some (parse_timezone value) }
|
| 645 |
| "time_kind" -> { acc with kind = Some (parse_time_kind value) }
|
| 646 |
| "positive" -> { acc with positive = Some (parse_positive value) }
|
| 647 |
| s -> assert false))
|
| 648 |
null_value fields
|
| 649 |
in
|
| 650 |
let s =
|
| 651 |
match v.kind with
|
| 652 |
| None -> fail ()
|
| 653 |
| Some Duration ->
|
| 654 |
sprintf "%sP%s%s%s%s"
|
| 655 |
(string_of_positive v)
|
| 656 |
(match v.year with Some v -> Intervals.V.to_string v ^ "Y" | _ -> "")
|
| 657 |
(match v.month with Some v -> Intervals.V.to_string v ^ "M" | _ -> "")
|
| 658 |
(match v.day with Some v -> Intervals.V.to_string v ^ "D" | _ -> "")
|
| 659 |
(if v.hour = None && v.minute = None && v.second = None then
|
| 660 |
""
|
| 661 |
else
|
| 662 |
"T" ^
|
| 663 |
(match v.hour with Some v -> Intervals.V.to_string v ^ "H" | _ ->
|
| 664 |
"") ^
|
| 665 |
(match v.minute with Some v -> Intervals.V.to_string v ^ "M" | _ ->
|
| 666 |
"") ^
|
| 667 |
(match v.second with Some v -> Intervals.V.to_string v ^ "S" | _ ->
|
| 668 |
""))
|
| 669 |
| Some DateTime ->
|
| 670 |
sprintf "%s%sT%s%s" (string_of_positive v) (string_of_date v)
|
| 671 |
(string_of_time v) (string_of_timezone v)
|
| 672 |
| Some Time ->
|
| 673 |
sprintf "%s%s%s" (string_of_positive v) (string_of_time v)
|
| 674 |
(string_of_timezone v)
|
| 675 |
| Some Date ->
|
| 676 |
sprintf "%s%s%s" (string_of_positive v) (string_of_date v)
|
| 677 |
(string_of_timezone v)
|
| 678 |
| Some GYearMonth ->
|
| 679 |
sprintf "%s%s-%s%s" (string_of_positive v) (string_of_year v)
|
| 680 |
(string_of_month v) (string_of_timezone v)
|
| 681 |
| Some GYear ->
|
| 682 |
sprintf "%s%s%s" (string_of_positive v) (string_of_year v)
|
| 683 |
(string_of_timezone v)
|
| 684 |
| Some GMonthDay ->
|
| 685 |
sprintf "--%s%s%s" (string_of_month v) (string_of_day v)
|
| 686 |
(string_of_timezone v)
|
| 687 |
| Some GDay ->
|
| 688 |
sprintf "---%s%s" (string_of_day v) (string_of_timezone v)
|
| 689 |
| Some GMonth ->
|
| 690 |
sprintf "--%s--%s" (string_of_month v) (string_of_timezone v)
|
| 691 |
in
|
| 692 |
Utf8.mk s
|
| 693 |
|
| 694 |
(** {2 API} *)
|
| 695 |
|
| 696 |
let is_builtin = Hashtbl.mem builtins
|
| 697 |
let iter_builtin f =
|
| 698 |
Hashtbl.iter (fun _ (type_def, _, _) -> f type_def) builtins
|
| 699 |
|
| 700 |
let lookup name = Hashtbl.find builtins name
|
| 701 |
|
| 702 |
let fst (x,_,_) = x
|
| 703 |
let snd (_,y,_) = y
|
| 704 |
let trd (_,_,z) = z
|
| 705 |
|
| 706 |
let get_builtin name = fst (lookup name)
|
| 707 |
let cd_type_of_builtin name = snd (lookup name)
|
| 708 |
let validate_builtin name = trd (lookup name)
|
| 709 |
|