/[svn]/schema/schema_builtin.ml
ViewVC logotype

Contents of /schema/schema_builtin.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 812 - (show annotations)
Tue Jul 10 18:04:55 2007 UTC (5 years, 10 months ago) by abate
File size: 19205 byte(s)
[r2003-11-24 16:03:12 by szach] use Utf8 everywhere in schema support

Original author: szach
Date: 2003-11-24 16:03:14+00:00
1
2 open Printf
3
4 open Encodings
5 open Encodings.Utf8.Pcre
6 open Schema_common
7 open Schema_types
8
9 (* TODO dates: boundary checks (e.g. 95/26/2003) *)
10 (* TODO a lot of almost cut-and-paste code, expecially in gFoo types validation
11 *)
12
13 (** {2 Aux/Misc stuff} *)
14
15 let add_xsd_prefix s = Schema_xml.add_xsd_prefix (Utf8.mk s)
16
17 let is_empty s = Utf8.equal s (Utf8.mk "")
18
19 let zero = Intervals.V.zero
20 let one = (Intervals.V.succ Intervals.V.zero)
21 let minus_one = (Intervals.V.pred Intervals.V.zero)
22 let long_l = (Intervals.V.mk "-9223372036854775808")
23 let long_r = (Intervals.V.mk "9223372036854775807")
24 let int_l = (Intervals.V.mk "-2147483648")
25 let int_r = (Intervals.V.mk "2147483647")
26 let short_l = (Intervals.V.mk "-32768")
27 let short_r = (Intervals.V.mk "32767")
28 let byte_l = (Intervals.V.mk "-128")
29 let byte_r = (Intervals.V.mk "127")
30
31 let xml_S_RE = pcre_regexp "[ \\t\\r\\n]+"
32 (* split a string at XML recommendation "S" production boundaries *)
33 let split_xml_S s = pcre_split ~rex:xml_S_RE s
34 let norm_RE = pcre_regexp "[\\t\\r\\n]"
35
36 let char_of_hex =
37 let int_of_hex_char = function
38 | '0' -> 0 | '1' -> 1 | '2' -> 2 | '3' -> 3 | '4' -> 4 | '5' -> 5 | '6' -> 6
39 | '7' -> 7 | '8' -> 8 | '9' -> 9 | 'a' | 'A' -> 10 | 'b' | 'B' -> 11
40 | 'c' | 'C' -> 12 | 'd' | 'D' -> 13 | 'e' | 'E' -> 14 | 'f' | 'F' -> 15
41 | _ -> assert false
42 in
43 (* most significative, least significative *)
44 fun ms ls -> Char.unsafe_chr (int_of_hex_char ms * 16 + int_of_hex_char ls)
45
46 let strip_parens s = Pcre.replace ~pat:"[()]" s
47 let add_limits s = "^" ^ s ^ "$"
48
49 exception Schema_builtin_error of Utf8.t
50 let simple_type_error name = raise (Schema_builtin_error (add_xsd_prefix name))
51
52 let qualify s = (Ns.empty, Encodings.Utf8.mk s)
53
54 (* regular expressions used to validate built-in types *)
55
56 let timezone_RE_raw = "(Z)|(([+-])?(\\d{2}):(\\d{2}))"
57 let date_RE_raw = "(\\d{4,})-(\\d{2})-(\\d{2})"
58 let time_RE_raw = "(\\d{2}):(\\d{2}):(\\d{2})"
59
60 let gYearMonth_RE_raw = sprintf "(-)?(\\d{4,})-(\\d{2})(%s)?" timezone_RE_raw
61 let gYear_RE_raw = sprintf "(-)?(\\d{4,})(%s)?" timezone_RE_raw
62 let gMonthDay_RE_raw = sprintf "--(\\d{2})-(\\d{2})(%s)?" timezone_RE_raw
63 let gDay_RE_raw = sprintf "---(\\d{2})(%s)?" timezone_RE_raw
64 let gMonth_RE_raw = "--(\\d{2})--(%s)?"
65
66 (** {2 CDuce types} *)
67
68 let positive_field = false, qualify "positive", Builtin_defs.bool
69 let year_field = false, qualify "year", Builtin_defs.int
70 let month_field = false, qualify "month", Builtin_defs.int
71 let day_field = false, qualify "day", Builtin_defs.int
72 let hour_field = false, qualify "hour", Builtin_defs.int
73 let minute_field = false, qualify "minute", Builtin_defs.int
74 let second_field = false, qualify "second", Builtin_defs.int
75 (* TODO this should be a decimal *)
76 let time_type_fields = [ hour_field; minute_field; second_field ]
77 let date_type_fields = [ year_field; month_field; day_field ]
78
79 (* TODO the constraint that at least one part should be present isn't easily
80 expressible with CDuce types *)
81 let duration_type = Types.rec_of_list' [
82 positive_field;
83 true, qualify "year", Builtin_defs.int;
84 true, qualify "month", Builtin_defs.int;
85 true, qualify "day", Builtin_defs.int;
86 true, qualify "hour", Builtin_defs.int;
87 true, qualify "minute", Builtin_defs.int;
88 true, qualify "second", Builtin_defs.int; (* TODO this should be a decimal *)
89 ]
90 let timezone_type = Types.rec_of_list' [
91 positive_field;
92 hour_field; minute_field
93 ]
94 let timezone_type_fields = [ true, qualify "timezone", timezone_type ]
95 let time_type = Types.rec_of_list' (time_type_fields @ timezone_type_fields)
96 let date_type = Types.rec_of_list' (positive_field :: date_type_fields)
97 let dateTime_type =
98 Types.rec_of_list' (positive_field ::
99 (date_type_fields @ time_type_fields @ timezone_type_fields))
100 let gYearMonth_type = Types.rec_of_list' [
101 positive_field; year_field; month_field
102 ]
103 let gYear_type = Types.rec_of_list' [ positive_field; year_field ]
104 let gMonthDay_type = Types.rec_of_list' [ month_field; day_field ]
105 let gDay_type = Types.rec_of_list' [ day_field ]
106 let gMonth_type = Types.rec_of_list' [ month_field ]
107
108 let nonPositiveInteger_type = Builtin_defs.non_pos_int
109 let negativeInteger_type = Builtin_defs.neg_int
110 let nonNegativeInteger_type = Builtin_defs.non_neg_int
111 let positiveInteger_type = Builtin_defs.pos_int
112 let long_type = Builtin_defs.long_int
113 let int_type = Builtin_defs.int_int
114 let short_type = Builtin_defs.short_int
115 let byte_type = Builtin_defs.byte_int
116
117 let string_list_type = Sequence.star Builtin_defs.string
118
119 (** {2 Validation functions (string -> Value.t)} *)
120
121 let parse_sign s =
122 if Utf8.equal s (Utf8.mk "+") || Utf8.equal s (Utf8.mk "") then
123 Value.vtrue
124 else
125 Value.vfalse
126
127 let validate_integer s =
128 try
129 Value.Integer (Intervals.V.mk (Utf8.get_str s))
130 with Failure _ -> simple_type_error "integer"
131
132 let strip_decimal_RE = Pcre.regexp "\\..*$"
133 let validate_decimal s =
134 validate_integer (pcre_replace ~rex:strip_decimal_RE s)
135
136 let parse_date =
137 let rex = Pcre.regexp (add_limits date_RE_raw) in
138 fun s ->
139 let abort () = simple_type_error "date" in
140 let subs = try pcre_extract ~rex s with Not_found -> abort () in
141 [ qualify "year", validate_integer subs.(1);
142 qualify "month", validate_integer subs.(2);
143 qualify "day", validate_integer subs.(3) ]
144
145 let parse_time =
146 let rex = Pcre.regexp (add_limits time_RE_raw) in
147 fun s ->
148 let abort () = simple_type_error "time" in
149 let subs = try pcre_extract ~rex s with Not_found -> abort () in
150 [ qualify "hour", validate_integer subs.(1);
151 qualify "minute", validate_integer subs.(2);
152 qualify "second", validate_integer subs.(3) ]
153
154 let parse_timezone =
155 let rex = Pcre.regexp (add_limits timezone_RE_raw) in
156 fun s ->
157 let abort () = simple_type_error "timezone" in
158 let subs = try pcre_extract ~rex s with Not_found -> abort () in
159 if Utf8.equal subs.(1) (Utf8.mk "Z") then
160 [qualify "positive", Value.vtrue;
161 qualify "hour", validate_integer (Utf8.mk "0");
162 qualify "minute", validate_integer (Utf8.mk "0")]
163 else
164 [qualify "positive", parse_sign subs.(3);
165 qualify "hour", validate_integer subs.(4);
166 qualify "minute", validate_integer subs.(5)]
167 (* parse a timezone from a string, if it's empty return the empty list,
168 otherwise return a list containing a pair <"timezone", timezone value> *)
169 let parse_timezone' s =
170 if is_empty s then
171 []
172 else
173 [ qualify "timezone", Value.vrecord (parse_timezone s) ]
174
175 let validate_string s = Value.string_utf8 s
176 let validate_normalizedString s =
177 validate_string (normalize_white_space `Replace s)
178 let validate_token s =
179 validate_string (normalize_white_space `Collapse s)
180 let validate_token_list s =
181 Value.sequence (List.map validate_token (split_xml_S s))
182
183 let validate_interval interval type_name s =
184 let integer =
185 try
186 Intervals.V.mk (Utf8.get_str s)
187 with Failure _ -> simple_type_error type_name
188 in
189 if Intervals.contains integer interval then
190 Value.Integer integer
191 else
192 simple_type_error type_name
193 let validate_nonPositiveInteger =
194 validate_interval (Intervals.left Intervals.V.zero) "nonPositiveInteger"
195 let validate_negativeInteger =
196 validate_interval (Intervals.left Intervals.V.minus_one) "negativeInteger"
197 let validate_nonNegativeInteger =
198 validate_interval (Intervals.right Intervals.V.zero) "nonNegativeInteger"
199 let validate_positiveInteger =
200 validate_interval (Intervals.right Intervals.V.one) "positiveInteger"
201 let validate_long = validate_interval (Intervals.bounded long_l long_r) "long"
202 let validate_int = validate_interval (Intervals.bounded int_l int_r) "int"
203 let validate_short =
204 validate_interval (Intervals.bounded short_l short_r) "short"
205 let validate_byte = validate_interval (Intervals.bounded byte_l byte_r) "byte"
206
207 let validate_bool s =
208 if Utf8.equal s (Utf8.mk "true") || Utf8.equal s (Utf8.mk "1") then
209 Value.vtrue
210 else if Utf8.equal s (Utf8.mk "false") || Utf8.equal s (Utf8.mk "0") then
211 Value.vfalse
212 else
213 simple_type_error "boolean"
214
215 let validate_duration =
216 let rex = pcre_regexp
217 "^([+-])?P((\\d+)Y)?((\\d+)M)?((\\d+)D)?(T((\\d+)H)?((\\d+)M)?((\\d+)S)?)?$"
218 in
219 fun s ->
220 let abort () = simple_type_error "duration" in
221 let subs = try pcre_extract ~rex s with Not_found -> abort () in
222 try
223 let fields =
224 [qualify "positive", parse_sign subs.(1) ] @
225 (if is_empty subs.(3) then []
226 else [qualify "year", validate_integer subs.(3)]) @
227 (if is_empty subs.(5) then []
228 else [qualify "month", validate_integer subs.(5)]) @
229 (if is_empty subs.(7) then []
230 else [qualify "day", validate_integer subs.(7)]) @
231 (if is_empty subs.(10) then []
232 else [qualify "hour", validate_integer subs.(10)]) @
233 (if is_empty subs.(12) then []
234 else [qualify "minute", validate_integer subs.(12)]) @
235 (if is_empty subs.(14) then []
236 else [qualify "second", validate_integer subs.(14)])
237 in
238 Value.vrecord fields
239 with Schema_builtin_error _ -> abort ()
240
241 let validate_dateTime =
242 let rex = Pcre.regexp (sprintf "^([+-])?(%s)T(%s)(%s)?$"
243 (strip_parens date_RE_raw) (strip_parens time_RE_raw)
244 (strip_parens timezone_RE_raw))
245 in
246 fun s ->
247 let abort () = simple_type_error "dateTime" in
248 let subs = try pcre_extract ~rex s with Not_found -> abort () in
249 try
250 let fields =
251 [ qualify "positive", parse_sign subs.(1) ] @
252 parse_date subs.(2) @
253 parse_time subs.(3) @
254 parse_timezone' subs.(4)
255 in
256 Value.vrecord fields
257 with Schema_builtin_error _ -> abort ()
258
259 let validate_gYearMonth =
260 let rex = Pcre.regexp (add_limits gYearMonth_RE_raw) in
261 fun s ->
262 let abort () = simple_type_error "gYearMonth" in
263 let subs = try pcre_extract ~rex s with Not_found -> abort () in
264 try
265 let fields = [
266 qualify "positive", parse_sign subs.(1);
267 qualify "year", validate_integer subs.(2);
268 qualify "month", validate_integer subs.(3)
269 ] @ parse_timezone' subs.(4)
270 in
271 Value.vrecord fields
272 with Schema_builtin_error _ -> abort ()
273
274 let validate_gYear =
275 let rex = Pcre.regexp (add_limits gYear_RE_raw) in
276 fun s ->
277 let abort () = simple_type_error "gYear" in
278 let subs = try pcre_extract ~rex s with Not_found -> abort () in
279 try
280 let fields = [
281 qualify "positive", parse_sign subs.(1);
282 qualify "year", validate_integer subs.(2);
283 ] @ parse_timezone' subs.(3)
284 in
285 Value.vrecord fields
286 with Schema_builtin_error _ -> abort ()
287
288 let validate_gMonthDay =
289 let rex = Pcre.regexp (add_limits gMonthDay_RE_raw) in
290 fun s ->
291 let abort () = simple_type_error "gMonthDay" in
292 let subs = try pcre_extract ~rex s with Not_found -> abort () in
293 try
294 let fields = [
295 qualify "month", validate_integer subs.(1);
296 qualify "day", validate_integer subs.(2);
297 ] @ parse_timezone' subs.(3)
298 in
299 Value.vrecord fields
300 with Schema_builtin_error _ -> abort ()
301
302 let validate_gDay =
303 let rex = Pcre.regexp (add_limits gDay_RE_raw) in
304 fun s ->
305 let abort () = simple_type_error "gDay" in
306 let subs = try pcre_extract ~rex s with Not_found -> abort () in
307 try
308 let fields =
309 (qualify "day", validate_integer subs.(1)) ::
310 (parse_timezone' subs.(2))
311 in
312 Value.vrecord fields
313 with Schema_builtin_error _ -> abort ()
314
315 let validate_gMonth =
316 let rex = Pcre.regexp (add_limits gMonth_RE_raw) in
317 fun s ->
318 let abort () = simple_type_error "gMonth" in
319 let subs = try pcre_extract ~rex s with Not_found -> abort () in
320 try
321 let fields =
322 (qualify "month", validate_integer subs.(1)) ::
323 (parse_timezone' subs.(2))
324 in
325 Value.vrecord fields
326 with Schema_builtin_error _ -> abort ()
327
328 let validate_time =
329 let rex = Pcre.regexp (sprintf "^(%s)(%s)?$" (strip_parens time_RE_raw)
330 (strip_parens timezone_RE_raw))
331 in
332 fun s ->
333 let abort () = simple_type_error "time" in
334 let subs = try pcre_extract ~rex s with Not_found -> abort () in
335 try
336 let fields =
337 parse_time subs.(1) @
338 (if is_empty subs.(2) then []
339 else [ qualify "timezone", Value.vrecord (parse_timezone subs.(2)) ])
340 in
341 Value.vrecord fields
342 with Schema_builtin_error _ -> abort ()
343
344 let validate_date =
345 let rex = Pcre.regexp (sprintf "^(-)?(%s)(%s)?$" (strip_parens date_RE_raw)
346 (strip_parens timezone_RE_raw))
347 in
348 fun s ->
349 let abort () = simple_type_error "date" in
350 let subs = try pcre_extract ~rex s with Not_found -> abort () in
351 try
352 let fields =
353 [ qualify "positive", parse_sign subs.(1) ] @
354 parse_date subs.(2) @
355 (if is_empty subs.(3) then []
356 else [ qualify "timezone", Value.vrecord (parse_timezone subs.(3)) ])
357 in
358 Value.vrecord fields
359 with Schema_builtin_error _ -> abort ()
360
361 let validate_hexBinary s =
362 let s = Utf8.get_str s in
363 let len = String.length s in
364 if len mod 2 <> 0 then
365 simple_type_error "hexBinary";
366 let res = String.create (len / 2) in
367 let rec aux idx =
368 if idx < len then begin
369 String.unsafe_set res (idx / 2)
370 (char_of_hex (String.unsafe_get s idx) (String.unsafe_get s (idx + 1)));
371 aux (idx + 2)
372 end
373 in
374 aux 0;
375 validate_string (Utf8.mk res)
376
377 let validate_base64Binary s =
378 let s = Utf8.get_str s in
379 validate_string (Utf8.mk (Netencoding.Base64.decode s))
380
381 let validate_anyURI s =
382 let s = Utf8.get_str s in
383 try
384 validate_string (Utf8.mk (Neturl.string_of_url (Neturl.url_of_string
385 Neturl.ip_url_syntax s)))
386 with Neturl.Malformed_URL -> simple_type_error "anyURI"
387
388 (** {2 API backend} *)
389
390 let builtins = Hashtbl.create 50
391 let reg name spec = Hashtbl.add builtins (add_xsd_prefix name) spec
392 let alias alias name =
393 let (alias, name) = (add_xsd_prefix alias, add_xsd_prefix name) in
394 Hashtbl.add builtins alias
395 (let (st_def, descr, validator) = Hashtbl.find builtins name in
396 let new_def =
397 match st_def with
398 | Primitive _ -> Primitive alias
399 | Derived (_, variety, facets, base) ->
400 Derived (Some alias, variety, facets, base)
401 in
402 (new_def, descr, validator))
403 let restrict' name basename new_facets =
404 let (name, basename) = (add_xsd_prefix name, add_xsd_prefix basename) in
405 let (base, _, _) = Hashtbl.find builtins basename in
406 let variety = variety_of_simple_type_definition base in
407 let facets =
408 merge_facets (facets_of_simple_type_definition base) new_facets
409 in
410 Derived (Some name, variety, facets, base)
411 let list' name itemname =
412 let (name, itemname) = (add_xsd_prefix name, add_xsd_prefix itemname) in
413 let (base, _, _) = Hashtbl.find builtins itemname in
414 Derived (Some name, List base, no_facets, base)
415
416 let fill () = (* fill "builtins" hashtbl *)
417 let primitive name = Primitive (add_xsd_prefix name) in
418
419 (* TODO missing built-in simple types: float, double, QName, NOTATION *)
420
421 (* primitive builtins *)
422
423 reg "anySimpleType"
424 (primitive "anySimpleType", Builtin_defs.string, validate_string);
425 alias "anyType" "anySimpleType";
426 reg "string"
427 (primitive "string", Builtin_defs.string, validate_string);
428 reg "decimal"
429 (* collapsed in CDuce to an integer, since CDuce has no decimal numbers *)
430 (primitive "decimal", Builtin_defs.int, validate_decimal);
431 reg "boolean"
432 (primitive "boolean", Builtin_defs.bool, validate_bool);
433 reg "hexBinary"
434 (primitive "hexBinary", Builtin_defs.string, validate_hexBinary);
435 reg "base64Binary"
436 (primitive "base64Binary", Builtin_defs.string, validate_base64Binary);
437 reg "anyURI"
438 (primitive "anyURI", Builtin_defs.string, validate_anyURI);
439 reg "duration"
440 (primitive "duration", duration_type, validate_duration);
441 reg "dateTime"
442 (primitive "dateTime", dateTime_type, validate_dateTime);
443 reg "time"
444 (primitive "time", time_type, validate_time);
445 reg "date"
446 (primitive "date", date_type, validate_date);
447 reg "gYearMonth"
448 (primitive "gYearMonth", gYearMonth_type, validate_gYearMonth);
449 reg "gYear"
450 (primitive "gYear", gYear_type, validate_gYear);
451 reg "gMonthDay"
452 (primitive "gMonthDay", gMonthDay_type, validate_gMonthDay);
453 reg "gDay"
454 (primitive "gDay", gDay_type, validate_gDay);
455 reg "gMonth"
456 (primitive "gMonth", gMonth_type, validate_gMonth);
457
458 (* derived builtins *)
459
460 reg "integer"
461 (restrict' "integer" "decimal" no_facets, (* fake restriction *)
462 Builtin_defs.int, validate_integer);
463 reg "nonPositiveInteger"
464 (restrict' "nonPositiveInteger" "integer"
465 { no_facets with maxInclusive = Some (Value.Integer zero, false) },
466 nonPositiveInteger_type, validate_nonPositiveInteger);
467 reg "negativeInteger"
468 (restrict' "negativeInteger" "nonPositiveInteger"
469 { no_facets with maxInclusive = Some (Value.Integer minus_one, false) },
470 negativeInteger_type, validate_negativeInteger);
471 reg "nonNegativeInteger"
472 (restrict' "nonNegativeInteger" "integer"
473 { no_facets with minInclusive = Some (Value.Integer zero, false) },
474 nonNegativeInteger_type, validate_nonNegativeInteger);
475 reg "positiveInteger"
476 (restrict' "positiveInteger" "nonNegativeInteger"
477 { no_facets with minInclusive = Some (Value.Integer one, false) },
478 positiveInteger_type, validate_positiveInteger);
479 reg "long"
480 (restrict' "long" "integer"
481 { no_facets with
482 minInclusive = Some (Value.Integer long_l, false);
483 maxInclusive = Some (Value.Integer long_r, false)},
484 long_type, validate_long);
485 reg "int"
486 (restrict' "int" "long"
487 { no_facets with
488 minInclusive = Some (Value.Integer int_l, false);
489 maxInclusive = Some (Value.Integer int_r, false)},
490 int_type, validate_int);
491 reg "short"
492 (restrict' "short" "int"
493 { no_facets with
494 minInclusive = Some (Value.Integer short_l, false);
495 maxInclusive = Some (Value.Integer short_r, false)},
496 short_type, validate_short);
497 reg "byte"
498 (restrict' "byte" "short"
499 { no_facets with
500 minInclusive = Some (Value.Integer byte_l, false);
501 maxInclusive = Some (Value.Integer byte_r, false)},
502 byte_type, validate_short);
503 reg "normalizedString"
504 (restrict' "normalizedString" "string"
505 { no_facets with whiteSpace = `Replace, false },
506 Builtin_defs.string, validate_normalizedString);
507 reg "token"
508 (restrict' "token" "normalizedString"
509 { no_facets with whiteSpace = `Collapse, false },
510 Builtin_defs.string, validate_token);
511 alias "language" "token";
512 alias "Name" "token";
513 alias "NMTOKEN" "token";
514 alias "NCName" "token";
515 alias "ID" "token";
516 alias "IDREF" "token";
517 alias "ENTITY" "token";
518 reg "NMTOKENS"
519 (list' "NMTOKENS" "token",
520 string_list_type, validate_token_list);
521 alias "IDREFS" "NMTOKENS";
522 alias "ENTITIES" "NMTOKENS"
523
524 let _ = try fill () with Not_found -> assert false
525
526 (** {2 API} *)
527
528 let is_builtin = Hashtbl.mem builtins
529 let iter_builtin f =
530 Hashtbl.iter (fun _ (type_def, _, _) -> f type_def) builtins
531
532 let lookup name = Hashtbl.find builtins name
533
534 let fst (x,_,_) = x
535 let snd (_,y,_) = y
536 let trd (_,_,z) = z
537
538 let get_builtin name = fst (lookup name)
539 let cd_type_of_builtin name = snd (lookup name)
540 let validate_builtin name = trd (lookup name)
541

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5