/[svn]/schema/schema_validator.ml
ViewVC logotype

Contents of /schema/schema_validator.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1453 - (show annotations)
Tue Jul 10 18:51:16 2007 UTC (5 years, 10 months ago) by abate
File size: 20091 byte(s)
[r2005-02-17 13:35:50 by afrisch] Clean schema

Original author: afrisch
Date: 2005-02-17 13:35:50+00:00
1 let debug = false
2
3 open Printf
4
5 open Encodings
6 open Schema_pcre
7 open Schema_common
8 open Schema_types
9 open Value
10
11 (** {2 Misc} *)
12
13 let empty_string = string_utf8 (Utf8.mk "")
14 let empty_record = Value.vrecord []
15 let foo_atom = Value.Atom (Atoms.V.mk_ascii "foo")
16 let foo_event = E_char_data (Utf8.mk "")
17
18 let hashtbl_is_empty tbl =
19 try Hashtbl.iter (fun _ _ -> raise Exit) tbl; true
20 with Exit -> false
21
22 let string_of_value value =
23 let buf = Buffer.create 1024 in
24 let fmt = Format.formatter_of_buffer buf in
25 Value.print fmt value;
26 Buffer.contents buf
27
28 let foo_qname = Ns.empty, Utf8.mk ""
29
30 let ptbl_of_particles particles =
31 let tbl = Hashtbl.create 20 in
32 List.iter (* fill table *)
33 (* ASSUMPTION: firsts are disjoing as per UPA Schema constraint *)
34 (fun p ->
35 List.iter
36 (function None -> () | Some tag -> Hashtbl.add tbl tag p)
37 (first_of_particle p))
38 particles;
39 tbl
40
41 (** Validation context *)
42 class type validation_context =
43 object
44 (* if ns isn't given, targetNamespace of the schema is used *)
45 method expect_start_tag: ?ns:Ns.t -> Utf8.t -> unit
46 method expect_end_tag: ?ns:Ns.t -> Utf8.t -> unit
47 method expect_any_start_tag: Ns.qname
48 method expect_any_end_tag: Ns.qname
49 method get_string: Utf8.t
50 method junk: unit
51 method peek: event
52
53 method set_mixed: bool -> unit
54 method mixed: bool
55
56 method ns: Ns.t
57 end
58
59 let validation_error ?context s = raise (XSI_validation_error s)
60 let validation_error_exemplar = XSI_validation_error ""
61
62 let compare_exn e1 e2 =
63 (* comparison function on exceptions; include all validation error
64 * exceptions in an equivalence class *)
65 match e1, e2 with
66 | XSI_validation_error _, XSI_validation_error _ -> 0
67 | e1, e2 -> Pervasives.compare e1 e2
68
69 let rec tries funs exn arg =
70 match funs with
71 | [] -> raise Not_found
72 | f :: tl ->
73 try
74 f arg
75 with e when compare_exn e exn = 0 ->
76 tries tl exn arg
77
78 let space_RE = pcre_regexp " "
79 let split = pcre_split ~rex:space_RE
80
81 (** {2 Facets validation} *)
82
83 module Schema_facets:
84 sig
85 exception Facet_error of string
86 val facets_valid: facets -> Value.t -> unit
87 end
88 =
89 struct
90
91 open Big_int
92 open Value
93
94 exception Facet_error of string
95
96 (* compute the length of a particular CDuce *)
97 (* STRONG ASSUMPTION: v is a CDuce value built via "validate_simple_type"
98 * function below, thus it contains no sequence of characters, but strings
99 * and no Concat, but just Pair *)
100 let length v =
101 let rec aux acc = function
102 | Pair (_, rest) -> aux (Intervals.V.succ acc) rest
103 | v when v = Value.nil -> Intervals.V.zero
104 | _ -> assert false
105 in
106 aux Intervals.V.zero v
107
108 let length_valid len value =
109 if not (Intervals.V.equal (length value) len) then
110 raise (Facet_error "length")
111 let minLength_valid min_len value =
112 if Intervals.V.lt (length value) min_len then
113 raise (Facet_error "minLength")
114 let maxLength_valid max_len value =
115 if Intervals.V.gt (length value) max_len then
116 raise (Facet_error "maxLength")
117
118 let enumeration_valid enum value =
119 if not (ValueSet.mem value enum) then raise (Facet_error "enumeration")
120
121 let maxInclusive_valid max_inc value =
122 if value |>| max_inc then raise (Facet_error "maxInclusive")
123 let maxExclusive_valid max_exc value =
124 if value |>=| max_exc then raise (Facet_error "maxExclusive")
125 let minInclusive_valid min_inc value =
126 if value |<| min_inc then raise (Facet_error "minInclusive")
127 let minExclusive_valid min_exc value =
128 if value |<=| min_exc then raise (Facet_error "minInclusive")
129
130 (* check facets validaty rules other than pattern and whiteSpace. "value"
131 parameter should already be white space normalized and pattern valid.
132 Assumption: facets set contains only facets that are applicable to the type
133 of value *)
134 let facets_valid facets value =
135 (* TODO efficiency *)
136 (* all facets are always checked, but we know that in some cases only some
137 * of them can be present; e.g. if variety is union only pattern and
138 * enumeration are possibles ... *)
139 (match facets.length with
140 | None ->
141 (match facets.minLength with
142 | None -> ()
143 | Some (len, _) -> minLength_valid len value);
144 (match facets.maxLength with
145 | None -> ()
146 | Some (len, _) -> maxLength_valid len value);
147 | Some (len, _) -> length_valid len value);
148 (match facets.enumeration with
149 | None -> ()
150 | Some enum -> enumeration_valid enum value);
151 (match facets.maxInclusive with
152 | None -> ()
153 | Some (lim, _) -> maxInclusive_valid lim value);
154 (match facets.maxExclusive with
155 | None -> ()
156 | Some (lim, _) -> maxExclusive_valid lim value);
157 (match facets.minInclusive with
158 | None -> ()
159 | Some (lim, _) -> minInclusive_valid lim value);
160 (match facets.minExclusive with
161 | None -> ()
162 | Some (lim, _) -> minExclusive_valid lim value);
163 (*
164 (match facets.totalDigits with
165 | None -> ()
166 | Some (dig, _) -> totalDigits_valid dig value);
167 (match facets.fractionDigits with
168 | None -> ()
169 | Some (dig, _) -> fractionDigits_valid dig value)
170 *)
171
172 end
173
174 (** {2 Simple type validation} *)
175
176 let rec validate_simple_type def v =
177 let s =
178 match get_string_utf8 v with
179 | utf8_string, rest when rest |=| nil -> utf8_string
180 | _ -> validation_error "string expected"
181 in
182 match def with
183 | Primitive name | Derived (Some name, _, _, _)
184 when Schema_builtin.is_builtin name ->
185 (try
186 Schema_builtin.validate_builtin name s
187 with Schema_builtin.Schema_builtin_error name ->
188 validation_error (sprintf "%s isn't a valid %s"
189 (Utf8.to_string s) (Utf8.to_string name)))
190 | Primitive _ -> assert false
191 | Derived (_, variety, facets, base) ->
192 (match variety with
193 | Atomic primitive ->
194 let literal = normalize_white_space (fst facets.whiteSpace) s in
195 let value = validate_simple_type base(*primitive*)(*???*)
196 (string_utf8 literal) in
197 Schema_facets.facets_valid facets value;
198 value
199 | List item ->
200 let literal = normalize_white_space (fst facets.whiteSpace) s in
201 let items =
202 List.map (validate_simple_type item)
203 (List.map string_utf8 (split literal))
204 in
205 let value = Value.sequence items in
206 Schema_facets.facets_valid facets value;
207 value
208 | Union members ->
209 let value = tries (List.map validate_simple_type members)
210 validation_error_exemplar
211 (string_utf8 s) in
212 Schema_facets.facets_valid facets value;
213 value)
214
215 (* wrapper for validate_simple_type which works on contexts *)
216 let validate_simple_type_wrapper context st_def =
217 validate_simple_type st_def (string_utf8 context#get_string)
218
219 (** {2 Complex type validation} *)
220
221 let rec validate_any_type (context: validation_context) =
222 (* assumption: attribute events (if any) come first *)
223 let attrs = ref [] in
224 let cont = ref [] in
225 let rec aux () =
226 match context#peek with
227 | E_start_tag (ns, name) ->
228 context#junk;
229 let (attrs, content) = validate_any_type context in
230 let element =
231 Value.Xml (Value.Atom (Atoms.V.mk ns name), attrs, content)
232 in
233 context#expect_end_tag ~ns name;
234 cont := element :: !cont;
235 aux ()
236 | E_end_tag _ -> (Value.vrecord !attrs, Value.sequence (List.rev !cont))
237 | E_attribute (qname, value) ->
238 context#junk;
239 attrs := (qname, Value.string_utf8 value) :: !attrs;
240 aux ()
241 | E_char_data utf8_data ->
242 context#junk;
243 cont := Value.string_utf8 utf8_data :: !cont;
244 aux ()
245 in
246 aux ()
247
248 let check_fixed ~context fixed value =
249 if not (Value.equal fixed value) then
250 validation_error ~context (sprintf "Expected fixed value: %s; found %s"
251 (string_of_value fixed) (string_of_value value))
252
253 let validate_attribute_uses context attr_uses =
254 let tbl = Hashtbl.create 11 in
255 List.iter
256 (fun use -> Hashtbl.add tbl (Ns.empty, name_of_attribute_use use) use)
257 attr_uses;
258 let attrs = ref [] in
259 let rec aux () = (* look for attribute events and fill "attrs" *)
260 match context#peek with
261 | E_attribute (qname, value) ->
262 let { attr_decl = { attr_typdef = st_def };
263 attr_use_cstr = constr } =
264 try
265 Hashtbl.find tbl qname
266 with Not_found ->
267 validation_error ~context (sprintf "Unexpected attribute: %s"
268 (Ns.QName.to_string qname))
269 in
270 let value = validate_simple_type st_def (Value.string_utf8 value) in
271 (match constr with (* check fixed constraint *)
272 | Some (`Fixed v) -> check_fixed ~context v value
273 | _ -> ());
274 attrs := (qname, value) :: !attrs;
275 Hashtbl.remove tbl qname;
276 context#junk;
277 aux ()
278 | _ -> ()
279 in
280 aux ();
281 Hashtbl.iter
282 (fun qname at ->
283 if at.attr_required then (* check for missing required attributes *)
284 validation_error ~context (sprintf "Required attribute %s is missing"
285 (Ns.QName.to_string qname))
286 else (* add default values *)
287 match at.attr_use_cstr with
288 | Some (`Default v) -> attrs := (qname, v) :: !attrs
289 | _ -> ())
290 tbl;
291 Value.vrecord !attrs
292
293 let rec validate_element (context: validation_context) elt =
294 context#expect_start_tag elt.elt_name;
295 let (attrs, content) = validate_type context elt.elt_typdef in
296 let content = (* use default if needed and check fixed constraints *)
297 match elt.elt_cstr with
298 | Some (`Default v) when Value.equal content empty_string -> v
299 | Some (`Fixed v) ->
300 check_fixed ~context v content;
301 content
302 | _ -> content
303 in
304 let element =
305 Value.Xml (Value.Atom (Atoms.V.mk context#ns elt.elt_name), attrs, content)
306 in
307 context#expect_end_tag elt.elt_name;
308 element
309
310 and validate_type context = function
311 | AnyType -> validate_any_type context
312 | Simple st_def -> (empty_record, validate_simple_type_wrapper context st_def)
313 | Complex ct_def -> validate_complex_type context ct_def
314
315 (** @return Value.t * Value.t (* attrs, content *) *)
316 and validate_complex_type context ct =
317 let attrs = validate_attribute_uses context ct.ct_attrs in
318 let content = Value.sequence (validate_content_type context ct.ct_content) in
319 (attrs, content)
320
321 (** @return Value.t list *)
322 and validate_content_type context content_type =
323 match content_type with
324 | CT_empty -> []
325 | CT_simple st_def -> [ validate_simple_type_wrapper context st_def ]
326 | CT_model (particle, mixed) ->
327 context#set_mixed mixed;
328 validate_particle context particle
329
330 (** @return Value.t list *)
331 and validate_particle context particle =
332 let (min, max, term, first) = particle in
333 let content = ref [] in
334 let push v = content := v :: !content in
335 let rec validate_once ~cont_ok ~cont_failure =
336 match context#peek with
337 | E_start_tag (ns, tag) as event when Ns.equal ns context#ns ->
338 if is_in_first tag first then begin
339 List.iter push (validate_term context term);
340 cont_ok ()
341 end else
342 cont_failure event
343 | E_char_data utf8_data when context#mixed ->
344 push (Value.string_utf8 utf8_data);
345 context#junk;
346 validate_once ~cont_ok ~cont_failure
347 | ev -> cont_failure ev
348 in
349 let rec required = function
350 | v when Intervals.V.equal v Intervals.V.zero -> ()
351 | n (* when n > 0 *) ->
352 validate_once
353 ~cont_ok:(fun () -> required (Intervals.V.pred n))
354 ~cont_failure:(fun event ->
355 validation_error ~context (sprintf "Unexpected content: %s"
356 (string_of_event event)))
357 in
358 let rec optional = function
359 | None ->
360 validate_once
361 ~cont_ok:(fun () -> optional None)
362 ~cont_failure:(fun _ -> ())
363 | Some v when Intervals.V.equal v Intervals.V.zero -> ()
364 | Some n (* when n > 0 *) ->
365 validate_once
366 ~cont_ok:(fun () -> optional (Some (Intervals.V.pred n)))
367 ~cont_failure:(fun _ -> ())
368 in
369 let rec trailing_cdata () =
370 match context#peek with
371 | E_char_data utf8_data ->
372 push (Value.string_utf8 utf8_data);
373 context#junk;
374 trailing_cdata ()
375 | _ -> ()
376 in
377 required min;
378 optional
379 (match max with None -> None | Some v -> Some (Intervals.V.sub v min));
380 if context#mixed then trailing_cdata ();
381 List.rev !content
382
383 (** @return Value.t list *)
384 and validate_term context term =
385 match term with
386 | Elt elt_decl_ref -> [ validate_element context !elt_decl_ref ]
387 | Model model_group -> validate_model_group context model_group
388
389 (** @return (Value.t list * Utf8.t)
390 * 2nd value is the key for tbl that return the particle effectively used for
391 * validation *)
392 and validate_choice context tbl =
393 let backlog = ref [] in
394 let push v = backlog := v :: !backlog in
395 let rec next_tag () =
396 match context#peek with
397 | E_char_data utf8_data when context#mixed ->
398 push (Value.string_utf8 utf8_data);
399 context#junk;
400 next_tag ()
401 | E_char_data utf8_data (* when not context#mixed *) ->
402 validation_error ~context
403 (sprintf "Unexpected char data in non-mixed content: %s"
404 (Utf8.get_str utf8_data))
405 | E_start_tag qname -> qname
406 | ev ->
407 validation_error ~context
408 (sprintf "Unexpected content: %s" (string_of_event ev))
409 in
410 let (ns, tag) = next_tag () in
411 if Ns.equal ns context#ns then
412 try
413 let particle = Hashtbl.find tbl tag in
414 ((List.rev !backlog) @ (validate_particle context particle), tag)
415 with Not_found ->
416 validation_error ~context (sprintf "Unexpected element %s"
417 (Ns.QName.to_string (ns, tag)))
418 else (* wrong namespace *)
419 validation_error ~context
420 (sprintf "Element from unexpected namespace: %s"
421 (Ns.QName.to_string (ns, tag)))
422
423 (** @return Value.t list *)
424 and validate_model_group context model_group =
425 match model_group with
426 | All particles ->
427 let tbl = ptbl_of_particles particles in
428 let contents = ref [] in
429 let rec aux () =
430 if hashtbl_is_empty tbl then
431 List.concat (List.rev !contents)
432 else begin
433 let (content, key) = validate_choice context tbl in
434 contents := content :: !contents;
435 Hashtbl.remove tbl key;
436 aux ()
437 end
438 in
439 aux ()
440 | Choice particles ->
441 fst (validate_choice context (ptbl_of_particles particles))
442 | Sequence particles ->
443 List.concat (List.map (validate_particle context) particles)
444
445 (** {2 Context implementation} *)
446
447 class context ~stream ~schema =
448 object (self)
449 val mutable mixed = false
450
451 method mixed = mixed
452 method set_mixed v = mixed <- v
453
454 method private next =
455 try
456 Stream.next stream
457 with Stream.Failure ->
458 self#error "Unexpected end of stream";
459 (* just to cheat with the type checker, above function wont return *)
460 Stream.next stream
461 method peek =
462 match Stream.peek stream with
463 | None ->
464 self#error "Unexpected end of stream";
465 (* just to cheat with the type checker as above *)
466 Stream.next stream
467 | Some e -> e
468 method junk = Stream.junk stream
469 method get_string =
470 let buf = Buffer.create 1024 in
471 let rec aux () =
472 match self#peek with
473 | E_char_data data ->
474 Buffer.add_string buf (Utf8.get_str data);
475 self#junk;
476 aux ()
477 | _ -> Utf8.mk (Buffer.contents buf)
478 in
479 aux ()
480
481 method private error s = ignore (validation_error ~context:self s)
482
483 method expect_start_tag ?ns name =
484 let ns = match ns with Some ns -> ns | _ -> schema.targetNamespace in
485 let expected = (ns, name) in
486 match self#next with
487 | E_start_tag found ->
488 if not (Ns.QName.equal expected found) then
489 self#error (sprintf "Start tag error: expected %s, found %s"
490 (Ns.QName.to_string expected) (Ns.QName.to_string found))
491 | ev ->
492 self#error (sprintf "Expected start tag (%s), found %s"
493 (Ns.QName.to_string expected) (string_of_event ev))
494 method expect_end_tag ?ns name =
495 let ns = match ns with Some ns -> ns | _ -> schema.targetNamespace in
496 let expected = (ns, name) in
497 match self#next with
498 | E_end_tag found ->
499 if not (Ns.QName.equal expected found) then
500 self#error (sprintf "Start tag error: expected %s, found %s"
501 (Ns.QName.to_string expected) (Ns.QName.to_string found))
502 | ev ->
503 self#error (sprintf "Expected end tag (%s), found %s"
504 (Ns.QName.to_string expected) (string_of_event ev))
505 method expect_any_start_tag =
506 match self#next with
507 | E_start_tag tag -> tag
508 | ev ->
509 self#error (sprintf "Expected start tag, found %s"
510 (string_of_event ev));
511 foo_qname (* useless *)
512 method expect_any_end_tag =
513 match self#next with
514 | E_end_tag tag -> tag
515 | ev ->
516 self#error (sprintf "Expected end tag, found %s"
517 (string_of_event ev));
518 foo_qname (* useless *)
519
520 method ns = schema.targetNamespace
521
522 end
523
524 (** {2 API} *)
525
526 let validate_element decl schema value =
527 validate_element (new context ~stream:(stream_of_value value) ~schema) decl
528
529 let validate_type def schema value =
530 match def with
531 | AnyType -> value (* shortcut *)
532 | Simple st_def ->
533 if not (is_str value) then
534 validation_error
535 "Only string values could be validate against simple types";
536 validate_simple_type st_def value (* shortcut *)
537 | Complex ct_def ->
538 let context = new context ~stream:(stream_of_value value) ~schema in
539 let start_tag = context#expect_any_start_tag in
540 let (attrs, content) = validate_complex_type context ct_def in
541 let end_tag = context#expect_any_end_tag in
542 assert (start_tag = end_tag);
543 let (ns, name) = start_tag in
544 Value.Xml (Value.Atom (Atoms.V.mk ns name), attrs, content)
545
546 let validate_attribute decl schema value =
547 assert false; (* TODO see the .mli *)
548 (match value with
549 | Record _ -> ()
550 | _ ->
551 validation_error
552 "Only record values could be validated against attributes");
553 let (name, st_def, constr) = decl in
554 let qname = (schema.targetNamespace, name) in
555 let fields = Value.get_fields value in
556 let found = ref false in
557 let rec aux = function
558 | [] -> []
559 | (qname', value) :: rest when qname' = qname ->
560 found := true;
561 (qname', validate_simple_type st_def value) :: aux rest
562 | field :: rest -> field :: aux rest
563
564 in
565 let fields = aux (Value.get_fields value) in
566 let fields =
567 if not !found then
568 match constr with
569 | Some (`Default v) -> (qname, v) :: fields
570 | _ ->
571 validation_error (sprintf
572 "Attribute %s was not found and no default value was provided"
573 (Ns.QName.to_string qname))
574 else
575 fields
576 in
577 Value.vrecord fields
578
579 let validate_attribute_group { ag_def = attr_uses } schema value =
580 let stream =
581 match value with
582 | Record _ ->
583 Stream.of_list
584 ((List.map
585 (fun (qname, v) ->
586 E_attribute (qname, fst (Value.get_string_utf8 v)))
587 (Value.get_fields value)) @
588 [ foo_event ])
589 | _ ->
590 validation_error
591 "Only record values could be validated against attribute groups"
592 in
593 validate_attribute_uses (new context ~stream ~schema) attr_uses
594
595 let validate_model_group { mg_def = mg } schema value =
596 if not (Value.is_seq value) then
597 validation_error
598 "Only sequence values could be validated against model groups";
599 let stream = stream_of_value (Value.Xml (foo_atom, empty_record, value)) in
600 Stream.junk stream;
601 Value.sequence (validate_model_group (new context ~stream ~schema) mg)
602

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5