/[svn]/schema/schema_validator.ml
ViewVC logotype

Contents of /schema/schema_validator.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 846 - (hide annotations)
Tue Jul 10 18:06:50 2007 UTC (5 years, 10 months ago) by abate
File size: 20321 byte(s)
[r2003-11-26 16:23:06 by szach] first real life implementation of schema validations

Original author: szach
Date: 2003-11-26 16:23:06+00:00
1 abate 500
2 abate 507 let debug = false
3 abate 500
4 abate 507 open Printf
5 abate 784
6 abate 812 open Encodings
7     open Encodings.Utf8.Pcre
8 abate 759 open Schema_common
9 abate 507 open Schema_types
10 abate 784 open Value
11 abate 500
12 abate 812 (** {2 Misc} *)
13 abate 500
14 abate 846 let empty_string = Value.string_utf8 (Utf8.mk "")
15    
16     let hashtbl_is_empty tbl =
17     let empty = ref true in
18     (try
19     Hashtbl.iter (fun _ _ -> empty := false; raise Exit) tbl
20     with Exit -> ());
21     !empty
22    
23     let string_of_value value =
24     let buf = Buffer.create 1024 in
25     let fmt = Format.formatter_of_buffer buf in
26     Value.print fmt value;
27     Buffer.contents buf
28    
29     let foo_qname = Ns.empty, Utf8.mk ""
30    
31     let ptbl_of_particles particles =
32     let tbl = Hashtbl.create 20 in
33     List.iter (* fill table *)
34     (* ASSUMPTION: firsts are disjoing as per UPA Schema constraint *)
35     (fun p ->
36     List.iter
37     (function None -> () | Some tag -> Hashtbl.add tbl tag p)
38     (first_of_particle p))
39     particles;
40     tbl
41    
42     (** Validation context *)
43     class type validation_context =
44     object
45     (* if ns isn't given, targetNamespace of the schema is used *)
46     method expect_start_tag: ?ns:Ns.t -> Utf8.t -> unit
47     method expect_end_tag: ?ns:Ns.t -> Utf8.t -> unit
48     method expect_any_start_tag: Ns.qname
49     method expect_any_end_tag: Ns.qname
50     method get_string: Utf8.t
51     method junk: unit
52     method peek: event
53    
54     method set_mixed: bool -> unit
55     method mixed: bool
56    
57     method ns: Ns.t
58     end
59    
60 abate 812 let validation_error ?context s = raise (XSI_validation_error s)
61     let validation_error_exemplar = XSI_validation_error ""
62    
63 abate 759 let compare_exn e1 e2 =
64 abate 784 (* comparison function on exceptions; include all validation error
65     * exceptions in an equivalence class *)
66 abate 759 match e1, e2 with
67     | XSI_validation_error _, XSI_validation_error _ -> 0
68     | e1, e2 -> Pervasives.compare e1 e2
69 abate 510
70 abate 812 let rec tries funs exn arg =
71     match funs with
72     | [] -> raise Not_found
73     | f :: tl ->
74     try
75     f arg
76     with e when compare_exn e exn = 0 ->
77     tries tl exn arg
78    
79     let space_RE = pcre_regexp " "
80     let split = pcre_split ~rex:space_RE
81    
82     (** {2 Facets validation} *)
83    
84 abate 784 module Schema_facets:
85     sig
86     exception Facet_error of string
87     val facets_valid: facets -> Value.t -> unit
88     end
89     =
90     struct
91 abate 500
92 abate 759 open Big_int
93     open Value
94 abate 516
95 abate 759 exception Facet_error of string
96 abate 516
97 abate 759 (* compute the length of a particular CDuce *)
98     (* STRONG ASSUMPTION: v is a CDuce value built via "validate_simple_type"
99     * function below, thus it contains no sequence of characters, but strings
100     * and no Concat, but just Pair *)
101     let length v =
102     let rec aux acc = function
103     | Pair (_, rest) -> aux (Intervals.V.succ acc) rest
104     | v when v = Value.nil -> Intervals.V.zero
105     | _ -> assert false
106     in
107     aux Intervals.V.zero v
108 abate 500
109 abate 759 let length_valid len value =
110     if not (Intervals.V.equal (length value) len) then
111     raise (Facet_error "length")
112     let minLength_valid min_len value =
113     if Intervals.V.lt (length value) min_len then
114     raise (Facet_error "minLength")
115     let maxLength_valid max_len value =
116     if Intervals.V.gt (length value) max_len then
117     raise (Facet_error "maxLength")
118 abate 500
119 abate 759 let enumeration_valid enum value =
120     if not (ValueSet.mem value enum) then raise (Facet_error "enumeration")
121 abate 500
122 abate 759 let maxInclusive_valid max_inc value =
123     if value |>| max_inc then raise (Facet_error "maxInclusive")
124     let maxExclusive_valid max_exc value =
125     if value |>=| max_exc then raise (Facet_error "maxExclusive")
126     let minInclusive_valid min_inc value =
127     if value |<| min_inc then raise (Facet_error "minInclusive")
128     let minExclusive_valid min_exc value =
129     if value |<=| min_exc then raise (Facet_error "minInclusive")
130 abate 500
131 abate 759 (* check facets validaty rules other than pattern and whiteSpace. "value"
132     parameter should already be white space normalized and pattern valid.
133     Assumption: facets set contains only facets that are applicable to the type
134     of value *)
135     let facets_valid facets value =
136     (* TODO efficiency *)
137     (* all facets are always checked, but we know that in some cases only some
138     * of them can be present; e.g. if variety is union only pattern and
139     * enumeration are possibles ... *)
140     (match facets.length with
141     | None ->
142     (match facets.minLength with
143     | None -> ()
144     | Some (len, _) -> minLength_valid len value);
145     (match facets.maxLength with
146     | None -> ()
147     | Some (len, _) -> maxLength_valid len value);
148     | Some (len, _) -> length_valid len value);
149     (match facets.enumeration with
150     | None -> ()
151     | Some enum -> enumeration_valid enum value);
152     (match facets.maxInclusive with
153     | None -> ()
154     | Some (lim, _) -> maxInclusive_valid lim value);
155     (match facets.maxExclusive with
156     | None -> ()
157     | Some (lim, _) -> maxExclusive_valid lim value);
158     (match facets.minInclusive with
159     | None -> ()
160     | Some (lim, _) -> minInclusive_valid lim value);
161     (match facets.minExclusive with
162     | None -> ()
163     | Some (lim, _) -> minExclusive_valid lim value);
164     (*
165     (match facets.totalDigits with
166     | None -> ()
167     | Some (dig, _) -> totalDigits_valid dig value);
168     (match facets.fractionDigits with
169     | None -> ()
170     | Some (dig, _) -> fractionDigits_valid dig value)
171     *)
172 abate 500
173 abate 759 end
174 abate 500
175 abate 812 (** {2 Simple type validation} *)
176 abate 500
177 abate 812 let rec validate_simple_type def v =
178 abate 784 let s =
179     match get_string_utf8 v with
180 abate 812 | utf8_string, rest when rest |=| nil -> utf8_string
181     | _ -> validation_error "string expected"
182 abate 784 in
183 abate 812 match def with
184 abate 759 | Primitive name | Derived (Some name, _, _, _)
185 abate 784 when Schema_builtin.is_builtin name ->
186     (try
187     Schema_builtin.validate_builtin name s
188     with Schema_builtin.Schema_builtin_error name ->
189 abate 812 validation_error (sprintf "%s isn't a valid %s"
190     (Utf8.to_string s) (Utf8.to_string name)))
191 abate 759 | Primitive _ -> assert false
192     | Derived (_, variety, facets, _) ->
193     (match variety with
194     | Atomic primitive ->
195 abate 812 let validate_base = validate_simple_type primitive in
196 abate 784 let literal = normalize_white_space (fst facets.whiteSpace) s in
197 abate 759 (* pattern_valid facets.pattern literal; *)
198 abate 812 let value = validate_base (Value.string_utf8 literal) in
199 abate 784 Schema_facets.facets_valid facets value;
200     value
201 abate 759 | List item ->
202 abate 812 let validate_base = validate_simple_type item in
203 abate 784 let literal = normalize_white_space (fst facets.whiteSpace) s in
204 abate 759 (* pattern_valid facets.pattern literal; *)
205 abate 784 let items =
206     List.map validate_base
207 abate 812 (List.map Value.string_utf8 (split literal))
208 abate 784 in
209     let value = Value.sequence items in
210     Schema_facets.facets_valid facets value;
211     value
212 abate 759 | Union members ->
213     let validate_members =
214 abate 812 tries (List.map validate_simple_type members)
215     validation_error_exemplar
216 abate 759 in
217 abate 812 let value = validate_members (Value.string_utf8 s) in
218 abate 784 Schema_facets.facets_valid facets value;
219     value)
220 abate 500
221 abate 846 (* wrapper for validate_simple_type which works on contexts *)
222     let validate_simple_type_wrapper context st_def =
223     validate_simple_type st_def (Value.string_utf8 context#get_string)
224    
225     (** {2 Complex type validation} *)
226    
227     let rec validate_any_type (context: validation_context) =
228     (* assumption: attribute events (if any) come first *)
229     let attrs = ref [] in
230     let cont = ref [] in
231     let rec aux () =
232     match context#peek with
233     | E_start_tag (ns, name) ->
234     context#junk;
235     let (attrs, content) = validate_any_type context in
236     let element =
237     Value.Xml (Value.Atom (Atoms.V.mk ns name), attrs, content)
238     in
239     context#expect_end_tag ~ns name;
240     cont := element :: !cont;
241     aux ()
242     | E_end_tag _ -> (Value.vrecord !attrs, Value.sequence (List.rev !cont))
243     | E_attribute (qname, value) ->
244     context#junk;
245     attrs := (qname, Value.string_utf8 value) :: !attrs;
246     aux ()
247     | E_char_data utf8_data ->
248     context#junk;
249     cont := Value.string_utf8 utf8_data :: !cont;
250     aux ()
251     in
252     aux ()
253    
254     let check_fixed ~context fixed value =
255     if not (Value.equal fixed value) then
256     validation_error ~context (sprintf "Expected fixed value: %s; found %s"
257     (string_of_value fixed) (string_of_value value))
258    
259     let validate_attribute_uses context attr_uses =
260     let tbl = Hashtbl.create 11 in
261     List.iter
262     (fun use -> Hashtbl.add tbl (Ns.empty, name_of_attribute_use use) use)
263     attr_uses;
264     let attrs = ref [] in
265     let rec aux () = (* look for attribute events and fill "attrs" *)
266     match context#peek with
267     | E_attribute (qname, value) ->
268     let (_, (_, st_def, _), constr) = (* attribute use *)
269     try
270     Hashtbl.find tbl qname
271     with Not_found ->
272     validation_error ~context (sprintf "Unexpected attribute: %s"
273     (Ns.QName.to_string qname))
274     in
275     let value = validate_simple_type st_def (Value.string_utf8 value) in
276     (match constr with (* check fixed constraint *)
277     | Some (`Fixed v) -> check_fixed ~context v value
278     | _ -> ());
279     attrs := (qname, value) :: !attrs;
280     Hashtbl.remove tbl qname;
281     context#junk;
282     aux ()
283     | _ -> ()
284     in
285     aux ();
286     Hashtbl.iter
287     (fun qname (required, _, constr) ->
288     if required then (* check for missing required attributes *)
289     validation_error ~context (sprintf "Required attribute %s is missing"
290     (Ns.QName.to_string qname))
291     else (* add default values *)
292     match constr with
293     | Some (`Default v) -> attrs := (qname, v) :: !attrs
294     | _ -> ())
295     tbl;
296     Value.vrecord !attrs
297    
298     let rec validate_element (context: validation_context) decl =
299     let (_, name, type_def_ref, constr) = decl in
300 abate 812 context#expect_start_tag name;
301 abate 846 let (attrs, content) = validate_type context !type_def_ref in
302     let content = (* use default if needed and check fixed constraints *)
303     match constr with
304     | Some (`Default v) when Value.equal content empty_string -> v
305     | Some (`Fixed v) ->
306     check_fixed ~context v content;
307     content
308     | _ -> content
309     in
310     let element =
311     Value.Xml (Value.Atom (Atoms.V.mk context#ns name), attrs, content)
312     in
313     context#expect_end_tag name;
314     element
315 abate 784
316 abate 846 and validate_type context = function
317     | AnyType -> validate_any_type (context :> validation_context)
318     | Simple st_def -> (Value.nil, validate_simple_type_wrapper context st_def)
319     | Complex ct_def ->
320     validate_complex_type (context :> validation_context) ct_def
321    
322     (** @return Value.t * Value.t (* attrs, content *) *)
323     and validate_complex_type context ct_def =
324     let (_, _, _, _, attr_uses, content_type) = ct_def in
325     let attrs = validate_attribute_uses context attr_uses in
326     let content = Value.sequence (validate_content_type context content_type) in
327     (attrs, content)
328    
329     (** @return Value.t list *)
330     and validate_content_type context content_type =
331     match content_type with
332     | CT_empty -> []
333     | CT_simple st_def -> [ validate_simple_type_wrapper context st_def ]
334     | CT_model (particle, mixed) ->
335     context#set_mixed mixed;
336     validate_particle context particle
337    
338     (** @return Value.t list *)
339     and validate_particle context particle =
340     let (min, max, term, first) = particle in
341     let content = ref [] in
342     let push v = content := v :: !content in
343     let rec validate_once ~cont_ok ~cont_failure =
344     match context#peek with
345     | E_start_tag (ns, tag) as event when Ns.equal ns context#ns ->
346     if is_in_first tag first then begin
347     List.iter push (validate_term context term);
348     cont_ok ()
349     end else
350     cont_failure event
351     | E_char_data utf8_data when context#mixed ->
352     push (Value.string_utf8 utf8_data);
353     context#junk;
354     validate_once ~cont_ok ~cont_failure
355     | ev -> cont_failure ev
356     in
357     let rec required = function
358     | v when Intervals.V.equal v Intervals.V.zero -> ()
359     | n (* when n > 0 *) ->
360     validate_once
361     ~cont_ok:(fun () -> required (Intervals.V.pred n))
362     ~cont_failure:(fun event ->
363     validation_error ~context (sprintf "Unexpected content: %s"
364     (string_of_event event)))
365     in
366     let rec optional = function
367     | None ->
368     validate_once
369     ~cont_ok:(fun () -> optional None)
370     ~cont_failure:(fun _ -> ())
371     | Some v when Intervals.V.equal v Intervals.V.zero -> ()
372     | Some n (* when n > 0 *) ->
373     validate_once
374     ~cont_ok:(fun () -> optional (Some (Intervals.V.pred n)))
375     ~cont_failure:(fun _ -> ())
376     in
377     let rec trailing_cdata () =
378     match context#peek with
379     | E_char_data utf8_data ->
380     push (Value.string_utf8 utf8_data);
381     context#junk;
382     trailing_cdata ()
383     | _ -> ()
384     in
385     required min;
386     optional
387     (match max with None -> None | Some v -> Some (Intervals.V.sub v min));
388     if context#mixed then trailing_cdata ();
389     List.rev !content
390    
391     (** @return Value.t list *)
392     and validate_term context term =
393     match term with
394     | Elt elt_decl_ref -> [ validate_element context !elt_decl_ref ]
395     | Model model_group -> validate_model_group context model_group
396    
397     (** @return (Value.t list * Utf8.t)
398     * 2nd value is the key for tbl that return the particle effectively used for
399     * validation *)
400     and validate_choice context tbl =
401     let backlog = ref [] in
402     let push v = backlog := v :: !backlog in
403     let rec next_tag () =
404     match context#peek with
405     | E_char_data utf8_data when context#mixed ->
406     push (Value.string_utf8 utf8_data);
407     context#junk;
408     next_tag ()
409     | E_char_data utf8_data (* when not context#mixed *) ->
410     validation_error ~context
411     (sprintf "Unexpected char data in non-mixed content: %s"
412     (Utf8.get_str utf8_data))
413     | E_start_tag qname -> qname
414     | ev ->
415     validation_error ~context
416     (sprintf "Unexpected content: %s" (string_of_event ev))
417     in
418     let (ns, tag) = next_tag () in
419     if Ns.equal ns context#ns then
420     try
421     let particle = Hashtbl.find tbl tag in
422     ((List.rev !backlog) @ (validate_particle context particle), tag)
423     with Not_found ->
424     validation_error ~context (sprintf "Unexpected element %s"
425     (Ns.QName.to_string (ns, tag)))
426     else (* wrong namespace *)
427     validation_error ~context
428     (sprintf "Element from unexpected namespace: %s"
429     (Ns.QName.to_string (ns, tag)))
430    
431     (** @return Value.t list *)
432     and validate_model_group context model_group =
433     match model_group with
434     | All particles ->
435     let tbl = ptbl_of_particles particles in
436     let contents = ref [] in
437     let rec aux () =
438     if hashtbl_is_empty tbl then
439     List.concat (List.rev !contents)
440     else begin
441     let (content, key) = validate_choice context tbl in
442     contents := content :: !contents;
443     Hashtbl.remove tbl key;
444     aux ()
445     end
446     in
447     aux ()
448     | Choice particles ->
449     fst (validate_choice context (ptbl_of_particles particles))
450     | Sequence particles ->
451     List.concat (List.map (validate_particle context) particles)
452    
453     (** {2 Context implementation} *)
454    
455     class context ~stream ~schema =
456 abate 812 object (self)
457 abate 846 val mutable mixed = false
458 abate 784
459 abate 846 method mixed = mixed
460     method set_mixed v = mixed <- v
461    
462 abate 812 method private next =
463     try
464     Stream.next stream
465     with Stream.Failure ->
466 abate 846 self#error "Unexpected end of stream";
467     (* just to cheat with the type checker, above function wont return *)
468     Stream.next stream
469     method peek =
470     match Stream.peek stream with
471     | None ->
472     self#error "Unexpected end of stream";
473     (* just to cheat with the type checker as above *)
474     Stream.next stream
475     | Some e -> e
476     method junk = Stream.junk stream
477     method get_string =
478     let buf = Buffer.create 1024 in
479     let rec aux () =
480     match self#peek with
481     | E_char_data data ->
482     Buffer.add_string buf (Utf8.get_str data);
483     self#junk;
484     aux ()
485     | _ -> Utf8.mk (Buffer.contents buf)
486     in
487     aux ()
488 abate 784
489 abate 846 method private error s = ignore (validation_error ~context:self s)
490    
491     method expect_start_tag ?ns name =
492     let ns = match ns with Some ns -> ns | _ -> schema.targetNamespace in
493     let expected = (ns, name) in
494 abate 812 match self#next with
495     | E_start_tag found ->
496     if not (Ns.QName.equal expected found) then
497 abate 846 self#error (sprintf "Start tag error: expected %s, found %s"
498     (Ns.QName.to_string expected) (Ns.QName.to_string found))
499 abate 812 | ev ->
500 abate 846 self#error (sprintf "Expected start tag (%s), found %s"
501     (Ns.QName.to_string expected) (string_of_event ev))
502     method expect_end_tag ?ns name =
503     let ns = match ns with Some ns -> ns | _ -> schema.targetNamespace in
504     let expected = (ns, name) in
505 abate 812 match self#next with
506     | E_end_tag found ->
507     if not (Ns.QName.equal expected found) then
508 abate 846 self#error (sprintf "Start tag error: expected %s, found %s"
509     (Ns.QName.to_string expected) (Ns.QName.to_string found))
510 abate 812 | ev ->
511 abate 846 self#error (sprintf "Expected end tag (%s), found %s"
512     (Ns.QName.to_string expected) (string_of_event ev))
513     method expect_any_start_tag =
514     match self#next with
515     | E_start_tag tag -> tag
516     | ev ->
517     self#error (sprintf "Expected start tag, found %s"
518     (string_of_event ev));
519     foo_qname (* useless *)
520     method expect_any_end_tag =
521     match self#next with
522     | E_end_tag tag -> tag
523     | ev ->
524     self#error (sprintf "Expected end tag, found %s"
525     (string_of_event ev));
526     foo_qname (* useless *)
527 abate 784
528 abate 846 method ns = schema.targetNamespace
529    
530 abate 812 end
531 abate 784
532 abate 812 (** {2 API} *)
533 abate 784
534 abate 812 let validate_element decl schema value =
535 abate 846 validate_element (new context ~stream:(stream_of_value value) ~schema) decl
536 abate 500
537 abate 846 let validate_type def schema value =
538     match def with
539     | AnyType -> value (* shortcut *)
540     | Simple st_def ->
541     if not (is_str value) then
542     validation_error
543     "Only string values could be validate against simple types";
544     validate_simple_type st_def value (* shortcut *)
545     | Complex ct_def ->
546     let context = new context ~stream:(stream_of_value value) ~schema in
547     let start_tag = context#expect_any_start_tag in
548     let (attrs, content) = validate_complex_type context ct_def in
549     let end_tag = context#expect_any_end_tag in
550     assert (start_tag = end_tag);
551     let (ns, name) = start_tag in
552     Value.Xml (Value.Atom (Atoms.V.mk ns name), attrs, content)
553 abate 784
554 abate 846 let validate_attribute decl schema value =
555     (match value with
556     | Record _ -> ()
557     | _ ->
558     validation_error
559     "Only record values could be validated against attributes");
560     let (name, st_def, constr) = decl in
561     let qname = (schema.targetNamespace, name) in
562     let fields = Value.get_fields value in
563     let found = ref false in
564     let rec aux = function
565     | [] -> []
566     | (qname', value) :: rest when qname' = qname ->
567     (qname', validate_simple_type st_def value) :: aux rest
568     | field :: rest -> field :: aux rest
569    
570     in
571     let fields = aux (Value.get_fields value) in
572     let fields =
573     if not !found then
574     match constr with
575     | Some (`Default v) -> (qname, v) :: fields
576     | _ ->
577     validation_error (sprintf
578     "Attribute %s was not found and no default value was provided"
579     (Ns.QName.to_string qname))
580     else
581     fields
582     in
583     Value.vrecord fields
584    
585     let validate_attribute_group def schema value =
586     let (_, attr_uses) = def in
587     let stream =
588     match value with
589     | Record _ ->
590     Stream.of_list
591     (List.map
592     (fun (qname, v) ->
593     E_attribute (qname, fst (Value.get_string_utf8 v)))
594     (Value.get_fields value))
595     | _ ->
596     validation_error
597     "Only record values could be validated against attribute groups"
598     in
599     validate_attribute_uses (new context ~stream ~schema) attr_uses
600    
601     let validate_model_group def schema value =
602     if not (Value.is_seq value) then
603     validation_error
604     "Only sequence values could be validated against model groups";
605     let stream =
606     stream_of_value (Value.Xml (Value.Absent, Value.Absent, value))
607     in
608     Stream.junk stream;
609     Value.sequence (validate_model_group (new context ~stream ~schema) (snd def))
610    

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5