| 1 |
abate |
500 |
|
| 2 |
abate |
507 |
let debug = false
|
| 3 |
abate |
500 |
|
| 4 |
abate |
507 |
open Printf
|
| 5 |
|
|
open Pxp_yacc
|
| 6 |
|
|
open Schema_types
|
| 7 |
abate |
500 |
|
| 8 |
|
|
exception Stop ;; (* internal *)
|
| 9 |
|
|
|
| 10 |
abate |
507 |
type validator = (Pxp_yacc.event Stream.t -> Value.t) * First.t
|
| 11 |
abate |
500 |
|
| 12 |
abate |
507 |
let validate ~validator:(validate_fun, _) = validate_fun
|
| 13 |
abate |
500 |
|
| 14 |
|
|
(* wrap a function validating a string with a validator *)
|
| 15 |
abate |
507 |
let pcdata_wrapper f = (fun stream -> f (Schema_xml.collect_pcdata stream))
|
| 16 |
abate |
500 |
|
| 17 |
|
|
let string_of_expect_token = function
|
| 18 |
|
|
| `E_start_tag tag -> "<" ^ tag ^ ">"
|
| 19 |
|
|
| `E_end_tag tag -> "</" ^ tag ^ ">"
|
| 20 |
|
|
|
| 21 |
|
|
let string_of_pxp_event = function
|
| 22 |
|
|
| E_start_doc (version, standalone, dtd) -> "E_start_doc"
|
| 23 |
|
|
| E_end_doc -> "E_end_doc"
|
| 24 |
|
|
| E_start_tag (name, attlist, entity_id) -> sprintf "E_start_tag (%s)" name
|
| 25 |
|
|
| E_end_tag (name, entity_id) -> sprintf "E_end_tag (%s)" name
|
| 26 |
|
|
| E_char_data data ->
|
| 27 |
|
|
sprintf "E_char_data (%s)" (Pcre.replace ~pat:"\n" ~templ:"\\n" data)
|
| 28 |
|
|
| E_pinstr (target, value) -> "E_pinstr"
|
| 29 |
|
|
| E_comment data -> "E_comment"
|
| 30 |
|
|
| E_position (entity, line, col) -> "E_position"
|
| 31 |
|
|
| E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn)
|
| 32 |
|
|
| E_end_of_stream -> "E_end_of_stream"
|
| 33 |
|
|
|
| 34 |
abate |
507 |
exception Not_a_start_tag of Pxp_yacc.event
|
| 35 |
abate |
500 |
|
| 36 |
|
|
(* used, along with "first", to discriminate between choices *)
|
| 37 |
|
|
let rec peek_start_tag ?(ignore_ws = true) stream =
|
| 38 |
|
|
match Schema_xml.peek stream with
|
| 39 |
|
|
| E_char_data s when (Pxp_lib.only_whitespace s) && ignore_ws ->
|
| 40 |
|
|
Stream.junk stream;
|
| 41 |
|
|
peek_start_tag stream
|
| 42 |
|
|
| E_pinstr (_, _) | E_comment _ | E_position _ | E_start_doc (_, _, _)
|
| 43 |
|
|
| E_end_doc | E_error _ | E_end_of_stream ->
|
| 44 |
|
|
Stream.junk stream;
|
| 45 |
|
|
peek_start_tag stream
|
| 46 |
|
|
| E_start_tag (name, _, _) -> name
|
| 47 |
|
|
| e -> raise (Not_a_start_tag e)
|
| 48 |
|
|
|
| 49 |
|
|
(* INVARIANT: if expect fails, the stream is left unchanged. Up to white space
|
| 50 |
|
|
only CDATA removals if ignore_ws is true. *)
|
| 51 |
|
|
let expect ?(ignore_ws = true) expected stream =
|
| 52 |
|
|
if debug then
|
| 53 |
|
|
print_endline ("Expecting: " ^ string_of_expect_token expected);
|
| 54 |
|
|
let rec junk () =
|
| 55 |
|
|
match Schema_xml.peek stream with
|
| 56 |
|
|
| E_char_data s when (Pxp_lib.only_whitespace s) && ignore_ws ->
|
| 57 |
|
|
Stream.junk stream;
|
| 58 |
|
|
junk ()
|
| 59 |
|
|
| E_pinstr (_, _) | E_comment _ | E_position _ | E_start_doc (_, _, _)
|
| 60 |
|
|
| E_end_doc | E_error _ | E_end_of_stream ->
|
| 61 |
|
|
Stream.junk stream;
|
| 62 |
|
|
junk ()
|
| 63 |
|
|
| _ -> ()
|
| 64 |
|
|
in
|
| 65 |
|
|
junk ();
|
| 66 |
|
|
match (expected, Schema_xml.peek stream) with
|
| 67 |
|
|
| `E_start_tag tag, ((E_start_tag (t, _, _)) as found) when tag = t ->
|
| 68 |
|
|
Stream.junk stream;
|
| 69 |
|
|
found
|
| 70 |
|
|
| `E_end_tag tag, ((E_end_tag (t, _)) as found) when tag = t ->
|
| 71 |
|
|
Stream.junk stream;
|
| 72 |
|
|
found
|
| 73 |
|
|
| expected, found ->
|
| 74 |
|
|
raise (XSI_validation_error (sprintf "expect failure: expected %s, found \
|
| 75 |
|
|
%s" (string_of_expect_token expected) (string_of_pxp_event found)))
|
| 76 |
|
|
|
| 77 |
|
|
(* TODO not tail recursive .... but Value.sequence isn't tail recursive too
|
| 78 |
|
|
:-((( AND TODO terribly slow !!! *)
|
| 79 |
|
|
let flatten_cont =
|
| 80 |
|
|
let rec dfv = function (* depth first visit *)
|
| 81 |
|
|
| Value.Pair (x, y) -> (dfv x) @ (dfv y)
|
| 82 |
|
|
| v when Value.compare v Value.nil = 0 -> []
|
| 83 |
|
|
| v -> [v]
|
| 84 |
|
|
in
|
| 85 |
|
|
fun l -> Value.sequence (dfv l)
|
| 86 |
|
|
|
| 87 |
|
|
let (>>=) n m =
|
| 88 |
|
|
match (n, m) with Some n, m -> n >= m | None, _ -> true
|
| 89 |
|
|
|
| 90 |
|
|
|
| 91 |
|
|
let validator_of_simple_type = function
|
| 92 |
|
|
| SBuilt_in s ->
|
| 93 |
|
|
(pcdata_wrapper (Schema_builtin.__validate_fun_of_builtin s),
|
| 94 |
|
|
First.empty)
|
| 95 |
|
|
| SUser_defined (_, _, _, _) -> assert false
|
| 96 |
|
|
|
| 97 |
abate |
507 |
let validate_simple_type (simple_type_def: simple_type_def) value =
|
| 98 |
abate |
500 |
validate ~validator:(validator_of_simple_type simple_type_def)
|
| 99 |
|
|
([< 'Pxp_yacc.E_char_data value; 'Pxp_yacc.E_end_of_stream >])
|
| 100 |
|
|
|
| 101 |
abate |
507 |
let validate_attrs_of_uses (attr_uses: attribute_use list) attrs =
|
| 102 |
abate |
500 |
let attrs = (* (string * string) list -> (string StringMap.t) ref *)
|
| 103 |
|
|
ref (List.fold_left (fun map (k,v) -> StringMap.add k v map)
|
| 104 |
|
|
StringMap.empty attrs)
|
| 105 |
|
|
in
|
| 106 |
|
|
let record = Value.vrecord (List.fold_left
|
| 107 |
|
|
(fun fields (required, (name, st, _), constr) ->
|
| 108 |
|
|
try
|
| 109 |
|
|
let value =
|
| 110 |
|
|
try
|
| 111 |
|
|
let value_raw = StringMap.find name !attrs in
|
| 112 |
|
|
attrs := StringMap.remove name !attrs;
|
| 113 |
abate |
507 |
let value = validate_simple_type st value_raw in
|
| 114 |
abate |
500 |
(match constr with
|
| 115 |
|
|
| None | Some (Default _) -> value
|
| 116 |
|
|
| Some (Fixed v) when (Value.compare v value = 0)-> value
|
| 117 |
|
|
| Some (Fixed _) ->
|
| 118 |
|
|
raise (XSI_validation_error (sprintf "Value %s isn't \
|
| 119 |
|
|
compatible with 'fixed' constraint" value_raw)))
|
| 120 |
|
|
with Not_found ->
|
| 121 |
|
|
if required then
|
| 122 |
|
|
raise (XSI_validation_error
|
| 123 |
|
|
(sprintf "Required attribute '%s' is missing" name))
|
| 124 |
|
|
else (* optional *)
|
| 125 |
|
|
(match constr with
|
| 126 |
|
|
| None -> raise Stop
|
| 127 |
|
|
| Some (Fixed v) | Some (Default v) -> v)
|
| 128 |
|
|
in
|
| 129 |
|
|
(name, value) :: fields
|
| 130 |
|
|
with Stop -> fields)
|
| 131 |
|
|
[] attr_uses)
|
| 132 |
|
|
in
|
| 133 |
|
|
(* remaining attributes in "attrs" are undeclared or prohibited *)
|
| 134 |
|
|
match StringMap.fold (fun n _ acc -> n::acc) !attrs [] with
|
| 135 |
|
|
| [] -> record
|
| 136 |
|
|
| l ->
|
| 137 |
|
|
raise (XSI_validation_error ("The following attributes are undeclared or \
|
| 138 |
|
|
prohibited: " ^ String.concat ", " l))
|
| 139 |
|
|
|
| 140 |
|
|
let string_of_first ?(show_epsilon = false) first =
|
| 141 |
|
|
let elts =
|
| 142 |
|
|
First.fold
|
| 143 |
|
|
(fun elt acc ->
|
| 144 |
|
|
match elt with
|
| 145 |
|
|
| None when show_epsilon -> " EPSILON " :: acc
|
| 146 |
|
|
| None -> acc
|
| 147 |
|
|
| Some e -> e :: acc)
|
| 148 |
|
|
first []
|
| 149 |
|
|
in
|
| 150 |
|
|
String.concat ", " elts
|
| 151 |
|
|
|
| 152 |
abate |
507 |
let rec validator_of_particle (min, max, (term: term)) =
|
| 153 |
abate |
500 |
assert (not ((min = 0) && (max = Some 0))); (* TODO empty CM *)
|
| 154 |
|
|
assert (min >= 0);
|
| 155 |
|
|
assert (match max with Some n -> (n >= 0) | _ -> true);
|
| 156 |
|
|
assert (max >>= min);
|
| 157 |
|
|
let validator = validator_of_term term in
|
| 158 |
|
|
let term_first = snd validator in
|
| 159 |
|
|
let first =
|
| 160 |
|
|
let old_first = snd validator in
|
| 161 |
|
|
if min = 0 then First.add None old_first else old_first
|
| 162 |
|
|
in
|
| 163 |
|
|
match (min, max) with
|
| 164 |
|
|
| (min, Some max) ->
|
| 165 |
|
|
(fun stream ->
|
| 166 |
|
|
let content = ref [] in
|
| 167 |
|
|
for i = 1 to min do
|
| 168 |
|
|
content := validate ~validator stream :: !content
|
| 169 |
|
|
done;
|
| 170 |
|
|
(try
|
| 171 |
|
|
for i = 1 to max - min do
|
| 172 |
|
|
let next = peek_start_tag stream in
|
| 173 |
|
|
if not (First.mem (Some next) term_first) then
|
| 174 |
|
|
raise Stop
|
| 175 |
|
|
else
|
| 176 |
|
|
content := validate ~validator stream :: !content
|
| 177 |
|
|
done
|
| 178 |
|
|
with Stop | Not_a_start_tag _ -> ());
|
| 179 |
|
|
Value.sequence (List.rev !content)),
|
| 180 |
|
|
first
|
| 181 |
|
|
| (min, None) ->
|
| 182 |
|
|
(fun stream ->
|
| 183 |
|
|
let content = ref [] in
|
| 184 |
|
|
for i = 1 to min do
|
| 185 |
|
|
content := validate ~validator stream :: !content
|
| 186 |
|
|
done;
|
| 187 |
|
|
(try
|
| 188 |
|
|
while true do
|
| 189 |
|
|
let next = peek_start_tag stream in
|
| 190 |
|
|
if not (First.mem (Some next) term_first) then
|
| 191 |
|
|
raise Stop
|
| 192 |
|
|
else
|
| 193 |
|
|
content := validate ~validator stream :: !content
|
| 194 |
|
|
done
|
| 195 |
|
|
with Stop | Not_a_start_tag _ -> ());
|
| 196 |
|
|
Value.sequence (List.rev !content)),
|
| 197 |
|
|
first
|
| 198 |
|
|
|
| 199 |
|
|
and validator_of_term = function
|
| 200 |
|
|
| All [] | Choice [] | Sequence [] -> assert false (* TODO empty CM *)
|
| 201 |
|
|
| All _ -> assert false (* TODO xsd:all *)
|
| 202 |
|
|
| Choice particles -> (* TODO UPA *)
|
| 203 |
|
|
let validators = List.map validator_of_particle particles in
|
| 204 |
|
|
let find_validator name = (* find the validation function for a given
|
| 205 |
|
|
element *)
|
| 206 |
|
|
let rec aux = function
|
| 207 |
|
|
| [] -> raise Not_found
|
| 208 |
|
|
| ((_, first) as v) :: tl when (First.mem (Some name) first) -> v
|
| 209 |
|
|
| _ :: tl -> aux tl
|
| 210 |
|
|
in
|
| 211 |
|
|
aux validators
|
| 212 |
|
|
in
|
| 213 |
|
|
let first = (* union of choices' firsts *)
|
| 214 |
|
|
List.fold_left (fun acc (_, f) -> First.union f acc) First.empty
|
| 215 |
|
|
validators
|
| 216 |
|
|
in
|
| 217 |
|
|
(fun stream ->
|
| 218 |
|
|
let error found =
|
| 219 |
|
|
raise (XSI_validation_error (sprintf "Expected one of: %s; \
|
| 220 |
|
|
found %s" (string_of_first first) found))
|
| 221 |
|
|
in
|
| 222 |
|
|
let next =
|
| 223 |
|
|
try
|
| 224 |
|
|
peek_start_tag stream
|
| 225 |
|
|
with Not_a_start_tag ev -> error (Schema_xml.string_of_pxp_event ev)
|
| 226 |
|
|
in
|
| 227 |
|
|
let validator = try find_validator next with Not_found -> error next in
|
| 228 |
|
|
validate ~validator stream),
|
| 229 |
|
|
first
|
| 230 |
|
|
| Sequence particles ->
|
| 231 |
|
|
let validators = List.map validator_of_particle particles in
|
| 232 |
|
|
let first = (* union of first until epsilon is in one of them *)
|
| 233 |
|
|
let rec aux acc = function
|
| 234 |
|
|
| [] -> acc
|
| 235 |
|
|
| (_, first) :: tl ->
|
| 236 |
|
|
let next_first = First.union acc first in
|
| 237 |
|
|
if First.mem None first then aux next_first tl else next_first
|
| 238 |
|
|
in
|
| 239 |
|
|
aux First.empty validators
|
| 240 |
|
|
in
|
| 241 |
|
|
(fun stream ->
|
| 242 |
|
|
let values = ref [] in
|
| 243 |
|
|
List.iter
|
| 244 |
|
|
(fun v -> values := validate ~validator:v stream :: !values)
|
| 245 |
|
|
validators;
|
| 246 |
|
|
Value.sequence (List.rev !values)),
|
| 247 |
|
|
first
|
| 248 |
|
|
| Elt decl -> validator_of_elt_decl !decl
|
| 249 |
|
|
|
| 250 |
|
|
and validator_of_complex_type = function
|
| 251 |
|
|
| CBuilt_in s -> (* TODO uhm .... is this useful? *)
|
| 252 |
|
|
((fun _ -> assert false),
|
| 253 |
|
|
(pcdata_wrapper (Schema_builtin.__validate_fun_of_builtin s),
|
| 254 |
|
|
First.empty))
|
| 255 |
abate |
505 |
| CUser_defined (_, _, _, _, attr_uses, ct) ->
|
| 256 |
abate |
500 |
let validate_attrs = validate_attrs_of_uses attr_uses in
|
| 257 |
|
|
let content_validator =
|
| 258 |
|
|
match ct with
|
| 259 |
|
|
| CT_empty -> (fun _ -> Value.sequence []), First.empty
|
| 260 |
|
|
| CT_simple def -> validator_of_simple_type def
|
| 261 |
|
|
| CT_model (particle, mixed) ->
|
| 262 |
|
|
assert (not mixed); (* TODO mixed content support *)
|
| 263 |
|
|
validator_of_particle particle
|
| 264 |
|
|
in
|
| 265 |
|
|
(validate_attrs, content_validator)
|
| 266 |
|
|
|
| 267 |
abate |
507 |
and validator_of_elt_decl ((name, def, _): elt_decl) = (* TODO constraints *)
|
| 268 |
abate |
500 |
let first = First.singleton (Some name) in
|
| 269 |
|
|
match !def with
|
| 270 |
|
|
| S def ->
|
| 271 |
|
|
let validator = validator_of_simple_type def in
|
| 272 |
|
|
(fun stream ->
|
| 273 |
|
|
ignore (expect (`E_start_tag name) stream);
|
| 274 |
|
|
let cont = validate ~validator stream in
|
| 275 |
|
|
ignore (expect (`E_end_tag name) stream);
|
| 276 |
|
|
Value.Xml (Value.Atom (Atoms.mk_ascii name), Value.vrecord [], cont)),
|
| 277 |
|
|
first
|
| 278 |
|
|
| C def ->
|
| 279 |
|
|
let (validate_attrs, validator) = validator_of_complex_type def in
|
| 280 |
|
|
(fun stream ->
|
| 281 |
|
|
match expect (`E_start_tag name) stream with
|
| 282 |
|
|
| E_start_tag (name, attrs, _) ->
|
| 283 |
|
|
let attrs = validate_attrs attrs in
|
| 284 |
|
|
let cont = flatten_cont (validate ~validator stream) in
|
| 285 |
|
|
ignore (expect (`E_end_tag name) stream);
|
| 286 |
|
|
Value.Xml (Value.Atom (Atoms.mk_ascii name), attrs, cont)
|
| 287 |
|
|
| _ -> assert false),
|
| 288 |
|
|
first
|
| 289 |
|
|
|