|
|
|
|
open Pxp_document |
|
|
open Pxp_ev_parser |
|
|
open Pxp_tree_parser |
|
|
open Pxp_types |
|
|
|
|
| 1 |
open Encodings |
open Encodings |
| 2 |
open Encodings.Utf8.Pcre |
open Schema_pcre |
| 3 |
|
|
| 4 |
type pxp_node = |
type node = |
| 5 |
('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.node |
('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.node |
|
type pxp_document = |
|
|
('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.document |
|
| 6 |
|
|
| 7 |
let xsd_RE = pcre_regexp "^xsd:" |
module Node = struct |
| 8 |
|
type t = node |
| 9 |
|
let compare = Pxp_document.compare |
| 10 |
|
end |
| 11 |
|
|
| 12 |
let has_xsd_prefix s = Pcre.pmatch ~rex:xsd_RE (Utf8.get_str s) |
let start_with s pr = |
| 13 |
|
let s = Utf8.get_str s in |
| 14 |
|
(String.length s >= String.length pr) && |
| 15 |
|
(String.sub s 0 (String.length pr) = pr) |
| 16 |
|
|
| 17 |
|
let has_xsd_prefix s = start_with s "xsd:" |
| 18 |
|
|
| 19 |
let xsd_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema" |
let xsd_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema" |
| 20 |
let xsi_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema-instance" |
let xsi_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema-instance" |
| 21 |
let xsd_prefix = Utf8.mk "xsd" |
let xsd_prefix = Utf8.mk "xsd" |
| 22 |
let xsi_prefix = Utf8.mk "xsi" |
let xsi_prefix = Utf8.mk "xsi" |
|
let add_xsd_prefix = |
|
|
let prefix = Utf8.concat xsd_prefix (Utf8.mk ":") in |
|
|
fun s -> Utf8.concat prefix s |
|
| 23 |
|
|
| 24 |
let schema_ns_prefixes = |
let schema_ns_prefixes = |
| 25 |
[ xsd_prefix, xsd_namespace; xsi_prefix, xsi_namespace ] |
[ xsd_prefix, xsd_namespace; xsi_prefix, xsi_namespace ] |
| 26 |
|
|
| 27 |
let spec = default_namespace_spec |
let spec = Pxp_tree_parser.default_namespace_spec |
| 28 |
let new_xsd_config () = |
let new_xsd_config () = |
| 29 |
let ns_manager = new Pxp_dtd.namespace_manager in |
let ns_manager = new Pxp_dtd.namespace_manager in |
| 30 |
List.iter |
List.iter |
| 31 |
(fun (p, ns) -> ns_manager#add_namespace (Utf8.get_str p) (Utf8.get_str ns)) |
(fun (p, ns) -> ns_manager#add_namespace (Utf8.get_str p) (Utf8.get_str ns)) |
| 32 |
schema_ns_prefixes; |
schema_ns_prefixes; |
| 33 |
{ default_namespace_config with |
{ Pxp_types.default_namespace_config with |
| 34 |
|
Pxp_types.encoding = `Enc_utf8; |
| 35 |
Pxp_types.enable_namespace_processing = Some ns_manager |
Pxp_types.enable_namespace_processing = Some ns_manager |
| 36 |
} |
} |
| 37 |
|
|
| 38 |
let pxp_node_of ?(config = new_xsd_config ()) src = |
let node_of src = |
| 39 |
parse_wfcontent_entity config src spec |
(Pxp_tree_parser.parse_wfdocument_entity (new_xsd_config ()) src spec) # root |
|
let pxp_document_of ?(config = new_xsd_config ()) src = |
|
|
parse_wfdocument_entity config src spec |
|
|
|
|
|
let pxp_stream_of_file ?(config = new_xsd_config ()) fname = |
|
|
let config = { config with drop_ignorable_whitespace = true } in |
|
|
let entity_manager = |
|
|
create_entity_manager ~is_document:true config (from_file fname) |
|
|
in |
|
|
let pull_parser = |
|
|
create_pull_parser config |
|
|
(`Entry_document [ `Extend_dtd_fully; `Parse_xml_decl ]) |
|
|
entity_manager |
|
|
in |
|
|
Stream.from pull_parser |
|
|
|
|
|
(* |
|
|
class foo_entity_id = object end |
|
|
let eid = new foo_entity_id |
|
|
type to_be_visited = |
|
|
| Fully of Value.t (* xml values still to be visited *) |
|
|
| Half of Value.t (* xml values half visited (i.e. E_start_tag generated) *) |
|
|
| Other of Value.t (* other values *) |
|
|
|
|
|
let pxp_stream_of_value v = |
|
|
let stack = ref [Fully v] in |
|
|
let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of |
|
|
what is still to be visited *) |
|
|
(match !stack with |
|
|
| (Fully ((Value.Xml (Value.Atom a, attrs, _)) as v)) :: tl -> |
|
|
let (ns,a) = Atoms.V.value a in |
|
|
assert( ns == Ns.empty ); |
|
|
let tag_ascii = Encodings.Utf8.to_string a in |
|
|
let attrs_ascii = |
|
|
List.map (fun (k, v) -> (k, Value.get_string_latin1 v)) |
|
|
(Value.get_fields attrs) |
|
|
in |
|
|
let event = Some (E_start_tag (tag_ascii, attrs_ascii, eid)) in |
|
|
stack := (Half v) :: tl; |
|
|
let children = ref [] in (* TODO inefficient *) |
|
|
let push v = children := v :: !children in |
|
|
Value.iter_xml |
|
|
(fun pcdata -> push (Other (Value.string_utf8 pcdata))) |
|
|
(fun v -> |
|
|
match v with |
|
|
| (Value.Xml (_, _, _)) as v -> push (Fully v) |
|
|
| v -> raise (Invalid_argument "Schema_xml.pxp_stream_of_value")) |
|
|
v; |
|
|
stack := (List.rev !children) @ !stack; |
|
|
event |
|
|
| (Half (Value.Xml (Value.Atom a, _, _))) :: tl -> |
|
|
let (ns,a) = Atoms.V.value a in |
|
|
assert( ns == Ns.empty ); |
|
|
let tag_ascii = Encodings.Utf8.to_string a in |
|
|
let event = Some (E_end_tag (tag_ascii, eid)) in |
|
|
stack := tl; |
|
|
event |
|
|
| (Fully (Value.Xml (_, _, _)))::_ | (Half (Value.Xml (_, _, _)))::_ -> |
|
|
failwith "Schema_xml.pxp_stream_of_value: non-atom-tag xml value" |
|
|
| (Other v) :: tl -> |
|
|
let event = Some (E_char_data (Value.get_string_latin1 v)) in |
|
|
stack := tl; |
|
|
event |
|
|
| [] -> None |
|
|
| _ -> assert false) |
|
|
in |
|
|
Stream.from f |
|
|
*) |
|
| 40 |
|
|
| 41 |
open Printf |
let node_of_uri uri = |
| 42 |
|
try |
| 43 |
let string_of_pxp_event = function |
let source = match Url.process uri with |
| 44 |
| E_start_doc (version, standalone, dtd) -> "E_start_doc" |
| Url.Filename s -> Pxp_types.from_file s |
| 45 |
| E_end_doc -> "E_end_doc" |
| Url.Url s -> Pxp_types.from_string s |
|
| E_start_tag (name, attlist, entity_id) -> sprintf "E_start_tag (%s)" name |
|
|
| E_end_tag (name, entity_id) -> sprintf "E_end_tag (%s)" name |
|
|
| E_char_data data -> |
|
|
sprintf "E_char_data (%s)" (Pcre.replace ~pat:"\n" ~templ:"\\n" data) |
|
|
| E_pinstr (target, value) -> "E_pinstr" |
|
|
| E_comment data -> "E_comment" |
|
|
| E_position (entity, line, col) -> "E_position" |
|
|
| E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn) |
|
|
| E_end_of_stream -> "E_end_of_stream" |
|
|
| _ -> assert false |
|
|
|
|
|
let rec dump_stream s = |
|
|
print_endline (string_of_pxp_event (Stream.next s)); |
|
|
flush stdout; |
|
|
dump_stream s |
|
|
|
|
|
let dump_stream s = try dump_stream s with Stream.Failure -> () |
|
|
|
|
|
(* peek version that assume the stream isn't at the end *) |
|
|
let peek s = |
|
|
match Stream.peek s with |
|
|
| Some v -> v |
|
|
| None -> raise Stream.Failure |
|
|
|
|
|
(* collect all E_char_data events from a PXP stream and return the |
|
|
concatenation of their datas *) |
|
|
let collect_pcdata s = |
|
|
let buf = Buffer.create 1 in |
|
|
let rec collect () = |
|
|
match peek s with |
|
|
| E_char_data d -> |
|
|
Buffer.add_string buf d; Stream.junk s; collect () |
|
|
| _ -> Buffer.contents buf |
|
| 46 |
in |
in |
| 47 |
collect () |
node_of source |
| 48 |
|
with exn -> raise (Location.Generic (Pxp_types.string_of_exn exn)) |
| 49 |
|
|
| 50 |
module Pxp_helpers = |
let _may_attr name n = |
| 51 |
struct |
try |
| 52 |
|
match n#attribute name with |
| 53 |
open Pxp_document |
| Pxp_types.Value v -> Some (Utf8.mk v) |
| 54 |
open Pxp_types |
| _ -> None |
| 55 |
|
with Not_found -> None |
|
exception PxpHelpers of exn |
|
|
let _raise e = raise (PxpHelpers e) |
|
|
let space_RE = pcre_regexp " " |
|
|
|
|
|
let _tag_name (n: pxp_node) = |
|
|
match n#node_type with |
|
|
| T_element tag -> Utf8.mk tag |
|
|
| _ -> raise Not_found |
|
| 56 |
|
|
| 57 |
let _has_attribute name (n: pxp_node) = |
let _is_attr name n v = |
| 58 |
try |
try |
| 59 |
match n#attribute name with |
match n#attribute name with |
| 60 |
| Value _ -> true |
| Pxp_types.Value v' -> v = v' |
| 61 |
| _ -> false |
| _ -> false |
| 62 |
with Not_found -> false |
with Not_found -> false |
| 63 |
|
|
| 64 |
let _attribute name (n: pxp_node) = |
let _attr name n = |
| 65 |
match n#attribute name with |
match n#attribute name with |
| 66 |
| Value v -> Utf8.mk v |
| Pxp_types.Value v -> Utf8.mk v |
| 67 |
| _ -> raise Not_found |
| _ -> failwith ("Attribute " ^ name ^ " is missing") |
| 68 |
|
|
| 69 |
let _has_element e (n: pxp_node) = |
let _may_elem e (n: node) = |
| 70 |
try ignore (find_element e n); true with Not_found -> false |
try Some (Pxp_document.find_element e n) with Not_found -> None |
| 71 |
|
|
| 72 |
let _element e (n: pxp_node): pxp_node = find_element e n |
let _elems e n = Pxp_document.find_all_elements e n |
|
let _elements e (n: pxp_node): pxp_node list = find_all_elements e n |
|
| 73 |
|
|
| 74 |
let _element' names (n: pxp_node): pxp_node = |
let _filter_elems p n = |
| 75 |
let node = ref None in |
Pxp_document.find_all (fun n -> match n#node_type with |
| 76 |
(try |
| Pxp_document.T_element s -> List.mem s p |
| 77 |
n#iter_nodes (fun n -> |
| _ -> false) n |
| 78 |
(match n#node_type with |
|
| 79 |
| T_element name when List.mem name names -> |
let _line n = match n#position with (_,l,_) -> l |
|
node := Some n; |
|
|
raise Exit |
|
|
| _ -> ())) |
|
|
with Exit -> ()); |
|
|
match !node with None -> raise Not_found | Some n -> n |
|
| 80 |
|
|
| 81 |
let _elements' names (n: pxp_node): pxp_node list = |
let _iter_nodes n f = n#iter_nodes f |
| 82 |
find_all (fun n -> |
|
| 83 |
|
let _iter_elems n f = n#iter_nodes |
| 84 |
|
(fun n -> |
| 85 |
match n#node_type with |
match n#node_type with |
| 86 |
| T_element name when List.mem name names -> true |
| Pxp_document.T_element s -> f n s |
| 87 |
| _ -> false) n |
| _ -> () |
| 88 |
|
) |
| 89 |
|
|
| 90 |
|
let _fold_elems n x f = |
| 91 |
|
let x = ref x in |
| 92 |
|
n#iter_nodes |
| 93 |
|
(fun n -> |
| 94 |
|
match n#node_type with |
| 95 |
|
| Pxp_document.T_element s -> x := f !x n s |
| 96 |
|
| _ -> () |
| 97 |
|
); |
| 98 |
|
!x |
| 99 |
|
|
| 100 |
let _has_element' names (n: pxp_node) = |
let _tag n = |
| 101 |
try ignore (_element' names n); true with Not_found -> false |
match n#node_type with |
| 102 |
|
| Pxp_document.T_element s -> s |
| 103 |
|
| _ -> assert false |
| 104 |
|
|
| 105 |
end |
let _has_tag n f = |
| 106 |
|
match n#node_type with |
| 107 |
|
| Pxp_document.T_element s -> f s |
| 108 |
|
| _ -> false |
| 109 |
|
|
| 110 |
|
let _namespaces n = |
| 111 |
|
List.map |
| 112 |
|
(fun n -> |
| 113 |
|
(match n#node_type with |
| 114 |
|
Pxp_document.T_namespace p -> p | _ -> assert false), |
| 115 |
|
n#data |
| 116 |
|
) |
| 117 |
|
n#namespaces_as_nodes |
| 118 |
|
|
| 119 |
|
let _find p n = Pxp_document.find p n |
| 120 |
|
|
| 121 |
|
let _resolve_qname n qname = |
| 122 |
|
let (prefix,local) = Ns.split_qname qname in |
| 123 |
|
let ns = |
| 124 |
|
try Ns.mk (Utf8.mk (n # namespace_scope # uri_of_display_prefix prefix)) |
| 125 |
|
with Not_found -> Ns.empty |
| 126 |
|
in |
| 127 |
|
(ns,local) |
| 128 |
|
|
| 129 |
|
let _may_qname_attr name n = |
| 130 |
|
match _may_attr name n with |
| 131 |
|
| Some qname -> Some (_resolve_qname n qname) |
| 132 |
|
| None -> None |
| 133 |
|
|
| 134 |
(** export Ns.t version of defined namespaces *) |
let _qname_attr name n = |
| 135 |
|
match _may_attr name n with |
| 136 |
|
| Some qname -> _resolve_qname n qname |
| 137 |
|
| None -> assert false |
| 138 |
|
|
|
let xsd_namespace = Ns.mk xsd_namespace |
|
|
let xsi_namespace = Ns.mk xsi_namespace |
|
| 139 |
|
|
| 140 |
|
let xsd = Ns.mk xsd_namespace |
| 141 |
|
let xsi = Ns.mk xsi_namespace |