| 1 |
abate |
507 |
open Pxp_document
|
| 2 |
abate |
759 |
open Pxp_ev_parser
|
| 3 |
|
|
open Pxp_tree_parser
|
| 4 |
abate |
507 |
open Pxp_types
|
| 5 |
abate |
500 |
|
| 6 |
abate |
812 |
open Encodings
|
| 7 |
abate |
1440 |
open Schema_pcre
|
| 8 |
abate |
812 |
|
| 9 |
abate |
759 |
type pxp_node =
|
| 10 |
|
|
('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.node
|
| 11 |
|
|
type pxp_document =
|
| 12 |
|
|
('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.document
|
| 13 |
abate |
500 |
|
| 14 |
abate |
812 |
let xsd_RE = pcre_regexp "^xsd:"
|
| 15 |
abate |
500 |
|
| 16 |
abate |
812 |
let has_xsd_prefix s = Pcre.pmatch ~rex:xsd_RE (Utf8.get_str s)
|
| 17 |
abate |
500 |
|
| 18 |
abate |
812 |
let xsd_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema"
|
| 19 |
|
|
let xsi_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema-instance"
|
| 20 |
|
|
let xsd_prefix = Utf8.mk "xsd"
|
| 21 |
|
|
let xsi_prefix = Utf8.mk "xsi"
|
| 22 |
|
|
let add_xsd_prefix =
|
| 23 |
|
|
let prefix = Utf8.concat xsd_prefix (Utf8.mk ":") in
|
| 24 |
|
|
fun s -> Utf8.concat prefix s
|
| 25 |
abate |
500 |
|
| 26 |
abate |
759 |
let schema_ns_prefixes =
|
| 27 |
|
|
[ xsd_prefix, xsd_namespace; xsi_prefix, xsi_namespace ]
|
| 28 |
abate |
500 |
|
| 29 |
abate |
759 |
let spec = default_namespace_spec
|
| 30 |
|
|
let new_xsd_config () =
|
| 31 |
|
|
let ns_manager = new Pxp_dtd.namespace_manager in
|
| 32 |
abate |
812 |
List.iter
|
| 33 |
|
|
(fun (p, ns) -> ns_manager#add_namespace (Utf8.get_str p) (Utf8.get_str ns))
|
| 34 |
|
|
schema_ns_prefixes;
|
| 35 |
abate |
759 |
{ default_namespace_config with
|
| 36 |
|
|
Pxp_types.enable_namespace_processing = Some ns_manager
|
| 37 |
|
|
}
|
| 38 |
abate |
500 |
|
| 39 |
abate |
1451 |
let pxp_node_of src =
|
| 40 |
|
|
parse_wfcontent_entity (new_xsd_config ()) src spec
|
| 41 |
abate |
500 |
|
| 42 |
|
|
|
| 43 |
abate |
507 |
open Printf
|
| 44 |
abate |
500 |
|
| 45 |
|
|
let string_of_pxp_event = function
|
| 46 |
abate |
1260 |
| E_start_doc (version, dtd) -> "E_start_doc"
|
| 47 |
|
|
| E_end_doc _ -> "E_end_doc"
|
| 48 |
|
|
| E_start_tag (name, attlist, _, entity_id) -> sprintf "E_start_tag (%s)" name
|
| 49 |
abate |
500 |
| E_end_tag (name, entity_id) -> sprintf "E_end_tag (%s)" name
|
| 50 |
|
|
| E_char_data data ->
|
| 51 |
|
|
sprintf "E_char_data (%s)" (Pcre.replace ~pat:"\n" ~templ:"\\n" data)
|
| 52 |
abate |
1261 |
| E_pinstr _ -> "E_pinstr"
|
| 53 |
abate |
500 |
| E_comment data -> "E_comment"
|
| 54 |
|
|
| E_position (entity, line, col) -> "E_position"
|
| 55 |
|
|
| E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn)
|
| 56 |
|
|
| E_end_of_stream -> "E_end_of_stream"
|
| 57 |
abate |
528 |
| _ -> assert false
|
| 58 |
abate |
500 |
|
| 59 |
|
|
let rec dump_stream s =
|
| 60 |
|
|
print_endline (string_of_pxp_event (Stream.next s));
|
| 61 |
|
|
flush stdout;
|
| 62 |
|
|
dump_stream s
|
| 63 |
|
|
|
| 64 |
abate |
507 |
let dump_stream s = try dump_stream s with Stream.Failure -> ()
|
| 65 |
abate |
500 |
|
| 66 |
|
|
(* peek version that assume the stream isn't at the end *)
|
| 67 |
|
|
let peek s =
|
| 68 |
|
|
match Stream.peek s with
|
| 69 |
|
|
| Some v -> v
|
| 70 |
|
|
| None -> raise Stream.Failure
|
| 71 |
|
|
|
| 72 |
|
|
(* collect all E_char_data events from a PXP stream and return the
|
| 73 |
|
|
concatenation of their datas *)
|
| 74 |
|
|
let collect_pcdata s =
|
| 75 |
|
|
let buf = Buffer.create 1 in
|
| 76 |
|
|
let rec collect () =
|
| 77 |
|
|
match peek s with
|
| 78 |
|
|
| E_char_data d ->
|
| 79 |
|
|
Buffer.add_string buf d; Stream.junk s; collect ()
|
| 80 |
|
|
| _ -> Buffer.contents buf
|
| 81 |
|
|
in
|
| 82 |
|
|
collect ()
|
| 83 |
|
|
|
| 84 |
abate |
759 |
module Pxp_helpers =
|
| 85 |
|
|
struct
|
| 86 |
|
|
|
| 87 |
|
|
open Pxp_document
|
| 88 |
|
|
open Pxp_types
|
| 89 |
|
|
|
| 90 |
|
|
exception PxpHelpers of exn
|
| 91 |
|
|
let _raise e = raise (PxpHelpers e)
|
| 92 |
abate |
812 |
let space_RE = pcre_regexp " "
|
| 93 |
abate |
759 |
|
| 94 |
|
|
let _tag_name (n: pxp_node) =
|
| 95 |
|
|
match n#node_type with
|
| 96 |
abate |
812 |
| T_element tag -> Utf8.mk tag
|
| 97 |
abate |
759 |
| _ -> raise Not_found
|
| 98 |
|
|
|
| 99 |
|
|
let _has_attribute name (n: pxp_node) =
|
| 100 |
|
|
try
|
| 101 |
|
|
match n#attribute name with
|
| 102 |
|
|
| Value _ -> true
|
| 103 |
|
|
| _ -> false
|
| 104 |
|
|
with Not_found -> false
|
| 105 |
|
|
|
| 106 |
|
|
let _attribute name (n: pxp_node) =
|
| 107 |
|
|
match n#attribute name with
|
| 108 |
abate |
812 |
| Value v -> Utf8.mk v
|
| 109 |
abate |
759 |
| _ -> raise Not_found
|
| 110 |
|
|
|
| 111 |
|
|
let _has_element e (n: pxp_node) =
|
| 112 |
|
|
try ignore (find_element e n); true with Not_found -> false
|
| 113 |
|
|
|
| 114 |
|
|
let _element e (n: pxp_node): pxp_node = find_element e n
|
| 115 |
|
|
let _elements e (n: pxp_node): pxp_node list = find_all_elements e n
|
| 116 |
|
|
|
| 117 |
|
|
let _element' names (n: pxp_node): pxp_node =
|
| 118 |
|
|
let node = ref None in
|
| 119 |
|
|
(try
|
| 120 |
|
|
n#iter_nodes (fun n ->
|
| 121 |
|
|
(match n#node_type with
|
| 122 |
|
|
| T_element name when List.mem name names ->
|
| 123 |
|
|
node := Some n;
|
| 124 |
|
|
raise Exit
|
| 125 |
|
|
| _ -> ()))
|
| 126 |
|
|
with Exit -> ());
|
| 127 |
|
|
match !node with None -> raise Not_found | Some n -> n
|
| 128 |
|
|
|
| 129 |
|
|
let _elements' names (n: pxp_node): pxp_node list =
|
| 130 |
|
|
find_all (fun n ->
|
| 131 |
|
|
match n#node_type with
|
| 132 |
|
|
| T_element name when List.mem name names -> true
|
| 133 |
|
|
| _ -> false) n
|
| 134 |
|
|
|
| 135 |
|
|
let _has_element' names (n: pxp_node) =
|
| 136 |
|
|
try ignore (_element' names n); true with Not_found -> false
|
| 137 |
|
|
|
| 138 |
|
|
end
|
| 139 |
|
|
|
| 140 |
abate |
800 |
(** export Ns.t version of defined namespaces *)
|
| 141 |
|
|
|
| 142 |
abate |
812 |
let xsd_namespace = Ns.mk xsd_namespace
|
| 143 |
|
|
let xsi_namespace = Ns.mk xsi_namespace
|
| 144 |
abate |
800 |
|