| 1 |
|
|
| 2 |
open Pxp_document ;; |
open Pxp_document |
| 3 |
open Pxp_yacc ;; |
open Pxp_yacc |
| 4 |
open Pxp_types ;; |
open Pxp_types |
| 5 |
|
|
|
(* TODO avoid using this extension when unneeded (e.g. instance document |
|
|
parsing *) |
|
| 6 |
class schema_extension = |
class schema_extension = |
| 7 |
let is_ncname = (* TODO check if s is a NCName, this is only a hack that |
let is_ncname = (* TODO check if s is a NCName, this is only a hack that |
| 8 |
checks if no spaces are presents *) |
checks if no spaces are presents *) |
| 79 |
method find_term = |
method find_term = |
| 80 |
match self#find_terms with |
match self#find_terms with |
| 81 |
| [t] -> t |
| [t] -> t |
| 82 |
| [] -> raise Not_found |
| _ -> raise Not_found |
|
| _ -> failwith "too many term nodes" |
|
| 83 |
|
|
| 84 |
method find_attributes = |
method find_attributes = |
| 85 |
List.filter |
List.filter |
| 115 |
method nth_element = List.nth self#sub_elements |
method nth_element = List.nth self#sub_elements |
| 116 |
|
|
| 117 |
end |
end |
|
;; |
|
| 118 |
|
|
| 119 |
let spec = |
let spec = |
| 120 |
make_spec_from_alist |
make_spec_from_alist |
| 122 |
~default_element_exemplar: (new element_impl (new schema_extension)) |
~default_element_exemplar: (new element_impl (new schema_extension)) |
| 123 |
~element_alist: [] |
~element_alist: [] |
| 124 |
() |
() |
|
;; |
|
| 125 |
|
|
| 126 |
let pxp_tree_of fname = |
let pxp_tree_of fname = |
| 127 |
parse_wfdocument_entity default_config (from_file fname) spec |
parse_wfdocument_entity default_config (from_file fname) spec |
|
;; |
|
| 128 |
|
|
| 129 |
let pxp_stream_of_file fname = |
let pxp_stream_of_file fname = |
| 130 |
let config = { default_config with drop_ignorable_whitespace = true } in |
let config = { default_config with drop_ignorable_whitespace = true } in |
| 137 |
entity_manager |
entity_manager |
| 138 |
in |
in |
| 139 |
Stream.from pull_parser |
Stream.from pull_parser |
|
;; |
|
| 140 |
|
|
| 141 |
class foo_entity_id = object end ;; |
class foo_entity_id = object end |
| 142 |
let eid = new foo_entity_id ;; |
let eid = new foo_entity_id |
| 143 |
type to_be_visited = |
type to_be_visited = |
| 144 |
| Fully of Value.t (* xml values still to be visited *) |
| Fully of Value.t (* xml values still to be visited *) |
| 145 |
| Half of Value.t (* xml values half visited (i.e. E_start_tag generated) *) |
| Half of Value.t (* xml values half visited (i.e. E_start_tag generated) *) |
| 146 |
| Other of Value.t (* other values *) |
| Other of Value.t (* other values *) |
|
;; |
|
| 147 |
|
|
| 148 |
let pxp_stream_of_value v = |
let pxp_stream_of_value v = |
| 149 |
let stack = ref [Fully v] in |
let stack = ref [Fully v] in |
| 150 |
let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of |
let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of |
| 151 |
what is still to be visited *) |
what is still to be visited *) |
| 152 |
(match !stack with |
(match !stack with |
| 153 |
| (Fully ((Value.Xml (Value.Atom a, attrs, cont)) as v)) :: tl -> |
| (Fully ((Value.Xml (Value.Atom a, attrs, _)) as v)) :: tl -> |
| 154 |
let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in |
let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in |
| 155 |
let attrs_ascii = |
let attrs_ascii = |
| 156 |
List.map (fun (k, v) -> (k, Value.get_string_latin1 v)) |
List.map (fun (k, v) -> (k, Value.get_string_latin1 v)) |
| 157 |
(Value.get_fields attrs) |
(Value.get_fields attrs) |
| 158 |
in |
in |
| 159 |
let event = Some (E_start_tag (tag_ascii, attrs_ascii, eid)) in |
let event = Some (E_start_tag (tag_ascii, attrs_ascii, eid)) in |
|
(match cont with |
|
|
| v when v = Value.nil -> (* no content *) |
|
|
stack := (Half v) :: tl |
|
|
| seq -> |
|
| 160 |
stack := (Half v) :: tl; |
stack := (Half v) :: tl; |
| 161 |
List.iter |
let children = ref [] in (* TODO inefficient *) |
| 162 |
(function |
let push v = children := v :: !children in |
| 163 |
| (Value.Xml (_, _, _)) as v -> stack := (Fully v) :: !stack |
Value.iter_xml |
| 164 |
| v -> stack := (Other v) :: !stack) |
(fun pcdata -> push (Other (Value.string_utf8 pcdata))) |
| 165 |
(Value.explode_rev seq)); |
(fun v -> |
| 166 |
|
match v with |
| 167 |
|
| (Value.Xml (_, _, _)) as v -> push (Fully v) |
| 168 |
|
| v -> raise (Invalid_argument "Schema_xml.pxp_stream_of_value")) |
| 169 |
|
v; |
| 170 |
|
stack := (List.rev !children) @ !stack; |
| 171 |
event |
event |
| 172 |
| (Half (Value.Xml (Value.Atom a, _, _))) :: tl -> |
| (Half (Value.Xml (Value.Atom a, _, _))) :: tl -> |
| 173 |
let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in |
let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in |
| 181 |
stack := tl; |
stack := tl; |
| 182 |
event |
event |
| 183 |
| [] -> None |
| [] -> None |
| 184 |
| _ -> assert false) (* TODO assertion failure with regtest/flatten.xml *) |
| _ -> assert false) |
| 185 |
in |
in |
| 186 |
Stream.from f |
Stream.from f |
|
;; |
|
| 187 |
|
|
| 188 |
open Printf ;; |
open Printf |
| 189 |
|
|
| 190 |
let string_of_pxp_event = function |
let string_of_pxp_event = function |
| 191 |
| E_start_doc (version, standalone, dtd) -> "E_start_doc" |
| E_start_doc (version, standalone, dtd) -> "E_start_doc" |
| 199 |
| E_position (entity, line, col) -> "E_position" |
| E_position (entity, line, col) -> "E_position" |
| 200 |
| E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn) |
| E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn) |
| 201 |
| E_end_of_stream -> "E_end_of_stream" |
| E_end_of_stream -> "E_end_of_stream" |
|
;; |
|
| 202 |
|
|
| 203 |
let rec dump_stream s = |
let rec dump_stream s = |
| 204 |
print_endline (string_of_pxp_event (Stream.next s)); |
print_endline (string_of_pxp_event (Stream.next s)); |
| 205 |
flush stdout; |
flush stdout; |
| 206 |
dump_stream s |
dump_stream s |
|
;; |
|
| 207 |
|
|
| 208 |
let dump_stream s = try dump_stream s with Stream.Failure -> () ;; |
let dump_stream s = try dump_stream s with Stream.Failure -> () |
| 209 |
|
|
| 210 |
(* peek version that assume the stream isn't at the end *) |
(* peek version that assume the stream isn't at the end *) |
| 211 |
let peek s = |
let peek s = |
| 212 |
match Stream.peek s with |
match Stream.peek s with |
| 213 |
| Some v -> v |
| Some v -> v |
| 214 |
| None -> raise Stream.Failure |
| None -> raise Stream.Failure |
|
;; |
|
| 215 |
|
|
| 216 |
(* collect all E_char_data events from a PXP stream and return the |
(* collect all E_char_data events from a PXP stream and return the |
| 217 |
concatenation of their datas *) |
concatenation of their datas *) |
| 224 |
| _ -> Buffer.contents buf |
| _ -> Buffer.contents buf |
| 225 |
in |
in |
| 226 |
collect () |
collect () |
|
;; |
|
| 227 |
|
|