/[svn]/schema/schema_xml.ml
ViewVC logotype

Contents of /schema/schema_xml.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 806 - (show annotations)
Tue Jul 10 18:04:23 2007 UTC (5 years, 10 months ago) by abate
File size: 6748 byte(s)
[r2003-11-24 09:12:00 by szach] use Ns.mk_ascii

Original author: szach
Date: 2003-11-24 09:12:00+00:00
1
2 open Pxp_document
3 open Pxp_ev_parser
4 open Pxp_tree_parser
5 open Pxp_types
6
7 type pxp_node =
8 ('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.node
9 type pxp_document =
10 ('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.document
11
12 let regexp' s = Pcre.regexp ~flags:[`UTF8] s
13 let xsd_RE = regexp' "^xsd:"
14 let namespace_split name = (* Pxp_aux.namespace_split *)
15 try
16 let n = String.index name ':' in (* may raise Not_found *)
17 let prefix = String.sub name 0 n in
18 let localname = String.sub name (n+1) (String.length name - n - 1)
19 in
20 (prefix, localname)
21 with
22 Not_found -> ("", name)
23
24 let has_xsd_prefix s = Pcre.pmatch ~rex:xsd_RE s
25
26 let xsd_namespace = "http://www.w3.org/2001/XMLSchema"
27 let xsi_namespace = "http://www.w3.org/2001/XMLSchema-instance"
28 let xsd_prefix = "xsd"
29 let xsi_prefix = "xsi"
30
31 let schema_ns_prefixes =
32 [ xsd_prefix, xsd_namespace; xsi_prefix, xsi_namespace ]
33
34 let spec = default_namespace_spec
35 let new_xsd_config () =
36 let ns_manager = new Pxp_dtd.namespace_manager in
37 List.iter (fun (p, ns) -> ns_manager#add_namespace p ns) schema_ns_prefixes;
38 { default_namespace_config with
39 Pxp_types.enable_namespace_processing = Some ns_manager
40 }
41
42 let pxp_node_of ?(config = new_xsd_config ()) src =
43 parse_wfcontent_entity config src spec
44 let pxp_document_of ?(config = new_xsd_config ()) src =
45 parse_wfdocument_entity config src spec
46
47 let pxp_stream_of_file ?(config = new_xsd_config ()) fname =
48 let config = { config with drop_ignorable_whitespace = true } in
49 let entity_manager =
50 create_entity_manager ~is_document:true config (from_file fname)
51 in
52 let pull_parser =
53 create_pull_parser config
54 (`Entry_document [ `Extend_dtd_fully; `Parse_xml_decl ])
55 entity_manager
56 in
57 Stream.from pull_parser
58
59 (*
60 class foo_entity_id = object end
61 let eid = new foo_entity_id
62 type to_be_visited =
63 | Fully of Value.t (* xml values still to be visited *)
64 | Half of Value.t (* xml values half visited (i.e. E_start_tag generated) *)
65 | Other of Value.t (* other values *)
66
67 let pxp_stream_of_value v =
68 let stack = ref [Fully v] in
69 let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of
70 what is still to be visited *)
71 (match !stack with
72 | (Fully ((Value.Xml (Value.Atom a, attrs, _)) as v)) :: tl ->
73 let (ns,a) = Atoms.V.value a in
74 assert( ns == Ns.empty );
75 let tag_ascii = Encodings.Utf8.to_string a in
76 let attrs_ascii =
77 List.map (fun (k, v) -> (k, Value.get_string_latin1 v))
78 (Value.get_fields attrs)
79 in
80 let event = Some (E_start_tag (tag_ascii, attrs_ascii, eid)) in
81 stack := (Half v) :: tl;
82 let children = ref [] in (* TODO inefficient *)
83 let push v = children := v :: !children in
84 Value.iter_xml
85 (fun pcdata -> push (Other (Value.string_utf8 pcdata)))
86 (fun v ->
87 match v with
88 | (Value.Xml (_, _, _)) as v -> push (Fully v)
89 | v -> raise (Invalid_argument "Schema_xml.pxp_stream_of_value"))
90 v;
91 stack := (List.rev !children) @ !stack;
92 event
93 | (Half (Value.Xml (Value.Atom a, _, _))) :: tl ->
94 let (ns,a) = Atoms.V.value a in
95 assert( ns == Ns.empty );
96 let tag_ascii = Encodings.Utf8.to_string a in
97 let event = Some (E_end_tag (tag_ascii, eid)) in
98 stack := tl;
99 event
100 | (Fully (Value.Xml (_, _, _)))::_ | (Half (Value.Xml (_, _, _)))::_ ->
101 failwith "Schema_xml.pxp_stream_of_value: non-atom-tag xml value"
102 | (Other v) :: tl ->
103 let event = Some (E_char_data (Value.get_string_latin1 v)) in
104 stack := tl;
105 event
106 | [] -> None
107 | _ -> assert false)
108 in
109 Stream.from f
110 *)
111
112 open Printf
113
114 let string_of_pxp_event = function
115 | E_start_doc (version, standalone, dtd) -> "E_start_doc"
116 | E_end_doc -> "E_end_doc"
117 | E_start_tag (name, attlist, entity_id) -> sprintf "E_start_tag (%s)" name
118 | E_end_tag (name, entity_id) -> sprintf "E_end_tag (%s)" name
119 | E_char_data data ->
120 sprintf "E_char_data (%s)" (Pcre.replace ~pat:"\n" ~templ:"\\n" data)
121 | E_pinstr (target, value) -> "E_pinstr"
122 | E_comment data -> "E_comment"
123 | E_position (entity, line, col) -> "E_position"
124 | E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn)
125 | E_end_of_stream -> "E_end_of_stream"
126 | _ -> assert false
127
128 let rec dump_stream s =
129 print_endline (string_of_pxp_event (Stream.next s));
130 flush stdout;
131 dump_stream s
132
133 let dump_stream s = try dump_stream s with Stream.Failure -> ()
134
135 (* peek version that assume the stream isn't at the end *)
136 let peek s =
137 match Stream.peek s with
138 | Some v -> v
139 | None -> raise Stream.Failure
140
141 (* collect all E_char_data events from a PXP stream and return the
142 concatenation of their datas *)
143 let collect_pcdata s =
144 let buf = Buffer.create 1 in
145 let rec collect () =
146 match peek s with
147 | E_char_data d ->
148 Buffer.add_string buf d; Stream.junk s; collect ()
149 | _ -> Buffer.contents buf
150 in
151 collect ()
152
153 module Pxp_helpers =
154 struct
155
156 open Pxp_document
157 open Pxp_types
158
159 exception PxpHelpers of exn
160 let _raise e = raise (PxpHelpers e)
161 let space_RE = regexp' " "
162
163 let _tag_name (n: pxp_node) =
164 match n#node_type with
165 | T_element tag -> tag
166 | _ -> raise Not_found
167
168 let _has_attribute name (n: pxp_node) =
169 try
170 match n#attribute name with
171 | Value _ -> true
172 | _ -> false
173 with Not_found -> false
174
175 let _attribute name (n: pxp_node) =
176 match n#attribute name with
177 | Value v -> v
178 | _ -> raise Not_found
179
180 let _has_element e (n: pxp_node) =
181 try ignore (find_element e n); true with Not_found -> false
182
183 let _element e (n: pxp_node): pxp_node = find_element e n
184 let _elements e (n: pxp_node): pxp_node list = find_all_elements e n
185
186 let _element' names (n: pxp_node): pxp_node =
187 let node = ref None in
188 (try
189 n#iter_nodes (fun n ->
190 (match n#node_type with
191 | T_element name when List.mem name names ->
192 node := Some n;
193 raise Exit
194 | _ -> ()))
195 with Exit -> ());
196 match !node with None -> raise Not_found | Some n -> n
197
198 let _elements' names (n: pxp_node): pxp_node list =
199 find_all (fun n ->
200 match n#node_type with
201 | T_element name when List.mem name names -> true
202 | _ -> false) n
203
204 let _has_element' names (n: pxp_node) =
205 try ignore (_element' names n); true with Not_found -> false
206
207 end
208
209 (** export Ns.t version of defined namespaces *)
210
211 let xsd_namespace = Ns.mk_ascii xsd_namespace
212 let xsi_namespace = Ns.mk_ascii xsi_namespace
213

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5