/[svn]/schema/schema_xml.ml
ViewVC logotype

Contents of /schema/schema_xml.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 500 - (show annotations)
Tue Jul 10 17:39:22 2007 UTC (5 years, 10 months ago) by abate
File size: 7626 byte(s)
[r2003-06-12 11:54:45 by cvscast] Merging schema

Original author: cvscast
Date: 2003-06-12 11:54:49+00:00
1
2 open Pxp_document ;;
3 open Pxp_yacc ;;
4 open Pxp_types ;;
5
6 (* TODO avoid using this extension when unneeded (e.g. instance document
7 parsing *)
8 class schema_extension =
9 let is_ncname = (* TODO check if s is a NCName, this is only a hack that
10 checks if no spaces are presents *)
11 let space_RE = Pcre.regexp " " in
12 fun s -> not (Pcre.pmatch ~rex:space_RE s)
13 in
14 object (self)
15
16 val mutable node = (None: schema_extension node option)
17
18 method clone = {< >}
19 method node = match node with None -> assert false | Some n -> n
20 method set_node n = node <- Some n
21
22 method private string_attribute name = (* TODO check if we should accept
23 also Valuelist attributes *)
24 match self#node#attribute name with Value v -> v | _ -> raise Not_found
25 method private ncattr name = self#string_attribute name
26
27 method base = self#ncattr "base"
28 method default = self#ncattr "default"
29 method fixed = self#ncattr "fixed"
30 method maxOccurs = self#ncattr "maxOccurs"
31 method minOccurs = self#ncattr "minOccurs"
32 method mixed = bool_of_string (self#ncattr "mixed")
33 method name = self#ncattr "name"
34 method ref = self#ncattr "ref"
35 method typ = self#ncattr "type"
36 method value = self#ncattr "value"
37
38 method prohibited =
39 try
40 match self#ncattr "use" with
41 | "prohibited" -> true
42 | _ -> false
43 with Not_found -> false
44
45 method required =
46 try
47 match self#ncattr "use" with
48 | "required" -> true
49 | _ -> false
50 with Not_found -> false
51
52 method has_attribute a = List.mem a (self#node#attribute_names)
53 method has_element e =
54 try
55 ignore (find_element e self#node); true
56 with Not_found -> false
57
58 method find_facets =
59 let facets = ref [] in
60 self#node#iter_nodes (fun n -> match n#node_type with
61 | T_element "length" | T_element "minLength" | T_element "maxLength"
62 | T_element "pattern" | T_element "enumeration" | T_element "whiteSpace"
63 | T_element "maxInclusive" | T_element "maxExclusive"
64 | T_element "minInclusive" | T_element "minExclusive"
65 | T_element "totalDigits" | T_element "fractionDigits" ->
66 facets := n :: !facets
67 | _ -> ());
68 !facets
69
70 method find_terms =
71 List.filter
72 (fun n ->
73 match n#node_type with
74 | T_element "xsd:all"
75 | T_element "xsd:choice"
76 | T_element "xsd:sequence"
77 | T_element "xsd:element" -> true
78 | _ -> false)
79 self#node#sub_nodes
80
81 method find_term =
82 match self#find_terms with
83 | [t] -> t
84 | [] -> raise Not_found
85 | _ -> failwith "too many term nodes"
86
87 method find_attributes =
88 List.filter
89 (fun n ->
90 match n#node_type with
91 | T_element "xsd:attribute" -> true
92 | _ -> false)
93 self#node#sub_nodes
94
95 method private find_gen tagname name =
96 match
97 find_all
98 (fun n ->
99 (n#node_type = T_element tagname) &&
100 (try n#extension#name = name with Not_found -> false))
101 self#node
102 with
103 | [t] -> t
104 | [] -> raise Not_found
105 | _ -> assert false
106
107 method find_simpleType = self#find_gen "xsd:simpleType"
108 method find_complexType = self#find_gen "xsd:complexType"
109 method find_global_element = self#find_gen "xsd:element"
110 method find_global_attribute = self#find_gen "xsd:attribute"
111
112 method has_no_term = self#find_terms = []
113
114 method sub_elements =
115 List.filter
116 (fun n -> match n#node_type with T_element _ -> true | _ -> false)
117 self#node#sub_nodes
118 method nth_element = List.nth self#sub_elements
119
120 end
121 ;;
122
123 let spec =
124 make_spec_from_alist
125 ~data_exemplar: (new data_impl (new schema_extension))
126 ~default_element_exemplar: (new element_impl (new schema_extension))
127 ~element_alist: []
128 ()
129 ;;
130
131 let pxp_tree_of fname =
132 parse_wfdocument_entity default_config (from_file fname) spec
133 ;;
134
135 let pxp_stream_of_file fname =
136 let config = { default_config with drop_ignorable_whitespace = true } in
137 let entity_manager =
138 create_entity_manager ~is_document:true config (from_file fname)
139 in
140 let pull_parser =
141 create_pull_parser default_config
142 (`Entry_document [ `Extend_dtd_fully; `Parse_xml_decl ])
143 entity_manager
144 in
145 Stream.from pull_parser
146 ;;
147
148 class foo_entity_id = object end ;;
149 let eid = new foo_entity_id ;;
150 type to_be_visited =
151 | Fully of Value.t (* xml values still to be visited *)
152 | Half of Value.t (* xml values half visited (i.e. E_start_tag generated) *)
153 | Other of Value.t (* other values *)
154 ;;
155
156 let pxp_stream_of_value v =
157 let stack = ref [Fully v] in
158 let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of
159 what is still to be visited *)
160 (match !stack with
161 | (Fully ((Value.Xml (Value.Atom a, attrs, cont)) as v)) :: tl ->
162 let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in
163 let attrs_ascii =
164 List.map (fun (k, v) -> (k, Value.get_string_latin1 v))
165 (Value.get_fields attrs)
166 in
167 let event = Some (E_start_tag (tag_ascii, attrs_ascii, eid)) in
168 (match cont with
169 | v when v = Value.nil -> (* no content *)
170 stack := (Half v) :: tl
171 | seq ->
172 stack := (Half v) :: tl;
173 List.iter
174 (function
175 | (Value.Xml (_, _, _)) as v -> stack := (Fully v) :: !stack
176 | v -> stack := (Other v) :: !stack)
177 (Value.explode_rev seq));
178 event
179 | (Half (Value.Xml (Value.Atom a, _, _))) :: tl ->
180 let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in
181 let event = Some (E_end_tag (tag_ascii, eid)) in
182 stack := tl;
183 event
184 | (Fully (Value.Xml (_, _, _)))::_ | (Half (Value.Xml (_, _, _)))::_ ->
185 failwith "Schema_xml.pxp_stream_of_value: non-atom-tag xml value"
186 | (Other v) :: tl ->
187 let event = Some (E_char_data (Value.get_string_latin1 v)) in
188 stack := tl;
189 event
190 | [] -> None
191 | _ -> assert false) (* TODO assertion failure with regtest/flatten.xml *)
192 in
193 Stream.from f
194 ;;
195
196 open Printf ;;
197
198 let string_of_pxp_event = function
199 | E_start_doc (version, standalone, dtd) -> "E_start_doc"
200 | E_end_doc -> "E_end_doc"
201 | E_start_tag (name, attlist, entity_id) -> sprintf "E_start_tag (%s)" name
202 | E_end_tag (name, entity_id) -> sprintf "E_end_tag (%s)" name
203 | E_char_data data ->
204 sprintf "E_char_data (%s)" (Pcre.replace ~pat:"\n" ~templ:"\\n" data)
205 | E_pinstr (target, value) -> "E_pinstr"
206 | E_comment data -> "E_comment"
207 | E_position (entity, line, col) -> "E_position"
208 | E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn)
209 | E_end_of_stream -> "E_end_of_stream"
210 ;;
211
212 let rec dump_stream s =
213 print_endline (string_of_pxp_event (Stream.next s));
214 flush stdout;
215 dump_stream s
216 ;;
217
218 let dump_stream s = try dump_stream s with Stream.Failure -> () ;;
219
220 (* peek version that assume the stream isn't at the end *)
221 let peek s =
222 match Stream.peek s with
223 | Some v -> v
224 | None -> raise Stream.Failure
225 ;;
226
227 (* collect all E_char_data events from a PXP stream and return the
228 concatenation of their datas *)
229 let collect_pcdata s =
230 let buf = Buffer.create 1 in
231 let rec collect () =
232 match peek s with
233 | E_char_data d ->
234 Buffer.add_string buf d; Stream.junk s; collect ()
235 | _ -> Buffer.contents buf
236 in
237 collect ()
238 ;;
239

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5