/[svn]/schema/schema_xml.ml
ViewVC logotype

Contents of /schema/schema_xml.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 507 - (show annotations)
Tue Jul 10 17:40:14 2007 UTC (5 years, 10 months ago) by abate
File size: 7475 byte(s)
[r2003-06-13 10:02:51 by cvscast] - removed unneeded refs from Schema_types

- use iter_xml instead of explode_rev to generate streams of PXP
  events from a CDuce value

Original author: cvscast
Date: 2003-06-13 10:02:51+00:00
1
2 open Pxp_document
3 open Pxp_yacc
4 open Pxp_types
5
6 class schema_extension =
7 let is_ncname = (* TODO check if s is a NCName, this is only a hack that
8 checks if no spaces are presents *)
9 let space_RE = Pcre.regexp " " in
10 fun s -> not (Pcre.pmatch ~rex:space_RE s)
11 in
12 object (self)
13
14 val mutable node = (None: schema_extension node option)
15
16 method clone = {< >}
17 method node = match node with None -> assert false | Some n -> n
18 method set_node n = node <- Some n
19
20 method private string_attribute name = (* TODO check if we should accept
21 also Valuelist attributes *)
22 match self#node#attribute name with Value v -> v | _ -> raise Not_found
23 method private ncattr name = self#string_attribute name
24
25 method base = self#ncattr "base"
26 method default = self#ncattr "default"
27 method fixed = self#ncattr "fixed"
28 method maxOccurs = self#ncattr "maxOccurs"
29 method minOccurs = self#ncattr "minOccurs"
30 method mixed = bool_of_string (self#ncattr "mixed")
31 method name = self#ncattr "name"
32 method ref = self#ncattr "ref"
33 method typ = self#ncattr "type"
34 method value = self#ncattr "value"
35
36 method prohibited =
37 try
38 match self#ncattr "use" with
39 | "prohibited" -> true
40 | _ -> false
41 with Not_found -> false
42
43 method required =
44 try
45 match self#ncattr "use" with
46 | "required" -> true
47 | _ -> false
48 with Not_found -> false
49
50 method has_attribute a = List.mem a (self#node#attribute_names)
51 method has_element e =
52 try
53 ignore (find_element e self#node); true
54 with Not_found -> false
55
56 method find_facets =
57 let facets = ref [] in
58 self#node#iter_nodes (fun n -> match n#node_type with
59 | T_element "length" | T_element "minLength" | T_element "maxLength"
60 | T_element "pattern" | T_element "enumeration" | T_element "whiteSpace"
61 | T_element "maxInclusive" | T_element "maxExclusive"
62 | T_element "minInclusive" | T_element "minExclusive"
63 | T_element "totalDigits" | T_element "fractionDigits" ->
64 facets := n :: !facets
65 | _ -> ());
66 !facets
67
68 method find_terms =
69 List.filter
70 (fun n ->
71 match n#node_type with
72 | T_element "xsd:all"
73 | T_element "xsd:choice"
74 | T_element "xsd:sequence"
75 | T_element "xsd:element" -> true
76 | _ -> false)
77 self#node#sub_nodes
78
79 method find_term =
80 match self#find_terms with
81 | [t] -> t
82 | _ -> raise Not_found
83
84 method find_attributes =
85 List.filter
86 (fun n ->
87 match n#node_type with
88 | T_element "xsd:attribute" -> true
89 | _ -> false)
90 self#node#sub_nodes
91
92 method private find_gen tagname name =
93 match
94 find_all
95 (fun n ->
96 (n#node_type = T_element tagname) &&
97 (try n#extension#name = name with Not_found -> false))
98 self#node
99 with
100 | [t] -> t
101 | [] -> raise Not_found
102 | _ -> assert false
103
104 method find_simpleType = self#find_gen "xsd:simpleType"
105 method find_complexType = self#find_gen "xsd:complexType"
106 method find_global_element = self#find_gen "xsd:element"
107 method find_global_attribute = self#find_gen "xsd:attribute"
108
109 method has_no_term = self#find_terms = []
110
111 method sub_elements =
112 List.filter
113 (fun n -> match n#node_type with T_element _ -> true | _ -> false)
114 self#node#sub_nodes
115 method nth_element = List.nth self#sub_elements
116
117 end
118
119 let spec =
120 make_spec_from_alist
121 ~data_exemplar: (new data_impl (new schema_extension))
122 ~default_element_exemplar: (new element_impl (new schema_extension))
123 ~element_alist: []
124 ()
125
126 let pxp_tree_of fname =
127 parse_wfdocument_entity default_config (from_file fname) spec
128
129 let pxp_stream_of_file fname =
130 let config = { default_config with drop_ignorable_whitespace = true } in
131 let entity_manager =
132 create_entity_manager ~is_document:true config (from_file fname)
133 in
134 let pull_parser =
135 create_pull_parser default_config
136 (`Entry_document [ `Extend_dtd_fully; `Parse_xml_decl ])
137 entity_manager
138 in
139 Stream.from pull_parser
140
141 class foo_entity_id = object end
142 let eid = new foo_entity_id
143 type to_be_visited =
144 | Fully of Value.t (* xml values still to be visited *)
145 | Half of Value.t (* xml values half visited (i.e. E_start_tag generated) *)
146 | Other of Value.t (* other values *)
147
148 let pxp_stream_of_value v =
149 let stack = ref [Fully v] in
150 let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of
151 what is still to be visited *)
152 (match !stack with
153 | (Fully ((Value.Xml (Value.Atom a, attrs, _)) as v)) :: tl ->
154 let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in
155 let attrs_ascii =
156 List.map (fun (k, v) -> (k, Value.get_string_latin1 v))
157 (Value.get_fields attrs)
158 in
159 let event = Some (E_start_tag (tag_ascii, attrs_ascii, eid)) in
160 stack := (Half v) :: tl;
161 let children = ref [] in (* TODO inefficient *)
162 let push v = children := v :: !children in
163 Value.iter_xml
164 (fun pcdata -> push (Other (Value.string_utf8 pcdata)))
165 (fun v ->
166 match v with
167 | (Value.Xml (_, _, _)) as v -> push (Fully v)
168 | v -> raise (Invalid_argument "Schema_xml.pxp_stream_of_value"))
169 v;
170 stack := (List.rev !children) @ !stack;
171 event
172 | (Half (Value.Xml (Value.Atom a, _, _))) :: tl ->
173 let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in
174 let event = Some (E_end_tag (tag_ascii, eid)) in
175 stack := tl;
176 event
177 | (Fully (Value.Xml (_, _, _)))::_ | (Half (Value.Xml (_, _, _)))::_ ->
178 failwith "Schema_xml.pxp_stream_of_value: non-atom-tag xml value"
179 | (Other v) :: tl ->
180 let event = Some (E_char_data (Value.get_string_latin1 v)) in
181 stack := tl;
182 event
183 | [] -> None
184 | _ -> assert false)
185 in
186 Stream.from f
187
188 open Printf
189
190 let string_of_pxp_event = function
191 | E_start_doc (version, standalone, dtd) -> "E_start_doc"
192 | E_end_doc -> "E_end_doc"
193 | E_start_tag (name, attlist, entity_id) -> sprintf "E_start_tag (%s)" name
194 | E_end_tag (name, entity_id) -> sprintf "E_end_tag (%s)" name
195 | E_char_data data ->
196 sprintf "E_char_data (%s)" (Pcre.replace ~pat:"\n" ~templ:"\\n" data)
197 | E_pinstr (target, value) -> "E_pinstr"
198 | E_comment data -> "E_comment"
199 | E_position (entity, line, col) -> "E_position"
200 | E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn)
201 | E_end_of_stream -> "E_end_of_stream"
202
203 let rec dump_stream s =
204 print_endline (string_of_pxp_event (Stream.next s));
205 flush stdout;
206 dump_stream s
207
208 let dump_stream s = try dump_stream s with Stream.Failure -> ()
209
210 (* peek version that assume the stream isn't at the end *)
211 let peek s =
212 match Stream.peek s with
213 | Some v -> v
214 | None -> raise Stream.Failure
215
216 (* collect all E_char_data events from a PXP stream and return the
217 concatenation of their datas *)
218 let collect_pcdata s =
219 let buf = Buffer.create 1 in
220 let rec collect () =
221 match peek s with
222 | E_char_data d ->
223 Buffer.add_string buf d; Stream.junk s; collect ()
224 | _ -> Buffer.contents buf
225 in
226 collect ()
227

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5