/[svn]/schema/schema_xml.ml
ViewVC logotype

Diff of /schema/schema_xml.ml

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 500 by abate, Tue Jul 10 17:39:22 2007 UTC revision 507 by abate, Tue Jul 10 17:40:14 2007 UTC
# Line 1  Line 1 
1    
2  open Pxp_document ;;  open Pxp_document
3  open Pxp_yacc ;;  open Pxp_yacc
4  open Pxp_types ;;  open Pxp_types
5    
   (* TODO avoid using this extension when unneeded (e.g. instance document  
   parsing *)  
6  class schema_extension =  class schema_extension =
7    let is_ncname = (* TODO check if s is a NCName, this is only a hack that    let is_ncname = (* TODO check if s is a NCName, this is only a hack that
8                    checks if no spaces are presents *)                    checks if no spaces are presents *)
# Line 81  Line 79 
79      method find_term =      method find_term =
80        match self#find_terms with        match self#find_terms with
81        | [t] -> t        | [t] -> t
82        | [] -> raise Not_found        | _ -> raise Not_found
       | _ -> failwith "too many term nodes"  
83    
84      method find_attributes =      method find_attributes =
85        List.filter        List.filter
# Line 118  Line 115 
115      method nth_element = List.nth self#sub_elements      method nth_element = List.nth self#sub_elements
116    
117    end    end
 ;;  
118    
119  let spec =  let spec =
120    make_spec_from_alist    make_spec_from_alist
# Line 126  Line 122 
122      ~default_element_exemplar:  (new element_impl (new schema_extension))      ~default_element_exemplar:  (new element_impl (new schema_extension))
123      ~element_alist:             []      ~element_alist:             []
124      ()      ()
 ;;  
125    
126  let pxp_tree_of fname =  let pxp_tree_of fname =
127    parse_wfdocument_entity default_config (from_file fname) spec    parse_wfdocument_entity default_config (from_file fname) spec
 ;;  
128    
129  let pxp_stream_of_file fname =  let pxp_stream_of_file fname =
130    let config = { default_config with drop_ignorable_whitespace = true } in    let config = { default_config with drop_ignorable_whitespace = true } in
# Line 143  Line 137 
137        entity_manager        entity_manager
138    in    in
139    Stream.from pull_parser    Stream.from pull_parser
 ;;  
140    
141  class foo_entity_id = object end ;;  class foo_entity_id = object end
142  let eid = new foo_entity_id ;;  let eid = new foo_entity_id
143  type to_be_visited =  type to_be_visited =
144    | Fully of Value.t  (* xml values still to be visited *)    | Fully of Value.t  (* xml values still to be visited *)
145    | Half of Value.t   (* xml values half visited (i.e. E_start_tag generated) *)    | Half of Value.t   (* xml values half visited (i.e. E_start_tag generated) *)
146    | Other of Value.t  (* other values *)    | Other of Value.t  (* other values *)
 ;;  
147    
148  let pxp_stream_of_value v =  let pxp_stream_of_value v =
149    let stack = ref [Fully v] in    let stack = ref [Fully v] in
150    let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of    let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of
151              what is still to be visited *)              what is still to be visited *)
152      (match !stack with      (match !stack with
153      | (Fully ((Value.Xml (Value.Atom a, attrs, cont)) as v)) :: tl ->      | (Fully ((Value.Xml (Value.Atom a, attrs, _)) as v)) :: tl ->
154          let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in          let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in
155          let attrs_ascii =          let attrs_ascii =
156            List.map (fun (k, v) -> (k, Value.get_string_latin1 v))            List.map (fun (k, v) -> (k, Value.get_string_latin1 v))
157              (Value.get_fields attrs)              (Value.get_fields attrs)
158          in          in
159          let event = Some (E_start_tag (tag_ascii, attrs_ascii, eid)) in          let event = Some (E_start_tag (tag_ascii, attrs_ascii, eid)) in
         (match cont with  
         | v when v = Value.nil -> (* no content *)  
             stack := (Half v) :: tl  
         | seq ->  
160              stack := (Half v) :: tl;              stack := (Half v) :: tl;
161              List.iter          let children = ref [] in  (* TODO inefficient *)
162                (function          let push v = children := v :: !children in
163                  | (Value.Xml (_, _, _)) as v -> stack := (Fully v) :: !stack          Value.iter_xml
164                  | v -> stack := (Other v) :: !stack)            (fun pcdata -> push (Other (Value.string_utf8 pcdata)))
165                (Value.explode_rev seq));            (fun v ->
166                match v with
167                | (Value.Xml (_, _, _)) as v -> push (Fully v)
168                | v -> raise (Invalid_argument "Schema_xml.pxp_stream_of_value"))
169              v;
170            stack := (List.rev !children) @ !stack;
171          event          event
172      | (Half (Value.Xml (Value.Atom a, _, _))) :: tl ->      | (Half (Value.Xml (Value.Atom a, _, _))) :: tl ->
173          let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in          let tag_ascii = Encodings.Utf8.to_string (Atoms.value a) in
# Line 188  Line 181 
181          stack := tl;          stack := tl;
182          event          event
183      | [] -> None      | [] -> None
184      | _ -> assert false)  (* TODO assertion failure with regtest/flatten.xml *)      | _ -> assert false)
185    in    in
186    Stream.from f    Stream.from f
 ;;  
187    
188  open Printf ;;  open Printf
189    
190  let string_of_pxp_event = function  let string_of_pxp_event = function
191    | E_start_doc (version, standalone, dtd) -> "E_start_doc"    | E_start_doc (version, standalone, dtd) -> "E_start_doc"
# Line 207  Line 199 
199    | E_position (entity, line, col) -> "E_position"    | E_position (entity, line, col) -> "E_position"
200    | E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn)    | E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn)
201    | E_end_of_stream -> "E_end_of_stream"    | E_end_of_stream -> "E_end_of_stream"
 ;;  
202    
203  let rec dump_stream s =  let rec dump_stream s =
204    print_endline (string_of_pxp_event (Stream.next s));    print_endline (string_of_pxp_event (Stream.next s));
205    flush stdout;    flush stdout;
206    dump_stream s    dump_stream s
 ;;  
207    
208  let dump_stream s = try dump_stream s with Stream.Failure -> () ;;  let dump_stream s = try dump_stream s with Stream.Failure -> ()
209    
210    (* peek version that assume the stream isn't at the end *)    (* peek version that assume the stream isn't at the end *)
211  let peek s =  let peek s =
212    match Stream.peek s with    match Stream.peek s with
213    | Some v -> v    | Some v -> v
214    | None -> raise Stream.Failure    | None -> raise Stream.Failure
 ;;  
215    
216    (* collect all E_char_data events from a PXP stream and return the    (* collect all E_char_data events from a PXP stream and return the
217    concatenation of their datas *)    concatenation of their datas *)
# Line 235  Line 224 
224      | _ -> Buffer.contents buf      | _ -> Buffer.contents buf
225    in    in
226    collect ()    collect ()
 ;;  
227    

Legend:
Removed from v.500  
changed lines
  Added in v.507

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5