/[svn]/schema/schema_xml.ml
ViewVC logotype

Diff of /schema/schema_xml.ml

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 812 by abate, Tue Jul 10 18:04:55 2007 UTC revision 1488 by abate, Tue Jul 10 18:54:55 2007 UTC
# Line 1  Line 1 
   
 open Pxp_document  
 open Pxp_ev_parser  
 open Pxp_tree_parser  
 open Pxp_types  
   
1  open Encodings  open Encodings
2  open Encodings.Utf8.Pcre  open Schema_pcre
3    
4  type pxp_node =  type node =
5    ('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.node    ('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.node
 type pxp_document =  
   ('a Pxp_document.node Pxp_document.extension as 'a) Pxp_document.document  
6    
7  let xsd_RE = pcre_regexp "^xsd:"  module Node = struct
8      type t = node
9      let compare = Pxp_document.compare
10    end
11    
12  let has_xsd_prefix s = Pcre.pmatch ~rex:xsd_RE (Utf8.get_str s)  let start_with s pr =
13      let s = Utf8.get_str s in
14      (String.length s >= String.length pr) &&
15        (String.sub s 0 (String.length pr) = pr)
16    
17    let has_xsd_prefix s = start_with s "xsd:"
18    
19  let xsd_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema"  let xsd_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema"
20  let xsi_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema-instance"  let xsi_namespace = Utf8.mk "http://www.w3.org/2001/XMLSchema-instance"
21  let xsd_prefix = Utf8.mk "xsd"  let xsd_prefix = Utf8.mk "xsd"
22  let xsi_prefix = Utf8.mk "xsi"  let xsi_prefix = Utf8.mk "xsi"
 let add_xsd_prefix =  
   let prefix = Utf8.concat xsd_prefix (Utf8.mk ":") in  
   fun s -> Utf8.concat prefix s  
23    
24  let schema_ns_prefixes =  let schema_ns_prefixes =
25    [ xsd_prefix, xsd_namespace; xsi_prefix, xsi_namespace ]    [ xsd_prefix, xsd_namespace; xsi_prefix, xsi_namespace ]
26    
27  let spec = default_namespace_spec  let spec = Pxp_tree_parser.default_namespace_spec
28  let new_xsd_config () =  let new_xsd_config () =
29    let ns_manager = new Pxp_dtd.namespace_manager in    let ns_manager = new Pxp_dtd.namespace_manager in
30    List.iter    List.iter
31      (fun (p, ns) -> ns_manager#add_namespace (Utf8.get_str p) (Utf8.get_str ns))      (fun (p, ns) -> ns_manager#add_namespace (Utf8.get_str p) (Utf8.get_str ns))
32      schema_ns_prefixes;      schema_ns_prefixes;
33    { default_namespace_config with    { Pxp_types.default_namespace_config with
34          Pxp_types.encoding = `Enc_utf8;
35        Pxp_types.enable_namespace_processing = Some ns_manager        Pxp_types.enable_namespace_processing = Some ns_manager
36    }    }
37    
38  let pxp_node_of ?(config = new_xsd_config ()) src =  let node_of src =
39    parse_wfcontent_entity config src spec    (Pxp_tree_parser.parse_wfdocument_entity (new_xsd_config ()) src spec) # root
 let pxp_document_of ?(config = new_xsd_config ()) src =  
   parse_wfdocument_entity config src spec  
   
 let pxp_stream_of_file ?(config = new_xsd_config ()) fname =  
   let config = { config with drop_ignorable_whitespace = true } in  
   let entity_manager =  
     create_entity_manager ~is_document:true config (from_file fname)  
   in  
   let pull_parser =  
     create_pull_parser config  
       (`Entry_document [ `Extend_dtd_fully; `Parse_xml_decl ])  
       entity_manager  
   in  
   Stream.from pull_parser  
   
 (*  
 class foo_entity_id = object end  
 let eid = new foo_entity_id  
 type to_be_visited =  
   | Fully of Value.t  (* xml values still to be visited *)  
   | Half of Value.t   (* xml values half visited (i.e. E_start_tag generated) *)  
   | Other of Value.t  (* other values *)  
   
 let pxp_stream_of_value v =  
   let stack = ref [Fully v] in  
   let f _ = (* lazy visit of a tree of CDuce XML values, stack keeps track of  
             what is still to be visited *)  
     (match !stack with  
     | (Fully ((Value.Xml (Value.Atom a, attrs, _)) as v)) :: tl ->  
         let (ns,a) = Atoms.V.value a in  
         assert( ns == Ns.empty );  
         let tag_ascii = Encodings.Utf8.to_string a in  
         let attrs_ascii =  
           List.map (fun (k, v) -> (k, Value.get_string_latin1 v))  
             (Value.get_fields attrs)  
         in  
         let event = Some (E_start_tag (tag_ascii, attrs_ascii, eid)) in  
         stack := (Half v) :: tl;  
         let children = ref [] in  (* TODO inefficient *)  
         let push v = children := v :: !children in  
         Value.iter_xml  
           (fun pcdata -> push (Other (Value.string_utf8 pcdata)))  
           (fun v ->  
             match v with  
             | (Value.Xml (_, _, _)) as v -> push (Fully v)  
             | v -> raise (Invalid_argument "Schema_xml.pxp_stream_of_value"))  
           v;  
         stack := (List.rev !children) @ !stack;  
         event  
     | (Half (Value.Xml (Value.Atom a, _, _))) :: tl ->  
         let (ns,a) = Atoms.V.value a in  
         assert( ns == Ns.empty );  
         let tag_ascii = Encodings.Utf8.to_string a in  
         let event = Some (E_end_tag (tag_ascii, eid)) in  
         stack := tl;  
         event  
     | (Fully (Value.Xml (_, _, _)))::_ | (Half (Value.Xml (_, _, _)))::_ ->  
         failwith "Schema_xml.pxp_stream_of_value: non-atom-tag xml value"  
     | (Other v) :: tl ->  
         let event = Some (E_char_data (Value.get_string_latin1 v)) in  
         stack := tl;  
         event  
     | [] -> None  
     | _ -> assert false)  
   in  
   Stream.from f  
 *)  
40    
41  open Printf  let node_of_uri uri =
42      try
43  let string_of_pxp_event = function      let source = match Url.process uri with
44    | E_start_doc (version, standalone, dtd) -> "E_start_doc"        | Url.Filename s -> Pxp_types.from_file s
45    | E_end_doc -> "E_end_doc"        | Url.Url s -> Pxp_types.from_string s
   | E_start_tag (name, attlist, entity_id) -> sprintf "E_start_tag (%s)" name  
   | E_end_tag (name, entity_id) -> sprintf "E_end_tag (%s)" name  
   | E_char_data data ->  
       sprintf "E_char_data (%s)" (Pcre.replace ~pat:"\n" ~templ:"\\n" data)  
   | E_pinstr (target, value) -> "E_pinstr"  
   | E_comment data -> "E_comment"  
   | E_position (entity, line, col) -> "E_position"  
   | E_error exn -> sprintf "E_error (%s)" (Pxp_types.string_of_exn exn)  
   | E_end_of_stream -> "E_end_of_stream"  
   | _ -> assert false  
   
 let rec dump_stream s =  
   print_endline (string_of_pxp_event (Stream.next s));  
   flush stdout;  
   dump_stream s  
   
 let dump_stream s = try dump_stream s with Stream.Failure -> ()  
   
   (* peek version that assume the stream isn't at the end *)  
 let peek s =  
   match Stream.peek s with  
   | Some v -> v  
   | None -> raise Stream.Failure  
   
   (* collect all E_char_data events from a PXP stream and return the  
   concatenation of their datas *)  
 let collect_pcdata s =  
   let buf = Buffer.create 1 in  
   let rec collect () =  
     match peek s with  
     | E_char_data d ->  
         Buffer.add_string buf d; Stream.junk s; collect ()  
     | _ -> Buffer.contents buf  
46    in    in
47    collect ()      node_of source
48      with exn -> raise (Location.Generic (Pxp_types.string_of_exn exn))
49    
50  module Pxp_helpers =  let _may_attr name n =
51    struct    try
52        match n#attribute name with
53      open Pxp_document        | Pxp_types.Value v -> Some (Utf8.mk v)
54      open Pxp_types        | _ -> None
55      with Not_found -> None
     exception PxpHelpers of exn  
     let _raise e = raise (PxpHelpers e)  
     let space_RE = pcre_regexp " "  
   
     let _tag_name (n: pxp_node) =  
       match n#node_type with  
       | T_element tag -> Utf8.mk tag  
       | _ -> raise Not_found  
56    
57      let _has_attribute name (n: pxp_node) =  let _is_attr name n v =
58        try        try
59          match n#attribute name with          match n#attribute name with
60          | Value _ -> true        | Pxp_types.Value v' -> v = v'
61          | _ -> false          | _ -> false
62        with Not_found -> false        with Not_found -> false
63    
64      let _attribute name (n: pxp_node) =  let _attr name n =
65        match n#attribute name with        match n#attribute name with
66        | Value v -> Utf8.mk v      | Pxp_types.Value v -> Utf8.mk v
67        | _ -> raise Not_found      | _ -> failwith ("Attribute " ^ name ^ " is missing")
68    
69      let _has_element e (n: pxp_node) =  let _may_elem e (n: node) =
70        try ignore (find_element e n); true with Not_found -> false    try Some (Pxp_document.find_element e n) with Not_found -> None
71    
72      let _element e (n: pxp_node): pxp_node = find_element e n  let _elems e n = Pxp_document.find_all_elements e n
     let _elements e (n: pxp_node): pxp_node list = find_all_elements e n  
73    
74      let _element' names (n: pxp_node): pxp_node =  let _filter_elems p n =
75        let node = ref None in    Pxp_document.find_all (fun n -> match n#node_type with
76        (try                             | Pxp_document.T_element s -> List.mem s p
77          n#iter_nodes (fun n ->                             | _ -> false) n
78            (match n#node_type with  
79            | T_element name when List.mem name names ->  let _line n = match n#position with (_,l,_) -> l
               node := Some n;  
               raise Exit  
           | _ -> ()))  
       with Exit -> ());  
       match !node with None -> raise Not_found | Some n -> n  
80    
81      let _elements' names (n: pxp_node): pxp_node list =  let _iter_nodes n f = n#iter_nodes f
82        find_all (fun n ->  
83    let _iter_elems n f = n#iter_nodes
84      (fun n ->
85          match n#node_type with          match n#node_type with
86          | T_element name when List.mem name names -> true         | Pxp_document.T_element s -> f n s
87          | _ -> false) n         | _ -> ()
88      )
89    
90    let _fold_elems n x f =
91      let x = ref x in
92      n#iter_nodes
93        (fun n ->
94           match n#node_type with
95             | Pxp_document.T_element s -> x := f !x n s
96             | _ -> ()
97        );
98      !x
99    
100      let _has_element' names (n: pxp_node) =  let _tag n =
101        try ignore (_element' names n); true with Not_found -> false    match n#node_type with
102        | Pxp_document.T_element s -> s
103        | _ -> assert false
104    
105    end  let _has_tag n f =
106      match n#node_type with
107        | Pxp_document.T_element s -> f s
108        | _ -> false
109    
110    let _namespaces n =
111      List.map
112        (fun n ->
113           (match n#node_type with
114                Pxp_document.T_namespace p -> p | _ -> assert false),
115           n#data
116        )
117        n#namespaces_as_nodes
118    
119    let _find p n = Pxp_document.find p n
120    
121    let _resolve_qname n qname =
122      let (prefix,local) = Ns.split_qname qname in
123      let ns =
124        try Ns.mk (Utf8.mk (n # namespace_scope # uri_of_display_prefix prefix))
125        with Not_found -> Ns.empty
126      in
127      (ns,local)
128    
129    let _may_qname_attr name n =
130      match _may_attr name n with
131        | Some qname -> Some (_resolve_qname n qname)
132        | None -> None
133    
134    (** export Ns.t version of defined namespaces *)  let _qname_attr name n =
135      match _may_attr name n with
136        | Some qname -> _resolve_qname n qname
137        | None -> assert false
138    
 let xsd_namespace = Ns.mk xsd_namespace  
 let xsi_namespace = Ns.mk xsi_namespace  
139    
140    let xsd = Ns.mk xsd_namespace
141    let xsi = Ns.mk xsi_namespace

Legend:
Removed from v.812  
changed lines
  Added in v.1488

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5