/[svn]/runtime/load_xml.ml
ViewVC logotype

Contents of /runtime/load_xml.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 466 - (hide annotations)
Tue Jul 10 17:36:47 2007 UTC (5 years, 11 months ago) by abate
File size: 4472 byte(s)
[r2003-05-31 10:32:42 by cvscast] support for expat

Original author: cvscast
Date: 2003-05-31 10:32:43+00:00
1 abate 70 (* Loading XML documents *)
2    
3 abate 466 let use_parser = ref `Pxp
4 abate 70
5     open Pxp_yacc
6     open Pxp_lexer_types
7     open Pxp_types
8     open Value
9 abate 233 open Ident
10 abate 374 open Encodings
11 abate 70
12 abate 379 type buf =
13     { mutable buffer : string;
14     mutable pos : int;
15     mutable length : int }
16 abate 131
17 abate 379 let create n = { buffer = String.create n; pos = 0; length = n }
18 abate 131
19 abate 379 let resize b n =
20     let new_len = b.length * 2 + n in
21     let new_buf = String.create new_len in
22     String.unsafe_blit b.buffer 0 new_buf 0 b.pos;
23     b.buffer <- new_buf;
24     b.length <- new_len
25    
26     let add_string b s =
27     let len = String.length s in
28     let new_pos = b.pos + len in
29     if new_pos > b.length then resize b len;
30     String.unsafe_blit s 0 b.buffer b.pos len;
31     b.pos <- new_pos
32    
33     let rec only_ws s i =
34     (i = 0) ||
35     (let i = pred i in match (String.unsafe_get s i) with
36     | ' ' | '\t' | '\n' | '\r' -> only_ws s i
37 abate 466 | _ -> false)
38 abate 379
39    
40 abate 374 let string s q =
41     let s = Utf8.mk s in
42     String_utf8 (Utf8.start_index s,Utf8.end_index s, s, q)
43 abate 70
44 abate 374
45 abate 188 let attrib att =
46 abate 374 let att = List.map (fun (l,v) -> LabelPool.mk (U.mk l), string v nil) att in
47 abate 233 LabelMap.from_list (fun _ _ -> assert false) att
48 abate 188
49     let elem tag att child =
50 abate 405 Xml (Atom (Atoms.mk (U.mk tag)), Record (attrib att), child)
51 abate 188
52 abate 402 (*
53 abate 310 class warner = object method warn w = print_endline ("PXP WARNING: " ^ w) end
54 abate 402 *)
55 abate 310
56 abate 394 type token =
57     | Element of Value.t
58     | Start of string * (string * string) list
59     | String of string
60    
61 abate 466 let stack = ref []
62     let txt = create 1024
63    
64    
65     let rec create_elt accu = function
66     | String s :: st -> create_elt (string s accu) st
67     | Element x :: st -> create_elt (Pair (x,accu)) st
68     | Start (name,att) :: st -> stack := Element (elem name att accu) :: st
69     | [] -> assert false
70    
71    
72     let buflen = 1000
73     let buf = String.create buflen
74    
75     let load_expat s =
76     let p = Expat.parser_create "" in
77     Expat.set_start_element_handler p
78     (fun name att ->
79     if not (only_ws txt.buffer txt.pos) then
80     stack := String (String.sub txt.buffer 0 txt.pos) :: !stack;
81     txt.pos <- 0;
82     stack := Start (name,att) :: !stack);
83     Expat.set_end_element_handler p
84     (fun _ ->
85     let accu =
86     if only_ws txt.buffer txt.pos
87     then nil
88     else string (String.sub txt.buffer 0 txt.pos) nil in
89     txt.pos <- 0;
90     create_elt accu !stack);
91     Expat.set_character_data_handler p (add_string txt);
92     let ic = open_in s in
93     let rec loop () =
94     let n = input ic buf 0 buflen in
95     if (n > 0) then
96     (*(Expat.parse p (String.sub buf 0 n); loop ())*)
97     (Expat.parse_sub p buf 0 n; loop ())
98     in
99     try
100     loop();
101     Expat.final p;
102     close_in ic;
103     match !stack with
104     | [ Element x ] -> stack := []; x
105     | _ -> assert false
106     with
107     Expat.Expat_error e ->
108     failwith ("Expat ("^s^"):"^Expat.xml_error_to_string e)
109    
110    
111     let handle_event = function
112     | E_start_tag (name,att,_) ->
113     if not (only_ws txt.buffer txt.pos) then
114     stack := String (String.sub txt.buffer 0 txt.pos) :: !stack;
115     txt.pos <- 0;
116     stack := Start (name,att) :: !stack
117     | E_char_data data ->
118     add_string txt data
119     | E_end_tag (_,_) ->
120     let accu =
121     if only_ws txt.buffer txt.pos
122     then nil
123     else string (String.sub txt.buffer 0 txt.pos) nil in
124     txt.pos <- 0;
125     create_elt accu !stack
126     | _ -> ()
127    
128     let load_pxp s =
129 abate 70 let config = { default_config with
130 abate 310 (* warner = new warner; *)
131 abate 374 encoding = `Enc_utf8;
132 abate 70 store_element_positions = false;
133     drop_ignorable_whitespace = true
134     }
135     in
136     let mgr = create_entity_manager config (from_file s) in
137 abate 466 process_entity config (`Entry_document[]) mgr handle_event;
138     match !stack with
139     | [ Element x ] -> stack := []; x
140     | _ -> assert false
141 abate 70
142 abate 466 let load_xml_aux s =
143     match !use_parser with
144     | `Expat -> load_expat s
145     | `Pxp -> load_pxp s
146 abate 172
147 abate 188 let load_xml s =
148 abate 126 Location.protect_op "load_xml";
149 abate 188 try load_xml_aux s
150 abate 91 with exn ->
151     raise
152     (Location.Generic (Pxp_types.string_of_exn exn))
153    
154 abate 188
155     let load_html s =
156     let rec val_of_doc q = function
157     | Nethtml.Data data ->
158 abate 379 if (only_ws data (String.length data)) then q else string data q
159 abate 188 | Nethtml.Element (tag, att, child) ->
160     Pair (elem tag att (val_of_docs child), q)
161     and val_of_docs = function
162     | [] -> nil
163     | h::t -> val_of_doc (val_of_docs t) h
164     in
165    
166 abate 310 Location.protect_op "load_html";
167 abate 188 let ic = open_in s in
168     let doc = Nethtml.parse_document
169     ~dtd:Nethtml.relaxed_html40_dtd
170     (Lexing.from_channel ic) in
171 abate 374 let doc = Nethtml.decode ~enc:`Enc_utf8 ~subst:(fun _ -> "???") doc in
172 abate 188 close_in ic;
173     val_of_docs doc
174 abate 379

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5