/[svn]/runtime/load_xml.ml
ViewVC logotype

Contents of /runtime/load_xml.ml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 467 - (show annotations)
Tue Jul 10 17:36:54 2007 UTC (5 years, 10 months ago) by abate
File size: 4557 byte(s)
[r2003-05-31 11:02:27 by cvscast] expat

Original author: cvscast
Date: 2003-05-31 11:02:27+00:00
1 (* Loading XML documents *)
2
3 let use_parser = ref `Pxp
4
5 open Pxp_yacc
6 open Pxp_lexer_types
7 open Pxp_types
8 open Value
9 open Ident
10 open Encodings
11
12 type buf =
13 { mutable buffer : string;
14 mutable pos : int;
15 mutable length : int }
16
17 let create n = { buffer = String.create n; pos = 0; length = n }
18
19 let resize b n =
20 let new_len = b.length * 2 + n in
21 let new_buf = String.create new_len in
22 String.unsafe_blit b.buffer 0 new_buf 0 b.pos;
23 b.buffer <- new_buf;
24 b.length <- new_len
25
26 let add_string b s =
27 let len = String.length s in
28 let new_pos = b.pos + len in
29 if new_pos > b.length then resize b len;
30 String.unsafe_blit s 0 b.buffer b.pos len;
31 b.pos <- new_pos
32
33 let rec only_ws s i =
34 (i = 0) ||
35 (let i = pred i in match (String.unsafe_get s i) with
36 | ' ' | '\t' | '\n' | '\r' -> only_ws s i
37 | _ -> false)
38
39
40 let string s q =
41 let s = Utf8.mk s in
42 String_utf8 (Utf8.start_index s,Utf8.end_index s, s, q)
43
44
45 let attrib att =
46 let att = List.map (fun (l,v) -> LabelPool.mk (U.mk l), string v nil) att in
47 LabelMap.from_list (fun _ _ -> assert false) att
48
49 let elem tag att child =
50 Xml (Atom (Atoms.mk (U.mk tag)), Record (attrib att), child)
51
52 (*
53 class warner = object method warn w = print_endline ("PXP WARNING: " ^ w) end
54 *)
55
56 type token =
57 | Element of Value.t
58 | Start of string * (string * string) list
59 | String of string
60
61 let stack = ref []
62 let txt = create 1024
63
64
65 let rec create_elt accu = function
66 | String s :: st -> create_elt (string s accu) st
67 | Element x :: st -> create_elt (Pair (x,accu)) st
68 | Start (name,att) :: st -> stack := Element (elem name att accu) :: st
69 | [] -> assert false
70
71
72 let buflen = 1000
73 let buf = String.create buflen
74
75 ifdef EXPAT then
76
77 let load_expat s =
78 let p = Expat.parser_create "" in
79 Expat.set_start_element_handler p
80 (fun name att ->
81 if not (only_ws txt.buffer txt.pos) then
82 stack := String (String.sub txt.buffer 0 txt.pos) :: !stack;
83 txt.pos <- 0;
84 stack := Start (name,att) :: !stack);
85 Expat.set_end_element_handler p
86 (fun _ ->
87 let accu =
88 if only_ws txt.buffer txt.pos
89 then nil
90 else string (String.sub txt.buffer 0 txt.pos) nil in
91 txt.pos <- 0;
92 create_elt accu !stack);
93 Expat.set_character_data_handler p (add_string txt);
94 let ic = open_in s in
95 let rec loop () =
96 let n = input ic buf 0 buflen in
97 if (n > 0) then
98 (*(Expat.parse p (String.sub buf 0 n); loop ())*)
99 (Expat.parse_sub p buf 0 n; loop ())
100 in
101 try
102 loop();
103 Expat.final p;
104 close_in ic;
105 match !stack with
106 | [ Element x ] -> stack := []; x
107 | _ -> assert false
108 with
109 Expat.Expat_error e ->
110 failwith ("Expat ("^s^"):"^Expat.xml_error_to_string e)
111
112 else
113
114 let load_expat s =
115 failwith "Expat support not included"
116
117
118
119 let handle_event = function
120 | E_start_tag (name,att,_) ->
121 if not (only_ws txt.buffer txt.pos) then
122 stack := String (String.sub txt.buffer 0 txt.pos) :: !stack;
123 txt.pos <- 0;
124 stack := Start (name,att) :: !stack
125 | E_char_data data ->
126 add_string txt data
127 | E_end_tag (_,_) ->
128 let accu =
129 if only_ws txt.buffer txt.pos
130 then nil
131 else string (String.sub txt.buffer 0 txt.pos) nil in
132 txt.pos <- 0;
133 create_elt accu !stack
134 | _ -> ()
135
136 let load_pxp s =
137 let config = { default_config with
138 (* warner = new warner; *)
139 encoding = `Enc_utf8;
140 store_element_positions = false;
141 drop_ignorable_whitespace = true
142 }
143 in
144 let mgr = create_entity_manager config (from_file s) in
145 process_entity config (`Entry_document[]) mgr handle_event;
146 match !stack with
147 | [ Element x ] -> stack := []; x
148 | _ -> assert false
149
150 let load_xml_aux s =
151 match !use_parser with
152 | `Expat -> load_expat s
153 | `Pxp -> load_pxp s
154
155 let load_xml s =
156 Location.protect_op "load_xml";
157 try load_xml_aux s
158 with exn ->
159 raise
160 (Location.Generic (Pxp_types.string_of_exn exn))
161
162
163 let load_html s =
164 let rec val_of_doc q = function
165 | Nethtml.Data data ->
166 if (only_ws data (String.length data)) then q else string data q
167 | Nethtml.Element (tag, att, child) ->
168 Pair (elem tag att (val_of_docs child), q)
169 and val_of_docs = function
170 | [] -> nil
171 | h::t -> val_of_doc (val_of_docs t) h
172 in
173
174 Location.protect_op "load_html";
175 let ic = open_in s in
176 let doc = Nethtml.parse_document
177 ~dtd:Nethtml.relaxed_html40_dtd
178 (Lexing.from_channel ic) in
179 let doc = Nethtml.decode ~enc:`Enc_utf8 ~subst:(fun _ -> "???") doc in
180 close_in ic;
181 val_of_docs doc
182

CVS Admin">CVS Admin
ViewVC Help
Powered by ViewVC 1.1.5