| 1 |
(* Loading XML documents *) |
(* Loading XML documents *) |
| 2 |
|
|
| 3 |
(*TODO: close the file ! *) |
let use_parser = ref `Pxp |
| 4 |
|
|
| 5 |
open Pxp_yacc |
open Pxp_yacc |
| 6 |
open Pxp_lexer_types |
open Pxp_lexer_types |
| 58 |
| Start of string * (string * string) list |
| Start of string * (string * string) list |
| 59 |
| String of string |
| String of string |
| 60 |
|
|
| 61 |
let load_xml_aux s = |
let stack = ref [] |
| 62 |
let config = { default_config with |
let txt = create 1024 |
|
(* warner = new warner; *) |
|
|
encoding = `Enc_utf8; |
|
|
store_element_positions = false; |
|
|
drop_ignorable_whitespace = true |
|
|
} |
|
|
in |
|
|
let mgr = create_entity_manager config (from_file s) in |
|
|
let next_event = |
|
|
create_pull_parser config (`Entry_document[]) mgr in |
|
|
let get () = |
|
|
match next_event () with |
|
|
| Some (E_error exn) -> failwith (Pxp_types.string_of_exn exn) |
|
|
| Some E_end_of_stream | None -> failwith "Unexpected end of XML stream" |
|
|
| Some x -> x in |
|
| 63 |
|
|
|
let txt = create 1024 in |
|
| 64 |
|
|
| 65 |
let rec create_elt accu = function |
let rec create_elt accu = function |
| 66 |
| String s :: st -> create_elt (string s accu) st |
| String s :: st -> create_elt (string s accu) st |
| 67 |
| Element x :: st -> create_elt (Pair (x,accu)) st |
| Element x :: st -> create_elt (Pair (x,accu)) st |
| 68 |
| [ Start (name,att) ] -> elem name att accu |
| Start (name,att) :: st -> stack := Element (elem name att accu) :: st |
|
| Start (name,att) :: st -> parse_seq (Element (elem name att accu) :: st) |
|
| 69 |
| [] -> assert false |
| [] -> assert false |
| 70 |
and parse_seq stack = |
|
| 71 |
match get() with |
|
| 72 |
|
let buflen = 1000 |
| 73 |
|
let buf = String.create buflen |
| 74 |
|
|
| 75 |
|
let load_expat s = |
| 76 |
|
let p = Expat.parser_create "" in |
| 77 |
|
Expat.set_start_element_handler p |
| 78 |
|
(fun name att -> |
| 79 |
|
if not (only_ws txt.buffer txt.pos) then |
| 80 |
|
stack := String (String.sub txt.buffer 0 txt.pos) :: !stack; |
| 81 |
|
txt.pos <- 0; |
| 82 |
|
stack := Start (name,att) :: !stack); |
| 83 |
|
Expat.set_end_element_handler p |
| 84 |
|
(fun _ -> |
| 85 |
|
let accu = |
| 86 |
|
if only_ws txt.buffer txt.pos |
| 87 |
|
then nil |
| 88 |
|
else string (String.sub txt.buffer 0 txt.pos) nil in |
| 89 |
|
txt.pos <- 0; |
| 90 |
|
create_elt accu !stack); |
| 91 |
|
Expat.set_character_data_handler p (add_string txt); |
| 92 |
|
let ic = open_in s in |
| 93 |
|
let rec loop () = |
| 94 |
|
let n = input ic buf 0 buflen in |
| 95 |
|
if (n > 0) then |
| 96 |
|
(*(Expat.parse p (String.sub buf 0 n); loop ())*) |
| 97 |
|
(Expat.parse_sub p buf 0 n; loop ()) |
| 98 |
|
in |
| 99 |
|
try |
| 100 |
|
loop(); |
| 101 |
|
Expat.final p; |
| 102 |
|
close_in ic; |
| 103 |
|
match !stack with |
| 104 |
|
| [ Element x ] -> stack := []; x |
| 105 |
|
| _ -> assert false |
| 106 |
|
with |
| 107 |
|
Expat.Expat_error e -> |
| 108 |
|
failwith ("Expat ("^s^"):"^Expat.xml_error_to_string e) |
| 109 |
|
|
| 110 |
|
|
| 111 |
|
let handle_event = function |
| 112 |
| E_start_tag (name,att,_) -> |
| E_start_tag (name,att,_) -> |
| 113 |
let stack = |
if not (only_ws txt.buffer txt.pos) then |
| 114 |
if only_ws txt.buffer txt.pos then stack |
stack := String (String.sub txt.buffer 0 txt.pos) :: !stack; |
|
else String (String.sub txt.buffer 0 txt.pos) :: stack in |
|
| 115 |
txt.pos <- 0; |
txt.pos <- 0; |
| 116 |
parse_seq (Start (name,att) :: stack) |
stack := Start (name,att) :: !stack |
| 117 |
| E_char_data data -> |
| E_char_data data -> |
| 118 |
add_string txt data; |
add_string txt data |
|
parse_seq stack |
|
| 119 |
| E_end_tag (_,_) -> |
| E_end_tag (_,_) -> |
| 120 |
let accu = |
let accu = |
| 121 |
if only_ws txt.buffer txt.pos |
if only_ws txt.buffer txt.pos |
| 122 |
then nil |
then nil |
| 123 |
else string (String.sub txt.buffer 0 txt.pos) nil in |
else string (String.sub txt.buffer 0 txt.pos) nil in |
| 124 |
txt.pos <- 0; |
txt.pos <- 0; |
| 125 |
create_elt accu stack |
create_elt accu !stack |
| 126 |
| _ -> failwith "Expect start_tag, char_data, or end_tag" |
| _ -> () |
| 127 |
|
|
| 128 |
|
let load_pxp s = |
| 129 |
|
let config = { default_config with |
| 130 |
|
(* warner = new warner; *) |
| 131 |
|
encoding = `Enc_utf8; |
| 132 |
|
store_element_positions = false; |
| 133 |
|
drop_ignorable_whitespace = true |
| 134 |
|
} |
| 135 |
in |
in |
| 136 |
let rec parse_doc () = |
let mgr = create_entity_manager config (from_file s) in |
| 137 |
match get () with |
process_entity config (`Entry_document[]) mgr handle_event; |
| 138 |
| E_start_tag (name,att,_) -> parse_seq [ Start (name,att) ] |
match !stack with |
| 139 |
| _ -> parse_doc () in |
| [ Element x ] -> stack := []; x |
| 140 |
parse_doc () |
| _ -> assert false |
| 141 |
|
|
| 142 |
|
let load_xml_aux s = |
| 143 |
|
match !use_parser with |
| 144 |
|
| `Expat -> load_expat s |
| 145 |
|
| `Pxp -> load_pxp s |
| 146 |
|
|
| 147 |
let load_xml s = |
let load_xml s = |
| 148 |
Location.protect_op "load_xml"; |
Location.protect_op "load_xml"; |