| 1 |
(* Loading XML documents *)
|
| 2 |
|
| 3 |
(*TODO: close the file ! *)
|
| 4 |
|
| 5 |
open Pxp_yacc
|
| 6 |
open Pxp_lexer_types
|
| 7 |
open Pxp_types
|
| 8 |
open Value
|
| 9 |
|
| 10 |
let string s q =
|
| 11 |
let rec check_ws i = (i < 0) ||
|
| 12 |
(match s.[i] with
|
| 13 |
| ' ' | '\t' | '\n' | '\r' -> check_ws (i - 1)
|
| 14 |
| _ -> false) in
|
| 15 |
if check_ws (String.length s - 1) then q
|
| 16 |
else String (0,String.length s,s,q)
|
| 17 |
|
| 18 |
let run s =
|
| 19 |
let config = { default_config with
|
| 20 |
store_element_positions = false;
|
| 21 |
drop_ignorable_whitespace = true
|
| 22 |
}
|
| 23 |
in
|
| 24 |
let mgr = create_entity_manager config (from_file s) in
|
| 25 |
let next_event =
|
| 26 |
create_pull_parser config (`Entry_document[]) mgr in
|
| 27 |
let curr = ref E_end_of_stream in
|
| 28 |
let get () =
|
| 29 |
match next_event () with
|
| 30 |
| Some x -> curr := x
|
| 31 |
| None -> () in
|
| 32 |
|
| 33 |
let rec parse_elt name att =
|
| 34 |
let att = List.map (fun (l,v) -> Types.label l, string v nil) att in
|
| 35 |
let att = SortedMap.from_list (fun _ _ -> assert false) att in
|
| 36 |
let child = parse_seq () in
|
| 37 |
|
| 38 |
let elt = Pair
|
| 39 |
(Atom (Types.mk_atom name),
|
| 40 |
Pair (Record att, child)
|
| 41 |
) in
|
| 42 |
(match !curr with
|
| 43 |
| E_end_tag (_,_) -> get ()
|
| 44 |
| _ -> failwith "Expect end_tag");
|
| 45 |
elt
|
| 46 |
|
| 47 |
|
| 48 |
and parse_seq () =
|
| 49 |
match !curr with
|
| 50 |
| E_start_tag (name,att,_) ->
|
| 51 |
get ();
|
| 52 |
let e1 = parse_elt name att in
|
| 53 |
let rest = parse_seq () in
|
| 54 |
Pair (e1, rest)
|
| 55 |
| E_char_data data ->
|
| 56 |
get ();
|
| 57 |
let rest = parse_seq () in
|
| 58 |
string data rest
|
| 59 |
| E_end_tag (_,_) ->
|
| 60 |
nil
|
| 61 |
| _ -> failwith "Expect start_tag, char_data, or end_tag"
|
| 62 |
|
| 63 |
and parse_doc () =
|
| 64 |
match !curr with
|
| 65 |
| E_start_tag (name,att,_) -> get (); parse_elt name att
|
| 66 |
| _ -> get (); parse_doc ()
|
| 67 |
in
|
| 68 |
get ();
|
| 69 |
parse_doc ()
|
| 70 |
|