| 19 |
exception Unterminated_string_in_comment |
exception Unterminated_string_in_comment |
| 20 |
|
|
| 21 |
|
|
| 22 |
(* Buffer for string literals : always encoded in Utf8 *) |
(* Buffer for string literals (always encoded in UTF8). |
| 23 |
|
Each character is encoded as two consecutive code points; |
| 24 |
|
the first one gives the number of bytes in the input document; |
| 25 |
|
the second one gives the Unicode representation *) |
| 26 |
|
|
| 27 |
let string_buff = Buffer.create 1024 |
let string_buff = Buffer.create 1024 |
| 28 |
|
|
| 29 |
|
let store_len ?(add=0) lexbuf = |
| 30 |
|
let l = add + (Lexing.lexeme_end lexbuf) - (Lexing.lexeme_start lexbuf) in |
| 31 |
|
Encodings.Utf8.store string_buff l |
| 32 |
|
|
| 33 |
let store_ascii = Buffer.add_char string_buff |
let store_ascii = Buffer.add_char string_buff |
| 34 |
let store_char = Buffer.add_string string_buff |
let store_char = Buffer.add_string string_buff |
| 35 |
let store_code = Encodings.Utf8.store string_buff |
let store_code = Encodings.Utf8.store string_buff |
| 67 |
let identchar = lowercase | uppercase | ascii_digit | '_' | '\'' | '-' |
let identchar = lowercase | uppercase | ascii_digit | '_' | '\'' | '-' |
| 68 |
let ident = identchar* ( ':' identchar+)* |
let ident = identchar* ( ':' identchar+)* |
| 69 |
|
|
|
let numeric_char = '\\' ascii_digit+ ';' |
|
|
|
|
| 70 |
rule token = parse |
rule token = parse |
| 71 |
blank+ { token engine lexbuf } |
blank+ { token engine lexbuf } |
| 72 |
| (lowercase | '_') ident { |
| (lowercase | '_') ident { |
| 118 |
} |
} |
| 119 |
| '"' | "'" |
| '"' | "'" |
| 120 |
{ string_start_pos := Lexing.lexeme_start lexbuf; |
{ string_start_pos := Lexing.lexeme_start lexbuf; |
| 121 |
|
Buffer.clear string_buff; |
| 122 |
let ender = Lexing.lexeme lexbuf in |
let ender = Lexing.lexeme lexbuf in |
| 123 |
(try string ender engine lexbuf |
(try string ender engine lexbuf |
| 124 |
with Location.Location (_,Unterminated_string) -> |
with Location.Location (_,Unterminated_string) -> |
| 135 |
|
|
| 136 |
and string ender = parse |
and string ender = parse |
| 137 |
| '"' | "'" |
| '"' | "'" |
| 138 |
{ |
{ let c = Lexing.lexeme lexbuf in |
|
let c = Lexing.lexeme lexbuf in |
|
| 139 |
if c = ender then () |
if c = ender then () |
| 140 |
else (store_char (Lexing.lexeme lexbuf); string ender engine lexbuf) |
else (store_len lexbuf; |
| 141 |
} |
store_char (Lexing.lexeme lexbuf); |
| 142 |
|
string ender engine lexbuf) } |
| 143 |
| '\\' ['\\' '"' '\''] |
| '\\' ['\\' '"' '\''] |
| 144 |
{ store_ascii (Lexing.lexeme_char lexbuf 1); |
{ store_len lexbuf; |
| 145 |
|
store_ascii (Lexing.lexeme_char lexbuf 1); |
| 146 |
string ender engine lexbuf } |
string ender engine lexbuf } |
| 147 |
| '\\' lowercase { |
| '\\' lowercase |
| 148 |
let c = Lexing.lexeme_char lexbuf 1 in |
{ let c = Lexing.lexeme_char lexbuf 1 in |
| 149 |
if c = 'x' then parse_hexa_char engine lexbuf else store_special c; |
if c = 'x' |
| 150 |
|
then parse_hexa_char engine lexbuf |
| 151 |
|
else (store_len lexbuf; store_special c); |
| 152 |
string ender engine lexbuf } |
string ender engine lexbuf } |
| 153 |
| numeric_char |
| '\\' ascii_digit+ ';' |
| 154 |
{ store_code (numeric_char (Lexing.lexeme lexbuf)); |
{ store_len lexbuf; |
| 155 |
|
store_code (numeric_char (Lexing.lexeme lexbuf)); |
| 156 |
string ender engine lexbuf } |
string ender engine lexbuf } |
| 157 |
|
| '\\' |
| 158 |
|
{ error |
| 159 |
|
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) |
| 160 |
|
(Illegal_character '\\') } |
| 161 |
| eof |
| eof |
| 162 |
{ error !string_start_pos (!string_start_pos+1) Unterminated_string } |
{ error !string_start_pos (!string_start_pos+1) Unterminated_string } |
| 163 |
| _ |
| _ |
| 164 |
{ store_code (Char.code (Lexing.lexeme_char lexbuf 0)); (* Adapt when source is UTF8 *) |
{ store_len lexbuf; |
| 165 |
|
store_code (Char.code (Lexing.lexeme_char lexbuf 0)); |
| 166 |
|
(* Adapt when source is UTF8 *) |
| 167 |
string ender engine lexbuf } |
string ender engine lexbuf } |
| 168 |
|
|
| 169 |
and parse_hexa_char = parse |
and parse_hexa_char = parse |
| 170 |
| ascii_digit+ ';' |
| ascii_digit+ ';' |
| 171 |
{ store_code (hexa_char (Lexing.lexeme lexbuf)) } |
{ store_len ~add:2 lexbuf; |
| 172 |
|
store_code (hexa_char (Lexing.lexeme lexbuf)) } |
| 173 |
| _ |
| _ |
| 174 |
{ store_char "\\x"; |
{ error |
| 175 |
store_char (Lexing.lexeme lexbuf); } |
(Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) |
| 176 |
|
(Illegal_character '\\') } |
| 177 |
|
|
| 178 |
|
|
| 179 |
{ |
{ |
| 180 |
|
|