Commit | Line | Data |
---|---|---|
b1b2de81 | 1 | { |
feec80c3 C |
2 | (***************************************************************************** |
3 | * Sexplib * | |
4 | * * | |
5 | * Copyright (C) 2005- Jane Street Holding, LLC * | |
6 | * Contact: opensource@janestreet.com * | |
7 | * WWW: http://www.janestreet.com/ocaml * | |
8 | * Author: Markus Mottl * | |
9 | * * | |
10 | * This library is free software; you can redistribute it and/or * | |
11 | * modify it under the terms of the GNU Lesser General Public * | |
12 | * License as published by the Free Software Foundation; either * | |
13 | * version 2 of the License, or (at your option) any later version. * | |
14 | * * | |
15 | * This library is distributed in the hope that it will be useful, * | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * | |
18 | * Lesser General Public License for more details. * | |
19 | * * | |
20 | * You should have received a copy of the GNU Lesser General Public * | |
21 | * License along with this library; if not, write to the Free Software * | |
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * | |
23 | * * | |
24 | *****************************************************************************) | |
b1b2de81 C |
25 | |
26 | (** Lexer: Lexer Specification for S-expressions *) | |
27 | ||
28 | open Printf | |
29 | open Lexing | |
30 | open Parser | |
31 | ||
32 | let char_for_backslash = function | |
33 | | 'n' -> '\n' | |
34 | | 't' -> '\t' | |
35 | | 'b' -> '\b' | |
36 | | 'r' -> '\r' | |
37 | | c -> c | |
38 | ||
39 | let double_nl = "\013\010" | |
40 | ||
41 | let dec_code c1 c2 c3 = | |
42 | 100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48) | |
43 | ||
44 | let hex_code c1 c2 = | |
45 | let d1 = Char.code c1 in | |
46 | let val1 = | |
47 | if d1 >= 97 then d1 - 87 | |
48 | else if d1 >= 65 then d1 - 55 | |
49 | else d1 - 48 in | |
50 | let d2 = Char.code c2 in | |
51 | let val2 = | |
52 | if d2 >= 97 then d2 - 87 | |
53 | else if d2 >= 65 then d2 - 55 | |
54 | else d2 - 48 in | |
55 | val1 * 16 + val2 | |
56 | ||
57 | let found_newline lexbuf diff = | |
58 | let curr_p = lexbuf.lex_curr_p in | |
59 | lexbuf.lex_curr_p <- | |
60 | { | |
61 | curr_p with | |
62 | pos_lnum = curr_p.pos_lnum + 1; | |
63 | pos_bol = max 1 (curr_p.pos_cnum - diff); | |
64 | } | |
65 | ||
66 | let get_lexeme_len lexbuf = lexbuf.lex_curr_pos - lexbuf.lex_start_pos | |
67 | } | |
68 | ||
69 | let newline = ('\010' | '\013' | "\013\010") | |
70 | let space = [' ' '\009' '\012'] | |
71 | let whitespace = [' ' '\010' '\013' '\009' '\012'] | |
72 | let backslash_escapes = ['\\' '"' '\'' 'n' 't' 'b' 'r'] | |
73 | ||
74 | rule main buf = parse | |
75 | | newline { found_newline lexbuf 1; main buf lexbuf } | |
feec80c3 | 76 | | space+ | ';' [^ '\n' '\r']* { main buf lexbuf } |
b1b2de81 C |
77 | | '(' { LPAREN } |
78 | | ')' { RPAREN } | |
79 | | '"' | |
80 | { | |
81 | scan_string buf lexbuf; | |
82 | let str = Buffer.contents buf in | |
83 | Buffer.clear buf; | |
84 | STRING str | |
85 | } | |
86 | | ([^ ';' '(' ')' '"'] # whitespace)+ as str { STRING str } | |
87 | | eof { EOF } | |
88 | ||
89 | and scan_string buf = parse | |
90 | | '"' { () } | |
91 | | '\\' ['\010' '\013'] [' ' '\009']* | |
92 | { | |
93 | let len = get_lexeme_len lexbuf in | |
94 | found_newline lexbuf (len - 2); | |
95 | scan_string buf lexbuf | |
96 | } | |
97 | | '\\' "\013\010" [' ' '\009']* | |
98 | { | |
99 | let len = get_lexeme_len lexbuf in | |
100 | found_newline lexbuf (len - 3); | |
101 | scan_string buf lexbuf | |
102 | } | |
103 | | '\\' (backslash_escapes as c) | |
104 | { | |
105 | Buffer.add_char buf (char_for_backslash c); | |
106 | scan_string buf lexbuf | |
107 | } | |
108 | | '\\' (['0'-'9'] as c1) (['0'-'9'] as c2) (['0'-'9'] as c3) | |
109 | { | |
110 | let v = dec_code c1 c2 c3 in | |
111 | if v > 255 then ( | |
112 | let pos = lexbuf.lex_curr_p in | |
113 | let msg = | |
114 | sprintf | |
115 | "Sexplib.Lexer.scan_string: \ | |
116 | illegal escape at line %d char %d: `\\%c%c%c'" | |
117 | pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3) | |
118 | c1 c2 c3 in | |
119 | failwith msg); | |
120 | Buffer.add_char buf (Char.chr v); | |
121 | scan_string buf lexbuf | |
122 | } | |
123 | | '\\' 'x' (['0'-'9' 'a'-'f' 'A'-'F'] as c1) (['0'-'9' 'a'-'f' 'A'-'F'] as c2) | |
124 | { | |
125 | let v = hex_code c1 c2 in | |
126 | if v > 255 then ( | |
127 | let pos = lexbuf.lex_curr_p in | |
128 | let msg = | |
129 | sprintf | |
130 | "Sexplib.Lexer.scan_string: \ | |
131 | illegal escape at line %d char %d: `\\x%c%c'" | |
132 | pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3) | |
133 | c1 c2 in | |
134 | failwith msg); | |
135 | Buffer.add_char buf (Char.chr v); | |
136 | scan_string buf lexbuf | |
137 | } | |
138 | | '\\' (_ as c) | |
139 | { | |
140 | Buffer.add_char buf '\\'; | |
141 | Buffer.add_char buf c; | |
142 | scan_string buf lexbuf | |
143 | } | |
144 | | ['\010' '\013'] as c | |
145 | { | |
146 | found_newline lexbuf 1; | |
147 | Buffer.add_char buf c; | |
148 | scan_string buf lexbuf | |
149 | } | |
150 | | "\013\010" | |
151 | { | |
152 | found_newline lexbuf 2; | |
153 | Buffer.add_string buf double_nl; | |
154 | scan_string buf lexbuf | |
155 | } | |
156 | | [^ '\\' '"']+ | |
157 | { | |
158 | let ofs = lexbuf.lex_start_pos in | |
159 | let len = lexbuf.lex_curr_pos - ofs in | |
160 | Buffer.add_substring buf lexbuf.lex_buffer ofs len; | |
161 | scan_string buf lexbuf | |
162 | } | |
163 | | eof { failwith "Sexplib.Lexer.scan_string: unterminated string" } | |
164 | ||
165 | { | |
166 | let main ?buf = | |
167 | let buf = | |
168 | match buf with | |
169 | | None -> Buffer.create 64 | |
170 | | Some buf -> Buffer.clear buf; buf | |
171 | in | |
172 | main buf | |
173 | } |