Coccinelle release 0.2.5-rc3
[bpt/coccinelle.git] / ocamlsexp / lexer.mll
1 {
2 (* File: lexer.mll
3
4 Copyright (C) 2005-
5
6 Jane Street Holding, LLC
7 Author: Markus Mottl
8 email: mmottl@janestcapital.com
9 WWW: http://www.janestcapital.com/ocaml
10
11 This library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public
13 License as published by the Free Software Foundation; either
14 version 2 of the License, or (at your option) any later version.
15
16 This library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
20
21 You should have received a copy of the GNU Lesser General Public
22 License along with this library; if not, write to the Free Software
23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 *)
25
26 (** Lexer: Lexer Specification for S-expressions *)
27
28 open Printf
29 open Lexing
30 open Parser
31
32 let char_for_backslash = function
33 | 'n' -> '\n'
34 | 't' -> '\t'
35 | 'b' -> '\b'
36 | 'r' -> '\r'
37 | c -> c
38
39 let double_nl = "\013\010"
40
41 let dec_code c1 c2 c3 =
42 100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48)
43
44 let hex_code c1 c2 =
45 let d1 = Char.code c1 in
46 let val1 =
47 if d1 >= 97 then d1 - 87
48 else if d1 >= 65 then d1 - 55
49 else d1 - 48 in
50 let d2 = Char.code c2 in
51 let val2 =
52 if d2 >= 97 then d2 - 87
53 else if d2 >= 65 then d2 - 55
54 else d2 - 48 in
55 val1 * 16 + val2
56
57 let found_newline lexbuf diff =
58 let curr_p = lexbuf.lex_curr_p in
59 lexbuf.lex_curr_p <-
60 {
61 curr_p with
62 pos_lnum = curr_p.pos_lnum + 1;
63 pos_bol = max 1 (curr_p.pos_cnum - diff);
64 }
65
66 let get_lexeme_len lexbuf = lexbuf.lex_curr_pos - lexbuf.lex_start_pos
67 }
68
69 let newline = ('\010' | '\013' | "\013\010")
70 let space = [' ' '\009' '\012']
71 let whitespace = [' ' '\010' '\013' '\009' '\012']
72 let backslash_escapes = ['\\' '"' '\'' 'n' 't' 'b' 'r']
73
74 rule main buf = parse
75 | newline { found_newline lexbuf 1; main buf lexbuf }
76 | space+ { main buf lexbuf }
77 | ';' [^ '\n' '\r']+ { main buf lexbuf }
78 | '(' { LPAREN }
79 | ')' { RPAREN }
80 | '"'
81 {
82 scan_string buf lexbuf;
83 let str = Buffer.contents buf in
84 Buffer.clear buf;
85 STRING str
86 }
87 | ([^ ';' '(' ')' '"'] # whitespace)+ as str { STRING str }
88 | eof { EOF }
89
90 and scan_string buf = parse
91 | '"' { () }
92 | '\\' ['\010' '\013'] [' ' '\009']*
93 {
94 let len = get_lexeme_len lexbuf in
95 found_newline lexbuf (len - 2);
96 scan_string buf lexbuf
97 }
98 | '\\' "\013\010" [' ' '\009']*
99 {
100 let len = get_lexeme_len lexbuf in
101 found_newline lexbuf (len - 3);
102 scan_string buf lexbuf
103 }
104 | '\\' (backslash_escapes as c)
105 {
106 Buffer.add_char buf (char_for_backslash c);
107 scan_string buf lexbuf
108 }
109 | '\\' (['0'-'9'] as c1) (['0'-'9'] as c2) (['0'-'9'] as c3)
110 {
111 let v = dec_code c1 c2 c3 in
112 if v > 255 then (
113 let pos = lexbuf.lex_curr_p in
114 let msg =
115 sprintf
116 "Sexplib.Lexer.scan_string: \
117 illegal escape at line %d char %d: `\\%c%c%c'"
118 pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
119 c1 c2 c3 in
120 failwith msg);
121 Buffer.add_char buf (Char.chr v);
122 scan_string buf lexbuf
123 }
124 | '\\' 'x' (['0'-'9' 'a'-'f' 'A'-'F'] as c1) (['0'-'9' 'a'-'f' 'A'-'F'] as c2)
125 {
126 let v = hex_code c1 c2 in
127 if v > 255 then (
128 let pos = lexbuf.lex_curr_p in
129 let msg =
130 sprintf
131 "Sexplib.Lexer.scan_string: \
132 illegal escape at line %d char %d: `\\x%c%c'"
133 pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
134 c1 c2 in
135 failwith msg);
136 Buffer.add_char buf (Char.chr v);
137 scan_string buf lexbuf
138 }
139 | '\\' (_ as c)
140 {
141 Buffer.add_char buf '\\';
142 Buffer.add_char buf c;
143 scan_string buf lexbuf
144 }
145 | ['\010' '\013'] as c
146 {
147 found_newline lexbuf 1;
148 Buffer.add_char buf c;
149 scan_string buf lexbuf
150 }
151 | "\013\010"
152 {
153 found_newline lexbuf 2;
154 Buffer.add_string buf double_nl;
155 scan_string buf lexbuf
156 }
157 | [^ '\\' '"']+
158 {
159 let ofs = lexbuf.lex_start_pos in
160 let len = lexbuf.lex_curr_pos - ofs in
161 Buffer.add_substring buf lexbuf.lex_buffer ofs len;
162 scan_string buf lexbuf
163 }
164 | eof { failwith "Sexplib.Lexer.scan_string: unterminated string" }
165
166 {
167 let main ?buf =
168 let buf =
169 match buf with
170 | None -> Buffer.create 64
171 | Some buf -> Buffer.clear buf; buf
172 in
173 main buf
174 }