* Makefile.am (libguile_la_LDFLAGS): Bump library version.
[bpt/guile.git] / libguile / regex-posix.c
CommitLineData
f255378e
JB
1/* Copyright (C) 1997 Free Software Foundation, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
16 *
17 * As a special exception, the Free Software Foundation gives permission
18 * for additional uses of the text contained in its release of GUILE.
19 *
20 * The exception is that, if you link the GUILE library with other files
21 * to produce an executable, this does not by itself cause the
22 * resulting executable to be covered by the GNU General Public License.
23 * Your use of that executable is in no way restricted on account of
24 * linking the GUILE library code into it.
25 *
26 * This exception does not however invalidate any other reasons why
27 * the executable file might be covered by the GNU General Public License.
28 *
29 * This exception applies only to the code released by the
30 * Free Software Foundation under the name GUILE. If you copy
31 * code from other Free Software Foundation releases into a copy of
32 * GUILE, as the General Public License permits, the exception does
33 * not apply to the code that you add in this way. To avoid misleading
34 * anyone as to the status of such modified files, you must delete
35 * this exception notice from them.
36 *
37 * If you write modifications of your own for GUILE, it is your choice
38 * whether to permit this exception to apply to your modifications.
39 * If you do not wish that, delete this exception notice.
40 */
41\f
42
43/* regex-posix.c -- POSIX regular expression support.
44
45 This code was written against Henry Spencer's famous regex package.
46 The principal reference for POSIX behavior was the man page for this
47 library, not the 1003.2 document itself. Ergo, other `POSIX'
48 libraries which do not agree with the Spencer implementation may
49 produce varying behavior. Sigh. */
50
51#include <stdio.h>
52#include <sys/types.h>
24e37377 53
69e0587b
JB
54#include "_scm.h"
55
24e37377
JB
56/* Supposedly, this file is never compiled unless we know we have
57 POSIX regular expressions. But we still put this in an #ifdef so
58 the file is CPP'able (for dependency scanning) even on systems that
59 don't have a <regex.h> header. */
60#ifdef HAVE_REGCOMP
f255378e 61#include <regex.h>
24e37377
JB
62#endif
63
f255378e
JB
64#include "smob.h"
65#include "symbols.h"
66#include "vectors.h"
67#include "strports.h"
68#include "ports.h"
20044282 69#include "feature.h"
f255378e
JB
70
71#include "regex-posix.h"
72
73long scm_tc16_regex_t;
74
75static size_t
76scm_free_regex_t (obj)
77 SCM obj;
78{
79 regfree (SCM_RGX (obj));
80 free (SCM_RGX (obj));
81 return 0;
82}
83
84static int
85scm_print_regex_t (obj, port, pstate)
86 SCM obj;
87 SCM port;
88 scm_print_state *pstate;
89{
90 regex_t *r;
91 r = SCM_RGX (obj);
92 scm_gen_puts (scm_regular_string, "#<rgx ", port);
93 scm_intprint (obj, 16, port);
94 scm_gen_puts (scm_regular_string, ">", port);
95 return 1;
96}
97
98
99static scm_smobfuns regex_t_smob =
100{ scm_mark0, scm_free_regex_t, scm_print_regex_t, 0 };
101\f
102
103SCM_SYMBOL (scm_regexp_error_key, "regular-expression-syntax");
104
105char *
106scm_regexp_error_msg (regerrno, rx)
107 int regerrno;
108 SCM rx;
109{
110 SCM errmsg;
111 int l;
112
113 /* FIXME: must we wrap any external calls in SCM_DEFER_INTS...SCM_ALLOW_INTS?
114 Or are these only necessary when a SCM object may be left in an
115 undetermined state (half-formed)? If the latter then I believe we
116 may do without the critical section code. -twp */
117
118 /* We could simply make errmsg a char pointer, and allocate space with
119 malloc. But since we are about to pass the pointer to scm_error, which
120 never returns, we would never have the opportunity to free it. Creating
121 it as a SCM object means that the system will GC it at some point. */
122
123 errmsg = scm_make_string (SCM_MAKINUM (80), SCM_UNDEFINED);
124 SCM_DEFER_INTS;
125 l = regerror (regerrno, SCM_RGX (rx), SCM_CHARS (errmsg), 80);
126 if (l > 80)
127 {
128 errmsg = scm_make_string (SCM_MAKINUM (l), SCM_UNDEFINED);
129 regerror (regerrno, SCM_RGX (rx), SCM_CHARS (errmsg), l);
130 }
131 SCM_ALLOW_INTS;
132 return SCM_CHARS (errmsg);
133}
134
135SCM_PROC (s_regexp_p, "regexp?", 1, 0, 0, scm_regexp_p);
136
137SCM
138scm_regexp_p (x)
139 SCM x;
140{
141 return (SCM_NIMP (x) && SCM_RGXP (x) ? SCM_BOOL_T : SCM_BOOL_F);
142}
143
bd56d016 144SCM_PROC (s_make_regexp, "make-regexp", 1, 1, 0, scm_make_regexp);
f255378e
JB
145
146SCM
bd56d016 147scm_make_regexp (SCM pat, SCM flags)
f255378e
JB
148{
149 SCM result;
150 regex_t *rx;
151 int status;
152
153 SCM_ASSERT (SCM_NIMP(pat) && SCM_ROSTRINGP(pat), pat, SCM_ARG1,
154 s_make_regexp);
155 SCM_COERCE_SUBSTR (pat);
bd56d016
JB
156 if (SCM_UNBNDP (flags))
157 flags = SCM_MAKINUM (REG_EXTENDED);
158 SCM_ASSERT (SCM_INUMP (flags), flags, SCM_ARG2, s_make_regexp);
f255378e
JB
159
160 SCM_DEFER_INTS;
161 rx = (regex_t *) scm_must_malloc (sizeof (regex_t), s_make_regexp);
bd56d016 162 status = regcomp (rx, SCM_ROCHARS (pat), SCM_INUM (flags));
f255378e
JB
163 if (status != 0)
164 {
165 SCM_ALLOW_INTS;
166 scm_error (scm_regexp_error_key,
167 s_make_regexp,
168 scm_regexp_error_msg (status, rx),
169 SCM_BOOL_F,
170 SCM_BOOL_F);
171 /* never returns */
172 }
173 SCM_NEWCELL (result);
174 SCM_SETCAR (result, scm_tc16_regex_t);
175 SCM_SETCDR (result, rx);
176 SCM_ALLOW_INTS;
177 return result;
178}
179
bd56d016 180SCM_PROC (s_regexp_exec, "regexp-exec", 2, 2, 0, scm_regexp_exec);
f255378e
JB
181
182SCM
bd56d016 183scm_regexp_exec (SCM rx, SCM str, SCM start, SCM flags)
f255378e 184{
0b787875 185 int status, nmatches, offset;
f255378e
JB
186 regmatch_t *matches;
187 SCM mvec = SCM_BOOL_F;
188
189 SCM_ASSERT (SCM_NIMP (rx) && SCM_RGXP (rx), rx, SCM_ARG1, s_regexp_exec);
190 SCM_ASSERT (SCM_NIMP (str) && SCM_ROSTRINGP (str), str, SCM_ARG2,
191 s_regexp_exec);
0b787875
JB
192
193 if (SCM_UNBNDP (start))
194 offset = 0;
195 else
196 {
197 SCM_ASSERT (SCM_INUMP (start), start, SCM_ARG3, s_regexp_exec);
198 offset = SCM_INUM (start);
199 SCM_ASSERT (offset >= 0 && offset <= SCM_LENGTH (str), start,
200 SCM_OUTOFRANGE, s_regexp_exec);
201 }
202
bd56d016
JB
203 if (SCM_UNBNDP (flags))
204 flags = SCM_INUM0;
205 SCM_ASSERT (SCM_INUMP (flags), flags, SCM_ARG2, s_regexp_exec);
206
f255378e
JB
207 SCM_COERCE_SUBSTR (str);
208
209 /* re_nsub doesn't account for the `subexpression' representing the
210 whole regexp, so add 1 to nmatches. */
211
212 nmatches = SCM_RGX(rx)->re_nsub + 1;
213 SCM_DEFER_INTS;
214 matches = (regmatch_t *) scm_must_malloc (sizeof (regmatch_t) * nmatches,
215 s_regexp_exec);
bd56d016
JB
216 status = regexec (SCM_RGX (rx), SCM_ROCHARS (str) + offset,
217 nmatches, matches,
218 SCM_INUM (flags));
f255378e
JB
219 if (!status)
220 {
221 int i;
222 /* The match vector must include a cell for the string that was matched,
223 so add 1. */
224 mvec = scm_make_vector (SCM_MAKINUM (nmatches + 1), SCM_UNSPECIFIED,
225 SCM_UNDEFINED);
226 SCM_VELTS(mvec)[0] = str;
227 for (i = 0; i < nmatches; ++i)
0b787875
JB
228 SCM_VELTS(mvec)[i+1] = scm_cons(SCM_MAKINUM(matches[i].rm_so + offset),
229 SCM_MAKINUM(matches[i].rm_eo + offset));
f255378e
JB
230 }
231 SCM_ALLOW_INTS;
232
233 if (status != 0 && status != REG_NOMATCH)
234 scm_error (scm_regexp_error_key,
235 s_regexp_exec,
236 scm_regexp_error_msg (status),
237 SCM_BOOL_F,
238 SCM_BOOL_F);
239 return mvec;
240}
241
242void
243scm_init_regex_posix ()
244{
245 scm_tc16_regex_t = scm_newsmob (&regex_t_smob);
bd56d016
JB
246
247 /* Compilation flags. */
fcfb248d
JB
248 scm_sysintern ("regexp/extended", scm_long2num (REG_EXTENDED));
249 scm_sysintern ("regexp/icase", scm_long2num (REG_ICASE));
250 scm_sysintern ("regexp/nosub", scm_long2num (REG_NOSUB));
251 scm_sysintern ("regexp/newline", scm_long2num (REG_NEWLINE));
bd56d016
JB
252
253 /* Execution flags. */
fcfb248d
JB
254 scm_sysintern ("regexp/notbol", scm_long2num (REG_NOTBOL));
255 scm_sysintern ("regexp/noteol", scm_long2num (REG_NOTEOL));
bd56d016 256
f255378e 257#include "regex-posix.x"
20044282
JB
258
259 scm_add_feature ("regex");
f255378e 260}