X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/3fd207d759c2486d2f8515ec0fc98075b95c5205..e723f8de40a7b727c7c970cdaaf1e580b8720d66:/libguile/regex-posix.c diff --git a/libguile/regex-posix.c b/libguile/regex-posix.c index 75ed36dbe..40f7f3371 100644 --- a/libguile/regex-posix.c +++ b/libguile/regex-posix.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1997 Free Software Foundation, Inc. +/* Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -12,7 +12,8 @@ * * You should have received a copy of the GNU General Public License * along with this software; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * the Free Software Foundation, Inc., 59 Temple Place, Suite 330, + * Boston, MA 02111-1307 USA * * As a special exception, the Free Software Foundation gives permission * for additional uses of the text contained in its release of GUILE. @@ -38,6 +39,10 @@ * whether to permit this exception to apply to your modifications. * If you do not wish that, delete this exception notice. */ + +/* Software engineering face-lift by Greg J. Badros, 11-Dec-1999, + gjb@cs.washington.edu, http://www.cs.washington.edu/homes/gjb */ + /* regex-posix.c -- POSIX regular expression support. @@ -78,6 +83,7 @@ #include "ports.h" #include "feature.h" +#include "scm_validate.h" #include "regex-posix.h" /* This is defined by some regex libraries and omitted by others. 
*/ @@ -85,42 +91,22 @@ #define REG_BASIC 0 #endif -long scm_tc16_regex_t; +long scm_tc16_regex; static scm_sizet -scm_free_regex_t (obj) - SCM obj; +free_regex (SCM obj) { regfree (SCM_RGX (obj)); free (SCM_RGX (obj)); return sizeof(regex_t); } -static int -scm_print_regex_t (obj, port, pstate) - SCM obj; - SCM port; - scm_print_state *pstate; -{ - regex_t *r; - r = SCM_RGX (obj); - scm_puts ("#", port); - return 1; -} - - -static scm_smobfuns regex_t_smob = -{ 0, scm_free_regex_t, scm_print_regex_t, 0 }; SCM_SYMBOL (scm_regexp_error_key, "regular-expression-syntax"); static char * -scm_regexp_error_msg (regerrno, rx) - int regerrno; - SCM rx; +scm_regexp_error_msg (int regerrno, regex_t *rx) { SCM errmsg; int l; @@ -137,36 +123,73 @@ scm_regexp_error_msg (regerrno, rx) errmsg = scm_make_string (SCM_MAKINUM (80), SCM_UNDEFINED); SCM_DEFER_INTS; - l = regerror (regerrno, SCM_RGX (rx), SCM_CHARS (errmsg), 80); + l = regerror (regerrno, rx, SCM_CHARS (errmsg), 80); if (l > 80) { errmsg = scm_make_string (SCM_MAKINUM (l), SCM_UNDEFINED); - regerror (regerrno, SCM_RGX (rx), SCM_CHARS (errmsg), l); + regerror (regerrno, rx, SCM_CHARS (errmsg), l); } SCM_ALLOW_INTS; return SCM_CHARS (errmsg); } -SCM_PROC (s_regexp_p, "regexp?", 1, 0, 0, scm_regexp_p); - -SCM -scm_regexp_p (x) - SCM x; +SCM_DEFINE (scm_regexp_p, "regexp?", 1, 0, 0, + (SCM x), +"Return @code{#t} if @var{obj} is a compiled regular expression, or +@code{#f} otherwise.") +#define FUNC_NAME s_scm_regexp_p { - return (SCM_NIMP (x) && SCM_RGXP (x) ? SCM_BOOL_T : SCM_BOOL_F); + return SCM_BOOL(SCM_RGXP (x)); } - -SCM_PROC (s_make_regexp, "make-regexp", 1, 0, 1, scm_make_regexp); - -SCM -scm_make_regexp (SCM pat, SCM flags) +#undef FUNC_NAME + +SCM_DEFINE (scm_make_regexp, "make-regexp", 1, 0, 1, + (SCM pat, SCM flags), +"Compile the regular expression described by @var{str}, and return the +compiled regexp structure. 
If @var{str} does not describe a legal +regular expression, @code{make-regexp} throws a +@code{regular-expression-syntax} error. + +The @var{flag} arguments change the behavior of the compiled regexp. +The following flags may be supplied: + +@table @code +@item regexp/icase +Consider uppercase and lowercase letters to be the same when matching. + +@item regexp/newline +If a newline appears in the target string, then permit the @samp{^} and +@samp{$} operators to match immediately after or immediately before the +newline, respectively. Also, the @samp{.} and @samp{[^...]} operators +will never match a newline character. The intent of this flag is to +treat the target string as a buffer containing many lines of text, and +the regular expression as a pattern that may match a single one of those +lines. + +@item regexp/basic +Compile a basic (``obsolete'') regexp instead of the extended +(``modern'') regexps that are the default. Basic regexps do not +consider @samp{|}, @samp{+} or @samp{?} to be special characters, and +require the @samp{@{...@}} and @samp{(...)} metacharacters to be +backslash-escaped (@pxref{Backslash Escapes}). There are several other +differences between basic and extended regular expressions, but these +are the most significant. + +@item regexp/extended +Compile an extended regular expression rather than a basic regexp. This +is the default behavior; this flag will not usually be needed. If a +call to @code{make-regexp} includes both @code{regexp/basic} and +@code{regexp/extended} flags, the one which comes last will override +the earlier one. +@end table +") +#define FUNC_NAME s_scm_make_regexp { - SCM result, flag; + SCM flag; regex_t *rx; int status, cflags; - SCM_ASSERT (SCM_NIMP(pat) && SCM_ROSTRINGP(pat), pat, SCM_ARG1, - s_make_regexp); + SCM_VALIDATE_ROSTRING (1,pat); SCM_COERCE_SUBSTR (pat); /* Examine list of regexp flags. 
If REG_BASIC is supplied, then @@ -182,56 +205,43 @@ scm_make_regexp (SCM pat, SCM flags) flag = SCM_CDR (flag); } - SCM_DEFER_INTS; - rx = (regex_t *) scm_must_malloc (sizeof (regex_t), s_make_regexp); + rx = SCM_MUST_MALLOC_TYPE(regex_t); status = regcomp (rx, SCM_ROCHARS (pat), /* Make sure they're not passing REG_NOSUB; regexp-exec assumes we're getting match data. */ cflags & ~REG_NOSUB); if (status != 0) { - SCM_ALLOW_INTS; scm_error (scm_regexp_error_key, - s_make_regexp, + FUNC_NAME, scm_regexp_error_msg (status, rx), SCM_BOOL_F, SCM_BOOL_F); /* never returns */ } - SCM_NEWCELL (result); - SCM_SETCAR (result, scm_tc16_regex_t); - SCM_SETCDR (result, rx); - SCM_ALLOW_INTS; - return result; + SCM_RETURN_NEWSMOB (scm_tc16_regex, rx); } - -SCM_PROC (s_regexp_exec, "regexp-exec", 2, 2, 0, scm_regexp_exec); - -SCM -scm_regexp_exec (SCM rx, SCM str, SCM start, SCM flags) +#undef FUNC_NAME + +SCM_DEFINE (scm_regexp_exec, "regexp-exec", 2, 2, 0, + (SCM rx, SCM str, SCM start, SCM flags), +"Match the compiled regular expression @var{regexp} against @code{str}. +If the optional integer @var{start} argument is provided, begin matching +from that position in the string. 
Return a match structure describing +the results of the match, or @code{#f} if no match could be found.") +#define FUNC_NAME s_scm_regexp_exec { int status, nmatches, offset; regmatch_t *matches; SCM mvec = SCM_BOOL_F; - SCM_ASSERT (SCM_NIMP (rx) && SCM_RGXP (rx), rx, SCM_ARG1, s_regexp_exec); - SCM_ASSERT (SCM_NIMP (str) && SCM_ROSTRINGP (str), str, SCM_ARG2, - s_regexp_exec); - - if (SCM_UNBNDP (start)) - offset = 0; - else - { - SCM_ASSERT (SCM_INUMP (start), start, SCM_ARG3, s_regexp_exec); - offset = SCM_INUM (start); - SCM_ASSERT (offset >= 0 && (unsigned) offset <= SCM_LENGTH (str), start, - SCM_OUTOFRANGE, s_regexp_exec); - } - + SCM_VALIDATE_RGXP (1,rx); + SCM_VALIDATE_ROSTRING (2,str); + SCM_VALIDATE_INUM_DEF_COPY (3,start,0,offset); + SCM_ASSERT_RANGE (3,start,offset >= 0 && (unsigned) offset <= SCM_LENGTH (str)); if (SCM_UNBNDP (flags)) flags = SCM_INUM0; - SCM_ASSERT (SCM_INUMP (flags), flags, SCM_ARG2, s_regexp_exec); - + SCM_VALIDATE_INUM (4,flags); SCM_COERCE_SUBSTR (str); /* re_nsub doesn't account for the `subexpression' representing the @@ -239,8 +249,7 @@ scm_regexp_exec (SCM rx, SCM str, SCM start, SCM flags) nmatches = SCM_RGX(rx)->re_nsub + 1; SCM_DEFER_INTS; - matches = (regmatch_t *) scm_must_malloc (sizeof (regmatch_t) * nmatches, - s_regexp_exec); + matches = SCM_MUST_MALLOC_TYPE_NUM (regmatch_t,nmatches); status = regexec (SCM_RGX (rx), SCM_ROCHARS (str) + offset, nmatches, matches, SCM_INUM (flags)); @@ -252,25 +261,31 @@ scm_regexp_exec (SCM rx, SCM str, SCM start, SCM flags) mvec = scm_make_vector (SCM_MAKINUM (nmatches + 1), SCM_UNSPECIFIED); SCM_VELTS(mvec)[0] = str; for (i = 0; i < nmatches; ++i) - SCM_VELTS(mvec)[i+1] = scm_cons(SCM_MAKINUM(matches[i].rm_so + offset), - SCM_MAKINUM(matches[i].rm_eo + offset)); + if (matches[i].rm_so == -1) + SCM_VELTS(mvec)[i+1] = scm_cons (SCM_MAKINUM (-1), SCM_MAKINUM (-1)); + else + SCM_VELTS(mvec)[i+1] + = scm_cons(SCM_MAKINUM(matches[i].rm_so + offset), + SCM_MAKINUM(matches[i].rm_eo + 
offset)); } scm_must_free ((char *) matches); SCM_ALLOW_INTS; if (status != 0 && status != REG_NOMATCH) scm_error (scm_regexp_error_key, - s_regexp_exec, - scm_regexp_error_msg (status), + FUNC_NAME, + scm_regexp_error_msg (status, SCM_RGX (rx)), SCM_BOOL_F, SCM_BOOL_F); return mvec; } +#undef FUNC_NAME void scm_init_regex_posix () { - scm_tc16_regex_t = scm_newsmob (&regex_t_smob); + scm_tc16_regex = scm_make_smob_type_mfpe ("regexp", sizeof (regex_t), + NULL, free_regex, NULL, NULL); /* Compilation flags. */ scm_sysintern ("regexp/basic", scm_long2num (REG_BASIC));