From: Andy Wingo Date: Thu, 6 Jan 2011 00:21:54 +0000 (-0600) Subject: add scm_{to,from}_{utf8,latin1}_string{n,} X-Git-Url: https://git.hcoop.net/bpt/guile.git/commitdiff_plain/d40e1ca893149e9781bad54ac1e39d03e7be988f?hp=929ccf48fc4bada585b29b3887f295bfcc1dcdaa add scm_{to,from}_{utf8,latin1}_string{n,} * libguile/strings.h: * libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New functions, in terms of the latin1_stringn variants. (scm_from_utf8_string, scm_from_utf8_stringn) (scm_to_utf8_string, scm_to_utf8_stringn): New functions. (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal functions. (scm_from_stringn): Handle -1 as a length. Unlike the previous behavior of scm_from_locale_string (NULL), which returned the empty string, we now raise an error. The null pointer is not the same as the empty string. * libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of utf8 functions. --- diff --git a/libguile/stime.c b/libguile/stime.c index 07dedf3b3..78aa6731a 100644 --- a/libguile/stime.c +++ b/libguile/stime.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -625,11 +625,11 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0, { struct tm t; - scm_t_uint8 *tbuf; + char *tbuf; int size = 50; - scm_t_uint8 *fmt; - scm_t_uint8 *myfmt; - int len; + char *fmt; + char *myfmt; + size_t len; SCM result; SCM_VALIDATE_STRING (1, format); @@ -637,8 +637,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0, /* Convert string to UTF-8 so that non-ASCII characters in the format are passed through unchanged. */ - fmt = scm_i_to_utf8_string (format); - len = strlen ((const char *) fmt); + fmt = scm_to_utf8_stringn (format, &len); /* Ugly hack: strftime can return 0 if its buffer is too small, but some valid time strings (e.g. "%p") can sometimes produce @@ -647,7 +646,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0, nonzero. */ myfmt = scm_malloc (len+2); *myfmt = (scm_t_uint8) 'x'; - strncpy ((char *) myfmt + 1, (const char *) fmt, len); + strncpy (myfmt + 1, fmt, len); myfmt[len + 1] = 0; scm_remember_upto_here_1 (format); free (fmt); @@ -685,8 +684,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0, /* Use `nstrftime ()' from Gnulib, which supports all GNU extensions supported by glibc. */ - while ((len = nstrftime ((char *) tbuf, size, - (const char *) myfmt, &t, 0, 0)) == 0) + while ((len = nstrftime (tbuf, size, myfmt, &t, 0, 0)) == 0) { free (tbuf); size *= 2; @@ -702,7 +700,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0, #endif } - result = scm_i_from_utf8_string ((const scm_t_uint8 *) tbuf + 1); + result = scm_from_utf8_string (tbuf + 1); free (tbuf); free (myfmt); #if HAVE_STRUCT_TM_TM_ZONE @@ -728,7 +726,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0, #define FUNC_NAME s_scm_strptime { struct tm t; - scm_t_uint8 *fmt, *str, *rest; + char *fmt, *str, *rest; size_t used_len; long zoff; @@ -737,8 +735,8 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0, /* Convert strings to UTF-8 so that non-ASCII characters are passed through unchanged. */ - fmt = scm_i_to_utf8_string (format); - str = scm_i_to_utf8_string (string); + fmt = scm_to_utf8_string (format); + str = scm_to_utf8_string (string); /* initialize the struct tm */ #define tm_init(field) t.field = 0 @@ -760,8 +758,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0, fields, hence the use of SCM_CRITICAL_SECTION_START. */ t.tm_isdst = -1; SCM_CRITICAL_SECTION_START; - rest = (scm_t_uint8 *) strptime ((const char *) str, - (const char *) fmt, &t); + rest = strptime (str, fmt, &t); SCM_CRITICAL_SECTION_END; if (rest == NULL) { @@ -784,7 +781,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0, #endif /* Compute the number of UTF-8 characters. */ - used_len = u8_strnlen (str, rest-str); + used_len = u8_strnlen ((scm_t_uint8*) str, rest-str); scm_remember_upto_here_2 (format, string); free (str); free (fmt); diff --git a/libguile/strings.c b/libguile/strings.c index 71f0b5274..74bdc69a4 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -1437,8 +1437,13 @@ scm_from_stringn (const char *str, size_t len, const char *encoding, int wide = 0; SCM res; + /* The order of these checks is important. */ if (len == 0) return scm_nullstr; + if (!str) + scm_misc_error ("scm_from_stringn", "NULL string pointer", SCM_EOL); + if (len == (size_t) -1) + len = strlen (str); if (encoding == NULL) { @@ -1502,9 +1507,9 @@ scm_from_stringn (const char *str, size_t len, const char *encoding, } SCM -scm_from_latin1_stringn (const char *str, size_t len) +scm_from_locale_string (const char *str) { - return scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR); + return scm_from_locale_stringn (str, -1); } SCM @@ -1515,11 +1520,6 @@ scm_from_locale_stringn (const char *str, size_t len) SCM inport; scm_t_port *pt; - if (len == (size_t) -1) - len = strlen (str); - if (len == 0) - return scm_nullstr; - inport = scm_current_input_port (); if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport)) { @@ -1537,20 +1537,27 @@ scm_from_locale_stringn (const char *str, size_t len) } SCM -scm_from_locale_string (const char *str) +scm_from_latin1_string (const char *str) { - if (str == NULL) - return scm_nullstr; + return scm_from_latin1_stringn (str, -1); +} - return scm_from_locale_stringn (str, -1); +SCM +scm_from_latin1_stringn (const char *str, size_t len) +{ + return scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR); } SCM -scm_i_from_utf8_string (const scm_t_uint8 *str) +scm_from_utf8_string (const char *str) { - return scm_from_stringn ((const char *) str, - strlen ((char *) str), "UTF-8", - SCM_FAILED_CONVERSION_ERROR); + return scm_from_utf8_stringn (str, -1); +} + +SCM +scm_from_utf8_stringn (const char *str, size_t len) +{ + return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR); } /* Create a new scheme string from the C string STR. The memory of @@ -1707,9 +1714,9 @@ scm_i_unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp) } char * -scm_to_latin1_stringn (SCM str, size_t *lenp) +scm_to_locale_string (SCM str) { - return scm_to_stringn (str, lenp, NULL, SCM_FAILED_CONVERSION_ERROR); + return scm_to_locale_stringn (str, NULL); } char * @@ -1733,6 +1740,30 @@ scm_to_locale_stringn (SCM str, size_t *lenp) scm_i_get_conversion_strategy (SCM_BOOL_F)); } +char * +scm_to_latin1_string (SCM str) +{ + return scm_to_latin1_stringn (str, NULL); +} + +char * +scm_to_latin1_stringn (SCM str, size_t *lenp) +{ + return scm_to_stringn (str, lenp, NULL, SCM_FAILED_CONVERSION_ERROR); +} + +char * +scm_to_utf8_string (SCM str) +{ + return scm_to_utf8_stringn (str, NULL); +} + +char * +scm_to_utf8_stringn (SCM str, size_t *lenp) +{ + return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR); +} + /* Return a malloc(3)-allocated buffer containing the contents of STR encoded according to ENCODING. If LENP is non-NULL, set it to the size in bytes of the returned buffer. If the conversion to ENCODING fails, apply the strategy @@ -1845,20 +1876,6 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding, return buf; } -char * -scm_to_locale_string (SCM str) -{ - return scm_to_locale_stringn (str, NULL); -} - -scm_t_uint8 * -scm_i_to_utf8_string (SCM str) -{ - char *u8str; - u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR); - return (scm_t_uint8 *) u8str; -} - size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len) { diff --git a/libguile/strings.h b/libguile/strings.h index 00bc22430..1a8ff7c33 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -3,7 +3,7 @@ #ifndef SCM_STRINGS_H #define SCM_STRINGS_H -/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -125,18 +125,31 @@ SCM_API SCM scm_c_substring_read_only (SCM str, size_t start, size_t end); SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end); SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end); -SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len); +/* Use locale encoding for user input, user output, or interacting with + the C library. Use latin1 for ASCII, and for literals in source + code. Use utf8 for interaction with modern libraries which deal in + UTF-8. Otherwise use scm_to_stringn or scm_from_stringn with a + specific encoding. */ + SCM_API SCM scm_from_locale_string (const char *str); SCM_API SCM scm_from_locale_stringn (const char *str, size_t len); -SCM_INTERNAL SCM scm_i_from_utf8_string (const scm_t_uint8 *str); SCM_API SCM scm_take_locale_string (char *str); SCM_API SCM scm_take_locale_stringn (char *str, size_t len); -SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp); SCM_API char *scm_to_locale_string (SCM str); SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp); + +SCM_API SCM scm_from_latin1_string (const char *str); +SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len); +SCM_API char *scm_to_latin1_string (SCM str); +SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp); + +SCM_API char *scm_to_utf8_string (SCM str); +SCM_API char *scm_to_utf8_stringn (SCM str, size_t *lenp); +SCM_API SCM scm_from_utf8_string (const char *str); +SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len); + SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding, scm_t_string_failed_conversion_handler handler); -SCM_INTERNAL scm_t_uint8 *scm_i_to_utf8_string (SCM str); SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len); SCM_API SCM scm_string_normalize_nfd (SCM str);