X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/e9b8556ec92039396e740620238d56a3748f2a99..cf512e32687b41690ab436f13322d7a9e00094b7:/libguile/i18n.c diff --git a/libguile/i18n.c b/libguile/i18n.c index 2a778eb91..f0e344329 100644 --- a/libguile/i18n.c +++ b/libguile/i18n.c @@ -1,44 +1,29 @@ -/* Copyright (C) 2006, 2007 Free Software Foundation, Inc. +/* Copyright (C) 2006-2014 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 3 of + * the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA */ -#define _GNU_SOURCE /* Ask for glibc's `newlocale' API */ - -#if HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H # include #endif -#if HAVE_ALLOCA_H -# include -#elif defined __GNUC__ -# define alloca __builtin_alloca -#elif defined _AIX -# define alloca __alloca -#elif defined _MSC_VER -# include -# define alloca _alloca -#else -# include -# ifdef __cplusplus -extern "C" -# endif -void *alloca (size_t); -#endif +#include #include "libguile/_scm.h" +#include "libguile/extensions.h" #include "libguile/feature.h" #include "libguile/i18n.h" #include "libguile/strings.h" @@ -52,8 +37,10 @@ void *alloca (size_t); #include /* `strcoll ()' */ #include /* `toupper ()' et al. */ #include +#include +#include -#if (defined HAVE_NEWLOCALE) && (defined HAVE_STRCOLL_L) +#if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE /* The GNU thread-aware locale API is documented in ``Thread-Aware Locale Model, a Proposal'', by Ulrich Drepper: @@ -63,21 +50,17 @@ void *alloca (size_t); http://developer.apple.com/documentation/Darwin/Reference/ManPages/man3/newlocale.3.html - The whole API is being standardized by the X/Open Group (as of Jan. 2007) - following Drepper's proposal. */ -# define USE_GNU_LOCALE_API -#endif + The whole API was eventually standardized in the ``Open Group Base + Specifications Issue 7'' (aka. "POSIX 2008"): -#if (defined USE_GNU_LOCALE_API) && (defined HAVE_XLOCALE_H) -# include + http://www.opengroup.org/onlinepubs/9699919799/basedefs/locale.h.html */ +# define USE_GNU_LOCALE_API #endif #include "libguile/posix.h" /* for `scm_i_locale_mutex' */ -#if (defined HAVE_LANGINFO_H) && (defined HAVE_NL_TYPES_H) -# include -# include -#endif +/* Use Gnulib's header, which also provides `nl_item' & co. */ +#include #ifndef HAVE_SETLOCALE static inline char * @@ -91,6 +74,25 @@ setlocale (int category, const char *name) /* Helper stringification macro. */ #define SCM_I18N_STRINGIFY(_name) # _name +/* Acquiring and releasing the locale lock. */ + +static inline void +lock_locale_mutex (void) +{ +#ifdef HAVE_POSIX + scm_i_pthread_mutex_lock (&scm_i_locale_mutex); +#else +#endif +} + +static inline void +unlock_locale_mutex (void) +{ +#ifdef HAVE_POSIX + scm_i_pthread_mutex_unlock (&scm_i_locale_mutex); +#else +#endif +} /* Locale objects, string and character collation, and other locale-dependent @@ -186,22 +188,11 @@ typedef struct scm_locale int category_mask; } *scm_t_locale; - -/* Free the resources used by LOCALE. */ -static inline void -scm_i_locale_free (scm_t_locale locale) -{ - free (locale->locale_name); - locale->locale_name = NULL; -} - #else /* USE_GNU_LOCALE_API */ /* Alias for glibc's locale type. */ typedef locale_t scm_t_locale; -#define scm_i_locale_free freelocale - #endif /* USE_GNU_LOCALE_API */ @@ -224,7 +215,7 @@ SCM_GLOBAL_VARIABLE (scm_global_locale, "%global-locale"); #define SCM_VALIDATE_OPTIONAL_LOCALE_COPY(_pos, _arg, _c_locale) \ do \ { \ - if ((_arg) != SCM_UNDEFINED) \ + if (!SCM_UNBNDP (_arg)) \ SCM_VALIDATE_LOCALE_COPY (_pos, _arg, _c_locale); \ else \ (_c_locale) = NULL; \ @@ -234,35 +225,20 @@ SCM_GLOBAL_VARIABLE (scm_global_locale, "%global-locale"); SCM_SMOB (scm_tc16_locale_smob_type, "locale", 0); +#ifdef USE_GNU_LOCALE_API + SCM_SMOB_FREE (scm_tc16_locale_smob_type, smob_locale_free, locale) { scm_t_locale c_locale; c_locale = (scm_t_locale) SCM_SMOB_DATA (locale); - scm_i_locale_free (c_locale); + if (c_locale) + freelocale (c_locale); return 0; } -#ifndef USE_GNU_LOCALE_API -static SCM -smob_locale_mark (SCM locale) -{ - register SCM dependency; - - if (!scm_is_eq (locale, SCM_VARIABLE_REF (scm_global_locale))) - { - scm_t_locale c_locale; - - c_locale = (scm_t_locale) SCM_SMOB_DATA (locale); - dependency = (c_locale->base_locale); - } - else - dependency = SCM_BOOL_F; - - return dependency; -} -#endif +#endif /* USE_GNU_LOCALE_API */ static void inline scm_locale_error (const char *, int) SCM_NORETURN; @@ -410,7 +386,7 @@ install_locale (scm_t_locale locale) account. */ category_mask |= locale->category_mask; - if (locale->base_locale != SCM_UNDEFINED) + if (!SCM_UNBNDP (locale->base_locale)) locale = (scm_t_locale) SCM_SMOB_DATA (locale->base_locale); else locale = NULL; @@ -450,7 +426,7 @@ leave_locale_section (const scm_t_locale_settings *settings) /* Restore the previous locale settings. */ (void)restore_locale_settings (settings); - scm_i_pthread_mutex_unlock (&scm_i_locale_mutex); + unlock_locale_mutex (); } /* Enter a locked locale section. */ @@ -460,12 +436,12 @@ enter_locale_section (scm_t_locale locale, { int err; - scm_i_pthread_mutex_lock (&scm_i_locale_mutex); + lock_locale_mutex (); err = get_current_locale_settings (prev_locale); if (err) { - scm_i_pthread_mutex_unlock (&scm_i_locale_mutex); + unlock_locale_mutex (); return err; } @@ -511,34 +487,43 @@ get_current_locale (SCM *result) c_locale = scm_gc_malloc (sizeof (* c_locale), "locale"); - - scm_i_pthread_mutex_lock (&scm_i_locale_mutex); + lock_locale_mutex (); c_locale->category_mask = LC_ALL_MASK; c_locale->base_locale = SCM_UNDEFINED; current_locale = setlocale (LC_ALL, NULL); if (current_locale != NULL) - { - c_locale->locale_name = strdup (current_locale); - if (c_locale->locale_name == NULL) - err = ENOMEM; - } + c_locale->locale_name = scm_gc_strdup (current_locale, "locale"); else err = EINVAL; - scm_i_pthread_mutex_unlock (&scm_i_locale_mutex); + unlock_locale_mutex (); - if (err) - scm_gc_free (c_locale, sizeof (* c_locale), "locale"); - else + if (err == 0) SCM_NEWSMOB (*result, scm_tc16_locale_smob_type, c_locale); + else + *result = SCM_BOOL_F; return err; } +#else /* USE_GNU_LOCALE_API */ + +/* Convenient macro to run STATEMENT in the locale context of C_LOCALE. */ +#define RUN_IN_LOCALE_SECTION(_c_locale, _statement) \ + do \ + { \ + scm_t_locale old_loc; \ + \ + old_loc = uselocale (_c_locale); \ + _statement ; \ + uselocale (old_loc); \ + } \ + while (0) + -#endif /* !USE_GNU_LOCALE_API */ +#endif /* USE_GNU_LOCALE_API */ @@ -635,27 +620,14 @@ SCM_DEFINE (scm_make_locale, "make-locale", 2, 1, 0, #ifdef USE_GNU_LOCALE_API if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale))) - { - /* Fetch the current locale and turn in into a `locale_t'. Don't - duplicate the resulting `locale_t' because we want it to be consumed - by `newlocale ()'. */ - char *current_locale; - - scm_i_pthread_mutex_lock (&scm_i_locale_mutex); + c_base_locale = LC_GLOBAL_LOCALE; - current_locale = setlocale (LC_ALL, NULL); - c_base_locale = newlocale (LC_ALL_MASK, current_locale, NULL); - - scm_i_pthread_mutex_unlock (&scm_i_locale_mutex); - - if (c_base_locale == (locale_t) 0) - scm_locale_error (FUNC_NAME, errno); - } - else if (c_base_locale != (locale_t) 0) + if (c_base_locale != (locale_t) 0) { /* C_BASE_LOCALE is to be consumed by `newlocale ()' so it needs to be duplicated before. */ c_base_locale = duplocale (c_base_locale); + if (c_base_locale == (locale_t) 0) { err = errno; @@ -666,13 +638,12 @@ SCM_DEFINE (scm_make_locale, "make-locale", 2, 1, 0, c_locale = newlocale (c_category_mask, c_locale_name, c_base_locale); free (c_locale_name); + c_locale_name = NULL; if (c_locale == (locale_t) 0) { - if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale))) - /* The base locale object was created lazily and must be freed. */ + if (c_base_locale != (locale_t) 0) freelocale (c_base_locale); - scm_locale_error (FUNC_NAME, errno); } else @@ -683,7 +654,9 @@ SCM_DEFINE (scm_make_locale, "make-locale", 2, 1, 0, c_locale = scm_gc_malloc (sizeof (* c_locale), "locale"); c_locale->category_mask = c_category_mask; - c_locale->locale_name = c_locale_name; + c_locale->locale_name = scm_gc_strdup (c_locale_name, "locale"); + free (c_locale_name); + c_locale_name = NULL; if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale))) { @@ -702,14 +675,18 @@ SCM_DEFINE (scm_make_locale, "make-locale", 2, 1, 0, scm_t_locale_settings prev_locale; err = enter_locale_section (c_locale, &prev_locale); - leave_locale_section (&prev_locale); if (err) goto fail; else - SCM_NEWSMOB (locale, scm_tc16_locale_smob_type, c_locale); + { + leave_locale_section (&prev_locale); + SCM_NEWSMOB (locale, scm_tc16_locale_smob_type, c_locale); + } } + /* silence gcc's unused variable warning */ + (void) c_base_locale; #endif return locale; @@ -741,39 +718,117 @@ SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0, A similar API can be found in MzScheme starting from version 200: http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */ +#define SCM_STRING_TO_U32_BUF(s1, c_s1) \ + do \ + { \ + if (scm_i_is_narrow_string (s1)) \ + { \ + size_t i, len; \ + const char *buf = scm_i_string_chars (s1); \ + \ + len = scm_i_string_length (s1); \ + c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1)); \ + \ + for (i = 0; i < len; i ++) \ + c_s1[i] = (unsigned char ) buf[i]; \ + c_s1[len] = 0; \ + } \ + else \ + c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \ + } while (0) + -/* Compare null-terminated strings C_S1 and C_S2 according to LOCALE. Return - an integer whose sign is the same as the difference between C_S1 and - C_S2. */ +/* Compare UTF-32 strings according to LOCALE. Returns a negative value if + S1 compares smaller than S2, a positive value if S1 compares larger than + S2, or 0 if they compare equal. */ static inline int -compare_strings (const char *c_s1, const char *c_s2, SCM locale, - const char *func_name) +compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name) #define FUNC_NAME func_name { int result; scm_t_locale c_locale; + scm_t_wchar *c_s1, *c_s2; + SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); + + SCM_STRING_TO_U32_BUF (s1, c_s1); + SCM_STRING_TO_U32_BUF (s2, c_s2); + + if (c_locale) + RUN_IN_LOCALE_SECTION (c_locale, + result = u32_strcoll ((const scm_t_uint32 *) c_s1, + (const scm_t_uint32 *) c_s2)); + else + result = u32_strcoll ((const scm_t_uint32 *) c_s1, + (const scm_t_uint32 *) c_s2); + scm_remember_upto_here_2 (s1, s2); + scm_remember_upto_here (locale); + return result; +} +#undef FUNC_NAME + +/* Return the current language of the locale. */ +static const char * +locale_language () +{ + /* Note: If the locale has been set with 'uselocale', uc_locale_language + from libunistring versions 0.9.1 and older will return the incorrect + (non-thread-specific) locale. This is fixed in versions 0.9.2 and + newer. */ + return uc_locale_language (); +} + +static inline int +u32_locale_casecoll (const char *func_name, const scm_t_uint32 *c_s1, + const scm_t_uint32 *c_s2, + int *result) +{ + /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must note + make any non-local exit. */ + + int ret; + const char *loc = locale_language (); + + ret = u32_casecoll (c_s1, u32_strlen (c_s1), + c_s2, u32_strlen (c_s2), + loc, UNINORM_NFC, result); + + return ret == 0 ? ret : errno; +} + +static inline int +compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name) +#define FUNC_NAME func_name +{ + int result, ret = 0; + scm_t_locale c_locale; + scm_t_wchar *c_s1, *c_s2; SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); + SCM_STRING_TO_U32_BUF (s1, c_s1); + SCM_STRING_TO_U32_BUF (s2, c_s2); + if (c_locale) + RUN_IN_LOCALE_SECTION + (c_locale, + ret = u32_locale_casecoll (func_name, + (const scm_t_uint32 *) c_s1, + (const scm_t_uint32 *) c_s2, + &result)); + else + ret = u32_locale_casecoll (func_name, + (const scm_t_uint32 *) c_s1, + (const scm_t_uint32 *) c_s2, + &result); + + if (SCM_UNLIKELY (ret != 0)) { -#ifdef USE_GNU_LOCALE_API - result = strcoll_l (c_s1, c_s2, c_locale); -#else -#ifdef HAVE_STRCOLL - RUN_IN_LOCALE_SECTION (c_locale, result = strcoll (c_s1, c_s2)); -#else - result = strcmp (c_s1, c_s2); -#endif -#endif /* !USE_GNU_LOCALE_API */ + errno = ret; + scm_syserror (FUNC_NAME); } - else -#ifdef HAVE_STRCOLL - result = strcoll (c_s1, c_s2); -#else - result = strcmp (c_s1, c_s2); -#endif + scm_remember_upto_here_2 (s1, s2); + scm_remember_upto_here (locale); return result; } @@ -784,7 +839,7 @@ static inline void str_upcase (register char *dst, register const char *src) { for (; *src != '\0'; src++, dst++) - *dst = toupper (*src); + *dst = toupper ((int) *src); *dst = '\0'; } @@ -792,7 +847,7 @@ static inline void str_downcase (register char *dst, register const char *src) { for (; *src != '\0'; src++, dst++) - *dst = tolower (*src); + *dst = tolower ((int) *src); *dst = '\0'; } @@ -817,71 +872,6 @@ str_downcase_l (register char *dst, register const char *src, #endif -/* Compare null-terminated strings C_S1 and C_S2 in a case-independent way - according to LOCALE. Return an integer whose sign is the same as the - difference between C_S1 and C_S2. */ -static inline int -compare_strings_ci (const char *c_s1, const char *c_s2, SCM locale, - const char *func_name) -#define FUNC_NAME func_name -{ - int result; - scm_t_locale c_locale; - char *c_us1, *c_us2; - - SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); - - c_us1 = (char *) alloca (strlen (c_s1) + 1); - c_us2 = (char *) alloca (strlen (c_s2) + 1); - - if (c_locale) - { -#ifdef USE_GNU_LOCALE_API - str_upcase_l (c_us1, c_s1, c_locale); - str_upcase_l (c_us2, c_s2, c_locale); - - result = strcoll_l (c_us1, c_us2, c_locale); -#else - int err; - scm_t_locale_settings prev_locale; - - err = enter_locale_section (c_locale, &prev_locale); - if (err) - { - scm_locale_error (func_name, err); - return 0; - } - - str_upcase (c_us1, c_s1); - str_upcase (c_us2, c_s2); - -#ifdef HAVE_STRCOLL - result = strcoll (c_us1, c_us2); -#else - result = strcmp (c_us1, c_us2); -#endif /* !HAVE_STRCOLL */ - - leave_locale_section (&prev_locale); - free_locale_settings (&prev_locale); -#endif /* !USE_GNU_LOCALE_API */ - } - else - { - str_upcase (c_us1, c_s1); - str_upcase (c_us2, c_s2); - -#ifdef HAVE_STRCOLL - result = strcoll (c_us1, c_us2); -#else - result = strcmp (c_us1, c_us2); -#endif - } - - return result; -} -#undef FUNC_NAME - - SCM_DEFINE (scm_string_locale_lt, "string-locale?", 2, 1, 0, #define FUNC_NAME s_scm_string_locale_gt { int result; - const char *c_s1, *c_s2; SCM_VALIDATE_STRING (1, s1); SCM_VALIDATE_STRING (2, s2); - c_s1 = scm_i_string_chars (s1); - c_s2 = scm_i_string_chars (s2); - - result = compare_strings (c_s1, c_s2, locale, FUNC_NAME); - - scm_remember_upto_here_2 (s1, s2); + result = compare_u32_strings (s1, s2, locale, FUNC_NAME); return scm_from_bool (result > 0); } @@ -942,17 +920,11 @@ SCM_DEFINE (scm_string_locale_ci_lt, "string-locale-ci?", 2, 1, 0, #define FUNC_NAME s_scm_string_locale_ci_gt { int result; - const char *c_s1, *c_s2; SCM_VALIDATE_STRING (1, s1); SCM_VALIDATE_STRING (2, s2); - c_s1 = scm_i_string_chars (s1); - c_s2 = scm_i_string_chars (s2); - - result = compare_strings_ci (c_s1, c_s2, locale, FUNC_NAME); - - scm_remember_upto_here_2 (s1, s2); + result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME); return scm_from_bool (result > 0); } @@ -994,17 +960,11 @@ SCM_DEFINE (scm_string_locale_ci_eq, "string-locale-ci=?", 2, 1, 0, #define FUNC_NAME s_scm_string_locale_ci_eq { int result; - const char *c_s1, *c_s2; SCM_VALIDATE_STRING (1, s1); SCM_VALIDATE_STRING (2, s2); - c_s1 = scm_i_string_chars (s1); - c_s2 = scm_i_string_chars (s2); - - result = compare_strings_ci (c_s1, c_s2, locale, FUNC_NAME); - - scm_remember_upto_here_2 (s1, s2); + result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME); return scm_from_bool (result == 0); } @@ -1017,15 +977,16 @@ SCM_DEFINE (scm_char_locale_lt, "char-locale?", 2, 1, 0, "according to @var{locale} or to the current locale.") #define FUNC_NAME s_scm_char_locale_gt { - char c_c1[2], c_c2[2]; + int result; SCM_VALIDATE_CHAR (1, c1); SCM_VALIDATE_CHAR (2, c2); - c_c1[0] = (char)SCM_CHAR (c1); c_c1[1] = '\0'; - c_c2[0] = (char)SCM_CHAR (c2); c_c2[1] = '\0'; + result = compare_u32_strings (scm_string (scm_list_1 (c1)), + scm_string (scm_list_1 (c2)), + locale, FUNC_NAME); - return scm_from_bool (compare_strings (c_c1, c_c2, locale, FUNC_NAME) > 0); + return scm_from_bool (result > 0); } #undef FUNC_NAME @@ -1055,15 +1017,13 @@ SCM_DEFINE (scm_char_locale_ci_lt, "char-locale-ci?", 2, 1, 0, #define FUNC_NAME s_scm_char_locale_ci_gt { int result; - char c_c1[2], c_c2[2]; SCM_VALIDATE_CHAR (1, c1); SCM_VALIDATE_CHAR (2, c2); - c_c1[0] = (char)SCM_CHAR (c1); c_c1[1] = '\0'; - c_c2[0] = (char)SCM_CHAR (c2); c_c2[1] = '\0'; - - result = compare_strings_ci (c_c1, c_c2, locale, FUNC_NAME); + result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)), + scm_string (scm_list_1 (c2)), + locale, FUNC_NAME); return scm_from_bool (result > 0); } @@ -1099,15 +1057,13 @@ SCM_DEFINE (scm_char_locale_ci_eq, "char-locale-ci=?", 2, 1, 0, #define FUNC_NAME s_scm_char_locale_ci_eq { int result; - char c_c1[2], c_c2[2]; SCM_VALIDATE_CHAR (1, c1); SCM_VALIDATE_CHAR (2, c2); - c_c1[0] = (char)SCM_CHAR (c1); c_c1[1] = '\0'; - c_c2[0] = (char)SCM_CHAR (c2); c_c2[1] = '\0'; - - result = compare_strings_ci (c_c1, c_c2, locale, FUNC_NAME); + result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)), + scm_string (scm_list_1 (c2)), + locale, FUNC_NAME); return scm_from_bool (result == 0); } @@ -1117,33 +1073,96 @@ SCM_DEFINE (scm_char_locale_ci_eq, "char-locale-ci=?", 2, 1, 0, /* Locale-dependent alphabetic character mapping. */ +static inline int +u32_locale_tocase (const scm_t_uint32 *c_s1, size_t len, + scm_t_uint32 **p_c_s2, size_t * p_len2, + scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, + const char *, uninorm_t, + scm_t_uint32 *, size_t *)) +{ + /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must not + make any non-local exit. */ + + scm_t_uint32 *ret; + const char *loc = locale_language (); + + /* The first NULL here indicates that no NFC or NFKC normalization + is done. The second NULL means the return buffer is + malloc'ed here. */ + ret = func (c_s1, len, loc, NULL, NULL, p_len2); + + if (ret == NULL) + { + *p_c_s2 = (scm_t_uint32 *) NULL; + *p_len2 = 0; + return errno; + } + *p_c_s2 = ret; + + return 0; +} + + +static SCM +chr_to_case (SCM chr, scm_t_locale c_locale, + scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *, + uninorm_t, scm_t_uint32 *, size_t *), + const char *func_name, + int *err) +#define FUNC_NAME func_name +{ + int ret; + scm_t_uint32 c; + scm_t_uint32 *convbuf; + size_t convlen; + SCM convchar; + + c = SCM_CHAR (chr); + + if (c_locale != NULL) + RUN_IN_LOCALE_SECTION (c_locale, ret = + u32_locale_tocase (&c, 1, &convbuf, &convlen, func)); + else + ret = + u32_locale_tocase (&c, 1, &convbuf, &convlen, func); + + if (SCM_UNLIKELY (ret != 0)) + { + *err = ret; + return SCM_BOOL_F; + } + + if (convlen == 1) + convchar = SCM_MAKE_CHAR ((scm_t_wchar) convbuf[0]); + else + convchar = chr; + free (convbuf); + + return convchar; +} +#undef FUNC_NAME + SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0, (SCM chr, SCM locale), "Return the lowercase character that corresponds to @var{chr} " "according to either @var{locale} or the current locale.") #define FUNC_NAME s_scm_char_locale_downcase { - char c_chr; - int c_result; scm_t_locale c_locale; - + SCM ret; + int err = 0; + SCM_VALIDATE_CHAR (1, chr); - c_chr = SCM_CHAR (chr); - SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); - if (c_locale != NULL) + ret = chr_to_case (chr, c_locale, u32_tolower, FUNC_NAME, &err); + + if (err != 0) { -#ifdef USE_GNU_LOCALE_API - c_result = tolower_l (c_chr, c_locale); -#else - RUN_IN_LOCALE_SECTION (c_locale, c_result = tolower (c_chr)); -#endif + errno = err; + scm_syserror (FUNC_NAME); } - else - c_result = tolower (c_chr); - - return (SCM_MAKE_CHAR (c_result)); + return ret; } #undef FUNC_NAME @@ -1153,27 +1172,92 @@ SCM_DEFINE (scm_char_locale_upcase, "char-locale-upcase", 1, 1, 0, "according to either @var{locale} or the current locale.") #define FUNC_NAME s_scm_char_locale_upcase { - char c_chr; - int c_result; scm_t_locale c_locale; + SCM ret; + int err = 0; SCM_VALIDATE_CHAR (1, chr); - c_chr = SCM_CHAR (chr); + SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); + + ret = chr_to_case (chr, c_locale, u32_toupper, FUNC_NAME, &err); + + if (err != 0) + { + errno = err; + scm_syserror (FUNC_NAME); + } + return ret; +} +#undef FUNC_NAME + +SCM_DEFINE (scm_char_locale_titlecase, "char-locale-titlecase", 1, 1, 0, + (SCM chr, SCM locale), + "Return the titlecase character that corresponds to @var{chr} " + "according to either @var{locale} or the current locale.") +#define FUNC_NAME s_scm_char_locale_titlecase +{ + scm_t_locale c_locale; + SCM ret; + int err = 0; + SCM_VALIDATE_CHAR (1, chr); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); - if (c_locale != NULL) + ret = chr_to_case (chr, c_locale, u32_totitle, FUNC_NAME, &err); + + if (err != 0) { -#ifdef USE_GNU_LOCALE_API - c_result = toupper_l (c_chr, c_locale); -#else - RUN_IN_LOCALE_SECTION (c_locale, c_result = toupper (c_chr)); -#endif + errno = err; + scm_syserror (FUNC_NAME); } + return ret; +} +#undef FUNC_NAME + +static SCM +str_to_case (SCM str, scm_t_locale c_locale, + scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *, + uninorm_t, scm_t_uint32 *, size_t *), + const char *func_name, + int *err) +#define FUNC_NAME func_name +{ + scm_t_wchar *c_str, *c_buf; + scm_t_uint32 *c_convstr; + size_t len, convlen; + int ret; + SCM convstr; + + len = scm_i_string_length (str); + if (len == 0) + return scm_nullstr; + SCM_STRING_TO_U32_BUF (str, c_str); + + if (c_locale) + RUN_IN_LOCALE_SECTION (c_locale, ret = + u32_locale_tocase ((scm_t_uint32 *) c_str, len, + &c_convstr, + &convlen, func)); else - c_result = toupper (c_chr); + ret = + u32_locale_tocase ((scm_t_uint32 *) c_str, len, + &c_convstr, &convlen, func); + + scm_remember_upto_here (str); + + if (SCM_UNLIKELY (ret != 0)) + { + *err = ret; + return SCM_BOOL_F; + } - return (SCM_MAKE_CHAR (c_result)); + convstr = scm_i_make_wide_string (convlen, &c_buf, 0); + memcpy (c_buf, c_convstr, convlen * sizeof (scm_t_wchar)); + free (c_convstr); + + scm_i_try_narrow_string (convstr); + + return convstr; } #undef FUNC_NAME @@ -1184,30 +1268,21 @@ SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0, "locale.") #define FUNC_NAME s_scm_string_locale_upcase { - const char *c_str; - char *c_ustr; scm_t_locale c_locale; + SCM ret; + int err = 0; SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); - c_str = scm_i_string_chars (str); - c_ustr = (char *) alloca (strlen (c_str) + 1); - - if (c_locale) + ret = str_to_case (str, c_locale, u32_toupper, FUNC_NAME, &err); + + if (err != 0) { -#ifdef USE_GNU_LOCALE_API - str_upcase_l (c_ustr, c_str, c_locale); -#else - RUN_IN_LOCALE_SECTION (c_locale, str_upcase (c_ustr, c_str)); -#endif + errno = err; + scm_syserror (FUNC_NAME); } - else - str_upcase (c_ustr, c_str); - - scm_remember_upto_here (str); - - return (scm_from_locale_string (c_ustr)); + return ret; } #undef FUNC_NAME @@ -1218,30 +1293,46 @@ SCM_DEFINE (scm_string_locale_downcase, "string-locale-downcase", 1, 1, 0, "locale.") #define FUNC_NAME s_scm_string_locale_downcase { - const char *c_str; - char *c_lstr; scm_t_locale c_locale; + SCM ret; + int err = 0; SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); - c_str = scm_i_string_chars (str); - c_lstr = (char *) alloca (strlen (c_str) + 1); + ret = str_to_case (str, c_locale, u32_tolower, FUNC_NAME, &err); - if (c_locale) + if (err != 0) { -#ifdef USE_GNU_LOCALE_API - str_downcase_l (c_lstr, c_str, c_locale); -#else - RUN_IN_LOCALE_SECTION (c_locale, str_downcase (c_lstr, c_str)); -#endif + errno = err; + scm_syserror (FUNC_NAME); } - else - str_downcase (c_lstr, c_str); + return ret; +} +#undef FUNC_NAME - scm_remember_upto_here (str); +SCM_DEFINE (scm_string_locale_titlecase, "string-locale-titlecase", 1, 1, 0, + (SCM str, SCM locale), + "Return a new string that is the title-case version of " + "@var{str} according to either @var{locale} or the current " + "locale.") +#define FUNC_NAME s_scm_string_locale_titlecase +{ + scm_t_locale c_locale; + SCM ret; + int err = 0; - return (scm_from_locale_string (c_lstr)); + SCM_VALIDATE_STRING (1, str); + SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); + + ret = str_to_case (str, c_locale, u32_totitle, FUNC_NAME, &err); + + if (err != 0) + { + errno = err; + scm_syserror (FUNC_NAME); + } + return ret; } #undef FUNC_NAME @@ -1273,7 +1364,7 @@ SCM_DEFINE (scm_locale_string_to_integer, "locale-string->integer", SCM_VALIDATE_STRING (1, str); c_str = scm_i_string_chars (str); - if (base != SCM_UNDEFINED) + if (!scm_is_eq (base, SCM_UNDEFINED)) SCM_VALIDATE_INT_COPY (2, base, c_base); else c_base = 10; @@ -1360,16 +1451,28 @@ SCM_DEFINE (scm_locale_string_to_inexact, "locale-string->inexact", 2. `nl_langinfo ()' is not available on Windows. 3. `nl_langinfo ()' may return strings encoded in a locale different from - the current one, thereby defeating `scm_from_locale_string ()'. - Example: support the current locale is "Latin-1" and one asks: + the current one. + For example: (nl-langinfo DAY_1 (make-locale LC_ALL "eo_EO.UTF-8")) - The result will be a UTF-8 string. However, `scm_from_locale_string', - which expects a Latin-1 string, won't be able to make much sense of the - returned string. Thus, we'd need an `scm_from_string ()' variant where - the locale (or charset) is explicitly passed. */ + returns a result that is a UTF-8 string, regardless of the + setting of the current locale. If nl_langinfo supports CODESET, + we can convert the string properly using scm_from_stringn. If + CODESET is not supported, we won't be able to make much sense of + the returned string. + + Note: We don't use Gnulib's `nl_langinfo' module because it's currently not + as complete as the compatibility hacks in `i18n.scm'. */ +static char * +copy_string_or_null (const char *s) +{ + if (s == NULL) + return NULL; + else + return strdup (s); +} SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, (SCM item, SCM locale), @@ -1381,11 +1484,11 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, "Reference Manual}).") #define FUNC_NAME s_scm_nl_langinfo { -#ifdef HAVE_NL_LANGINFO SCM result; nl_item c_item; char *c_result; scm_t_locale c_locale; + char *codeset; SCM_VALIDATE_INT_COPY (2, item, c_item); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); @@ -1397,12 +1500,13 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, http://opengroup.org/onlinepubs/007908799/xsh/nl_langinfo.html for details. */ - scm_i_pthread_mutex_lock (&scm_i_locale_mutex); + lock_locale_mutex (); if (c_locale != NULL) { #ifdef USE_GNU_LOCALE_API - c_result = nl_langinfo_l (c_item, c_locale); -#else + c_result = copy_string_or_null (nl_langinfo_l (c_item, c_locale)); + codeset = copy_string_or_null (nl_langinfo_l (CODESET, c_locale)); +#else /* !USE_GNU_LOCALE_API */ /* We can't use `RUN_IN_LOCALE_SECTION ()' here because the locale mutex is already taken. */ int lsec_err; @@ -1410,7 +1514,7 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, lsec_err = get_current_locale_settings (&lsec_prev_locale); if (lsec_err) - scm_i_pthread_mutex_unlock (&scm_i_locale_mutex); + unlock_locale_mutex (); else { lsec_err = install_locale (c_locale); @@ -1425,18 +1529,21 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, scm_locale_error (FUNC_NAME, lsec_err); else { - c_result = nl_langinfo (c_item); + c_result = copy_string_or_null (nl_langinfo (c_item)); + codeset = copy_string_or_null (nl_langinfo (CODESET)); - leave_locale_section (&lsec_prev_locale); + restore_locale_settings (&lsec_prev_locale); free_locale_settings (&lsec_prev_locale); } #endif } else - c_result = nl_langinfo (c_item); + { + c_result = copy_string_or_null (nl_langinfo (c_item)); + codeset = copy_string_or_null (nl_langinfo (CODESET)); + } - c_result = strdup (c_result); - scm_i_pthread_mutex_unlock (&scm_i_locale_mutex); + unlock_locale_mutex (); if (c_result == NULL) result = SCM_BOOL_F; @@ -1450,11 +1557,14 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, { char *p; - /* In this cases, the result is to be interpreted as a list of - numbers. If the last item is `CHARS_MAX', it has the special - meaning "no more grouping". */ + /* In this cases, the result is to be interpreted as a list + of numbers. If the last item is `CHAR_MAX' or a negative + number, it has the special meaning "no more grouping" + (negative numbers aren't specified in POSIX but can be + used by glibc; see + ). */ result = SCM_EOL; - for (p = c_result; (*p != '\0') && (*p != CHAR_MAX); p++) + for (p = c_result; (*p > 0) && (*p != CHAR_MAX); p++) result = scm_cons (SCM_I_MAKINUM ((int) *p), result); { @@ -1462,10 +1572,10 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, result = scm_reverse_x (result, SCM_EOL); - if (*p != CHAR_MAX) + if (*p == 0) { /* Cyclic grouping information. */ - if (last_pair != SCM_EOL) + if (!scm_is_null (last_pair)) SCM_SETCDR (last_pair, result); } } @@ -1475,9 +1585,13 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, } #endif -#if (defined FRAC_DIGITS) && (defined INT_FRAC_DIGITS) +#if defined FRAC_DIGITS || defined INT_FRAC_DIGITS +#ifdef FRAC_DIGITS case FRAC_DIGITS: +#endif +#ifdef INT_FRAC_DIGITS case INT_FRAC_DIGITS: +#endif /* This is to be interpreted as a single integer. */ if (*c_result == CHAR_MAX) /* Unspecified. */ @@ -1489,12 +1603,18 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, break; #endif -#if (defined P_CS_PRECEDES) && (defined INT_N_CS_PRECEDES) +#if defined P_CS_PRECEDES || defined N_CS_PRECEDES || \ + defined INT_P_CS_PRECEDES || defined INT_N_CS_PRECEDES || \ + defined P_SEP_BY_SPACE || defined N_SEP_BY_SPACE +#ifdef P_CS_PRECEDES case P_CS_PRECEDES: case N_CS_PRECEDES: +#endif +#ifdef INT_N_CS_PRECEDES case INT_P_CS_PRECEDES: case INT_N_CS_PRECEDES: -#if (defined P_SEP_BY_SPACE) && (defined N_SEP_BY_SPACE) +#endif +#ifdef P_SEP_BY_SPACE case P_SEP_BY_SPACE: case N_SEP_BY_SPACE: #endif @@ -1505,55 +1625,59 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, break; #endif -#if (defined P_SIGN_POSN) && (defined INT_N_SIGN_POSN) +#if defined P_SIGN_POSN || defined N_SIGN_POSN || \ + defined INT_P_SIGN_POSN || defined INT_N_SIGN_POSN +#ifdef P_SIGN_POSN case P_SIGN_POSN: case N_SIGN_POSN: +#endif +#ifdef INT_P_SIGN_POSN case INT_P_SIGN_POSN: case INT_N_SIGN_POSN: +#endif /* See `(libc) Sign of Money Amount' for the interpretation of the return value here. */ switch (*c_result) { case 0: - result = scm_from_locale_symbol ("parenthesize"); + result = scm_from_latin1_symbol ("parenthesize"); break; case 1: - result = scm_from_locale_symbol ("sign-before"); + result = scm_from_latin1_symbol ("sign-before"); break; case 2: - result = scm_from_locale_symbol ("sign-after"); + result = scm_from_latin1_symbol ("sign-after"); break; case 3: - result = scm_from_locale_symbol ("sign-before-currency-symbol"); + result = scm_from_latin1_symbol ("sign-before-currency-symbol"); break; case 4: - result = scm_from_locale_symbol ("sign-after-currency-symbol"); + result = scm_from_latin1_symbol ("sign-after-currency-symbol"); break; default: - result = scm_from_locale_symbol ("unspecified"); + result = scm_from_latin1_symbol ("unspecified"); } + free (c_result); break; #endif default: - /* FIXME: `locale_string ()' is not appropriate here because of - encoding issues (see comment above). */ - result = scm_take_locale_string (c_result); + result = scm_from_stringn (c_result, strlen (c_result), + codeset, + SCM_FAILED_CONVERSION_QUESTION_MARK); + free (c_result); } } - return result; -#else - scm_syserror_msg (FUNC_NAME, "`nl-langinfo' not supported on your system", - SCM_EOL, ENOSYS); + if (codeset != NULL) + free (codeset); - return SCM_BOOL_F; -#endif + return result; } #undef FUNC_NAME @@ -1561,8 +1685,6 @@ SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, static inline void define_langinfo_items (void) { -#if (defined HAVE_NL_TYPES_H) && (defined HAVE_LANGINFO_H) - #define DEFINE_NLITEM_CONSTANT(_item) \ scm_c_define (# _item, scm_from_int (_item)) @@ -1622,13 +1744,23 @@ define_langinfo_items (void) DEFINE_NLITEM_CONSTANT (T_FMT); /* Time format for strftime. */ DEFINE_NLITEM_CONSTANT (T_FMT_AMPM);/* 12-hour time format for strftime. */ +#ifdef ERA DEFINE_NLITEM_CONSTANT (ERA); /* Alternate era. */ +#endif +#ifdef ERA_D_FMT DEFINE_NLITEM_CONSTANT (ERA_D_FMT); /* Date in alternate era format. */ +#endif +#ifdef ERA_D_T_FMT DEFINE_NLITEM_CONSTANT (ERA_D_T_FMT); /* Date and time in alternate era format. */ +#endif +#ifdef ERA_T_FMT DEFINE_NLITEM_CONSTANT (ERA_T_FMT); /* Time in alternate era format. */ +#endif +#ifdef ALT_DIGITS DEFINE_NLITEM_CONSTANT (ALT_DIGITS); /* Alternate symbols for digits. */ +#endif DEFINE_NLITEM_CONSTANT (RADIXCHAR); DEFINE_NLITEM_CONSTANT (THOUSEP); @@ -1717,8 +1849,6 @@ define_langinfo_items (void) #endif #undef DEFINE_NLITEM_CONSTANT - -#endif /* HAVE_NL_TYPES_H */ } @@ -1727,22 +1857,29 @@ scm_init_i18n () { SCM global_locale_smob; -#ifdef HAVE_NL_LANGINFO scm_add_feature ("nl-langinfo"); define_langinfo_items (); -#endif #include "libguile/i18n.x" -#ifndef USE_GNU_LOCALE_API - scm_set_smob_mark (scm_tc16_locale_smob_type, smob_locale_mark); -#endif - /* Initialize the global locale object with a special `locale' SMOB. */ + /* XXX: We don't define it as `LC_GLOBAL_LOCALE' because of bugs as of + glibc <= 2.11 not (yet) worked around by Gnulib. See + http://sourceware.org/bugzilla/show_bug.cgi?id=11009 for details. */ SCM_NEWSMOB (global_locale_smob, scm_tc16_locale_smob_type, NULL); SCM_VARIABLE_SET (scm_global_locale, global_locale_smob); } +void +scm_bootstrap_i18n () +{ + scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION, + "scm_init_i18n", + (scm_t_extension_init_func) scm_init_i18n, + NULL); + +} + /* Local Variables: