Tests for locale-specific case conversion
[bpt/guile.git] / libguile / i18n.c
CommitLineData
49c9839e 1/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
9361f762
MV
2 *
3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
9361f762 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
9361f762
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
9361f762
MV
17 */
18
dbb605f5 19#ifdef HAVE_CONFIG_H
9361f762
MV
20# include <config.h>
21#endif
22
f7439099 23#include <alloca.h>
b89c4943 24
9361f762 25#include "libguile/_scm.h"
756e8a3a 26#include "libguile/extensions.h"
9361f762
MV
27#include "libguile/feature.h"
28#include "libguile/i18n.h"
29#include "libguile/strings.h"
b89c4943 30#include "libguile/chars.h"
9361f762 31#include "libguile/dynwind.h"
b89c4943
LC
32#include "libguile/validate.h"
33#include "libguile/values.h"
a2f00b9b 34#include "libguile/threads.h"
9361f762 35
9361f762 36#include <locale.h>
b89c4943
LC
37#include <string.h> /* `strcoll ()' */
38#include <ctype.h> /* `toupper ()' et al. */
39#include <errno.h>
5b878445
MG
40#include <unicase.h>
41#include <unistr.h>
9361f762 42
b89c4943 43#if (defined HAVE_NEWLOCALE) && (defined HAVE_STRCOLL_L)
a2f00b9b
LC
44/* The GNU thread-aware locale API is documented in ``Thread-Aware Locale
45 Model, a Proposal'', by Ulrich Drepper:
46
47 http://people.redhat.com/drepper/tllocale.ps.gz
48
49 It is now also implemented by Darwin:
50
51 http://developer.apple.com/documentation/Darwin/Reference/ManPages/man3/newlocale.3.html
52
83fda470
LC
53 The whole API was eventually standardized in the ``Open Group Base
54 Specifications Issue 7'' (aka. "POSIX 2008"):
55
56 http://www.opengroup.org/onlinepubs/9699919799/basedefs/locale.h.html */
b89c4943 57# define USE_GNU_LOCALE_API
9361f762 58#endif
b89c4943 59
a2f00b9b
LC
60#if (defined USE_GNU_LOCALE_API) && (defined HAVE_XLOCALE_H)
61# include <xlocale.h>
62#endif
63
64#include "libguile/posix.h" /* for `scm_i_locale_mutex' */
65
66#if (defined HAVE_LANGINFO_H) && (defined HAVE_NL_TYPES_H)
67# include <langinfo.h>
68# include <nl_types.h>
9361f762 69#endif
b89c4943
LC
70
71#ifndef HAVE_SETLOCALE
72static inline char *
73setlocale (int category, const char *name)
74{
75 errno = ENOSYS;
76 return NULL;
77}
9361f762 78#endif
b89c4943 79
a2f00b9b
LC
80/* Helper stringification macro. */
81#define SCM_I18N_STRINGIFY(_name) # _name
82
b89c4943
LC
83
84\f
85/* Locale objects, string and character collation, and other locale-dependent
86 string operations.
87
88 A large part of the code here deals with emulating glibc's reentrant
89 locale API on non-GNU systems. The emulation is a bit "brute-force":
90 Whenever a `-locale<?' procedure is passed a locale object, then:
91
a2f00b9b 92 1. The `scm_i_locale_mutex' is locked.
b89c4943 93 2. A series of `setlocale ()' call is performed to store the current
a2f00b9b 94 locale for each category in an `scm_t_locale' object.
b89c4943
LC
95 3. A series of `setlocale ()' call is made to install each of the locale
96 categories of each of the base locales of each locale object,
97 recursively, starting from the last locale object of the chain.
98 4. The settings captured in step (2) are restored.
a2f00b9b 99 5. The `scm_i_locale_mutex' is released.
b89c4943 100
a2f00b9b 101 Hopefully, the X/Open standard will eventually make this hack useless.
b89c4943
LC
102
103 Note: We don't wrap glibc's `uselocale ()' call because it sets the locale
104 of the current _thread_ (unlike `setlocale ()') and doing so would require
105 maintaining per-thread locale information on non-GNU systems and always
106 re-installing this locale upon locale-dependent calls. */
107
108
a2f00b9b
LC
109/* Return the category mask corresponding to CAT. */
110#define SCM_LOCALE_CATEGORY_MASK(_cat) LC_ ## _cat ## _MASK
111
112
b89c4943
LC
113#ifndef USE_GNU_LOCALE_API
114
a2f00b9b
LC
115/* Provide the locale category masks as found in glibc. This must be kept in
116 sync with `locale-categories.h'. */
b89c4943 117
a2f00b9b
LC
118# define LC_CTYPE_MASK 1
119# define LC_COLLATE_MASK 2
120# define LC_MESSAGES_MASK 4
121# define LC_MONETARY_MASK 8
122# define LC_NUMERIC_MASK 16
123# define LC_TIME_MASK 32
b89c4943
LC
124
125# ifdef LC_PAPER
a2f00b9b 126# define LC_PAPER_MASK 64
b89c4943
LC
127# else
128# define LC_PAPER_MASK 0
129# endif
130# ifdef LC_NAME
a2f00b9b 131# define LC_NAME_MASK 128
b89c4943
LC
132# else
133# define LC_NAME_MASK 0
134# endif
135# ifdef LC_ADDRESS
a2f00b9b 136# define LC_ADDRESS_MASK 256
b89c4943
LC
137# else
138# define LC_ADDRESS_MASK 0
139# endif
140# ifdef LC_TELEPHONE
a2f00b9b 141# define LC_TELEPHONE_MASK 512
b89c4943
LC
142# else
143# define LC_TELEPHONE_MASK 0
144# endif
145# ifdef LC_MEASUREMENT
a2f00b9b 146# define LC_MEASUREMENT_MASK 1024
b89c4943
LC
147# else
148# define LC_MEASUREMENT_MASK 0
149# endif
150# ifdef LC_IDENTIFICATION
a2f00b9b 151# define LC_IDENTIFICATION_MASK 2048
b89c4943
LC
152# else
153# define LC_IDENTIFICATION_MASK 0
154# endif
155
a2f00b9b
LC
156# define LC_ALL_MASK (LC_CTYPE_MASK \
157 | LC_NUMERIC_MASK \
158 | LC_TIME_MASK \
159 | LC_COLLATE_MASK \
160 | LC_MONETARY_MASK \
161 | LC_MESSAGES_MASK \
162 | LC_PAPER_MASK \
163 | LC_NAME_MASK \
164 | LC_ADDRESS_MASK \
165 | LC_TELEPHONE_MASK \
166 | LC_MEASUREMENT_MASK \
167 | LC_IDENTIFICATION_MASK \
b89c4943
LC
168 )
169
170/* Locale objects as returned by `make-locale' on non-GNU systems. */
171typedef struct scm_locale
172{
173 SCM base_locale; /* a `locale' object */
174 char *locale_name;
175 int category_mask;
176} *scm_t_locale;
177
a2f00b9b
LC
178
179/* Free the resources used by LOCALE. */
180static inline void
181scm_i_locale_free (scm_t_locale locale)
182{
183 free (locale->locale_name);
184 locale->locale_name = NULL;
185}
186
187#else /* USE_GNU_LOCALE_API */
b89c4943
LC
188
189/* Alias for glibc's locale type. */
190typedef locale_t scm_t_locale;
191
a2f00b9b
LC
192#define scm_i_locale_free freelocale
193
194#endif /* USE_GNU_LOCALE_API */
195
196
197/* A locale object denoting the global locale. */
198SCM_GLOBAL_VARIABLE (scm_global_locale, "%global-locale");
199
b89c4943
LC
200
201/* Validate parameter ARG as a locale object and set C_LOCALE to the
202 corresponding C locale object. */
203#define SCM_VALIDATE_LOCALE_COPY(_pos, _arg, _c_locale) \
204 do \
205 { \
206 SCM_VALIDATE_SMOB ((_pos), (_arg), locale_smob_type); \
207 (_c_locale) = (scm_t_locale)SCM_SMOB_DATA (_arg); \
208 } \
209 while (0)
210
211/* Validate optional parameter ARG as either undefined or bound to a locale
212 object. Set C_LOCALE to the corresponding C locale object or NULL. */
213#define SCM_VALIDATE_OPTIONAL_LOCALE_COPY(_pos, _arg, _c_locale) \
214 do \
215 { \
216 if ((_arg) != SCM_UNDEFINED) \
217 SCM_VALIDATE_LOCALE_COPY (_pos, _arg, _c_locale); \
218 else \
219 (_c_locale) = NULL; \
220 } \
221 while (0)
222
223
224SCM_SMOB (scm_tc16_locale_smob_type, "locale", 0);
225
226SCM_SMOB_FREE (scm_tc16_locale_smob_type, smob_locale_free, locale)
227{
228 scm_t_locale c_locale;
229
a2f00b9b
LC
230 c_locale = (scm_t_locale) SCM_SMOB_DATA (locale);
231 scm_i_locale_free (c_locale);
b89c4943
LC
232
233 return 0;
234}
235
9361f762 236
a2f00b9b 237static void inline scm_locale_error (const char *, int) SCM_NORETURN;
b89c4943 238
a2f00b9b
LC
239/* Throw an exception corresponding to error ERR. */
240static void inline
241scm_locale_error (const char *func_name, int err)
242{
243 scm_syserror_msg (func_name,
244 "Failed to install locale",
245 SCM_EOL, err);
b89c4943 246}
b89c4943
LC
247
248
249\f
a2f00b9b
LC
250/* Emulating GNU's reentrant locale API. */
251#ifndef USE_GNU_LOCALE_API
b89c4943
LC
252
253
254/* Maximum number of chained locales (via `base_locale'). */
255#define LOCALE_STACK_SIZE_MAX 256
256
257typedef struct
258{
259#define SCM_DEFINE_LOCALE_CATEGORY(_name) char * _name;
260#include "locale-categories.h"
261#undef SCM_DEFINE_LOCALE_CATEGORY
262} scm_t_locale_settings;
263
264/* Fill out SETTINGS according to the current locale settings. On success
265 zero is returned and SETTINGS is properly initialized. */
266static int
267get_current_locale_settings (scm_t_locale_settings *settings)
268{
269 const char *locale_name;
270
271#define SCM_DEFINE_LOCALE_CATEGORY(_name) \
272 { \
273 SCM_SYSCALL (locale_name = setlocale (LC_ ## _name, NULL)); \
a2f00b9b 274 if (locale_name == NULL) \
b89c4943
LC
275 goto handle_error; \
276 \
277 settings-> _name = strdup (locale_name); \
278 if (settings-> _name == NULL) \
279 goto handle_oom; \
280 }
281
282#include "locale-categories.h"
283#undef SCM_DEFINE_LOCALE_CATEGORY
284
285 return 0;
286
287 handle_error:
a2f00b9b 288 return EINVAL;
9361f762 289
b89c4943
LC
290 handle_oom:
291 return ENOMEM;
292}
293
294/* Restore locale settings SETTINGS. On success, return zero. */
295static int
296restore_locale_settings (const scm_t_locale_settings *settings)
297{
298 const char *result;
299
300#define SCM_DEFINE_LOCALE_CATEGORY(_name) \
301 SCM_SYSCALL (result = setlocale (LC_ ## _name, settings-> _name)); \
302 if (result == NULL) \
303 goto handle_error;
304
305#include "locale-categories.h"
306#undef SCM_DEFINE_LOCALE_CATEGORY
307
308 return 0;
309
310 handle_error:
a2f00b9b 311 return EINVAL;
b89c4943
LC
312}
313
314/* Free memory associated with SETTINGS. */
315static void
316free_locale_settings (scm_t_locale_settings *settings)
317{
318#define SCM_DEFINE_LOCALE_CATEGORY(_name) \
319 free (settings-> _name); \
320 settings->_name = NULL;
321#include "locale-categories.h"
322#undef SCM_DEFINE_LOCALE_CATEGORY
323}
9361f762 324
b89c4943
LC
325/* Install the locale named LOCALE_NAME for all the categories listed in
326 CATEGORY_MASK. */
327static int
328install_locale_categories (const char *locale_name, int category_mask)
329{
330 const char *result;
9361f762 331
b89c4943 332 if (category_mask == LC_ALL_MASK)
9361f762 333 {
b89c4943
LC
334 SCM_SYSCALL (result = setlocale (LC_ALL, locale_name));
335 if (result == NULL)
336 goto handle_error;
9361f762
MV
337 }
338 else
339 {
b89c4943 340#define SCM_DEFINE_LOCALE_CATEGORY(_name) \
a2f00b9b 341 if (category_mask & SCM_LOCALE_CATEGORY_MASK (_name)) \
b89c4943
LC
342 { \
343 SCM_SYSCALL (result = setlocale (LC_ ## _name, locale_name)); \
344 if (result == NULL) \
345 goto handle_error; \
346 }
347#include "locale-categories.h"
348#undef SCM_DEFINE_LOCALE_CATEGORY
349 }
9361f762 350
b89c4943 351 return 0;
9361f762 352
b89c4943 353 handle_error:
a2f00b9b 354 return EINVAL;
b89c4943
LC
355}
356
357/* Install LOCALE, recursively installing its base locales first. On
358 success, zero is returned. */
359static int
360install_locale (scm_t_locale locale)
361{
362 scm_t_locale stack[LOCALE_STACK_SIZE_MAX];
a2f00b9b 363 int category_mask = 0;
b89c4943
LC
364 size_t stack_size = 0;
365 int stack_offset = 0;
366 const char *result = NULL;
367
368 /* Build up a locale stack by traversing the `base_locale' link. */
369 do
370 {
371 if (stack_size >= LOCALE_STACK_SIZE_MAX)
372 /* We cannot use `scm_error ()' here because otherwise the locale
373 mutex may remain locked. */
374 return EINVAL;
375
376 stack[stack_size++] = locale;
377
a2f00b9b
LC
378 /* Keep track of which categories have already been taken into
379 account. */
380 category_mask |= locale->category_mask;
381
b89c4943 382 if (locale->base_locale != SCM_UNDEFINED)
a2f00b9b 383 locale = (scm_t_locale) SCM_SMOB_DATA (locale->base_locale);
9361f762 384 else
b89c4943
LC
385 locale = NULL;
386 }
a2f00b9b 387 while ((locale != NULL) && (category_mask != LC_ALL_MASK));
9361f762 388
b89c4943
LC
389 /* Install the C locale to start from a pristine state. */
390 SCM_SYSCALL (result = setlocale (LC_ALL, "C"));
391 if (result == NULL)
392 goto handle_error;
393
394 /* Install the locales in reverse order. */
395 for (stack_offset = stack_size - 1;
396 stack_offset >= 0;
397 stack_offset--)
398 {
399 int err;
400 scm_t_locale locale;
401
402 locale = stack[stack_offset];
403 err = install_locale_categories (locale->locale_name,
404 locale->category_mask);
405 if (err)
406 goto handle_error;
9361f762
MV
407 }
408
b89c4943 409 return 0;
9361f762 410
b89c4943 411 handle_error:
a2f00b9b 412 return EINVAL;
9361f762 413}
9361f762 414
b89c4943
LC
415/* Leave the locked locale section. */
416static inline void
417leave_locale_section (const scm_t_locale_settings *settings)
418{
419 /* Restore the previous locale settings. */
420 (void)restore_locale_settings (settings);
421
422 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
423}
9361f762 424
b89c4943
LC
425/* Enter a locked locale section. */
426static inline int
427enter_locale_section (scm_t_locale locale,
428 scm_t_locale_settings *prev_locale)
9361f762 429{
b89c4943
LC
430 int err;
431
432 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
433
434 err = get_current_locale_settings (prev_locale);
435 if (err)
436 {
437 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
438 return err;
439 }
9361f762 440
b89c4943
LC
441 err = install_locale (locale);
442 if (err)
443 {
444 leave_locale_section (prev_locale);
445 free_locale_settings (prev_locale);
446 }
447
448 return err;
449}
450
b89c4943
LC
451/* Convenient macro to run STATEMENT in the locale context of C_LOCALE. */
452#define RUN_IN_LOCALE_SECTION(_c_locale, _statement) \
453 do \
454 { \
455 int lsec_err; \
456 scm_t_locale_settings lsec_prev_locale; \
457 \
458 lsec_err = enter_locale_section ((_c_locale), &lsec_prev_locale); \
459 if (lsec_err) \
460 scm_locale_error (FUNC_NAME, lsec_err); \
461 else \
462 { \
463 _statement ; \
464 \
465 leave_locale_section (&lsec_prev_locale); \
466 free_locale_settings (&lsec_prev_locale); \
467 } \
468 } \
469 while (0)
470
a2f00b9b
LC
471/* Convert the current locale settings into a locale SMOB. On success, zero
472 is returned and RESULT points to the new SMOB. Otherwise, an error is
473 returned. */
474static int
475get_current_locale (SCM *result)
476{
477 int err = 0;
478 scm_t_locale c_locale;
479 const char *current_locale;
480
481 c_locale = scm_gc_malloc (sizeof (* c_locale), "locale");
482
483
484 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
485
486 c_locale->category_mask = LC_ALL_MASK;
487 c_locale->base_locale = SCM_UNDEFINED;
488
489 current_locale = setlocale (LC_ALL, NULL);
490 if (current_locale != NULL)
491 {
492 c_locale->locale_name = strdup (current_locale);
493 if (c_locale->locale_name == NULL)
494 err = ENOMEM;
495 }
496 else
497 err = EINVAL;
498
499 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
500
501 if (err)
502 scm_gc_free (c_locale, sizeof (* c_locale), "locale");
503 else
504 SCM_NEWSMOB (*result, scm_tc16_locale_smob_type, c_locale);
505
506 return err;
507}
508
5b878445 509#else /* USE_GNU_LOCALE_API */
a2f00b9b 510
5b878445 511/* Convenient macro to run STATEMENT in the locale context of C_LOCALE. */
12f0c3e5
LC
512#define RUN_IN_LOCALE_SECTION(_c_locale, _statement) \
513 do \
514 { \
515 scm_t_locale old_loc; \
516 \
517 old_loc = uselocale (_c_locale); \
518 _statement ; \
519 uselocale (old_loc); \
520 } \
5b878445
MG
521 while (0)
522
523
524#endif /* USE_GNU_LOCALE_API */
9361f762 525
a2f00b9b
LC
526
527\f
528/* `make-locale' can take either category lists or single categories (the
529 `LC_*' integer constants). */
530#define SCM_LIST_OR_INTEGER_P(arg) \
531 (scm_is_integer (arg) || scm_is_true (scm_list_p (arg)))
532
533
534/* Return the category mask corresponding to CATEGORY (an `LC_' integer
535 constant). */
536static inline int
537category_to_category_mask (SCM category,
538 const char *func_name, int pos)
539{
540 int c_category;
541 int c_category_mask;
542
543 c_category = scm_to_int (category);
544
545#define SCM_DEFINE_LOCALE_CATEGORY(_name) \
546 case LC_ ## _name: \
547 c_category_mask = SCM_LOCALE_CATEGORY_MASK (_name); \
548 break;
549
550 switch (c_category)
551 {
552#include "locale-categories.h"
553
554 case LC_ALL:
555 c_category_mask = LC_ALL_MASK;
556 break;
557
558 default:
559 scm_wrong_type_arg_msg (func_name, pos, category,
560 "locale category");
561 }
562
563#undef SCM_DEFINE_LOCALE_CATEGORY
564
565 return c_category_mask;
566}
567
568/* Convert CATEGORIES, a list of locale categories or a single category (an
569 integer), into a category mask. */
570static int
571category_list_to_category_mask (SCM categories,
572 const char *func_name, int pos)
573{
574 int c_category_mask = 0;
575
576 if (scm_is_integer (categories))
577 c_category_mask = category_to_category_mask (categories,
578 func_name, pos);
579 else
580 for (; !scm_is_null (categories); categories = SCM_CDR (categories))
581 {
582 SCM category = SCM_CAR (categories);
583
584 c_category_mask |=
585 category_to_category_mask (category, func_name, pos);
586 }
587
588 return c_category_mask;
589}
590
591
592SCM_DEFINE (scm_make_locale, "make-locale", 2, 1, 0,
593 (SCM category_list, SCM locale_name, SCM base_locale),
594 "Return a reference to a data structure representing a set of "
595 "locale datasets. @var{category_list} should be either a list "
596 "of locale categories or a single category as used with "
597 "@code{setlocale} (@pxref{Locales, @code{setlocale}}) and "
598 "@var{locale_name} should be the name of the locale considered "
599 "(e.g., @code{\"sl_SI\"}). Optionally, if @code{base_locale} is "
600 "passed, it should be a locale object denoting settings for "
601 "categories not listed in @var{category_list}.")
602#define FUNC_NAME s_scm_make_locale
603{
604 SCM locale = SCM_BOOL_F;
605 int err = 0;
606 int c_category_mask;
607 char *c_locale_name;
608 scm_t_locale c_base_locale, c_locale;
609
610 SCM_MAKE_VALIDATE (1, category_list, LIST_OR_INTEGER_P);
611 SCM_VALIDATE_STRING (2, locale_name);
612 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, base_locale, c_base_locale);
613
614 c_category_mask = category_list_to_category_mask (category_list,
615 FUNC_NAME, 1);
616 c_locale_name = scm_to_locale_string (locale_name);
617
618#ifdef USE_GNU_LOCALE_API
619
620 if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale)))
621 {
622 /* Fetch the current locale and turn in into a `locale_t'. Don't
623 duplicate the resulting `locale_t' because we want it to be consumed
624 by `newlocale ()'. */
625 char *current_locale;
626
627 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
628
629 current_locale = setlocale (LC_ALL, NULL);
630 c_base_locale = newlocale (LC_ALL_MASK, current_locale, NULL);
631
632 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
633
634 if (c_base_locale == (locale_t) 0)
635 scm_locale_error (FUNC_NAME, errno);
636 }
637 else if (c_base_locale != (locale_t) 0)
638 {
639 /* C_BASE_LOCALE is to be consumed by `newlocale ()' so it needs to be
640 duplicated before. */
641 c_base_locale = duplocale (c_base_locale);
642 if (c_base_locale == (locale_t) 0)
643 {
644 err = errno;
645 goto fail;
646 }
647 }
648
649 c_locale = newlocale (c_category_mask, c_locale_name, c_base_locale);
650
651 free (c_locale_name);
652
653 if (c_locale == (locale_t) 0)
654 {
655 if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale)))
656 /* The base locale object was created lazily and must be freed. */
657 freelocale (c_base_locale);
658
659 scm_locale_error (FUNC_NAME, errno);
660 }
661 else
662 SCM_NEWSMOB (locale, scm_tc16_locale_smob_type, c_locale);
663
664#else
665
666 c_locale = scm_gc_malloc (sizeof (* c_locale), "locale");
667
668 c_locale->category_mask = c_category_mask;
669 c_locale->locale_name = c_locale_name;
670
671 if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale)))
672 {
673 /* Get the current locale settings and turn them into a locale
674 object. */
675 err = get_current_locale (&base_locale);
676 if (err)
677 goto fail;
678 }
679
680 c_locale->base_locale = base_locale;
681
682 {
683 /* Try out the new locale and raise an exception if it doesn't work. */
684 int err;
685 scm_t_locale_settings prev_locale;
686
687 err = enter_locale_section (c_locale, &prev_locale);
a2f00b9b
LC
688
689 if (err)
690 goto fail;
691 else
1445e449
LC
692 {
693 leave_locale_section (&prev_locale);
694 SCM_NEWSMOB (locale, scm_tc16_locale_smob_type, c_locale);
695 }
a2f00b9b
LC
696 }
697
698#endif
699
700 return locale;
701
702 fail:
703#ifndef USE_GNU_LOCALE_API
704 scm_gc_free (c_locale, sizeof (* c_locale), "locale");
705#endif
706 free (c_locale_name);
707 scm_locale_error (FUNC_NAME, err);
708
709 return SCM_BOOL_F;
710}
711#undef FUNC_NAME
712
713SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0,
714 (SCM obj),
715 "Return true if @var{obj} is a locale object.")
716#define FUNC_NAME s_scm_locale_p
717{
718 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_locale_smob_type, obj));
719}
720#undef FUNC_NAME
721
722
b89c4943 723\f
a2f00b9b
LC
724/* Locale-dependent string comparison.
725
726 A similar API can be found in MzScheme starting from version 200:
727 http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */
728
df047aa2
LC
729#define SCM_STRING_TO_U32_BUF(s1, c_s1) \
730 do \
731 { \
732 if (scm_i_is_narrow_string (s1)) \
733 { \
734 size_t i, len; \
735 const char *buf = scm_i_string_chars (s1); \
736 \
737 len = scm_i_string_length (s1); \
738 c_s1 = (scm_t_wchar *) alloca (sizeof (scm_t_wchar) * (len + 1)); \
739 \
740 for (i = 0; i < len; i ++) \
741 c_s1[i] = (unsigned char ) buf[i]; \
742 c_s1[len] = 0; \
743 } \
744 else \
745 c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \
746 } while (0)
747
748
749/* Compare UTF-32 strings according to LOCALE. Returns a negative value if
750 S1 compares smaller than S2, a positive value if S1 compares larger than
751 S2, or 0 if they compare equal. */
b89c4943 752static inline int
5b878445 753compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
b89c4943
LC
754#define FUNC_NAME func_name
755{
756 int result;
757 scm_t_locale c_locale;
5b878445
MG
758 scm_t_wchar *c_s1, *c_s2;
759 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
760
761 SCM_STRING_TO_U32_BUF (s1, c_s1);
762 SCM_STRING_TO_U32_BUF (s2, c_s2);
763
764 if (c_locale)
765 RUN_IN_LOCALE_SECTION (c_locale,
766 result = u32_strcoll ((const scm_t_uint32 *) c_s1,
767 (const scm_t_uint32 *) c_s2));
768 else
cdf52ff0
LC
769 result = u32_strcoll ((const scm_t_uint32 *) c_s1,
770 (const scm_t_uint32 *) c_s2);
5b878445 771
5b878445
MG
772 scm_remember_upto_here_2 (s1, s2);
773 scm_remember_upto_here (locale);
774 return result;
775}
776#undef FUNC_NAME
777
778static inline int
779u32_locale_casecoll (const char *func_name, const scm_t_uint32 *c_s1,
c543e41e
LC
780 const scm_t_uint32 *c_s2,
781 int *result)
5b878445 782{
c543e41e
LC
783 /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must note
784 make any non-local exit. */
5b878445 785
c543e41e
LC
786 int ret;
787 const char *loc = uc_locale_language ();
5b878445
MG
788
789 ret = u32_casecoll (c_s1, u32_strlen (c_s1),
790 c_s2, u32_strlen (c_s2),
c543e41e 791 loc, UNINORM_NFC, result);
5b878445 792
c543e41e 793 return ret == 0 ? ret : errno;
5b878445
MG
794}
795
796static inline int
797compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
798#define FUNC_NAME func_name
799{
c543e41e 800 int result, ret = 0;
5b878445
MG
801 scm_t_locale c_locale;
802 scm_t_wchar *c_s1, *c_s2;
b89c4943 803 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
9361f762 804
5b878445
MG
805 SCM_STRING_TO_U32_BUF (s1, c_s1);
806 SCM_STRING_TO_U32_BUF (s2, c_s2);
807
b89c4943 808 if (c_locale)
cdf52ff0
LC
809 RUN_IN_LOCALE_SECTION
810 (c_locale,
c543e41e 811 ret = u32_locale_casecoll (func_name,
cdf52ff0 812 (const scm_t_uint32 *) c_s1,
c543e41e
LC
813 (const scm_t_uint32 *) c_s2,
814 &result));
815 else
816 ret = u32_locale_casecoll (func_name,
817 (const scm_t_uint32 *) c_s1,
818 (const scm_t_uint32 *) c_s2,
819 &result);
820
821 if (SCM_UNLIKELY (ret != 0))
822 {
823 errno = ret;
824 scm_syserror (FUNC_NAME);
825 }
b89c4943 826
5b878445
MG
827 scm_remember_upto_here_2 (s1, s2);
828 scm_remember_upto_here (locale);
b89c4943
LC
829
830 return result;
831}
832#undef FUNC_NAME
833
834/* Store into DST an upper-case version of SRC. */
835static inline void
836str_upcase (register char *dst, register const char *src)
837{
838 for (; *src != '\0'; src++, dst++)
4e641322 839 *dst = toupper ((int) *src);
b89c4943
LC
840 *dst = '\0';
841}
842
843static inline void
844str_downcase (register char *dst, register const char *src)
845{
846 for (; *src != '\0'; src++, dst++)
4e641322 847 *dst = tolower ((int) *src);
b89c4943
LC
848 *dst = '\0';
849}
850
851#ifdef USE_GNU_LOCALE_API
852static inline void
853str_upcase_l (register char *dst, register const char *src,
854 scm_t_locale locale)
855{
856 for (; *src != '\0'; src++, dst++)
857 *dst = toupper_l (*src, locale);
858 *dst = '\0';
859}
860
861static inline void
862str_downcase_l (register char *dst, register const char *src,
863 scm_t_locale locale)
864{
865 for (; *src != '\0'; src++, dst++)
866 *dst = tolower_l (*src, locale);
867 *dst = '\0';
868}
869#endif
870
871
b89c4943
LC
872SCM_DEFINE (scm_string_locale_lt, "string-locale<?", 2, 1, 0,
873 (SCM s1, SCM s2, SCM locale),
874 "Compare strings @var{s1} and @var{s2} in a locale-dependent way."
875 "If @var{locale} is provided, it should be locale object (as "
876 "returned by @code{make-locale}) and will be used to perform the "
877 "comparison; otherwise, the current system locale is used.")
878#define FUNC_NAME s_scm_string_locale_lt
9361f762 879{
b89c4943 880 int result;
9361f762 881
b89c4943
LC
882 SCM_VALIDATE_STRING (1, s1);
883 SCM_VALIDATE_STRING (2, s2);
9361f762 884
5b878445 885 result = compare_u32_strings (s1, s2, locale, FUNC_NAME);
b89c4943
LC
886
887 return scm_from_bool (result < 0);
888}
889#undef FUNC_NAME
890
891SCM_DEFINE (scm_string_locale_gt, "string-locale>?", 2, 1, 0,
892 (SCM s1, SCM s2, SCM locale),
893 "Compare strings @var{s1} and @var{s2} in a locale-dependent way."
894 "If @var{locale} is provided, it should be locale object (as "
895 "returned by @code{make-locale}) and will be used to perform the "
896 "comparison; otherwise, the current system locale is used.")
897#define FUNC_NAME s_scm_string_locale_gt
898{
899 int result;
b89c4943
LC
900
901 SCM_VALIDATE_STRING (1, s1);
902 SCM_VALIDATE_STRING (2, s2);
903
5b878445 904 result = compare_u32_strings (s1, s2, locale, FUNC_NAME);
b89c4943
LC
905
906 return scm_from_bool (result > 0);
907}
908#undef FUNC_NAME
909
910SCM_DEFINE (scm_string_locale_ci_lt, "string-locale-ci<?", 2, 1, 0,
911 (SCM s1, SCM s2, SCM locale),
912 "Compare strings @var{s1} and @var{s2} in a case-insensitive, "
913 "and locale-dependent way. If @var{locale} is provided, it "
914 "should be locale object (as returned by @code{make-locale}) "
915 "and will be used to perform the comparison; otherwise, the "
916 "current system locale is used.")
917#define FUNC_NAME s_scm_string_locale_ci_lt
918{
919 int result;
b89c4943
LC
920
921 SCM_VALIDATE_STRING (1, s1);
922 SCM_VALIDATE_STRING (2, s2);
923
5b878445 924 result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME);
b89c4943
LC
925
926 return scm_from_bool (result < 0);
927}
928#undef FUNC_NAME
929
930SCM_DEFINE (scm_string_locale_ci_gt, "string-locale-ci>?", 2, 1, 0,
931 (SCM s1, SCM s2, SCM locale),
932 "Compare strings @var{s1} and @var{s2} in a case-insensitive, "
933 "and locale-dependent way. If @var{locale} is provided, it "
934 "should be locale object (as returned by @code{make-locale}) "
935 "and will be used to perform the comparison; otherwise, the "
936 "current system locale is used.")
937#define FUNC_NAME s_scm_string_locale_ci_gt
938{
939 int result;
b89c4943
LC
940
941 SCM_VALIDATE_STRING (1, s1);
942 SCM_VALIDATE_STRING (2, s2);
943
5b878445 944 result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME);
b89c4943
LC
945
946 return scm_from_bool (result > 0);
947}
948#undef FUNC_NAME
949
950SCM_DEFINE (scm_string_locale_ci_eq, "string-locale-ci=?", 2, 1, 0,
951 (SCM s1, SCM s2, SCM locale),
952 "Compare strings @var{s1} and @var{s2} in a case-insensitive, "
953 "and locale-dependent way. If @var{locale} is provided, it "
954 "should be locale object (as returned by @code{make-locale}) "
955 "and will be used to perform the comparison; otherwise, the "
956 "current system locale is used.")
957#define FUNC_NAME s_scm_string_locale_ci_eq
958{
959 int result;
b89c4943
LC
960
961 SCM_VALIDATE_STRING (1, s1);
962 SCM_VALIDATE_STRING (2, s2);
963
5b878445 964 result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME);
b89c4943
LC
965
966 return scm_from_bool (result == 0);
967}
968#undef FUNC_NAME
969
970
971SCM_DEFINE (scm_char_locale_lt, "char-locale<?", 2, 1, 0,
972 (SCM c1, SCM c2, SCM locale),
973 "Return true if character @var{c1} is lower than @var{c2} "
974 "according to @var{locale} or to the current locale.")
975#define FUNC_NAME s_scm_char_locale_lt
976{
5b878445 977 int result;
b89c4943
LC
978
979 SCM_VALIDATE_CHAR (1, c1);
980 SCM_VALIDATE_CHAR (2, c2);
981
5b878445
MG
982 result = compare_u32_strings (scm_string (scm_list_1 (c1)),
983 scm_string (scm_list_1 (c2)),
984 locale, FUNC_NAME);
b89c4943 985
5b878445 986 return scm_from_bool (result < 0);
b89c4943
LC
987}
988#undef FUNC_NAME
989
990SCM_DEFINE (scm_char_locale_gt, "char-locale>?", 2, 1, 0,
991 (SCM c1, SCM c2, SCM locale),
992 "Return true if character @var{c1} is greater than @var{c2} "
993 "according to @var{locale} or to the current locale.")
994#define FUNC_NAME s_scm_char_locale_gt
995{
5b878445 996 int result;
b89c4943
LC
997
998 SCM_VALIDATE_CHAR (1, c1);
999 SCM_VALIDATE_CHAR (2, c2);
1000
5b878445
MG
1001 result = compare_u32_strings (scm_string (scm_list_1 (c1)),
1002 scm_string (scm_list_1 (c2)),
1003 locale, FUNC_NAME);
b89c4943 1004
5b878445 1005 return scm_from_bool (result > 0);
b89c4943
LC
1006}
1007#undef FUNC_NAME
1008
1009SCM_DEFINE (scm_char_locale_ci_lt, "char-locale-ci<?", 2, 1, 0,
1010 (SCM c1, SCM c2, SCM locale),
1011 "Return true if character @var{c1} is lower than @var{c2}, "
1012 "in a case insensitive way according to @var{locale} or to "
1013 "the current locale.")
1014#define FUNC_NAME s_scm_char_locale_ci_lt
1015{
1016 int result;
b89c4943
LC
1017
1018 SCM_VALIDATE_CHAR (1, c1);
1019 SCM_VALIDATE_CHAR (2, c2);
1020
5b878445
MG
1021 result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)),
1022 scm_string (scm_list_1 (c2)),
1023 locale, FUNC_NAME);
b89c4943
LC
1024
1025 return scm_from_bool (result < 0);
1026}
1027#undef FUNC_NAME
1028
1029SCM_DEFINE (scm_char_locale_ci_gt, "char-locale-ci>?", 2, 1, 0,
1030 (SCM c1, SCM c2, SCM locale),
1031 "Return true if character @var{c1} is greater than @var{c2}, "
1032 "in a case insensitive way according to @var{locale} or to "
1033 "the current locale.")
1034#define FUNC_NAME s_scm_char_locale_ci_gt
1035{
1036 int result;
b89c4943
LC
1037
1038 SCM_VALIDATE_CHAR (1, c1);
1039 SCM_VALIDATE_CHAR (2, c2);
1040
5b878445
MG
1041 result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)),
1042 scm_string (scm_list_1 (c2)),
1043 locale, FUNC_NAME);
b89c4943
LC
1044
1045 return scm_from_bool (result > 0);
1046}
1047#undef FUNC_NAME
1048
1049SCM_DEFINE (scm_char_locale_ci_eq, "char-locale-ci=?", 2, 1, 0,
1050 (SCM c1, SCM c2, SCM locale),
1051 "Return true if character @var{c1} is equal to @var{c2}, "
1052 "in a case insensitive way according to @var{locale} or to "
1053 "the current locale.")
1054#define FUNC_NAME s_scm_char_locale_ci_eq
1055{
1056 int result;
b89c4943
LC
1057
1058 SCM_VALIDATE_CHAR (1, c1);
1059 SCM_VALIDATE_CHAR (2, c2);
1060
5b878445
MG
1061 result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)),
1062 scm_string (scm_list_1 (c2)),
1063 locale, FUNC_NAME);
b89c4943
LC
1064
1065 return scm_from_bool (result == 0);
1066}
1067#undef FUNC_NAME
1068
1069
1070\f
1071/* Locale-dependent alphabetic character mapping. */
1072
27f3413e
MG
1073static inline int
1074u32_locale_tocase (const scm_t_uint32 *c_s1, size_t len,
1075 scm_t_uint32 **p_c_s2, size_t * p_len2,
1076 scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t,
1077 const char *, uninorm_t,
1078 scm_t_uint32 *, size_t *))
1079{
1080 /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must not
1081 make any non-local exit. */
1082
1083 scm_t_uint32 *ret;
1084 const char *loc = uc_locale_language ();
1085
1086 /* The first NULL here indicates that no NFC or NFKC normalization
1087 is done. The second NULL means the return buffer is
1088 malloc'ed here. */
1089 ret = func (c_s1, len, loc, NULL, NULL, p_len2);
1090
1091 if (ret == NULL)
1092 {
2c48e4d5 1093 *p_c_s2 = (scm_t_uint32 *) NULL;
27f3413e
MG
1094 *p_len2 = 0;
1095 return errno;
1096 }
1097 *p_c_s2 = ret;
1098
1099 return 0;
1100}
1101
1102
1103
b89c4943
LC
1104SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0,
1105 (SCM chr, SCM locale),
1106 "Return the lowercase character that corresponds to @var{chr} "
1107 "according to either @var{locale} or the current locale.")
1108#define FUNC_NAME s_scm_char_locale_downcase
1109{
27f3413e 1110 int ret;
b89c4943 1111 scm_t_locale c_locale;
2c48e4d5
MG
1112 scm_t_wchar *buf;
1113 scm_t_uint32 *downbuf;
27f3413e
MG
1114 size_t downlen;
1115 SCM str, downchar;
b89c4943
LC
1116
1117 SCM_VALIDATE_CHAR (1, chr);
b89c4943
LC
1118 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1119
27f3413e
MG
1120 str = scm_i_make_wide_string (1, &buf);
1121 buf[0] = SCM_CHAR (chr);
1122
b89c4943 1123 if (c_locale != NULL)
27f3413e
MG
1124 RUN_IN_LOCALE_SECTION (c_locale, ret =
1125 u32_locale_tocase ((scm_t_uint32 *) buf, 1,
2c48e4d5 1126 &downbuf,
27f3413e
MG
1127 &downlen, u32_tolower));
1128 else
1129 ret =
2c48e4d5 1130 u32_locale_tocase ((scm_t_uint32 *) buf, 1, &downbuf,
27f3413e
MG
1131 &downlen, u32_tolower);
1132
1133 if (SCM_UNLIKELY (ret != 0))
9361f762 1134 {
27f3413e
MG
1135 errno = ret;
1136 scm_syserror (FUNC_NAME);
9361f762 1137 }
27f3413e
MG
1138
1139 if (downlen == 1)
2c48e4d5 1140 downchar = SCM_MAKE_CHAR ((scm_t_wchar) downbuf[0]);
b89c4943 1141 else
27f3413e
MG
1142 downchar = chr;
1143 free (downbuf);
9361f762 1144
27f3413e 1145 return downchar;
9361f762
MV
1146}
1147#undef FUNC_NAME
1148
b89c4943
LC
1149SCM_DEFINE (scm_char_locale_upcase, "char-locale-upcase", 1, 1, 0,
1150 (SCM chr, SCM locale),
1151 "Return the uppercase character that corresponds to @var{chr} "
1152 "according to either @var{locale} or the current locale.")
1153#define FUNC_NAME s_scm_char_locale_upcase
9361f762 1154{
27f3413e 1155 int ret;
b89c4943 1156 scm_t_locale c_locale;
2c48e4d5
MG
1157 scm_t_wchar *buf;
1158 scm_t_uint32 *upbuf;
27f3413e
MG
1159 size_t uplen;
1160 SCM str, upchar;
b89c4943
LC
1161
1162 SCM_VALIDATE_CHAR (1, chr);
b89c4943 1163 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
9361f762 1164
27f3413e
MG
1165 str = scm_i_make_wide_string (1, &buf);
1166 buf[0] = SCM_CHAR (chr);
1167
b89c4943 1168 if (c_locale != NULL)
27f3413e
MG
1169 RUN_IN_LOCALE_SECTION (c_locale, ret =
1170 u32_locale_tocase ((scm_t_uint32 *) buf, 1,
2c48e4d5 1171 &upbuf,
27f3413e
MG
1172 &uplen, u32_toupper));
1173 else
1174 ret =
2c48e4d5 1175 u32_locale_tocase ((scm_t_uint32 *) buf, 1, &upbuf,
27f3413e
MG
1176 &uplen, u32_toupper);
1177
1178 if (SCM_UNLIKELY (ret != 0))
b89c4943 1179 {
27f3413e
MG
1180 errno = ret;
1181 scm_syserror (FUNC_NAME);
b89c4943 1182 }
27f3413e 1183 if (uplen == 1)
2c48e4d5 1184 upchar = SCM_MAKE_CHAR ((scm_t_wchar) upbuf[0]);
9361f762 1185 else
27f3413e
MG
1186 upchar = chr;
1187 free (upbuf);
1188 return upchar;
b89c4943
LC
1189}
1190#undef FUNC_NAME
1191
1192SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0,
1193 (SCM str, SCM locale),
1194 "Return a new string that is the uppercase version of "
1195 "@var{str} according to either @var{locale} or the current "
1196 "locale.")
1197#define FUNC_NAME s_scm_string_locale_upcase
1198{
2c48e4d5
MG
1199 scm_t_wchar *c_str, *c_buf;
1200 scm_t_uint32 *c_upstr;
27f3413e
MG
1201 size_t len, uplen;
1202 int ret;
b89c4943 1203 scm_t_locale c_locale;
27f3413e 1204 SCM upstr;
b89c4943
LC
1205
1206 SCM_VALIDATE_STRING (1, str);
1207 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
27f3413e
MG
1208 len = scm_i_string_length (str);
1209 if (len == 0)
1210 return scm_nullstr;
1211 SCM_STRING_TO_U32_BUF (str, c_str);
b89c4943
LC
1212
1213 if (c_locale)
27f3413e
MG
1214 RUN_IN_LOCALE_SECTION (c_locale, ret =
1215 u32_locale_tocase ((scm_t_uint32 *) c_str, len,
2c48e4d5 1216 &c_upstr,
27f3413e 1217 &uplen, u32_toupper));
b89c4943 1218 else
27f3413e
MG
1219 ret =
1220 u32_locale_tocase ((scm_t_uint32 *) c_str, len,
2c48e4d5 1221 &c_upstr, &uplen, u32_toupper);
b89c4943
LC
1222
1223 scm_remember_upto_here (str);
1224
27f3413e
MG
1225 if (SCM_UNLIKELY (ret != 0))
1226 {
1227 errno = ret;
1228 scm_syserror (FUNC_NAME);
1229 }
1230
1231 upstr = scm_i_make_wide_string (uplen, &c_buf);
1232 memcpy (c_buf, c_upstr, uplen * sizeof (scm_t_wchar));
1233 free (c_upstr);
1234
1235 scm_i_try_narrow_string (upstr);
1236
1237 return upstr;
b89c4943
LC
1238}
1239#undef FUNC_NAME
9361f762 1240
b89c4943
LC
1241SCM_DEFINE (scm_string_locale_downcase, "string-locale-downcase", 1, 1, 0,
1242 (SCM str, SCM locale),
1243 "Return a new string that is the down-case version of "
1244 "@var{str} according to either @var{locale} or the current "
1245 "locale.")
1246#define FUNC_NAME s_scm_string_locale_downcase
1247{
2c48e4d5
MG
1248 scm_t_wchar *c_str, *c_buf;
1249 scm_t_uint32 *c_downstr;
27f3413e
MG
1250 size_t len, downlen;
1251 int ret;
b89c4943 1252 scm_t_locale c_locale;
27f3413e 1253 SCM downstr;
b89c4943
LC
1254
1255 SCM_VALIDATE_STRING (1, str);
1256 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
27f3413e
MG
1257 len = scm_i_string_length (str);
1258 if (len == 0)
1259 return scm_nullstr;
1260 SCM_STRING_TO_U32_BUF (str, c_str);
9361f762 1261
b89c4943 1262 if (c_locale)
27f3413e
MG
1263 RUN_IN_LOCALE_SECTION (c_locale, ret =
1264 u32_locale_tocase ((scm_t_uint32 *) c_str, len,
2c48e4d5 1265 &c_downstr,
27f3413e 1266 &downlen, u32_tolower));
9361f762 1267 else
27f3413e
MG
1268 ret =
1269 u32_locale_tocase ((scm_t_uint32 *) c_str, len,
2c48e4d5 1270 &c_downstr, &downlen, u32_tolower);
9361f762 1271
b89c4943
LC
1272 scm_remember_upto_here (str);
1273
27f3413e
MG
1274 if (SCM_UNLIKELY (ret != 0))
1275 {
1276 errno = ret;
1277 scm_syserror (FUNC_NAME);
1278 }
1279
1280 downstr = scm_i_make_wide_string (downlen, &c_buf);
1281 memcpy (c_buf, c_downstr, downlen * sizeof (scm_t_wchar));
1282 free (c_downstr);
1283
1284 scm_i_try_narrow_string (downstr);
1285
1286 return downstr;
9361f762
MV
1287}
1288#undef FUNC_NAME
1289
b89c4943
LC
1290/* Note: We don't provide mutative versions of `string-locale-(up|down)case'
1291 because, in some languages, a single downcase character maps to a couple
1292 of uppercase characters. Read the SRFI-13 document for a detailed
1293 discussion about this. */
1294
1295
1296\f
1297/* Locale-dependent number parsing. */
1298
1299SCM_DEFINE (scm_locale_string_to_integer, "locale-string->integer",
1300 1, 2, 0, (SCM str, SCM base, SCM locale),
1301 "Convert string @var{str} into an integer according to either "
1302 "@var{locale} (a locale object as returned by @code{make-locale}) "
1303 "or the current process locale. Return two values: an integer "
1304 "(on success) or @code{#f}, and the number of characters read "
1305 "from @var{str} (@code{0} on failure).")
1306#define FUNC_NAME s_scm_locale_string_to_integer
9361f762 1307{
9361f762 1308 SCM result;
b89c4943
LC
1309 long c_result;
1310 int c_base;
1311 const char *c_str;
1312 char *c_endptr;
1313 scm_t_locale c_locale;
9361f762 1314
b89c4943
LC
1315 SCM_VALIDATE_STRING (1, str);
1316 c_str = scm_i_string_chars (str);
9361f762 1317
b89c4943
LC
1318 if (base != SCM_UNDEFINED)
1319 SCM_VALIDATE_INT_COPY (2, base, c_base);
9361f762 1320 else
b89c4943
LC
1321 c_base = 10;
1322
1323 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
1324
1325 if (c_locale != NULL)
9361f762 1326 {
b89c4943
LC
1327#ifdef USE_GNU_LOCALE_API
1328 c_result = strtol_l (c_str, &c_endptr, c_base, c_locale);
1329#else
1330 RUN_IN_LOCALE_SECTION (c_locale,
1331 c_result = strtol (c_str, &c_endptr, c_base));
1332#endif
9361f762 1333 }
b89c4943
LC
1334 else
1335 c_result = strtol (c_str, &c_endptr, c_base);
1336
1337 scm_remember_upto_here (str);
1338
1339 if (c_endptr == c_str)
1340 result = SCM_BOOL_F;
1341 else
1342 result = scm_from_long (c_result);
1343
1344 return (scm_values (scm_list_2 (result, scm_from_long (c_endptr - c_str))));
1345}
1346#undef FUNC_NAME
1347
1348SCM_DEFINE (scm_locale_string_to_inexact, "locale-string->inexact",
1349 1, 1, 0, (SCM str, SCM locale),
1350 "Convert string @var{str} into an inexact number according to "
1351 "either @var{locale} (a locale object as returned by "
1352 "@code{make-locale}) or the current process locale. Return "
1353 "two values: an inexact number (on success) or @code{#f}, and "
1354 "the number of characters read from @var{str} (@code{0} on "
1355 "failure).")
1356#define FUNC_NAME s_scm_locale_string_to_inexact
1357{
1358 SCM result;
1359 double c_result;
1360 const char *c_str;
1361 char *c_endptr;
1362 scm_t_locale c_locale;
9361f762 1363
b89c4943
LC
1364 SCM_VALIDATE_STRING (1, str);
1365 c_str = scm_i_string_chars (str);
9361f762 1366
b89c4943 1367 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
9361f762 1368
b89c4943
LC
1369 if (c_locale != NULL)
1370 {
1371#ifdef USE_GNU_LOCALE_API
1372 c_result = strtod_l (c_str, &c_endptr, c_locale);
1373#else
1374 RUN_IN_LOCALE_SECTION (c_locale,
1375 c_result = strtod (c_str, &c_endptr));
1376#endif
1377 }
9361f762 1378 else
b89c4943
LC
1379 c_result = strtod (c_str, &c_endptr);
1380
1381 scm_remember_upto_here (str);
1382
1383 if (c_endptr == c_str)
9361f762 1384 result = SCM_BOOL_F;
b89c4943
LC
1385 else
1386 result = scm_from_double (c_result);
9361f762 1387
b89c4943 1388 return (scm_values (scm_list_2 (result, scm_from_long (c_endptr - c_str))));
9361f762
MV
1389}
1390#undef FUNC_NAME
1391
b89c4943 1392\f
a2f00b9b
LC
1393/* Language information, aka. `nl_langinfo ()'. */
1394
1395/* FIXME: Issues related to `nl-langinfo'.
1396
1397 1. The `CODESET' value is not normalized. This is a secondary issue, but
1398 still a practical issue. See
1399 http://www.cl.cam.ac.uk/~mgk25/ucs/norm_charmap.c for codeset
1400 normalization.
1401
1402 2. `nl_langinfo ()' is not available on Windows.
1403
1404 3. `nl_langinfo ()' may return strings encoded in a locale different from
1405 the current one, thereby defeating `scm_from_locale_string ()'.
1406 Example: support the current locale is "Latin-1" and one asks:
1407
1408 (nl-langinfo DAY_1 (make-locale LC_ALL "eo_EO.UTF-8"))
1409
1410 The result will be a UTF-8 string. However, `scm_from_locale_string',
1411 which expects a Latin-1 string, won't be able to make much sense of the
1412 returned string. Thus, we'd need an `scm_from_string ()' variant where
1413 the locale (or charset) is explicitly passed. */
1414
1415
1416SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0,
1417 (SCM item, SCM locale),
1418 "Return a string denoting locale information for @var{item} "
1419 "in the current locale or that specified by @var{locale}. "
1420 "The semantics and arguments are the same as those of the "
1421 "X/Open @code{nl_langinfo} function (@pxref{The Elegant and "
1422 "Fast Way, @code{nl_langinfo},, libc, The GNU C Library "
1423 "Reference Manual}).")
1424#define FUNC_NAME s_scm_nl_langinfo
1425{
1426#ifdef HAVE_NL_LANGINFO
1427 SCM result;
1428 nl_item c_item;
1429 char *c_result;
1430 scm_t_locale c_locale;
1431
1432 SCM_VALIDATE_INT_COPY (2, item, c_item);
1433 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1434
1435 /* Sadly, `nl_langinfo ()' returns a pointer to a static string. According
1436 to SuS v2, that static string may be modified by subsequent calls to
1437 `nl_langinfo ()' as well as by calls to `setlocale ()'. Thus, we must
1438 acquire the locale mutex before doing invoking `nl_langinfo ()'. See
1439 http://opengroup.org/onlinepubs/007908799/xsh/nl_langinfo.html for
1440 details. */
1441
1442 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
1443 if (c_locale != NULL)
1444 {
1445#ifdef USE_GNU_LOCALE_API
1446 c_result = nl_langinfo_l (c_item, c_locale);
1447#else
1448 /* We can't use `RUN_IN_LOCALE_SECTION ()' here because the locale
1449 mutex is already taken. */
1450 int lsec_err;
1451 scm_t_locale_settings lsec_prev_locale;
1452
1453 lsec_err = get_current_locale_settings (&lsec_prev_locale);
1454 if (lsec_err)
1455 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
1456 else
1457 {
1458 lsec_err = install_locale (c_locale);
1459 if (lsec_err)
1460 {
1461 leave_locale_section (&lsec_prev_locale);
1462 free_locale_settings (&lsec_prev_locale);
1463 }
1464 }
1465
1466 if (lsec_err)
1467 scm_locale_error (FUNC_NAME, lsec_err);
1468 else
1469 {
1470 c_result = nl_langinfo (c_item);
1471
1445e449 1472 restore_locale_settings (&lsec_prev_locale);
a2f00b9b
LC
1473 free_locale_settings (&lsec_prev_locale);
1474 }
1475#endif
1476 }
1477 else
1478 c_result = nl_langinfo (c_item);
1479
1480 c_result = strdup (c_result);
1481 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
1482
1483 if (c_result == NULL)
1484 result = SCM_BOOL_F;
1485 else
1486 {
a2f00b9b
LC
1487 switch (c_item)
1488 {
afb49959 1489#if (defined GROUPING) && (defined MON_GROUPING)
a2f00b9b
LC
1490 case GROUPING:
1491 case MON_GROUPING:
a2f00b9b 1492 {
afb49959 1493 char *p;
a2f00b9b 1494
afb49959
LC
1495 /* In this cases, the result is to be interpreted as a list of
1496 numbers. If the last item is `CHARS_MAX', it has the special
1497 meaning "no more grouping". */
1498 result = SCM_EOL;
1499 for (p = c_result; (*p != '\0') && (*p != CHAR_MAX); p++)
1500 result = scm_cons (SCM_I_MAKINUM ((int) *p), result);
a2f00b9b 1501
afb49959
LC
1502 {
1503 SCM last_pair = result;
a2f00b9b 1504
afb49959
LC
1505 result = scm_reverse_x (result, SCM_EOL);
1506
1507 if (*p != CHAR_MAX)
1508 {
1509 /* Cyclic grouping information. */
1510 if (last_pair != SCM_EOL)
1511 SCM_SETCDR (last_pair, result);
1512 }
1513 }
a2f00b9b 1514
afb49959
LC
1515 free (c_result);
1516 break;
1517 }
1518#endif
1519
1520#if (defined FRAC_DIGITS) && (defined INT_FRAC_DIGITS)
a2f00b9b
LC
1521 case FRAC_DIGITS:
1522 case INT_FRAC_DIGITS:
1523 /* This is to be interpreted as a single integer. */
1524 if (*c_result == CHAR_MAX)
1525 /* Unspecified. */
1526 result = SCM_BOOL_F;
1527 else
1528 result = SCM_I_MAKINUM (*c_result);
1529
1530 free (c_result);
1531 break;
afb49959 1532#endif
a2f00b9b 1533
afb49959 1534#if (defined P_CS_PRECEDES) && (defined INT_N_CS_PRECEDES)
a2f00b9b
LC
1535 case P_CS_PRECEDES:
1536 case N_CS_PRECEDES:
1537 case INT_P_CS_PRECEDES:
1538 case INT_N_CS_PRECEDES:
afb49959 1539#if (defined P_SEP_BY_SPACE) && (defined N_SEP_BY_SPACE)
a2f00b9b
LC
1540 case P_SEP_BY_SPACE:
1541 case N_SEP_BY_SPACE:
afb49959 1542#endif
a2f00b9b
LC
1543 /* This is to be interpreted as a boolean. */
1544 result = scm_from_bool (*c_result);
1545
1546 free (c_result);
1547 break;
afb49959 1548#endif
a2f00b9b 1549
afb49959 1550#if (defined P_SIGN_POSN) && (defined INT_N_SIGN_POSN)
a2f00b9b
LC
1551 case P_SIGN_POSN:
1552 case N_SIGN_POSN:
1553 case INT_P_SIGN_POSN:
1554 case INT_N_SIGN_POSN:
1555 /* See `(libc) Sign of Money Amount' for the interpretation of the
1556 return value here. */
1557 switch (*c_result)
1558 {
1559 case 0:
1560 result = scm_from_locale_symbol ("parenthesize");
1561 break;
1562
1563 case 1:
1564 result = scm_from_locale_symbol ("sign-before");
1565 break;
1566
1567 case 2:
1568 result = scm_from_locale_symbol ("sign-after");
1569 break;
1570
1571 case 3:
1572 result = scm_from_locale_symbol ("sign-before-currency-symbol");
1573 break;
1574
1575 case 4:
1576 result = scm_from_locale_symbol ("sign-after-currency-symbol");
1577 break;
1578
1579 default:
1580 result = scm_from_locale_symbol ("unspecified");
1581 }
1582 break;
afb49959 1583#endif
a2f00b9b
LC
1584
1585 default:
1586 /* FIXME: `locale_string ()' is not appropriate here because of
1587 encoding issues (see comment above). */
1588 result = scm_take_locale_string (c_result);
1589 }
1590 }
1591
1592 return result;
1593#else
1594 scm_syserror_msg (FUNC_NAME, "`nl-langinfo' not supported on your system",
1595 SCM_EOL, ENOSYS);
1596
1597 return SCM_BOOL_F;
1598#endif
1599}
1600#undef FUNC_NAME
1601
1602/* Define the `nl_item' constants. */
1603static inline void
1604define_langinfo_items (void)
9361f762 1605{
a2f00b9b
LC
1606#if (defined HAVE_NL_TYPES_H) && (defined HAVE_LANGINFO_H)
1607
1608#define DEFINE_NLITEM_CONSTANT(_item) \
1609 scm_c_define (# _item, scm_from_int (_item))
1610
1611 DEFINE_NLITEM_CONSTANT (CODESET);
1612
1613 /* Abbreviated days of the week. */
1614 DEFINE_NLITEM_CONSTANT (ABDAY_1);
1615 DEFINE_NLITEM_CONSTANT (ABDAY_2);
1616 DEFINE_NLITEM_CONSTANT (ABDAY_3);
1617 DEFINE_NLITEM_CONSTANT (ABDAY_4);
1618 DEFINE_NLITEM_CONSTANT (ABDAY_5);
1619 DEFINE_NLITEM_CONSTANT (ABDAY_6);
1620 DEFINE_NLITEM_CONSTANT (ABDAY_7);
1621
1622 /* Long-named days of the week. */
1623 DEFINE_NLITEM_CONSTANT (DAY_1); /* Sunday */
1624 DEFINE_NLITEM_CONSTANT (DAY_2); /* Monday */
1625 DEFINE_NLITEM_CONSTANT (DAY_3); /* Tuesday */
1626 DEFINE_NLITEM_CONSTANT (DAY_4); /* Wednesday */
1627 DEFINE_NLITEM_CONSTANT (DAY_5); /* Thursday */
1628 DEFINE_NLITEM_CONSTANT (DAY_6); /* Friday */
1629 DEFINE_NLITEM_CONSTANT (DAY_7); /* Saturday */
1630
1631 /* Abbreviated month names. */
1632 DEFINE_NLITEM_CONSTANT (ABMON_1); /* Jan */
1633 DEFINE_NLITEM_CONSTANT (ABMON_2);
1634 DEFINE_NLITEM_CONSTANT (ABMON_3);
1635 DEFINE_NLITEM_CONSTANT (ABMON_4);
1636 DEFINE_NLITEM_CONSTANT (ABMON_5);
1637 DEFINE_NLITEM_CONSTANT (ABMON_6);
1638 DEFINE_NLITEM_CONSTANT (ABMON_7);
1639 DEFINE_NLITEM_CONSTANT (ABMON_8);
1640 DEFINE_NLITEM_CONSTANT (ABMON_9);
1641 DEFINE_NLITEM_CONSTANT (ABMON_10);
1642 DEFINE_NLITEM_CONSTANT (ABMON_11);
1643 DEFINE_NLITEM_CONSTANT (ABMON_12);
1644
1645 /* Long month names. */
1646 DEFINE_NLITEM_CONSTANT (MON_1); /* January */
1647 DEFINE_NLITEM_CONSTANT (MON_2);
1648 DEFINE_NLITEM_CONSTANT (MON_3);
1649 DEFINE_NLITEM_CONSTANT (MON_4);
1650 DEFINE_NLITEM_CONSTANT (MON_5);
1651 DEFINE_NLITEM_CONSTANT (MON_6);
1652 DEFINE_NLITEM_CONSTANT (MON_7);
1653 DEFINE_NLITEM_CONSTANT (MON_8);
1654 DEFINE_NLITEM_CONSTANT (MON_9);
1655 DEFINE_NLITEM_CONSTANT (MON_10);
1656 DEFINE_NLITEM_CONSTANT (MON_11);
1657 DEFINE_NLITEM_CONSTANT (MON_12);
1658
1659 DEFINE_NLITEM_CONSTANT (AM_STR); /* Ante meridiem string. */
1660 DEFINE_NLITEM_CONSTANT (PM_STR); /* Post meridiem string. */
1661
1662 DEFINE_NLITEM_CONSTANT (D_T_FMT); /* Date and time format for strftime. */
1663 DEFINE_NLITEM_CONSTANT (D_FMT); /* Date format for strftime. */
1664 DEFINE_NLITEM_CONSTANT (T_FMT); /* Time format for strftime. */
1665 DEFINE_NLITEM_CONSTANT (T_FMT_AMPM);/* 12-hour time format for strftime. */
1666
1667 DEFINE_NLITEM_CONSTANT (ERA); /* Alternate era. */
1668 DEFINE_NLITEM_CONSTANT (ERA_D_FMT); /* Date in alternate era format. */
1669 DEFINE_NLITEM_CONSTANT (ERA_D_T_FMT); /* Date and time in alternate era
1670 format. */
1671 DEFINE_NLITEM_CONSTANT (ERA_T_FMT); /* Time in alternate era format. */
1672
1673 DEFINE_NLITEM_CONSTANT (ALT_DIGITS); /* Alternate symbols for digits. */
1674 DEFINE_NLITEM_CONSTANT (RADIXCHAR);
1675 DEFINE_NLITEM_CONSTANT (THOUSEP);
1676
1677#ifdef YESEXPR
1678 DEFINE_NLITEM_CONSTANT (YESEXPR);
1679#endif
1680#ifdef NOEXPR
1681 DEFINE_NLITEM_CONSTANT (NOEXPR);
1682#endif
b89c4943 1683
a2f00b9b
LC
1684#ifdef CRNCYSTR /* currency symbol */
1685 DEFINE_NLITEM_CONSTANT (CRNCYSTR);
1686#endif
b89c4943 1687
a2f00b9b 1688 /* GNU extensions. */
b89c4943 1689
a2f00b9b
LC
1690#ifdef ERA_YEAR
1691 DEFINE_NLITEM_CONSTANT (ERA_YEAR); /* Year in alternate era format. */
1692#endif
b89c4943 1693
a2f00b9b
LC
1694 /* LC_MONETARY category: formatting of monetary quantities.
1695 These items each correspond to a member of `struct lconv',
1696 defined in <locale.h>. */
1697#ifdef INT_CURR_SYMBOL
1698 DEFINE_NLITEM_CONSTANT (INT_CURR_SYMBOL);
1699#endif
1700#ifdef MON_DECIMAL_POINT
1701 DEFINE_NLITEM_CONSTANT (MON_DECIMAL_POINT);
1702#endif
1703#ifdef MON_THOUSANDS_SEP
1704 DEFINE_NLITEM_CONSTANT (MON_THOUSANDS_SEP);
1705#endif
1706#ifdef MON_GROUPING
1707 DEFINE_NLITEM_CONSTANT (MON_GROUPING);
1708#endif
1709#ifdef POSITIVE_SIGN
1710 DEFINE_NLITEM_CONSTANT (POSITIVE_SIGN);
1711#endif
1712#ifdef NEGATIVE_SIGN
1713 DEFINE_NLITEM_CONSTANT (NEGATIVE_SIGN);
1714#endif
1715#ifdef GROUPING
1716 DEFINE_NLITEM_CONSTANT (GROUPING);
1717#endif
1718#ifdef INT_FRAC_DIGITS
1719 DEFINE_NLITEM_CONSTANT (INT_FRAC_DIGITS);
1720#endif
1721#ifdef FRAC_DIGITS
1722 DEFINE_NLITEM_CONSTANT (FRAC_DIGITS);
1723#endif
1724#ifdef P_CS_PRECEDES
1725 DEFINE_NLITEM_CONSTANT (P_CS_PRECEDES);
1726#endif
1727#ifdef P_SEP_BY_SPACE
1728 DEFINE_NLITEM_CONSTANT (P_SEP_BY_SPACE);
1729#endif
1730#ifdef N_CS_PRECEDES
1731 DEFINE_NLITEM_CONSTANT (N_CS_PRECEDES);
1732#endif
1733#ifdef N_SEP_BY_SPACE
1734 DEFINE_NLITEM_CONSTANT (N_SEP_BY_SPACE);
1735#endif
1736#ifdef P_SIGN_POSN
1737 DEFINE_NLITEM_CONSTANT (P_SIGN_POSN);
1738#endif
1739#ifdef N_SIGN_POSN
1740 DEFINE_NLITEM_CONSTANT (N_SIGN_POSN);
1741#endif
1742#ifdef INT_P_CS_PRECEDES
1743 DEFINE_NLITEM_CONSTANT (INT_P_CS_PRECEDES);
1744#endif
1745#ifdef INT_P_SEP_BY_SPACE
1746 DEFINE_NLITEM_CONSTANT (INT_P_SEP_BY_SPACE);
1747#endif
1748#ifdef INT_N_CS_PRECEDES
1749 DEFINE_NLITEM_CONSTANT (INT_N_CS_PRECEDES);
1750#endif
1751#ifdef INT_N_SEP_BY_SPACE
1752 DEFINE_NLITEM_CONSTANT (INT_N_SEP_BY_SPACE);
1753#endif
1754#ifdef INT_P_SIGN_POSN
1755 DEFINE_NLITEM_CONSTANT (INT_P_SIGN_POSN);
1756#endif
1757#ifdef INT_N_SIGN_POSN
1758 DEFINE_NLITEM_CONSTANT (INT_N_SIGN_POSN);
1759#endif
1760
1761#undef DEFINE_NLITEM_CONSTANT
1762
1763#endif /* HAVE_NL_TYPES_H */
1764}
1765
1766\f
1767void
1768scm_init_i18n ()
1769{
1770 SCM global_locale_smob;
1771
1772#ifdef HAVE_NL_LANGINFO
1773 scm_add_feature ("nl-langinfo");
1774 define_langinfo_items ();
1775#endif
b89c4943 1776
9361f762 1777#include "libguile/i18n.x"
b89c4943 1778
a2f00b9b
LC
1779 /* Initialize the global locale object with a special `locale' SMOB. */
1780 SCM_NEWSMOB (global_locale_smob, scm_tc16_locale_smob_type, NULL);
1781 SCM_VARIABLE_SET (scm_global_locale, global_locale_smob);
9361f762
MV
1782}
1783
756e8a3a
LC
1784void
1785scm_bootstrap_i18n ()
1786{
1787 scm_c_register_extension ("libguile", "scm_init_i18n",
1788 (scm_t_extension_init_func) scm_init_i18n,
1789 NULL);
1790
1791}
1792
9361f762
MV
1793
1794/*
1795 Local Variables:
1796 c-file-style: "gnu"
1797 End:
1798*/