Improved support for Unicode title case in Guile's string and character APIs.
[bpt/guile.git] / libguile / i18n.c
1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <alloca.h>
24
25 #include "libguile/_scm.h"
26 #include "libguile/extensions.h"
27 #include "libguile/feature.h"
28 #include "libguile/i18n.h"
29 #include "libguile/strings.h"
30 #include "libguile/chars.h"
31 #include "libguile/dynwind.h"
32 #include "libguile/validate.h"
33 #include "libguile/values.h"
34 #include "libguile/threads.h"
35
36 #include <locale.h>
37 #include <string.h> /* `strcoll ()' */
38 #include <ctype.h> /* `toupper ()' et al. */
39 #include <errno.h>
40 #include <unicase.h>
41 #include <unistr.h>
42
43 #if (defined HAVE_NEWLOCALE) && (defined HAVE_STRCOLL_L)
44 /* The GNU thread-aware locale API is documented in ``Thread-Aware Locale
45 Model, a Proposal'', by Ulrich Drepper:
46
47 http://people.redhat.com/drepper/tllocale.ps.gz
48
49 It is now also implemented by Darwin:
50
51 http://developer.apple.com/documentation/Darwin/Reference/ManPages/man3/newlocale.3.html
52
53 The whole API was eventually standardized in the ``Open Group Base
54 Specifications Issue 7'' (aka. "POSIX 2008"):
55
56 http://www.opengroup.org/onlinepubs/9699919799/basedefs/locale.h.html */
57 # define USE_GNU_LOCALE_API
58 #endif
59
60 #include "libguile/posix.h" /* for `scm_i_locale_mutex' */
61
62 #if (defined HAVE_LANGINFO_H) && (defined HAVE_NL_TYPES_H)
63 # include <langinfo.h>
64 # include <nl_types.h>
65 #endif
66
67 #ifndef HAVE_SETLOCALE
68 static inline char *
69 setlocale (int category, const char *name)
70 {
71 errno = ENOSYS;
72 return NULL;
73 }
74 #endif
75
76 /* Helper stringification macro. */
77 #define SCM_I18N_STRINGIFY(_name) # _name
78
79
80 \f
81 /* Locale objects, string and character collation, and other locale-dependent
82 string operations.
83
84 A large part of the code here deals with emulating glibc's reentrant
85 locale API on non-GNU systems. The emulation is a bit "brute-force":
86 Whenever a `-locale<?' procedure is passed a locale object, then:
87
88 1. The `scm_i_locale_mutex' is locked.
89 2. A series of `setlocale ()' call is performed to store the current
90 locale for each category in an `scm_t_locale' object.
91 3. A series of `setlocale ()' call is made to install each of the locale
92 categories of each of the base locales of each locale object,
93 recursively, starting from the last locale object of the chain.
94 4. The settings captured in step (2) are restored.
95 5. The `scm_i_locale_mutex' is released.
96
97 Hopefully, the X/Open standard will eventually make this hack useless.
98
99 Note: We don't wrap glibc's `uselocale ()' call because it sets the locale
100 of the current _thread_ (unlike `setlocale ()') and doing so would require
101 maintaining per-thread locale information on non-GNU systems and always
102 re-installing this locale upon locale-dependent calls. */
103
104
105 /* Return the category mask corresponding to CAT. */
106 #define SCM_LOCALE_CATEGORY_MASK(_cat) LC_ ## _cat ## _MASK
107
108
109 #ifndef USE_GNU_LOCALE_API
110
111 /* Provide the locale category masks as found in glibc. This must be kept in
112 sync with `locale-categories.h'. */
113
114 # define LC_CTYPE_MASK 1
115 # define LC_COLLATE_MASK 2
116 # define LC_MESSAGES_MASK 4
117 # define LC_MONETARY_MASK 8
118 # define LC_NUMERIC_MASK 16
119 # define LC_TIME_MASK 32
120
121 # ifdef LC_PAPER
122 # define LC_PAPER_MASK 64
123 # else
124 # define LC_PAPER_MASK 0
125 # endif
126 # ifdef LC_NAME
127 # define LC_NAME_MASK 128
128 # else
129 # define LC_NAME_MASK 0
130 # endif
131 # ifdef LC_ADDRESS
132 # define LC_ADDRESS_MASK 256
133 # else
134 # define LC_ADDRESS_MASK 0
135 # endif
136 # ifdef LC_TELEPHONE
137 # define LC_TELEPHONE_MASK 512
138 # else
139 # define LC_TELEPHONE_MASK 0
140 # endif
141 # ifdef LC_MEASUREMENT
142 # define LC_MEASUREMENT_MASK 1024
143 # else
144 # define LC_MEASUREMENT_MASK 0
145 # endif
146 # ifdef LC_IDENTIFICATION
147 # define LC_IDENTIFICATION_MASK 2048
148 # else
149 # define LC_IDENTIFICATION_MASK 0
150 # endif
151
152 # define LC_ALL_MASK (LC_CTYPE_MASK \
153 | LC_NUMERIC_MASK \
154 | LC_TIME_MASK \
155 | LC_COLLATE_MASK \
156 | LC_MONETARY_MASK \
157 | LC_MESSAGES_MASK \
158 | LC_PAPER_MASK \
159 | LC_NAME_MASK \
160 | LC_ADDRESS_MASK \
161 | LC_TELEPHONE_MASK \
162 | LC_MEASUREMENT_MASK \
163 | LC_IDENTIFICATION_MASK \
164 )
165
166 /* Locale objects as returned by `make-locale' on non-GNU systems. */
167 typedef struct scm_locale
168 {
169 SCM base_locale; /* a `locale' object */
170 char *locale_name;
171 int category_mask;
172 } *scm_t_locale;
173
174
175 /* Free the resources used by LOCALE. */
176 static inline void
177 scm_i_locale_free (scm_t_locale locale)
178 {
179 free (locale->locale_name);
180 locale->locale_name = NULL;
181 }
182
183 #else /* USE_GNU_LOCALE_API */
184
185 /* Alias for glibc's locale type. */
186 typedef locale_t scm_t_locale;
187
188 #define scm_i_locale_free freelocale
189
190 #endif /* USE_GNU_LOCALE_API */
191
192
193 /* A locale object denoting the global locale. */
194 SCM_GLOBAL_VARIABLE (scm_global_locale, "%global-locale");
195
196
197 /* Validate parameter ARG as a locale object and set C_LOCALE to the
198 corresponding C locale object. */
199 #define SCM_VALIDATE_LOCALE_COPY(_pos, _arg, _c_locale) \
200 do \
201 { \
202 SCM_VALIDATE_SMOB ((_pos), (_arg), locale_smob_type); \
203 (_c_locale) = (scm_t_locale)SCM_SMOB_DATA (_arg); \
204 } \
205 while (0)
206
207 /* Validate optional parameter ARG as either undefined or bound to a locale
208 object. Set C_LOCALE to the corresponding C locale object or NULL. */
209 #define SCM_VALIDATE_OPTIONAL_LOCALE_COPY(_pos, _arg, _c_locale) \
210 do \
211 { \
212 if ((_arg) != SCM_UNDEFINED) \
213 SCM_VALIDATE_LOCALE_COPY (_pos, _arg, _c_locale); \
214 else \
215 (_c_locale) = NULL; \
216 } \
217 while (0)
218
219
220 SCM_SMOB (scm_tc16_locale_smob_type, "locale", 0);
221
222 SCM_SMOB_FREE (scm_tc16_locale_smob_type, smob_locale_free, locale)
223 {
224 scm_t_locale c_locale;
225
226 c_locale = (scm_t_locale) SCM_SMOB_DATA (locale);
227 scm_i_locale_free (c_locale);
228
229 return 0;
230 }
231
232
233 static void inline scm_locale_error (const char *, int) SCM_NORETURN;
234
235 /* Throw an exception corresponding to error ERR. */
236 static void inline
237 scm_locale_error (const char *func_name, int err)
238 {
239 scm_syserror_msg (func_name,
240 "Failed to install locale",
241 SCM_EOL, err);
242 }
243
244
245 \f
246 /* Emulating GNU's reentrant locale API. */
247 #ifndef USE_GNU_LOCALE_API
248
249
250 /* Maximum number of chained locales (via `base_locale'). */
251 #define LOCALE_STACK_SIZE_MAX 256
252
253 typedef struct
254 {
255 #define SCM_DEFINE_LOCALE_CATEGORY(_name) char * _name;
256 #include "locale-categories.h"
257 #undef SCM_DEFINE_LOCALE_CATEGORY
258 } scm_t_locale_settings;
259
260 /* Fill out SETTINGS according to the current locale settings. On success
261 zero is returned and SETTINGS is properly initialized. */
262 static int
263 get_current_locale_settings (scm_t_locale_settings *settings)
264 {
265 const char *locale_name;
266
267 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
268 { \
269 SCM_SYSCALL (locale_name = setlocale (LC_ ## _name, NULL)); \
270 if (locale_name == NULL) \
271 goto handle_error; \
272 \
273 settings-> _name = strdup (locale_name); \
274 if (settings-> _name == NULL) \
275 goto handle_oom; \
276 }
277
278 #include "locale-categories.h"
279 #undef SCM_DEFINE_LOCALE_CATEGORY
280
281 return 0;
282
283 handle_error:
284 return EINVAL;
285
286 handle_oom:
287 return ENOMEM;
288 }
289
290 /* Restore locale settings SETTINGS. On success, return zero. */
291 static int
292 restore_locale_settings (const scm_t_locale_settings *settings)
293 {
294 const char *result;
295
296 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
297 SCM_SYSCALL (result = setlocale (LC_ ## _name, settings-> _name)); \
298 if (result == NULL) \
299 goto handle_error;
300
301 #include "locale-categories.h"
302 #undef SCM_DEFINE_LOCALE_CATEGORY
303
304 return 0;
305
306 handle_error:
307 return EINVAL;
308 }
309
310 /* Free memory associated with SETTINGS. */
311 static void
312 free_locale_settings (scm_t_locale_settings *settings)
313 {
314 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
315 free (settings-> _name); \
316 settings->_name = NULL;
317 #include "locale-categories.h"
318 #undef SCM_DEFINE_LOCALE_CATEGORY
319 }
320
321 /* Install the locale named LOCALE_NAME for all the categories listed in
322 CATEGORY_MASK. */
323 static int
324 install_locale_categories (const char *locale_name, int category_mask)
325 {
326 const char *result;
327
328 if (category_mask == LC_ALL_MASK)
329 {
330 SCM_SYSCALL (result = setlocale (LC_ALL, locale_name));
331 if (result == NULL)
332 goto handle_error;
333 }
334 else
335 {
336 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
337 if (category_mask & SCM_LOCALE_CATEGORY_MASK (_name)) \
338 { \
339 SCM_SYSCALL (result = setlocale (LC_ ## _name, locale_name)); \
340 if (result == NULL) \
341 goto handle_error; \
342 }
343 #include "locale-categories.h"
344 #undef SCM_DEFINE_LOCALE_CATEGORY
345 }
346
347 return 0;
348
349 handle_error:
350 return EINVAL;
351 }
352
353 /* Install LOCALE, recursively installing its base locales first. On
354 success, zero is returned. */
355 static int
356 install_locale (scm_t_locale locale)
357 {
358 scm_t_locale stack[LOCALE_STACK_SIZE_MAX];
359 int category_mask = 0;
360 size_t stack_size = 0;
361 int stack_offset = 0;
362 const char *result = NULL;
363
364 /* Build up a locale stack by traversing the `base_locale' link. */
365 do
366 {
367 if (stack_size >= LOCALE_STACK_SIZE_MAX)
368 /* We cannot use `scm_error ()' here because otherwise the locale
369 mutex may remain locked. */
370 return EINVAL;
371
372 stack[stack_size++] = locale;
373
374 /* Keep track of which categories have already been taken into
375 account. */
376 category_mask |= locale->category_mask;
377
378 if (locale->base_locale != SCM_UNDEFINED)
379 locale = (scm_t_locale) SCM_SMOB_DATA (locale->base_locale);
380 else
381 locale = NULL;
382 }
383 while ((locale != NULL) && (category_mask != LC_ALL_MASK));
384
385 /* Install the C locale to start from a pristine state. */
386 SCM_SYSCALL (result = setlocale (LC_ALL, "C"));
387 if (result == NULL)
388 goto handle_error;
389
390 /* Install the locales in reverse order. */
391 for (stack_offset = stack_size - 1;
392 stack_offset >= 0;
393 stack_offset--)
394 {
395 int err;
396 scm_t_locale locale;
397
398 locale = stack[stack_offset];
399 err = install_locale_categories (locale->locale_name,
400 locale->category_mask);
401 if (err)
402 goto handle_error;
403 }
404
405 return 0;
406
407 handle_error:
408 return EINVAL;
409 }
410
411 /* Leave the locked locale section. */
412 static inline void
413 leave_locale_section (const scm_t_locale_settings *settings)
414 {
415 /* Restore the previous locale settings. */
416 (void)restore_locale_settings (settings);
417
418 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
419 }
420
421 /* Enter a locked locale section. */
422 static inline int
423 enter_locale_section (scm_t_locale locale,
424 scm_t_locale_settings *prev_locale)
425 {
426 int err;
427
428 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
429
430 err = get_current_locale_settings (prev_locale);
431 if (err)
432 {
433 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
434 return err;
435 }
436
437 err = install_locale (locale);
438 if (err)
439 {
440 leave_locale_section (prev_locale);
441 free_locale_settings (prev_locale);
442 }
443
444 return err;
445 }
446
447 /* Convenient macro to run STATEMENT in the locale context of C_LOCALE. */
448 #define RUN_IN_LOCALE_SECTION(_c_locale, _statement) \
449 do \
450 { \
451 int lsec_err; \
452 scm_t_locale_settings lsec_prev_locale; \
453 \
454 lsec_err = enter_locale_section ((_c_locale), &lsec_prev_locale); \
455 if (lsec_err) \
456 scm_locale_error (FUNC_NAME, lsec_err); \
457 else \
458 { \
459 _statement ; \
460 \
461 leave_locale_section (&lsec_prev_locale); \
462 free_locale_settings (&lsec_prev_locale); \
463 } \
464 } \
465 while (0)
466
467 /* Convert the current locale settings into a locale SMOB. On success, zero
468 is returned and RESULT points to the new SMOB. Otherwise, an error is
469 returned. */
470 static int
471 get_current_locale (SCM *result)
472 {
473 int err = 0;
474 scm_t_locale c_locale;
475 const char *current_locale;
476
477 c_locale = scm_gc_malloc (sizeof (* c_locale), "locale");
478
479
480 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
481
482 c_locale->category_mask = LC_ALL_MASK;
483 c_locale->base_locale = SCM_UNDEFINED;
484
485 current_locale = setlocale (LC_ALL, NULL);
486 if (current_locale != NULL)
487 {
488 c_locale->locale_name = strdup (current_locale);
489 if (c_locale->locale_name == NULL)
490 err = ENOMEM;
491 }
492 else
493 err = EINVAL;
494
495 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
496
497 if (err)
498 scm_gc_free (c_locale, sizeof (* c_locale), "locale");
499 else
500 SCM_NEWSMOB (*result, scm_tc16_locale_smob_type, c_locale);
501
502 return err;
503 }
504
505 #else /* USE_GNU_LOCALE_API */
506
507 /* Convenient macro to run STATEMENT in the locale context of C_LOCALE. */
508 #define RUN_IN_LOCALE_SECTION(_c_locale, _statement) \
509 do \
510 { \
511 scm_t_locale old_loc; \
512 \
513 old_loc = uselocale (_c_locale); \
514 _statement ; \
515 uselocale (old_loc); \
516 } \
517 while (0)
518
519
520 #endif /* USE_GNU_LOCALE_API */
521
522
523 \f
524 /* `make-locale' can take either category lists or single categories (the
525 `LC_*' integer constants). */
526 #define SCM_LIST_OR_INTEGER_P(arg) \
527 (scm_is_integer (arg) || scm_is_true (scm_list_p (arg)))
528
529
530 /* Return the category mask corresponding to CATEGORY (an `LC_' integer
531 constant). */
532 static inline int
533 category_to_category_mask (SCM category,
534 const char *func_name, int pos)
535 {
536 int c_category;
537 int c_category_mask;
538
539 c_category = scm_to_int (category);
540
541 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
542 case LC_ ## _name: \
543 c_category_mask = SCM_LOCALE_CATEGORY_MASK (_name); \
544 break;
545
546 switch (c_category)
547 {
548 #include "locale-categories.h"
549
550 case LC_ALL:
551 c_category_mask = LC_ALL_MASK;
552 break;
553
554 default:
555 scm_wrong_type_arg_msg (func_name, pos, category,
556 "locale category");
557 }
558
559 #undef SCM_DEFINE_LOCALE_CATEGORY
560
561 return c_category_mask;
562 }
563
564 /* Convert CATEGORIES, a list of locale categories or a single category (an
565 integer), into a category mask. */
566 static int
567 category_list_to_category_mask (SCM categories,
568 const char *func_name, int pos)
569 {
570 int c_category_mask = 0;
571
572 if (scm_is_integer (categories))
573 c_category_mask = category_to_category_mask (categories,
574 func_name, pos);
575 else
576 for (; !scm_is_null (categories); categories = SCM_CDR (categories))
577 {
578 SCM category = SCM_CAR (categories);
579
580 c_category_mask |=
581 category_to_category_mask (category, func_name, pos);
582 }
583
584 return c_category_mask;
585 }
586
587
588 SCM_DEFINE (scm_make_locale, "make-locale", 2, 1, 0,
589 (SCM category_list, SCM locale_name, SCM base_locale),
590 "Return a reference to a data structure representing a set of "
591 "locale datasets. @var{category_list} should be either a list "
592 "of locale categories or a single category as used with "
593 "@code{setlocale} (@pxref{Locales, @code{setlocale}}) and "
594 "@var{locale_name} should be the name of the locale considered "
595 "(e.g., @code{\"sl_SI\"}). Optionally, if @code{base_locale} is "
596 "passed, it should be a locale object denoting settings for "
597 "categories not listed in @var{category_list}.")
598 #define FUNC_NAME s_scm_make_locale
599 {
600 SCM locale = SCM_BOOL_F;
601 int err = 0;
602 int c_category_mask;
603 char *c_locale_name;
604 scm_t_locale c_base_locale, c_locale;
605
606 SCM_MAKE_VALIDATE (1, category_list, LIST_OR_INTEGER_P);
607 SCM_VALIDATE_STRING (2, locale_name);
608 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, base_locale, c_base_locale);
609
610 c_category_mask = category_list_to_category_mask (category_list,
611 FUNC_NAME, 1);
612 c_locale_name = scm_to_locale_string (locale_name);
613
614 #ifdef USE_GNU_LOCALE_API
615
616 if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale)))
617 c_base_locale = LC_GLOBAL_LOCALE;
618
619 if (c_base_locale != (locale_t) 0)
620 {
621 /* C_BASE_LOCALE is to be consumed by `newlocale ()' so it needs to be
622 duplicated before. */
623 c_base_locale = duplocale (c_base_locale);
624
625 if (c_base_locale == (locale_t) 0)
626 {
627 err = errno;
628 goto fail;
629 }
630 }
631
632 c_locale = newlocale (c_category_mask, c_locale_name, c_base_locale);
633
634 free (c_locale_name);
635
636 if (c_locale == (locale_t) 0)
637 {
638 if (c_base_locale != (locale_t) 0)
639 freelocale (c_base_locale);
640 scm_locale_error (FUNC_NAME, errno);
641 }
642 else
643 SCM_NEWSMOB (locale, scm_tc16_locale_smob_type, c_locale);
644
645 #else
646
647 c_locale = scm_gc_malloc (sizeof (* c_locale), "locale");
648
649 c_locale->category_mask = c_category_mask;
650 c_locale->locale_name = c_locale_name;
651
652 if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale)))
653 {
654 /* Get the current locale settings and turn them into a locale
655 object. */
656 err = get_current_locale (&base_locale);
657 if (err)
658 goto fail;
659 }
660
661 c_locale->base_locale = base_locale;
662
663 {
664 /* Try out the new locale and raise an exception if it doesn't work. */
665 int err;
666 scm_t_locale_settings prev_locale;
667
668 err = enter_locale_section (c_locale, &prev_locale);
669
670 if (err)
671 goto fail;
672 else
673 {
674 leave_locale_section (&prev_locale);
675 SCM_NEWSMOB (locale, scm_tc16_locale_smob_type, c_locale);
676 }
677 }
678
679 #endif
680
681 return locale;
682
683 fail:
684 #ifndef USE_GNU_LOCALE_API
685 scm_gc_free (c_locale, sizeof (* c_locale), "locale");
686 #endif
687 free (c_locale_name);
688 scm_locale_error (FUNC_NAME, err);
689
690 return SCM_BOOL_F;
691 }
692 #undef FUNC_NAME
693
694 SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0,
695 (SCM obj),
696 "Return true if @var{obj} is a locale object.")
697 #define FUNC_NAME s_scm_locale_p
698 {
699 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_locale_smob_type, obj));
700 }
701 #undef FUNC_NAME
702
703
704 \f
705 /* Locale-dependent string comparison.
706
707 A similar API can be found in MzScheme starting from version 200:
708 http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */
709
710 #define SCM_STRING_TO_U32_BUF(s1, c_s1) \
711 do \
712 { \
713 if (scm_i_is_narrow_string (s1)) \
714 { \
715 size_t i, len; \
716 const char *buf = scm_i_string_chars (s1); \
717 \
718 len = scm_i_string_length (s1); \
719 c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1)); \
720 \
721 for (i = 0; i < len; i ++) \
722 c_s1[i] = (unsigned char ) buf[i]; \
723 c_s1[len] = 0; \
724 } \
725 else \
726 c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \
727 } while (0)
728
729
730 /* Compare UTF-32 strings according to LOCALE. Returns a negative value if
731 S1 compares smaller than S2, a positive value if S1 compares larger than
732 S2, or 0 if they compare equal. */
733 static inline int
734 compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
735 #define FUNC_NAME func_name
736 {
737 int result;
738 scm_t_locale c_locale;
739 scm_t_wchar *c_s1, *c_s2;
740 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
741
742 SCM_STRING_TO_U32_BUF (s1, c_s1);
743 SCM_STRING_TO_U32_BUF (s2, c_s2);
744
745 if (c_locale)
746 RUN_IN_LOCALE_SECTION (c_locale,
747 result = u32_strcoll ((const scm_t_uint32 *) c_s1,
748 (const scm_t_uint32 *) c_s2));
749 else
750 result = u32_strcoll ((const scm_t_uint32 *) c_s1,
751 (const scm_t_uint32 *) c_s2);
752
753 scm_remember_upto_here_2 (s1, s2);
754 scm_remember_upto_here (locale);
755 return result;
756 }
757 #undef FUNC_NAME
758
759 /* Return the current language of the locale. */
760 static const char *
761 locale_language ()
762 {
763 /* FIXME: If the locale has been set with 'uselocale',
764 libunistring's uc_locale_language will return the incorrect
765 language: it will return the language appropriate for the global
766 (non-thread-specific) locale.
767
768 There appears to be no portable way to extract the language from
769 the thread-specific locale_t. There is no LANGUAGE capability in
770 nl_langinfo or nl_langinfo_l.
771
772 Thus, uc_locale_language needs to be fixed upstream. */
773 return uc_locale_language ();
774 }
775
776 static inline int
777 u32_locale_casecoll (const char *func_name, const scm_t_uint32 *c_s1,
778 const scm_t_uint32 *c_s2,
779 int *result)
780 {
781 /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must note
782 make any non-local exit. */
783
784 int ret;
785 const char *loc = locale_language ();
786
787 ret = u32_casecoll (c_s1, u32_strlen (c_s1),
788 c_s2, u32_strlen (c_s2),
789 loc, UNINORM_NFC, result);
790
791 return ret == 0 ? ret : errno;
792 }
793
794 static inline int
795 compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
796 #define FUNC_NAME func_name
797 {
798 int result, ret = 0;
799 scm_t_locale c_locale;
800 scm_t_wchar *c_s1, *c_s2;
801 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
802
803 SCM_STRING_TO_U32_BUF (s1, c_s1);
804 SCM_STRING_TO_U32_BUF (s2, c_s2);
805
806 if (c_locale)
807 RUN_IN_LOCALE_SECTION
808 (c_locale,
809 ret = u32_locale_casecoll (func_name,
810 (const scm_t_uint32 *) c_s1,
811 (const scm_t_uint32 *) c_s2,
812 &result));
813 else
814 ret = u32_locale_casecoll (func_name,
815 (const scm_t_uint32 *) c_s1,
816 (const scm_t_uint32 *) c_s2,
817 &result);
818
819 if (SCM_UNLIKELY (ret != 0))
820 {
821 errno = ret;
822 scm_syserror (FUNC_NAME);
823 }
824
825 scm_remember_upto_here_2 (s1, s2);
826 scm_remember_upto_here (locale);
827
828 return result;
829 }
830 #undef FUNC_NAME
831
832 /* Store into DST an upper-case version of SRC. */
833 static inline void
834 str_upcase (register char *dst, register const char *src)
835 {
836 for (; *src != '\0'; src++, dst++)
837 *dst = toupper ((int) *src);
838 *dst = '\0';
839 }
840
841 static inline void
842 str_downcase (register char *dst, register const char *src)
843 {
844 for (; *src != '\0'; src++, dst++)
845 *dst = tolower ((int) *src);
846 *dst = '\0';
847 }
848
849 #ifdef USE_GNU_LOCALE_API
850 static inline void
851 str_upcase_l (register char *dst, register const char *src,
852 scm_t_locale locale)
853 {
854 for (; *src != '\0'; src++, dst++)
855 *dst = toupper_l (*src, locale);
856 *dst = '\0';
857 }
858
859 static inline void
860 str_downcase_l (register char *dst, register const char *src,
861 scm_t_locale locale)
862 {
863 for (; *src != '\0'; src++, dst++)
864 *dst = tolower_l (*src, locale);
865 *dst = '\0';
866 }
867 #endif
868
869
870 SCM_DEFINE (scm_string_locale_lt, "string-locale<?", 2, 1, 0,
871 (SCM s1, SCM s2, SCM locale),
872 "Compare strings @var{s1} and @var{s2} in a locale-dependent way."
873 "If @var{locale} is provided, it should be locale object (as "
874 "returned by @code{make-locale}) and will be used to perform the "
875 "comparison; otherwise, the current system locale is used.")
876 #define FUNC_NAME s_scm_string_locale_lt
877 {
878 int result;
879
880 SCM_VALIDATE_STRING (1, s1);
881 SCM_VALIDATE_STRING (2, s2);
882
883 result = compare_u32_strings (s1, s2, locale, FUNC_NAME);
884
885 return scm_from_bool (result < 0);
886 }
887 #undef FUNC_NAME
888
889 SCM_DEFINE (scm_string_locale_gt, "string-locale>?", 2, 1, 0,
890 (SCM s1, SCM s2, SCM locale),
891 "Compare strings @var{s1} and @var{s2} in a locale-dependent way."
892 "If @var{locale} is provided, it should be locale object (as "
893 "returned by @code{make-locale}) and will be used to perform the "
894 "comparison; otherwise, the current system locale is used.")
895 #define FUNC_NAME s_scm_string_locale_gt
896 {
897 int result;
898
899 SCM_VALIDATE_STRING (1, s1);
900 SCM_VALIDATE_STRING (2, s2);
901
902 result = compare_u32_strings (s1, s2, locale, FUNC_NAME);
903
904 return scm_from_bool (result > 0);
905 }
906 #undef FUNC_NAME
907
908 SCM_DEFINE (scm_string_locale_ci_lt, "string-locale-ci<?", 2, 1, 0,
909 (SCM s1, SCM s2, SCM locale),
910 "Compare strings @var{s1} and @var{s2} in a case-insensitive, "
911 "and locale-dependent way. If @var{locale} is provided, it "
912 "should be locale object (as returned by @code{make-locale}) "
913 "and will be used to perform the comparison; otherwise, the "
914 "current system locale is used.")
915 #define FUNC_NAME s_scm_string_locale_ci_lt
916 {
917 int result;
918
919 SCM_VALIDATE_STRING (1, s1);
920 SCM_VALIDATE_STRING (2, s2);
921
922 result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME);
923
924 return scm_from_bool (result < 0);
925 }
926 #undef FUNC_NAME
927
928 SCM_DEFINE (scm_string_locale_ci_gt, "string-locale-ci>?", 2, 1, 0,
929 (SCM s1, SCM s2, SCM locale),
930 "Compare strings @var{s1} and @var{s2} in a case-insensitive, "
931 "and locale-dependent way. If @var{locale} is provided, it "
932 "should be locale object (as returned by @code{make-locale}) "
933 "and will be used to perform the comparison; otherwise, the "
934 "current system locale is used.")
935 #define FUNC_NAME s_scm_string_locale_ci_gt
936 {
937 int result;
938
939 SCM_VALIDATE_STRING (1, s1);
940 SCM_VALIDATE_STRING (2, s2);
941
942 result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME);
943
944 return scm_from_bool (result > 0);
945 }
946 #undef FUNC_NAME
947
948 SCM_DEFINE (scm_string_locale_ci_eq, "string-locale-ci=?", 2, 1, 0,
949 (SCM s1, SCM s2, SCM locale),
950 "Compare strings @var{s1} and @var{s2} in a case-insensitive, "
951 "and locale-dependent way. If @var{locale} is provided, it "
952 "should be locale object (as returned by @code{make-locale}) "
953 "and will be used to perform the comparison; otherwise, the "
954 "current system locale is used.")
955 #define FUNC_NAME s_scm_string_locale_ci_eq
956 {
957 int result;
958
959 SCM_VALIDATE_STRING (1, s1);
960 SCM_VALIDATE_STRING (2, s2);
961
962 result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME);
963
964 return scm_from_bool (result == 0);
965 }
966 #undef FUNC_NAME
967
968
969 SCM_DEFINE (scm_char_locale_lt, "char-locale<?", 2, 1, 0,
970 (SCM c1, SCM c2, SCM locale),
971 "Return true if character @var{c1} is lower than @var{c2} "
972 "according to @var{locale} or to the current locale.")
973 #define FUNC_NAME s_scm_char_locale_lt
974 {
975 int result;
976
977 SCM_VALIDATE_CHAR (1, c1);
978 SCM_VALIDATE_CHAR (2, c2);
979
980 result = compare_u32_strings (scm_string (scm_list_1 (c1)),
981 scm_string (scm_list_1 (c2)),
982 locale, FUNC_NAME);
983
984 return scm_from_bool (result < 0);
985 }
986 #undef FUNC_NAME
987
988 SCM_DEFINE (scm_char_locale_gt, "char-locale>?", 2, 1, 0,
989 (SCM c1, SCM c2, SCM locale),
990 "Return true if character @var{c1} is greater than @var{c2} "
991 "according to @var{locale} or to the current locale.")
992 #define FUNC_NAME s_scm_char_locale_gt
993 {
994 int result;
995
996 SCM_VALIDATE_CHAR (1, c1);
997 SCM_VALIDATE_CHAR (2, c2);
998
999 result = compare_u32_strings (scm_string (scm_list_1 (c1)),
1000 scm_string (scm_list_1 (c2)),
1001 locale, FUNC_NAME);
1002
1003 return scm_from_bool (result > 0);
1004 }
1005 #undef FUNC_NAME
1006
1007 SCM_DEFINE (scm_char_locale_ci_lt, "char-locale-ci<?", 2, 1, 0,
1008 (SCM c1, SCM c2, SCM locale),
1009 "Return true if character @var{c1} is lower than @var{c2}, "
1010 "in a case insensitive way according to @var{locale} or to "
1011 "the current locale.")
1012 #define FUNC_NAME s_scm_char_locale_ci_lt
1013 {
1014 int result;
1015
1016 SCM_VALIDATE_CHAR (1, c1);
1017 SCM_VALIDATE_CHAR (2, c2);
1018
1019 result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)),
1020 scm_string (scm_list_1 (c2)),
1021 locale, FUNC_NAME);
1022
1023 return scm_from_bool (result < 0);
1024 }
1025 #undef FUNC_NAME
1026
1027 SCM_DEFINE (scm_char_locale_ci_gt, "char-locale-ci>?", 2, 1, 0,
1028 (SCM c1, SCM c2, SCM locale),
1029 "Return true if character @var{c1} is greater than @var{c2}, "
1030 "in a case insensitive way according to @var{locale} or to "
1031 "the current locale.")
1032 #define FUNC_NAME s_scm_char_locale_ci_gt
1033 {
1034 int result;
1035
1036 SCM_VALIDATE_CHAR (1, c1);
1037 SCM_VALIDATE_CHAR (2, c2);
1038
1039 result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)),
1040 scm_string (scm_list_1 (c2)),
1041 locale, FUNC_NAME);
1042
1043 return scm_from_bool (result > 0);
1044 }
1045 #undef FUNC_NAME
1046
1047 SCM_DEFINE (scm_char_locale_ci_eq, "char-locale-ci=?", 2, 1, 0,
1048 (SCM c1, SCM c2, SCM locale),
1049 "Return true if character @var{c1} is equal to @var{c2}, "
1050 "in a case insensitive way according to @var{locale} or to "
1051 "the current locale.")
1052 #define FUNC_NAME s_scm_char_locale_ci_eq
1053 {
1054 int result;
1055
1056 SCM_VALIDATE_CHAR (1, c1);
1057 SCM_VALIDATE_CHAR (2, c2);
1058
1059 result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)),
1060 scm_string (scm_list_1 (c2)),
1061 locale, FUNC_NAME);
1062
1063 return scm_from_bool (result == 0);
1064 }
1065 #undef FUNC_NAME
1066
1067
1068 \f
1069 /* Locale-dependent alphabetic character mapping. */
1070
1071 static inline int
1072 u32_locale_tocase (const scm_t_uint32 *c_s1, size_t len,
1073 scm_t_uint32 **p_c_s2, size_t * p_len2,
1074 scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t,
1075 const char *, uninorm_t,
1076 scm_t_uint32 *, size_t *))
1077 {
1078 /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must not
1079 make any non-local exit. */
1080
1081 scm_t_uint32 *ret;
1082 const char *loc = locale_language ();
1083
1084 /* The first NULL here indicates that no NFC or NFKC normalization
1085 is done. The second NULL means the return buffer is
1086 malloc'ed here. */
1087 ret = func (c_s1, len, loc, NULL, NULL, p_len2);
1088
1089 if (ret == NULL)
1090 {
1091 *p_c_s2 = (scm_t_uint32 *) NULL;
1092 *p_len2 = 0;
1093 return errno;
1094 }
1095 *p_c_s2 = ret;
1096
1097 return 0;
1098 }
1099
1100
1101 static SCM
1102 chr_to_case (SCM chr, scm_t_locale c_locale,
1103 scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *,
1104 uninorm_t, scm_t_uint32 *, size_t *),
1105 int *err)
1106 {
1107 int ret;
1108 scm_t_wchar *buf;
1109 scm_t_uint32 *convbuf;
1110 size_t convlen;
1111 SCM str, convchar;
1112
1113 str = scm_i_make_wide_string (1, &buf);
1114 buf[0] = SCM_CHAR (chr);
1115
1116 if (c_locale != NULL)
1117 RUN_IN_LOCALE_SECTION (c_locale, ret =
1118 u32_locale_tocase ((scm_t_uint32 *) buf, 1,
1119 &convbuf,
1120 &convlen, func));
1121 else
1122 ret =
1123 u32_locale_tocase ((scm_t_uint32 *) buf, 1, &convbuf,
1124 &convlen, func);
1125
1126 if (SCM_UNLIKELY (ret != 0))
1127 {
1128 *err = ret;
1129 return NULL;
1130 }
1131
1132 if (convlen == 1)
1133 convchar = SCM_MAKE_CHAR ((scm_t_wchar) convbuf[0]);
1134 else
1135 convchar = chr;
1136 free (convbuf);
1137
1138 return convchar;
1139 }
1140
1141 SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0,
1142 (SCM chr, SCM locale),
1143 "Return the lowercase character that corresponds to @var{chr} "
1144 "according to either @var{locale} or the current locale.")
1145 #define FUNC_NAME s_scm_char_locale_downcase
1146 {
1147 scm_t_locale c_locale;
1148 SCM ret;
1149 int err = 0;
1150
1151 SCM_VALIDATE_CHAR (1, chr);
1152 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1153
1154 ret = chr_to_case (chr, c_locale, u32_tolower, &err);
1155
1156 if (err != 0)
1157 {
1158 errno = err;
1159 scm_syserror (FUNC_NAME);
1160 }
1161 return ret;
1162 }
1163 #undef FUNC_NAME
1164
1165 SCM_DEFINE (scm_char_locale_upcase, "char-locale-upcase", 1, 1, 0,
1166 (SCM chr, SCM locale),
1167 "Return the uppercase character that corresponds to @var{chr} "
1168 "according to either @var{locale} or the current locale.")
1169 #define FUNC_NAME s_scm_char_locale_upcase
1170 {
1171 scm_t_locale c_locale;
1172 SCM ret;
1173 int err = 0;
1174
1175 SCM_VALIDATE_CHAR (1, chr);
1176 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1177
1178 ret = chr_to_case (chr, c_locale, u32_toupper, &err);
1179
1180 if (err != 0)
1181 {
1182 errno = err;
1183 scm_syserror (FUNC_NAME);
1184 }
1185 return ret;
1186 }
1187 #undef FUNC_NAME
1188
1189 SCM_DEFINE (scm_char_locale_titlecase, "char-locale-titlecase", 1, 1, 0,
1190 (SCM chr, SCM locale),
1191 "Return the titlecase character that corresponds to @var{chr} "
1192 "according to either @var{locale} or the current locale.")
1193 #define FUNC_NAME s_scm_char_locale_titlecase
1194 {
1195 scm_t_locale c_locale;
1196 SCM ret;
1197 int err = 0;
1198
1199 SCM_VALIDATE_CHAR (1, chr);
1200 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1201
1202 ret = chr_to_case (chr, c_locale, u32_totitle, &err);
1203
1204 if (err != 0)
1205 {
1206 errno = err;
1207 scm_syserror (FUNC_NAME);
1208 }
1209 return ret;
1210 }
1211 #undef FUNC_NAME
1212
1213 static SCM
1214 str_to_case (SCM str, scm_t_locale c_locale,
1215 scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *,
1216 uninorm_t, scm_t_uint32 *, size_t *),
1217 int *err)
1218 {
1219 scm_t_wchar *c_str, *c_buf;
1220 scm_t_uint32 *c_convstr;
1221 size_t len, convlen;
1222 int ret;
1223 SCM convstr;
1224
1225 len = scm_i_string_length (str);
1226 if (len == 0)
1227 return scm_nullstr;
1228 SCM_STRING_TO_U32_BUF (str, c_str);
1229
1230 if (c_locale)
1231 RUN_IN_LOCALE_SECTION (c_locale, ret =
1232 u32_locale_tocase ((scm_t_uint32 *) c_str, len,
1233 &c_convstr,
1234 &convlen, func));
1235 else
1236 ret =
1237 u32_locale_tocase ((scm_t_uint32 *) c_str, len,
1238 &c_convstr, &convlen, func);
1239
1240 scm_remember_upto_here (str);
1241
1242 if (SCM_UNLIKELY (ret != 0))
1243 {
1244 *err = ret;
1245 return NULL;
1246 }
1247
1248 convstr = scm_i_make_wide_string (convlen, &c_buf);
1249 memcpy (c_buf, c_convstr, convlen * sizeof (scm_t_wchar));
1250 free (c_convstr);
1251
1252 scm_i_try_narrow_string (convstr);
1253
1254 return convstr;
1255 }
1256
1257 SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0,
1258 (SCM str, SCM locale),
1259 "Return a new string that is the uppercase version of "
1260 "@var{str} according to either @var{locale} or the current "
1261 "locale.")
1262 #define FUNC_NAME s_scm_string_locale_upcase
1263 {
1264 scm_t_locale c_locale;
1265 SCM ret;
1266 int err = 0;
1267
1268 SCM_VALIDATE_STRING (1, str);
1269 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1270
1271 ret = str_to_case (str, c_locale, u32_toupper, &err);
1272
1273 if (err != 0)
1274 {
1275 errno = err;
1276 scm_syserror (FUNC_NAME);
1277 }
1278 return ret;
1279 }
1280 #undef FUNC_NAME
1281
1282 SCM_DEFINE (scm_string_locale_downcase, "string-locale-downcase", 1, 1, 0,
1283 (SCM str, SCM locale),
1284 "Return a new string that is the down-case version of "
1285 "@var{str} according to either @var{locale} or the current "
1286 "locale.")
1287 #define FUNC_NAME s_scm_string_locale_downcase
1288 {
1289 scm_t_locale c_locale;
1290 SCM ret;
1291 int err = 0;
1292
1293 SCM_VALIDATE_STRING (1, str);
1294 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1295
1296 ret = str_to_case (str, c_locale, u32_tolower, &err);
1297
1298 if (err != 0)
1299 {
1300 errno = err;
1301 scm_syserror (FUNC_NAME);
1302 }
1303 return ret;
1304 }
1305 #undef FUNC_NAME
1306
1307 SCM_DEFINE (scm_string_locale_titlecase, "string-locale-titlecase", 1, 1, 0,
1308 (SCM str, SCM locale),
1309 "Return a new string that is the title-case version of "
1310 "@var{str} according to either @var{locale} or the current "
1311 "locale.")
1312 #define FUNC_NAME s_scm_string_locale_titlecase
1313 {
1314 scm_t_locale c_locale;
1315 SCM ret;
1316 int err = 0;
1317
1318 SCM_VALIDATE_STRING (1, str);
1319 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1320
1321 ret = str_to_case (str, c_locale, u32_totitle, &err);
1322
1323 if (err != 0)
1324 {
1325 errno = err;
1326 scm_syserror (FUNC_NAME);
1327 }
1328 return ret;
1329 }
1330 #undef FUNC_NAME
1331
1332 /* Note: We don't provide mutative versions of `string-locale-(up|down)case'
1333 because, in some languages, a single downcase character maps to a couple
1334 of uppercase characters. Read the SRFI-13 document for a detailed
1335 discussion about this. */
1336
1337
1338 \f
1339 /* Locale-dependent number parsing. */
1340
1341 SCM_DEFINE (scm_locale_string_to_integer, "locale-string->integer",
1342 1, 2, 0, (SCM str, SCM base, SCM locale),
1343 "Convert string @var{str} into an integer according to either "
1344 "@var{locale} (a locale object as returned by @code{make-locale}) "
1345 "or the current process locale. Return two values: an integer "
1346 "(on success) or @code{#f}, and the number of characters read "
1347 "from @var{str} (@code{0} on failure).")
1348 #define FUNC_NAME s_scm_locale_string_to_integer
1349 {
1350 SCM result;
1351 long c_result;
1352 int c_base;
1353 const char *c_str;
1354 char *c_endptr;
1355 scm_t_locale c_locale;
1356
1357 SCM_VALIDATE_STRING (1, str);
1358 c_str = scm_i_string_chars (str);
1359
1360 if (base != SCM_UNDEFINED)
1361 SCM_VALIDATE_INT_COPY (2, base, c_base);
1362 else
1363 c_base = 10;
1364
1365 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
1366
1367 if (c_locale != NULL)
1368 {
1369 #ifdef USE_GNU_LOCALE_API
1370 c_result = strtol_l (c_str, &c_endptr, c_base, c_locale);
1371 #else
1372 RUN_IN_LOCALE_SECTION (c_locale,
1373 c_result = strtol (c_str, &c_endptr, c_base));
1374 #endif
1375 }
1376 else
1377 c_result = strtol (c_str, &c_endptr, c_base);
1378
1379 scm_remember_upto_here (str);
1380
1381 if (c_endptr == c_str)
1382 result = SCM_BOOL_F;
1383 else
1384 result = scm_from_long (c_result);
1385
1386 return (scm_values (scm_list_2 (result, scm_from_long (c_endptr - c_str))));
1387 }
1388 #undef FUNC_NAME
1389
1390 SCM_DEFINE (scm_locale_string_to_inexact, "locale-string->inexact",
1391 1, 1, 0, (SCM str, SCM locale),
1392 "Convert string @var{str} into an inexact number according to "
1393 "either @var{locale} (a locale object as returned by "
1394 "@code{make-locale}) or the current process locale. Return "
1395 "two values: an inexact number (on success) or @code{#f}, and "
1396 "the number of characters read from @var{str} (@code{0} on "
1397 "failure).")
1398 #define FUNC_NAME s_scm_locale_string_to_inexact
1399 {
1400 SCM result;
1401 double c_result;
1402 const char *c_str;
1403 char *c_endptr;
1404 scm_t_locale c_locale;
1405
1406 SCM_VALIDATE_STRING (1, str);
1407 c_str = scm_i_string_chars (str);
1408
1409 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1410
1411 if (c_locale != NULL)
1412 {
1413 #ifdef USE_GNU_LOCALE_API
1414 c_result = strtod_l (c_str, &c_endptr, c_locale);
1415 #else
1416 RUN_IN_LOCALE_SECTION (c_locale,
1417 c_result = strtod (c_str, &c_endptr));
1418 #endif
1419 }
1420 else
1421 c_result = strtod (c_str, &c_endptr);
1422
1423 scm_remember_upto_here (str);
1424
1425 if (c_endptr == c_str)
1426 result = SCM_BOOL_F;
1427 else
1428 result = scm_from_double (c_result);
1429
1430 return (scm_values (scm_list_2 (result, scm_from_long (c_endptr - c_str))));
1431 }
1432 #undef FUNC_NAME
1433
1434 \f
1435 /* Language information, aka. `nl_langinfo ()'. */
1436
1437 /* FIXME: Issues related to `nl-langinfo'.
1438
1439 1. The `CODESET' value is not normalized. This is a secondary issue, but
1440 still a practical issue. See
1441 http://www.cl.cam.ac.uk/~mgk25/ucs/norm_charmap.c for codeset
1442 normalization.
1443
1444 2. `nl_langinfo ()' is not available on Windows.
1445
1446 3. `nl_langinfo ()' may return strings encoded in a locale different from
1447 the current one.
1448 For example:
1449
1450 (nl-langinfo DAY_1 (make-locale LC_ALL "eo_EO.UTF-8"))
1451
1452 returns a result that is a UTF-8 string, regardless of the
1453 setting of the current locale. If nl_langinfo supports CODESET,
1454 we can convert the string properly using scm_from_stringn. If
1455 CODESET is not supported, we won't be able to make much sense of
1456 the returned string. */
1457
1458
1459 SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0,
1460 (SCM item, SCM locale),
1461 "Return a string denoting locale information for @var{item} "
1462 "in the current locale or that specified by @var{locale}. "
1463 "The semantics and arguments are the same as those of the "
1464 "X/Open @code{nl_langinfo} function (@pxref{The Elegant and "
1465 "Fast Way, @code{nl_langinfo},, libc, The GNU C Library "
1466 "Reference Manual}).")
1467 #define FUNC_NAME s_scm_nl_langinfo
1468 {
1469 #ifdef HAVE_NL_LANGINFO
1470 SCM result;
1471 nl_item c_item;
1472 char *c_result;
1473 scm_t_locale c_locale;
1474 #ifdef HAVE_LANGINFO_CODESET
1475 char *codeset;
1476 #endif
1477
1478 SCM_VALIDATE_INT_COPY (2, item, c_item);
1479 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1480
1481 /* Sadly, `nl_langinfo ()' returns a pointer to a static string. According
1482 to SuS v2, that static string may be modified by subsequent calls to
1483 `nl_langinfo ()' as well as by calls to `setlocale ()'. Thus, we must
1484 acquire the locale mutex before doing invoking `nl_langinfo ()'. See
1485 http://opengroup.org/onlinepubs/007908799/xsh/nl_langinfo.html for
1486 details. */
1487
1488 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
1489 if (c_locale != NULL)
1490 {
1491 #ifdef USE_GNU_LOCALE_API
1492 c_result = nl_langinfo_l (c_item, c_locale);
1493 #ifdef HAVE_LANGINFO_CODESET
1494 codeset = nl_langinfo_l (CODESET, c_locale);
1495 #endif /* HAVE_LANGINFO_CODESET */
1496 #else /* !USE_GNU_LOCALE_API */
1497 /* We can't use `RUN_IN_LOCALE_SECTION ()' here because the locale
1498 mutex is already taken. */
1499 int lsec_err;
1500 scm_t_locale_settings lsec_prev_locale;
1501
1502 lsec_err = get_current_locale_settings (&lsec_prev_locale);
1503 if (lsec_err)
1504 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
1505 else
1506 {
1507 lsec_err = install_locale (c_locale);
1508 if (lsec_err)
1509 {
1510 leave_locale_section (&lsec_prev_locale);
1511 free_locale_settings (&lsec_prev_locale);
1512 }
1513 }
1514
1515 if (lsec_err)
1516 scm_locale_error (FUNC_NAME, lsec_err);
1517 else
1518 {
1519 c_result = nl_langinfo (c_item);
1520 #ifdef HAVE_LANGINFO_CODESET
1521 codeset = nl_langinfo (CODESET);
1522 #endif /* HAVE_LANGINFO_CODESET */
1523
1524 restore_locale_settings (&lsec_prev_locale);
1525 free_locale_settings (&lsec_prev_locale);
1526 }
1527 #endif
1528 }
1529 else
1530 {
1531 c_result = nl_langinfo (c_item);
1532 #ifdef HAVE_LANGINFO_CODESET
1533 codeset = nl_langinfo (CODESET);
1534 #endif /* HAVE_LANGINFO_CODESET */
1535 }
1536
1537 c_result = strdup (c_result);
1538 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
1539
1540 if (c_result == NULL)
1541 result = SCM_BOOL_F;
1542 else
1543 {
1544 switch (c_item)
1545 {
1546 #if (defined GROUPING) && (defined MON_GROUPING)
1547 case GROUPING:
1548 case MON_GROUPING:
1549 {
1550 char *p;
1551
1552 /* In this cases, the result is to be interpreted as a list of
1553 numbers. If the last item is `CHARS_MAX', it has the special
1554 meaning "no more grouping". */
1555 result = SCM_EOL;
1556 for (p = c_result; (*p != '\0') && (*p != CHAR_MAX); p++)
1557 result = scm_cons (SCM_I_MAKINUM ((int) *p), result);
1558
1559 {
1560 SCM last_pair = result;
1561
1562 result = scm_reverse_x (result, SCM_EOL);
1563
1564 if (*p != CHAR_MAX)
1565 {
1566 /* Cyclic grouping information. */
1567 if (last_pair != SCM_EOL)
1568 SCM_SETCDR (last_pair, result);
1569 }
1570 }
1571
1572 free (c_result);
1573 break;
1574 }
1575 #endif
1576
1577 #if (defined FRAC_DIGITS) && (defined INT_FRAC_DIGITS)
1578 case FRAC_DIGITS:
1579 case INT_FRAC_DIGITS:
1580 /* This is to be interpreted as a single integer. */
1581 if (*c_result == CHAR_MAX)
1582 /* Unspecified. */
1583 result = SCM_BOOL_F;
1584 else
1585 result = SCM_I_MAKINUM (*c_result);
1586
1587 free (c_result);
1588 break;
1589 #endif
1590
1591 #if (defined P_CS_PRECEDES) && (defined INT_N_CS_PRECEDES)
1592 case P_CS_PRECEDES:
1593 case N_CS_PRECEDES:
1594 case INT_P_CS_PRECEDES:
1595 case INT_N_CS_PRECEDES:
1596 #if (defined P_SEP_BY_SPACE) && (defined N_SEP_BY_SPACE)
1597 case P_SEP_BY_SPACE:
1598 case N_SEP_BY_SPACE:
1599 #endif
1600 /* This is to be interpreted as a boolean. */
1601 result = scm_from_bool (*c_result);
1602
1603 free (c_result);
1604 break;
1605 #endif
1606
1607 #if (defined P_SIGN_POSN) && (defined INT_N_SIGN_POSN)
1608 case P_SIGN_POSN:
1609 case N_SIGN_POSN:
1610 case INT_P_SIGN_POSN:
1611 case INT_N_SIGN_POSN:
1612 /* See `(libc) Sign of Money Amount' for the interpretation of the
1613 return value here. */
1614 switch (*c_result)
1615 {
1616 case 0:
1617 result = scm_from_locale_symbol ("parenthesize");
1618 break;
1619
1620 case 1:
1621 result = scm_from_locale_symbol ("sign-before");
1622 break;
1623
1624 case 2:
1625 result = scm_from_locale_symbol ("sign-after");
1626 break;
1627
1628 case 3:
1629 result = scm_from_locale_symbol ("sign-before-currency-symbol");
1630 break;
1631
1632 case 4:
1633 result = scm_from_locale_symbol ("sign-after-currency-symbol");
1634 break;
1635
1636 default:
1637 result = scm_from_locale_symbol ("unspecified");
1638 }
1639 break;
1640 #endif
1641
1642 default:
1643 #ifdef HAVE_LANGINFO_CODESET
1644 result = scm_from_stringn (c_result, strlen (c_result),
1645 codeset,
1646 SCM_FAILED_CONVERSION_QUESTION_MARK);
1647 #else /* !HAVE_LANGINFO_CODESET */
1648 /* This may be incorrectly encoded if the locale differs
1649 from the c_locale. */
1650 result = scm_from_locale_string (c_result);
1651 #endif /* !HAVE_LANGINFO_CODESET */
1652 free (c_result);
1653 }
1654 }
1655
1656 return result;
1657 #else
1658 scm_syserror_msg (FUNC_NAME, "`nl-langinfo' not supported on your system",
1659 SCM_EOL, ENOSYS);
1660
1661 return SCM_BOOL_F;
1662 #endif
1663 }
1664 #undef FUNC_NAME
1665
1666 /* Define the `nl_item' constants. */
1667 static inline void
1668 define_langinfo_items (void)
1669 {
1670 #if (defined HAVE_NL_TYPES_H) && (defined HAVE_LANGINFO_H)
1671
1672 #define DEFINE_NLITEM_CONSTANT(_item) \
1673 scm_c_define (# _item, scm_from_int (_item))
1674
1675 DEFINE_NLITEM_CONSTANT (CODESET);
1676
1677 /* Abbreviated days of the week. */
1678 DEFINE_NLITEM_CONSTANT (ABDAY_1);
1679 DEFINE_NLITEM_CONSTANT (ABDAY_2);
1680 DEFINE_NLITEM_CONSTANT (ABDAY_3);
1681 DEFINE_NLITEM_CONSTANT (ABDAY_4);
1682 DEFINE_NLITEM_CONSTANT (ABDAY_5);
1683 DEFINE_NLITEM_CONSTANT (ABDAY_6);
1684 DEFINE_NLITEM_CONSTANT (ABDAY_7);
1685
1686 /* Long-named days of the week. */
1687 DEFINE_NLITEM_CONSTANT (DAY_1); /* Sunday */
1688 DEFINE_NLITEM_CONSTANT (DAY_2); /* Monday */
1689 DEFINE_NLITEM_CONSTANT (DAY_3); /* Tuesday */
1690 DEFINE_NLITEM_CONSTANT (DAY_4); /* Wednesday */
1691 DEFINE_NLITEM_CONSTANT (DAY_5); /* Thursday */
1692 DEFINE_NLITEM_CONSTANT (DAY_6); /* Friday */
1693 DEFINE_NLITEM_CONSTANT (DAY_7); /* Saturday */
1694
1695 /* Abbreviated month names. */
1696 DEFINE_NLITEM_CONSTANT (ABMON_1); /* Jan */
1697 DEFINE_NLITEM_CONSTANT (ABMON_2);
1698 DEFINE_NLITEM_CONSTANT (ABMON_3);
1699 DEFINE_NLITEM_CONSTANT (ABMON_4);
1700 DEFINE_NLITEM_CONSTANT (ABMON_5);
1701 DEFINE_NLITEM_CONSTANT (ABMON_6);
1702 DEFINE_NLITEM_CONSTANT (ABMON_7);
1703 DEFINE_NLITEM_CONSTANT (ABMON_8);
1704 DEFINE_NLITEM_CONSTANT (ABMON_9);
1705 DEFINE_NLITEM_CONSTANT (ABMON_10);
1706 DEFINE_NLITEM_CONSTANT (ABMON_11);
1707 DEFINE_NLITEM_CONSTANT (ABMON_12);
1708
1709 /* Long month names. */
1710 DEFINE_NLITEM_CONSTANT (MON_1); /* January */
1711 DEFINE_NLITEM_CONSTANT (MON_2);
1712 DEFINE_NLITEM_CONSTANT (MON_3);
1713 DEFINE_NLITEM_CONSTANT (MON_4);
1714 DEFINE_NLITEM_CONSTANT (MON_5);
1715 DEFINE_NLITEM_CONSTANT (MON_6);
1716 DEFINE_NLITEM_CONSTANT (MON_7);
1717 DEFINE_NLITEM_CONSTANT (MON_8);
1718 DEFINE_NLITEM_CONSTANT (MON_9);
1719 DEFINE_NLITEM_CONSTANT (MON_10);
1720 DEFINE_NLITEM_CONSTANT (MON_11);
1721 DEFINE_NLITEM_CONSTANT (MON_12);
1722
1723 DEFINE_NLITEM_CONSTANT (AM_STR); /* Ante meridiem string. */
1724 DEFINE_NLITEM_CONSTANT (PM_STR); /* Post meridiem string. */
1725
1726 DEFINE_NLITEM_CONSTANT (D_T_FMT); /* Date and time format for strftime. */
1727 DEFINE_NLITEM_CONSTANT (D_FMT); /* Date format for strftime. */
1728 DEFINE_NLITEM_CONSTANT (T_FMT); /* Time format for strftime. */
1729 DEFINE_NLITEM_CONSTANT (T_FMT_AMPM);/* 12-hour time format for strftime. */
1730
1731 DEFINE_NLITEM_CONSTANT (ERA); /* Alternate era. */
1732 DEFINE_NLITEM_CONSTANT (ERA_D_FMT); /* Date in alternate era format. */
1733 DEFINE_NLITEM_CONSTANT (ERA_D_T_FMT); /* Date and time in alternate era
1734 format. */
1735 DEFINE_NLITEM_CONSTANT (ERA_T_FMT); /* Time in alternate era format. */
1736
1737 DEFINE_NLITEM_CONSTANT (ALT_DIGITS); /* Alternate symbols for digits. */
1738 DEFINE_NLITEM_CONSTANT (RADIXCHAR);
1739 DEFINE_NLITEM_CONSTANT (THOUSEP);
1740
1741 #ifdef YESEXPR
1742 DEFINE_NLITEM_CONSTANT (YESEXPR);
1743 #endif
1744 #ifdef NOEXPR
1745 DEFINE_NLITEM_CONSTANT (NOEXPR);
1746 #endif
1747
1748 #ifdef CRNCYSTR /* currency symbol */
1749 DEFINE_NLITEM_CONSTANT (CRNCYSTR);
1750 #endif
1751
1752 /* GNU extensions. */
1753
1754 #ifdef ERA_YEAR
1755 DEFINE_NLITEM_CONSTANT (ERA_YEAR); /* Year in alternate era format. */
1756 #endif
1757
1758 /* LC_MONETARY category: formatting of monetary quantities.
1759 These items each correspond to a member of `struct lconv',
1760 defined in <locale.h>. */
1761 #ifdef INT_CURR_SYMBOL
1762 DEFINE_NLITEM_CONSTANT (INT_CURR_SYMBOL);
1763 #endif
1764 #ifdef MON_DECIMAL_POINT
1765 DEFINE_NLITEM_CONSTANT (MON_DECIMAL_POINT);
1766 #endif
1767 #ifdef MON_THOUSANDS_SEP
1768 DEFINE_NLITEM_CONSTANT (MON_THOUSANDS_SEP);
1769 #endif
1770 #ifdef MON_GROUPING
1771 DEFINE_NLITEM_CONSTANT (MON_GROUPING);
1772 #endif
1773 #ifdef POSITIVE_SIGN
1774 DEFINE_NLITEM_CONSTANT (POSITIVE_SIGN);
1775 #endif
1776 #ifdef NEGATIVE_SIGN
1777 DEFINE_NLITEM_CONSTANT (NEGATIVE_SIGN);
1778 #endif
1779 #ifdef GROUPING
1780 DEFINE_NLITEM_CONSTANT (GROUPING);
1781 #endif
1782 #ifdef INT_FRAC_DIGITS
1783 DEFINE_NLITEM_CONSTANT (INT_FRAC_DIGITS);
1784 #endif
1785 #ifdef FRAC_DIGITS
1786 DEFINE_NLITEM_CONSTANT (FRAC_DIGITS);
1787 #endif
1788 #ifdef P_CS_PRECEDES
1789 DEFINE_NLITEM_CONSTANT (P_CS_PRECEDES);
1790 #endif
1791 #ifdef P_SEP_BY_SPACE
1792 DEFINE_NLITEM_CONSTANT (P_SEP_BY_SPACE);
1793 #endif
1794 #ifdef N_CS_PRECEDES
1795 DEFINE_NLITEM_CONSTANT (N_CS_PRECEDES);
1796 #endif
1797 #ifdef N_SEP_BY_SPACE
1798 DEFINE_NLITEM_CONSTANT (N_SEP_BY_SPACE);
1799 #endif
1800 #ifdef P_SIGN_POSN
1801 DEFINE_NLITEM_CONSTANT (P_SIGN_POSN);
1802 #endif
1803 #ifdef N_SIGN_POSN
1804 DEFINE_NLITEM_CONSTANT (N_SIGN_POSN);
1805 #endif
1806 #ifdef INT_P_CS_PRECEDES
1807 DEFINE_NLITEM_CONSTANT (INT_P_CS_PRECEDES);
1808 #endif
1809 #ifdef INT_P_SEP_BY_SPACE
1810 DEFINE_NLITEM_CONSTANT (INT_P_SEP_BY_SPACE);
1811 #endif
1812 #ifdef INT_N_CS_PRECEDES
1813 DEFINE_NLITEM_CONSTANT (INT_N_CS_PRECEDES);
1814 #endif
1815 #ifdef INT_N_SEP_BY_SPACE
1816 DEFINE_NLITEM_CONSTANT (INT_N_SEP_BY_SPACE);
1817 #endif
1818 #ifdef INT_P_SIGN_POSN
1819 DEFINE_NLITEM_CONSTANT (INT_P_SIGN_POSN);
1820 #endif
1821 #ifdef INT_N_SIGN_POSN
1822 DEFINE_NLITEM_CONSTANT (INT_N_SIGN_POSN);
1823 #endif
1824
1825 #undef DEFINE_NLITEM_CONSTANT
1826
1827 #endif /* HAVE_NL_TYPES_H */
1828 }
1829
1830 \f
1831 void
1832 scm_init_i18n ()
1833 {
1834 SCM global_locale_smob;
1835
1836 #ifdef HAVE_NL_LANGINFO
1837 scm_add_feature ("nl-langinfo");
1838 define_langinfo_items ();
1839 #endif
1840
1841 #include "libguile/i18n.x"
1842
1843 /* Initialize the global locale object with a special `locale' SMOB. */
1844 /* XXX: We don't define it as `LC_GLOBAL_LOCALE' because of bugs as of
1845 glibc <= 2.11 not (yet) worked around by Gnulib. See
1846 http://sourceware.org/bugzilla/show_bug.cgi?id=11009 for details. */
1847 SCM_NEWSMOB (global_locale_smob, scm_tc16_locale_smob_type, NULL);
1848 SCM_VARIABLE_SET (scm_global_locale, global_locale_smob);
1849 }
1850
1851 void
1852 scm_bootstrap_i18n ()
1853 {
1854 scm_c_register_extension ("libguile", "scm_init_i18n",
1855 (scm_t_extension_init_func) scm_init_i18n,
1856 NULL);
1857
1858 }
1859
1860
1861 /*
1862 Local Variables:
1863 c-file-style: "gnu"
1864 End:
1865 */