Update comment about uc_locale_language.
[bpt/guile.git] / libguile / i18n.c
1 /* Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <alloca.h>
24
25 #include "libguile/_scm.h"
26 #include "libguile/extensions.h"
27 #include "libguile/feature.h"
28 #include "libguile/i18n.h"
29 #include "libguile/strings.h"
30 #include "libguile/chars.h"
31 #include "libguile/dynwind.h"
32 #include "libguile/validate.h"
33 #include "libguile/values.h"
34 #include "libguile/threads.h"
35
36 #include <locale.h>
37 #include <string.h> /* `strcoll ()' */
38 #include <ctype.h> /* `toupper ()' et al. */
39 #include <errno.h>
40 #include <unicase.h>
41 #include <unistr.h>
42
43 #if (defined HAVE_NEWLOCALE) && (defined HAVE_STRCOLL_L)
44 /* The GNU thread-aware locale API is documented in ``Thread-Aware Locale
45 Model, a Proposal'', by Ulrich Drepper:
46
47 http://people.redhat.com/drepper/tllocale.ps.gz
48
49 It is now also implemented by Darwin:
50
51 http://developer.apple.com/documentation/Darwin/Reference/ManPages/man3/newlocale.3.html
52
53 The whole API was eventually standardized in the ``Open Group Base
54 Specifications Issue 7'' (aka. "POSIX 2008"):
55
56 http://www.opengroup.org/onlinepubs/9699919799/basedefs/locale.h.html */
57 # define USE_GNU_LOCALE_API
58 #endif
59
60 #include "libguile/posix.h" /* for `scm_i_locale_mutex' */
61
62 #ifdef HAVE_LANGINFO_H
63 # include <langinfo.h>
64 #endif
65 #ifdef HAVE_NL_TYPES_H
66 # include <nl_types.h>
67 #endif
68 #ifndef HAVE_NL_ITEM
69 /* Cygwin has <langinfo.h> but lacks <nl_types.h> and `nl_item'. */
70 typedef int nl_item;
71 #endif
72
73 #ifndef HAVE_SETLOCALE
74 static inline char *
75 setlocale (int category, const char *name)
76 {
77 errno = ENOSYS;
78 return NULL;
79 }
80 #endif
81
82 /* Helper stringification macro. */
83 #define SCM_I18N_STRINGIFY(_name) # _name
84
85
86 \f
87 /* Locale objects, string and character collation, and other locale-dependent
88 string operations.
89
90 A large part of the code here deals with emulating glibc's reentrant
91 locale API on non-GNU systems. The emulation is a bit "brute-force":
92 Whenever a `-locale<?' procedure is passed a locale object, then:
93
94 1. The `scm_i_locale_mutex' is locked.
95 2. A series of `setlocale ()' call is performed to store the current
96 locale for each category in an `scm_t_locale' object.
97 3. A series of `setlocale ()' call is made to install each of the locale
98 categories of each of the base locales of each locale object,
99 recursively, starting from the last locale object of the chain.
100 4. The settings captured in step (2) are restored.
101 5. The `scm_i_locale_mutex' is released.
102
103 Hopefully, the X/Open standard will eventually make this hack useless.
104
105 Note: We don't wrap glibc's `uselocale ()' call because it sets the locale
106 of the current _thread_ (unlike `setlocale ()') and doing so would require
107 maintaining per-thread locale information on non-GNU systems and always
108 re-installing this locale upon locale-dependent calls. */
109
110
111 /* Return the category mask corresponding to CAT. */
112 #define SCM_LOCALE_CATEGORY_MASK(_cat) LC_ ## _cat ## _MASK
113
114
115 #ifndef USE_GNU_LOCALE_API
116
117 /* Provide the locale category masks as found in glibc. This must be kept in
118 sync with `locale-categories.h'. */
119
120 # define LC_CTYPE_MASK 1
121 # define LC_COLLATE_MASK 2
122 # define LC_MESSAGES_MASK 4
123 # define LC_MONETARY_MASK 8
124 # define LC_NUMERIC_MASK 16
125 # define LC_TIME_MASK 32
126
127 # ifdef LC_PAPER
128 # define LC_PAPER_MASK 64
129 # else
130 # define LC_PAPER_MASK 0
131 # endif
132 # ifdef LC_NAME
133 # define LC_NAME_MASK 128
134 # else
135 # define LC_NAME_MASK 0
136 # endif
137 # ifdef LC_ADDRESS
138 # define LC_ADDRESS_MASK 256
139 # else
140 # define LC_ADDRESS_MASK 0
141 # endif
142 # ifdef LC_TELEPHONE
143 # define LC_TELEPHONE_MASK 512
144 # else
145 # define LC_TELEPHONE_MASK 0
146 # endif
147 # ifdef LC_MEASUREMENT
148 # define LC_MEASUREMENT_MASK 1024
149 # else
150 # define LC_MEASUREMENT_MASK 0
151 # endif
152 # ifdef LC_IDENTIFICATION
153 # define LC_IDENTIFICATION_MASK 2048
154 # else
155 # define LC_IDENTIFICATION_MASK 0
156 # endif
157
158 # define LC_ALL_MASK (LC_CTYPE_MASK \
159 | LC_NUMERIC_MASK \
160 | LC_TIME_MASK \
161 | LC_COLLATE_MASK \
162 | LC_MONETARY_MASK \
163 | LC_MESSAGES_MASK \
164 | LC_PAPER_MASK \
165 | LC_NAME_MASK \
166 | LC_ADDRESS_MASK \
167 | LC_TELEPHONE_MASK \
168 | LC_MEASUREMENT_MASK \
169 | LC_IDENTIFICATION_MASK \
170 )
171
172 /* Locale objects as returned by `make-locale' on non-GNU systems. */
173 typedef struct scm_locale
174 {
175 SCM base_locale; /* a `locale' object */
176 char *locale_name;
177 int category_mask;
178 } *scm_t_locale;
179
180
181 /* Free the resources used by LOCALE. */
182 static inline void
183 scm_i_locale_free (scm_t_locale locale)
184 {
185 free (locale->locale_name);
186 locale->locale_name = NULL;
187 }
188
189 #else /* USE_GNU_LOCALE_API */
190
191 /* Alias for glibc's locale type. */
192 typedef locale_t scm_t_locale;
193
194 #define scm_i_locale_free freelocale
195
196 #endif /* USE_GNU_LOCALE_API */
197
198
199 /* A locale object denoting the global locale. */
200 SCM_GLOBAL_VARIABLE (scm_global_locale, "%global-locale");
201
202
203 /* Validate parameter ARG as a locale object and set C_LOCALE to the
204 corresponding C locale object. */
205 #define SCM_VALIDATE_LOCALE_COPY(_pos, _arg, _c_locale) \
206 do \
207 { \
208 SCM_VALIDATE_SMOB ((_pos), (_arg), locale_smob_type); \
209 (_c_locale) = (scm_t_locale)SCM_SMOB_DATA (_arg); \
210 } \
211 while (0)
212
213 /* Validate optional parameter ARG as either undefined or bound to a locale
214 object. Set C_LOCALE to the corresponding C locale object or NULL. */
215 #define SCM_VALIDATE_OPTIONAL_LOCALE_COPY(_pos, _arg, _c_locale) \
216 do \
217 { \
218 if ((_arg) != SCM_UNDEFINED) \
219 SCM_VALIDATE_LOCALE_COPY (_pos, _arg, _c_locale); \
220 else \
221 (_c_locale) = NULL; \
222 } \
223 while (0)
224
225
226 SCM_SMOB (scm_tc16_locale_smob_type, "locale", 0);
227
228 SCM_SMOB_FREE (scm_tc16_locale_smob_type, smob_locale_free, locale)
229 {
230 scm_t_locale c_locale;
231
232 c_locale = (scm_t_locale) SCM_SMOB_DATA (locale);
233 scm_i_locale_free (c_locale);
234
235 return 0;
236 }
237
238
239 static void inline scm_locale_error (const char *, int) SCM_NORETURN;
240
241 /* Throw an exception corresponding to error ERR. */
242 static void inline
243 scm_locale_error (const char *func_name, int err)
244 {
245 scm_syserror_msg (func_name,
246 "Failed to install locale",
247 SCM_EOL, err);
248 }
249
250
251 \f
252 /* Emulating GNU's reentrant locale API. */
253 #ifndef USE_GNU_LOCALE_API
254
255
256 /* Maximum number of chained locales (via `base_locale'). */
257 #define LOCALE_STACK_SIZE_MAX 256
258
259 typedef struct
260 {
261 #define SCM_DEFINE_LOCALE_CATEGORY(_name) char * _name;
262 #include "locale-categories.h"
263 #undef SCM_DEFINE_LOCALE_CATEGORY
264 } scm_t_locale_settings;
265
266 /* Fill out SETTINGS according to the current locale settings. On success
267 zero is returned and SETTINGS is properly initialized. */
268 static int
269 get_current_locale_settings (scm_t_locale_settings *settings)
270 {
271 const char *locale_name;
272
273 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
274 { \
275 SCM_SYSCALL (locale_name = setlocale (LC_ ## _name, NULL)); \
276 if (locale_name == NULL) \
277 goto handle_error; \
278 \
279 settings-> _name = strdup (locale_name); \
280 if (settings-> _name == NULL) \
281 goto handle_oom; \
282 }
283
284 #include "locale-categories.h"
285 #undef SCM_DEFINE_LOCALE_CATEGORY
286
287 return 0;
288
289 handle_error:
290 return EINVAL;
291
292 handle_oom:
293 return ENOMEM;
294 }
295
296 /* Restore locale settings SETTINGS. On success, return zero. */
297 static int
298 restore_locale_settings (const scm_t_locale_settings *settings)
299 {
300 const char *result;
301
302 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
303 SCM_SYSCALL (result = setlocale (LC_ ## _name, settings-> _name)); \
304 if (result == NULL) \
305 goto handle_error;
306
307 #include "locale-categories.h"
308 #undef SCM_DEFINE_LOCALE_CATEGORY
309
310 return 0;
311
312 handle_error:
313 return EINVAL;
314 }
315
316 /* Free memory associated with SETTINGS. */
317 static void
318 free_locale_settings (scm_t_locale_settings *settings)
319 {
320 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
321 free (settings-> _name); \
322 settings->_name = NULL;
323 #include "locale-categories.h"
324 #undef SCM_DEFINE_LOCALE_CATEGORY
325 }
326
327 /* Install the locale named LOCALE_NAME for all the categories listed in
328 CATEGORY_MASK. */
329 static int
330 install_locale_categories (const char *locale_name, int category_mask)
331 {
332 const char *result;
333
334 if (category_mask == LC_ALL_MASK)
335 {
336 SCM_SYSCALL (result = setlocale (LC_ALL, locale_name));
337 if (result == NULL)
338 goto handle_error;
339 }
340 else
341 {
342 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
343 if (category_mask & SCM_LOCALE_CATEGORY_MASK (_name)) \
344 { \
345 SCM_SYSCALL (result = setlocale (LC_ ## _name, locale_name)); \
346 if (result == NULL) \
347 goto handle_error; \
348 }
349 #include "locale-categories.h"
350 #undef SCM_DEFINE_LOCALE_CATEGORY
351 }
352
353 return 0;
354
355 handle_error:
356 return EINVAL;
357 }
358
359 /* Install LOCALE, recursively installing its base locales first. On
360 success, zero is returned. */
361 static int
362 install_locale (scm_t_locale locale)
363 {
364 scm_t_locale stack[LOCALE_STACK_SIZE_MAX];
365 int category_mask = 0;
366 size_t stack_size = 0;
367 int stack_offset = 0;
368 const char *result = NULL;
369
370 /* Build up a locale stack by traversing the `base_locale' link. */
371 do
372 {
373 if (stack_size >= LOCALE_STACK_SIZE_MAX)
374 /* We cannot use `scm_error ()' here because otherwise the locale
375 mutex may remain locked. */
376 return EINVAL;
377
378 stack[stack_size++] = locale;
379
380 /* Keep track of which categories have already been taken into
381 account. */
382 category_mask |= locale->category_mask;
383
384 if (locale->base_locale != SCM_UNDEFINED)
385 locale = (scm_t_locale) SCM_SMOB_DATA (locale->base_locale);
386 else
387 locale = NULL;
388 }
389 while ((locale != NULL) && (category_mask != LC_ALL_MASK));
390
391 /* Install the C locale to start from a pristine state. */
392 SCM_SYSCALL (result = setlocale (LC_ALL, "C"));
393 if (result == NULL)
394 goto handle_error;
395
396 /* Install the locales in reverse order. */
397 for (stack_offset = stack_size - 1;
398 stack_offset >= 0;
399 stack_offset--)
400 {
401 int err;
402 scm_t_locale locale;
403
404 locale = stack[stack_offset];
405 err = install_locale_categories (locale->locale_name,
406 locale->category_mask);
407 if (err)
408 goto handle_error;
409 }
410
411 return 0;
412
413 handle_error:
414 return EINVAL;
415 }
416
417 /* Leave the locked locale section. */
418 static inline void
419 leave_locale_section (const scm_t_locale_settings *settings)
420 {
421 /* Restore the previous locale settings. */
422 (void)restore_locale_settings (settings);
423
424 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
425 }
426
427 /* Enter a locked locale section. */
428 static inline int
429 enter_locale_section (scm_t_locale locale,
430 scm_t_locale_settings *prev_locale)
431 {
432 int err;
433
434 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
435
436 err = get_current_locale_settings (prev_locale);
437 if (err)
438 {
439 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
440 return err;
441 }
442
443 err = install_locale (locale);
444 if (err)
445 {
446 leave_locale_section (prev_locale);
447 free_locale_settings (prev_locale);
448 }
449
450 return err;
451 }
452
453 /* Convenient macro to run STATEMENT in the locale context of C_LOCALE. */
454 #define RUN_IN_LOCALE_SECTION(_c_locale, _statement) \
455 do \
456 { \
457 int lsec_err; \
458 scm_t_locale_settings lsec_prev_locale; \
459 \
460 lsec_err = enter_locale_section ((_c_locale), &lsec_prev_locale); \
461 if (lsec_err) \
462 scm_locale_error (FUNC_NAME, lsec_err); \
463 else \
464 { \
465 _statement ; \
466 \
467 leave_locale_section (&lsec_prev_locale); \
468 free_locale_settings (&lsec_prev_locale); \
469 } \
470 } \
471 while (0)
472
473 /* Convert the current locale settings into a locale SMOB. On success, zero
474 is returned and RESULT points to the new SMOB. Otherwise, an error is
475 returned. */
476 static int
477 get_current_locale (SCM *result)
478 {
479 int err = 0;
480 scm_t_locale c_locale;
481 const char *current_locale;
482
483 c_locale = scm_gc_malloc (sizeof (* c_locale), "locale");
484
485
486 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
487
488 c_locale->category_mask = LC_ALL_MASK;
489 c_locale->base_locale = SCM_UNDEFINED;
490
491 current_locale = setlocale (LC_ALL, NULL);
492 if (current_locale != NULL)
493 {
494 c_locale->locale_name = strdup (current_locale);
495 if (c_locale->locale_name == NULL)
496 err = ENOMEM;
497 }
498 else
499 err = EINVAL;
500
501 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
502
503 if (err)
504 scm_gc_free (c_locale, sizeof (* c_locale), "locale");
505 else
506 SCM_NEWSMOB (*result, scm_tc16_locale_smob_type, c_locale);
507
508 return err;
509 }
510
511 #else /* USE_GNU_LOCALE_API */
512
513 /* Convenient macro to run STATEMENT in the locale context of C_LOCALE. */
514 #define RUN_IN_LOCALE_SECTION(_c_locale, _statement) \
515 do \
516 { \
517 scm_t_locale old_loc; \
518 \
519 old_loc = uselocale (_c_locale); \
520 _statement ; \
521 uselocale (old_loc); \
522 } \
523 while (0)
524
525
526 #endif /* USE_GNU_LOCALE_API */
527
528
529 \f
530 /* `make-locale' can take either category lists or single categories (the
531 `LC_*' integer constants). */
532 #define SCM_LIST_OR_INTEGER_P(arg) \
533 (scm_is_integer (arg) || scm_is_true (scm_list_p (arg)))
534
535
536 /* Return the category mask corresponding to CATEGORY (an `LC_' integer
537 constant). */
538 static inline int
539 category_to_category_mask (SCM category,
540 const char *func_name, int pos)
541 {
542 int c_category;
543 int c_category_mask;
544
545 c_category = scm_to_int (category);
546
547 #define SCM_DEFINE_LOCALE_CATEGORY(_name) \
548 case LC_ ## _name: \
549 c_category_mask = SCM_LOCALE_CATEGORY_MASK (_name); \
550 break;
551
552 switch (c_category)
553 {
554 #include "locale-categories.h"
555
556 case LC_ALL:
557 c_category_mask = LC_ALL_MASK;
558 break;
559
560 default:
561 scm_wrong_type_arg_msg (func_name, pos, category,
562 "locale category");
563 }
564
565 #undef SCM_DEFINE_LOCALE_CATEGORY
566
567 return c_category_mask;
568 }
569
570 /* Convert CATEGORIES, a list of locale categories or a single category (an
571 integer), into a category mask. */
572 static int
573 category_list_to_category_mask (SCM categories,
574 const char *func_name, int pos)
575 {
576 int c_category_mask = 0;
577
578 if (scm_is_integer (categories))
579 c_category_mask = category_to_category_mask (categories,
580 func_name, pos);
581 else
582 for (; !scm_is_null (categories); categories = SCM_CDR (categories))
583 {
584 SCM category = SCM_CAR (categories);
585
586 c_category_mask |=
587 category_to_category_mask (category, func_name, pos);
588 }
589
590 return c_category_mask;
591 }
592
593
594 SCM_DEFINE (scm_make_locale, "make-locale", 2, 1, 0,
595 (SCM category_list, SCM locale_name, SCM base_locale),
596 "Return a reference to a data structure representing a set of "
597 "locale datasets. @var{category_list} should be either a list "
598 "of locale categories or a single category as used with "
599 "@code{setlocale} (@pxref{Locales, @code{setlocale}}) and "
600 "@var{locale_name} should be the name of the locale considered "
601 "(e.g., @code{\"sl_SI\"}). Optionally, if @code{base_locale} is "
602 "passed, it should be a locale object denoting settings for "
603 "categories not listed in @var{category_list}.")
604 #define FUNC_NAME s_scm_make_locale
605 {
606 SCM locale = SCM_BOOL_F;
607 int err = 0;
608 int c_category_mask;
609 char *c_locale_name;
610 scm_t_locale c_base_locale, c_locale;
611
612 SCM_MAKE_VALIDATE (1, category_list, LIST_OR_INTEGER_P);
613 SCM_VALIDATE_STRING (2, locale_name);
614 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, base_locale, c_base_locale);
615
616 c_category_mask = category_list_to_category_mask (category_list,
617 FUNC_NAME, 1);
618 c_locale_name = scm_to_locale_string (locale_name);
619
620 #ifdef USE_GNU_LOCALE_API
621
622 if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale)))
623 c_base_locale = LC_GLOBAL_LOCALE;
624
625 if (c_base_locale != (locale_t) 0)
626 {
627 /* C_BASE_LOCALE is to be consumed by `newlocale ()' so it needs to be
628 duplicated before. */
629 c_base_locale = duplocale (c_base_locale);
630
631 if (c_base_locale == (locale_t) 0)
632 {
633 err = errno;
634 goto fail;
635 }
636 }
637
638 c_locale = newlocale (c_category_mask, c_locale_name, c_base_locale);
639
640 free (c_locale_name);
641
642 if (c_locale == (locale_t) 0)
643 {
644 if (c_base_locale != (locale_t) 0)
645 freelocale (c_base_locale);
646 scm_locale_error (FUNC_NAME, errno);
647 }
648 else
649 SCM_NEWSMOB (locale, scm_tc16_locale_smob_type, c_locale);
650
651 #else
652
653 c_locale = scm_gc_malloc (sizeof (* c_locale), "locale");
654
655 c_locale->category_mask = c_category_mask;
656 c_locale->locale_name = c_locale_name;
657
658 if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale)))
659 {
660 /* Get the current locale settings and turn them into a locale
661 object. */
662 err = get_current_locale (&base_locale);
663 if (err)
664 goto fail;
665 }
666
667 c_locale->base_locale = base_locale;
668
669 {
670 /* Try out the new locale and raise an exception if it doesn't work. */
671 int err;
672 scm_t_locale_settings prev_locale;
673
674 err = enter_locale_section (c_locale, &prev_locale);
675
676 if (err)
677 goto fail;
678 else
679 {
680 leave_locale_section (&prev_locale);
681 SCM_NEWSMOB (locale, scm_tc16_locale_smob_type, c_locale);
682 }
683 }
684
685 #endif
686
687 return locale;
688
689 fail:
690 #ifndef USE_GNU_LOCALE_API
691 scm_gc_free (c_locale, sizeof (* c_locale), "locale");
692 #endif
693 free (c_locale_name);
694 scm_locale_error (FUNC_NAME, err);
695
696 return SCM_BOOL_F;
697 }
698 #undef FUNC_NAME
699
700 SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0,
701 (SCM obj),
702 "Return true if @var{obj} is a locale object.")
703 #define FUNC_NAME s_scm_locale_p
704 {
705 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_locale_smob_type, obj));
706 }
707 #undef FUNC_NAME
708
709
710 \f
711 /* Locale-dependent string comparison.
712
713 A similar API can be found in MzScheme starting from version 200:
714 http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */
715
716 #define SCM_STRING_TO_U32_BUF(s1, c_s1) \
717 do \
718 { \
719 if (scm_i_is_narrow_string (s1)) \
720 { \
721 size_t i, len; \
722 const char *buf = scm_i_string_chars (s1); \
723 \
724 len = scm_i_string_length (s1); \
725 c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1)); \
726 \
727 for (i = 0; i < len; i ++) \
728 c_s1[i] = (unsigned char ) buf[i]; \
729 c_s1[len] = 0; \
730 } \
731 else \
732 c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \
733 } while (0)
734
735
736 /* Compare UTF-32 strings according to LOCALE. Returns a negative value if
737 S1 compares smaller than S2, a positive value if S1 compares larger than
738 S2, or 0 if they compare equal. */
739 static inline int
740 compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
741 #define FUNC_NAME func_name
742 {
743 int result;
744 scm_t_locale c_locale;
745 scm_t_wchar *c_s1, *c_s2;
746 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
747
748 SCM_STRING_TO_U32_BUF (s1, c_s1);
749 SCM_STRING_TO_U32_BUF (s2, c_s2);
750
751 if (c_locale)
752 RUN_IN_LOCALE_SECTION (c_locale,
753 result = u32_strcoll ((const scm_t_uint32 *) c_s1,
754 (const scm_t_uint32 *) c_s2));
755 else
756 result = u32_strcoll ((const scm_t_uint32 *) c_s1,
757 (const scm_t_uint32 *) c_s2);
758
759 scm_remember_upto_here_2 (s1, s2);
760 scm_remember_upto_here (locale);
761 return result;
762 }
763 #undef FUNC_NAME
764
765 /* Return the current language of the locale. */
766 static const char *
767 locale_language ()
768 {
769 /* Note: If the locale has been set with 'uselocale', uc_locale_language
770 from libunistring versions 0.9.1 and older will return the incorrect
771 (non-thread-specific) locale. This is fixed in versions 0.9.2 and
772 newer. */
773 return uc_locale_language ();
774 }
775
776 static inline int
777 u32_locale_casecoll (const char *func_name, const scm_t_uint32 *c_s1,
778 const scm_t_uint32 *c_s2,
779 int *result)
780 {
781 /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must note
782 make any non-local exit. */
783
784 int ret;
785 const char *loc = locale_language ();
786
787 ret = u32_casecoll (c_s1, u32_strlen (c_s1),
788 c_s2, u32_strlen (c_s2),
789 loc, UNINORM_NFC, result);
790
791 return ret == 0 ? ret : errno;
792 }
793
794 static inline int
795 compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
796 #define FUNC_NAME func_name
797 {
798 int result, ret = 0;
799 scm_t_locale c_locale;
800 scm_t_wchar *c_s1, *c_s2;
801 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
802
803 SCM_STRING_TO_U32_BUF (s1, c_s1);
804 SCM_STRING_TO_U32_BUF (s2, c_s2);
805
806 if (c_locale)
807 RUN_IN_LOCALE_SECTION
808 (c_locale,
809 ret = u32_locale_casecoll (func_name,
810 (const scm_t_uint32 *) c_s1,
811 (const scm_t_uint32 *) c_s2,
812 &result));
813 else
814 ret = u32_locale_casecoll (func_name,
815 (const scm_t_uint32 *) c_s1,
816 (const scm_t_uint32 *) c_s2,
817 &result);
818
819 if (SCM_UNLIKELY (ret != 0))
820 {
821 errno = ret;
822 scm_syserror (FUNC_NAME);
823 }
824
825 scm_remember_upto_here_2 (s1, s2);
826 scm_remember_upto_here (locale);
827
828 return result;
829 }
830 #undef FUNC_NAME
831
832 /* Store into DST an upper-case version of SRC. */
833 static inline void
834 str_upcase (register char *dst, register const char *src)
835 {
836 for (; *src != '\0'; src++, dst++)
837 *dst = toupper ((int) *src);
838 *dst = '\0';
839 }
840
841 static inline void
842 str_downcase (register char *dst, register const char *src)
843 {
844 for (; *src != '\0'; src++, dst++)
845 *dst = tolower ((int) *src);
846 *dst = '\0';
847 }
848
849 #ifdef USE_GNU_LOCALE_API
850 static inline void
851 str_upcase_l (register char *dst, register const char *src,
852 scm_t_locale locale)
853 {
854 for (; *src != '\0'; src++, dst++)
855 *dst = toupper_l (*src, locale);
856 *dst = '\0';
857 }
858
859 static inline void
860 str_downcase_l (register char *dst, register const char *src,
861 scm_t_locale locale)
862 {
863 for (; *src != '\0'; src++, dst++)
864 *dst = tolower_l (*src, locale);
865 *dst = '\0';
866 }
867 #endif
868
869
870 SCM_DEFINE (scm_string_locale_lt, "string-locale<?", 2, 1, 0,
871 (SCM s1, SCM s2, SCM locale),
872 "Compare strings @var{s1} and @var{s2} in a locale-dependent way."
873 "If @var{locale} is provided, it should be locale object (as "
874 "returned by @code{make-locale}) and will be used to perform the "
875 "comparison; otherwise, the current system locale is used.")
876 #define FUNC_NAME s_scm_string_locale_lt
877 {
878 int result;
879
880 SCM_VALIDATE_STRING (1, s1);
881 SCM_VALIDATE_STRING (2, s2);
882
883 result = compare_u32_strings (s1, s2, locale, FUNC_NAME);
884
885 return scm_from_bool (result < 0);
886 }
887 #undef FUNC_NAME
888
889 SCM_DEFINE (scm_string_locale_gt, "string-locale>?", 2, 1, 0,
890 (SCM s1, SCM s2, SCM locale),
891 "Compare strings @var{s1} and @var{s2} in a locale-dependent way."
892 "If @var{locale} is provided, it should be locale object (as "
893 "returned by @code{make-locale}) and will be used to perform the "
894 "comparison; otherwise, the current system locale is used.")
895 #define FUNC_NAME s_scm_string_locale_gt
896 {
897 int result;
898
899 SCM_VALIDATE_STRING (1, s1);
900 SCM_VALIDATE_STRING (2, s2);
901
902 result = compare_u32_strings (s1, s2, locale, FUNC_NAME);
903
904 return scm_from_bool (result > 0);
905 }
906 #undef FUNC_NAME
907
908 SCM_DEFINE (scm_string_locale_ci_lt, "string-locale-ci<?", 2, 1, 0,
909 (SCM s1, SCM s2, SCM locale),
910 "Compare strings @var{s1} and @var{s2} in a case-insensitive, "
911 "and locale-dependent way. If @var{locale} is provided, it "
912 "should be locale object (as returned by @code{make-locale}) "
913 "and will be used to perform the comparison; otherwise, the "
914 "current system locale is used.")
915 #define FUNC_NAME s_scm_string_locale_ci_lt
916 {
917 int result;
918
919 SCM_VALIDATE_STRING (1, s1);
920 SCM_VALIDATE_STRING (2, s2);
921
922 result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME);
923
924 return scm_from_bool (result < 0);
925 }
926 #undef FUNC_NAME
927
928 SCM_DEFINE (scm_string_locale_ci_gt, "string-locale-ci>?", 2, 1, 0,
929 (SCM s1, SCM s2, SCM locale),
930 "Compare strings @var{s1} and @var{s2} in a case-insensitive, "
931 "and locale-dependent way. If @var{locale} is provided, it "
932 "should be locale object (as returned by @code{make-locale}) "
933 "and will be used to perform the comparison; otherwise, the "
934 "current system locale is used.")
935 #define FUNC_NAME s_scm_string_locale_ci_gt
936 {
937 int result;
938
939 SCM_VALIDATE_STRING (1, s1);
940 SCM_VALIDATE_STRING (2, s2);
941
942 result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME);
943
944 return scm_from_bool (result > 0);
945 }
946 #undef FUNC_NAME
947
948 SCM_DEFINE (scm_string_locale_ci_eq, "string-locale-ci=?", 2, 1, 0,
949 (SCM s1, SCM s2, SCM locale),
950 "Compare strings @var{s1} and @var{s2} in a case-insensitive, "
951 "and locale-dependent way. If @var{locale} is provided, it "
952 "should be locale object (as returned by @code{make-locale}) "
953 "and will be used to perform the comparison; otherwise, the "
954 "current system locale is used.")
955 #define FUNC_NAME s_scm_string_locale_ci_eq
956 {
957 int result;
958
959 SCM_VALIDATE_STRING (1, s1);
960 SCM_VALIDATE_STRING (2, s2);
961
962 result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME);
963
964 return scm_from_bool (result == 0);
965 }
966 #undef FUNC_NAME
967
968
969 SCM_DEFINE (scm_char_locale_lt, "char-locale<?", 2, 1, 0,
970 (SCM c1, SCM c2, SCM locale),
971 "Return true if character @var{c1} is lower than @var{c2} "
972 "according to @var{locale} or to the current locale.")
973 #define FUNC_NAME s_scm_char_locale_lt
974 {
975 int result;
976
977 SCM_VALIDATE_CHAR (1, c1);
978 SCM_VALIDATE_CHAR (2, c2);
979
980 result = compare_u32_strings (scm_string (scm_list_1 (c1)),
981 scm_string (scm_list_1 (c2)),
982 locale, FUNC_NAME);
983
984 return scm_from_bool (result < 0);
985 }
986 #undef FUNC_NAME
987
988 SCM_DEFINE (scm_char_locale_gt, "char-locale>?", 2, 1, 0,
989 (SCM c1, SCM c2, SCM locale),
990 "Return true if character @var{c1} is greater than @var{c2} "
991 "according to @var{locale} or to the current locale.")
992 #define FUNC_NAME s_scm_char_locale_gt
993 {
994 int result;
995
996 SCM_VALIDATE_CHAR (1, c1);
997 SCM_VALIDATE_CHAR (2, c2);
998
999 result = compare_u32_strings (scm_string (scm_list_1 (c1)),
1000 scm_string (scm_list_1 (c2)),
1001 locale, FUNC_NAME);
1002
1003 return scm_from_bool (result > 0);
1004 }
1005 #undef FUNC_NAME
1006
1007 SCM_DEFINE (scm_char_locale_ci_lt, "char-locale-ci<?", 2, 1, 0,
1008 (SCM c1, SCM c2, SCM locale),
1009 "Return true if character @var{c1} is lower than @var{c2}, "
1010 "in a case insensitive way according to @var{locale} or to "
1011 "the current locale.")
1012 #define FUNC_NAME s_scm_char_locale_ci_lt
1013 {
1014 int result;
1015
1016 SCM_VALIDATE_CHAR (1, c1);
1017 SCM_VALIDATE_CHAR (2, c2);
1018
1019 result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)),
1020 scm_string (scm_list_1 (c2)),
1021 locale, FUNC_NAME);
1022
1023 return scm_from_bool (result < 0);
1024 }
1025 #undef FUNC_NAME
1026
1027 SCM_DEFINE (scm_char_locale_ci_gt, "char-locale-ci>?", 2, 1, 0,
1028 (SCM c1, SCM c2, SCM locale),
1029 "Return true if character @var{c1} is greater than @var{c2}, "
1030 "in a case insensitive way according to @var{locale} or to "
1031 "the current locale.")
1032 #define FUNC_NAME s_scm_char_locale_ci_gt
1033 {
1034 int result;
1035
1036 SCM_VALIDATE_CHAR (1, c1);
1037 SCM_VALIDATE_CHAR (2, c2);
1038
1039 result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)),
1040 scm_string (scm_list_1 (c2)),
1041 locale, FUNC_NAME);
1042
1043 return scm_from_bool (result > 0);
1044 }
1045 #undef FUNC_NAME
1046
1047 SCM_DEFINE (scm_char_locale_ci_eq, "char-locale-ci=?", 2, 1, 0,
1048 (SCM c1, SCM c2, SCM locale),
1049 "Return true if character @var{c1} is equal to @var{c2}, "
1050 "in a case insensitive way according to @var{locale} or to "
1051 "the current locale.")
1052 #define FUNC_NAME s_scm_char_locale_ci_eq
1053 {
1054 int result;
1055
1056 SCM_VALIDATE_CHAR (1, c1);
1057 SCM_VALIDATE_CHAR (2, c2);
1058
1059 result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)),
1060 scm_string (scm_list_1 (c2)),
1061 locale, FUNC_NAME);
1062
1063 return scm_from_bool (result == 0);
1064 }
1065 #undef FUNC_NAME
1066
1067
1068 \f
1069 /* Locale-dependent alphabetic character mapping. */
1070
1071 static inline int
1072 u32_locale_tocase (const scm_t_uint32 *c_s1, size_t len,
1073 scm_t_uint32 **p_c_s2, size_t * p_len2,
1074 scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t,
1075 const char *, uninorm_t,
1076 scm_t_uint32 *, size_t *))
1077 {
1078 /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must not
1079 make any non-local exit. */
1080
1081 scm_t_uint32 *ret;
1082 const char *loc = locale_language ();
1083
1084 /* The first NULL here indicates that no NFC or NFKC normalization
1085 is done. The second NULL means the return buffer is
1086 malloc'ed here. */
1087 ret = func (c_s1, len, loc, NULL, NULL, p_len2);
1088
1089 if (ret == NULL)
1090 {
1091 *p_c_s2 = (scm_t_uint32 *) NULL;
1092 *p_len2 = 0;
1093 return errno;
1094 }
1095 *p_c_s2 = ret;
1096
1097 return 0;
1098 }
1099
1100
1101 static SCM
1102 chr_to_case (SCM chr, scm_t_locale c_locale,
1103 scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *,
1104 uninorm_t, scm_t_uint32 *, size_t *),
1105 const char *func_name,
1106 int *err)
1107 #define FUNC_NAME func_name
1108 {
1109 int ret;
1110 scm_t_uint32 c;
1111 scm_t_uint32 *convbuf;
1112 size_t convlen;
1113 SCM convchar;
1114
1115 c = SCM_CHAR (chr);
1116
1117 if (c_locale != NULL)
1118 RUN_IN_LOCALE_SECTION (c_locale, ret =
1119 u32_locale_tocase (&c, 1, &convbuf, &convlen, func));
1120 else
1121 ret =
1122 u32_locale_tocase (&c, 1, &convbuf, &convlen, func);
1123
1124 if (SCM_UNLIKELY (ret != 0))
1125 {
1126 *err = ret;
1127 return NULL;
1128 }
1129
1130 if (convlen == 1)
1131 convchar = SCM_MAKE_CHAR ((scm_t_wchar) convbuf[0]);
1132 else
1133 convchar = chr;
1134 free (convbuf);
1135
1136 return convchar;
1137 }
1138 #undef FUNC_NAME
1139
1140 SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0,
1141 (SCM chr, SCM locale),
1142 "Return the lowercase character that corresponds to @var{chr} "
1143 "according to either @var{locale} or the current locale.")
1144 #define FUNC_NAME s_scm_char_locale_downcase
1145 {
1146 scm_t_locale c_locale;
1147 SCM ret;
1148 int err = 0;
1149
1150 SCM_VALIDATE_CHAR (1, chr);
1151 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1152
1153 ret = chr_to_case (chr, c_locale, u32_tolower, FUNC_NAME, &err);
1154
1155 if (err != 0)
1156 {
1157 errno = err;
1158 scm_syserror (FUNC_NAME);
1159 }
1160 return ret;
1161 }
1162 #undef FUNC_NAME
1163
1164 SCM_DEFINE (scm_char_locale_upcase, "char-locale-upcase", 1, 1, 0,
1165 (SCM chr, SCM locale),
1166 "Return the uppercase character that corresponds to @var{chr} "
1167 "according to either @var{locale} or the current locale.")
1168 #define FUNC_NAME s_scm_char_locale_upcase
1169 {
1170 scm_t_locale c_locale;
1171 SCM ret;
1172 int err = 0;
1173
1174 SCM_VALIDATE_CHAR (1, chr);
1175 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1176
1177 ret = chr_to_case (chr, c_locale, u32_toupper, FUNC_NAME, &err);
1178
1179 if (err != 0)
1180 {
1181 errno = err;
1182 scm_syserror (FUNC_NAME);
1183 }
1184 return ret;
1185 }
1186 #undef FUNC_NAME
1187
1188 SCM_DEFINE (scm_char_locale_titlecase, "char-locale-titlecase", 1, 1, 0,
1189 (SCM chr, SCM locale),
1190 "Return the titlecase character that corresponds to @var{chr} "
1191 "according to either @var{locale} or the current locale.")
1192 #define FUNC_NAME s_scm_char_locale_titlecase
1193 {
1194 scm_t_locale c_locale;
1195 SCM ret;
1196 int err = 0;
1197
1198 SCM_VALIDATE_CHAR (1, chr);
1199 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1200
1201 ret = chr_to_case (chr, c_locale, u32_totitle, FUNC_NAME, &err);
1202
1203 if (err != 0)
1204 {
1205 errno = err;
1206 scm_syserror (FUNC_NAME);
1207 }
1208 return ret;
1209 }
1210 #undef FUNC_NAME
1211
1212 static SCM
1213 str_to_case (SCM str, scm_t_locale c_locale,
1214 scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *,
1215 uninorm_t, scm_t_uint32 *, size_t *),
1216 const char *func_name,
1217 int *err)
1218 #define FUNC_NAME func_name
1219 {
1220 scm_t_wchar *c_str, *c_buf;
1221 scm_t_uint32 *c_convstr;
1222 size_t len, convlen;
1223 int ret;
1224 SCM convstr;
1225
1226 len = scm_i_string_length (str);
1227 if (len == 0)
1228 return scm_nullstr;
1229 SCM_STRING_TO_U32_BUF (str, c_str);
1230
1231 if (c_locale)
1232 RUN_IN_LOCALE_SECTION (c_locale, ret =
1233 u32_locale_tocase ((scm_t_uint32 *) c_str, len,
1234 &c_convstr,
1235 &convlen, func));
1236 else
1237 ret =
1238 u32_locale_tocase ((scm_t_uint32 *) c_str, len,
1239 &c_convstr, &convlen, func);
1240
1241 scm_remember_upto_here (str);
1242
1243 if (SCM_UNLIKELY (ret != 0))
1244 {
1245 *err = ret;
1246 return NULL;
1247 }
1248
1249 convstr = scm_i_make_wide_string (convlen, &c_buf, 0);
1250 memcpy (c_buf, c_convstr, convlen * sizeof (scm_t_wchar));
1251 free (c_convstr);
1252
1253 scm_i_try_narrow_string (convstr);
1254
1255 return convstr;
1256 }
1257 #undef FUNC_NAME
1258
1259 SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0,
1260 (SCM str, SCM locale),
1261 "Return a new string that is the uppercase version of "
1262 "@var{str} according to either @var{locale} or the current "
1263 "locale.")
1264 #define FUNC_NAME s_scm_string_locale_upcase
1265 {
1266 scm_t_locale c_locale;
1267 SCM ret;
1268 int err = 0;
1269
1270 SCM_VALIDATE_STRING (1, str);
1271 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1272
1273 ret = str_to_case (str, c_locale, u32_toupper, FUNC_NAME, &err);
1274
1275 if (err != 0)
1276 {
1277 errno = err;
1278 scm_syserror (FUNC_NAME);
1279 }
1280 return ret;
1281 }
1282 #undef FUNC_NAME
1283
1284 SCM_DEFINE (scm_string_locale_downcase, "string-locale-downcase", 1, 1, 0,
1285 (SCM str, SCM locale),
1286 "Return a new string that is the down-case version of "
1287 "@var{str} according to either @var{locale} or the current "
1288 "locale.")
1289 #define FUNC_NAME s_scm_string_locale_downcase
1290 {
1291 scm_t_locale c_locale;
1292 SCM ret;
1293 int err = 0;
1294
1295 SCM_VALIDATE_STRING (1, str);
1296 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1297
1298 ret = str_to_case (str, c_locale, u32_tolower, FUNC_NAME, &err);
1299
1300 if (err != 0)
1301 {
1302 errno = err;
1303 scm_syserror (FUNC_NAME);
1304 }
1305 return ret;
1306 }
1307 #undef FUNC_NAME
1308
1309 SCM_DEFINE (scm_string_locale_titlecase, "string-locale-titlecase", 1, 1, 0,
1310 (SCM str, SCM locale),
1311 "Return a new string that is the title-case version of "
1312 "@var{str} according to either @var{locale} or the current "
1313 "locale.")
1314 #define FUNC_NAME s_scm_string_locale_titlecase
1315 {
1316 scm_t_locale c_locale;
1317 SCM ret;
1318 int err = 0;
1319
1320 SCM_VALIDATE_STRING (1, str);
1321 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1322
1323 ret = str_to_case (str, c_locale, u32_totitle, FUNC_NAME, &err);
1324
1325 if (err != 0)
1326 {
1327 errno = err;
1328 scm_syserror (FUNC_NAME);
1329 }
1330 return ret;
1331 }
1332 #undef FUNC_NAME
1333
1334 /* Note: We don't provide mutative versions of `string-locale-(up|down)case'
1335 because, in some languages, a single downcase character maps to a couple
1336 of uppercase characters. Read the SRFI-13 document for a detailed
1337 discussion about this. */
1338
1339
1340 \f
1341 /* Locale-dependent number parsing. */
1342
1343 SCM_DEFINE (scm_locale_string_to_integer, "locale-string->integer",
1344 1, 2, 0, (SCM str, SCM base, SCM locale),
1345 "Convert string @var{str} into an integer according to either "
1346 "@var{locale} (a locale object as returned by @code{make-locale}) "
1347 "or the current process locale. Return two values: an integer "
1348 "(on success) or @code{#f}, and the number of characters read "
1349 "from @var{str} (@code{0} on failure).")
1350 #define FUNC_NAME s_scm_locale_string_to_integer
1351 {
1352 SCM result;
1353 long c_result;
1354 int c_base;
1355 const char *c_str;
1356 char *c_endptr;
1357 scm_t_locale c_locale;
1358
1359 SCM_VALIDATE_STRING (1, str);
1360 c_str = scm_i_string_chars (str);
1361
1362 if (base != SCM_UNDEFINED)
1363 SCM_VALIDATE_INT_COPY (2, base, c_base);
1364 else
1365 c_base = 10;
1366
1367 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
1368
1369 if (c_locale != NULL)
1370 {
1371 #ifdef USE_GNU_LOCALE_API
1372 c_result = strtol_l (c_str, &c_endptr, c_base, c_locale);
1373 #else
1374 RUN_IN_LOCALE_SECTION (c_locale,
1375 c_result = strtol (c_str, &c_endptr, c_base));
1376 #endif
1377 }
1378 else
1379 c_result = strtol (c_str, &c_endptr, c_base);
1380
1381 scm_remember_upto_here (str);
1382
1383 if (c_endptr == c_str)
1384 result = SCM_BOOL_F;
1385 else
1386 result = scm_from_long (c_result);
1387
1388 return (scm_values (scm_list_2 (result, scm_from_long (c_endptr - c_str))));
1389 }
1390 #undef FUNC_NAME
1391
1392 SCM_DEFINE (scm_locale_string_to_inexact, "locale-string->inexact",
1393 1, 1, 0, (SCM str, SCM locale),
1394 "Convert string @var{str} into an inexact number according to "
1395 "either @var{locale} (a locale object as returned by "
1396 "@code{make-locale}) or the current process locale. Return "
1397 "two values: an inexact number (on success) or @code{#f}, and "
1398 "the number of characters read from @var{str} (@code{0} on "
1399 "failure).")
1400 #define FUNC_NAME s_scm_locale_string_to_inexact
1401 {
1402 SCM result;
1403 double c_result;
1404 const char *c_str;
1405 char *c_endptr;
1406 scm_t_locale c_locale;
1407
1408 SCM_VALIDATE_STRING (1, str);
1409 c_str = scm_i_string_chars (str);
1410
1411 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1412
1413 if (c_locale != NULL)
1414 {
1415 #ifdef USE_GNU_LOCALE_API
1416 c_result = strtod_l (c_str, &c_endptr, c_locale);
1417 #else
1418 RUN_IN_LOCALE_SECTION (c_locale,
1419 c_result = strtod (c_str, &c_endptr));
1420 #endif
1421 }
1422 else
1423 c_result = strtod (c_str, &c_endptr);
1424
1425 scm_remember_upto_here (str);
1426
1427 if (c_endptr == c_str)
1428 result = SCM_BOOL_F;
1429 else
1430 result = scm_from_double (c_result);
1431
1432 return (scm_values (scm_list_2 (result, scm_from_long (c_endptr - c_str))));
1433 }
1434 #undef FUNC_NAME
1435
1436 \f
1437 /* Language information, aka. `nl_langinfo ()'. */
1438
1439 /* FIXME: Issues related to `nl-langinfo'.
1440
1441 1. The `CODESET' value is not normalized. This is a secondary issue, but
1442 still a practical issue. See
1443 http://www.cl.cam.ac.uk/~mgk25/ucs/norm_charmap.c for codeset
1444 normalization.
1445
1446 2. `nl_langinfo ()' is not available on Windows.
1447
1448 3. `nl_langinfo ()' may return strings encoded in a locale different from
1449 the current one.
1450 For example:
1451
1452 (nl-langinfo DAY_1 (make-locale LC_ALL "eo_EO.UTF-8"))
1453
1454 returns a result that is a UTF-8 string, regardless of the
1455 setting of the current locale. If nl_langinfo supports CODESET,
1456 we can convert the string properly using scm_from_stringn. If
1457 CODESET is not supported, we won't be able to make much sense of
1458 the returned string.
1459
1460 Note: We don't use Gnulib's `nl_langinfo' module because it's currently not
1461 as complete as the compatibility hacks in `i18n.scm'. */
1462
1463
1464 SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0,
1465 (SCM item, SCM locale),
1466 "Return a string denoting locale information for @var{item} "
1467 "in the current locale or that specified by @var{locale}. "
1468 "The semantics and arguments are the same as those of the "
1469 "X/Open @code{nl_langinfo} function (@pxref{The Elegant and "
1470 "Fast Way, @code{nl_langinfo},, libc, The GNU C Library "
1471 "Reference Manual}).")
1472 #define FUNC_NAME s_scm_nl_langinfo
1473 {
1474 #ifdef HAVE_NL_LANGINFO
1475 SCM result;
1476 nl_item c_item;
1477 char *c_result;
1478 scm_t_locale c_locale;
1479 #ifdef HAVE_LANGINFO_CODESET
1480 char *codeset;
1481 #endif
1482
1483 SCM_VALIDATE_INT_COPY (2, item, c_item);
1484 SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
1485
1486 /* Sadly, `nl_langinfo ()' returns a pointer to a static string. According
1487 to SuS v2, that static string may be modified by subsequent calls to
1488 `nl_langinfo ()' as well as by calls to `setlocale ()'. Thus, we must
1489 acquire the locale mutex before doing invoking `nl_langinfo ()'. See
1490 http://opengroup.org/onlinepubs/007908799/xsh/nl_langinfo.html for
1491 details. */
1492
1493 scm_i_pthread_mutex_lock (&scm_i_locale_mutex);
1494 if (c_locale != NULL)
1495 {
1496 #ifdef USE_GNU_LOCALE_API
1497 c_result = nl_langinfo_l (c_item, c_locale);
1498 #ifdef HAVE_LANGINFO_CODESET
1499 codeset = nl_langinfo_l (CODESET, c_locale);
1500 #endif /* HAVE_LANGINFO_CODESET */
1501 #else /* !USE_GNU_LOCALE_API */
1502 /* We can't use `RUN_IN_LOCALE_SECTION ()' here because the locale
1503 mutex is already taken. */
1504 int lsec_err;
1505 scm_t_locale_settings lsec_prev_locale;
1506
1507 lsec_err = get_current_locale_settings (&lsec_prev_locale);
1508 if (lsec_err)
1509 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
1510 else
1511 {
1512 lsec_err = install_locale (c_locale);
1513 if (lsec_err)
1514 {
1515 leave_locale_section (&lsec_prev_locale);
1516 free_locale_settings (&lsec_prev_locale);
1517 }
1518 }
1519
1520 if (lsec_err)
1521 scm_locale_error (FUNC_NAME, lsec_err);
1522 else
1523 {
1524 c_result = nl_langinfo (c_item);
1525 #ifdef HAVE_LANGINFO_CODESET
1526 codeset = nl_langinfo (CODESET);
1527 #endif /* HAVE_LANGINFO_CODESET */
1528
1529 restore_locale_settings (&lsec_prev_locale);
1530 free_locale_settings (&lsec_prev_locale);
1531 }
1532 #endif
1533 }
1534 else
1535 {
1536 c_result = nl_langinfo (c_item);
1537 #ifdef HAVE_LANGINFO_CODESET
1538 codeset = nl_langinfo (CODESET);
1539 #endif /* HAVE_LANGINFO_CODESET */
1540 }
1541
1542 c_result = strdup (c_result);
1543 scm_i_pthread_mutex_unlock (&scm_i_locale_mutex);
1544
1545 if (c_result == NULL)
1546 result = SCM_BOOL_F;
1547 else
1548 {
1549 switch (c_item)
1550 {
1551 #if (defined GROUPING) && (defined MON_GROUPING)
1552 case GROUPING:
1553 case MON_GROUPING:
1554 {
1555 char *p;
1556
1557 /* In this cases, the result is to be interpreted as a list
1558 of numbers. If the last item is `CHAR_MAX' or a negative
1559 number, it has the special meaning "no more grouping"
1560 (negative numbers aren't specified in POSIX but can be
1561 used by glibc; see
1562 <http://lists.gnu.org/archive/html/bug-guile/2011-02/msg00159.html>). */
1563 result = SCM_EOL;
1564 for (p = c_result; (*p > 0) && (*p != CHAR_MAX); p++)
1565 result = scm_cons (SCM_I_MAKINUM ((int) *p), result);
1566
1567 {
1568 SCM last_pair = result;
1569
1570 result = scm_reverse_x (result, SCM_EOL);
1571
1572 if (*p == 0)
1573 {
1574 /* Cyclic grouping information. */
1575 if (last_pair != SCM_EOL)
1576 SCM_SETCDR (last_pair, result);
1577 }
1578 }
1579
1580 free (c_result);
1581 break;
1582 }
1583 #endif
1584
1585 #if (defined FRAC_DIGITS) && (defined INT_FRAC_DIGITS)
1586 case FRAC_DIGITS:
1587 case INT_FRAC_DIGITS:
1588 /* This is to be interpreted as a single integer. */
1589 if (*c_result == CHAR_MAX)
1590 /* Unspecified. */
1591 result = SCM_BOOL_F;
1592 else
1593 result = SCM_I_MAKINUM (*c_result);
1594
1595 free (c_result);
1596 break;
1597 #endif
1598
1599 #if (defined P_CS_PRECEDES) && (defined INT_N_CS_PRECEDES)
1600 case P_CS_PRECEDES:
1601 case N_CS_PRECEDES:
1602 case INT_P_CS_PRECEDES:
1603 case INT_N_CS_PRECEDES:
1604 #if (defined P_SEP_BY_SPACE) && (defined N_SEP_BY_SPACE)
1605 case P_SEP_BY_SPACE:
1606 case N_SEP_BY_SPACE:
1607 #endif
1608 /* This is to be interpreted as a boolean. */
1609 result = scm_from_bool (*c_result);
1610
1611 free (c_result);
1612 break;
1613 #endif
1614
1615 #if (defined P_SIGN_POSN) && (defined INT_N_SIGN_POSN)
1616 case P_SIGN_POSN:
1617 case N_SIGN_POSN:
1618 case INT_P_SIGN_POSN:
1619 case INT_N_SIGN_POSN:
1620 /* See `(libc) Sign of Money Amount' for the interpretation of the
1621 return value here. */
1622 switch (*c_result)
1623 {
1624 case 0:
1625 result = scm_from_latin1_symbol ("parenthesize");
1626 break;
1627
1628 case 1:
1629 result = scm_from_latin1_symbol ("sign-before");
1630 break;
1631
1632 case 2:
1633 result = scm_from_latin1_symbol ("sign-after");
1634 break;
1635
1636 case 3:
1637 result = scm_from_latin1_symbol ("sign-before-currency-symbol");
1638 break;
1639
1640 case 4:
1641 result = scm_from_latin1_symbol ("sign-after-currency-symbol");
1642 break;
1643
1644 default:
1645 result = scm_from_latin1_symbol ("unspecified");
1646 }
1647 break;
1648 #endif
1649
1650 default:
1651 #ifdef HAVE_LANGINFO_CODESET
1652 result = scm_from_stringn (c_result, strlen (c_result),
1653 codeset,
1654 SCM_FAILED_CONVERSION_QUESTION_MARK);
1655 #else /* !HAVE_LANGINFO_CODESET */
1656 /* This may be incorrectly encoded if the locale differs
1657 from the c_locale. */
1658 result = scm_from_locale_string (c_result);
1659 #endif /* !HAVE_LANGINFO_CODESET */
1660 free (c_result);
1661 }
1662 }
1663
1664 return result;
1665 #else
1666 scm_syserror_msg (FUNC_NAME, "`nl-langinfo' not supported on your system",
1667 SCM_EOL, ENOSYS);
1668
1669 return SCM_BOOL_F;
1670 #endif
1671 }
1672 #undef FUNC_NAME
1673
1674 /* Define the `nl_item' constants. */
1675 static inline void
1676 define_langinfo_items (void)
1677 {
1678 #ifdef HAVE_LANGINFO_H
1679
1680 #define DEFINE_NLITEM_CONSTANT(_item) \
1681 scm_c_define (# _item, scm_from_int (_item))
1682
1683 DEFINE_NLITEM_CONSTANT (CODESET);
1684
1685 /* Abbreviated days of the week. */
1686 DEFINE_NLITEM_CONSTANT (ABDAY_1);
1687 DEFINE_NLITEM_CONSTANT (ABDAY_2);
1688 DEFINE_NLITEM_CONSTANT (ABDAY_3);
1689 DEFINE_NLITEM_CONSTANT (ABDAY_4);
1690 DEFINE_NLITEM_CONSTANT (ABDAY_5);
1691 DEFINE_NLITEM_CONSTANT (ABDAY_6);
1692 DEFINE_NLITEM_CONSTANT (ABDAY_7);
1693
1694 /* Long-named days of the week. */
1695 DEFINE_NLITEM_CONSTANT (DAY_1); /* Sunday */
1696 DEFINE_NLITEM_CONSTANT (DAY_2); /* Monday */
1697 DEFINE_NLITEM_CONSTANT (DAY_3); /* Tuesday */
1698 DEFINE_NLITEM_CONSTANT (DAY_4); /* Wednesday */
1699 DEFINE_NLITEM_CONSTANT (DAY_5); /* Thursday */
1700 DEFINE_NLITEM_CONSTANT (DAY_6); /* Friday */
1701 DEFINE_NLITEM_CONSTANT (DAY_7); /* Saturday */
1702
1703 /* Abbreviated month names. */
1704 DEFINE_NLITEM_CONSTANT (ABMON_1); /* Jan */
1705 DEFINE_NLITEM_CONSTANT (ABMON_2);
1706 DEFINE_NLITEM_CONSTANT (ABMON_3);
1707 DEFINE_NLITEM_CONSTANT (ABMON_4);
1708 DEFINE_NLITEM_CONSTANT (ABMON_5);
1709 DEFINE_NLITEM_CONSTANT (ABMON_6);
1710 DEFINE_NLITEM_CONSTANT (ABMON_7);
1711 DEFINE_NLITEM_CONSTANT (ABMON_8);
1712 DEFINE_NLITEM_CONSTANT (ABMON_9);
1713 DEFINE_NLITEM_CONSTANT (ABMON_10);
1714 DEFINE_NLITEM_CONSTANT (ABMON_11);
1715 DEFINE_NLITEM_CONSTANT (ABMON_12);
1716
1717 /* Long month names. */
1718 DEFINE_NLITEM_CONSTANT (MON_1); /* January */
1719 DEFINE_NLITEM_CONSTANT (MON_2);
1720 DEFINE_NLITEM_CONSTANT (MON_3);
1721 DEFINE_NLITEM_CONSTANT (MON_4);
1722 DEFINE_NLITEM_CONSTANT (MON_5);
1723 DEFINE_NLITEM_CONSTANT (MON_6);
1724 DEFINE_NLITEM_CONSTANT (MON_7);
1725 DEFINE_NLITEM_CONSTANT (MON_8);
1726 DEFINE_NLITEM_CONSTANT (MON_9);
1727 DEFINE_NLITEM_CONSTANT (MON_10);
1728 DEFINE_NLITEM_CONSTANT (MON_11);
1729 DEFINE_NLITEM_CONSTANT (MON_12);
1730
1731 DEFINE_NLITEM_CONSTANT (AM_STR); /* Ante meridiem string. */
1732 DEFINE_NLITEM_CONSTANT (PM_STR); /* Post meridiem string. */
1733
1734 DEFINE_NLITEM_CONSTANT (D_T_FMT); /* Date and time format for strftime. */
1735 DEFINE_NLITEM_CONSTANT (D_FMT); /* Date format for strftime. */
1736 DEFINE_NLITEM_CONSTANT (T_FMT); /* Time format for strftime. */
1737 DEFINE_NLITEM_CONSTANT (T_FMT_AMPM);/* 12-hour time format for strftime. */
1738
1739 DEFINE_NLITEM_CONSTANT (ERA); /* Alternate era. */
1740 DEFINE_NLITEM_CONSTANT (ERA_D_FMT); /* Date in alternate era format. */
1741 DEFINE_NLITEM_CONSTANT (ERA_D_T_FMT); /* Date and time in alternate era
1742 format. */
1743 DEFINE_NLITEM_CONSTANT (ERA_T_FMT); /* Time in alternate era format. */
1744
1745 DEFINE_NLITEM_CONSTANT (ALT_DIGITS); /* Alternate symbols for digits. */
1746 DEFINE_NLITEM_CONSTANT (RADIXCHAR);
1747 DEFINE_NLITEM_CONSTANT (THOUSEP);
1748
1749 #ifdef YESEXPR
1750 DEFINE_NLITEM_CONSTANT (YESEXPR);
1751 #endif
1752 #ifdef NOEXPR
1753 DEFINE_NLITEM_CONSTANT (NOEXPR);
1754 #endif
1755
1756 #ifdef CRNCYSTR /* currency symbol */
1757 DEFINE_NLITEM_CONSTANT (CRNCYSTR);
1758 #endif
1759
1760 /* GNU extensions. */
1761
1762 #ifdef ERA_YEAR
1763 DEFINE_NLITEM_CONSTANT (ERA_YEAR); /* Year in alternate era format. */
1764 #endif
1765
1766 /* LC_MONETARY category: formatting of monetary quantities.
1767 These items each correspond to a member of `struct lconv',
1768 defined in <locale.h>. */
1769 #ifdef INT_CURR_SYMBOL
1770 DEFINE_NLITEM_CONSTANT (INT_CURR_SYMBOL);
1771 #endif
1772 #ifdef MON_DECIMAL_POINT
1773 DEFINE_NLITEM_CONSTANT (MON_DECIMAL_POINT);
1774 #endif
1775 #ifdef MON_THOUSANDS_SEP
1776 DEFINE_NLITEM_CONSTANT (MON_THOUSANDS_SEP);
1777 #endif
1778 #ifdef MON_GROUPING
1779 DEFINE_NLITEM_CONSTANT (MON_GROUPING);
1780 #endif
1781 #ifdef POSITIVE_SIGN
1782 DEFINE_NLITEM_CONSTANT (POSITIVE_SIGN);
1783 #endif
1784 #ifdef NEGATIVE_SIGN
1785 DEFINE_NLITEM_CONSTANT (NEGATIVE_SIGN);
1786 #endif
1787 #ifdef GROUPING
1788 DEFINE_NLITEM_CONSTANT (GROUPING);
1789 #endif
1790 #ifdef INT_FRAC_DIGITS
1791 DEFINE_NLITEM_CONSTANT (INT_FRAC_DIGITS);
1792 #endif
1793 #ifdef FRAC_DIGITS
1794 DEFINE_NLITEM_CONSTANT (FRAC_DIGITS);
1795 #endif
1796 #ifdef P_CS_PRECEDES
1797 DEFINE_NLITEM_CONSTANT (P_CS_PRECEDES);
1798 #endif
1799 #ifdef P_SEP_BY_SPACE
1800 DEFINE_NLITEM_CONSTANT (P_SEP_BY_SPACE);
1801 #endif
1802 #ifdef N_CS_PRECEDES
1803 DEFINE_NLITEM_CONSTANT (N_CS_PRECEDES);
1804 #endif
1805 #ifdef N_SEP_BY_SPACE
1806 DEFINE_NLITEM_CONSTANT (N_SEP_BY_SPACE);
1807 #endif
1808 #ifdef P_SIGN_POSN
1809 DEFINE_NLITEM_CONSTANT (P_SIGN_POSN);
1810 #endif
1811 #ifdef N_SIGN_POSN
1812 DEFINE_NLITEM_CONSTANT (N_SIGN_POSN);
1813 #endif
1814 #ifdef INT_P_CS_PRECEDES
1815 DEFINE_NLITEM_CONSTANT (INT_P_CS_PRECEDES);
1816 #endif
1817 #ifdef INT_P_SEP_BY_SPACE
1818 DEFINE_NLITEM_CONSTANT (INT_P_SEP_BY_SPACE);
1819 #endif
1820 #ifdef INT_N_CS_PRECEDES
1821 DEFINE_NLITEM_CONSTANT (INT_N_CS_PRECEDES);
1822 #endif
1823 #ifdef INT_N_SEP_BY_SPACE
1824 DEFINE_NLITEM_CONSTANT (INT_N_SEP_BY_SPACE);
1825 #endif
1826 #ifdef INT_P_SIGN_POSN
1827 DEFINE_NLITEM_CONSTANT (INT_P_SIGN_POSN);
1828 #endif
1829 #ifdef INT_N_SIGN_POSN
1830 DEFINE_NLITEM_CONSTANT (INT_N_SIGN_POSN);
1831 #endif
1832
1833 #undef DEFINE_NLITEM_CONSTANT
1834
1835 #endif /* HAVE_NL_TYPES_H */
1836 }
1837
1838 \f
1839 void
1840 scm_init_i18n ()
1841 {
1842 SCM global_locale_smob;
1843
1844 #ifdef HAVE_NL_LANGINFO
1845 scm_add_feature ("nl-langinfo");
1846 define_langinfo_items ();
1847 #endif
1848
1849 #include "libguile/i18n.x"
1850
1851 /* Initialize the global locale object with a special `locale' SMOB. */
1852 /* XXX: We don't define it as `LC_GLOBAL_LOCALE' because of bugs as of
1853 glibc <= 2.11 not (yet) worked around by Gnulib. See
1854 http://sourceware.org/bugzilla/show_bug.cgi?id=11009 for details. */
1855 SCM_NEWSMOB (global_locale_smob, scm_tc16_locale_smob_type, NULL);
1856 SCM_VARIABLE_SET (scm_global_locale, global_locale_smob);
1857 }
1858
1859 void
1860 scm_bootstrap_i18n ()
1861 {
1862 scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
1863 "scm_init_i18n",
1864 (scm_t_extension_init_func) scm_init_i18n,
1865 NULL);
1866
1867 }
1868
1869
1870 /*
1871 Local Variables:
1872 c-file-style: "gnu"
1873 End:
1874 */