Add (ice-9 unicode) module
authorAndy Wingo <wingo@pobox.com>
Fri, 12 Sep 2014 15:00:59 +0000 (17:00 +0200)
committerMark H Weaver <mhw@netris.org>
Tue, 30 Sep 2014 01:54:14 +0000 (21:54 -0400)
* libguile/unicode.c:
* libguile/unicode.h:
* test-suite/tests/unicode.test:
* module/ice-9/unicode.scm: New files.

* module/Makefile.am:
* libguile/Makefile.am:
* test-suite/Makefile.am:
* libguile/init.c: Wire new files into the build.

* doc/ref/api-data.texi: Add docs.

doc/ref/api-data.texi
libguile/Makefile.am
libguile/init.c
libguile/unicode.c [new file with mode: 0644]
libguile/unicode.h [new file with mode: 0644]
module/Makefile.am
module/ice-9/unicode.scm [new file with mode: 0644]
test-suite/Makefile.am
test-suite/tests/unicode.test [new file with mode: 0644]

index acdf9ca..c1dd761 100644 (file)
@@ -2331,6 +2331,24 @@ lowercase, and titlecase forms respectively.  The type
 @code{scm_t_wchar} is a signed, 32-bit integer.
 @end deftypefn
 
+Characters also have ``formal names'', which are defined by Unicode.
+These names can be accessed in Guile from the @code{(ice-9 unicode)}
+module:
+
+@example
+(use-modules (ice-9 unicode))
+@end example
+
+@deffn {Scheme Procedure} char->formal-name chr
+Return the formal all-upper-case Unicode name of @var{ch},
+as a string, or @code{#f} if the character has no name.
+@end deffn
+
+@deffn {Scheme Procedure} formal-name->char name
+Return the character whose formal all-upper-case Unicode name is
+@var{name}, or @code{#f} if no such character is known.
+@end deffn
+
 @node Character Sets
 @subsection Character Sets
 
index 2bdf71f..55dbc5f 100644 (file)
@@ -215,6 +215,7 @@ libguile_@GUILE_EFFECTIVE_VERSION@_la_SOURCES =                             \
        threads.c                               \
        throw.c                                 \
        trees.c                                 \
+       unicode.c                               \
        uniform.c                               \
        values.c                                \
        variable.c                              \
@@ -312,6 +313,7 @@ DOT_X_FILES =                                       \
        threads.x                               \
        throw.x                                 \
        trees.x                                 \
+       unicode.x                               \
        uniform.x                               \
        values.x                                \
        variable.x                              \
@@ -413,6 +415,7 @@ DOT_DOC_FILES =                             \
        threads.doc                             \
        throw.doc                               \
        trees.doc                               \
+       unicode.doc                             \
        uniform.doc                             \
        values.doc                              \
        variable.doc                            \
@@ -651,6 +654,7 @@ modinclude_HEADERS =                                \
        throw.h                                 \
        trees.h                                 \
        validate.h                              \
+       unicode.h                               \
        uniform.h                               \
        values.h                                \
        variable.h                              \
index 61b81e9..f558413 100644 (file)
 #include "libguile/throw.h"
 #include "libguile/arrays.h"
 #include "libguile/trees.h"
+#include "libguile/unicode.h"
 #include "libguile/values.h"
 #include "libguile/variable.h"
 #include "libguile/vectors.h"
@@ -512,6 +513,7 @@ scm_i_init_guile (void *base)
 #endif
   scm_bootstrap_i18n ();
   scm_init_script ();
+  scm_init_unicode ();
 
   scm_init_goops ();
 
diff --git a/libguile/unicode.c b/libguile/unicode.c
new file mode 100644 (file)
index 0000000..65d319a
--- /dev/null
@@ -0,0 +1,95 @@
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+ *
+ * This library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+\f
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <ctype.h>
+#include <limits.h>
+#include <unicase.h>
+#include <unictype.h>
+#include <uniname.h>
+
+#include "libguile/_scm.h"
+#include "libguile/validate.h"
+
+#include "libguile/unicode.h"
+
+\f
+
+SCM_DEFINE (scm_char_to_formal_name, "char->formal-name", 1, 0, 0, 
+            (SCM ch),
+           "Return the formal all-upper-case unicode name of @var{ch},\n"
+            "as a string.  If the character has no name, return @code{#f}.")
+#define FUNC_NAME s_scm_char_to_formal_name
+{
+  char buf[UNINAME_MAX + 1];
+
+  SCM_VALIDATE_CHAR (1, ch);
+
+  memset(buf, 0, UNINAME_MAX + 1);
+
+  if (unicode_character_name (SCM_CHAR (ch), buf))
+    return scm_from_latin1_string (buf);
+
+  return SCM_BOOL_F;
+}
+#undef FUNC_NAME
+
+SCM_DEFINE (scm_formal_name_to_char, "formal-name->char", 1, 0, 0, 
+            (SCM name),
+           "Return the character whose formal all-upper-case unicode name is\n"
+            "@var{name}, or @code{#f} if no such character is known.")
+#define FUNC_NAME s_scm_formal_name_to_char
+{
+  char *c_name;
+  scm_t_wchar ret;
+  
+  SCM_VALIDATE_STRING (1, name);
+
+  c_name = scm_to_latin1_string (name);
+  ret = unicode_name_character (c_name);
+  free (c_name);
+
+  return ret == UNINAME_INVALID ? SCM_BOOL_F : SCM_MAKE_CHAR (ret);
+}
+#undef FUNC_NAME
+
+static void
+scm_load_unicode (void)
+{
+#ifndef SCM_MAGIC_SNARFER
+#include "libguile/unicode.x"
+#endif
+}
+
+void
+scm_init_unicode (void)
+{
+  scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
+                            "scm_init_unicode",
+                            (scm_t_extension_init_func)scm_load_unicode,
+                            NULL);
+}
+
+/*
+  Local Variables:
+  c-file-style: "gnu"
+  End:
+*/
diff --git a/libguile/unicode.h b/libguile/unicode.h
new file mode 100644 (file)
index 0000000..88261c1
--- /dev/null
@@ -0,0 +1,37 @@
+/* classes: h_files */
+
+#ifndef SCM_UNICODE_H
+#define SCM_UNICODE_H
+
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+ *
+ * This library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+\f
+
+#include "libguile/__scm.h"
+
+SCM_INTERNAL SCM scm_formal_name_to_char (SCM);
+SCM_INTERNAL SCM scm_char_to_formal_name (SCM);
+SCM_INTERNAL void scm_init_unicode (void);
+
+#endif  /* SCM_UNICODE_H */
+
+/*
+  Local Variables:
+  c-file-style: "gnu"
+  End:
+*/
index b257116..a9aaa76 100644 (file)
@@ -255,7 +255,8 @@ ICE_9_SOURCES = \
   ice-9/weak-vector.scm \
   ice-9/list.scm \
   ice-9/serialize.scm \
-  ice-9/local-eval.scm
+  ice-9/local-eval.scm \
+  ice-9/unicode.scm
 
 srfi/srfi-64.go: srfi/srfi-64.scm srfi/srfi-64/testing.scm
 
diff --git a/module/ice-9/unicode.scm b/module/ice-9/unicode.scm
new file mode 100644 (file)
index 0000000..534d9c4
--- /dev/null
@@ -0,0 +1,26 @@
+;; unicode
+
+;;;; Copyright (C) 2014 Free Software Foundation, Inc.
+;;;; 
+;;;; This library is free software: you can redistribute it and/or modify
+;;;; it under the terms of the GNU Lesser General Public License as
+;;;; published by the Free Software Foundation, either version 3 of the
+;;;; License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;;; GNU Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library.  If not, see
+;;;; <http://www.gnu.org/licenses/>.
+;;;; 
+
+(define-module (ice-9 unicode)
+  #:export (formal-name->char
+            char->formal-name))
+
+(eval-when (expand load eval)
+  (load-extension (string-append "libguile-" (effective-version))
+                  "scm_init_unicode"))
index a050f83..3b10353 100644 (file)
@@ -176,6 +176,7 @@ SCM_TESTS = tests/00-initial-env.test               \
            tests/time.test                     \
            tests/tree-il.test                  \
            tests/types.test                    \
+           tests/unicode.test                  \
            tests/version.test                  \
            tests/vectors.test                  \
            tests/vlist.test                    \
diff --git a/test-suite/tests/unicode.test b/test-suite/tests/unicode.test
new file mode 100644 (file)
index 0000000..5cfafca
--- /dev/null
@@ -0,0 +1,28 @@
+;;;; unicode.test                               -*- scheme -*-
+;;;;
+;;;; Copyright (C) 2014 Free Software Foundation, Inc.
+;;;; 
+;;;; This library is free software: you can redistribute it and/or modify
+;;;; it under the terms of the GNU Lesser General Public License as
+;;;; published by the Free Software Foundation, either version 3 of the
+;;;; License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;;; GNU Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library.  If not, see
+;;;; <http://www.gnu.org/licenses/>.
+;;;; 
+
+(define-module (test-suite test-unicode)
+  #:use-module (test-suite lib)
+  #:use-module (ice-9 unicode))
+
+(pass-if-equal "LATIN SMALL LETTER A" (char->formal-name #\a))
+(pass-if-equal #\a (formal-name->char "LATIN SMALL LETTER A"))
+
+(pass-if-equal #f (char->formal-name #\nul))
+(pass-if-equal #f (formal-name->char "not a known formal name"))