Add (ice-9 unicode) module

[bpt/guile.git] / doc / ref / api-data.texi
diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi

index 81c6d5b..23f3bfc 100644 (file)
--- a/doc/ref/api-data.texi
+++ b/doc/ref/api-data.texi
@@ -1,7 +1,7 @@
  @c -*-texinfo-*-
  @c This is part of the GNU Guile Reference Manual.
  @c Copyright (C)  1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007,
-@c   2008, 2009, 2010, 2011, 2012, 2013  Free Software Foundation, Inc.
+@c   2008, 2009, 2010, 2011, 2012, 2013, 2014  Free Software Foundation, Inc.
  @c See the file guile.texi for copying conditions.
  
  @node Simple Data Types
@@ -56,6 +56,7 @@ For the documentation of such @dfn{compound} data types, see
  @tpindex Booleans
  
  The two boolean values are @code{#t} for true and @code{#f} for false.
+They can also be written as @code{#true} and @code{#false}, as per R7RS.
  
  Boolean values are returned by predicate procedures, such as the general
  equality predicates @code{eq?}, @code{eqv?} and @code{equal?}
@@ -318,7 +319,8 @@ Scheme integers can be exact and inexact.  For example, a number
  written as @code{3.0} with an explicit decimal-point is inexact, but
  it is also an integer.  The functions @code{integer?} and
  @code{scm_is_integer} report true for such a number, but the functions
-@code{scm_is_signed_integer} and @code{scm_is_unsigned_integer} only
+@code{exact-integer?}, @code{scm_is_exact_integer},
+@code{scm_is_signed_integer}, and @code{scm_is_unsigned_integer} only
  allow exact integers and thus report false.  Likewise, the conversion
  functions like @code{scm_to_signed_integer} only accept exact
  integers.
@@ -333,7 +335,7 @@ will become exact fractions.)
  @deffn {Scheme Procedure} integer? x
  @deffnx {C Function} scm_integer_p (x)
  Return @code{#t} if @var{x} is an exact or inexact integer number, else
-@code{#f}.
+return @code{#f}.
  
  @lisp
  (integer? 487)
@@ -346,7 +348,7 @@ Return @code{#t} if @var{x} is an exact or inexact integer number, else
  @result{} #f
  
  (integer? +inf.0)
-@result{} #t
+@result{} #f
  @end lisp
  @end deffn
  
@@ -354,6 +356,24 @@ Return @code{#t} if @var{x} is an exact or inexact integer number, else
  This is equivalent to @code{scm_is_true (scm_integer_p (x))}.
  @end deftypefn
  
+@deffn {Scheme Procedure} exact-integer? x
+@deffnx {C Function} scm_exact_integer_p (x)
+Return @code{#t} if @var{x} is an exact integer number, else
+return @code{#f}.
+
+@lisp
+(exact-integer? 37)
+@result{} #t
+
+(exact-integer? 3.0)
+@result{} #f
+@end lisp
+@end deffn
+
+@deftypefn {C Function} int scm_is_exact_integer (SCM x)
+This is equivalent to @code{scm_is_true (scm_exact_integer_p (x))}.
+@end deftypefn
+
  @defvr  {C Type} scm_t_int8
  @defvrx {C Type} scm_t_uint8
  @defvrx {C Type} scm_t_int16
@@ -425,6 +445,8 @@ function will always succeed and will always return an exact number.
  @deftypefnx {C Function} scm_t_uint64 scm_to_uint64 (SCM x)
  @deftypefnx {C Function} scm_t_intmax scm_to_intmax (SCM x)
  @deftypefnx {C Function} scm_t_uintmax scm_to_uintmax (SCM x)
+@deftypefnx {C Function} scm_t_intptr scm_to_intptr_t (SCM x)
+@deftypefnx {C Function} scm_t_uintptr scm_to_uintptr_t (SCM x)
  When @var{x} represents an exact integer that fits into the indicated
  C type, return that integer.  Else signal an error, either a
  `wrong-type' error when @var{x} is not an exact integer, or an
@@ -459,6 +481,8 @@ the corresponding types are.
  @deftypefnx {C Function} SCM scm_from_uint64 (scm_t_uint64 x)
  @deftypefnx {C Function} SCM scm_from_intmax (scm_t_intmax x)
  @deftypefnx {C Function} SCM scm_from_uintmax (scm_t_uintmax x)
+@deftypefnx {C Function} SCM scm_from_intptr_t (scm_t_intptr x)
+@deftypefnx {C Function} SCM scm_from_uintptr_t (scm_t_uintptr x)
  Return the @code{SCM} value that represents the integer @var{x}.
  These functions will always succeed and will always return an exact
  number.
@@ -1688,7 +1712,7 @@ starts from 0 for the least significant bit.
  
  @deffn {Scheme Procedure} ash n count
  @deffnx {C Function} scm_ash (n, count)
-Return @math{floor(@var{n} * 2^@var{count})}.
+Return @math{floor(n * 2^count)}.
  @var{n} and @var{count} must be exact integers.
  
  With @var{n} viewed as an infinite-precision twos-complement
@@ -1707,7 +1731,7 @@ when @var{count} is negative.  This is an ``arithmetic'' shift.
  
  @deffn {Scheme Procedure} round-ash n count
  @deffnx {C Function} scm_round_ash (n, count)
-Return @math{round(@var{n} * 2^@var{count})}.
+Return @math{round(n * 2^count)}.
  @var{n} and @var{count} must be exact integers.
  
  With @var{n} viewed as an infinite-precision twos-complement
@@ -2046,6 +2070,9 @@ name for each character.
  The short name for the ``delete'' character (code point U+007F) is
  @code{#\del}.
  
+The R7RS name for the ``escape'' character (code point U+001B) is
+@code{#\escape}.
+
  There are also a few alternative names left over for compatibility with
  previous versions of Guile.
  
@@ -2308,6 +2335,24 @@ lowercase, and titlecase forms respectively.  The type
  @code{scm_t_wchar} is a signed, 32-bit integer.
  @end deftypefn
  
+Characters also have ``formal names'', which are defined by Unicode.
+These names can be accessed in Guile from the @code{(ice-9 unicode)}
+module:
+
+@example
+(use-modules (ice-9 unicode))
+@end example
+
+@deffn {Scheme Procedure} char->formal-name chr
+Return the formal all-upper-case Unicode name of @var{chr},
+as a string, or @code{#f} if the character has no name.
+@end deffn
+
+@deffn {Scheme Procedure} formal-name->char name
+Return the character whose formal all-upper-case Unicode name is
+@var{name}, or @code{#f} if no such character is known.
+@end deffn
+
  @node Character Sets
  @subsection Character Sets
  
@@ -2918,9 +2963,10 @@ The read syntax for strings is an arbitrarily long sequence of
  characters enclosed in double quotes (@nicode{"}).
  
  Backslash is an escape character and can be used to insert the following
-special characters.  @nicode{\"} and @nicode{\\} are R5RS standard, the
-next seven are R6RS standard --- notice they follow C syntax --- and the
-remaining four are Guile extensions.
+special characters.  @nicode{\"} and @nicode{\\} are R5RS standard,
+@nicode{\|} is R7RS standard, the next seven are R6RS standard ---
+notice they follow C syntax --- and the remaining four are Guile
+extensions.
  
  @table @asis
  @item @nicode{\\}
@@ -2930,6 +2976,9 @@ Backslash character.
  Double quote character (an unescaped @nicode{"} is otherwise the end
  of the string).
  
+@item @nicode{\|}
+Vertical bar character.
+
  @item @nicode{\a}
  Bell character (ASCII 7).
  
@@ -3148,7 +3197,7 @@ placed between the strings, and defaults to the symbol
  @item infix
  Insert the separator between list elements.  An empty string
  will produce an empty list.
-@item string-infix
+@item strict-infix
  Like @code{infix}, but will raise an error if given the empty
  list.
  @item suffix
@@ -4222,7 +4271,7 @@ Unlike the rest of the procedures in this section, you have to load the
  (use-modules (ice-9 iconv))
  @end example
  
-@deffn string->bytevector string encoding [conversion-strategy]
+@deffn {Scheme Procedure} string->bytevector string encoding [conversion-strategy]
  Encode @var{string} as a sequence of bytes.
  
  The string will be encoded in the character set specified by the
@@ -4236,7 +4285,7 @@ bytevectors.  @xref{Ports}, for more on character encodings and
  conversion strategies.
  @end deffn
  
-@deffn bytevector->string bytevector encoding [conversion-strategy]
+@deffn {Scheme Procedure} bytevector->string bytevector encoding [conversion-strategy]
  Decode @var{bytevector} into a string.
  
  The bytes will be decoded from the character set by the @var{encoding}
@@ -4247,7 +4296,7 @@ argument to modify this behavior.  @xref{Ports}, for more on character
  encodings and conversion strategies.
  @end deffn
  
-@deffn call-with-output-encoded-string encoding proc [conversion-strategy]
+@deffn {Scheme Procedure} call-with-output-encoded-string encoding proc [conversion-strategy]
  Like @code{call-with-output-string}, but instead of returning a string,
  returns a encoding of the string according to @var{encoding}, as a
  bytevector.  This procedure can be more efficient than collecting a
@@ -4303,9 +4352,9 @@ If the C string is ill-formed, an error will be raised.
  
  Note that these functions should @emph{not} be used to convert C string
  constants, because there is no guarantee that the current locale will
-match that of the source code.  To convert C string constants, use
-@code{scm_from_latin1_string}, @code{scm_from_utf8_string} or
-@code{scm_from_utf32_string}.
+match that of the execution character set, used for string and character
+constants.  Most modern C compilers use UTF-8 by default, so to convert
+C string constants we recommend @code{scm_from_utf8_string}.
  @end deftypefn
  
  @deftypefn  {C Function} SCM scm_take_locale_string (char *str)
@@ -4448,6 +4497,19 @@ returned is the number of bytes for @code{scm_to_latin1_stringn} and
  for @code{scm_to_utf32_stringn}.
  @end deftypefn
  
+It is not often the case, but sometimes when you are dealing with the
+implementation details of a port, you need to encode and decode strings
+according to the encoding and conversion strategy of the port.  There
+are some convenience functions for that purpose as well.
+
+@deftypefn {C Function} SCM scm_from_port_string (const char *str, SCM port)
+@deftypefnx {C Function} SCM scm_from_port_stringn (const char *str, size_t len, SCM port)
+@deftypefnx {C Function} char* scm_to_port_string (SCM str, SCM port)
+@deftypefnx {C Function} char* scm_to_port_stringn (SCM str, size_t *lenp, SCM port)
+Like @code{scm_from_stringn} and friends, except they take their
+encoding and conversion strategy from a given port object.
+@end deftypefn
+
  @node String Internals
  @subsubsection String Internals
  
@@ -5362,15 +5424,15 @@ When you want to do more from C, you should convert between symbols
  and strings using @code{scm_symbol_to_string} and
  @code{scm_string_to_symbol} and work with the strings.
  
-@deftypefn {C Function} scm_from_latin1_symbol (const char *name)
-@deftypefnx {C Function} scm_from_utf8_symbol (const char *name)
+@deftypefn {C Function} SCM scm_from_latin1_symbol (const char *name)
+@deftypefnx {C Function} SCM scm_from_utf8_symbol (const char *name)
  Construct and return a Scheme symbol whose name is specified by the
  null-terminated C string @var{name}.  These are appropriate when
  the C string is hard-coded in the source code.
  @end deftypefn
  
-@deftypefn {C Function} scm_from_locale_symbol (const char *name)
-@deftypefnx {C Function} scm_from_locale_symboln (const char *name, size_t len)
+@deftypefn {C Function} SCM scm_from_locale_symbol (const char *name)
+@deftypefnx {C Function} SCM scm_from_locale_symboln (const char *name, size_t len)
  Construct and return a Scheme symbol whose name is specified by
  @var{name}.  For @code{scm_from_locale_symbol}, @var{name} must be null
  terminated; for @code{scm_from_locale_symboln} the length of @var{name} is
@@ -5378,8 +5440,9 @@ specified explicitly by @var{len}.
  
  Note that these functions should @emph{not} be used when @var{name} is a
  C string constant, because there is no guarantee that the current locale
-will match that of the source code.  In such cases, use
-@code{scm_from_latin1_symbol} or @code{scm_from_utf8_symbol}.
+will match that of the execution character set, used for string and
+character constants.  Most modern C compilers use UTF-8 by default, so
+in such cases we recommend @code{scm_from_utf8_symbol}.
  @end deftypefn
  
  @deftypefn  {C Function} SCM scm_take_locale_symbol (char *str)
@@ -5498,6 +5561,8 @@ approach to properties, see @ref{Object Properties}.
  @node Symbol Read Syntax
  @subsubsection Extended Read Syntax for Symbols
  
+@cindex r7rs-symbols
+
  The read syntax for a symbol is a sequence of letters, digits, and
  @dfn{extended alphabetic characters}, beginning with a character that
  cannot begin a number.  In addition, the special cases of @code{+},
@@ -5547,6 +5612,26 @@ Although Guile provides this extended read syntax for symbols,
  widespread usage of it is discouraged because it is not portable and not
  very readable.
  
+Alternatively, if you enable the @code{r7rs-symbols} read option
+(@pxref{Scheme Read}), you can write arbitrary symbols using the same
+notation used for strings, except delimited by vertical bars instead of
+double quotes.
+
+@example
+|foo bar|
+|\x3BB; is a greek lambda|
+|\| is a vertical bar|
+@end example
+
+Note that there's also an @code{r7rs-symbols} print option
+(@pxref{Scheme Write}).  To enable the use of this notation, evaluate
+one or both of the following expressions:
+
+@example
+(read-enable  'r7rs-symbols)
+(print-enable 'r7rs-symbols)
+@end example
+
  
  @node Symbol Uninterned
  @subsubsection Uninterned Symbols
@@ -5779,6 +5864,8 @@ For further details on @code{let-keywords}, @code{define*} and other
  facilities provided by the @code{(ice-9 optargs)} module, see
  @ref{Optional Arguments}.
  
+To handle keyword arguments from procedures implemented in C,
+use @code{scm_c_bind_keyword_arguments} (@pxref{Keyword Procedures}).
  
  @node Keyword Read Syntax
  @subsubsection Keyword Read Syntax
@@ -5870,8 +5957,9 @@ Equivalent to @code{scm_symbol_to_keyword (scm_from_locale_symbol
  
  Note that these functions should @emph{not} be used when @var{name} is a
  C string constant, because there is no guarantee that the current locale
-will match that of the source code.  In such cases, use
-@code{scm_from_latin1_keyword} or @code{scm_from_utf8_keyword}.
+will match that of the execution character set, used for string and
+character constants.  Most modern C compilers use UTF-8 by default, so
+in such cases we recommend @code{scm_from_utf8_keyword}.
  @end deftypefn
  
  @deftypefn {C Function} SCM scm_from_latin1_keyword (const char *name)
@@ -5881,6 +5969,70 @@ Equivalent to @code{scm_symbol_to_keyword (scm_from_latin1_symbol
  (@var{name}))}, respectively.
  @end deftypefn
  
+@deftypefn {C Function} void scm_c_bind_keyword_arguments (const char *subr, @
+                             SCM rest, scm_t_keyword_arguments_flags flags, @
+                             SCM keyword1, SCM *argp1, @
+                             @dots{}, @
+                             SCM keywordN, SCM *argpN, @
+                             @nicode{SCM_UNDEFINED})
+
+Extract the specified keyword arguments from @var{rest}, which is not
+modified.  If the keyword argument @var{keyword1} is present in
+@var{rest} with an associated value, that value is stored in the
+variable pointed to by @var{argp1}, otherwise the variable is left
+unchanged.  Similarly for the other keywords and argument pointers up to
+@var{keywordN} and @var{argpN}.  The argument list to
+@code{scm_c_bind_keyword_arguments} must be terminated by
+@code{SCM_UNDEFINED}.
+
+Note that since the variables pointed to by @var{argp1} through
+@var{argpN} are left unchanged if the associated keyword argument is not
+present, they should be initialized to their default values before
+calling @code{scm_c_bind_keyword_arguments}.  Alternatively, you can
+initialize them to @code{SCM_UNDEFINED} before the call, and then use
+@code{SCM_UNBNDP} after the call to see which ones were provided.
+
+If an unrecognized keyword argument is present in @var{rest} and
+@var{flags} does not contain @code{SCM_ALLOW_OTHER_KEYS}, or if
+non-keyword arguments are present and @var{flags} does not contain
+@code{SCM_ALLOW_NON_KEYWORD_ARGUMENTS}, an exception is raised.
+@var{subr} should be the name of the procedure receiving the keyword
+arguments, for purposes of error reporting.
+
+For example:
+
+@example
+SCM k_delimiter;
+SCM k_grammar;
+SCM sym_infix;
+
+SCM my_string_join (SCM strings, SCM rest)
+@{
+  SCM delimiter = SCM_UNDEFINED;
+  SCM grammar   = sym_infix;
+
+  scm_c_bind_keyword_arguments ("my-string-join", rest, 0,
+                                k_delimiter, &delimiter,
+                                k_grammar, &grammar,
+                                SCM_UNDEFINED);
+
+  if (SCM_UNBNDP (delimiter))
+    delimiter = scm_from_utf8_string (" ");
+
+  return scm_string_join (strings, delimiter, grammar);
+@}
+
+void my_init ()
+@{
+  k_delimiter = scm_from_utf8_keyword ("delimiter");
+  k_grammar   = scm_from_utf8_keyword ("grammar");
+  sym_infix   = scm_from_utf8_symbol  ("infix");
+  scm_c_define_gsubr ("my-string-join", 1, 0, 1, my_string_join);
+@}
+@end example
+@end deftypefn
+
+
  @node Other Types
  @subsection ``Functionality-Centric'' Data Types