Merge remote-tracking branch 'origin/stable-2.0'

[bpt/guile.git] / doc / ref / api-data.texi
diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi

index 39c9790..e3c94e2 100644 (file)
--- a/doc/ref/api-data.texi
+++ b/doc/ref/api-data.texi
@@ -1,7 +1,7 @@
  @c -*-texinfo-*-
  @c This is part of the GNU Guile Reference Manual.
-@c Copyright (C)  1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-@c   Free Software Foundation, Inc.
+@c Copyright (C)  1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007,
+@c   2008, 2009, 2010, 2011, 2012, 2013  Free Software Foundation, Inc.
  @c See the file guile.texi for copying conditions.
  
  @node Simple Data Types
@@ -414,6 +414,7 @@ function will always succeed and will always return an exact number.
  @deftypefnx {C Function} {unsigned long long} scm_to_ulong_long (SCM x)
  @deftypefnx {C Function} size_t scm_to_size_t (SCM x)
  @deftypefnx {C Function} ssize_t scm_to_ssize_t (SCM x)
+@deftypefnx {C Function} scm_t_ptrdiff scm_to_ptrdiff_t (SCM x)
  @deftypefnx {C Function} scm_t_int8 scm_to_int8 (SCM x)
  @deftypefnx {C Function} scm_t_uint8 scm_to_uint8 (SCM x)
  @deftypefnx {C Function} scm_t_int16 scm_to_int16 (SCM x)
@@ -447,6 +448,7 @@ the corresponding types are.
  @deftypefnx {C Function} SCM scm_from_ulong_long (unsigned long long x)
  @deftypefnx {C Function} SCM scm_from_size_t (size_t x)
  @deftypefnx {C Function} SCM scm_from_ssize_t (ssize_t x)
+@deftypefnx {C Function} SCM scm_from_ptrdiff_t (scm_t_ptrdiff x)
  @deftypefnx {C Function} SCM scm_from_int8 (scm_t_int8 x)
  @deftypefnx {C Function} SCM scm_from_uint8 (scm_t_uint8 x)
  @deftypefnx {C Function} SCM scm_from_int16 (scm_t_int16 x)
@@ -1684,19 +1686,15 @@ starts from 0 for the least significant bit.
  @end lisp
  @end deffn
  
-@deffn {Scheme Procedure} ash n cnt
-@deffnx {C Function} scm_ash (n, cnt)
-Return @var{n} shifted left by @var{cnt} bits, or shifted right if
-@var{cnt} is negative.  This is an ``arithmetic'' shift.
+@deffn {Scheme Procedure} ash n count
+@deffnx {C Function} scm_ash (n, count)
+Return @math{floor(n * 2^count)}.
+@var{n} and @var{count} must be exact integers.
  
-This is effectively a multiplication by @m{2^{cnt}, 2^@var{cnt}}, and
-when @var{cnt} is negative it's a division, rounded towards negative
-infinity.  (Note that this is not the same rounding as @code{quotient}
-does.)
-
-With @var{n} viewed as an infinite precision twos complement,
-@code{ash} means a left shift introducing zero bits, or a right shift
-dropping bits.
+With @var{n} viewed as an infinite-precision twos-complement
+integer, @code{ash} means a left shift introducing zero bits
+when @var{count} is positive, or a right shift dropping bits
+when @var{count} is negative.  This is an ``arithmetic'' shift.
  
  @lisp
  (number->string (ash #b1 3) 2)     @result{} "1000"
@@ -1707,6 +1705,28 @@ dropping bits.
  @end lisp
  @end deffn
  
+@deffn {Scheme Procedure} round-ash n count
+@deffnx {C Function} scm_round_ash (n, count)
+Return @math{round(n * 2^count)}.
+@var{n} and @var{count} must be exact integers.
+
+With @var{n} viewed as an infinite-precision twos-complement
+integer, @code{round-ash} means a left shift introducing zero
+bits when @var{count} is positive, or a right shift rounding
+to the nearest integer (with ties going to the nearest even
+integer) when @var{count} is negative.  This is a rounded
+``arithmetic'' shift.
+
+@lisp
+(number->string (round-ash #b1 3) 2)     @result{} "1000"
+(number->string (round-ash #b1010 -1) 2) @result{} "101"
+(number->string (round-ash #b1010 -2) 2) @result{} "10"
+(number->string (round-ash #b1011 -2) 2) @result{} "11"
+(number->string (round-ash #b1101 -2) 2) @result{} "11"
+(number->string (round-ash #b1110 -2) 2) @result{} "100"
+@end lisp
+@end deffn
+
  @deffn {Scheme Procedure} logcount n
  @deffnx {C Function} scm_logcount (n)
  Return the number of bits in integer @var{n}.  If @var{n} is
@@ -2047,7 +2067,7 @@ number of one to eight digits.
  @rnindex char?
  @deffn {Scheme Procedure} char? x
  @deffnx {C Function} scm_char_p (x)
-Return @code{#t} iff @var{x} is a character, else @code{#f}.
+Return @code{#t} if @var{x} is a character, else @code{#f}.
  @end deffn
  
  Fundamentally, the character comparison operations below are
@@ -2055,31 +2075,31 @@ numeric comparisons of the character's code points.
  
  @rnindex char=?
  @deffn {Scheme Procedure} char=? x y
-Return @code{#t} iff code point of @var{x} is equal to the code point
+Return @code{#t} if the code point of @var{x} is equal to the code point
  of @var{y}, else @code{#f}.
  @end deffn
  
  @rnindex char<?
  @deffn {Scheme Procedure} char<? x y
-Return @code{#t} iff the code point of @var{x} is less than the code
+Return @code{#t} if the code point of @var{x} is less than the code
  point of @var{y}, else @code{#f}.
  @end deffn
  
  @rnindex char<=?
  @deffn {Scheme Procedure} char<=? x y
-Return @code{#t} iff the code point of @var{x} is less than or equal
+Return @code{#t} if the code point of @var{x} is less than or equal
  to the code point of @var{y}, else @code{#f}.
  @end deffn
  
  @rnindex char>?
  @deffn {Scheme Procedure} char>? x y
-Return @code{#t} iff the code point of @var{x} is greater than the
+Return @code{#t} if the code point of @var{x} is greater than the
  code point of @var{y}, else @code{#f}.
  @end deffn
  
  @rnindex char>=?
  @deffn {Scheme Procedure} char>=? x y
-Return @code{#t} iff the code point of @var{x} is greater than or
+Return @code{#t} if the code point of @var{x} is greater than or
  equal to the code point of @var{y}, else @code{#f}.
  @end deffn
  
@@ -2097,32 +2117,32 @@ it cannot cover all cases for all languages.
  
  @rnindex char-ci=?
  @deffn {Scheme Procedure} char-ci=? x y
-Return @code{#t} iff the case-folded code point of @var{x} is the same
+Return @code{#t} if the case-folded code point of @var{x} is the same
  as the case-folded code point of @var{y}, else @code{#f}.
  @end deffn
  
  @rnindex char-ci<?
  @deffn {Scheme Procedure} char-ci<? x y
-Return @code{#t} iff the case-folded code point of @var{x} is less
+Return @code{#t} if the case-folded code point of @var{x} is less
  than the case-folded code point of @var{y}, else @code{#f}.
  @end deffn
  
  @rnindex char-ci<=?
  @deffn {Scheme Procedure} char-ci<=? x y
-Return @code{#t} iff the case-folded code point of @var{x} is less
+Return @code{#t} if the case-folded code point of @var{x} is less
  than or equal to the case-folded code point of @var{y}, else
  @code{#f}.
  @end deffn
  
  @rnindex char-ci>?
  @deffn {Scheme Procedure} char-ci>? x y
-Return @code{#t} iff the case-folded code point of @var{x} is greater
+Return @code{#t} if the case-folded code point of @var{x} is greater
  than the case-folded code point of @var{y}, else @code{#f}.
  @end deffn
  
  @rnindex char-ci>=?
  @deffn {Scheme Procedure} char-ci>=? x y
-Return @code{#t} iff the case-folded code point of @var{x} is greater
+Return @code{#t} if the case-folded code point of @var{x} is greater
  than or equal to the case-folded code point of @var{y}, else
  @code{#f}.
  @end deffn
@@ -2130,36 +2150,36 @@ than or equal to the case-folded code point of @var{y}, else
  @rnindex char-alphabetic?
  @deffn {Scheme Procedure} char-alphabetic? chr
  @deffnx {C Function} scm_char_alphabetic_p (chr)
-Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.
+Return @code{#t} if @var{chr} is alphabetic, else @code{#f}.
  @end deffn
  
  @rnindex char-numeric?
  @deffn {Scheme Procedure} char-numeric? chr
  @deffnx {C Function} scm_char_numeric_p (chr)
-Return @code{#t} iff @var{chr} is numeric, else @code{#f}.
+Return @code{#t} if @var{chr} is numeric, else @code{#f}.
  @end deffn
  
  @rnindex char-whitespace?
  @deffn {Scheme Procedure} char-whitespace? chr
  @deffnx {C Function} scm_char_whitespace_p (chr)
-Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.
+Return @code{#t} if @var{chr} is whitespace, else @code{#f}.
  @end deffn
  
  @rnindex char-upper-case?
  @deffn {Scheme Procedure} char-upper-case? chr
  @deffnx {C Function} scm_char_upper_case_p (chr)
-Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.
+Return @code{#t} if @var{chr} is uppercase, else @code{#f}.
  @end deffn
  
  @rnindex char-lower-case?
  @deffn {Scheme Procedure} char-lower-case? chr
  @deffnx {C Function} scm_char_lower_case_p (chr)
-Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.
+Return @code{#t} if @var{chr} is lowercase, else @code{#f}.
  @end deffn
  
  @deffn {Scheme Procedure} char-is-both? chr
  @deffnx {C Function} scm_char_is_both_p (chr)
-Return @code{#t} iff @var{chr} is either uppercase or lowercase, else
+Return @code{#t} if @var{chr} is either uppercase or lowercase, else
  @code{#f}.
  @end deffn
  
@@ -2581,8 +2601,8 @@ string is not defined.
  
  @deffn {Scheme Procedure} char-set-contains? cs ch
  @deffnx {C Function} scm_char_set_contains_p (cs, ch)
-Return @code{#t} iff the character @var{ch} is contained in the
-character set @var{cs}.
+Return @code{#t} if the character @var{ch} is contained in the
+character set @var{cs}, or @code{#f} otherwise.
  @end deffn
  
  @deffn {Scheme Procedure} char-set-every pred cs
@@ -2881,6 +2901,7 @@ Guile provides all procedures of SRFI-13 and a few more.
  * Reversing and Appending Strings:: Appending strings to form a new string.
  * Mapping Folding and Unfolding::   Iterating over strings.
  * Miscellaneous String Operations:: Replicating, insertion, parsing, ...
+* Representing Strings as Bytes::   Encoding and decoding strings.
  * Conversion to/from C::
  * String Internals::                The storage strategy for strings.
  @end menu
@@ -3127,7 +3148,7 @@ placed between the strings, and defaults to the symbol
  @item infix
  Insert the separator between list elements.  An empty string
  will produce an empty list.
-@item string-infix
+@item strict-infix
  Like @code{infix}, but will raise an error if given the empty
  list.
  @item suffix
@@ -3152,12 +3173,24 @@ These procedures are useful for similar tasks.
  Convert the string @var{str} into a list of characters.
  @end deffn
  
-@deffn {Scheme Procedure} string-split str chr
-@deffnx {C Function} scm_string_split (str, chr)
+@deffn {Scheme Procedure} string-split str char_pred
+@deffnx {C Function} scm_string_split (str, char_pred)
  Split the string @var{str} into a list of substrings delimited
-by appearances of the character @var{chr}.  Note that an empty substring
-between separator characters will result in an empty string in the
-result list.
+by appearances of characters that
+
+@itemize @bullet
+@item
+equal @var{char_pred}, if it is a character,
+
+@item
+satisfy the predicate @var{char_pred}, if it is a procedure,
+
+@item
+are in the set @var{char_pred}, if it is a character set.
+@end itemize
+
+Note that an empty substring between separator characters will result in
+an empty string in the result list.
  
  @lisp
  (string-split "root:x:0:0:root:/root:/bin/bash" #\:)
@@ -4151,6 +4184,76 @@ a predicate, if it is a character, it is tested for equality and if it
  is a character set, it is tested for membership.
  @end deffn
  
+@node Representing Strings as Bytes
+@subsubsection Representing Strings as Bytes
+
+Out in the cold world outside of Guile, not all strings are treated in
+the same way.  Out there there are only bytes, and there are many ways
+of representing strings (sequences of characters) as binary data
+(sequences of bytes).
+
+As a user, usually you don't have to think about this very much.  When
+you type on your keyboard, your system encodes your keystrokes as bytes
+according to the locale that you have configured on your computer.
+Guile uses the locale to decode those bytes back into characters --
+hopefully the same characters that you typed in.
+
+All is not so clear when dealing with a system with multiple users, such
+as a web server.  Your web server might get a request from one user for
+data encoded in the ISO-8859-1 character set, and then another request
+from a different user for UTF-8 data.
+
+@cindex iconv
+@cindex character encoding
+Guile provides an @dfn{iconv} module for converting between strings and
+sequences of bytes.  @xref{Bytevectors}, for more on how Guile
+represents raw byte sequences.  This module gets its name from the
+common @sc{unix} command of the same name.
+
+Note that often it is sufficient to just read and write strings from
+ports instead of using these functions.  To do this, specify the port
+encoding using @code{set-port-encoding!}.  @xref{Ports}, for more on
+ports and character encodings.
+
+Unlike the rest of the procedures in this section, you have to load the
+@code{iconv} module before having access to these procedures:
+
+@example
+(use-modules (ice-9 iconv))
+@end example
+
+@deffn {Scheme Procedure} string->bytevector string encoding [conversion-strategy]
+Encode @var{string} as a sequence of bytes.
+
+The string will be encoded in the character set specified by the
+@var{encoding} string.  If the string has characters that cannot be
+represented in the encoding, by default this procedure raises an
+@code{encoding-error}.  Pass a @var{conversion-strategy} argument to
+specify other behaviors.
+
+The return value is a bytevector.  @xref{Bytevectors}, for more on
+bytevectors.  @xref{Ports}, for more on character encodings and
+conversion strategies.
+@end deffn
+
+@deffn {Scheme Procedure} bytevector->string bytevector encoding [conversion-strategy]
+Decode @var{bytevector} into a string.
+
+The bytes will be decoded from the character set specified by the
+@var{encoding} string.  If the bytes do not form a valid encoding, by
+default this procedure raises a @code{decoding-error}.  As with
+@code{string->bytevector}, pass the optional @var{conversion-strategy}
+argument to modify this behavior.  @xref{Ports}, for more on character
+encodings and conversion strategies.
+@end deffn
+
+@deffn {Scheme Procedure} call-with-output-encoded-string encoding proc [conversion-strategy]
+Like @code{call-with-output-string}, but instead of returning a string,
+returns an encoding of the string according to @var{encoding}, as a
+bytevector.  This procedure can be more efficient than collecting a
+string and then converting it via @code{string->bytevector}.
+@end deffn
+
  @node Conversion to/from C
  @subsubsection Conversion to/from C
  
@@ -4160,9 +4263,9 @@ important.
  
  In C, a string is just a sequence of bytes, and the character encoding
  describes the relation between these bytes and the actual characters
-that make up the string.  For Scheme strings, character encoding is
-not an issue (most of the time), since in Scheme you never get to see
-the bytes, only the characters.
+that make up the string.  For Scheme strings, character encoding is not
+an issue (most of the time), since in Scheme you usually treat strings
+as character sequences, not byte sequences.
  
  Converting to C and converting from C each have their own challenges.
  
@@ -4293,6 +4396,9 @@ into @var{encoding}.
  If @var{lenp} is @code{NULL}, this function will return a null-terminated C
  string.  It will throw an error if the string contains a null
  character.
+
+The Scheme interface to this function is @code{string->bytevector}, from the
+@code{ice-9 iconv} module.  @xref{Representing Strings as Bytes}.
  @end deftypefn
  
  @deftypefn {C Function} SCM scm_from_stringn (const char *str, size_t len, const char *encoding, scm_t_string_failed_conversion_handler handler)
@@ -4301,6 +4407,9 @@ length in bytes of the C string is input as @var{len}.  The encoding of the C
  string is passed as the ASCII, null-terminated C string @code{encoding}.
  The @var{handler} parameters suggests a strategy for dealing with
  unconvertable characters.
+
+The Scheme interface to this function is @code{bytevector->string}.
+@xref{Representing Strings as Bytes}.
  @end deftypefn
  
  The following conversion functions are provided as a convenience for the
@@ -4339,6 +4448,19 @@ returned is the number of bytes for @code{scm_to_latin1_stringn} and
  for @code{scm_to_utf32_stringn}.
  @end deftypefn
  
+It is not often the case, but sometimes when you are dealing with the
+implementation details of a port, you need to encode and decode strings
+according to the encoding and conversion strategy of the port.  There
+are some convenience functions for that purpose as well.
+
+@deftypefn {C Function} SCM scm_from_port_string (const char *str, SCM port)
+@deftypefnx {C Function} SCM scm_from_port_stringn (const char *str, size_t len, SCM port)
+@deftypefnx {C Function} char* scm_to_port_string (SCM str, SCM port)
+@deftypefnx {C Function} char* scm_to_port_stringn (SCM str, size_t *lenp, SCM port)
+Like @code{scm_from_stringn} and friends, except they take their
+encoding and conversion strategy from a given port object.
+@end deftypefn
+
  @node String Internals
  @subsubsection String Internals
  
@@ -4446,7 +4568,7 @@ R6RS (@pxref{R6RS I/O Ports}).
  * Bytevectors and Integer Lists::  Converting to/from an integer list.
  * Bytevectors as Floats::       Interpreting bytes as real numbers.
  * Bytevectors as Strings::      Interpreting bytes as Unicode strings.
-* Bytevectors as Generalized Vectors::  Guile extension to the bytevector API.
+* Bytevectors as Arrays::       Guile extension to the bytevector API.
  * Bytevectors as Uniform Vectors::  Bytevectors and SRFI-4.
  @end menu
  
@@ -4798,6 +4920,7 @@ the host's native endianness.
  
  Bytevector contents can also be interpreted as Unicode strings encoded
  in one of the most commonly available encoding formats.
+@xref{Representing Strings as Bytes}, for a more generic interface.
  
  @lisp
  (utf8->string (u8-list->bytevector '(99 97 102 101)))
@@ -4831,25 +4954,27 @@ or UTF-32-decoded contents of bytevector @var{utf}.  For UTF-16 and UTF-32,
  it defaults to big endian.
  @end deffn
  
-@node Bytevectors as Generalized Vectors
-@subsubsection Accessing Bytevectors with the Generalized Vector API
+@node Bytevectors as Arrays
+@subsubsection Accessing Bytevectors with the Array API
  
  As an extension to the R6RS, Guile allows bytevectors to be manipulated
-with the @dfn{generalized vector} procedures (@pxref{Generalized
-Vectors}).  This also allows bytevectors to be accessed using the
-generic @dfn{array} procedures (@pxref{Array Procedures}).  When using
-these APIs, bytes are accessed one at a time as 8-bit unsigned integers:
+with the @dfn{array} procedures (@pxref{Arrays}).  When using these
+APIs, bytes are accessed one at a time as 8-bit unsigned integers:
  
  @example
  (define bv #vu8(0 1 2 3))
  
-(generalized-vector? bv)
+(array? bv)
  @result{} #t
  
-(generalized-vector-ref bv 2)
+(array-rank bv)
+@result{} 1
+
+(array-ref bv 2)
  @result{} 2
  
-(generalized-vector-set! bv 2 77)
+;; Note the different argument order on array-set!.
+(array-set! bv 77 2)
  (array-ref bv 2)
  @result{} 77