*** empty log message ***

[bpt/emacs.git] / lispref / nonascii.texi
diff --git a/lispref/nonascii.texi b/lispref/nonascii.texi

index 62bd28f..c526d41 100644 (file)
--- a/lispref/nonascii.texi
+++ b/lispref/nonascii.texi
@@ -1,6 +1,7 @@
  @c -*-texinfo-*-
  @c This is part of the GNU Emacs Lisp Reference Manual.
-@c Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+@c Copyright (C) 1998, 1999, 2002, 2003, 2004,
+@c   2005, 2006 Free Software Foundation, Inc.
  @c See the file elisp.texi for copying conditions.
  @setfilename ../info/characters
  @node Non-ASCII Characters, Searching and Matching, Text, Top
@@ -95,9 +96,10 @@ default value to @code{nil} early in startup.
  
  @defun position-bytes position
  @tindex position-bytes
-Return the byte-position corresponding to buffer position @var{position}
-in the current buffer.  If @var{position} is out of range, the value
-is @code{nil}.
+Return the byte-position corresponding to buffer position
+@var{position} in the current buffer.  This is 1 at the start of the
+buffer, and counts upward in bytes.  If @var{position} is out of
+range, the value is @code{nil}.
  @end defun
  
  @defun byte-to-position byte-position
@@ -292,8 +294,8 @@ codes cannot occur at all in multibyte text.  Only the @acronym{ASCII} codes
  0 through 127 are completely legitimate in both representations.
  
  @defun char-valid-p charcode &optional genericp
-This returns @code{t} if @var{charcode} is valid for either one of the two
-text representations.
+This returns @code{t} if @var{charcode} is valid (either for unibyte
+text or for multibyte text).
  
  @example
  (char-valid-p 65)
@@ -359,6 +361,11 @@ as the property list of that symbol.  Charset properties are used for
  special purposes within Emacs.
  @end defun
  
+@deffn Command list-charset-chars charset
+This command displays a list of characters in the character set
+@var{charset}.
+@end deffn
+
  @node Chars and Bytes
  @section Characters and Bytes
  @cindex bytes and characters
@@ -474,6 +481,13 @@ part of a buffer or a string.  One use for this is in determining which
  coding systems (@pxref{Coding Systems}) are capable of representing all
  of the text in question.
  
+@defun charset-after &optional pos
+This function returns the charset of a character in the current buffer
+at position @var{pos}.  If @var{pos} is omitted or @code{nil}, it
+defaults to the current value of point.  If @var{pos} is out of range,
+the value is @code{nil}.
+@end defun
+
  @defun find-charset-region beg end &optional translation
  This function returns a list of the character sets that appear in the
  current buffer between positions @var{beg} and @var{end}.
@@ -564,12 +578,14 @@ coding systems that don't specify any other translation table.
  
  @defvar translation-table-for-input
  Self-inserting characters are translated through this translation
-table before they are inserted.  This variable automatically becomes
-buffer-local when set.
+table before they are inserted.  Search commands also translate their
+input through this table, so they can compare more reliably with
+what's in the buffer.
  
  @code{set-buffer-file-coding-system} sets this variable so that your
  keyboard input gets translated into the character sets that the buffer
-is likely to contain.
+is likely to contain.  This variable automatically becomes
+buffer-local when set.
  @end defvar
  
  @node Coding Systems
@@ -615,6 +631,27 @@ characters; for example, there are three coding systems for the Cyrillic
  conversion, but some of them leave the choice unspecified---to be chosen
  heuristically for each file, based on the data.
  
+  In general, a coding system doesn't guarantee roundtrip identity:
+decoding a byte sequence using a coding system, then encoding the
+resulting text in the same coding system, can produce a different byte
+sequence.  However, the following coding systems do guarantee that the
+byte sequence will be the same as what you originally decoded:
+
+@quotation
+chinese-big5 chinese-iso-8bit cyrillic-iso-8bit emacs-mule
+greek-iso-8bit hebrew-iso-8bit iso-latin-1 iso-latin-2 iso-latin-3
+iso-latin-4 iso-latin-5 iso-latin-8 iso-latin-9 iso-safe
+japanese-iso-8bit japanese-shift-jis korean-iso-8bit raw-text
+@end quotation
+
+  Encoding buffer text and then decoding the result can also fail to
+reproduce the original text.  For instance, if you encode Latin-2
+characters with @code{utf-8} and decode the result using the same
+coding system, you'll get Unicode characters (of charset
+@code{mule-unicode-0100-24ff}).  If you encode Unicode characters with
+@code{iso-latin-2} and decode the result with the same coding system,
+you'll get Latin-2 characters.
+
  @cindex end of line conversion
    @dfn{End of line conversion} handles three different conventions used
  on various systems for representing end of line in files.  The Unix
@@ -673,7 +710,7 @@ a coding system for decoding the file data, and @code{write-region}
  uses one to encode the buffer contents.
  
    You can specify the coding system to use either explicitly
-(@pxref{Specifying Coding Systems}), or implicitly using the defaulting
+(@pxref{Specifying Coding Systems}), or implicitly using a default
  mechanism (@pxref{Default Coding Systems}).  But these methods may not
  completely specify what to do.  For example, they may choose a coding
  system such as @code{undefined} which leaves the character code
@@ -682,8 +719,8 @@ operation finishes the job of choosing a coding system.  Very often
  you will want to find out afterwards which coding system was chosen.
  
  @defvar buffer-file-coding-system
-This variable records the coding system that was used for visiting the
-current buffer.  It is used for saving the buffer, and for writing part
+This buffer-local variable records the coding system that was used to visit
+the current buffer.  It is used for saving the buffer, and for writing part
  of the buffer with @code{write-region}.  If the text to be written
  cannot be safely encoded using the coding system specified by this
  variable, these operations select an alternative encoding by calling
@@ -1033,11 +1070,11 @@ for decoding (in case @var{operation} does decoding), and
  @var{encoding-system} is the coding system for encoding (in case
  @var{operation} does encoding).
  
-The argument @var{operation} should be a symbol, one of
-@code{insert-file-contents}, @code{write-region}, @code{call-process},
-@code{call-process-region}, @code{start-process}, or
-@code{open-network-stream}.  These are the names of the Emacs I/O primitives
-that can do coding system conversion.
+The argument @var{operation} should be a symbol, any one of
+@code{insert-file-contents}, @code{write-region},
+@code{start-process}, @code{call-process}, @code{call-process-region},
+or @code{open-network-stream}.  These are the names of the Emacs I/O
+primitives that can do coding system conversion.
  
  The remaining arguments should be the same arguments that might be given
  to that I/O primitive.  Depending on the primitive, one of those
@@ -1047,9 +1084,9 @@ name is the target.  For subprocess primitives, the process name is the
  target.  For @code{open-network-stream}, the target is the service name
  or port number.
  
-This function looks up the target in @code{file-coding-system-alist},
-@code{process-coding-system-alist}, or
-@code{network-coding-system-alist}, depending on @var{operation}.
+Depending on @var{operation}, this function looks up the target in
+@code{file-coding-system-alist}, @code{process-coding-system-alist},
+or @code{network-coding-system-alist}.
  @end defun
  
  @node Specifying Coding Systems