Doc updates for several Emacs 24.4 changes.

[bpt/emacs.git] / doc / lispref / nonascii.texi
diff --git a/doc/lispref/nonascii.texi b/doc/lispref/nonascii.texi

index ec949b3..7b49915 100644 (file)
--- a/doc/lispref/nonascii.texi
+++ b/doc/lispref/nonascii.texi
@@ -409,7 +409,7 @@ of character properties.  In particular, Emacs supports the
  @uref{http://www.unicode.org/reports/tr23/, Unicode Character Property
  Model}, and the Emacs character property database is derived from the
  Unicode Character Database (@acronym{UCD}).  See the
-@uref{http://www.unicode.org/versions/Unicode5.0.0/ch04.pdf, Character
+@uref{http://www.unicode.org/versions/Unicode6.2.0/ch04.pdf, Character
  Properties chapter of the Unicode Standard}, for a detailed
  description of Unicode character properties and their meaning.  This
  section assumes you are already familiar with that chapter of the
@@ -440,7 +440,7 @@ properties that Emacs knows about:
  Corresponds to the @code{Name} Unicode property.  The value is a
  string consisting of upper-case Latin letters A to Z, digits, spaces,
  and hyphen @samp{-} characters.  For unassigned codepoints, the value
-is an empty string.
+is @code{nil}.
  
  @cindex unicode general category
  @item general-category
@@ -478,14 +478,14 @@ unassigned codepoints, the value is the character itself.
  
  @item decimal-digit-value
  Corresponds to the Unicode @code{Numeric_Value} property for
-characters whose @code{Numeric_Type} is @samp{Digit}.  The value is an
-integer number.  For unassigned codepoints, the value is @code{nil},
-which means @acronym{NaN}, or ``not-a-number''.
+characters whose @code{Numeric_Type} is @samp{Decimal}.  The value is
+an integer number.  For unassigned codepoints, the value is
+@code{nil}, which means @acronym{NaN}, or ``not-a-number''.
  
  @item digit-value
  Corresponds to the Unicode @code{Numeric_Value} property for
-characters whose @code{Numeric_Type} is @samp{Decimal}.  The value is
-an integer number.  Examples of such characters include compatibility
+characters whose @code{Numeric_Type} is @samp{Digit}.  The value is an
+integer number.  Examples of such characters include compatibility
  subscript and superscript digits, for which the value is the
  corresponding number.  For unassigned codepoints, the value is
  @code{nil}, which means @acronym{NaN}.
@@ -522,7 +522,8 @@ is @code{nil}.
  
  @item old-name
  Corresponds to the Unicode @code{Unicode_1_Name} property.  The value
-is a string.  For unassigned codepoints, the value is an empty string.
+is a string.  For unassigned codepoints, and characters that have no
+value for this property, the value is @code{nil}.
  
  @item iso-10646-comment
  Corresponds to the Unicode @code{ISO_Comment} property.  The value is
@@ -610,6 +611,7 @@ property as a symbol.
  @end defvar
  
  @defvar char-script-table
+@cindex script symbols
  The value of this variable is a char-table that specifies, for each
  character, a symbol whose name is the script to which the character
  belongs, according to the Unicode Standard classification of the
@@ -686,6 +688,7 @@ which case the returned charset must be supported by that coding
  system (@pxref{Coding Systems}).
  @end defun
  
+@c TODO: Explain the properties here and add indexes such as ‘charset property’.
  @defun charset-plist charset
  This function returns the property list of the character set
  @var{charset}.  Although @var{charset} is a symbol, this is not the
@@ -851,6 +854,8 @@ systems specifies its own translation tables, the table that is the
  value of this variable, if non-@code{nil}, is applied after them.
  @end defvar
  
+@c FIXME: This variable is obsolete since 23.1.  We should mention
+@c that here or simply remove this defvar.  --xfq
  @defvar translation-table-for-input
  Self-inserting characters are translated through this translation
  table before they are inserted.  Search commands also translate their
@@ -959,7 +964,8 @@ Unix convention, used on GNU and Unix systems, is to use the linefeed
  character (also called newline).  The DOS convention, used on
  MS-Windows and MS-DOS systems, is to use a carriage-return and a
  linefeed at the end of a line.  The Mac convention is to use just
-carriage-return.
+carriage-return.  (This was the convention used on the Macintosh
+system prior to OS X.)
  
  @cindex base coding system
  @cindex variant coding system
@@ -1103,6 +1109,16 @@ visited file name, saving may use the wrong file name, or it may get
  an error.  If such a problem happens, use @kbd{C-x C-w} to specify a
  new file name for that buffer.
  
+@cindex file-name encoding, MS-Windows
+  On Windows 2000 and later, Emacs by default uses Unicode APIs to
+pass file names to the OS, so the value of
+@code{file-name-coding-system} is largely ignored.  Lisp applications
+that need to encode or decode file names on the Lisp level should use
+the @code{utf-8} coding-system when @code{system-type} is
+@code{windows-nt}; the conversion of UTF-8 encoded file names to the
+encoding appropriate for communicating with the OS is performed
+internally by Emacs.
+
  @node Lisp and Coding Systems
  @subsection Coding Systems in Lisp
  
@@ -1273,17 +1289,18 @@ Sets}) supported by @var{coding-system}.  Some coding systems that
  support too many character sets to list them all yield special values:
  @itemize @bullet
  @item
-If @var{coding-system} supports all the ISO-2022 charsets, the value
-is @code{iso-2022}.
-@item
  If @var{coding-system} supports all Emacs characters, the value is
  @code{(emacs)}.
  @item
-If @var{coding-system} supports all emacs-mule characters, the value
-is @code{emacs-mule}.
-@item
  If @var{coding-system} supports all Unicode characters, the value is
  @code{(unicode)}.
+@item
+If @var{coding-system} supports all ISO-2022 charsets, the value is
+@code{iso-2022}.
+@item
+If @var{coding-system} supports all the characters in the internal
+coding system used by Emacs version 21 (prior to the implementation of
+internal Unicode support), the value is @code{emacs-mule}.
  @end itemize
  @end defun
  
@@ -1568,7 +1585,7 @@ the alist; otherwise it returns @code{nil}.
  
  If @var{operation} is @code{insert-file-contents}, the argument
  corresponding to the target may be a cons cell of the form
-@code{(@var{filename} . @var{buffer})}).  In that case, @var{filename}
+@code{(@var{filename} . @var{buffer})}.  In that case, @var{filename}
  is a file name to look up in @code{file-coding-system-alist}, and
  @var{buffer} is a buffer that contains the file's contents (not yet
  decoded).  If @code{file-coding-system-alist} specifies a function to
@@ -1601,8 +1618,7 @@ of the right way to use the variable:
  
  @example
  ;; @r{Read the file with no character code conversion.}
-;; @r{Assume @acronym{crlf} represents end-of-line.}
-(let ((coding-system-for-read 'emacs-mule-dos))
+(let ((coding-system-for-read 'no-conversion))
    (insert-file-contents filename))
  @end example