*** empty log message ***
[bpt/emacs.git] / lisp / descr-text.el
index b0e091a..229e1b5 100644 (file)
@@ -1,6 +1,6 @@
 ;;; descr-text.el --- describe text mode
 
-;; Copyright (c) 1994, 1995, 1996, 2001, 2002 Free Software Foundation, Inc.
+;; Copyright (c) 1994, 1995, 1996, 2001, 02, 03 Free Software Foundation, Inc.
 
 ;; Author: Boris Goldowsky <boris@gnu.org>
 ;; Keywords: faces
@@ -28,6 +28,8 @@
 
 ;;; Code:
 
+(eval-when-compile (require 'button))
+
 (defun describe-text-done ()
   "Delete the current window or bury the current buffer."
   (interactive)
       (delete-window)
     (bury-buffer)))
 
-(defvar describe-text-mode-map 
+(defvar describe-text-mode-map
   (let ((map (make-sparse-keymap)))
     (set-keymap-parent map widget-keymap)
     map)
   "Keymap for `describe-text-mode'.")
-  
+
 (defcustom describe-text-mode-hook nil
   "List of hook functions ran by `describe-text-mode'."
   :type 'hook
@@ -67,7 +69,7 @@ if that value is non-nil."
   (widget-create 'link
                 :notify `(lambda (&rest ignore)
                            (widget-browse ',widget))
-                (format "%S" (if (symbolp widget) 
+                (format "%S" (if (symbolp widget)
                                  widget
                                (car widget))))
   (widget-insert " ")
@@ -197,7 +199,7 @@ otherwise."
       ;; Buttons
       (when (and button (not (widgetp wid-button)))
        (newline)
-       (widget-insert "Here is a " (format "%S" button-type) 
+       (widget-insert "Here is a " (format "%S" button-type)
                       " button labeled `" button-label "'.\n\n"))
       ;; Overlays
       (when overlays
@@ -207,7 +209,7 @@ otherwise."
          (widget-insert "There are " (format "%d" (length overlays))
                         " overlays here:\n"))
        (dolist (overlay overlays)
-         (widget-insert " From " (format "%d" (overlay-start overlay)) 
+         (widget-insert " From " (format "%d" (overlay-start overlay))
                         " to " (format "%d" (overlay-end overlay)) "\n")
          (describe-property-list (overlay-properties overlay)))
        (widget-insert "\n"))
@@ -216,7 +218,223 @@ otherwise."
        (newline)
        (widget-insert "There are text properties here:\n")
        (describe-property-list properties)))))
+\f
+;;; We cannot use the UnicodeData.txt file as such; it is not free.
+;;; We can turn that info a different format and release the result
+;;; as free data.  When that is done, we could reinstate the code below.
+;;; For the mean time, here is a dummy placeholder.
+;;;  -- rms
+(defun describe-char-unicode-data (char) nil)
+
+;;; (defcustom describe-char-unicodedata-file nil
+;;;   "Location of Unicode data file.
+;;; This is the UnicodeData.txt file from the Unicode consortium, used for
+;;; diagnostics.  If it is non-nil `describe-char-after' will print data
+;;; looked up from it.  This facility is mostly of use to people doing
+;;; multilingual development.
+
+;;; This is a fairly large file, not typically present on GNU systems.  At
+;;; the time of writing it is at
+;;; <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>."
+;;;   :group 'mule
+;;;   :version "21.5"
+;;;   :type '(choice (const :tag "None" nil)
+;;;             file))
 
+;;; ;; We could convert the unidata file into a Lispy form once-for-all
+;;; ;; and distribute it for loading on demand.  It might be made more
+;;; ;; space-efficient by splitting strings word-wise and replacing them
+;;; ;; with lists of symbols interned in a private obarray, e.g.
+;;; ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A).
+
+;;; ;; Fixme: Check whether this needs updating for Unicode 4.
+;;; (defun describe-char-unicode-data (char)
+;;;   "Return a list of Unicode data for unicode CHAR.
+;;; Each element is a list of a property description and the property value.
+;;; The list is null if CHAR isn't found in `describe-char-unicodedata-file'."
+;;;   (when describe-char-unicodedata-file
+;;;     (unless (file-exists-p describe-char-unicodedata-file)
+;;;       (error "`unicodedata-file' %s not found" describe-char-unicodedata-file))
+;;;     (save-excursion
+;;;       ;; Find file in fundamental mode to avoid, e.g. flyspell turned
+;;;       ;; on for .txt.  Don't use RAWFILE arg in case of DOS line endings.
+;;;       (set-buffer (let ((auto-mode-alist))
+;;;                (find-file-noselect describe-char-unicodedata-file)))
+;;;       (goto-char (point-min))
+;;;       (let ((hex (format "%04X" char))
+;;;        found first last)
+;;;    (if (re-search-forward (concat "^" hex) nil t)
+;;;        (setq found t)
+;;;      ;; It's not listed explicitly.  Look for ranges, e.g. CJK
+;;;      ;; ideographs, and check whether it's in one of them.
+;;;      (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
+;;;                  (>= char (setq first
+;;;                                 (string-to-number (match-string 1) 16)))
+;;;                  (progn
+;;;                    (forward-line 1)
+;;;                    (looking-at "^\\([^;]+\\);[^;]+Last>;")
+;;;                    (> char
+;;;                       (setq last
+;;;                             (string-to-number (match-string 1) 16))))))
+;;;      (if (and (>= char first)
+;;;               (<= char last))
+;;;          (setq found t)))
+;;;    (if found
+;;;        (let ((fields (mapcar (lambda (elt)
+;;;                                (if (> (length elt) 0)
+;;;                                    elt))
+;;;                              (cdr (split-string
+;;;                                    (buffer-substring
+;;;                                     (line-beginning-position)
+;;;                                     (line-end-position))
+;;;                                    ";")))))
+;;;          ;; The length depends on whether the last field was empty.
+;;;          (unless (or (= 13 (length fields))
+;;;                      (= 14 (length fields)))
+;;;            (error "Invalid contents in %s" describe-char-unicodedata-file))
+;;;          ;; The field names and values lists are slightly
+;;;          ;; modified from Mule-UCS unidata.el.
+;;;          (list
+;;;           (list "Name" (let ((name (nth 0 fields)))
+;;;                          ;; Check for <..., First>, <..., Last>
+;;;                          (if (string-match "\\`\\(<[^,]+\\)," name)
+;;;                              (concat (match-string 1 name) ">")
+;;;                            name)))
+;;;           (list "Category"
+;;;                 (cdr (assoc
+;;;                       (nth 1 fields)
+;;;                       '(("Lu" . "uppercase letter")
+;;;                         ("Ll" . "lowercase letter")
+;;;                         ("Lt" . "titlecase letter")
+;;;                         ("Mn" . "non-spacing mark")
+;;;                         ("Mc" . "spacing-combining mark")
+;;;                         ("Me" . "enclosing mark")
+;;;                         ("Nd" . "decimal digit")
+;;;                         ("Nl" . "letter number")
+;;;                         ("No" . "other number")
+;;;                         ("Zs" . "space separator")
+;;;                         ("Zl" . "line separator")
+;;;                         ("Zp" . "paragraph separator")
+;;;                         ("Cc" . "other control")
+;;;                         ("Cf" . "other format")
+;;;                         ("Cs" . "surrogate")
+;;;                         ("Co" . "private use")
+;;;                         ("Cn" . "not assigned")
+;;;                         ("Lm" . "modifier letter")
+;;;                         ("Lo" . "other letter")
+;;;                         ("Pc" . "connector punctuation")
+;;;                         ("Pd" . "dash punctuation")
+;;;                         ("Ps" . "open punctuation")
+;;;                         ("Pe" . "close punctuation")
+;;;                         ("Pi" . "initial-quotation punctuation")
+;;;                         ("Pf" . "final-quotation punctuation")
+;;;                         ("Po" . "other punctuation")
+;;;                         ("Sm" . "math symbol")
+;;;                         ("Sc" . "currency symbol")
+;;;                         ("Sk" . "modifier symbol")
+;;;                         ("So" . "other symbol")))))
+;;;           (list "Combining class"
+;;;                 (cdr (assoc
+;;;                       (string-to-number (nth 2 fields))
+;;;                       '((0 . "Spacing")
+;;;                         (1 . "Overlays and interior")
+;;;                         (7 . "Nuktas") 
+;;;                         (8 . "Hiragana/Katakana voicing marks")
+;;;                         (9 . "Viramas")
+;;;                         (10 . "Start of fixed position classes")
+;;;                         (199 . "End of fixed position classes")
+;;;                         (200 . "Below left attached")
+;;;                         (202 . "Below attached")
+;;;                         (204 . "Below right attached")
+;;;                         (208 . "Left attached (reordrant around \
+;;; single base character)")
+;;;                         (210 . "Right attached")
+;;;                         (212 . "Above left attached")
+;;;                         (214 . "Above attached")
+;;;                         (216 . "Above right attached")
+;;;                         (218 . "Below left")
+;;;                         (220 . "Below")
+;;;                         (222 . "Below right")
+;;;                         (224 . "Left (reordrant around single base \
+;;; character)")
+;;;                         (226 . "Right")
+;;;                         (228 . "Above left")
+;;;                         (230 . "Above")
+;;;                         (232 . "Above right")
+;;;                         (233 . "Double below")
+;;;                         (234 . "Double above")
+;;;                         (240 . "Below (iota subscript)")))))
+;;;           (list "Bidi category"
+;;;                 (cdr (assoc
+;;;                       (nth 3 fields)
+;;;                       '(("L" . "Left-to-Right")
+;;;                         ("LRE" . "Left-to-Right Embedding")
+;;;                         ("LRO" . "Left-to-Right Override")
+;;;                         ("R" . "Right-to-Left")
+;;;                         ("AL" . "Right-to-Left Arabic")
+;;;                         ("RLE" . "Right-to-Left Embedding")
+;;;                         ("RLO" . "Right-to-Left Override")
+;;;                         ("PDF" . "Pop Directional Format")
+;;;                         ("EN" . "European Number")
+;;;                         ("ES" . "European Number Separator")
+;;;                         ("ET" . "European Number Terminator")
+;;;                         ("AN" . "Arabic Number")
+;;;                         ("CS" . "Common Number Separator")
+;;;                         ("NSM" . "Non-Spacing Mark")
+;;;                         ("BN" . "Boundary Neutral")
+;;;                         ("B" . "Paragraph Separator")
+;;;                         ("S" . "Segment Separator")
+;;;                         ("WS" . "Whitespace")
+;;;                         ("ON" . "Other Neutrals")))))
+;;;           (list
+;;;            "Decomposition"
+;;;            (if (nth 4 fields)
+;;;                (let* ((parts (split-string (nth 4 fields)))
+;;;                       (info (car parts)))
+;;;                  (if (string-match "\\`<\\(.+\\)>\\'" info)
+;;;                      (setq info (match-string 1 info))
+;;;                    (setq info nil))
+;;;                  (if info (setq parts (cdr parts)))
+;;;                  ;; Maybe printing ? for unrepresentable unicodes
+;;;                  ;; here and below should be changed?
+;;;                  (setq parts (mapconcat
+;;;                               (lambda (arg)
+;;;                                 (string (or (decode-char
+;;;                                              'ucs
+;;;                                              (string-to-number arg 16))
+;;;                                             ??)))
+;;;                               parts " "))
+;;;                  (concat info parts))))
+;;;           (list "Decimal digit value"
+;;;                 (nth 5 fields))
+;;;           (list "Digit value"
+;;;                 (nth 6 fields))
+;;;           (list "Numeric value"
+;;;                 (nth 7 fields))
+;;;           (list "Mirrored"
+;;;                 (if (equal "Y" (nth 8 fields))
+;;;                     "yes"))
+;;;           (list "Old name" (nth 9 fields))
+;;;           (list "ISO 10646 comment" (nth 10 fields))
+;;;           (list "Uppercase" (and (nth 11 fields)
+;;;                                  (string (or (decode-char
+;;;                                               'ucs
+;;;                                               (string-to-number
+;;;                                                (nth 11 fields) 16))
+;;;                                              ??))))
+;;;           (list "Lowercase" (and (nth 12 fields)
+;;;                                  (string (or (decode-char
+;;;                                               'ucs
+;;;                                               (string-to-number
+;;;                                                (nth 12 fields) 16))
+;;;                                              ??))))
+;;;           (list "Titlecase" (and (nth 13 fields)
+;;;                                  (string (or (decode-char
+;;;                                               'ucs
+;;;                                               (string-to-number
+;;;                                                (nth 13 fields) 16))
+;;;                                              ??)))))))))))
+\f
 ;;;###autoload
 (defun describe-char (pos)
   "Describe the character after POS (interactively, the character after point).
@@ -234,7 +452,7 @@ as well as widgets, buttons, overlays, and text properties."
         (composed (if composition (buffer-substring (car composition)
                                                     (nth 1 composition))))
         (multibyte-p enable-multibyte-characters)
-        item-list max-width)
+        item-list max-width unicode)
     (if (eq charset 'unknown)
        (setq item-list
              `(("character"
@@ -243,12 +461,21 @@ as well as widgets, buttons, overlays, and text properties."
                              (single-key-description char)
                            (char-to-string char))
                          char char char))))
+
+      (if (or (< char 256)
+             (memq 'mule-utf-8 (find-coding-systems-region pos (1+ pos)))
+             (get-char-property pos 'untranslated-utf-8))
+         (setq unicode (or (get-char-property pos 'untranslated-utf-8)
+                           (encode-char char 'ucs))))
       (setq item-list
            `(("character"
-              ,(format "%s (0%o, %d, 0x%x)" (if (< char 256)
+              ,(format "%s (0%o, %d, 0x%x%s)" (if (< char 256)
                                                 (single-key-description char)
                                               (char-to-string char))
-                       char char char))
+                       char char char
+                       (if unicode
+                           (format ", U+%04X" unicode)
+                         "")))
              ("charset"
               ,(symbol-name charset)
               ,(format "(%s)" (charset-description charset)))
@@ -287,14 +514,6 @@ as well as widgets, buttons, overlays, and text properties."
                             (format "(encoded by coding system %S)" coding))
                     (list "not encodable by coding system"
                           (symbol-name coding)))))
-             ,@(if (or (memq 'mule-utf-8
-                         (find-coding-systems-region pos (1+ pos)))
-                       (get-char-property pos 'untranslated-utf-8))
-                   (let ((uc (or (get-char-property pos 'untranslated-utf-8)
-                                 (encode-char char 'ucs))))
-                     (if uc
-                         (list (list "Unicode"
-                                     (format "%04X" uc))))))
              ,(if (display-graphic-p (selected-frame))
                   (list "font" (or (internal-char-font pos)
                                    "-- none --"))
@@ -303,26 +522,31 @@ as well as widgets, buttons, overlays, and text properties."
                              (encoded (encode-coding-char char coding)))
                         (if encoded
                             (encoded-string-description encoded coding)
-                          "not encodable")))))))
+                          "not encodable"))))
+             ,@(let ((unicodedata (and unicode
+                                       (describe-char-unicode-data unicode))))
+                 (if unicodedata
+                     (cons (list "Unicode data" " ") unicodedata))))))
     (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x)))
                                         item-list)))
     (when (eq (current-buffer) (get-buffer "*Help*"))
-      (error "Can't do self inspection"))
+      (error "Can't describe char in Help buffer"))
     (with-output-to-temp-buffer "*Help*"
       (with-current-buffer standard-output
        (set-buffer-multibyte multibyte-p)
        (let ((formatter (format "%%%ds:" max-width)))
          (dolist (elt item-list)
-           (insert (format formatter (car elt)))
-           (dolist (clm (cdr elt))
-             (when (>= (+ (current-column)
-                          (or (string-match "\n" clm)
-                              (string-width clm)) 1)
-                       (frame-width))
-               (insert "\n")
-               (indent-to (1+ max-width)))
-             (insert " " clm))
-           (insert "\n")))
+           (when (cadr elt)
+             (insert (format formatter (car elt)))
+             (dolist (clm (cdr elt))
+               (when (>= (+ (current-column)
+                            (or (string-match "\n" clm)
+                                (string-width clm)) 1)
+                         (frame-width))
+                 (insert "\n")
+                 (indent-to (1+ max-width)))
+               (insert " " clm))
+             (insert "\n"))))
        (when composition
          (insert "\nComposed with the "
                  (cond
@@ -336,7 +560,7 @@ as well as widgets, buttons, overlays, and text properties."
                   (t (concat (substring composed 0 (- pos (car composition)))
                              "' and `"
                              (substring composed (- (1+ pos) (car composition))))))
-                   
+
                  "' to form `" composed "'")
          (if (nth 3 composition)
              (insert ".\n")
@@ -354,6 +578,9 @@ as well as widgets, buttons, overlays, and text properties."
            (describe-text-properties pos output))
          (describe-text-mode))))))
 
+(defalias 'describe-char-after 'describe-char)
+(make-obsolete 'describe-char-after 'describe-char "21.5")
+
 (provide 'descr-text)
 
 ;;; descr-text.el ends here