| 1 | ;;; mule.el --- basic commands for multilingual environment |
| 2 | |
| 3 | ;; Copyright (C) 1997-2011 Free Software Foundation, Inc. |
| 4 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 5 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 7 | ;; Registration Number H14PRO021 |
| 8 | ;; Copyright (C) 2003 |
| 9 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 10 | ;; Registration Number H13PRO009 |
| 11 | |
| 12 | ;; Keywords: mule, multilingual, character set, coding system |
| 13 | |
| 14 | ;; This file is part of GNU Emacs. |
| 15 | |
| 16 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 17 | ;; it under the terms of the GNU General Public License as published by |
| 18 | ;; the Free Software Foundation, either version 3 of the License, or |
| 19 | ;; (at your option) any later version. |
| 20 | |
| 21 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 22 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 23 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 24 | ;; GNU General Public License for more details. |
| 25 | |
| 26 | ;; You should have received a copy of the GNU General Public License |
| 27 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 28 | |
| 29 | ;;; Commentary: |
| 30 | |
| 31 | ;;; Code: |
| 32 | |
;; Release identification of the MULE (multilingual environment) code.
(defconst mule-version "6.0 (HANACHIRUSATO)"
  "Version number and name of this version of MULE (multilingual environment).")

(defconst mule-version-date "2003.9.1"
  "Distribution date of this version of MULE (multilingual environment).")
| 38 | |
| 39 | \f |
| 40 | ;;; CHARSET |
| 41 | |
;; Backward compatibility code for handling emacs-mule charsets.
;; The two private character areas below supply the code offsets that
;; `convert-define-charset-argument' hands out; it advances the -min
;; variables as it reserves codes.
(defvar private-char-area-1-min #xF0000
  "Lowest character code not yet reserved in private character area 1.")
(defvar private-char-area-1-max #xFFFFE
  "Upper limit of private character area 1.")
(defvar private-char-area-2-min #x100000
  "Lowest character code not yet reserved in private character area 2.")
(defvar private-char-area-2-max #x10FFFE
  "Upper limit of private character area 2.")

(defvar emacs-mule-charset-table (make-vector 256 nil)
  "Table of emacs-mule charsets indexed by their emacs-mule ID.
An element is nil when no charset has been registered under that ID.")
;; Emacs-mule ID 0 is registered for ASCII.
(aset emacs-mule-charset-table 0 'ascii)
| 51 | |
| 52 | ;; Convert the argument of old-style call of define-charset to a |
| 53 | ;; property list used by the new-style. |
| 54 | ;; INFO-VECTOR is a vector of the format: |
| 55 | ;; [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE |
| 56 | ;; SHORT-NAME LONG-NAME DESCRIPTION] |
| 57 | |
(defun convert-define-charset-argument (emacs-mule-id info-vector)
  "Convert old-style `define-charset' arguments to a new-style property list.
EMACS-MULE-ID is either an integer (0, or 129..255) to use as the
emacs-mule charset ID, or a non-integer, in which case the first ID
that is still unused in `emacs-mule-charset-table' is chosen: from
160..223 for a one-dimensional charset, from 224..254 otherwise.
INFO-VECTOR is a vector of the format:
  [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
   SHORT-NAME LONG-NAME DESCRIPTION]

The character codes of the charset are reserved from private
character area 1 or, when that has no room, from area 2; the
variable `private-char-area-1-min' or `private-char-area-2-min' is
advanced accordingly.

Return a property list with the keys `:dimension', `:code-space',
`:iso-final-char', `:code-offset' and `:emacs-mule-id'.
Signal an error if an integer EMACS-MULE-ID is out of range, if no
emacs-mule ID is left, or if neither private area has enough room."
  (let* ((dim (aref info-vector 0))
         (chars (aref info-vector 1))
         ;; Number of character codes the charset occupies.
         (total (if (= dim 1) chars (* chars chars)))
         (code-space (if (= dim 1)
                         (if (= chars 96) [32 127] [33 126])
                       (if (= chars 96) [32 127 32 127] [33 126 33 126])))
         code-offset)
    (if (integerp emacs-mule-id)
        (or (= emacs-mule-id 0)
            (and (>= emacs-mule-id 129) (< emacs-mule-id 256))
            (error "Invalid CHARSET-ID: %d" emacs-mule-id))
      (let (from-id to-id)
        (if (= dim 1)
            (setq from-id 160 to-id 224)
          (setq from-id 224 to-id 255))
        ;; Skip over the IDs that are already taken.  (The loop used to
        ;; test for a *free* slot here, so it stopped on the first
        ;; occupied ID, or ran to TO-ID when the whole range was free.)
        (while (and (< from-id to-id)
                    (aref emacs-mule-charset-table from-id))
          (setq from-id (1+ from-id)))
        (if (= from-id to-id)
            (error "No more room for the new Emacs-mule charset"))
        (setq emacs-mule-id from-id)))
    ;; Reserve TOTAL codes from the first private area that has room.
    (if (> (- private-char-area-1-max private-char-area-1-min) total)
        (setq code-offset private-char-area-1-min
              private-char-area-1-min (+ private-char-area-1-min total))
      (if (> (- private-char-area-2-max private-char-area-2-min) total)
          (setq code-offset private-char-area-2-min
                private-char-area-2-min (+ private-char-area-2-min total))
        (error "No more space for a new charset")))
    (list :dimension dim
          :code-space code-space
          :iso-final-char (aref info-vector 4)
          :code-offset code-offset
          :emacs-mule-id emacs-mule-id)))
| 90 | |
(defun define-charset (name docstring &rest props)
  "Define NAME (symbol) as a charset with DOCSTRING.
The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
may be any symbol. The following have special meanings, and one of
`:code-offset', `:map', `:subset', `:superset' must be specified.

`:short-name'

VALUE must be a short string to identify the charset. If omitted,
NAME is used.

`:long-name'

VALUE must be a string longer than `:short-name' to identify the
charset. If omitted, the value of the `:short-name' attribute is used.

`:dimension'

VALUE must be an integer 1, 2, 3, or 4, specifying the dimension of
code-points of the charsets. If omitted, it is calculated from the
value of the `:code-space' attribute.

`:code-space'

VALUE must be a vector of length at most 8 specifying the byte code
range of each dimension in this format:
	[ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
where MIN-N is the minimum byte value of Nth dimension of code-point,
MAX-N is the maximum byte value of that.

`:min-code'

VALUE must be an integer specifying the minimum code point of the
charset. If omitted, it is calculated from `:code-space'. VALUE may
be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
the code point and LOW is the least significant 16 bits.

`:max-code'

VALUE must be an integer specifying the maximum code point of the
charset. If omitted, it is calculated from `:code-space'. VALUE may
be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
the code point and LOW is the least significant 16 bits.

`:iso-final-char'

VALUE must be a character in the range 32 to 127 (inclusive)
specifying the final char of the charset for ISO-2022 encoding. If
omitted, the charset can't be encoded by ISO-2022 based
coding-systems.

`:iso-revision-number'

VALUE must be an integer in the range 0..63, specifying the revision
number of the charset for ISO-2022 encoding.

`:emacs-mule-id'

VALUE must be an integer of 0, 129..255. If omitted, the charset
can't be encoded by coding-systems of type `emacs-mule'.

`:ascii-compatible-p'

VALUE must be nil or t (default nil). If VALUE is t, the charset is
compatible with ASCII, i.e. the first 128 code points map to ASCII.

`:supplementary-p'

VALUE must be nil or t. If the VALUE is t, the charset is
supplementary, which means it is used only as a parent or a
subset of some other charset, or it is provided just for backward
compatibility.

`:invalid-code'

VALUE must be a nonnegative integer that can be used as an invalid
code point of the charset. If the minimum code is 0 and the maximum
code is greater than Emacs' maximum integer value, `:invalid-code'
should not be omitted.

`:code-offset'

VALUE must be an integer added to the index number of a character to
get the corresponding character code.

`:map'

VALUE must be vector or string.

If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
where CODE-n is a code-point of the charset, and CHAR-n is the
corresponding character code.

If it is a string, it is a name of file that contains the above
information. Each line of the file must be this format:
	0xXXX 0xYYY
where XXX is a hexadecimal representation of CODE-n and YYY is a
hexadecimal representation of CHAR-n. A line starting with `#' is a
comment line.

`:subset'

VALUE must be a list:
	( PARENT MIN-CODE MAX-CODE OFFSET )
PARENT is a parent charset. MIN-CODE and MAX-CODE specify the range
of characters inherited from the parent. OFFSET is an integer value
to add to a code point of the parent charset to get the corresponding
code point of this charset.

`:superset'

VALUE must be a list of parent charsets. The charset inherits
characters from them. Each element of the list may be a cons (PARENT
. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
value to add to a code point of PARENT to get the corresponding code
point of this charset.

`:unify-map'

VALUE must be vector or string.

If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
where CODE-n is a code-point of the charset, and CHAR-n is the
corresponding Unicode character code.

If it is a string, it is a name of file that contains the above
information. The file format is the same as described for the `:map'
attribute."
  (when (vectorp (car props))
    ;; Old style code:
    ;;   (define-charset CHARSET-ID CHARSET-SYMBOL INFO-VECTOR)
    ;; Convert the argument to make it fit with the current style.
    ;; In that calling convention NAME holds the charset ID and
    ;; DOCSTRING holds the charset symbol, hence the shuffling below.
    (let ((vec (car props)))
      (setq props (convert-define-charset-argument name vec)
            name docstring
            docstring (aref vec 8))))
  ;; ATTRS is an alist whose order is the argument order expected by
  ;; `define-charset-internal'; the cdrs are filled in below.
  (let ((attrs (mapcar 'list '(:dimension
                               :code-space
                               :min-code
                               :max-code
                               :iso-final-char
                               :iso-revision-number
                               :emacs-mule-id
                               :ascii-compatible-p
                               :supplementary-p
                               :invalid-code
                               :code-offset
                               :map
                               :subset
                               :superset
                               :unify-map
                               :plist))))

    ;; If :dimension is omitted, get the dimension from :code-space
    ;; (two vector elements per dimension), defaulting to 4.
    (unless (plist-get props :dimension)
      (let ((code-space (plist-get props :code-space)))
        (setq props
              (plist-put props :dimension
                         (if code-space (/ (length code-space) 2) 4)))))

    ;; If :code-space is omitted, let every byte of each dimension
    ;; range over 0..#xFF.
    (unless (plist-get props :code-space)
      (let ((dimension (plist-get props :dimension))
            (code-space (make-vector 8 0)))
        (dotimes (i dimension)
          (aset code-space (1+ (* i 2)) #xFF))
        (setq props (plist-put props :code-space code-space))))

    ;; If :emacs-mule-id is specified, update emacs-mule-charset-table.
    (let ((emacs-mule-id (plist-get props :emacs-mule-id)))
      (if (integerp emacs-mule-id)
          (aset emacs-mule-charset-table emacs-mule-id name)))

    (dolist (slot attrs)
      (setcdr slot (purecopy (plist-get props (car slot)))))

    ;; Make sure that the value of :code-space is a vector of 8
    ;; elements, padding with zeros as needed.
    (let* ((slot (assq :code-space attrs))
           (val (cdr slot))
           (len (length val)))
      (if (< len 8)
          (setcdr slot
                  (vconcat val (make-vector (- 8 len) 0)))))

    ;; Add :name and :docstring properties to PROPS.
    (setq props
          (cons :name
                (cons name
                      (cons :docstring
                            (cons (purecopy docstring) props)))))
    ;; Default :short-name to NAME and :long-name to :short-name.
    (unless (plist-get props :short-name)
      (setq props (plist-put props :short-name (symbol-name name))))
    (unless (plist-get props :long-name)
      (setq props
            (plist-put props :long-name (plist-get props :short-name))))
    (setq props (plist-put props :base name))
    ;; We can probably get a worthwhile amount in purespace.
    (setq props
          (mapcar (lambda (elt)
                    (if (stringp elt)
                        (purecopy elt)
                      elt))
                  props))
    (setcdr (assq :plist attrs) props)

    (apply 'define-charset-internal name (mapcar 'cdr attrs))))
| 294 | |
| 295 | |
(defun load-with-code-conversion (fullname file &optional noerror nomessage)
  "Load the Lisp code in FILE, whose absolute file name is FULLNAME.
The contents are read into a temporary buffer, decoded if necessary,
and then evaluated.
If FULLNAME is not readable, signal a `file-error', unless the
optional third arg NOERROR is non-nil, in which case return nil.
Messages are printed at the start and end of loading unless the
optional fourth arg NOMESSAGE is non-nil.
Return t if the file was loaded."
  (cond
   ((not (file-readable-p fullname))
    ;; Nothing to load: complain, or silently return nil.
    (unless noerror
      (signal 'file-error (list "Cannot open load file" file))))
   (t
    ;; Read file with code conversion, and then eval.
    ;; BUFFER and SOURCE keep their historical names because they are
    ;; dynamically bound while the file's code runs.
    (let* ((buffer
            ;; `generate-new-buffer' is unavailable here because
            ;; files.el is not yet loaded.
            (get-buffer-create (generate-new-buffer-name " *load*")))
           (load-in-progress t)
           (source (save-match-data (string-match "\\.el\\'" fullname))))
      (unless nomessage
        (message (if source "Loading %s (source)..." "Loading %s...")
                 file))
      (if purify-flag
          (setq preloaded-file-list
                (cons (purecopy file) preloaded-file-list)))
      (unwind-protect
          (let ((load-file-name fullname)
                (set-auto-coding-for-load t)
                (inhibit-file-name-operation nil))
            (with-current-buffer buffer
              ;; Read with real decoding, as a multibyte buffer, so a
              ;; file in a complicated character set still works.
              (set-buffer-multibyte t)
              ;; Keep `deactivate-mark' from remaining set.
              (let (deactivate-mark)
                (insert-file-contents fullname))
              ;; A file that was inserted with a raw-text type coding
              ;; system is evaluated as unibyte, so that `eval-buffer'
              ;; does not interpret binary junk as multibyte characters.
              (when (and enable-multibyte-characters
                         (eq (coding-system-type last-coding-system-used)
                             'raw-text))
                (set-buffer-multibyte nil))
              ;; Make `kill-buffer' quiet.
              (set-buffer-modified-p nil))
            ;; The original buffer is current again while we eval.
            (eval-buffer buffer nil
                         ;; This is compatible with what `load' does.
                         (if purify-flag file fullname)
                         nil t))
        ;; Always dispose of the temporary buffer, without running hooks.
        (let (kill-buffer-hook kill-buffer-query-functions)
          (kill-buffer buffer)))
      (do-after-load-evaluation fullname)

      (unless (or nomessage noninteractive)
        (message (if source
                     "Loading %s (source)...done"
                   "Loading %s...done")
                 file))
      t))))
| 356 | |
(defun charset-info (charset)
  "Return a vector of information of CHARSET.
This function is provided for backward compatibility.

The elements of the vector are:
	CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
	LEADING-CODE-BASE, LEADING-CODE-EXT,
	ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
	REVERSE-CHARSET, SHORT-NAME, LONG-NAME,	DESCRIPTION,
	PLIST.
where
CHARSET-ID is always 0.
BYTES is always 0.
DIMENSION is the number of bytes of a code-point of the charset:
  1, 2, 3, or 4.
CHARS is the number of characters in a dimension:
  94, 96, 128, or 256.
WIDTH is always 0.
DIRECTION is always 0.
LEADING-CODE-BASE is always 0.
LEADING-CODE-EXT is always 0.
ISO-FINAL-CHAR (character) is the final character of the
  corresponding ISO 2022 charset. If the charset is not assigned
  any final character, the value is -1.
ISO-GRAPHIC-PLANE is always 0.
REVERSE-CHARSET is always -1.
SHORT-NAME (string) is the short name to refer to the charset.
LONG-NAME (string) is the long name to refer to the charset.
DESCRIPTION (string) is the description string of the charset.
PLIST (property list) may contain any type of information a user
  want to put and get by functions `put-charset-property' and
  `get-charset-property' respectively."
  (vector 0                                   ; CHARSET-ID
          0                                   ; BYTES
          (charset-dimension charset)         ; DIMENSION
          (charset-chars charset)             ; CHARS
          0                                   ; WIDTH
          0                                   ; DIRECTION
          0                                   ; LEADING-CODE-BASE
          0                                   ; LEADING-CODE-EXT
          (charset-iso-final-char charset)    ; ISO-FINAL-CHAR
          0                                   ; ISO-GRAPHIC-PLANE
          -1                                  ; REVERSE-CHARSET
          (get-charset-property charset :short-name) ; SHORT-NAME
          ;; This slot used to repeat `:short-name', contradicting the
          ;; documented layout.
          (get-charset-property charset :long-name)  ; LONG-NAME
          (charset-description charset)       ; DESCRIPTION
          (charset-plist charset)))           ; PLIST
| 404 | |
| 405 | ;; It is better not to use backquote in this file, |
| 406 | ;; because that makes a bootstrapping problem |
| 407 | ;; if you need to recompile all the Lisp files using interpreted code. |
| 408 | |
(defun charset-id (charset)
  "Return 0, whatever CHARSET is.
This function exists only for backward compatibility."
  0)
(make-obsolete 'charset-id "do not use it." "23.1")
| 413 | |
(defmacro charset-bytes (charset)
  "Expand to the constant 0, whatever CHARSET is.
This macro exists only for backward compatibility."
  0)
(make-obsolete 'charset-bytes "do not use it." "23.1")
| 418 | |
(defun get-charset-property (charset propname)
  "Return the value stored under PROPNAME in the property list of CHARSET.
This is the last value stored with
 (put-charset-property CHARSET PROPNAME VALUE)."
  (let ((plist (charset-plist charset)))
    (plist-get plist propname)))
| 424 | |
(defun put-charset-property (charset propname value)
  "Set CHARSET's PROPNAME property to value VALUE.
A string VALUE is passed through `purecopy' before it is stored.
It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
  (let* ((plist (charset-plist charset))
         (stored (if (stringp value) (purecopy value) value)))
    (set-charset-plist charset (plist-put plist propname stored))))
| 433 | |
(defun charset-description (charset)
  "Return the description string of CHARSET.
This is the `:docstring' entry of the property list of CHARSET."
  (let ((plist (charset-plist charset)))
    (plist-get plist :docstring)))
| 437 | |
(defun charset-dimension (charset)
  "Return the dimension of CHARSET.
This is the `:dimension' entry of the property list of CHARSET."
  (let ((plist (charset-plist charset)))
    (plist-get plist :dimension)))
| 441 | |
(defun charset-chars (charset &optional dimension)
  "Return the number of characters contained in DIMENSION of CHARSET.
DIMENSION defaults to the first dimension.
The count is taken from the `:code-space' property of CHARSET as
MAX - MIN + 1 for that dimension."
  (setq dimension (or dimension 1))
  (let* ((code-space (plist-get (charset-plist charset) :code-space))
         ;; :code-space is [MIN-1 MAX-1 MIN-2 MAX-2 ...].
         (max-index (1- (* 2 dimension)))
         (min-index (- (* 2 dimension) 2)))
    (1+ (- (aref code-space max-index)
           (aref code-space min-index)))))
| 449 | |
(defun charset-iso-final-char (charset)
  "Return the ISO-2022 final character of CHARSET.
If CHARSET has no `:iso-final-char' property, i.e. it is not an
ISO 2022 charset, return -1."
  (let ((final-char (plist-get (charset-plist charset) :iso-final-char)))
    (if final-char
        final-char
      -1)))
| 455 | |
(defmacro charset-short-name (charset)
  "Return short name of CHARSET.
CHARSET is evaluated when the expansion is evaluated; the result is
the `:short-name' entry of the property list of that charset."
  ;; A macro must return a form.  The body used to compute the value
  ;; at expansion time from the unevaluated argument form, which fails
  ;; for calls such as (charset-short-name 'ascii).  The form is built
  ;; with `list' because backquote is avoided in this file.
  (list 'plist-get (list 'charset-plist charset) :short-name))
| 459 | |
(defmacro charset-long-name (charset)
  "Return long name of CHARSET.
CHARSET is evaluated when the expansion is evaluated; the result is
the `:long-name' entry of the property list of that charset."
  ;; A macro must return a form.  The body used to compute the value
  ;; at expansion time from the unevaluated argument form, which fails
  ;; for calls such as (charset-long-name 'ascii).  The form is built
  ;; with `list' because backquote is avoided in this file.
  (list 'plist-get (list 'charset-plist charset) :long-name))
| 463 | |
(defun charset-list ()
  "Return the list of all charsets ever defined.
This is the current value of the variable `charset-list'."
  (symbol-value 'charset-list))
(make-obsolete 'charset-list "use variable `charset-list'." "23.1")
| 468 | |
| 469 | \f |
| 470 | ;;; CHARACTER |
;; `char-valid-p' survives only as an obsolete alias of `characterp'.
(define-obsolete-function-alias 'char-valid-p 'characterp "23.1")

(defun generic-char-p (char)
  "Return nil, whatever CHAR is.
This function exists only for backward compatibility."
  nil)
(make-obsolete 'generic-char-p "generic characters no longer exist." "23.1")
| 477 | |
(defun make-char-internal (charset-id &optional code1 code2)
  "Return the character of the emacs-mule charset CHARSET-ID at CODE1, CODE2.
CHARSET-ID is looked up in `emacs-mule-charset-table', and the charset
found there is passed to `make-char' together with CODE1 and CODE2.
Signal an error if no charset is registered under CHARSET-ID."
  (let ((charset (aref emacs-mule-charset-table charset-id)))
    (unless charset
      (error "Invalid Emacs-mule charset ID: %d" charset-id))
    (make-char charset code1 code2)))
| 483 | \f |
| 484 | ;; Save the ASCII case table in case we need it later. Some locales |
| 485 | ;; (such as Turkish) modify the case behavior of ASCII characters, |
| 486 | ;; which can interfere with networking code that uses ASCII strings. |
| 487 | |
(defvar ascii-case-table
  ;; This mirrors `copy-case-table', spelled out here so that
  ;; case-table.el need not be loaded: copy the standard table and its
  ;; extra slot 0 (when set), and reset extra slots 1 and 2.
  (let* ((standard (standard-case-table))
         (tbl (copy-sequence standard))
         (up (char-table-extra-slot standard 0)))
    (when up
      (set-char-table-extra-slot tbl 0 (copy-sequence up)))
    (dolist (extra-slot '(1 2))
      (set-char-table-extra-slot tbl extra-slot nil))
    tbl)
  "Case table for the ASCII character set.")
| 497 | \f |
| 498 | ;; Coding system stuff |
| 499 | |
| 500 | ;; Coding system is a symbol that has been defined by the function |
| 501 | ;; `define-coding-system'. |
| 502 | |
;; NOTE(review): `designate-at-bol' is a member of the list but is not
;; described in the docstring below.
(defconst coding-system-iso-2022-flags
  '(long-form
    ascii-at-eol
    ascii-at-cntl
    7-bit
    locking-shift
    single-shift
    designation
    revision
    direction
    init-at-bol
    designate-at-bol
    safe
    latin-extra
    composition
    euc-tw-shift
    use-roman
    use-oldjis)
  "List of symbols that control ISO-2022 encoder/decoder.

The value of the `:flags' attribute in the argument of the function
`define-coding-system' must be a list of symbols, each of which is a
member of this list.

If `long-form' is specified, use a long designation sequence on
encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
and `japanese-jisx0208'. The long designation sequence doesn't
conform to ISO 2022, but is used by such coding systems as
`compound-text'.

If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
on encoding.

If `ascii-at-cntl' is specified, designate ASCII to g0 before control
codes and SPC on encoding.

If `7-bit' is specified, use 7-bit code only on encoding.

If `locking-shift' is specified, decode locking-shift code correctly
on decoding, and use locking-shift to invoke a graphic element on
encoding.

If `single-shift' is specified, decode single-shift code correctly on
decoding, and use single-shift to invoke a graphic element on encoding.

If `designation' is specified, decode designation code correctly on
decoding, and use designation to designate a charset to a graphic
element on encoding.

If `revision' is specified, produce an escape sequence to specify
revision number of a charset on encoding. Such an escape sequence is
always correctly decoded on decoding.

If `direction' is specified, decode ISO6429's code for specifying
direction correctly, and produce the code on encoding.

If `init-at-bol' is specified, on encoding, it is assumed that
invocation and designation statuses are reset at each beginning of
line even if `ascii-at-eol' is not specified; thus no codes for
resetting them are produced.

If `safe' is specified, on encoding, characters not supported by a
coding are replaced with `?'.

If `latin-extra' is specified, the code-detection routine assumes that a
code specified in `latin-extra-code-table' (which see) is valid.

If `composition' is specified, an escape sequence to specify
composition sequence is correctly decoded on decoding, and is produced
on encoding.

If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
correctly decoded on decoding, and is produced on encoding.

If `use-roman' is specified, JIS0201-1976-Roman is designated instead
of ASCII.

If `use-oldjis' is specified, JIS0208-1976 is designated instead of
JIS0208-1983.")
| 581 | |
| 582 | (defun define-coding-system (name docstring &rest props) |
| 583 | "Define NAME (a symbol) as a coding system with DOCSTRING and attributes. |
| 584 | The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE |
| 585 | may be any symbol. |
| 586 | |
| 587 | The following attributes have special meanings. Those labeled as |
| 588 | \"(required)\" should not be omitted. |
| 589 | |
| 590 | `:mnemonic' (required) |
| 591 | |
| 592 | VALUE is a character to display on mode line for the coding system. |
| 593 | |
| 594 | `:coding-type' (required) |
| 595 | |
| 596 | VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022', |
| 597 | `emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'. |
| 598 | |
| 599 | `:eol-type' |
| 600 | |
| 601 | VALUE is the EOL (end-of-line) format of the coding system. It must be |
| 602 | one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL |
| 603 | \(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF), |
| 604 | and `mac' means Mac-like EOL \(i.e. single CR). If omitted, Emacs |
| 605 | detects the EOL format automatically when decoding. |
| 606 | |
| 607 | `:charset-list' |
| 608 | |
| 609 | VALUE must be a list of charsets supported by the coding system. On |
| 610 | encoding by the coding system, if a character belongs to multiple |
| 611 | charsets in the list, a charset that comes earlier in the list is |
| 612 | selected. If `:coding-type' is `iso-2022', VALUE may be `iso-2022', |
| 613 | which indicates that the coding system supports all ISO-2022 based |
| 614 | charsets. If `:coding-type' is `emacs-mule', VALUE may be |
| 615 | `emacs-mule', which indicates that the coding system supports all |
| 616 | charsets that have the `:emacs-mule-id' property. |
| 617 | |
| 618 | `:ascii-compatible-p' |
| 619 | |
| 620 | If VALUE is non-nil, the coding system decodes all 7-bit bytes into |
| 621 | the corresponding ASCII characters, and encodes all ASCII characters |
| 622 | back to the corresponding 7-bit bytes. VALUE defaults to nil. |
| 623 | |
| 624 | `:decode-translation-table' |
| 625 | |
| 626 | VALUE must be a translation table to use on decoding. |
| 627 | |
| 628 | `:encode-translation-table' |
| 629 | |
| 630 | VALUE must be a translation table to use on encoding. |
| 631 | |
| 632 | `:post-read-conversion' |
| 633 | |
| 634 | VALUE must be a function to call after some text is inserted and |
| 635 | decoded by the coding system itself and before any functions in |
| 636 | `after-insert-functions' are called. This function is passed one |
| 637 | argument; the number of characters in the text to convert, with |
| 638 | point at the start of the text. The function should leave point |
| 639 | the same, and return the new character count. |
| 640 | |
| 641 | `:pre-write-conversion' |
| 642 | |
| 643 | VALUE must be a function to call after all functions in |
| 644 | `write-region-annotate-functions' and `buffer-file-format' are |
| 645 | called, and before the text is encoded by the coding system |
| 646 | itself. This function should convert the whole text in the |
| 647 | current buffer. For backward compatibility, this function is |
| 648 | passed two arguments which can be ignored. |
| 649 | |
| 650 | `:default-char' |
| 651 | |
| 652 | VALUE must be a character. On encoding, a character not supported by |
| 653 | the coding system is replaced with VALUE. |
| 654 | |
| 655 | `:for-unibyte' |
| 656 | |
| 657 | VALUE non-nil means that visiting a file with the coding system |
| 658 | results in a unibyte buffer. |
| 659 | |
| 660 | `:mime-charset' |
| 661 | |
| 662 | VALUE must be a symbol whose name is that of a MIME charset converted |
| 663 | to lower case. |
| 664 | |
| 665 | `:mime-text-unsuitable' |
| 666 | |
| 667 | VALUE non-nil means the `:mime-charset' property names a charset which |
| 668 | is unsuitable for the top-level media type \"text\". |
| 669 | |
| 670 | `:flags' |
| 671 | |
| 672 | VALUE must be a list of symbols that control the ISO-2022 converter. |
| 673 | Each must be a member of the list `coding-system-iso-2022-flags' |
| 674 | \(which see). This attribute has a meaning only when `:coding-type' |
| 675 | is `iso-2022'. |
| 676 | |
| 677 | `:designation' |
| 678 | |
| 679 | VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE]. |
| 680 | GN-USAGE specifies the usage of graphic register GN as follows. |
| 681 | |
| 682 | If it is nil, no charset can be designated to GN. |
| 683 | |
| 684 | If it is a charset, the charset is initially designated to GN, and |
| 685 | never used by the other charsets. |
| 686 | |
| 687 | If it is a list, the elements must be charsets, nil, 94, or 96. GN |
| 688 | can be used by all the listed charsets. If the list contains 94, any |
| 689 | iso-2022 charset whose code-space ranges are 94 long can be designated |
| 690 | to GN. If the list contains 96, any charsets whose whose ranges are |
| 691 | 96 long can be designated to GN. If the first element is a charset, |
| 692 | that charset is initially designated to GN. |
| 693 | |
| 694 | This attribute has a meaning only when `:coding-type' is `iso-2022'. |
| 695 | |
| 696 | `:bom' |
| 697 | |
| 698 | This attributes specifies whether the coding system uses a `byte order |
| 699 | mark'. VALUE must be nil, t, or cons of coding systems whose |
| 700 | `:coding-type' is `utf-16' or `utf-8'. |
| 701 | |
| 702 | If the value is nil, on decoding, don't treat the first two-byte as |
| 703 | BOM, and on encoding, don't produce BOM bytes. |
| 704 | |
| 705 | If the value is t, on decoding, skip the first two-byte as BOM, and on |
| 706 | encoding, produce BOM bytes according to the value of `:endian'. |
| 707 | |
| 708 | If the value is cons, on decoding, check the first two-byte. If they |
| 709 | are 0xFE 0xFF, use the car part coding system of the value. If they |
| 710 | are 0xFF 0xFE, use the cdr part coding system of the value. |
| 711 | Otherwise, treat them as bytes for a normal character. On encoding, |
| 712 | produce BOM bytes according to the value of `:endian'. |
| 713 | |
| 714 | This attribute has a meaning only when `:coding-type' is `utf-16' or |
| 715 | `utf-8'. |
| 716 | |
| 717 | `:endian' |
| 718 | |
| 719 | VALUE must be `big' or `little' specifying big-endian and |
| 720 | little-endian respectively. The default value is `big'. |
| 721 | |
| 722 | This attribute has a meaning only when `:coding-type' is `utf-16'. |
| 723 | |
| 724 | `:ccl-decoder' |
| 725 | |
| 726 | VALUE is a symbol representing the registered CCL program used for |
| 727 | decoding. This attribute has a meaning only when `:coding-type' is |
| 728 | `ccl'. |
| 729 | |
| 730 | `:ccl-encoder' |
| 731 | |
| 732 | VALUE is a symbol representing the registered CCL program used for |
| 733 | encoding. This attribute has a meaning only when `:coding-type' is |
| 734 | `ccl'." |
| 735 | (let* ((common-attrs (mapcar 'list |
| 736 | '(:mnemonic |
| 737 | :coding-type |
| 738 | :charset-list |
| 739 | :ascii-compatible-p |
| 740 | :decode-translation-table |
| 741 | :encode-translation-table |
| 742 | :post-read-conversion |
| 743 | :pre-write-conversion |
| 744 | :default-char |
| 745 | :for-unibyte |
| 746 | :plist |
| 747 | :eol-type))) |
| 748 | (coding-type (plist-get props :coding-type)) |
| 749 | (spec-attrs (mapcar 'list |
| 750 | (cond ((eq coding-type 'iso-2022) |
| 751 | '(:initial |
| 752 | :reg-usage |
| 753 | :request |
| 754 | :flags)) |
| 755 | ((eq coding-type 'utf-8) |
| 756 | '(:bom)) |
| 757 | ((eq coding-type 'utf-16) |
| 758 | '(:bom |
| 759 | :endian)) |
| 760 | ((eq coding-type 'ccl) |
| 761 | '(:ccl-decoder |
| 762 | :ccl-encoder |
| 763 | :valids)))))) |
| 764 | |
| 765 | (dolist (slot common-attrs) |
| 766 | (setcdr slot (plist-get props (car slot)))) |
| 767 | |
| 768 | (dolist (slot spec-attrs) |
| 769 | (setcdr slot (plist-get props (car slot)))) |
| 770 | |
| 771 | (if (eq coding-type 'iso-2022) |
| 772 | (let ((designation (plist-get props :designation)) |
| 773 | (flags (plist-get props :flags)) |
| 774 | (initial (make-vector 4 nil)) |
| 775 | (reg-usage (cons 4 4)) |
| 776 | request elt) |
| 777 | (dotimes (i 4) |
| 778 | (setq elt (aref designation i)) |
| 779 | (cond ((charsetp elt) |
| 780 | (aset initial i elt) |
| 781 | (setq request (cons (cons elt i) request))) |
| 782 | ((consp elt) |
| 783 | (aset initial i (car elt)) |
| 784 | (if (charsetp (car elt)) |
| 785 | (setq request (cons (cons (car elt) i) request))) |
| 786 | (dolist (e (cdr elt)) |
| 787 | (cond ((charsetp e) |
| 788 | (setq request (cons (cons e i) request))) |
| 789 | ((eq e 94) |
| 790 | (setcar reg-usage i)) |
| 791 | ((eq e 96) |
| 792 | (setcdr reg-usage i)) |
| 793 | ((eq e t) |
| 794 | (setcar reg-usage i) |
| 795 | (setcdr reg-usage i))))))) |
| 796 | (setcdr (assq :initial spec-attrs) initial) |
| 797 | (setcdr (assq :reg-usage spec-attrs) reg-usage) |
| 798 | (setcdr (assq :request spec-attrs) request) |
| 799 | |
| 800 | ;; Change :flags value from a list to a bit-mask. |
| 801 | (let ((bits 0) |
| 802 | (i 0)) |
| 803 | (dolist (elt coding-system-iso-2022-flags) |
| 804 | (if (memq elt flags) |
| 805 | (setq bits (logior bits (lsh 1 i)))) |
| 806 | (setq i (1+ i))) |
| 807 | (setcdr (assq :flags spec-attrs) bits)))) |
| 808 | |
| 809 | ;; Add :name and :docstring properties to PROPS. |
| 810 | (setq props |
| 811 | (cons :name (cons name (cons :docstring (cons (purecopy docstring) |
| 812 | props))))) |
| 813 | (setcdr (assq :plist common-attrs) props) |
| 814 | (apply 'define-coding-system-internal |
| 815 | name (mapcar 'cdr (append common-attrs spec-attrs))))) |
| 816 | |
(defun coding-system-doc-string (coding-system)
  "Return the `:docstring' entry of CODING-SYSTEM's property list."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :docstring)))
| 820 | |
(defun coding-system-mnemonic (coding-system)
  "Return the character used as the mnemonic of CODING-SYSTEM.
The mode line shows this character to indicate the coding system.
If CODING-SYSTEM is nil, return ?=."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :mnemonic)))
| 826 | |
(defun coding-system-type (coding-system)
  "Return the coding type of CODING-SYSTEM.
The coding type is a symbol naming the encoding method; it is the
value stored under `:coding-type' in the coding system's property list.
See the function `define-coding-system' for more detail."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :coding-type)))
| 832 | |
(defun coding-system-charset-list (coding-system)
  "Return the list of charsets that CODING-SYSTEM supports.
The value is `iso-2022' if CODING-SYSTEM supports all ISO-2022
charsets, and `emacs-mule' if it supports all emacs-mule charsets."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :charset-list)))
| 838 | |
(defun coding-system-category (coding-system)
  "Return the category symbol recorded for CODING-SYSTEM.
This is the `:category' entry of the coding system's property list."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :category)))
| 842 | |
(defun coding-system-get (coding-system prop)
  "Return the value of property PROP in CODING-SYSTEM's property list.
For compatibility with Emacs 20/21, PROP may be an old-style symbol
such as `mime-charset' as well as the current keyword style such as
`:mime-charset'.  The old-style symbol `ascii-incompatible' yields
the negation of the `:ascii-compatible-p' property."
  (let ((direct (plist-get (coding-system-plist coding-system) prop)))
    (cond
     ;; PROP itself is present with a non-nil value: use it.
     (direct)
     ;; A keyword has no legacy spelling to fall back on.
     ((keywordp prop) nil)
     ;; For backward compatibility.
     ((eq prop 'ascii-incompatible)
      (not (plist-get (coding-system-plist coding-system)
                      :ascii-compatible-p)))
     ;; Retry with the keyword spelling of the old-style symbol.
     (t
      (plist-get (coding-system-plist coding-system)
                 (intern (concat ":" (symbol-name prop))))))))
| 855 | |
(defun coding-system-eol-type-mnemonic (coding-system)
  "Return the end-of-line format indicator of CODING-SYSTEM as a string.
The indicator is taken from `eol-mnemonic-unix', `eol-mnemonic-dos' or
`eol-mnemonic-mac' when the eol type is 0, 1 or 2 respectively, and
from `eol-mnemonic-undecided' otherwise.  A character value is
converted to a one-character string."
  (let* ((eol (coding-system-eol-type coding-system))
         (indicator (if (memq eol '(0 1 2))
                        (nth eol (list eol-mnemonic-unix
                                       eol-mnemonic-dos
                                       eol-mnemonic-mac))
                      eol-mnemonic-undecided)))
    (if (stringp indicator)
        indicator
      (char-to-string indicator))))
| 866 | |
(defun coding-system-lessp (x y)
  "Return non-nil if coding system X sorts before coding system Y.
`no-conversion', `emacs-mule' and `undecided' sort first, in that
order.  Other coding systems are ordered by their mnemonic characters
compared case-insensitively, with the raw character code as the
tie-breaker."
  (let ((pinned '(no-conversion emacs-mule undecided))
        (verdict 'unknown))
    ;; Walk the pinned symbols in priority order; the first one that
    ;; matches X or Y settles the comparison.
    (while (and pinned (eq verdict 'unknown))
      (cond ((eq x (car pinned)) (setq verdict t))
            ((eq y (car pinned)) (setq verdict nil)))
      (setq pinned (cdr pinned)))
    (if (not (eq verdict 'unknown))
        verdict
      (let* ((mx (coding-system-mnemonic x))
             (my (coding-system-mnemonic y))
             (lx (downcase mx))
             (ly (downcase my)))
        (or (< lx ly)
            (and (= lx ly)
                 (< mx my)))))))
| 879 | |
(defun coding-system-equal (coding-system-1 coding-system-2)
  "Return non-nil if CODING-SYSTEM-1 and CODING-SYSTEM-2 are identical.
Two coding systems are identical if both symbols are equal
or one is an alias of the other.  Concretely, they must either be
the same symbol, or have `equal' property lists together with eol
types that are `eq' or are both vectors."
  (cond
   ((eq coding-system-1 coding-system-2))
   ((equal (coding-system-plist coding-system-1)
           (coding-system-plist coding-system-2))
    (let ((eol-1 (coding-system-eol-type coding-system-1))
          (eol-2 (coding-system-eol-type coding-system-2)))
      (or (eq eol-1 eol-2)
          (and (vectorp eol-1) (vectorp eol-2)))))))
| 891 | |
(defun add-to-coding-system-list (coding-system)
  "Insert CODING-SYSTEM into `coding-system-list', keeping the list sorted.
The ordering is the one defined by `coding-system-lessp'."
  (if (or (null coding-system-list)
          (coding-system-lessp coding-system (car coding-system-list)))
      ;; Empty list, or CODING-SYSTEM sorts before the current head.
      (push coding-system coding-system-list)
    ;; Otherwise narrow down the insertion point by repeatedly halving
    ;; the span of cells that starts at CURSOR.
    (let ((span (length coding-system-list))
          (cursor coding-system-list)
          half probe)
      (while (> span 1)
        (setq half (/ span 2)
              probe (nthcdr half cursor))
        (if (coding-system-lessp (car probe) coding-system)
            ;; The middle element sorts first: continue in the upper part.
            (setq cursor probe
                  span (- span half))
          (setq span half)))
      ;; Splice the new element in right after CURSOR.
      (setcdr cursor (cons coding-system (cdr cursor))))))
| 906 | |
(defun coding-system-list (&optional base-only)
  "Return a list of all existing non-subsidiary coding systems.
With a non-nil BASE-ONLY, list base coding systems only; otherwise
list every alias of each base coding system as well.  The value never
includes the subsidiary coding systems that are derived automatically
from bases and aliases for the various end-of-line formats
\(e.g. iso-latin-1-unix, koi8-r-dos)."
  (let (result)
    (dolist (cs coding-system-list result)
      ;; Only entries that are their own base contribute.
      (when (eq cs (coding-system-base cs))
        (if base-only
            (push cs result)
          (dolist (alias (coding-system-aliases cs))
            (push alias result)))))))
| 921 | |
;; Placeholder retained for old code that still refers to this variable.
;; Its value is nil, and it is marked obsolete as of 23.1 with no
;; replacement (the second argument of `make-obsolete-variable' is nil).
(defconst char-coding-system-table nil
  "It exists just for backward compatibility, and the value is always nil.")
(make-obsolete-variable 'char-coding-system-table nil "23.1")
| 925 | |
(defun transform-make-coding-system-args (name type &optional doc-string props)
  "For internal use only.
Transform XEmacs style args for `make-coding-system' to Emacs style.
Value is a list of transformed arguments, of the form
\(NAME TYPE-NUMBER MNEMONIC DOC-STRING FLAGS PROPERTIES EOL-TYPE).

NAME and DOC-STRING are passed through unchanged.  TYPE is one of the
symbols `shift-jis', `iso2022', `big5' or `ccl'; any other value
signals an error.  PROPS is a property list keyed by plain symbols.

In the result, MNEMONIC is the first character of the `mnemonic'
property (\"?\" when absent), EOL-TYPE is the `eol-type' property with
`lf', `crlf' and `cr' renamed to `unix', `dos' and `mac', and
PROPERTIES is an alist of (KEY . VALUE) pairs holding the
`post-read-conversion' and `pre-write-conversion' properties that
were given.  An alist is used because `make-coding-system' walks its
PROPERTIES argument with `car' and `cdr' on each element."
  (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
        (eol-type (plist-get props 'eol-type))
        properties)
    (cond
     ((eq eol-type 'lf) (setq eol-type 'unix))
     ((eq eol-type 'crlf) (setq eol-type 'dos))
     ((eq eol-type 'cr) (setq eol-type 'mac)))
    ;; Collect the conversion functions as alist entries.  Pushing in
    ;; this order leaves `post-read-conversion' first in the result.
    (dolist (key '(pre-write-conversion post-read-conversion))
      (let ((value (plist-get props key)))
        (if value
            (setq properties (cons (cons key value) properties)))))
    (cond
     ((eq type 'shift-jis)
      `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
     ((eq type 'iso2022)                ; This is not perfect.
      (if (plist-get props 'escape-quoted)
          (error "escape-quoted is not supported: %S"
                 `(,name ,type ,doc-string ,props)))
      (let ((g0 (plist-get props 'charset-g0))
            (g1 (plist-get props 'charset-g1))
            (g2 (plist-get props 'charset-g2))
            (g3 (plist-get props 'charset-g3))
            ;; Non-nil when ASCII and latin-jisx0201 are mapped onto
            ;; each other by the input/output charset conversions.
            (use-roman
             (and
              (eq (cadr (assoc 'latin-jisx0201
                               (plist-get props 'input-charset-conversion)))
                  'ascii)
              (eq (cadr (assoc 'ascii
                               (plist-get props 'output-charset-conversion)))
                  'latin-jisx0201)))
            ;; Likewise for japanese-jisx0208-1978 and japanese-jisx0208.
            (use-oldjis
             (and
              (eq (cadr (assoc 'japanese-jisx0208-1978
                               (plist-get props 'input-charset-conversion)))
                  'japanese-jisx0208)
              (eq (cadr (assoc 'japanese-jisx0208
                               (plist-get props 'output-charset-conversion)))
                  'japanese-jisx0208-1978))))
        ;; Each GN that names a charset becomes either (nil CHARSET),
        ;; when `force-gN-on-output' is set, or (CHARSET t).
        (if (charsetp g0)
            (if (plist-get props 'force-g0-on-output)
                (setq g0 `(nil ,g0))
              (setq g0 `(,g0 t))))
        (if (charsetp g1)
            (if (plist-get props 'force-g1-on-output)
                (setq g1 `(nil ,g1))
              (setq g1 `(,g1 t))))
        (if (charsetp g2)
            (if (plist-get props 'force-g2-on-output)
                (setq g2 `(nil ,g2))
              (setq g2 `(,g2 t))))
        (if (charsetp g3)
            (if (plist-get props 'force-g3-on-output)
                (setq g3 `(nil ,g3))
              (setq g3 `(,g3 t))))
        ;; FLAGS is a 17-element list: the four designations followed
        ;; by thirteen flag values, in the positional order that
        ;; `make-coding-system' reads with `nth'.
        `(,name 2 ,mnemonic ,doc-string
                (,g0 ,g1 ,g2 ,g3
                     ,(plist-get props 'short)
                     ,(not (plist-get props 'no-ascii-eol))
                     ,(not (plist-get props 'no-ascii-cntl))
                     ,(plist-get props 'seven)
                     t
                     ,(not (plist-get props 'lock-shift))
                     ,use-roman
                     ,use-oldjis
                     ,(plist-get props 'no-iso6429)
                     nil nil nil nil)
                ,properties ,eol-type)))
     ((eq type 'big5)
      `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
     ((eq type 'ccl)
      ;; FLAGS for CCL is a cons (DECODER . ENCODER).
      `(,name 4 ,mnemonic ,doc-string
              (,(plist-get props 'decode) . ,(plist-get props 'encode))
              ,properties ,eol-type))
     (t
      (error "unsupported XEmacs style make-coding-system arguments: %S"
             `(,name ,type ,doc-string ,props))))))
| 1006 | |
(defun make-coding-system (coding-system type mnemonic doc-string
                                         &optional
                                         flags
                                         properties
                                         eol-type)
  "Define a new coding system CODING-SYSTEM (symbol).
This function is provided for backward compatibility.

TYPE is an integer selecting the coding type: 0 for `emacs-mule',
1 for `shift-jis', 2 for `iso-2022', 3 for `big5', 4 for `ccl' and
5 for `raw-text'.  Any other integer signals an error.  If TYPE is a
symbol instead, the call is taken to use XEmacs-style arguments
\(NAME TYPE DOC-STRING PROPS) and is first converted with
`transform-make-coding-system-args'.

MNEMONIC and DOC-STRING become the `:mnemonic' property and the
documentation string of the new coding system.

FLAGS depends on TYPE.  For type 2 it is a list whose first four
elements give the designations for G0..G3 and whose following
elements are positional flags.  For type 4 it is a cons
\(DECODER . ENCODER) of CCL programs.

PROPERTIES is an alist of (KEY . VALUE) pairs; old-style keys such as
`safe-charsets' are renamed to the keywords understood by
`define-coding-system'.

EOL-TYPE may be 0, 1 or 2 (meaning `unix', `dos' and `mac'); a vector
is treated as nil."
  ;; For compatibility with XEmacs, we check the type of TYPE.  If it
  ;; is a symbol, perhaps, this function is called with XEmacs-style
  ;; arguments.  Here, try to transform that kind of arguments to
  ;; Emacs style.
  (if (symbolp type)
      (let ((args (transform-make-coding-system-args coding-system type
                                                     mnemonic doc-string)))
        (setq coding-system (car args)
              type (nth 1 args)
              mnemonic (nth 2 args)
              doc-string (nth 3 args)
              flags (nth 4 args)
              properties (nth 5 args)
              eol-type (nth 6 args))))

  ;; Map the numeric type to the `:coding-type' symbol.  Type 2 must
  ;; be spelled `iso-2022': that is the symbol `define-coding-system'
  ;; tests for before it processes `:flags' and `:designation'.
  (setq type
        (cond ((eq type 0) 'emacs-mule)
              ((eq type 1) 'shift-jis)
              ((eq type 2) 'iso-2022)
              ((eq type 3) 'big5)
              ((eq type 4) 'ccl)
              ((eq type 5) 'raw-text)
              (t
               (error "Invalid coding system type: %s" type))))

  ;; Convert the PROPERTIES alist into a keyword plist, renaming the
  ;; old-style keys on the way.  Unknown keys are kept as they are.
  (setq properties
        (let ((plist nil) key)
          (dolist (elt properties)
            (setq key (car elt))
            (cond ((eq key 'post-read-conversion)
                   (setq key :post-read-conversion))
                  ((eq key 'pre-write-conversion)
                   (setq key :pre-write-conversion))
                  ((eq key 'translation-table-for-decode)
                   (setq key :decode-translation-table))
                  ((eq key 'translation-table-for-encode)
                   (setq key :encode-translation-table))
                  ((eq key 'safe-charsets)
                   (setq key :charset-list))
                  ((eq key 'mime-charset)
                   (setq key :mime-charset))
                  ((eq key 'valid-codes)
                   (setq key :valids)))
            (setq plist (plist-put plist key (cdr elt))))
          plist))
  (setq properties (plist-put properties :mnemonic mnemonic))
  (setq properties (plist-put properties :coding-type type))
  (cond ((eq eol-type 0) (setq eol-type 'unix))
        ((eq eol-type 1) (setq eol-type 'dos))
        ((eq eol-type 2) (setq eol-type 'mac))
        ((vectorp eol-type) (setq eol-type nil)))
  (setq properties (plist-put properties :eol-type eol-type))

  (cond
   ((eq type 'iso-2022)
    ;; Translate the positional flag values into a list of flag
    ;; symbols.  Unset flags leave a nil in the list.
    (setq properties
          (plist-put properties :flags
                     (list (and (or (consp (nth 0 flags))
                                    (consp (nth 1 flags))
                                    (consp (nth 2 flags))
                                    (consp (nth 3 flags))) 'designation)
                           (or (nth 4 flags) 'long-form)
                           (and (nth 5 flags) 'ascii-at-eol)
                           (and (nth 6 flags) 'ascii-at-cntl)
                           (and (nth 7 flags) '7-bit)
                           (and (nth 8 flags) 'locking-shift)
                           (and (nth 9 flags) 'single-shift)
                           (and (nth 10 flags) 'use-roman)
                           (and (nth 11 flags) 'use-oldjis)
                           (or (nth 12 flags) 'direction)
                           (and (nth 13 flags) 'init-at-bol)
                           (and (nth 14 flags) 'designate-at-bol)
                           (and (nth 15 flags) 'safe)
                           (and (nth 16 flags) 'latin-extra))))
    ;; Build the four-element designation vector from FLAGS[0..3].
    ;; A bare t, or a t inside a list, stands for the sizes (94 96).
    (setq properties
          (plist-put properties :designation
                     (let ((vec (make-vector 4 nil)))
                       (dotimes (i 4)
                         (let ((spec (nth i flags)))
                           (if (eq spec t)
                               (aset vec i '(94 96))
                             (if (consp spec)
                                 (progn
                                   (if (memq t spec)
                                       (setq spec (append (delq t spec)
                                                          '(94 96))))
                                   (aset vec i spec))))))
                       vec))))

   ((eq type 'ccl)
    (setq properties (plist-put properties :ccl-decoder (car flags)))
    (setq properties (plist-put properties :ccl-encoder (cdr flags)))))

  (apply 'define-coding-system coding-system doc-string properties))

(make-obsolete 'make-coding-system 'define-coding-system "23.1")
| 1107 | |
(defun merge-coding-systems (first second)
  "Fill in any unspecified aspects of coding system FIRST from SECOND.
Return the resulting coding system.
The text conversion is taken from SECOND when the base of FIRST is
`undecided'; the end-of-line conversion is taken from SECOND when
FIRST leaves it open and SECOND fixes it to 0, 1 or 2."
  (let ((merged first)
        (text-source (coding-system-base second))
        (eol-source (coding-system-eol-type second)))
    ;; If FIRST doesn't specify text conversion, merge with that of SECOND.
    (when (eq (coding-system-base merged) 'undecided)
      (setq merged (coding-system-change-text-conversion merged text-source)))
    ;; If FIRST doesn't specify eol conversion, merge with that of SECOND.
    (when (and (vectorp (coding-system-eol-type merged))
               (numberp eol-source)
               (<= 0 eol-source 2))
      (setq merged (coding-system-change-eol-conversion merged eol-source)))
    merged))
| 1122 | |
(defun autoload-coding-system (symbol form)
  "Define SYMBOL as a coding-system that is defined on demand.

FORM is a form to evaluate to define the coding-system.  It is stored
in the `coding-system-define-form' property of SYMBOL and of the
symbols made by appending \"-unix\", \"-dos\" and \"-mac\" to its name,
and all four names are added to `coding-system-alist'."
  (let ((base-name (symbol-name symbol)))
    (put symbol 'coding-system-define-form form)
    (push (list base-name) coding-system-alist)
    (dolist (suffix '("-unix" "-dos" "-mac"))
      (let ((variant (concat base-name suffix)))
        (put (intern variant) 'coding-system-define-form form)
        (push (list variant) coding-system-alist)))))
| 1134 | |
;; This variable is set in these three cases:
;;   (1) A file is read by a coding system specified explicitly.
;;       after-insert-file-set-coding sets the car of this value to
;;       coding-system-for-read, and sets the cdr to nil.
;;   (2) A buffer is saved.
;;       After writing, basic-save-buffer-1 sets the car of this value
;;       to last-coding-system-used.
;;   (3) set-buffer-file-coding-system is called.
;;       The cdr of this value is set to the specified coding system.
;; This variable is used for decoding in revert-buffer and encoding in
;; select-safe-coding-system.
;;
;; The variable is made automatically buffer-local below, and its
;; `permanent-local' property is set to t.
(defvar buffer-file-coding-system-explicit nil
  "The file coding system explicitly specified for the current buffer.
The value is a cons of coding systems for reading (decoding) and
writing (encoding).
Internal use only.")
(make-variable-buffer-local 'buffer-file-coding-system-explicit)
(put 'buffer-file-coding-system-explicit 'permanent-local t)
| 1153 | |
(defun read-buffer-file-coding-system ()
  "Read a coding system for saving the current buffer from the minibuffer.
Return the chosen coding system as a symbol, or nil if the input is
empty.  Completion first offers the coding systems reported by
`find-coding-systems-region' for the accessible portion of the
buffer, then everything in `coding-system-alist'."
  (let* (;; Coding systems reported for the accessible portion of the
         ;; buffer; the list (undecided) is treated below as "no
         ;; restriction".
         (bcss (find-coding-systems-region (point-min) (point-max)))
         ;; Names to complete first: the bare eol names plus every
         ;; member of `coding-system-list' whose base is in BCSS.
         (css-table
          (unless (equal bcss '(undecided))
            (append '("dos" "unix" "mac")
                    (delq nil (mapcar (lambda (cs)
                                        (if (memq (coding-system-base cs) bcss)
                                            (symbol-name cs)))
                                      coding-system-list)))))
         ;; Try CSS-TABLE first and fall back to all known names.
         (combined-table
          (if css-table
              (completion-table-in-turn css-table coding-system-alist)
            coding-system-alist))
         ;; Value returned by `set-auto-coding-function', called at the
         ;; start of the widened buffer; skipped when
         ;; `find-file-literally' is non-nil.
         (auto-cs
          (unless find-file-literally
            (save-excursion
              (save-restriction
                (widen)
                (goto-char (point-min))
                (funcall set-auto-coding-function
                         (or buffer-file-name "") (buffer-size))))))
         ;; Candidate defaults: the default `buffer-file-coding-system'
         ;; followed by the priority list, each entry replaced by nil
         ;; unless it passes the checks below.
         (preferred
          (let ((bfcs (default-value 'buffer-file-coding-system)))
            (cons (and (or (equal bcss '(undecided))
                           (memq (coding-system-base bfcs) bcss))
                       bfcs)
                  (mapcar (lambda (cs)
                            (and (coding-system-p cs)
                                 (coding-system-get cs :mime-charset)
                                 (or (equal bcss '(undecided))
                                     (memq (coding-system-base cs) bcss))
                                 cs))
                          (coding-system-priority-list)))))
         (default
           (let ((current (coding-system-base buffer-file-coding-system)))
             ;; Generally use as a default the first preferred coding-system
             ;; different from the current coding-system, except for
             ;; the case of auto-cs since choosing anything else is asking
             ;; for trouble (would lead to using a different coding
             ;; system than specified in the coding tag).
             (or auto-cs
                 (car (delq nil
                            (mapcar (lambda (cs)
                                      (if (eq current (coding-system-base cs))
                                          nil
                                        cs))
                                    preferred))))))
         (completion-ignore-case t)
         (completion-pcm--delim-wild-regex ; Let "u8" complete to "utf-8".
          (concat completion-pcm--delim-wild-regex
                  "\\|\\([[:alpha:]]\\)[[:digit:]]"))
         ;; Require a match; DEFAULT is offered as the default input.
         (cs (completing-read
              (format "Coding system for saving file (default %s): " default)
              combined-table
              nil t nil 'coding-system-history
              (if default (symbol-name default)))))
    ;; Empty input yields nil rather than an interned empty symbol.
    (unless (zerop (length cs)) (intern cs))))
| 1211 | |
(defun set-buffer-file-coding-system (coding-system &optional force nomodify)
  "Set the file coding-system of the current buffer to CODING-SYSTEM.
This means that when you save the buffer, it will be converted
according to CODING-SYSTEM.  For a list of possible values of
CODING-SYSTEM, use \\[list-coding-systems].

If CODING-SYSTEM leaves the text conversion unspecified, or if it leaves
the end-of-line conversion unspecified, FORCE controls what to do.
If FORCE is nil, get the unspecified aspect (or aspects) from the buffer's
previous `buffer-file-coding-system' value (if it is specified there).
Otherwise, leave it unspecified.

This marks the buffer modified so that the succeeding \\[save-buffer]
surely saves the buffer with CODING-SYSTEM.  From a program, if you
don't want to mark the buffer modified, specify t for NOMODIFY.
If you know exactly what coding system you want to use,
just set the variable `buffer-file-coding-system' directly."
  ;; Interactively, the prefix argument supplies FORCE.
  (interactive
   (list (read-buffer-file-coding-system)
         current-prefix-arg))
  (check-coding-system coding-system)
  ;; Without FORCE, inherit unspecified aspects from the current value.
  (if (and coding-system buffer-file-coding-system (null force))
      (setq coding-system
            (merge-coding-systems coding-system buffer-file-coding-system)))
  (when (called-interactively-p 'interactive)
    ;; Check whether save would succeed, and jump to the offending char(s)
    ;; if not.
    (let ((css (find-coding-systems-region (point-min) (point-max))))
      (unless (or (eq (car css) 'undecided)
                  (memq (coding-system-base coding-system) css))
        (setq coding-system (select-safe-coding-system-interactively
                             (point-min) (point-max) css
                             (list coding-system))))))
  (setq buffer-file-coding-system coding-system)
  ;; Record the choice in the cdr (the writing side) of
  ;; `buffer-file-coding-system-explicit', creating the cons if needed.
  (if buffer-file-coding-system-explicit
      (setcdr buffer-file-coding-system-explicit coding-system)
    (setq buffer-file-coding-system-explicit (cons nil coding-system)))
  (unless nomodify
    (set-buffer-modified-p t))
  (force-mode-line-update))
| 1252 | |
(defun revert-buffer-with-coding-system (coding-system &optional force)
  "Visit the current buffer's file again using coding system CODING-SYSTEM.
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].

CODING-SYSTEM may leave the text conversion or the end-of-line
conversion unspecified.  If FORCE is nil, each unspecified aspect is
taken from the buffer's previous `buffer-file-coding-system' value
\(if it is specified there).  Otherwise, it is determined from the
file contents as usual for visiting a file."
  (interactive "zCoding system for visited file (default nil): \nP")
  (check-coding-system coding-system)
  (let ((coding-system-for-read
         (if (and coding-system buffer-file-coding-system (not force))
             (merge-coding-systems coding-system buffer-file-coding-system)
           coding-system)))
    (revert-buffer)))
| 1269 | |
(defun set-file-name-coding-system (coding-system)
  "Set coding system for decoding and encoding file names to CODING-SYSTEM.
This just sets the variable `file-name-coding-system' (which see)
to CODING-SYSTEM, after checking that CODING-SYSTEM is valid.
A non-nil CODING-SYSTEM that is neither ASCII compatible nor marked
`:suitable-for-file-name' signals an error."
  (interactive "zCoding system for file names (default nil): ")
  (check-coding-system coding-system)
  (when coding-system
    (unless (or (coding-system-get coding-system :ascii-compatible-p)
                (coding-system-get coding-system :suitable-for-file-name))
      (error "%s is not suitable for file names" coding-system)))
  (setq file-name-coding-system coding-system))
| 1281 | |
;; `set-terminal-coding-system' stores each non-nil coding system it
;; is given here, and falls back to this value when it is called with
;; nil while no terminal coding system is in effect.
(defvar default-terminal-coding-system nil
  "Default value for the terminal coding system.
This is normally set according to the selected language environment.
See also the command `set-terminal-coding-system'.")
| 1286 | |
(defun set-terminal-coding-system (coding-system &optional terminal)
  "Set coding system of terminal output to CODING-SYSTEM.
All text output to TERMINAL will be encoded
with the specified coding system.

For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
The default is determined by the selected language environment
or by the previous use of this command.

TERMINAL may be a terminal object, a frame, or nil for the
selected frame's terminal.  The setting has no effect on
graphical terminals."
  (interactive
   ;; Offer `default-terminal-coding-system' as the default only when
   ;; no terminal coding system is currently in effect.
   (let ((fallback (and (not (terminal-coding-system))
                        default-terminal-coding-system)))
     (list (read-coding-system
            (format "Coding system for terminal display (default %s): "
                    fallback)
            fallback))))
  (unless (or coding-system (terminal-coding-system))
    (setq coding-system default-terminal-coding-system))
  ;; Remember the choice for later invocations.
  (when coding-system
    (setq default-terminal-coding-system coding-system))
  (set-terminal-coding-system-internal coding-system terminal)
  (redraw-frame (selected-frame)))
| 1314 | |
;; Offered by the interactive form of `set-keyboard-coding-system' as
;; the default when the current keyboard coding system is of type
;; `raw-text'.
(defvar default-keyboard-coding-system nil
  "Default value of the keyboard coding system.
This is normally set according to the selected language environment.
See also the command `set-keyboard-coding-system'.")
| 1319 | |
(defun set-keyboard-coding-system (coding-system &optional terminal)
  "Set coding system for keyboard input on TERMINAL to CODING-SYSTEM.

For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
The default is determined by the selected language environment
or by the previous use of this command.

If CODING-SYSTEM is nil or the coding-type of CODING-SYSTEM is
`raw-text', the decoding of keyboard input is disabled.

TERMINAL may be a terminal object, a frame, or nil for the
selected frame's terminal.  The setting has no effect on
graphical terminals."
  (interactive
   ;; Offer `default-keyboard-coding-system' as the default only when
   ;; the current keyboard coding system is of type `raw-text'.
   (list (let* ((coding (keyboard-coding-system nil))
                (default (if (eq (coding-system-type coding) 'raw-text)
                             default-keyboard-coding-system)))
           (read-coding-system
            (format "Coding system for keyboard input (default %s): "
                    default)
            default))))
  (let ((coding-type (coding-system-type coding-system))
        ;; A cons whose car is the input meta mode that was in effect
        ;; before this function switched it to 8, or nil if nothing
        ;; has been saved.
        (saved-meta-mode
         (terminal-parameter terminal 'keyboard-coding-saved-meta-mode)))
    (if (not (eq coding-type 'raw-text))
        (let (accept-8-bit)
          (if (not (or (coding-system-get coding-system :suitable-for-keyboard)
                       (coding-system-get coding-system :ascii-compatible-p)))
              (error "Unsuitable coding system for keyboard: %s" coding-system))
          ;; Decide whether the terminal must pass 8-bit input through.
          (cond ((memq coding-type '(charset utf-8 shift-jis big5 ccl))
                 (setq accept-8-bit t))
                ((eq coding-type 'iso-2022)
                 (let ((flags (coding-system-get coding-system :flags)))
                   (or (memq '7-bit flags)
                       (setq accept-8-bit t))))
                (t
                 (error "Unsupported coding system for keyboard: %s"
                        coding-system)))
          (when accept-8-bit
            ;; Save the current meta mode once, so that it can be
            ;; restored when keyboard decoding is disabled again.
            (or saved-meta-mode
                (set-terminal-parameter terminal
                                        'keyboard-coding-saved-meta-mode
                                        (cons (nth 2 (current-input-mode))
                                              nil)))
            (set-input-meta-mode 8))
          ;; Avoid end-of-line conversion.
          (setq coding-system
                (coding-system-change-eol-conversion coding-system 'unix)))

      ;; Decoding is being disabled: restore the saved meta mode, if
      ;; any, and clear the saved value.
      (when saved-meta-mode
        (set-input-meta-mode (car saved-meta-mode))
        (set-terminal-parameter terminal
                                'keyboard-coding-saved-meta-mode
                                nil))))
  (set-keyboard-coding-system-internal coding-system terminal)
  ;; Keep the user option in step with the value just installed.
  (setq keyboard-coding-system coding-system))
| 1376 | |
;; User option mirroring the keyboard coding system.  Its `:set'
;; function goes through `set-keyboard-coding-system' so that changing
;; the option via Customize actually takes effect.
(defcustom keyboard-coding-system nil
  "Specify coding system for keyboard input.
If you set this on a terminal which can't distinguish Meta keys from
8-bit characters, you will have to use ESC to type Meta characters.
See Info node `Terminal Coding' and Info node `Unibyte Mode'.

On non-windowing terminals, this is set from the locale by default.

Setting this variable directly does not take effect;
use either \\[customize] or \\[set-keyboard-coding-system]."
  :type '(coding-system :tag "Coding system")
  :link '(info-link "(emacs)Terminal Coding")
  :link '(info-link "(emacs)Unibyte Mode")
  :set (lambda (symbol value)
         ;; Don't load encoded-kb unnecessarily.
         ;; With a nil VALUE and `encoded-kbd-setup-display' unbound,
         ;; only the default value is initialized.
         (if (or value (boundp 'encoded-kbd-setup-display))
             (set-keyboard-coding-system value)
           (set-default 'keyboard-coding-system nil))) ; must initialize
  :version "22.1"
  :group 'keyboard
  :group 'mule)
| 1398 | |
(defun set-buffer-process-coding-system (decoding encoding)
  "Set coding systems for the process associated with the current buffer.
DECODING is the coding system to be used to decode input from the process,
ENCODING is the coding system to be used to encode output to the process.
Signal an error if the current buffer has no process.

For a list of possible coding systems, use \\[list-coding-systems]."
  (interactive
   "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
  (let ((process (get-buffer-process (current-buffer))))
    (unless process
      (error "No process"))
    ;; Validate both coding systems before touching the process.
    (check-coding-system decoding)
    (check-coding-system encoding)
    (set-process-coding-system process decoding encoding))
  (force-mode-line-update))
| 1414 | |
;; `set-clipboard-coding-system' is another name for the same command.
(defalias 'set-clipboard-coding-system 'set-selection-coding-system)

(defun set-selection-coding-system (coding-system)
  "Make CODING-SYSTEM used for communicating with other X clients.
Text sent or received via cut_buffer, selection, and clipboard
is encoded or decoded by CODING-SYSTEM."
  (interactive "zCoding system for X selection: ")
  ;; `check-coding-system' returns its argument when it is valid and
  ;; signals an error otherwise.
  (setq selection-coding-system (check-coding-system coding-system)))
| 1424 | |
;; Coding system most recently specified by the command
;; set-next-selection-coding-system.  That command reuses this value
;; when it is called with a nil coding system.
(defvar last-next-selection-coding-system nil)
| 1428 | |
(defun set-next-selection-coding-system (coding-system)
  "Use CODING-SYSTEM for next communication with other window system clients.
This setting is effective for the next communication only.
A nil CODING-SYSTEM means to reuse the coding system given to the
previous call of this command."
  (interactive
   (let ((prompt (if last-next-selection-coding-system
                     (format "Coding system for the next selection (default %S): "
                             last-next-selection-coding-system)
                   "Coding system for the next selection: ")))
     (list (read-coding-system prompt last-next-selection-coding-system))))
  (cond (coding-system
         ;; Remember an explicit choice for later calls.
         (setq last-next-selection-coding-system coding-system))
        (t
         (setq coding-system last-next-selection-coding-system)))
  (check-coding-system coding-system)
  (setq next-selection-coding-system coding-system))
| 1445 | |
(defun set-coding-priority (arg)
  "Set the priority of coding categories according to ARG.
ARG is a list of coding categories, highest priority first.  Each
category is a symbol whose value is passed on to
`set-coding-system-priority'.

This function is provided for backward compatibility."
  (let ((coding-systems (mapcar #'symbol-value arg)))
    (apply #'set-coding-system-priority coding-systems)))
(make-obsolete 'set-coding-priority 'set-coding-system-priority "23.1")
| 1454 | |
| 1455 | ;;; X selections |
| 1456 | |
;; Each entry below is (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET);
;; see the docstring for the meaning of the individual fields.
(defvar ctext-non-standard-encodings-alist
  (mapcar 'purecopy
          '(("big5-0" big5 2 big5)
            ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
            ("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
            ("gbk-0" gbk 2 chinese-gbk)
            ("koi8-r" koi8-r 1 koi8-r)
            ("microsoft-cp1251" windows-1251 1 windows-1251)))
  "Alist of non-standard encoding names vs the corresponding usages in CTEXT.

It controls how extended segments of a compound text are handled
by the coding system `compound-text-with-extensions'.

Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).

ENCODING-NAME is an encoding name of an \"extended segment\".

CODING-SYSTEM is the coding-system to encode (or decode) the
characters into (or from) the extended segment.

N-OCTET is the number of octets (bytes) that encodes a character
in the segment. It can be 0 (meaning the number of octets per
character is variable), 1, 2, 3, or 4.

CHARSET is a character set containing characters that are encoded
in the segment. It can be a list of character sets.

On decoding CTEXT, all encoding names listed here are recognized.

On encoding CTEXT, encoding names in the variable
`ctext-non-standard-encodings' (which see) and in the information
listed for the current language environment under the key
`ctext-non-standard-encodings' are used.")
| 1490 | |
;; Consulted by `ctext-non-standard-encodings-table', which looks each
;; name up in `ctext-non-standard-encodings-alist'.
(defvar ctext-non-standard-encodings nil
  "List of non-standard encoding names used in extended segments of CTEXT.
Each element must be one of the names listed in the variable
`ctext-non-standard-encodings-alist' (which see).")
| 1495 | |
;; Regexp used by `ctext-post-read-conversion' to find the parts of a
;; compound text that need special decoding.  Groups:
;;   1 - the header of an extended segment, from ESC up to and
;;       including the \002 that terminates the encoding name;
;;   2 - the encoding name inside that header;
;;   3 - a whole UTF-8 section, ESC % G ... ESC % @.
(defvar ctext-non-standard-encodings-regexp
  (purecopy
   (string-to-multibyte
    (concat
     ;; For non-standard encodings: ESC % / N M L NAME \002, where N is
     ;; a digit 0-4 and M and L are two bytes in the range \200-\377.
     "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
     "\\|"
     ;; For UTF-8 encoding.
     "\\(\e%G[^\e]*\e%@\\)"))))
| 1505 | |
;; Functions to support "Non-Standard Character Set Encodings" defined
;; by the COMPOUND-TEXT spec. They also support "The UTF-8 encoding"
;; described in section 7 of the COMPOUND-TEXT documentation
;; distributed with XFree86.
| 1510 | |
(defun ctext-post-read-conversion (len)
  "Decode LEN characters encoded as Compound Text with Extended Segments.
The LEN characters following point are first decoded as `ctext'; then
every extended segment and every UTF-8 section found in the result is
decoded in place.  Point is left at the start of the decoded text, and
the return value is the number of characters the text occupies after
decoding."
  ;; We don't need the following because it is expected that this
  ;; function is mainly used for decoding X selection which is not
  ;; that big data.
  ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions
  (save-match-data
    (save-restriction
      (narrow-to-region (point) (+ (point) len))
      (let ((case-fold-search nil)
            ;; Bound locally so the decoding calls below do not change
            ;; the caller's value.
            last-coding-system-used
            pos bytes)
        (decode-coding-region (point-min) (point-max) 'ctext)
        (while (re-search-forward ctext-non-standard-encodings-regexp
                                  nil 'move)
          (setq pos (match-beginning 0))
          (if (match-beginning 1)
              ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
              (let* ((M (multibyte-char-to-unibyte (char-after (+ pos 4))))
                     (L (multibyte-char-to-unibyte (char-after (+ pos 5))))
                     (encoding (match-string 2))
                     ;; Case-insensitive lookup of the encoding name.
                     (encoding-info (assoc-string
                                     encoding
                                     ctext-non-standard-encodings-alist t))
                     ;; An unlisted name is still accepted when its
                     ;; downcased form names a coding system.
                     (coding (if encoding-info
                                 (nth 1 encoding-info)
                               (setq encoding (intern (downcase encoding)))
                               (and (coding-system-p encoding)
                                    encoding))))
                ;; M and L give the segment length as (M-128)*128 + (L-128).
                ;; Subtract what the match already consumed after L (the
                ;; encoding name and the \002) to get the count of data
                ;; bytes that follow point.
                (setq bytes (- (+ (* (- M 128) 128) (- L 128))
                               (- (point) (+ pos 6))))
                ;; With no usable coding system the segment is left as is.
                (when coding
                  ;; Drop the header, step over the data, decode it.
                  (delete-region pos (point))
                  (forward-char bytes)
                  (decode-coding-region (- (point) bytes) (point) coding)))
            ;; ESC % G --UTF-8-BYTES-- ESC % @
            ;; Remove the trailing ESC % @, then the leading ESC % G,
            ;; and decode what lies between them.
            (delete-char -3)
            (delete-region pos (+ pos 3))
            (decode-coding-region pos (point) 'utf-8))))
      (goto-char (point-min))
      ;; Size of the (still narrowed) region after decoding.
      (- (point-max) (point)))))
| 1552 | |
;; `ctext-pre-write-conversion' sorts a copy of this list and uses it
;; as the base of the charsets it tries when classifying characters.
(defvar ctext-standard-encodings
  '(ascii latin-jisx0201 katakana-jisx0201
    latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
    greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5
    latin-iso8859-9
    chinese-gb2312 japanese-jisx0208 korean-ksc5601)
  "List of approved standard encodings (i.e. charsets) of X's Compound Text.
Coding-system `compound-text-with-extensions' encodes a character
belonging to any of those charsets using the normal ISO2022
designation sequence unless the current language environment or
the variable `ctext-non-standard-encodings' decide to use an extended
segment of CTEXT for that character. See also the documentation
of `ctext-non-standard-encodings-alist'.")
| 1566 | |
;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
;; `ctext-non-standard-encodings' and a list specified by the key
;; `ctext-non-standard-encodings' for the current language
;; environment. CTEXT-USAGE-INFO is one of the elements of
;; `ctext-non-standard-encodings-alist' or nil. In the former case, a
;; character in CHARSET is encoded using an extended segment. In the
;; latter case, a character in CHARSET is encoded using the normal
;; ISO2022 designation sequence. If a character is not in any of the
;; CHARSETs, it is encoded using the UTF-8 encoding extension.
| 1576 | |
(defun ctext-non-standard-encodings-table ()
  "Return an alist of (CHARSET . CTEXT-USAGE-INFO) for encoding CTEXT.
The encoding names are taken from the variable
`ctext-non-standard-encodings' followed by those listed under the key
`ctext-non-standard-encodings' for the current language environment.
CTEXT-USAGE-INFO is the element of `ctext-non-standard-encodings-alist'
for the encoding name, or nil when the name is not listed there.

When the CHARSET field of an element is a list of charsets, the result
has one entry per charset, all sharing the same CTEXT-USAGE-INFO."
  ;; `append' copies its first argument and `copy-sequence' copies the
  ;; second, so the destructive `setcar'/`setcdr' below only touch
  ;; fresh cons cells.
  (let* ((table (append ctext-non-standard-encodings
                        (copy-sequence
                         (get-language-info current-language-environment
                                            'ctext-non-standard-encodings))))
         (tail table)
         elt)
    (while tail
      (setq elt (car tail))
      (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
             (charset (nth 3 slot)))
        (if (charsetp charset)
            ;; A single charset: key the entry by its :base property.
            (setcar tail
                    (cons (plist-get (charset-plist charset) :base) slot))
          ;; A list of charsets (or nil for an unknown name): the head
          ;; replaces the current cell as is ...
          (setcar tail (cons (car charset) slot))
          ;; ... and each remaining charset gets a new cell spliced in
          ;; right after it.  CS is a charset symbol, so it is passed
          ;; to `charset-plist' directly; taking its `car' (as this
          ;; code once did) signals an error.
          (dolist (cs (cdr charset))
            (setcdr tail
                    (cons (cons (plist-get (charset-plist cs) :base) slot)
                          (cdr tail)))
            (setq tail (cdr tail))))
        (setq tail (cdr tail))))
    table))
| 1599 | |
(defun ctext-pre-write-conversion (from to)
  "Encode characters between FROM and TO as Compound Text w/Extended Segments.

If FROM is a string, generate a new temp buffer, insert the text,
and convert it in the temporary buffer. Otherwise, convert
in-place.

Each run of non-ASCII characters is split by charset.  A run whose
charset has an entry in the table returned by
`ctext-non-standard-encodings-table' is written as an extended
segment; a run in another known charset is only tagged with a
`charset' text property; a run in no known charset is written as a
UTF-8 section.  Always return nil."
  (save-match-data
    ;; Setup a working buffer if necessary.
    ;; The temporary buffer becomes current and is not killed here.
    (when (stringp from)
      (set-buffer (generate-new-buffer " *temp"))
      (set-buffer-multibyte (multibyte-string-p from))
      (insert from)
      (setq from (point-min) to (point-max)))
    (save-restriction
      (narrow-to-region from to)
      (goto-char from)
      (let ((encoding-table (ctext-non-standard-encodings-table))
            (charset-list (sort-charsets
                           (copy-sequence ctext-standard-encodings)))
            ;; NOTE(review): this marker is replaced by the
            ;; `point-marker' assigned a few lines below.
            (end-pos (make-marker))
            ;; Bound locally so the encoding calls below do not change
            ;; the caller's value.
            last-coding-system-used
            last-pos charset encoding-info)
        ;; Put the charsets of the non-standard encodings in front of
        ;; the standard ones.
        (dolist (elt encoding-table)
          (push (car elt) charset-list))
        (setq end-pos (point-marker))
        (while (re-search-forward "[^\0-\177]+" nil t)
          ;; Found a sequence of non-ASCII characters.
          ;; END-POS is a marker, so it follows the text as the run is
          ;; rewritten below.
          (set-marker end-pos (match-end 0))
          (goto-char (match-beginning 0))
          (setq last-pos (point)
                charset (char-charset (following-char) charset-list))
          (forward-char 1)
          ;; Extend the run over following characters of the same charset.
          (while (and (< (point) end-pos)
                      (eq charset (char-charset (following-char) charset-list)))
            (forward-char 1))
          (if charset
              (if (setq encoding-info (cdr (assq charset encoding-table)))
                  ;; Encode this range using an extended segment.
                  (let ((encoding-name (car encoding-info))
                        (coding-system (nth 1 encoding-info))
                        (noctets (nth 2 encoding-info))
                        len)
                    (encode-coding-region last-pos (point) coding-system)
                    ;; Segment length: the encoding name, the \002
                    ;; separator, and the encoded text.
                    (setq len (+ (length encoding-name) 1
                                 (- (point) last-pos)))
                    ;; According to the spec of CTEXT, it is not
                    ;; necessary to produce this extra designation
                    ;; sequence, but some buggy application
                    ;; (e.g. crxvt-gb) requires it.
                    (insert "\e(B")
                    ;; Insert the segment header in front of the
                    ;; encoded text: ESC % / N M L NAME \002, with the
                    ;; length split into two bytes, each offset by 128.
                    (save-excursion
                      (goto-char last-pos)
                      (insert (format "\e%%/%d" noctets))
                      (insert-byte (+ (/ len 128) 128) 1)
                      (insert-byte (+ (% len 128) 128) 1)
                      (insert encoding-name)
                      (insert 2)))
                ;; Encode this range as characters in CHARSET.
                (put-text-property last-pos (point) 'charset charset))
            ;; Encode this range using UTF-8 encoding extension.
            (encode-coding-region last-pos (point) 'mule-utf-8)
            (save-excursion
              (goto-char last-pos)
              (insert "\e%G"))
            (insert "\e%@")))
        (goto-char (point-min)))))
  ;; Must return nil, as build_annotations_2 expects that.
  nil)
| 1668 | |
| 1669 | ;;; FILE I/O |
| 1670 | |
;; Only the regexp of each entry is passed through `purecopy'; the
;; coding-system symbol is kept as is.
(defcustom auto-coding-alist
  ;; .exe and .EXE are added to support archive-mode looking at DOS
  ;; self-extracting exe archives.
  (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
          ;; The archive suffixes are listed in both lower and upper case.
          '(("\\.\\(\
arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|rar\\|7z\\|\
ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|RAR\\|7Z\\)\\'"
             . no-conversion-multibyte)
            ("\\.\\(exe\\|EXE\\)\\'" . no-conversion)
            ("\\.\\(sx[dmicw]\\|odt\\|tar\\|tgz\\)\\'" . no-conversion)
            ("\\.\\(gz\\|Z\\|bz\\|bz2\\|xz\\|gpg\\)\\'" . no-conversion)
            ("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion)
            ("\\.pdf\\'" . no-conversion)
            ;; File names of the form #NAME# (presumably auto-save
            ;; files -- confirm).
            ("/#[^/]+#\\'" . emacs-mule)))
  "Alist of filename patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.

The settings in this alist take priority over `coding:' tags
in the file (see the function `set-auto-coding')
and the contents of `file-coding-system-alist'."
  :version "24.1" ; added xz
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "File name regexp")
                       (symbol :tag "Coding system"))))
| 1697 | |
;; Every default regexp is anchored with \\` so that it can only match
;; at the very start of the text being examined.
(defcustom auto-coding-regexp-alist
  (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
          '(("\\`BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
            ;; Leading byte sequences that identify the signature
            ;; variants of UTF-16 (big/little endian) and UTF-8.
            ("\\`\xFE\xFF" . utf-16be-with-signature)
            ("\\`\xFF\xFE" . utf-16le-with-signature)
            ("\\`\xEF\xBB\xBF" . utf-8-with-signature)
            ("\\`;ELC\024\0\0\0" . emacs-mule))) ; Emacs 20-compiled
  "Alist of patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.

The settings in this alist take priority over `coding:' tags
in the file (see the function `set-auto-coding')
and the contents of `file-coding-system-alist'."
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "Regexp")
                       (symbol :tag "Coding system"))))
| 1716 | |
(defun auto-coding-regexp-alist-lookup (from to)
  "Look up `auto-coding-regexp-alist' for the contents of the current buffer.
The entries are tried in order against the region between FROM and TO.
The value is the coding system of the first entry whose regexp
matches, or nil if none matches.  Point is preserved."
  (save-excursion
    (goto-char from)
    (let ((entries auto-coding-regexp-alist)
          found)
      (while (and entries (not found))
        (let* ((entry (car entries))
               ;; The patterns are converted with `string-to-multibyte'
               ;; before searching a multibyte buffer.
               (pattern (if enable-multibyte-characters
                            (string-to-multibyte (car entry))
                          (car entry))))
          (if (re-search-forward pattern to t)
              (setq found (cdr entry))
            (setq entries (cdr entries)))))
      found)))
| 1733 | |
| 1734 | ;; See the bottom of this file for built-in auto coding functions. |
;; `find-auto-coding' calls these functions, in order, as its last
;; resort; an error signaled by one of them is treated as a nil result.
(defcustom auto-coding-functions '(sgml-xml-auto-coding-function
                                   sgml-html-meta-auto-coding-function)
  "A list of functions which attempt to determine a coding system.

Each function in this list should be written to operate on the
current buffer, but should not modify it in any way. The buffer
will contain undecoded text of parts of the file. Each function
should take one argument, SIZE, which says how many characters
\(starting from point) it should look at.

If one of these functions succeeds in determining a coding
system, it should return that coding system. Otherwise, it
should return nil.

If a file has a `coding:' tag, that takes precedence over these
functions, so they won't be called at all."
  :group 'files
  :group 'mule
  :type '(repeat function))
| 1754 | |
;; When this is non-nil, `find-auto-coding' also looks for a `unibyte:'
;; tag and answers `raw-text' if it finds one.
(defvar set-auto-coding-for-load nil
  "Non-nil means look for `load-coding' property instead of `coding'.
This is used for loading and byte-compiling Emacs Lisp files.")
| 1758 | |
(defun auto-coding-alist-lookup (filename)
  "Return the coding system that `auto-coding-alist' specifies for FILENAME.
The entries are tried in order and the first one whose regexp matches
FILENAME wins.  Return nil if no entry matches.  Matching ignores case
on the systems `windows-nt', `ms-dos' and `cygwin'."
  (let ((case-fold-search (memq system-type '(windows-nt ms-dos cygwin)))
        (entries auto-coding-alist)
        found)
    (while (and entries (not found))
      (let ((entry (car entries)))
        (if (string-match (car entry) filename)
            (setq found (cdr entry))
          (setq entries (cdr entries)))))
    found))
| 1769 | |
;; Properties of the variable `enable-character-translation', which
;; `find-auto-coding' makes buffer-local when a file asks for
;; character translation to be turned off.
(put 'enable-character-translation 'permanent-local t)
(put 'enable-character-translation 'safe-local-variable 'booleanp)
| 1772 | |
(defun find-auto-coding (filename size)
  "Find a coding system for a file FILENAME of which SIZE bytes follow point.
These bytes should include at least the first 1k of the file
and the last 3k of the file, but the middle may be omitted.

The function checks FILENAME against the variable `auto-coding-alist'.
If FILENAME doesn't match any entries in the variable, it checks the
contents of the current buffer following point against
`auto-coding-regexp-alist'. If no match is found, it checks for a
`coding:' tag in the first one or two lines following point. If no
`coding:' tag is found, it checks any local variables list in the last
3K bytes out of the SIZE bytes. Finally, if none of these methods
succeed, it checks to see if any function in `auto-coding-functions'
gives a match.

If a coding system is specified, the return value is a cons
\(CODING . SOURCE), where CODING is the specified coding system and
SOURCE is a symbol `auto-coding-alist', `auto-coding-regexp-alist',
`:coding', or `auto-coding-functions' indicating by what CODING is
specified. Note that the validity of CODING is not checked;
it's the caller's responsibility to check it.

If nothing is specified, the return value is nil.

As a side effect, an `enable-character-translation: nil' tag, or a
coding system name ending in `!', makes the variable
`enable-character-translation' buffer-local with the value nil."
  (or (let ((coding-system (auto-coding-alist-lookup filename)))
        (if coding-system
            (cons coding-system 'auto-coding-alist)))
      ;; Try using `auto-coding-regexp-alist'.
      (let ((coding-system (auto-coding-regexp-alist-lookup (point)
                                                            (+ (point) size))))
        (if coding-system
            (cons coding-system 'auto-coding-regexp-alist)))
      (let* ((case-fold-search t)
             (head-start (point))
             (head-end (+ head-start (min size 1024)))
             (tail-start (+ head-start (max (- size 3072) 0)))
             (tail-end (+ head-start size))
             coding-system head-found tail-found char-trans)
        ;; Try a short cut by searching for the string "coding:"
        ;; and for "unibyte:" at the head and tail of SIZE bytes.
        (setq head-found (or (search-forward "coding:" head-end t)
                             (search-forward "unibyte:" head-end t)
                             (search-forward "enable-character-translation:"
                                             head-end t)))
        (if (and head-found (> head-found tail-start))
            ;; Head and tail are overlapped.
            (setq tail-found head-found)
          (goto-char tail-start)
          (setq tail-found (or (search-forward "coding:" tail-end t)
                               (search-forward "unibyte:" tail-end t)
                               (search-forward "enable-character-translation:"
                                               tail-end t))))

        ;; At first check the head.
        (when head-found
          (goto-char head-start)
          ;; `set-auto-mode-1' locates the -*- line; from here on
          ;; HEAD-START and HEAD-END delimit the text between the
          ;; two -*- markers.
          (setq head-end (set-auto-mode-1))
          (setq head-start (point))
          (when (and head-end (< head-found head-end))
            (goto-char head-start)
            (when (and set-auto-coding-for-load
                       (re-search-forward
                        "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
                        head-end t))
              (setq coding-system 'raw-text))
            (when (and (not coding-system)
                       (re-search-forward
                        "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
                        head-end t))
              (setq coding-system (intern (match-string 2))))
            ;; A successful search above leaves point after its match.
            ;; Start over so that `enable-character-translation:' is
            ;; found even when it is written before `coding:'.
            (goto-char head-start)
            (when (re-search-forward
                   "\\(.*;\\)?[ \t]*enable-character-translation:[ \t]*\\([^ ;]+\\)"
                   head-end t)
              (setq char-trans (match-string 2)))))

        ;; If no coding: tag in the head, check the tail.
        ;; Here we must pay attention to the case that the end-of-line
        ;; is just "\r" and we can't use "^" nor "$" in regexp.
        (when (and tail-found (or (not coding-system) (not char-trans)))
          (goto-char tail-start)
          (re-search-forward "[\r\n]\^L" tail-end t)
          ;; The prefix group accepts any character except a line
          ;; terminator, just like the suffix group.
          (if (re-search-forward
               "[\r\n]\\([^\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
               tail-end t)
              ;; The prefix is what comes before "local variables:" in its
              ;; line. The suffix is what comes after "local variables:"
              ;; in its line.
              (let* ((prefix (regexp-quote (match-string 1)))
                     (suffix (regexp-quote (match-string 2)))
                     (re-coding
                      (concat
                       "[\r\n]" prefix
                       ;; N.B. without the \n below, the regexp can
                       ;; eat newlines.
                       "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
                       suffix "[\r\n]"))
                     (re-unibyte
                      (concat
                       "[\r\n]" prefix
                       "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
                       suffix "[\r\n]"))
                     (re-char-trans
                      (concat
                       "[\r\n]" prefix
                       "[ \t]*enable-character-translation[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
                       suffix "[\r\n]"))
                     (re-end
                      (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
                              "[\r\n]?"))
                     ;; POS is the line terminator that ends the
                     ;; "Local Variables:" line.
                     (pos (1- (point))))
                (forward-char -1) ; skip back \r or \n.
                ;; Restrict the searches below to the local variables
                ;; list: up to its "End:" line, or to the old TAIL-END
                ;; if there is none.
                (re-search-forward re-end tail-end 'move)
                (setq tail-end (point))
                (goto-char pos)
                (when (and set-auto-coding-for-load
                           (re-search-forward re-unibyte tail-end t))
                  (setq coding-system 'raw-text))
                (when (and (not coding-system)
                           (re-search-forward re-coding tail-end t))
                  (setq coding-system (intern (match-string 1))))
                ;; As in the head: start over so that the order of the
                ;; entries in the list does not matter.
                (goto-char pos)
                (when (and (not char-trans)
                           (re-search-forward re-char-trans tail-end t))
                  (setq char-trans (match-string 1))))))
        (if coding-system
            ;; If the coding-system name ends with "!", remove it and
            ;; set char-trans to "nil".
            (let ((name (symbol-name coding-system)))
              (if (= (aref name (1- (length name))) ?!)
                  (setq coding-system (intern (substring name 0 -1))
                        char-trans "nil"))))
        ;; CHAR-TRANS is still a string here; only the text "nil"
        ;; interns to nil and so turns character translation off.
        (when (and char-trans
                   (not (setq char-trans (intern char-trans))))
          (make-local-variable 'enable-character-translation)
          (setq enable-character-translation nil))
        (if coding-system
            (cons coding-system :coding)))
      ;; Finally, try all the `auto-coding-functions'.
      (let ((funcs auto-coding-functions)
            (coding-system nil))
        (while (and funcs (not coding-system))
          ;; A function that signals an error counts as returning nil.
          (setq coding-system (condition-case nil
                                  (save-excursion
                                    (goto-char (point-min))
                                    (funcall (pop funcs) size))
                                (error nil))))
        (if coding-system
            (cons coding-system 'auto-coding-functions)))))
| 1919 | |
(defun set-auto-coding (filename size)
  "Return coding system for a file FILENAME of which SIZE bytes follow point.
The coding system is located by `find-auto-coding' (which see).
Return nil if nothing is found, or if what is found is not a valid
coding system.

The variable `set-auto-coding-function' (which see) is set to this
function by default."
  (let* ((found (find-auto-coding filename size))
         (coding (car found)))
    ;; `find-auto-coding' does not validate its result, so do it here.
    (when (and found (coding-system-p coding))
      coding)))
| 1930 | |
;; Install `set-auto-coding' as the function that is called through
;; the variable `set-auto-coding-function'.
(setq set-auto-coding-function 'set-auto-coding)
| 1932 | |
(defun after-insert-file-set-coding (inserted &optional visit)
  "Set `buffer-file-coding-system' of current buffer after text is inserted.
INSERTED is the number of characters that were inserted; it is
returned unchanged.
The optional second arg VISIT non-nil means that we are visiting a file.

When visiting with `coding-system-for-read' set to anything other than
`auto-save-coding', also record that coding system in
`buffer-file-coding-system-explicit'."
  (when (and visit
             coding-system-for-read
             (not (eq coding-system-for-read 'auto-save-coding)))
    (setq buffer-file-coding-system-explicit
          (list coding-system-for-read)))
  (when last-coding-system-used
    (let ((new-coding
           (find-new-buffer-file-coding-system last-coding-system-used)))
      ;; A nil answer means there is no need to change anything.
      (when new-coding
        (setq buffer-file-coding-system new-coding))))
  inserted)
| 1951 | |
| 1952 | ;; The coding-spec and eol-type of coding-system returned is decided |
| 1953 | ;; independently in the following order. |
| 1954 | ;; 1. That of buffer-file-coding-system locally bound. |
| 1955 | ;; 2. That of CODING. |
| 1956 | |
(defun find-new-buffer-file-coding-system (coding)
  "Return a coding system for a buffer when a file of CODING is inserted.
The value is meant to become the buffer-local value of
`buffer-file-coding-system'; this function itself does not set it.
Return nil if there's no need to set `buffer-file-coding-system'.

The text conversion and the eol-type of the result are chosen
independently.  For each of them a buffer-local
`buffer-file-coding-system' is preferred over CODING; otherwise
CODING is preferred over the current value."
  (when coding
    (let* ((local-p (local-variable-p 'buffer-file-coding-system))
           ;; Eol-type of the current value; nil while still undecided.
           (local-eol
            (let ((eol (coding-system-eol-type buffer-file-coding-system)))
              (and (numberp eol) eol)))
           ;; Text conversion of the current value; nil while undecided.
           (local-coding
            (and buffer-file-coding-system
                 (not (eq (coding-system-type buffer-file-coding-system)
                          'undecided))
                 (coding-system-base buffer-file-coding-system))))
      ;; A buffer that already has a fully specified local coding
      ;; system is left alone.
      (unless (and local-p local-eol local-coding)
        (let ((found-eol
               (let ((eol (coding-system-eol-type coding)))
                 (cond ((numberp eol) eol)
                       ;; If EOL conversions are inhibited, force unix
                       ;; eol-type.
                       (inhibit-eol-conversion 0))))
              (found-coding (coding-system-base coding)))
          ;; Go on only if CODING tells us something (eol or text).
          (unless (and (not found-eol) (eq found-coding 'undecided))
            (let* ((new-coding (if local-p
                                   (or local-coding found-coding)
                                 (or found-coding local-coding)))
                   (new-eol (if local-p
                                (or local-eol found-eol)
                              (or found-eol local-eol)))
                   (eol-variants (coding-system-eol-type new-coding)))
              ;; Pick the variant with the wanted eol-type when both
              ;; the eol-type and the variants are known.
              (if (and (numberp new-eol) (vectorp eol-variants))
                  (aref eol-variants new-eol)
                new-coding))))))))
| 2011 | |
(defun modify-coding-system-alist (target-type regexp coding-system)
  "Modify one of the lookup tables for finding a coding system on I/O operation.
There are three such tables: `file-coding-system-alist',
`process-coding-system-alist', and `network-coding-system-alist'.

TARGET-TYPE specifies which of them to modify.
If it is `file', it affects `file-coding-system-alist' (which see).
If it is `process', it affects `process-coding-system-alist' (which see).
If it is `network', it affects `network-coding-system-alist' (which see).

REGEXP is a regular expression matching a target of I/O operation.
The target is a file name if TARGET-TYPE is `file', a program name if
TARGET-TYPE is `process', or a network service name or a port number
to connect to if TARGET-TYPE is `network'.

CODING-SYSTEM is a coding system to perform code conversion on the I/O
operation, or a cons cell (DECODING . ENCODING) specifying the coding
systems for decoding and encoding respectively, or a function symbol
which, when called, returns such a cons cell.

An existing entry for REGEXP is updated in place; otherwise a new
entry is added at the front of the table."
  (unless (memq target-type '(file process network))
    (error "Invalid target type: %s" target-type))
  (unless (or (stringp regexp)
              (and (eq target-type 'network) (integerp regexp)))
    (error "Invalid regular expression: %s" regexp))
  (cond ((not (symbolp coding-system))
         ;; A (DECODING . ENCODING) pair: validate both halves.
         (check-coding-system (car coding-system))
         (check-coding-system (cdr coding-system)))
        ((not (fboundp coding-system))
         ;; A single coding system serves for both directions.
         (check-coding-system coding-system)
         (setq coding-system (cons coding-system coding-system))))
  ;; A symbol with a function definition is stored as is.
  (let* ((alist-var (cond ((eq target-type 'file)
                           'file-coding-system-alist)
                          ((eq target-type 'process)
                           'process-coding-system-alist)
                          (t
                           'network-coding-system-alist)))
         (slot (assoc regexp (symbol-value alist-var))))
    (if slot
        (setcdr slot coding-system)
      (set alist-var
           (cons (cons regexp coding-system)
                 (symbol-value alist-var))))))
| 2064 | |
(defun decode-coding-inserted-region (from to filename
                                           &optional visit beg end replace)
  "Decode the region between FROM and TO as if it is read from file FILENAME.
The idea is that the text between FROM and TO was just inserted somehow.
Optional arguments VISIT, BEG, END, and REPLACE are the same as those
of the function `insert-file-contents'.
Part of the job of this function is setting `buffer-undo-list' appropriately.

The coding system is taken from `coding-system-for-read' if that is
non-nil, else from `set-auto-coding-function', else from
`find-operation-coding-system'."
  (save-excursion
    (save-restriction
      (let ((coding coding-system-for-read)
            ;; The undo list minus the record of the insertion, when
            ;; that record could be recognized; otherwise nil.
            undo-list-saved)
        (if visit
            ;; Temporarily turn off undo recording, if we're decoding the
            ;; text of a visited file.
            (setq buffer-undo-list t)
          ;; Otherwise, if we can recognize the undo elt for the insertion,
          ;; remove it and get ready to replace it later.
          ;; In the mean time, turn off undo recording.
          (let ((last (car-safe buffer-undo-list)))
            (if (and (consp last) (eql (car last) from) (eql (cdr last) to))
                (setq undo-list-saved (cdr buffer-undo-list)
                      buffer-undo-list t))))
        (narrow-to-region from to)
        (goto-char (point-min))
        ;; Each of the next two steps runs only if CODING is still nil.
        (or coding
            (setq coding (funcall set-auto-coding-function
                                  filename (- (point-max) (point-min)))))
        (or coding
            (setq coding (car (find-operation-coding-system
                               'insert-file-contents
                               (cons filename (current-buffer))
                               visit beg end replace))))
        ;; In a unibyte buffer switch the text conversion to `raw-text';
        ;; drop anything that is not a coding system.
        (if (coding-system-p coding)
            (or enable-multibyte-characters
                (setq coding
                      (coding-system-change-text-conversion coding 'raw-text)))
          (setq coding nil))
        (if coding
            (decode-coding-region (point-min) (point-max) coding)
          ;; Nothing was decoded; CODING is nil here.
          (setq last-coding-system-used coding))
        ;; If we're decoding the text of a visited file,
        ;; the undo list should start out empty.
        (if visit
            (setq buffer-undo-list nil)
          ;; If we decided to replace the undo entry for the insertion,
          ;; do so now.
          ;; The new entry ends at `point-max' because decoding may
          ;; have changed the length of the text.
          (if undo-list-saved
              (setq buffer-undo-list
                    (cons (cons from (point-max)) undo-list-saved))))))))
| 2114 | |
(defun recode-region (start end new-coding coding)
  "Re-decode the text between START and END using NEW-CODING.
The text is assumed to have been decoded by CODING by mistake: it is
first encoded back with CODING and then decoded with NEW-CODING.
Signal an error if either coding system is nil.
Interactively, operate on the region and prompt for both coding systems."
  (interactive
   (list (region-beginning) (region-end)
         (read-coding-system "Text was really in: ")
         (let ((default (or buffer-file-coding-system
                            last-coding-system-used)))
           (read-coding-system
            (concat "But was interpreted as"
                    (if default (format " (default %S): " default) ": "))
            default))))
  (unless (and new-coding coding)
    (error "Coding system not specified"))
  ;; Validate NEW-CODING up front, so that we never leave the region
  ;; encoded but not re-decoded.
  (check-coding-system new-coding)
  (save-restriction
    (narrow-to-region start end)
    (encode-coding-region (point-min) (point-max) coding)
    (decode-coding-region (point-min) (point-max) new-coding))
  (when (region-active-p)
    (deactivate-mark)))
| 2135 | |
(defun make-translation-table (&rest args)
  "Build and return a translation table described by ARGS.
A translation table is a char table intended for character
translation in CCL programs.

Every argument is a list whose elements look like (FROM . TO), meaning
that character FROM is to be translated to character TO.

Arguments, and the elements inside each argument, are handled in the
order given.  Translations are chained: when an earlier element already
translates TO to another character TO-ALT, FROM is translated to TO-ALT
as well, and characters that were earlier translated to FROM are
redirected to the final target too."
  (let ((table (make-char-table 'translation-table))
        ;; Alist of (TARGET SOURCE...): for each current target, the
        ;; characters that translate to it.
        (sources-by-target nil))
    (dolist (pairs args)
      (dolist (pair pairs)
        (let* ((src (car pair))
               ;; Follow an existing translation of the requested target.
               (dst (or (aref table (cdr pair)) (cdr pair)))
               (src-entry (assq src sources-by-target))
               (inherited nil)
               dst-entry)
          (aset table src dst)
          ;; Characters that used to translate to SRC now go to DST.
          (when src-entry
            (dolist (ch (cdr src-entry))
              (aset table ch dst))
            (setq sources-by-target (delq src-entry sources-by-target)
                  inherited (cdr src-entry)))
          ;; Record SRC, and everything inherited from it, under DST.
          (setq dst-entry (assq dst sources-by-target))
          (if dst-entry
              (setcdr dst-entry (cons src (cdr dst-entry)))
            (setq dst-entry (list dst src))
            (setq sources-by-target (cons dst-entry sources-by-target)))
          (when inherited
            (setcdr dst-entry (append inherited (cdr dst-entry)))))))
    ;; Second extra slot is set to 1 before the table is handed back.
    (set-char-table-extra-slot table 1 1)
    table))
| 2177 | |
(defun make-translation-table-from-vector (vec)
  "Return a translation table built from the decoding vector VEC.
VEC is an array of 256 elements; element N is the multibyte character
that unibyte code N decodes to, or nil when code N is undefined.

The first extra slot of the returned table holds a reverse table that
maps each character with code 256 or above back to its unibyte code."
  (let ((decode-table (make-char-table 'translation-table))
        (encode-table (make-char-table 'translation-table))
        (code 0))
    (while (< code 256)
      (let ((char (aref vec code)))
        (when char
          (aset decode-table code char)
          ;; Only characters outside the 0..255 range get a reverse entry.
          (when (>= char 256)
            (aset encode-table char code))))
      (setq code (1+ code)))
    (set-char-table-extra-slot decode-table 0 encode-table)
    (set-char-table-extra-slot decode-table 1 1)
    (set-char-table-extra-slot encode-table 1 1)
    decode-table))
| 2195 | |
(defun make-translation-table-from-alist (alist)
  "Return a translation table for the N<->M mapping described by ALIST.
Each element of ALIST has the form (FROM . TO), where FROM and TO are
each either a character or a vector of characters.
A character FROM is translated to TO; a vector FROM means that the
whole character sequence is translated to TO.
The first extra slot of the returned table is a translation table for
the reverse mapping, and the second extra slot of each table is the
length of the longest FROM sequence (at least 1)."
  (let ((forward (make-char-table 'translation-table))
        (backward (make-char-table 'translation-table)))
    (dolist (table (list forward backward))
      (let ((swap (eq table backward))
            (max-lookup 1))
        (dolist (elt alist)
          (let* ((from (if swap (cdr elt) (car elt)))
                 (to (if swap (car elt) (cdr elt)))
                 (single (characterp from))
                 ;; Entries are indexed by the first character of FROM.
                 (idx (if single from (aref from 0))))
            (unless single
              (setq max-lookup (max max-lookup (length from))))
            (let ((val (aref table idx)))
              (aset table idx
                    (cond
                     ;; First entry for IDX: a bare target for a single
                     ;; character, a one-element alist for a sequence.
                     ((null val)
                      (if single to (list (cons from to))))
                     ;; IDX already has an entry: make sure it is in
                     ;; alist form, then append the new rule at the end.
                     (t
                      (nconc (if (consp val)
                                 val
                               (list (cons (vector idx) val)))
                             (list (cons (if single (vector from) from)
                                         to)))))))))
        (set-char-table-extra-slot table 1 max-lookup)))
    (set-char-table-extra-slot forward 0 backward)
    forward))
| 2232 | |
(defun define-translation-table (symbol &rest args)
  "Register SYMBOL as the name of the translation table made from ARGS.
This records what is needed for the table to be used for translations
in a CCL program.

When the first element of ARGS is a char-table whose subtype is
`translation-table', that table itself is registered under SYMBOL.
\(Note that this function does not bind SYMBOL.)

Otherwise ARGS is passed to `make-translation-table' (which see) and
the resulting table is registered.

The table is stored in the `translation-table' property of SYMBOL and
in `translation-table-vector'; its index in that vector is stored in
the `translation-table-id' property of SYMBOL and returned."
  (let* ((first (car args))
         (table (if (and (char-table-p first)
                         (eq (char-table-subtype first) 'translation-table))
                    first
                  (apply 'make-translation-table args)))
         (len (length translation-table-vector))
         (id 0))
    (put symbol 'translation-table table)
    ;; Find the first slot that is free or already belongs to SYMBOL.
    (while (and (< id len)
                (let ((slot (aref translation-table-vector id)))
                  (and slot (not (eq (car slot) symbol)))))
      (setq id (1+ id)))
    ;; Every slot is taken by another symbol: double the vector.
    (when (>= id len)
      (setq translation-table-vector
            (vconcat translation-table-vector (make-vector len nil))))
    (aset translation-table-vector id (cons symbol table))
    (put symbol 'translation-table-id id)
    id))
| 2271 | |
(defun translate-region (start end table)
  "From START to END, translate characters according to TABLE.
TABLE is a string or a char-table.
If TABLE is a string, the Nth character in it is the mapping
for the character with code N.
If TABLE is a char-table, the element for character N is the mapping
for the character with code N.
TABLE may also be a symbol; in that case its `translation-table'
property must be a char-table, which is used as the table.
It returns the number of characters changed.

Interactively, operate on the region and read the name of a table
registered in `translation-table-vector' with completion."
  (interactive
   (list (region-beginning)
         (region-end)
         (let (table l)
           ;; Collect the names of all registered translation tables.
           (dotimes (i (length translation-table-vector))
             (if (consp (aref translation-table-vector i))
                 (push (list (symbol-name
                              (car (aref translation-table-vector i)))) l)))
           (if (not l)
               (error "No translation table defined"))
           ;; `completing-read' returns an empty string, never nil, when
           ;; the user just types RET, so the empty string has to be
           ;; treated as "no answer yet" for the re-prompt to work.
           (while (or (null table) (equal table ""))
             (setq table (completing-read "Translation table: " l nil t)))
           (intern table))))
  (if (symbolp table)
      (let ((val (get table 'translation-table)))
        (or (char-table-p val)
            (error "Invalid translation table name: %s" table))
        (setq table val)))
  (translate-region-internal start end table))
| 2299 | |
(defmacro with-category-table (table &rest body)
  "Evaluate BODY like `progn' while TABLE is the current category table.
The category table in effect on entry is remembered together with the
current buffer, and is put back in that buffer once BODY finishes,
even if BODY exits abnormally.
The value is the value of the last form in BODY."
  (declare (indent 1) (debug t))
  (let ((saved-table (make-symbol "saved-table"))
        (saved-buffer (make-symbol "saved-buffer")))
    `(let ((,saved-table (category-table))
           (,saved-buffer (current-buffer)))
       (unwind-protect
           (progn
             (set-category-table ,table)
             ,@body)
         ;; BODY may have switched buffers; restore the table in the
         ;; buffer it was taken from.
         (save-current-buffer
           (set-buffer ,saved-buffer)
           (set-category-table ,saved-table))))))
| 2316 | |
(defun define-translation-hash-table (symbol table)
  "Register SYMBOL as the name of the hash translation TABLE for use in CCL.

This is the counterpart of `define-translation-table' for hash tables:
it records TABLE in `translation-hash-table-vector', and the table is
meant for the CCL `lookup-integer' and `lookup-character' functions.

TABLE is stored in the `translation-hash-table' property of SYMBOL;
its index in the vector is stored in the `translation-hash-table-id'
property and returned.  Signal an error unless SYMBOL is a symbol and
TABLE is a hash table."
  (or (and (symbolp symbol)
           (hash-table-p table))
      (error "Bad args to define-translation-hash-table"))
  (let ((len (length translation-hash-table-vector))
        (id 0))
    (put symbol 'translation-hash-table table)
    ;; Find the first slot that is free or already belongs to SYMBOL.
    (while (and (< id len)
                (let ((slot (aref translation-hash-table-vector id)))
                  (and slot (not (eq (car slot) symbol)))))
      (setq id (1+ id)))
    ;; Every slot is taken by another symbol: append one more slot.
    (when (>= id len)
      (setq translation-hash-table-vector
            (vconcat translation-hash-table-vector [nil])))
    (aset translation-hash-table-vector id (cons symbol table))
    (put symbol 'translation-hash-table-id id)
    id))
| 2343 | |
| 2344 | ;;; Initialize some variables. |
| 2345 | |
;; Each of these variables holds a char-table whose subtype is the
;; variable's own name, declared with no extra slots.
(dolist (subtype '(use-default-ascent ignore-relative-composition))
  (put subtype 'char-table-extra-slots 0)
  (set subtype (make-char-table subtype)))

(make-obsolete 'set-char-table-default
               "generic characters no longer exist." "23.1")
| 2354 | |
| 2355 | ;;; Built-in auto-coding-functions: |
| 2356 | |
(defun sgml-xml-auto-coding-function (size)
  "Return the encoding of the buffer if it looks like an XML document.
SIZE is the number of characters after point to examine.
If the text does not start with an XML declaration, or the end of the
declaration cannot be found, return nil.  If the declaration names an
encoding, return it as a coding system, or nil (after a message) when
it is not a known coding system.  If no encoding is named, return
`utf-8', unless the contents are detected as some other coding system,
in which case warn and return the detected one.
This function is intended to be added to `auto-coding-functions'."
  (let ((limit (+ (point) size)))
    (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" limit t)
      (let ((decl-end (save-excursion
                        ;; This is a hack.
                        (re-search-forward "[\"']\\s-*\\?>" limit t))))
        (cond
         ((not decl-end) nil)
         ((re-search-forward "encoding=[\"']\\(.+?\\)[\"']" decl-end t)
          (let* ((name (match-string 1))
                 (coding (intern (downcase name))))
            (if (coding-system-p coding)
                coding
              (message "Warning: unknown coding system \"%s\"" name)
              nil)))
         (t
          ;; Files without an encoding tag should be UTF-8.  But users
          ;; may be naive about encodings, and have saved the file from
          ;; another editor that does not help them get the encoding right.
          ;; Detect the encoding and warn the user if it is detected as
          ;; something other than UTF-8.
          (let ((detected
                 (with-coding-priority '(utf-8)
                   (coding-system-base
                    (detect-coding-region (point-min) limit t)))))
            ;; Pure ASCII always comes back as undecided.
            (if (memq detected '(utf-8 undecided))
                'utf-8
              (warn "File contents detected as %s.
Consider adding an encoding attribute to the xml declaration,
or saving as utf-8, as mandated by the xml specification." detected)
              detected))))))))
| 2389 | |
(defun sgml-html-meta-auto-coding-function (size)
  "If the buffer has an HTML meta tag, use it to determine encoding.
SIZE is the number of characters after point to examine; the search is
further limited to the end of the HTML header, or to 10 lines when no
\"</head>\" is found.
Return the coding system named by the charset of the meta tag, or nil
if the text is not an HTML document, has no such tag, or names an
unknown coding system (a message is shown in the last case).
This function is intended to be added to `auto-coding-functions'."
  (let ((case-fold-search t))
    (setq size (min (+ (point) size)
                    (save-excursion
                      ;; Limit the search by the end of the HTML header.
                      (or (search-forward "</head>" (+ (point) size) t)
                          ;; In case of no header, search only 10 lines.
                          (forward-line 10))
                      (point))))
    ;; Make sure that the buffer really contains an HTML document, by
    ;; checking that it starts with a doctype or a <HTML> start tag
    ;; (allowing for whitespace at bob).  Note: 'DOCTYPE NETSCAPE' is
    ;; useful for Mozilla bookmark files.
    ;;
    ;; The charset value ends at a quote, whitespace, `/' or `>'.
    ;; Whitespace has to be written as a character class here: inside a
    ;; bracket expression backslash is an ordinary character, so
    ;; "\\s-" would not mean "whitespace syntax" there.
    (when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
               (re-search-forward "<meta\\s-+\\(http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*\\)?charset=[\"']?\\(.+?\\)[\"'[:space:]\n/>]" size t))
      (let* ((match (match-string 2))
             (sym (intern (downcase match))))
        (if (coding-system-p sym)
            sym
          (message "Warning: unknown coding system \"%s\"" match)
          nil)))))
| 2413 | |
(defun xml-find-file-coding-system (args)
  "Return the coding system to use for an XML file without a declaration.
ARGS is a list whose first element is the I/O operation being
performed.  For `insert-file-contents', the accessible portion of the
buffer is examined with `utf-8' given priority: `utf-8' is returned
when the text is detected as UTF-8 or pure ASCII, the detected coding
system is returned as is for UTF-16 with signature, and any other
detection result is returned after a warning.  For every other
operation, return `undecided'.

Strictly speaking, the file should be utf-8, but mistakes are
made, and there are genuine cases where XML fragments are saved,
with the encoding properly specified in a master document, or
added by processing software."
  (if (not (eq (car args) 'insert-file-contents))
      ;; Don't interfere with the user's wishes for saving the buffer.
      ;; We did what we could when the buffer was created to ensure the
      ;; correct encoding was used, or the user was warned, so any
      ;; non-conformity here is deliberate on the part of the user.
      'undecided
    (let ((guess (with-coding-priority '(utf-8)
                   (coding-system-base
                    (detect-coding-region (point-min) (point-max) t)))))
      (cond
       ;; Pure ASCII always comes back as undecided.
       ((memq guess '(utf-8 undecided))
        'utf-8)
       ((memq guess '(utf-16le-with-signature utf-16be-with-signature))
        guess)
       (t
        (warn "File contents detected as %s.
Consider adding an xml declaration with the encoding specified,
or saving as utf-8, as mandated by the xml specification." guess)
        guess)))))
| 2441 | |
| 2442 | ;;; |
;; Announce the `mule' feature so that (require 'mule) succeeds.
(provide 'mule)
| 2444 | |
| 2445 | ;;; mule.el ends here |