| 1 | ;;; latexenc.el --- guess correct coding system in LaTeX files -*-coding: iso-2022-7bit -*- |
| 2 | |
| 3 | ;; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: Arne J\e,Ax\e(Brgensen <arne@arnested.dk> |
| 6 | ;; Keywords: mule, coding system, latex |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation, either version 3 of the License, or |
| 13 | ;; (at your option) any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 22 | |
| 23 | ;;; Commentary: |
| 24 | |
| 25 | ;; This code tries to guess the correct coding system of a LaTeX file. |
| 26 | |
| 27 | ;; First it searches for a \inputencoding{...} or |
| 28 | ;; \usepackage[...]{inputenc} line in the file and looks up the ... in |
| 29 | ;; `latex-inputenc-coding-alist' to find the corresponding coding |
| 30 | ;; system. |
| 31 | |
| 32 | ;; If this fails it will search for AUCTeX's TeX-master or tex-mode's |
| 33 | ;; tex-main-file variable in the local variables section and visit |
| 34 | ;; that file to get the coding system from the master file. This check |
| 35 | ;; can be disabled by setting `latexenc-dont-use-TeX-master-flag' to |
| 36 | ;; t. |
| 37 | |
| 38 | ;; If we have still not found a coding system we will try to use the |
| 39 | ;; standard tex-mode's `tex-guess-main-file' and get the coding system |
| 40 | ;; from the main file. This check can be disabled by setting |
| 41 | ;; `latexenc-dont-use-tex-guess-main-file-flag' to t. |
| 42 | |
| 43 | ;; The functionality is enabled by adding the function |
| 44 | ;; `latexenc-find-file-coding-system' to `file-coding-system-alist' |
| 45 | ;; like this |
| 46 | |
| 47 | ;; (add-to-list 'file-coding-system-alist |
| 48 | ;; '("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)) |
| 49 | |
| 50 | ;;; Code: |
| 51 | |
;;;###autoload
(defcustom latex-inputenc-coding-alist
  ;; Each element is (INPUTENC-NAME . CODING-SYSTEM), where INPUTENC-NAME
  ;; is the option given to "inputenc.sty" and CODING-SYSTEM is the Emacs
  ;; coding system used to decode such a file.
  '(("ansinew" . windows-1252) ; MS Windows ANSI encoding, extension of Latin-1
    ("applemac" . mac-roman)
    ("ascii" . us-ascii)
    ("cp1250" . windows-1250) ; MS Windows encoding, codepage 1250
    ("cp1252" . windows-1252) ; synonym of ansinew
    ("cp1257" . cp1257)
    ("cp437de" . cp437) ; IBM code page 437 (German version): 225 is \ss
    ("cp437" . cp437) ; IBM code page 437: 225 is \beta
    ("cp850" . cp850) ; IBM code page 850
    ("cp852" . cp852) ; IBM code page 852
    ("cp858" . cp858) ; IBM code page 850 but with a euro symbol
    ("cp865" . cp865) ; IBM code page 865
    ("latin1" . iso-8859-1)
    ("latin2" . iso-8859-2)
    ("latin3" . iso-8859-3)
    ("latin4" . iso-8859-4)
    ;; inputenc's "latin5" is ISO Latin-5, i.e. ISO 8859-9 (Turkish),
    ;; not ISO 8859-5 (which is the Cyrillic part of ISO 8859).
    ("latin5" . iso-8859-9)
    ("latin9" . iso-8859-15)
    ;; ("latin10" . undecided)
    ;; ("macce" . undecided) ; Apple Central European
    ("next" . next) ; The Next encoding
    ("utf8" . utf-8)
    ("utf8x" . utf-8)) ; used by the Unicode LaTeX package
  "Mapping from LaTeX encodings in \"inputenc.sty\" to Emacs coding systems.
LaTeX encodings are specified with \"\\usepackage[encoding]{inputenc}\".
Used by the function `latexenc-find-file-coding-system'."
  :group 'files
  :group 'mule
  :type '(alist :key-type (string :tag "LaTeX input encoding")
                :value-type (coding-system :tag "Coding system")))
| 84 | |
;;;###autoload
(defun latexenc-inputenc-to-coding-system (inputenc)
  "Look up the Emacs coding system for the LaTeX input encoding INPUTENC.
INPUTENC is compared against the keys of `latex-inputenc-coding-alist'.
The value is the associated coding system, or nil when INPUTENC has
no entry in that alist."
  (let ((entry (assoc inputenc latex-inputenc-coding-alist)))
    ;; `cdr' of nil is nil, so a missing entry yields nil.
    (cdr entry)))
| 90 | |
;;;###autoload
(defun latexenc-coding-system-to-inputenc (cs)
  "Look up the LaTeX input encoding name for the coding system CS.
The entries of `latex-inputenc-coding-alist' are tried in order; the
name of the first entry whose coding system has the same base as CS
is returned.  The value is nil when CS is not a coding system or when
no entry matches."
  (when (coding-system-p cs)
    (let ((wanted (coding-system-base cs)))
      (catch 'found
        (dolist (entry latex-inputenc-coding-alist)
          (let ((candidate (cdr entry)))
            ;; Entries whose value is not a known coding system are skipped.
            (and (coding-system-p candidate)
                 (eq wanted (coding-system-base candidate))
                 (throw 'found (car entry)))))
        ;; Fell off the end of the alist: nothing matched.
        nil))))
| 104 | |
(defvar latexenc-dont-use-TeX-master-flag nil
  "If non-nil, do not follow TeX-master to determine the coding system.
This disables the search for a TeX-master or tex-main-file entry in
the local variables section of the file being visited.")
| 107 | |
(defvar latexenc-dont-use-tex-guess-main-file-flag nil
  "Non-nil means don't use `tex-guess-main-file' to find the coding system.")
| 110 | |
;;;###autoload
(defun latexenc-find-file-coding-system (arg-list)
  "Determine the coding system of a LaTeX file if it uses \"inputenc.sty\".
The mapping from LaTeX's \"inputenc.sty\" encoding names to Emacs
coding system names is determined from `latex-inputenc-coding-alist'.

ARG-LIST is a list whose first element is the I/O operation being
performed and whose second element, if a string, is the file name.
Only the operation `insert-file-contents' is handled; for any other
operation the value is `undecided'.

The current buffer is searched for an uncommented
\"\\usepackage[ENC]{inputenc}\" or \"\\inputencoding{ENC}\" line.  If
none is found, the coding system of the master/main file is used
instead, unless that is disabled by `latexenc-dont-use-TeX-master-flag'
and `latexenc-dont-use-tex-guess-main-file-flag'.  The value is
`undecided' when no coding system can be determined."
  (if (eq (car arg-list) 'insert-file-contents)
      (save-excursion
        ;; Try to find the coding system in this file.
        (goto-char (point-min))
        (if (catch 'cs
              (let ((case-fold-search nil)
                    resume)
                (while (search-forward "inputenc" nil t)
                  ;; Record where to resume *before* calling `looking-at':
                  ;; the match data must not be relied upon after a
                  ;; failed match.
                  (setq resume (match-end 0))
                  (goto-char (match-beginning 0))
                  (beginning-of-line)
                  ;; "[^%\n]*" rejects occurrences preceded by a comment
                  ;; character on the same line.
                  (if (or (looking-at "[^%\n]*\\\\usepackage\\[\\([^]]*\\)\\]{\\([^}]*,\\)?inputenc\\(,[^}]*\\)?}")
                          (looking-at "[^%\n]*\\\\inputencoding{\\([^}]*\\)}"))
                      (throw 'cs t)
                    (goto-char resume)))))
            ;; Group 1 of whichever regexp matched is the encoding name.
            (let* ((match (match-string 1))
                   (sym (or (latexenc-inputenc-to-coding-system match)
                            (intern match))))
              (cond
               ((coding-system-p sym) sym)
               ;; Loading code-pages (if available) may define SYM.
               ((and (require 'code-pages nil t) (coding-system-p sym)) sym)
               (t 'undecided)))
          ;; Else try to find it in the master/main file.

          ;; Fixme: If the current file is in an archive (e.g. tar,
          ;; zip), we should find the master file in that archive.
          ;; But, that is not yet implemented.  -- K.Handa
          (let ((default-directory
                  ;; `file-name-directory' returns nil for a name without
                  ;; a directory part; never bind `default-directory' to
                  ;; nil, keep the current one instead.
                  (or (and (stringp (nth 1 arg-list))
                           (file-name-directory (nth 1 arg-list)))
                      default-directory))
                latexenc-main-file)
            ;; Is there a TeX-master or tex-main-file in the local variables
            ;; section?
            (unless latexenc-dont-use-TeX-master-flag
              (goto-char (point-max))
              (search-backward "\n\^L" (max (- (point-max) 3000) (point-min))
                               'move)
              (search-forward "Local Variables:" nil t)
              (when (re-search-forward
                     "^%+ *\\(TeX-master\\|tex-main-file\\): *\"\\(.+\\)\""
                     nil t)
                (let ((file (match-string 2)))
                  (dolist (ext `("" ,(if (boundp 'TeX-default-extension)
                                         (concat "." TeX-default-extension)
                                       "")
                                 ".tex" ".ltx" ".dtx" ".drv"))
                    (if (and (null latexenc-main-file) ;Stop at first.
                             (file-exists-p (concat file ext)))
                        (setq latexenc-main-file (concat file ext)))))))
            ;; Try tex-mode's `tex-guess-main-file'.
            (when (and (not latexenc-dont-use-tex-guess-main-file-flag)
                       (not latexenc-main-file))
              ;; Use a separate `when' so the byte-compiler sees the fboundp.
              (when (fboundp 'tex-guess-main-file)
                (let ((tex-start-of-header "\\\\document\\(style\\|class\\)"))
                  (setq latexenc-main-file (tex-guess-main-file)))))
            ;; If we found a master/main file get the coding system from it.
            (if (and latexenc-main-file
                     (file-regular-p latexenc-main-file)
                     (file-readable-p latexenc-main-file))
                ;; Both flags are bound to t while the main file is
                ;; visited, so that visit does not follow master files
                ;; in turn.
                (let* ((latexenc-dont-use-tex-guess-main-file-flag t)
                       (latexenc-dont-use-TeX-master-flag t)
                       (latexenc-main-buffer
                        (find-file-noselect latexenc-main-file t)))
                  (coding-system-base   ;Disregard the EOL part of the CS.
                   (with-current-buffer latexenc-main-buffer
                     (or coding-system-for-write buffer-file-coding-system
                         'undecided))))
              'undecided))))
    'undecided))
| 184 | |
| 185 | \f |
| 186 | (provide 'latexenc) |
| 187 | |
| 188 | ;; arch-tag: f971bc3e-1fec-4609-8f2f-73dd41ab22e1 |
| 189 | ;;; latexenc.el ends here |