Commit | Line | Data |
---|---|---|
c80977a1 | 1 | ;;; latexenc.el --- guess correct coding system in LaTeX files -*-coding: iso-2022-7bit -*- |
18acc655 | 2 | |
114f9c96 | 3 | ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. |
18acc655 TTN |
4 | |
 5 | ;; Author: Arne Jørgensen <arne@arnested.dk> | |
6 | ;; Keywords: mule, coding system, latex | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
4936186e | 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
18acc655 | 11 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
12 | ;; the Free Software Foundation, either version 3 of the License, or |
13 | ;; (at your option) any later version. | |
18acc655 TTN |
14 | |
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
18acc655 TTN |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;; This code tries to guess the correct coding system of a LaTeX file. | |
26 | ||
27 | ;; First it searches for a \inputencoding{...} or | |
28 | ;; \usepackage[...]{inputenc} line in the file and looks up the ... in | |
29 | ;; `latex-inputenc-coding-alist' to find the corresponding coding | |
30 | ;; system. | |
31 | ||
32 | ;; If this fails it will search for AUCTeX's TeX-master or tex-mode's | |
33 | ;; tex-main-file variable in the local variables section and visit | |
34 | ;; that file to get the coding system from the master file. This check | |
35 | ;; can be disabled by setting `latexenc-dont-use-TeX-master-flag' to | |
36 | ;; t. | |
37 | ||
38 | ;; If we have still not found a coding system we will try to use the | |
39 | ;; standard tex-mode's `tex-guess-main-file' and get the coding system | |
40 | ;; from the main file. This check can be disabled by setting | |
41 | ;; `latexenc-dont-use-tex-guess-main-file-flag' to t. | |
42 | ||
43 | ;; The functionality is enabled by adding the function | |
44 | ;; `latexenc-find-file-coding-system' to `file-coding-system-alist' | |
45 | ;; like this | |
46 | ||
47 | ;; (add-to-list 'file-coding-system-alist | |
a15d6d35 | 48 | ;; '("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)) |
18acc655 TTN |
49 | |
50 | ;;; Code: | |
51 | ||
52 | ;;;###autoload | |
(defcustom latex-inputenc-coding-alist
  (purecopy
   '(("ansinew" . windows-1252) ; MS Windows ANSI encoding, extension of Latin-1
     ("applemac" . mac-roman)
     ("ascii" . us-ascii)
     ("cp1250" . windows-1250) ; MS Windows encoding, codepage 1250
     ("cp1252" . windows-1252) ; synonym of ansinew
     ("cp1257" . cp1257)
     ("cp437de" . cp437) ; IBM code page 437 (German version): 225 is \ss
     ("cp437" . cp437) ; IBM code page 437: 225 is \beta
     ("cp850" . cp850) ; IBM code page 850
     ("cp852" . cp852) ; IBM code page 852
     ("cp858" . cp858) ; IBM code page 850 but with a euro symbol
     ("cp865" . cp865) ; IBM code page 865
     ("latin1" . iso-8859-1)
     ("latin2" . iso-8859-2)
     ("latin3" . iso-8859-3)
     ("latin4" . iso-8859-4)
     ;; inputenc's "latin5" is ISO 8859-9 (Latin-5, Turkish);
     ;; ISO 8859-5 is the unrelated Latin/Cyrillic set.
     ("latin5" . iso-8859-9)
     ("latin9" . iso-8859-15)
     ;; Users of this table check `coding-system-p' before trusting a
     ;; value, so this entry is harmless where iso-8859-16 is missing.
     ("latin10" . iso-8859-16)
     ;; ("macce" . undecided) ; Apple Central European
     ("next" . next) ; The Next encoding
     ("utf8" . utf-8)
     ("utf8x" . utf-8))) ; used by the Unicode LaTeX package
  "Mapping from LaTeX encodings in \"inputenc.sty\" to Emacs coding systems.
LaTeX encodings are specified with \"\\usepackage[encoding]{inputenc}\".
Each element has the form (ENCODING . CODING-SYSTEM), where ENCODING
is the inputenc option name as a string and CODING-SYSTEM is a symbol.
Used by the function `latexenc-find-file-coding-system'."
  :group 'files
  :group 'mule
  :type '(alist :key-type (string :tag "LaTeX input encoding")
                :value-type (coding-system :tag "Coding system")))
85 | ||
86 | ;;;###autoload | |
(defun latexenc-inputenc-to-coding-system (inputenc)
  "Return the Emacs coding system for the LaTeX input encoding INPUTENC.
INPUTENC is looked up as a key in `latex-inputenc-coding-alist'.
Return nil if the alist has no entry for it."
  (let ((entry (assoc inputenc latex-inputenc-coding-alist)))
    (when entry
      (cdr entry))))
91 | ||
92 | ;;;###autoload | |
(defun latexenc-coding-system-to-inputenc (cs)
  "Return the LaTeX input encoding name for the coding system CS.
The result is the key of the first element of
`latex-inputenc-coding-alist' whose coding system has the same base
as CS, so EOL variants of CS are treated alike.  Elements whose
value is not a valid coding system are skipped.
Return nil if CS is not a coding system or if nothing matches."
  ;; The validity and the base of CS do not depend on the alist entry
  ;; being examined, so compute them once instead of per iteration.
  (when (coding-system-p cs)
    (let ((wanted-base (coding-system-base cs)))
      (catch 'latexenc-inputenc
        (dolist (entry latex-inputenc-coding-alist)
          (let ((entry-cs (cdr entry)))
            (when (and (coding-system-p entry-cs)
                       (eq wanted-base (coding-system-base entry-cs)))
              ;; Stop at the first match, as the alist order is
              ;; significant (e.g. "utf8" comes before "utf8x").
              (throw 'latexenc-inputenc (car entry)))))
        nil))))
105 | ||
(defvar latexenc-dont-use-TeX-master-flag nil
  "If non-nil, do not consult a TeX master file for the coding system.
This disables the search for a `TeX-master' or `tex-main-file' entry
in the local variables section that is otherwise performed by
`latexenc-find-file-coding-system'.")
108 | ||
(defvar latexenc-dont-use-tex-guess-main-file-flag nil
  "Non-nil means don't use `tex-guess-main-file' to find the coding system.
When nil, `latexenc-find-file-coding-system' falls back to
`tex-guess-main-file' (if that function is defined) when no other
source has named a main file.")
111 | ||
112 | ;;;###autoload | |
(defun latexenc--declared-inputenc ()
  "Return the input encoding name declared in the current buffer, or nil.
Scan from the start of the accessible portion for the first line
holding an uncommented \\usepackage[...]{inputenc} or
\\inputencoding{...} and return the text given as the encoding.
This function moves point."
  (goto-char (point-min))
  (let ((case-fold-search nil))
    (catch 'latexenc-declared
      (while (search-forward "inputenc" nil t)
        ;; Remember where this hit ends before `looking-at' runs, so
        ;; resuming the scan does not depend on what a failed match
        ;; leaves in the match data.
        (let ((hit-end (match-end 0)))
          (goto-char (match-beginning 0))
          (beginning-of-line)
          ;; "[^%\n]*" rejects lines where a comment starts before the
          ;; command.
          (when (or (looking-at "[^%\n]*\\\\usepackage\\[\\([^]]*\\)\\]{\\([^}]*,\\)?inputenc\\(,[^}]*\\)?}")
                    (looking-at "[^%\n]*\\\\inputencoding{\\([^}]*\\)}"))
            (throw 'latexenc-declared (match-string 1)))
          (goto-char hit-end)))
      nil)))

(defun latexenc--name-to-coding-system (name)
  "Return the coding system denoted by the inputenc encoding NAME.
Surrounding whitespace in NAME is ignored.  NAME is first looked up
with `latexenc-inputenc-to-coding-system'; failing that, it is tried
as the name of a coding system itself.  Return `undecided' if neither
yields a valid coding system."
  (let* ((trimmed (replace-regexp-in-string
                   "\\`[ \t\r\n]+\\|[ \t\r\n]+\\'" "" name))
         (sym (or (latexenc-inputenc-to-coding-system trimmed)
                  (intern trimmed))))
    (cond
     ((coding-system-p sym) sym)
     ;; Loading `code-pages', where available, may define SYM.
     ((and (require 'code-pages nil t) (coding-system-p sym)) sym)
     (t 'undecided))))

(defun latexenc--local-variables-main-file ()
  "Return the main file named in the local variables section, or nil.
Look near the end of the current buffer for a `TeX-master' or
`tex-main-file' entry with a string value, and return the first of
that name with \"\", the AUCTeX default extension (if bound), \".tex\",
\".ltx\", \".dtx\" or \".drv\" appended that names an existing file.
This function moves point."
  (goto-char (point-max))
  ;; Only the last 3000 characters (or the last page) are examined.
  (search-backward "\n\^L" (max (- (point-max) 3000) (point-min))
                   'move)
  (search-forward "Local Variables:" nil t)
  (when (re-search-forward
         "^%+ *\\(TeX-master\\|tex-main-file\\): *\"\\(.+\\)\""
         nil t)
    (let ((stem (match-string 2))
          (extensions `("" ,(if (boundp 'TeX-default-extension)
                                (concat "." TeX-default-extension)
                              "")
                        ".tex" ".ltx" ".dtx" ".drv"))
          found)
      ;; Stop at the first candidate that exists.
      (while (and extensions (not found))
        (let ((candidate (concat stem (car extensions))))
          (when (file-exists-p candidate)
            (setq found candidate)))
        (setq extensions (cdr extensions)))
      found)))

(defun latexenc--main-file-coding-system (latexenc-main-file)
  "Return the base coding system of LATEXENC-MAIN-FILE, or `undecided'.
The file is visited with `find-file-noselect'.  `undecided' is
returned when LATEXENC-MAIN-FILE is nil, is not a regular file, or
is not readable."
  (if (and latexenc-main-file
           (file-regular-p latexenc-main-file)
           (file-readable-p latexenc-main-file))
      ;; Visiting the main file runs this machinery again; disable the
      ;; master/main-file lookups so that we cannot recurse forever.
      (let* ((latexenc-dont-use-tex-guess-main-file-flag t)
             (latexenc-dont-use-TeX-master-flag t)
             (latexenc-main-buffer
              (find-file-noselect latexenc-main-file t)))
        (coding-system-base            ;Disregard the EOL part of the CS.
         (with-current-buffer latexenc-main-buffer
           (or coding-system-for-write buffer-file-coding-system
               'undecided))))
    'undecided))

(defun latexenc-find-file-coding-system (arg-list)
  "Determine the coding system of a LaTeX file if it uses \"inputenc.sty\".
The mapping from LaTeX's \"inputenc.sty\" encoding names to Emacs
coding system names is determined from `latex-inputenc-coding-alist'.

ARG-LIST is a list whose car is the I/O operation being performed;
anything other than `insert-file-contents' yields `undecided'.  If
the second element of ARG-LIST is a string, it is taken as the name
of the file, and relative main file names are resolved against its
directory.

The current buffer is scanned for an inputenc declaration.  If none
is found, the coding system is taken from the file named by
`TeX-master' or `tex-main-file' in the local variables section
\(unless `latexenc-dont-use-TeX-master-flag' is non-nil), or else
from the file returned by `tex-guess-main-file' (unless
`latexenc-dont-use-tex-guess-main-file-flag' is non-nil).

Return a coding system, or `undecided' if none could be determined."
  (if (not (eq (car arg-list) 'insert-file-contents))
      'undecided
    (save-excursion
      ;; Try to find the coding system in this file.
      (let ((latexenc-declared (latexenc--declared-inputenc)))
        (if latexenc-declared
            (latexenc--name-to-coding-system latexenc-declared)
          ;; Else try to find it in the master/main file.

          ;; Fixme: If the current file is in an archive (e.g. tar,
          ;; zip), we should find the master file in that archive.
          ;; But, that is not yet implemented.  -- K.Handa
          (let* ((latexenc-visited-file (nth 1 arg-list))
                 ;; `file-name-directory' returns nil for a name
                 ;; without a directory part; keep the current
                 ;; `default-directory' then rather than binding it
                 ;; to nil.
                 (default-directory
                   (or (and (stringp latexenc-visited-file)
                            (file-name-directory latexenc-visited-file))
                       default-directory))
                 ;; Is there a TeX-master or tex-main-file in the
                 ;; local variables section?
                 (latexenc-main-file
                  (unless latexenc-dont-use-TeX-master-flag
                    (latexenc--local-variables-main-file))))
            ;; Try tex-mode's `tex-guess-main-file'.
            (when (and (not latexenc-dont-use-tex-guess-main-file-flag)
                       (not latexenc-main-file))
              ;; Use a separate `when' so the byte-compiler sees the fboundp.
              (when (fboundp 'tex-guess-main-file)
                (let ((tex-start-of-header "\\\\document\\(style\\|class\\)"))
                  (setq latexenc-main-file (tex-guess-main-file)))))
            ;; If we found a master/main file, get the coding system
            ;; from it.
            (latexenc--main-file-coding-system latexenc-main-file)))))))
185 | ||
cc211a0f | 186 | \f |
18acc655 TTN |
187 | (provide 'latexenc) |
188 | ||
2183c76d | 189 | ;; arch-tag: f971bc3e-1fec-4609-8f2f-73dd41ab22e1 |
18acc655 | 190 | ;;; latexenc.el ends here |