Commit | Line | Data |
---|---|---|
c38e0c97 | 1 | ;;; po.el --- basic support of PO translation files -*- coding: utf-8; -*- |
f6e28751 | 2 | |
ba318903 | 3 | ;; Copyright (C) 1995-1998, 2000-2014 Free Software Foundation, Inc. |
f6e28751 | 4 | |
c38e0c97 | 5 | ;; Authors: François Pinard <pinard@iro.umontreal.ca>, |
f6e28751 EZ |
6 | ;; Greg McGary <gkm@magilla.cichlid.com>, |
7 | ;; Bruno Haible <bruno@clisp.org>. | |
8 | ;; Keywords: i18n, files | |
9 | ||
10 | ;; This file is part of GNU Emacs. | |
11 | ||
1fecc8fe | 12 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
f6e28751 | 13 | ;; it under the terms of the GNU General Public License as published by |
1fecc8fe GM |
14 | ;; the Free Software Foundation, either version 3 of the License, or |
15 | ;; (at your option) any later version. | |
f6e28751 EZ |
16 | |
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
1fecc8fe | 23 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
f6e28751 EZ |
24 | |
25 | ;;; Commentary: | |
26 | ||
27 | ;; This package makes sure visiting PO files decodes them correctly, | |
28 | ;; according to the charset= parameter of the Content-Type header in | |
29 | ;; the PO file. For more support for editing PO files, see po-mode.el. | |
30 | ||
31 | ;;; Code: | |
32 | ||
(defconst po-content-type-charset-alist
  ;; Every name below is a spelling of plain ASCII; mapping it to
  ;; `undecided' leaves the choice to Emacs's own detection.
  (mapcar (lambda (charset-name) (cons charset-name 'undecided))
          '("ASCII" "ANSI_X3.4-1968" "US-ASCII"))
  "Alist mapping GNU libc/libiconv canonical charset names to coding systems.
Each element has the form (CHARSET-NAME . CODING-SYSTEM).  Only
canonical charset names that do not correspond directly to a coding
system name are listed here.")
f6e28751 EZ |
39 | |
(defun po-find-charset (filename)
  "Return the charset named in the header entry of PO file FILENAME.
The value is the text following \"charset=\" on the Content-Type line,
or nil when no such line is found.

If FILENAME is a cons cell, its CDR is a buffer already holding the
undecoded PO file; that buffer is made current and searched from its
beginning, and nothing is read from disk.  Otherwise the file is read
literally, piece by piece, into the current buffer."
  (let ((content-type-re
         "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (raw-buffer (if (consp filename) (cdr filename)))
        (hit-eof nil))
    (if raw-buffer
        (progn
          (set-buffer raw-buffer)
          (goto-char (point-min))))
    ;; Read the file 4096 bytes at a time until the first "msgid" line
    ;; shows up or the file runs out.  The PO file may start with a
    ;; long comment, so one chunk is not always enough to reach the
    ;; empty header entry.  Nothing is read when a buffer was supplied.
    (while (and (not hit-eof)
                (not (re-search-forward "^msgid" nil t))
                (not raw-buffer))
      (save-excursion
        (goto-char (point-max))
        (let* ((offset (1- (point)))
               (inserted (insert-file-contents-literally
                          filename nil offset (+ offset 4096))))
          ;; Fewer characters than requested means end of file.
          (setq hit-eof (< (nth 1 inserted) 4096)))))
    (if (re-search-forward content-type-re nil t)
        (match-string 1)
      (unless (or hit-eof raw-buffer)
        ;; The first msgid was found, but possibly only part of its
        ;; msgstr value has been loaded.  Fetch 1024 more bytes and
        ;; look once more; if that fails too, give up.
        (save-excursion
          (goto-char (point-max))
          (let ((offset (1- (point))))
            (insert-file-contents-literally
             filename nil offset (+ offset 1024))))
        (and (re-search-forward content-type-re nil t)
             (match-string 1))))))
74 | ||
(defun po--charset-coding-system (charset)
  "Return the coding system symbol whose name matches CHARSET, or nil.
CHARSET is looked up case-insensitively in `coding-system-alist',
first as given and then with every `_' replaced by `-'."
  (let ((entry (or (assoc-string charset coding-system-alist t)
                   (assoc-string (subst-char-in-string ?_ ?- charset)
                                 coding-system-alist t))))
    (and entry (intern (car entry)))))

(defun po-find-file-coding-system-guts (operation filename)
  "Return the coding system to use for OPERATION on PO file FILENAME.
The value is a one-element list (CODING-SYSTEM), that is, a
\(DECODING . ENCODING) pair whose ENCODING part is nil.  It is chosen
according to FILENAME's declared charset; \"ascii\" is assumed when
the file declares none, and `raw-text' is used when the charset
matches no known coding system.

FILENAME may be a cons (NAME . BUFFER).  In that case, detect charset
in BUFFER.

Return nil when OPERATION is not `insert-file-contents', when BUFFER
is not a live buffer, or when the file does not exist."
  (and
   (eq operation 'insert-file-contents)
   ;; Only hand FILENAME to `file-exists-p' when it really is a file
   ;; name: a cons whose buffer has been killed must yield nil here
   ;; instead of signalling `wrong-type-argument'.
   (if (consp filename)
       (buffer-live-p (cdr filename))
     (and (stringp filename) (file-exists-p filename)))
   (with-temp-buffer
     (let* ((coding-system-for-read 'no-conversion)
            (charset (or (po-find-charset filename) "ascii"))
            (override (assoc-string charset
                                    po-content-type-charset-alist
                                    t)))
       (list (cond
              ;; Canonical names that are not coding system names.
              (override (cdr override))
              ((po--charset-coding-system charset))
              ;; In principle we should also check the `mime-charset'
              ;; property of everything in the base coding system
              ;; list, but there should always be a coding system
              ;; corresponding to the MIME name.
              ((featurep 'code-pages)
               ;; Give up.
               'raw-text)
              (t
               ;; Try again with code-pages loaded.  Maybe it's best
               ;; to require it initially?
               (require 'code-pages nil t)
               (or (po--charset-coding-system charset)
                   'raw-text))))))))
f6e28751 EZ |
118 | |
119 | ;;;###autoload | |
(defun po-find-file-coding-system (arg-list)
  "Return a (DECODING . ENCODING) pair, according to PO file's charset.
ARG-LIST starts with the I/O operation, followed by the file name;
any further elements are ignored.
Called through `file-coding-system-alist', before the file is visited for real."
  (let ((operation (nth 0 arg-list))
        (filename (nth 1 arg-list)))
    (po-find-file-coding-system-guts operation filename)))
124 | ;; This is for XEmacs. | |
125 | ;(defun po-find-file-coding-system (operation filename) | |
126 | ; "\ | |
127 | ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset. | |
128 | ;Called through file-coding-system-alist, before the file is visited for real." | |
129 | ; (po-find-file-coding-system-guts operation filename)) | |
3180897c DL |
130 | |
131 | (provide 'po) | |
132 | ||
133 | ;;; po.el ends here |