scheme interaction mode
[bpt/emacs.git] / lisp / textmodes / po.el
CommitLineData
c38e0c97 1;;; po.el --- basic support of PO translation files -*- coding: utf-8; -*-
f6e28751 2
ba318903 3;; Copyright (C) 1995-1998, 2000-2014 Free Software Foundation, Inc.
f6e28751 4
c38e0c97 5;; Authors: François Pinard <pinard@iro.umontreal.ca>,
f6e28751
EZ
6;; Greg McGary <gkm@magilla.cichlid.com>,
7;; Bruno Haible <bruno@clisp.org>.
8;; Keywords: i18n, files
9
10;; This file is part of GNU Emacs.
11
1fecc8fe 12;; GNU Emacs is free software: you can redistribute it and/or modify
f6e28751 13;; it under the terms of the GNU General Public License as published by
1fecc8fe
GM
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
f6e28751
EZ
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
1fecc8fe 23;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
f6e28751
EZ
24
25;;; Commentary:
26
27;; This package makes sure visiting PO files decodes them correctly,
28;; according to the Charset= header in the PO file. For more support
29;; for editing PO files, see po-mode.el.
30
31;;; Code:
32
(defconst po-content-type-charset-alist
  ;; Every name listed here is mapped to the same coding system,
  ;; `undecided'.
  (mapcar (lambda (charset-name) (cons charset-name 'undecided))
          '("ASCII" "ANSI_X3.4-1968" "US-ASCII"))
  "Alist mapping GNU libc/libiconv canonical charset names to coding systems.
Only lists the canonical charset names that do not correspond to the
name of a coding system.  Each element has the form (CHARSET . CODING).")
f6e28751
EZ
39
(defun po-find-charset (filename)
  "Return the charset declared in the header of PO file FILENAME, or nil.
FILENAME is either a file name or a cons cell whose CDR is a buffer
that already holds the (not yet decoded) contents of the PO file.

When FILENAME is a file name, raw chunks of the file are appended to
the current buffer as needed.  When it is a cons cell, the buffer in
its CDR is made current and searched from its beginning; nothing is
read from disk."
  (let* ((header-re
          "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
         (source-buffer (if (consp filename) (cdr filename)))
         (hit-eof nil)
         ;; Append up to SIZE further raw bytes of FILE at the end of the
         ;; current buffer, leaving point alone; return the number of
         ;; bytes actually inserted.
         (append-chunk
          (lambda (file size)
            (save-excursion
              (goto-char (point-max))
              (let ((offset (1- (point))))
                (cadr (insert-file-contents-literally
                       file nil offset (+ offset size))))))))
    (if source-buffer
        (progn
          (set-buffer source-buffer)
          (goto-char (point-min))
          ;; Skip past the first msgid if there is one; the buffer is
          ;; assumed to be complete, so no further reading is attempted.
          (re-search-forward "^msgid" nil t))
      ;; Start with the first 4096 bytes.  The PO file might begin with a
      ;; long comment, so keep appending 4096-byte chunks until the first
      ;; msgid shows up or the file turns out to be exhausted.
      (while (and (not hit-eof)
                  (not (re-search-forward "^msgid" nil t)))
        (setq hit-eof (< (funcall append-chunk filename 4096) 4096))))
    (if (or (re-search-forward header-re nil t)
            ;; The first msgid was found, but maybe only part of the
            ;; msgstr value got loaded.  Fetch 1024 more bytes and try
            ;; one last time before giving up.
            (and (not hit-eof)
                 (not source-buffer)
                 (progn
                   (funcall append-chunk filename 1024)
                   (re-search-forward header-re nil t))))
        (match-string 1))))
74
(defun po--charset-coding-system (charset)
  "Return the coding system whose name matches CHARSET, or nil if none.
CHARSET is looked up in `coding-system-alist' ignoring case, first as
given and then with every underscore replaced by a hyphen."
  (let ((entry (or (assoc-string charset coding-system-alist t)
                   (assoc-string (subst-char-in-string ?_ ?- charset)
                                 coding-system-alist t))))
    (and entry (intern (car entry)))))

(defun po-find-file-coding-system-guts (operation filename)
  "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
Do so according to FILENAME's declared charset.
FILENAME may be a cons (NAME . BUFFER).  In that case, detect charset
in BUFFER.

The value is actually a one-element list holding the coding system used
for decoding, so the ENCODING part is nil.  Return nil when OPERATION is
not `insert-file-contents', when FILENAME names a file that does not
exist, or when FILENAME is a cons whose buffer is no longer live."
  (and
   (eq operation 'insert-file-contents)
   ;; Only hand a real file name to `file-exists-p': a cons whose buffer
   ;; has been killed must yield nil here rather than be passed on, which
   ;; would signal a `wrong-type-argument' error.
   (if (consp filename)
       (buffer-live-p (cdr filename))
     (file-exists-p filename))
   (with-temp-buffer
     (let* ((coding-system-for-read 'no-conversion)
            ;; A PO file that declares no charset is treated as "ascii".
            (charset (or (po-find-charset filename) "ascii"))
            (special (assoc-string charset
                                   po-content-type-charset-alist
                                   t)))
       (list (cond
              ;; Canonical charset names that are not coding system names.
              (special (cdr special))
              ((po--charset-coding-system charset))
              ;; In principle we should also check the `mime-charset'
              ;; property of everything in the base coding system
              ;; list, but there should always be a coding system
              ;; corresponding to the MIME name.
              ((featurep 'code-pages)
               ;; Give up.
               'raw-text)
              (t
               ;; Try again with code-pages loaded.  Maybe it's best
               ;; to require it initially?
               (require 'code-pages nil t)
               (or (po--charset-coding-system charset)
                   'raw-text))))))))
f6e28751
EZ
118
119;;;###autoload
(defun po-find-file-coding-system (arg-list)
  "Return a (DECODING . ENCODING) pair, according to PO file's charset.
ARG-LIST is a list whose first element is the operation and whose
second element is the file name; any further elements are ignored.
Called through `file-coding-system-alist', before the file is visited for real."
  (let ((operation (nth 0 arg-list))
        (target (nth 1 arg-list)))
    (po-find-file-coding-system-guts operation target)))
124;; This is for XEmacs.
125;(defun po-find-file-coding-system (operation filename)
126; "\
127;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
128;Called through file-coding-system-alist, before the file is visited for real."
129; (po-find-file-coding-system-guts operation filename))
3180897c
DL
130
131(provide 'po)
132
133;;; po.el ends here