Fix up comment convention on the arch-tag lines.
[bpt/emacs.git] / lisp / textmodes / po.el
1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-
2
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
4 ;; 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
5
6 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
7 ;; Greg McGary <gkm@magilla.cichlid.com>,
8 ;; Bruno Haible <bruno@clisp.org>.
9 ;; Keywords: i18n, files
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 3, or (at your option)
16 ;; any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 ;; Boston, MA 02110-1301, USA.
27
28 ;;; Commentary:
29
30 ;; This package makes sure visiting PO files decodes them correctly,
31 ;; according to the Charset= header in the PO file. For more support
32 ;; for editing PO files, see po-mode.el.
33
34 ;;; Code:
35
(defconst po-content-type-charset-alist
  (mapcar (lambda (charset-name) (cons charset-name 'undecided))
          '("ASCII" "ANSI_X3.4-1968" "US-ASCII"))
  "Alist pairing GNU libc/libiconv canonical charset names with coding systems.
Each element has the form (CHARSET-NAME . CODING-SYSTEM).  Only charset
names that do not correspond to a coding system of their own are listed.")
42
(defun po-find-charset (filename)
  "Return the charset declared in the header of the PO file FILENAME.
FILENAME is either a file name, in which case the file is read
literally into the current buffer in chunks, or a cons cell whose CDR
is a buffer that already holds the raw (not yet decoded) PO file.  In
the latter case that buffer is made current and searched from its
beginning.
The value is the charset name as a string, or nil when no
\"Content-Type: text/plain; charset=...\" header line is found."
  (let* ((header-re
          "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
         (source-buffer (if (consp filename) (cdr filename)))
         (hit-eof nil))
    (if source-buffer
        (progn
          (set-buffer source-buffer)
          (goto-char (point-min))))
    ;; Pull the file in 4096 bytes at a time until the first "msgid"
    ;; line shows up or the file is exhausted.  The PO file may open
    ;; with a long comment, so one chunk is not always enough to cover
    ;; the empty header entry.  When a buffer was supplied nothing is
    ;; read: the search below runs once and the loop is skipped.
    (while (and (not hit-eof)
                (not (re-search-forward "^msgid" nil t))
                (not source-buffer))
      (save-excursion
        (goto-char (point-max))
        (let* ((offset (1- (point)))
               (inserted (insert-file-contents-literally
                          filename nil offset (+ offset 4096))))
          ;; Fewer bytes than requested means the end of file was reached.
          (setq hit-eof (< (car (cdr inserted)) 4096)))))
    (if (re-search-forward header-re nil t)
        (match-string 1)
      (unless (or hit-eof source-buffer)
        ;; The first msgid was found, but perhaps only part of the
        ;; msgstr value has been loaded.  Read 1024 more bytes and try
        ;; one last time; if the charset is still missing, give up.
        (save-excursion
          (goto-char (point-max))
          (let ((offset (1- (point))))
            (insert-file-contents-literally
             filename nil offset (+ offset 1024))))
        (and (re-search-forward header-re nil t)
             (match-string 1))))))
77
(defun po-coding-system-for-charset (charset)
  "Return the coding system symbol whose name matches CHARSET, or nil.
CHARSET is looked up case-insensitively in `coding-system-alist',
first verbatim and then with every underscore replaced by a hyphen."
  (let ((entry (or (assoc-string charset coding-system-alist t)
                   (assoc-string (subst-char-in-string ?_ ?- charset)
                                 coding-system-alist t))))
    (and entry (intern (car entry)))))

(defun po-find-file-coding-system-guts (operation filename)
  "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
Do so according to FILENAME's declared charset.
FILENAME may be a cons (NAME . BUFFER).  In that case, detect charset
in BUFFER.
Return nil when OPERATION is not `insert-file-contents', when FILENAME
names a file that does not exist, or when FILENAME is a cons whose
BUFFER is no longer live.  If no charset is declared, \"ascii\" is
assumed; if the charset matches no known coding system, `raw-text'
is used."
  (and
   (eq operation 'insert-file-contents)
   ;; A cons is judged by its buffer alone.  It must never reach
   ;; `file-exists-p', which signals an error for a non-string argument.
   (if (consp filename)
       (buffer-live-p (cdr filename))
     (file-exists-p filename))
   (with-temp-buffer
     (let* ((coding-system-for-read 'no-conversion)
            (charset (or (po-find-charset filename) "ascii"))
            (special (assoc-string charset
                                   po-content-type-charset-alist
                                   t)))
       (list (cond
              ;; Charset names with no coding system of the same name.
              (special (cdr special))
              ;; In principle we should also check the `mime-charset'
              ;; property of everything in the base coding system
              ;; list, but there should always be a coding system
              ;; corresponding to the MIME name.
              ((po-coding-system-for-charset charset))
              ;; `code-pages' is already loaded, so retrying cannot
              ;; help.  Give up.
              ((featurep 'code-pages) 'raw-text)
              (t
               ;; Try again with code-pages loaded.  Maybe it's best
               ;; to require it initially?
               (require 'code-pages nil t)
               (or (po-coding-system-for-charset charset)
                   'raw-text))))))))
121
;;;###autoload
(defun po-find-file-coding-system (arg-list)
  "Pick the coding system for a PO file from the charset it declares.
ARG-LIST carries the operation as its first element and the file as
its second; both are handed to `po-find-file-coding-system-guts',
whose (DECODING . ENCODING) result is returned unchanged.
Called through `file-coding-system-alist', before the file is visited
for real."
  (let ((operation (car arg-list))
        (target (nth 1 arg-list)))
    (po-find-file-coding-system-guts operation target)))
127 ;; This is for XEmacs.
128 ;(defun po-find-file-coding-system (operation filename)
129 ; "\
130 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
131 ;Called through file-coding-system-alist, before the file is visited for real."
132 ; (po-find-file-coding-system-guts operation filename))
133
134 (provide 'po)
135
136 ;; arch-tag: 56748a57-d64c-4200-8f6b-c3a70496eb8c
137 ;;; po.el ends here