* test/automated/package-x-test.el: Change the commentary.
[bpt/emacs.git] / test / automated / decoder-tests.el
CommitLineData
bc9a5003
KH
1;;; decoder-tests.el --- test for text decoder
2
3;; Copyright (C) 2013 Free Software Foundation, Inc.
4
5;; Author: Kenichi Handa <handa@gnu.org>
6
7;; This file is part of GNU Emacs.
8
9;; GNU Emacs is free software: you can redistribute it and/or modify
10;; it under the terms of the GNU General Public License as published by
11;; the Free Software Foundation, either version 3 of the License, or
12;; (at your option) any later version.
13
14;; GNU Emacs is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
20;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
21
22;;; Code:
23
24(require 'ert)
25
26;;; Check ASCII optimizing decoder
27
28;; Directory to hold test data files.
29(defvar decoder-tests-workdir
30 (expand-file-name "decoder-tests" temporary-file-directory))
31
32;; Return the contents (specified by CONTENT-TYPE; ascii, latin, or
33;; binary) of a test file.
34(defun decoder-tests-file-contents (content-type)
35 (let* ((ascii "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n")
36 (latin (concat ascii "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ\n"))
37 (binary (string-to-multibyte
38 (concat (string-as-unibyte latin)
39 (unibyte-string #xC0 #xC1 ?\n)))))
40 (cond ((eq content-type 'ascii) ascii)
41 ((eq content-type 'latin) latin)
42 ((eq content-type 'binary) binary)
43 (t
44 (error "Invalid file content type: %s" content-type)))))
45
46;; Return the name of test file whose contents specified by
47;; CONTENT-TYPE and whose encoding specified by CODING-SYSTEM.
48(defun decoder-tests-filename (content-type coding-system)
49 (expand-file-name (format "%s-%s" content-type coding-system)
50 decoder-tests-workdir))
51
52;; Generate a test file whose contents specified by CONTENT-TYPE and
53;; whose encoding specified by CODING-SYSTEM.
54(defun decoder-tests-gen-file (content-type coding-system)
55 (or (file-directory-p decoder-tests-workdir)
56 (mkdir decoder-tests-workdir t))
57 (let ((file (decoder-tests-filename content-type coding-system)))
58 (with-temp-file file
59 (set-buffer-file-coding-system coding-system)
60 (insert (decoder-tests-file-contents content-type)))))
61
62;; Remove all generated test files.
63(defun decoder-tests-remove-files ()
64 (delete-directory decoder-tests-workdir t))
65
66;;; The following three functions are filters for contents of a test
67;;; file.
68
69;; Convert all LFs to CR LF sequences in the string STR.
70(defun decoder-tests-lf-to-crlf (str)
71 (with-temp-buffer
72 (insert str)
73 (goto-char (point-min))
74 (while (search-forward "\n" nil t)
75 (delete-char -1)
76 (insert "\r\n"))
77 (buffer-string)))
78
79;; Convert all LFs to CRs in the string STR.
80(defun decoder-tests-lf-to-cr (str)
81 (with-temp-buffer
82 (insert str)
83 (subst-char-in-region (point-min) (point-max) ?\n ?\r)
84 (buffer-string)))
85
86;; Convert all LFs to LF LF sequences in the string STR.
87(defun decoder-tests-lf-to-lflf (str)
88 (with-temp-buffer
89 (insert str)
90 (goto-char (point-min))
91 (while (search-forward "\n" nil t)
92 (insert "\n"))
93 (buffer-string)))
94
95;; Prepend the UTF-8 BOM to STR.
96(defun decoder-tests-add-bom (str)
97 (concat "\xfeff" str))
98
99;; Test the decoding of a file whose contents and encoding are
100;; specified by CONTENT-TYPE and WRITE-CODING. The test passes if the
101;; file is read by READ-CODING and detected as DETECTED-CODING and the
102;; contents is correctly decoded.
103;; Optional 5th arg TRANSLATOR is a function to translate the original
104;; file contents to match with the expected result of decoding. For
105;; instance, when a file of dos eol-type is read by unix eol-type,
106;; `decode-test-lf-to-crlf' must be specified.
107
108(defun decoder-tests (content-type write-coding read-coding detected-coding
109 &optional translator)
110 (prefer-coding-system 'utf-8-auto)
111 (let ((filename (decoder-tests-filename content-type write-coding)))
112 (with-temp-buffer
113 (let ((coding-system-for-read read-coding)
114 (contents (decoder-tests-file-contents content-type))
115 (disable-ascii-optimization nil))
116 (if translator
117 (setq contents (funcall translator contents)))
118 (insert-file-contents filename)
119 (if (and (coding-system-equal buffer-file-coding-system detected-coding)
120 (string= (buffer-string) contents))
121 nil
122 (list buffer-file-coding-system
123 (string-to-list (buffer-string))
124 (string-to-list contents)))))))
125
126(ert-deftest ert-test-decoder-ascii ()
127 (unwind-protect
128 (progn
129 (dolist (eol-type '(unix dos mac))
130 (decoder-tests-gen-file 'ascii eol-type))
131 (should-not (decoder-tests 'ascii 'unix 'undecided 'unix))
132 (should-not (decoder-tests 'ascii 'dos 'undecided 'dos))
133 (should-not (decoder-tests 'ascii 'dos 'dos 'dos))
134 (should-not (decoder-tests 'ascii 'mac 'undecided 'mac))
135 (should-not (decoder-tests 'ascii 'mac 'mac 'mac))
136 (should-not (decoder-tests 'ascii 'dos 'utf-8 'utf-8-dos))
137 (should-not (decoder-tests 'ascii 'dos 'unix 'unix
138 'decoder-tests-lf-to-crlf))
139 (should-not (decoder-tests 'ascii 'mac 'dos 'dos
140 'decoder-tests-lf-to-cr))
141 (should-not (decoder-tests 'ascii 'dos 'mac 'mac
142 'decoder-tests-lf-to-lflf)))
143 (decoder-tests-remove-files)))
144
145(ert-deftest ert-test-decoder-latin ()
146 (unwind-protect
147 (progn
148 (dolist (coding '("utf-8" "utf-8-with-signature"))
149 (dolist (eol-type '("unix" "dos" "mac"))
150 (decoder-tests-gen-file 'latin
151 (intern (concat coding "-" eol-type)))))
152 (should-not (decoder-tests 'latin 'utf-8-unix 'undecided 'utf-8-unix))
153 (should-not (decoder-tests 'latin 'utf-8-unix 'utf-8-unix 'utf-8-unix))
154 (should-not (decoder-tests 'latin 'utf-8-dos 'undecided 'utf-8-dos))
155 (should-not (decoder-tests 'latin 'utf-8-dos 'utf-8-dos 'utf-8-dos))
156 (should-not (decoder-tests 'latin 'utf-8-mac 'undecided 'utf-8-mac))
157 (should-not (decoder-tests 'latin 'utf-8-mac 'utf-8-mac 'utf-8-mac))
158 (should-not (decoder-tests 'latin 'utf-8-dos 'unix 'utf-8-unix
159 'decoder-tests-lf-to-crlf))
160 (should-not (decoder-tests 'latin 'utf-8-mac 'dos 'utf-8-dos
161 'decoder-tests-lf-to-cr))
162 (should-not (decoder-tests 'latin 'utf-8-dos 'mac 'utf-8-mac
163 'decoder-tests-lf-to-lflf))
164 (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'undecided
165 'utf-8-with-signature-unix))
166 (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8-auto
167 'utf-8-with-signature-unix))
168 (should-not (decoder-tests 'latin 'utf-8-with-signature-dos 'undecided
169 'utf-8-with-signature-dos))
170 (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8
171 'utf-8-unix 'decoder-tests-add-bom))
172 (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8
173 'utf-8-unix 'decoder-tests-add-bom)))
174 (decoder-tests-remove-files)))
175
176(ert-deftest ert-test-decoder-binary ()
177 (unwind-protect
178 (progn
179 (dolist (eol-type '("unix" "dos" "mac"))
180 (decoder-tests-gen-file 'binary
181 (intern (concat "raw-text" "-" eol-type))))
182 (should-not (decoder-tests 'binary 'raw-text-unix 'undecided
183 'raw-text-unix))
184 (should-not (decoder-tests 'binary 'raw-text-dos 'undecided
185 'raw-text-dos))
186 (should-not (decoder-tests 'binary 'raw-text-mac 'undecided
187 'raw-text-mac))
188 (should-not (decoder-tests 'binary 'raw-text-dos 'unix
189 'raw-text-unix 'decoder-tests-lf-to-crlf))
190 (should-not (decoder-tests 'binary 'raw-text-mac 'dos
191 'raw-text-dos 'decoder-tests-lf-to-cr))
192 (should-not (decoder-tests 'binary 'raw-text-dos 'mac
193 'raw-text-mac 'decoder-tests-lf-to-lflf)))
194 (decoder-tests-remove-files)))
195
196
197\f
198;;; The following is for benchmark testing of the new optimized
199;;; decoder, not for regression testing.
200
201(defun generate-ascii-file ()
202 (dotimes (i 100000)
203 (insert-char ?a 80)
204 (insert "\n")))
205
206(defun generate-rarely-nonascii-file ()
207 (dotimes (i 100000)
208 (if (/= i 50000)
209 (insert-char ?a 80)
210 (insert ?À)
211 (insert-char ?a 79))
212 (insert "\n")))
213
214(defun generate-mostly-nonascii-file ()
215 (dotimes (i 30000)
216 (insert-char ?a 80)
217 (insert "\n"))
218 (dotimes (i 20000)
219 (insert-char ?À 80)
220 (insert "\n"))
221 (dotimes (i 10000)
222 (insert-char ?あ 80)
223 (insert "\n")))
224
225
226(defvar test-file-list
227 '((generate-ascii-file
228 ("~/ascii-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" unix)
229 ("~/ascii-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" unix)
230 ("~/ascii-tag-none.unix" "" unix)
231 ("~/ascii-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" dos)
232 ("~/ascii-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" dos)
233 ("~/ascii-tag-none.dos" "" dos))
234 (generate-rarely-nonascii-file
235 ("~/utf-8-r-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix)
236 ("~/utf-8-r-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix)
237 ("~/utf-8-r-tag-none.unix" "" utf-8-unix)
238 ("~/utf-8-r-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos)
239 ("~/utf-8-r-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos)
240 ("~/utf-8-r-tag-none.dos" "" utf-8-dos))
241 (generate-mostly-nonascii-file
242 ("~/utf-8-m-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix)
243 ("~/utf-8-m-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix)
244 ("~/utf-8-m-tag-none.unix" "" utf-8-unix)
245 ("~/utf-8-m-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos)
246 ("~/utf-8-m-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos)
247 ("~/utf-8-m-tag-none.dos" "" utf-8-dos))))
248
249(defun generate-benchmark-test-file ()
250 (interactive)
251 (with-temp-buffer
252 (message "Generating data...")
253 (dolist (files test-file-list)
254 (delete-region (point-min) (point-max))
255 (funcall (car files))
256 (dolist (file (cdr files))
257 (message "Writing %s..." (car file))
258 (goto-char (point-min))
259 (insert (nth 1 file) "\n")
260 (let ((coding-system-for-write (nth 2 file)))
261 (write-region (point-min) (point-max) (car file)))
262 (delete-region (point-min) (point))))))
263
264(defun benchmark-decoder ()
265 (let ((gc-cons-threshold 4000000))
266 (insert "Without optimization:\n")
267 (dolist (files test-file-list)
268 (dolist (file (cdr files))
269 (let* ((disable-ascii-optimization t)
270 (result (benchmark-run 10
271 (with-temp-buffer (insert-file-contents (car file))))))
272 (insert (format "%s: %s\n" (car file) result)))))
273 (insert "With optimization:\n")
274 (dolist (files test-file-list)
275 (dolist (file (cdr files))
276 (let* ((disable-ascii-optimization nil)
277 (result (benchmark-run 10
278 (with-temp-buffer (insert-file-contents (car file))))))
279 (insert (format "%s: %s\n" (car file) result)))))))