Commit | Line | Data |
---|---|---|
c38e0c97 | 1 | ;;; html2text.el --- a simple html to plain text converter -*- coding: utf-8 -*- |
e84b4b86 | 2 | |
ba318903 | 3 | ;; Copyright (C) 2002-2014 Free Software Foundation, Inc. |
23f87bed MB |
4 | |
5 | ;; Author: Joakim Hove <hove@phys.ntnu.no> | |
6 | ||
7 | ;; This file is part of GNU Emacs. | |
8 | ||
5e809f55 | 9 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
23f87bed | 10 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
11 | ;; the Free Software Foundation, either version 3 of the License, or |
12 | ;; (at your option) any later version. | |
23f87bed MB |
13 | |
14 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23f87bed MB |
17 | ;; GNU General Public License for more details. |
18 | ||
19 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 20 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
23f87bed MB |
21 | |
22 | ;;; Commentary: | |
23 | ||
24 | ;; These functions provide a simple way to wash/clean html infected | |
25 | ;; mails. Definitely do not work in all cases, but some improvement | |
e2642250 | 26 | ;; in readability is generally obtained. Formatting is only done in |
23f87bed MB |
27 | ;; the buffer, so the next time you enter the article it will be |
28 | ;; "re-htmlized". | |
29 | ;; | |
e2642250 | 30 | ;; The main function is `html2text'. |
23f87bed MB |
31 | |
32 | ;;; Code: | |
33 | ||
34 | ;; | |
35 | ;; <Global variables> | |
36 | ;; | |
37 | ||
38 | (eval-when-compile | |
39 | (require 'cl)) | |
40 | ||
(defvar html2text-format-single-element-list
  '(("hr" . html2text-clean-hr))
  "An alist of stand-alone tags and the functions that format them.

Each element is a dotted pair of a tag name (written without \"<\"
or \">\") and a function.  `html2text-format-single-elements' calls
the function with two arguments: the position of the tag's \"<\" and
the position just after its \">\".")
42 | ||
(defvar html2text-replace-list
  '(("&acute;" . "`")
    ("&apos;" . "'")
    ("&brvbar;" . "|")
    ("&cent;" . "c")
    ("&circ;" . "^")
    ("&copy;" . "(C)")
    ("&curren;" . "(#)")
    ("&deg;" . "degree")
    ("&divide;" . "/")
    ("&euro;" . "e")
    ("&frac12;" . "1/2")
    ("&gt;" . ">")
    ("&iquest;" . "?")
    ("&laquo;" . "<<")
    ;; The forms with the semicolon come first so that no stray ";" is
    ;; left behind; the bare forms are kept for sloppy input.
    ("&ldquo;" . "\"")
    ("&ldquo" . "\"")
    ("&lsaquo;" . "(")
    ("&lsquo;" . "`")
    ("&lt;" . "<")
    ("&mdash;" . "--")
    ("&nbsp;" . " ")
    ("&ndash;" . "-")
    ("&permil;" . "%%")
    ("&plusmn;" . "+-")
    ("&pound;" . "£")
    ("&quot;" . "\"")
    ("&raquo;" . ">>")
    ("&rdquo;" . "\"")
    ("&rdquo" . "\"")
    ("&reg;" . "(R)")
    ("&rsaquo;" . ")")
    ("&rsquo;" . "'")
    ("&sect;" . "§")
    ("&sup1;" . "^1")
    ("&sup2;" . "^2")
    ("&sup3;" . "^3")
    ("&tilde;" . "~")
    ;; "&amp;" must stay last: decoding it earlier would turn an escaped
    ;; entity such as "&amp;lt;" into "&lt;" and then into "<".
    ("&amp;" . "&"))
  "The map of entity to text.

This is an alist where each element is a dotted pair consisting of an
old string, and a replacement string.  This replacement is done by the
function `html2text-substitute' which basically performs a
`replace-string' operation for every element in the list.  This is
completely verbatim - without any use of REGEXP.

The elements are applied in list order over the whole buffer, so the
order matters: \"&amp;\" is deliberately the last element.")
87 | ||
(defvar html2text-remove-tag-list
  '("html" "body" "p" "img" "dir" "head" "div" "br" "font" "title" "meta")
  "A list of removable tags.

Each element is a tag name written *without* any \"<\" or \">\".
Every occurrence of a listed tag is deleted without any formatting,
whether it is an opening or a closing tag and whether or not it
carries attributes.  The deletion is done by the function
`html2text-remove-tags'.

For instance, if this list contains the element \"font\", the text:

\"Here comes something <font size=\"+3\" face=\"Helvetica\"> big </font>.\"

will be reduced to:

\"Here comes something big.\"")
108 | ||
(defvar html2text-format-tag-list
  '(("b" . html2text-clean-bold)
    ("strong" . html2text-clean-bold)
    ("u" . html2text-clean-underline)
    ("i" . html2text-clean-italic)
    ("em" . html2text-clean-italic)
    ("blockquote" . html2text-clean-blockquote)
    ("a" . html2text-clean-anchor)
    ("ul" . html2text-clean-ul)
    ("ol" . html2text-clean-ol)
    ("dl" . html2text-clean-dl)
    ("center" . html2text-clean-center))
  "An alist of tags and processing functions.

Each element is a dotted pair of a tag name and the function to call
when that tag is found.  The function receives four buffer positions,
p1, p2, p3 and p4, delimiting the opening and the closing tag:

\"<b> This is bold text </b>\"
 ^  ^                  ^   ^
 |  |                  |   |
 p1 p2                 p3  p4

The called function will typically format the text somewhat and
remove the tags.")
135 | ||
(defvar html2text-remove-tag-list2
  '("li" "dt" "dd" "meta")
  "Another list of removable tags.

These tags are deleted in the same way as those in
`html2text-remove-tag-list', but they are kept while the formatting
functions run and are only removed afterward.")
142 | ||
143 | ;; | |
144 | ;; </Global variables> | |
145 | ;; | |
146 | ||
147 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
148 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
149 | ||
150 | ;; | |
151 | ;; <Utility functions> | |
152 | ;; | |
153 | ||
23f87bed | 154 | |
e2642250 MB |
(defun html2text-replace-string (from-string to-string min max)
  "Replace FROM-STRING with TO-STRING in region from MIN to MAX.
TO-STRING is inserted verbatim: its case is never adjusted and a
backslash in it has no special meaning.  Return the net change in the
number of characters in the buffer (negative when text was removed).
Point is left after the last replacement, or at MIN if there was none."
  (goto-char min)
  (let ((delta (- (length to-string) (length from-string)))
        ;; A marker follows the end of the region as the text shrinks or
        ;; grows; a fixed integer bound would let the search run into
        ;; text beyond the original region after a shortening replacement.
        (end (copy-marker max))
        (change 0))
    ;; An empty FROM-STRING matches without moving point and would
    ;; therefore loop forever.
    (unless (string= from-string "")
      (while (search-forward from-string end t)
        (replace-match to-string t t)
        (setq change (+ change delta))))
    (set-marker end nil)
    change))
23f87bed MB |
164 | |
165 | ;; | |
166 | ;; </Utility functions> | |
167 | ;; | |
168 | ||
169 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
170 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
171 | ||
172 | ;; | |
173 | ;; <Functions related to attributes> i.e. <font size=+3> | |
174 | ;; | |
175 | ||
e2642250 MB |
(defun html2text-attr-value (list attribute)
  "Get value of ATTRIBUTE from LIST.
LIST holds elements of the form (NAME VALUE); the result is the VALUE
of the first element whose NAME is `equal' to ATTRIBUTE, or nil when
there is no such element."
  (let ((entry (assoc attribute list)))
    (car (cdr entry))))
23f87bed | 179 | |
(defun html2text-get-attr (p1 p2)
  "Return the attributes of the opening tag between P1 and P2.
P1 is the position of the tag's \"<\" and P2 the position just after
its \">\".  The value is a list of (NAME VALUE) elements.  VALUE is the
raw attribute text, including any surrounding quotes, or nil for an
attribute written without \"=\".  The value is nil when the tag has no
attributes.

The attribute text is split on whitespace, so the forms \"size=3\",
\"size =3\", \"size= 3\" and \"size = 3\" are all recognized, but a
quoted value that itself contains whitespace is not handled.

This function moves point."
  (goto-char p1)
  (let* ((attr-string
          ;; Everything from the first non-blank character after the tag
          ;; name up to, but not including, the closing \">\".  A tag
          ;; without any blank has no attributes at all.
          (if (re-search-forward " +[^ ]" p2 t)
              (buffer-substring-no-properties (1- (point)) (1- p2))
            ""))
         (tmp-list (split-string attr-string))
         (attr-list nil)
         (prev (car tmp-list))
         (this (nth 1 tmp-list))
         (next (nth 2 tmp-list))
         (index 1))
    (when tmp-list
      ;;
      ;; Attributes with a value.  The first word is handled on its own
      ;; because it has no predecessor.
      ;;
      (cond
       ;; size=3
       ((string-match "[^ ]=[^ ]" prev)
        (let ((attr (nth 0 (split-string prev "=")))
              (value (substring prev (1+ (string-match "=" prev)))))
          (push (list attr value) attr-list)))
       ;; size= 3
       ((string-match "[^ ]=\\'" prev)
        (push (list (substring prev 0 -1) this) attr-list)))

      (while (< index (length tmp-list))
        (cond
         ;; size=3
         ((string-match "[^ ]=[^ ]" this)
          (let ((attr (nth 0 (split-string this "=")))
                ;; The value is the part of THIS word after its first "=".
                (value (substring this (1+ (string-match "=" this)))))
            (push (list attr value) attr-list)))
         ;; size =3
         ((string-match "\\`=[^ ]" this)
          (push (list prev (substring this 1)) attr-list))
         ;; size= 3
         ((string-match "[^ ]=\\'" this)
          (push (list (substring this 0 -1) next) attr-list))
         ;; size = 3
         ((string= "=" this)
          (push (list prev next) attr-list)))
        (setq index (1+ index))
        (setq prev this)
        (setq this next)
        (setq next (nth (1+ index) tmp-list)))
      ;;
      ;; Attributes with no accompanying "=", i.e. value=nil
      ;;
      (setq prev (car tmp-list))
      (setq this (nth 1 tmp-list))
      (setq next (nth 2 tmp-list))
      (setq index 1)

      ;; First word: valueless unless it contains "=" or the next word
      ;; starts with one.  THIS is nil when there is only one word.
      (when (and (not (string-match "=" prev))
                 (not (and this (string= (substring this 0 1) "="))))
        (push (list prev nil) attr-list))
      ;; Words that have both a predecessor and a successor.
      (while (< index (1- (length tmp-list)))
        (when (and (not (string-match "=" this))
                   (not (or (string= (substring next 0 1) "=")
                            (string= (substring prev -1) "="))))
          (push (list this nil) attr-list))
        (setq index (1+ index))
        (setq prev this)
        (setq this next)
        (setq next (nth (1+ index) tmp-list)))
      ;; Last word, when there is more than one.
      (when (and this
                 (not (string-match "=" this))
                 (not (string= (substring prev -1) "=")))
        (push (list this nil) attr-list)))
    ;; return - value
    attr-list))
23f87bed MB |
249 | |
250 | ;; | |
251 | ;; </Functions related to attributes> | |
252 | ;; | |
253 | ||
254 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
255 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
256 | ||
257 | ;; | |
258 | ;; <Functions to be called to format a tag-pair> | |
259 | ;; | |
(defun html2text-clean-list-items (p1 p2 list-type)
  "Insert an item marker after each \"<li>\" of a list starting at P1.
The number of \"<li>\" strings between P1 and P2 is counted first; then
that many \"<li>\" strings, searching forward from P1, get a marker
inserted right after them.  The marker is \" o \" when LIST-TYPE is
\"ul\", the item number followed by a colon when LIST-TYPE is \"ol\",
and \" x \" otherwise.  The \"<li>\" strings themselves are left in
place."
  (let ((total 0)
        (nr 1))
    ;; First pass: count the items inside the region.
    (goto-char p1)
    (while (search-forward "<li>" p2 t)
      (setq total (1+ total)))
    ;; Second pass: label them.  The insertions grow the buffer, which
    ;; is why this pass goes by count instead of by P2.
    (goto-char p1)
    (while (<= nr total)
      (search-forward "<li>" (point-max) t)
      (insert (cond ((string= list-type "ul") " o ")
                    ((string= list-type "ol") (format " %s: " nr))
                    (t " x ")))
      (setq nr (1+ nr)))))
23f87bed MB |
274 | |
(defun html2text-clean-dtdd (p1 p2)
  "Format the terms of a definition list starting at P1.
The number of \"<dt>\" strings between P1 and P2 is counted first.  For
that many \"<dt>\" strings, searching forward from P1, the spaces after
the \"<dt>\" and the spaces before the following \"</dt>\" or \"<dd>\"
are deleted, and the term in between gets the `bold' face.  The tags
themselves are left in place.  A \"<dt>\" that is not followed by
\"</dt>\" or \"<dd>\" is left without a face."
  (goto-char p1)
  (let ((items 0))
    (while (search-forward "<dt>" p2 t)
      (setq items (1+ items)))
    (goto-char p1)
    (dotimes (_ items)
      (when (re-search-forward "<dt>\\([ ]*\\)" nil t)
        ;; Drop the blanks after the opening tag; point ends up at the
        ;; first character of the term.
        (delete-region (match-beginning 1) (match-end 1))
        (let ((def-p1 (point)))
          ;; Only touch the buffer when the end of the term was really
          ;; found; otherwise the match data would still describe the
          ;; "<dt>" search above.
          (when (re-search-forward "\\([ ]*\\)\\(</dt>\\|<dd>\\)" nil t)
            ;; Drop the blanks before the closing tag and stay in front
            ;; of that tag.
            (goto-char (match-beginning 1))
            (delete-region (point) (match-end 1))
            (put-text-property def-p1 (point) 'face 'bold)))))))
300 | ||
(defun html2text-delete-tags (p1 p2 p3 p4)
  "Delete the opening tag from P1 to P2 and the closing tag from P3 to P4.
All four positions refer to the buffer as it is before anything is
deleted."
  ;; Deleting the closing tag first keeps P1 and P2 valid, so no
  ;; position has to be shifted by the length of the opening tag.
  (delete-region p3 p4)
  (delete-region p1 p2))
304 | ||
(defun html2text-delete-single-tag (p1 p2)
  "Delete the stand-alone tag that spans the buffer text from P1 to P2."
  (delete-region p1 p2))
307 | ||
(defun html2text-clean-hr (p1 p2)
  "Replace the \"<hr>\" tag from P1 to P2 with a rule of dashes.
The tag is deleted; in its place go a newline and then `fill-column'
dash characters."
  (html2text-delete-single-tag p1 p2)
  (goto-char p1)
  (newline 1)
  (let ((rule (make-string fill-column ?-)))
    (insert rule)))
23f87bed MB |
313 | |
(defun html2text-clean-ul (p1 p2 p3 p4)
  "Format an unordered list.
P1 to P2 is the opening \"<ul>\" tag and P3 to P4 the closing one.
Both tags are deleted and the items in between are labeled by
`html2text-clean-list-items'."
  (html2text-delete-tags p1 p2 p3 p4)
  ;; With the opening tag gone, the list body runs from P1 to P3 moved
  ;; back by the length of that tag.
  (html2text-clean-list-items p1 (- p3 (- p2 p1)) "ul"))
23f87bed MB |
317 | |
(defun html2text-clean-ol (p1 p2 p3 p4)
  "Format an ordered list.
P1 to P2 is the opening \"<ol>\" tag and P3 to P4 the closing one.
Both tags are deleted and the items in between are numbered by
`html2text-clean-list-items'."
  (html2text-delete-tags p1 p2 p3 p4)
  ;; With the opening tag gone, the list body runs from P1 to P3 moved
  ;; back by the length of that tag.
  (html2text-clean-list-items p1 (- p3 (- p2 p1)) "ol"))
23f87bed MB |
321 | |
(defun html2text-clean-dl (p1 p2 p3 p4)
  "Format a definition list.
P1 to P2 is the opening \"<dl>\" tag and P3 to P4 the closing one.
Both tags are deleted and the terms in between are formatted by
`html2text-clean-dtdd'."
  (html2text-delete-tags p1 p2 p3 p4)
  ;; With the opening tag gone, the list body runs from P1 to P3 moved
  ;; back by the length of that tag.
  (html2text-clean-dtdd p1 (- p3 (- p2 p1))))
23f87bed MB |
325 | |
(defun html2text-clean-center (p1 p2 p3 p4)
  "Center the text enclosed by a \"<center>\" tag pair.
P1 to P2 is the opening tag and P3 to P4 the closing one.  Both tags
are deleted and the lines of the remaining text are centered with
`center-region'."
  (html2text-delete-tags p1 p2 p3 p4)
  (let* ((open-length (- p2 p1))
         ;; End of the enclosed text once the opening tag is gone.
         (text-end (- p3 open-length)))
    (center-region p1 text-end)))
23f87bed MB |
329 | |
(defun html2text-clean-bold (p1 p2 p3 p4)
  "Show the text between P2 and P3 in the `bold' face.
P1 to P2 is the opening tag and P3 to P4 the closing one; both tags
are deleted after the face has been applied."
  (add-text-properties p2 p3 '(face bold))
  (html2text-delete-tags p1 p2 p3 p4))
23f87bed MB |
333 | |
(defun html2text-clean-title (p1 p2 p3 p4)
  "Show the title text between P2 and P3 in the `bold' face.
P1 to P2 is the opening tag and P3 to P4 the closing one; both tags
are deleted after the face has been applied."
  (add-text-properties p2 p3 '(face bold))
  (html2text-delete-tags p1 p2 p3 p4))
23f87bed MB |
337 | |
(defun html2text-clean-underline (p1 p2 p3 p4)
  "Show the text between P2 and P3 in the `underline' face.
P1 to P2 is the opening tag and P3 to P4 the closing one; both tags
are deleted after the face has been applied."
  (add-text-properties p2 p3 '(face underline))
  (html2text-delete-tags p1 p2 p3 p4))
23f87bed MB |
341 | |
(defun html2text-clean-italic (p1 p2 p3 p4)
  "Show the text between P2 and P3 in the `italic' face.
P1 to P2 is the opening tag and P3 to P4 the closing one; both tags
are deleted after the face has been applied."
  (add-text-properties p2 p3 '(face italic))
  (html2text-delete-tags p1 p2 p3 p4))
23f87bed MB |
345 | |
(defun html2text-clean-font (p1 p2 p3 p4)
  "Delete a \"<font>\" tag pair and keep the enclosed text as it is.
P1 to P2 is the opening tag and P3 to P4 the closing one."
  (html2text-delete-tags p1 p2 p3 p4))
23f87bed MB |
348 | |
(defun html2text-clean-blockquote (p1 p2 p3 p4)
  "Delete a \"<blockquote>\" tag pair and keep the enclosed text as it is.
P1 to P2 is the opening tag and P3 to P4 the closing one."
  (html2text-delete-tags p1 p2 p3 p4))
23f87bed MB |
351 | |
(defun html2text-clean-anchor (p1 p2 p3 p4)
  "Replace an anchor element with the value of its \"href\" attribute.
P1 to P2 is the opening \"<a ...>\" tag and P4 the end of the closing
tag; P3 is not used.  Everything from P1 to P4, the link text
included, is deleted.  When the tag has an \"href\" attribute, its
value, stripped of one pair of surrounding quotes, is inserted at P1
in the `bold' face."
  ;; If someone can explain how to make the URL clickable I will surely
  ;; improve upon this.
  ;; Maybe `goto-addr.el' can be used here.
  (let ((href (html2text-attr-value (html2text-get-attr p1 p2) "href")))
    (delete-region p1 p4)
    (when href
      (goto-char p1)
      (let ((quoted (string-match "\\`['\"].*['\"]\\'" href)))
        (insert (if quoted
                    (substring href 1 -1)
                  href)))
      (put-text-property p1 (point) 'face 'bold))))
364 | ||
365 | ;; | |
366 | ;; </Functions to be called to format a tag-pair> | |
367 | ;; | |
368 | ||
369 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
370 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
371 | ||
372 | ;; | |
373 | ;; <Functions to be called to fix up paragraphs> | |
374 | ;; | |
375 | ||
(defun html2text-fix-paragraph (p1 p2)
  "Remove the \"<br>\" strings between P1 and P2, refilling where possible.
Nothing is refilled unless some line in the region ends in \"<br>\".
In that case the first and the last line of the region that match the
regexp \".+[^<][^b][^r][^>]$\" are located; the text from the start of
the first one to the end of the line following the last one has its
\"<br>\" strings removed and is then filled with `fill-region'.
Finally every \"<br>\" still found between P1 and P2 is removed.
Return the value of that last `html2text-replace-string' call."
  (goto-char p1)
  (when (and (re-search-forward "<br>$" p2 t)
             (progn
               (goto-char p1)
               (re-search-forward ".+[^<][^b][^r][^>]$" p2 t)))
    (let ((refill-start (line-beginning-position)))
      (goto-char p2)
      (re-search-backward ".+[^<][^b][^r][^>]$" refill-start t)
      (forward-line 1)
      (end-of-line)
      ;; Removing the "<br>" strings shortens the text, so the end of
      ;; the region to fill is the old end corrected by the size change
      ;; that `html2text-replace-string' reports.
      (let* ((old-stop (point))
             (change (html2text-replace-string "<br>" "" refill-start old-stop))
             (refill-stop (+ old-stop change)))
        (fill-region refill-start refill-stop))))
  (html2text-replace-string "<br>" "" p1 p2))
23f87bed MB |
401 | |
402 | ;; | |
403 | ;; This one is interactive ... | |
404 | ;; | |
(defun html2text-fix-paragraphs ()
  "Try to fix up the paragraphs in the current buffer.
This is done in quite an ad hoc fashion, quite close to pure
guesswork.  It does work in some cases though."
  (interactive)
  ;; Lines holding nothing but "<br>" are emptied first; if they were
  ;; left intact we would not have any paragraphs at all.
  (goto-char (point-min))
  (while (re-search-forward "^<br>$" nil t)
    (delete-region (match-beginning 0) (match-end 0)))
  ;; Then walk over the buffer one paragraph at a time.
  (goto-char (point-min))
  (while (not (eobp))
    (let ((start (point)))
      (forward-paragraph 1)
      (html2text-fix-paragraph start (1- (point)))
      ;; Fixing the paragraph changed its length, so its end is looked
      ;; up again from its unchanged start.
      (goto-char start)
      (unless (eobp)
        (forward-paragraph 1)))))
423 | ||
424 | ;; | |
425 | ;; </Functions to be called to fix up paragraphs> | |
426 | ;; | |
427 | ||
428 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
429 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
430 | ||
431 | ;; | |
432 | ;; <Interactive functions> | |
433 | ;; | |
434 | ||
(defun html2text-remove-tags (tag-list)
  "Remove all tags whose name is in TAG-LIST from the current buffer.
TAG-LIST is a list of tag names written without \"<\" or \">\".  Both
opening and closing tags are deleted, with or without attributes.  A
name only matches a complete tag name, so \"p\" does not remove
\"<pre>\".  Whether letter case matters depends on `case-fold-search'.

When called interactively, the tags listed in
`html2text-remove-tag-list' are removed; see the documentation for
that variable."
  (interactive (list html2text-remove-tag-list))
  (dolist (tag tag-list)
    (goto-char (point-min))
    ;; After the name there must be either the closing ">" or a blank
    ;; or "/" that starts the rest of the tag.
    (let ((tag-re (format "</?%s\\([ \t\r\n/][^>]*\\)?>" (regexp-quote tag))))
      (while (re-search-forward tag-re nil t)
        (delete-region (match-beginning 0) (match-end 0))))))
443 | ||
(defun html2text-format-tags ()
  "Format every tag pair listed in `html2text-format-tag-list'.
For each element of that list, the buffer is searched from the top
for the opening tag, and the element's function is called with the
positions p1, p2, p3 and p4 described in the documentation of that
variable.  When no closing tag follows an opening tag, one is
inserted right after the opening tag."
  (interactive)
  (dolist (entry html2text-format-tag-list)
    (let* ((tag (car entry))
           (handler (cdr entry))
           (open-re (format "\\(<%s\\( [^>]*\\)?>\\)" tag))
           (close-tag (format "</%s>" tag)))
      (goto-char (point-min))
      (while (re-search-forward open-re (point-max) t)
        (let* ((p2 (point))
               (p1 (progn
                     (search-backward "<" (point-min) t)
                     (point)))
               (p4 (progn
                     (unless (search-forward close-tag (point-max) t)
                       ;; No closing tag: supply one right after the
                       ;; opening tag.
                       (goto-char p2)
                       (insert close-tag))
                     (point)))
               (p3 (progn
                     (search-backward "</" (point-min) t)
                     (point))))
          (funcall handler p1 p2 p3 p4)
          ;; Resume from where the element started.
          (goto-char p1))))))
23f87bed MB |
466 | |
(defun html2text-substitute ()
  "Apply the replacements in `html2text-replace-list' to the buffer.
Each element of that list is processed in turn: every occurrence of
its car in the accessible part of the buffer is replaced with its cdr.
See the variable `html2text-replace-list' for documentation."
  (interactive)
  (dolist (pair html2text-replace-list)
    (goto-char (point-min))
    (html2text-replace-string (car pair) (cdr pair)
                              (point-min) (point-max))))
23f87bed MB |
475 | |
(defun html2text-format-single-elements ()
  "Format every tag listed in `html2text-format-single-element-list'.
For each element of that list, the buffer is searched from the top
for the tag, and the element's function is called with the position
of the tag's \"<\" and the position just after its \">\"."
  (interactive)
  (dolist (entry html2text-format-single-element-list)
    (let* ((tag (car entry))
           (handler (cdr entry))
           (tag-re (format "\\(<%s\\( [^>]*\\)?>\\)" tag)))
      (goto-char (point-min))
      (while (re-search-forward tag-re (point-max) t)
        (let* ((p2 (point))
               (p1 (progn
                     (search-backward "<" (point-min) t)
                     (point))))
          (funcall handler p1 p2))))))
23f87bed MB |
489 | |
490 | ;; | |
491 | ;; Main function | |
492 | ;; | |
493 | ||
494 | ;;;###autoload | |
(defun html2text ()
  "Convert HTML to plain text in the current buffer.
The conversion runs with `case-fold-search' bound to t and with
`buffer-read-only' bound to nil.  In order, it removes the tags in
`html2text-remove-tag-list', formats the tag pairs in
`html2text-format-tag-list', removes the tags in
`html2text-remove-tag-list2', applies `html2text-replace-list',
formats the tags in `html2text-format-single-element-list' and
finally fixes up the paragraphs.  Point is preserved."
  (interactive)
  (let ((case-fold-search t)
        (buffer-read-only nil))
    (save-excursion
      (html2text-remove-tags html2text-remove-tag-list)
      (html2text-format-tags)
      (html2text-remove-tags html2text-remove-tag-list2)
      (html2text-substitute)
      (html2text-format-single-elements)
      (html2text-fix-paragraphs))))
507 | ||
508 | ;; | |
509 | ;; </Interactive functions> | |
510 | ;; | |
e2642250 | 511 | (provide 'html2text) |
53080505 | 512 | |
23f87bed | 513 | ;;; html2text.el ends here |