Commit | Line | Data |
---|---|---|
367f7f81 LMI |
1 | ;;; shr.el --- Simple HTML Renderer |
2 | ||
3 | ;; Copyright (C) 2010 Free Software Foundation, Inc. | |
4 | ||
5 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
6 | ;; Keywords: html | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation, either version 3 of the License, or | |
13 | ;; (at your option) any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
22 | ||
23 | ;;; Commentary: | |
24 | ||
25 | ;; This package takes an HTML parse tree (as provided by | |
26 | ;; libxml-parse-html-region) and renders it in the current buffer. It | |
27 | ;; does not do CSS, JavaScript or anything advanced: It's geared | |
28 | ;; towards rendering typical short snippets of HTML, like what you'd | |
29 | ;; find in HTML email and the like. | |
30 | ||
31 | ;;; Code: | |
32 | ||
71e691a5 G |
33 | (require 'browse-url) |
34 | ||
870409d4 G |
(defgroup shr nil
  "Simple HTML Renderer: display a parsed HTML document in a buffer."
  :group 'mail)
38 | ||
(defcustom shr-max-image-proportion 0.9
  "Largest fraction of the window that a displayed image may occupy.
For example, 0.7 lets an image take up to 70% of the width and
height of the window.  Larger images are scaled down to fit, provided
Emacs supports rescaling."
  :type 'float
  :group 'shr
  :version "24.1")
48 | ||
(defcustom shr-blocked-images nil
  "Regexp matching the URLs of images that should not be displayed.
If nil, no images are blocked."
  :version "24.1"
  :group 'shr
  ;; The default is nil, which is not a regexp, so nil has to be an
  ;; explicit alternative for the Customize interface to accept it.
  :type '(choice (const :tag "Block nothing" nil) regexp))
54 | ||
(defvar shr-folding-mode nil
  "How `shr-insert' fills text.
When the value is `none', text is inserted verbatim; otherwise it is
split into words and folded at `shr-width'.")

(defvar shr-state nil
  "What the renderer inserted most recently.
The values set in this file are `image', `space' and nil.")

(defvar shr-start nil
  "Buffer position where the text of the current element really starts.
Set by `shr-insert' when it is nil.")

(defvar shr-indentation 0
  "Number of spaces `shr-indent' inserts at the start of a line.")

(defvar shr-width 70
  "Column beyond which `shr-insert' breaks lines.")
61 | ||
71e691a5 G |
(defvar shr-map
  (let ((map (make-sparse-keymap)))
    (dolist (binding '(("a" . shr-show-alt-text)
                       ("i" . shr-browse-image)
                       ("I" . shr-insert-image)
                       ("u" . shr-copy-url)
                       ("v" . shr-browse-url)
                       ("\r" . shr-browse-url)))
      (define-key map (car binding) (cdr binding)))
    map)
  "Keymap put on the links and images that shr renders.")
71 | ||
870409d4 G |
(defun shr-transform-dom (dom)
  "Convert the parse tree DOM to the flat form the renderer works on.
DOM has the form (TAG ATTRIBUTES CHILD...), where ATTRIBUTES is an
alist with symbols as keys.  The result is the single list
\(TAG (:ATTR . VALUE)... CHILD...), in which every attribute name has
become a keyword, every string child has become (:text . STRING),
and every other child has been transformed recursively."
  (let ((tag (car dom))
        (attributes (cadr dom))
        (children (cddr dom)))
    (append
     (list tag)
     (mapcar (lambda (attr)
               (cons (intern (concat ":" (symbol-name (car attr))) obarray)
                     (cdr attr)))
             attributes)
     (mapcar (lambda (child)
               (if (stringp child)
                   (cons :text child)
                 (shr-transform-dom child)))
             children))))
83 | ||
84 | ;;;###autoload | |
(defun shr-insert-document (dom)
  "Render the parsed HTML document DOM into the current buffer.
DOM is first converted with `shr-transform-dom'; rendering starts
with `shr-state' and `shr-start' both bound to nil."
  (let (shr-state
        shr-start)
    (shr-descend (shr-transform-dom dom))))
89 | ||
(defun shr-descend (dom)
  "Render the element DOM, a list of the form (TAG . CONTENTS).
If a function named shr-tag-TAG is defined, it is called with
CONTENTS; otherwise CONTENTS is rendered by `shr-generic'."
  (let* ((contents (cdr dom))
         (handler-name (concat "shr-tag-" (symbol-name (car dom))))
         (handler (intern handler-name obarray)))
    (funcall (if (fboundp handler) handler 'shr-generic)
             contents)))
95 | ||
(defun shr-generic (cont)
  "Render each item of CONT, the contents of an element.
An item of the form (:text . STRING) is inserted with `shr-insert'.
Any other item whose cdr is a list is taken to be a child element and
handed to `shr-descend'.  Everything else, such as an attribute with
a string value, is skipped."
  (dolist (item cont)
    (if (eq (car item) :text)
        (shr-insert (cdr item))
      (when (listp (cdr item))
        (shr-descend item)))))
103 | ||
a41c2e6d G |
(defun shr-tag-p (cont)
  "Render the paragraph contents CONT between two paragraph breaks."
  (shr-ensure-paragraph)
  (shr-generic cont)
  (shr-ensure-paragraph))
108 | ||
(defun shr-ensure-paragraph ()
  "Make sure point is separated from the preceding text by a blank line.
Nothing is inserted at the beginning of the buffer, nor when point is
at the start of a line that follows an empty or all-space line."
  (cond
   ((bobp)
    nil)
   ((bolp)
    ;; Already on a fresh line: only the previous line matters.
    (unless (save-excursion
              (forward-line -1)
              (looking-at " *$"))
      (insert "\n")))
   ((save-excursion
      (beginning-of-line)
      (looking-at " *$"))
    ;; The current line holds nothing but spaces; ending it is enough.
    (insert "\n"))
   (t
    (insert "\n\n"))))
121 | ||
(defun shr-tag-b (cont)
  "Render CONT, the contents of a b element, in the `bold' face."
  (shr-fontize-cont cont 'bold))

(defun shr-tag-i (cont)
  "Render CONT, the contents of an i element, in the `italic' face."
  (shr-fontize-cont cont 'italic))

(defun shr-tag-em (cont)
  "Render CONT, the contents of an em element, in the `bold' face."
  (shr-fontize-cont cont 'bold))

(defun shr-tag-u (cont)
  "Render CONT, the contents of a u element, in the `underline' face."
  (shr-fontize-cont cont 'underline))

(defun shr-tag-s (cont)
  "Render CONT, the contents of an s element, in the `strike-through' face."
  (shr-fontize-cont cont 'strike-through))
870409d4 | 136 | |
(defun shr-fontize-cont (cont &rest types)
  "Render CONT and put each face in TYPES on the text it produced.
The faces start at the position recorded in `shr-start' while CONT
was rendered, or at point if none was recorded, and end at point."
  (let ((shr-start nil))
    (shr-generic cont)
    (let ((end (point)))
      (dolist (face types)
        (shr-add-font (or shr-start end) end face)))))
870409d4 G |
142 | |
(defun shr-add-font (start end type)
  "Put an overlay with face TYPE on the text between START and END."
  (overlay-put (make-overlay start end) 'face type))
146 | ||
(defun shr-tag-a (cont)
  "Render the anchor contents CONT, making a link if there is an :href.
The rendered text becomes a `link' button with the URL as help text,
and gets `shr-map' as its keymap and the URL as its `shr-url'
property.  An anchor without an :href attribute (a named target,
for instance) is rendered as ordinary text, since there is nothing
to follow."
  (let ((url (cdr (assq :href cont)))
        (start (point))
        shr-start)
    (shr-generic cont)
    (when url
      ;; `shr-start' is where `shr-insert' says the text really begins;
      ;; fall back to where point was on entry.
      (let ((beg (or shr-start start))
            (end (point)))
        (widget-convert-button 'link beg end :help-echo url)
        (put-text-property beg end 'keymap shr-map)
        (put-text-property beg end 'shr-url url)))))
157 | ||
(defun shr-browse-url ()
  "Visit the URL in the `shr-url' property at point with `browse-url'."
  (interactive)
  (let ((url (get-text-property (point) 'shr-url)))
    (if url
        (browse-url url)
      (message "No link under point"))))
165 | ||
(defun shr-copy-url ()
  "Copy the URL under point to the kill ring.
If that URL is already the most recent kill (for instance because
this command was just used on it), fetch it instead and, if it
redirects, copy the redirection target, with everything from the
first \"utm_\" tracking parameter on removed."
  (interactive)
  (let ((url (get-text-property (point) 'shr-url)))
    (cond
     ((null url)
      (message "No URL under point"))
     ((not (equal url (car kill-ring)))
      ;; First use on this URL: copy it as it is.
      (with-temp-buffer
        (insert url)
        (copy-region-as-kill (point-min) (point-max))
        (message "Copied %s" url)))
     (t
      ;; The URL is already the latest kill: resolve redirections.
      (url-retrieve
       url
       (lambda (status)
         (when (and (consp status)
                    (eq (car status) :redirect))
           (with-temp-buffer
             (insert (cadr status))
             (goto-char (point-min))
             ;; Remove common tracking junk from the URL.
             (when (re-search-forward ".utm_.*" nil t)
               (replace-match "" t t))
             (message "Copied %s" (buffer-string))
             (copy-region-as-kill (point-min) (point-max))))))))))
870409d4 | 196 | |
a41c2e6d G |
(defun shr-tag-img (cont)
  "Render the image described by CONT, the contents of an img element.
The alt text (\"[img]\" if there is none) is inserted as a
placeholder unless the image can be shown straight from the URL
cache.  An image whose URL matches `shr-blocked-images', or that has
no :src at all, stays as alt text; any other image is fetched
asynchronously and displayed by `shr-image-fetched'.  The inserted
text gets `shr-map' as its keymap and carries the alt text and URL in
the `shr-alt' and `shr-image' properties."
  ;; Start a new line, unless the previous thing rendered was an image.
  (when (and (> (current-column) 0)
             (not (eq shr-state 'image)))
    (insert "\n"))
  (let ((start (point-marker))
        (alt (cdr (assq :alt cont)))
        (url (cdr (assq :src cont))))
    (when (zerop (length alt))
      (setq alt "[img]"))
    (cond
     ;; Without a source there is nothing to match, look up or fetch,
     ;; and the functions below would signal an error on nil.
     ((zerop (length url))
      (insert alt))
     ((and shr-blocked-images
           (string-match shr-blocked-images url))
      (insert alt))
     ;; NOTE(review): the cache is probed with the encoded URL, but the
     ;; data is read and fetched with the raw one -- confirm that both
     ;; map to the same cache entry.
     ((url-is-cached (browse-url-url-encode-chars url "[&)$ ]"))
      (shr-put-image (shr-get-image-data url) (point) alt))
     (t
      (insert alt)
      (url-retrieve url 'shr-image-fetched
                    (list (current-buffer) start (point-marker))
                    t)))
    (insert " ")
    (put-text-property start (point) 'keymap shr-map)
    (put-text-property start (point) 'shr-alt alt)
    (put-text-property start (point) 'shr-image url)
    (setq shr-state 'image)))
222 | ||
71e691a5 G |
(defun shr-show-alt-text ()
  "Display the `shr-alt' text of the image under point in the echo area."
  (interactive)
  (let ((alt (get-text-property (point) 'shr-alt)))
    (if alt
        (message "%s" alt)
      (message "No image under point"))))
230 | ||
(defun shr-browse-image ()
  "Visit the URL of the image under point with `browse-url'.
The URL is taken from the `shr-image' text property."
  (interactive)
  (let ((url (get-text-property (point) 'shr-image)))
    (cond
     (url
      (message "Browsing %s..." url)
      (browse-url url))
     (t
      (message "No image under point")))))
239 | ||
870409d4 G |
(defun shr-image-fetched (status buffer start end)
  "Callback for `url-retrieve': show a fetched image in BUFFER.
STATUS is the status list passed by `url-retrieve'.  If BUFFER is
still live and STATUS holds no :error, the current buffer is stored in
the URL cache, and whatever follows the first blank line after point
is used as image data to replace the placeholder text between START
and END in BUFFER.  The current buffer is killed in every case."
  (unless (or (null (buffer-name buffer))
              (plist-get status :error))
    (url-store-in-cache (current-buffer))
    (when (or (search-forward "\n\n" nil t)
              (search-forward "\r\n\r\n" nil t))
      (let ((image-data (buffer-substring (point) (point-max))))
        (with-current-buffer buffer
          (let ((placeholder (buffer-substring start end))
                (inhibit-read-only t))
            (delete-region start end)
            (shr-put-image image-data start placeholder))))))
  (kill-buffer (current-buffer)))
253 | ||
(defun shr-put-image (data point alt)
  "Display the image DATA at buffer position POINT, labelled ALT.
On a graphical display the image, rescaled by `shr-rescale-image', is
shown with `put-image'.  On a non-graphical display, or when no image
can be made from DATA, the text ALT is inserted at POINT instead, so
that a placeholder the caller has removed is neither lost nor put
wherever point happens to be."
  (let ((image (and (display-graphic-p)
                    (ignore-errors
                      (shr-rescale-image data)))))
    (cond
     (image
      (put-image image point alt))
     ;; A caller rendering sequentially passes point itself; a plain
     ;; insert leaves point after ALT so that rendering goes on there.
     ((= point (point))
      (insert alt))
     ;; An asynchronous caller passes the placeholder's position, which
     ;; need not be where point is.
     (t
      (save-excursion
        (goto-char point)
        (insert alt))))))
870409d4 G |
261 | |
(defun shr-rescale-image (data)
  "Return an image made from DATA, scaled down to fit the window.
The image may take up at most `shr-max-image-proportion' of the width
and height of the window showing the current buffer.  Scaling is only
attempted when `imagemagick-types' is defined and the current buffer
is displayed in a window; otherwise the image is returned unscaled."
  (let ((window (get-buffer-window (current-buffer)))
        (image (create-image data nil t)))
    (if (not (and window
                  (fboundp 'imagemagick-types)))
        image
      (let* ((size (image-size image t))
             (edges (window-inside-pixel-edges window))
             (max-width (truncate (* shr-max-image-proportion
                                     (- (nth 2 edges) (nth 0 edges)))))
             (max-height (truncate (* shr-max-image-proportion
                                      (- (nth 3 edges) (nth 1 edges))))))
        ;; Fit the height first, then measure again: the width is only
        ;; reduced if the image is still too wide.
        (when (> (cdr size) max-height)
          (setq image (or (create-image data 'imagemagick t
                                        :height max-height)
                          image))
          (setq size (image-size image t)))
        (when (> (car size) max-width)
          (setq image (or (create-image data 'imagemagick t
                                        :width max-width)
                          image)))
        image))))
288 | ||
(defun shr-tag-pre (cont)
  "Render CONT, the contents of a pre element, on lines of its own.
`shr-folding-mode' is bound to `none' while CONT is rendered, so that
`shr-insert' inserts the text verbatim."
  (shr-ensure-newline)
  (let ((shr-folding-mode 'none))
    (shr-generic cont))
  (shr-ensure-newline))
294 | ||
(defun shr-tag-blockquote (cont)
  "Render the quotation CONT as a paragraph indented four more columns."
  (shr-ensure-paragraph)
  (let ((shr-indentation (+ 4 shr-indentation)))
    (shr-generic cont))
  (shr-ensure-paragraph))
870409d4 G |
300 | |
(defun shr-ensure-newline ()
  "Insert a newline unless point is already in column zero."
  (when (> (current-column) 0)
    (insert "\n")))
304 | ||
(defun shr-insert (text)
  "Insert TEXT at point, folding it to fit within `shr-width'.
If `shr-folding-mode' is `none', TEXT is inserted verbatim.
Otherwise TEXT is split at whitespace and its words are inserted
separated by single spaces; a line is broken before a word that would
extend past `shr-width', and each new line is indented with
`shr-indent' when `shr-indentation' is positive.  Whitespace at
either end of TEXT becomes one space, unless point is at the beginning
of a line or a space is already there.

A pending image (`shr-state' is `image') is first ended with a
newline.  On return `shr-state' is `space' if TEXT ended in whitespace
away from the beginning of a line, and nil otherwise.  `shr-start' is
set to the position of the first word if it was nil."
  (when (eq shr-state 'image)
    (insert "\n")
    (setq shr-state nil))
  (cond
   ((eq shr-folding-mode 'none)
    (insert text))
   (t
    (let ((first t)
          column)
      (when (and (string-match "\\`[ \t\n]" text)
                 (not (bolp))
                 ;; A previous call may have left a trailing space (or
                 ;; a bullet or indentation ends in one); adding another
                 ;; would double the gap between words.
                 (not (eq (char-before) ?\s)))
        (insert " "))
      (dolist (elem (split-string text))
        (setq column (current-column))
        (when (> column 0)
          (cond
           ;; The first word may only be moved to a new line if it is
           ;; known to be preceded by a space.
           ((and (or (not first)
                     (eq shr-state 'space))
                 (> (+ column (length elem) 1) shr-width))
            (insert "\n"))
           ((not first)
            (insert " "))))
        (setq first nil)
        (when (and (bolp)
                   (> shr-indentation 0))
          (shr-indent))
        ;; The shr-start is a special variable that is used to pass
        ;; upwards the first point in the buffer where the text really
        ;; starts.
        (unless shr-start
          (setq shr-start (point)))
        (insert elem))
      (setq shr-state nil)
      (when (and (string-match "[ \t\n]\\'" text)
                 (not (bolp)))
        ;; Only whitespace-only TEXT can get here with a space before
        ;; point; the state is still `space', but one space is enough.
        (unless (eq (char-before) ?\s)
          (insert " "))
        (setq shr-state 'space))))))
870409d4 | 343 | |
71e691a5 G |
(defun shr-indent ()
  "Insert `shr-indentation' spaces at point."
  (insert-char ?\s shr-indentation))
346 | ||
870409d4 G |
(defun shr-get-image-data (url)
  "Get image data for URL from the URL cache.
Return a string with the image data, that is, everything after the
first blank line of the cached response.  Return nil if the cache
entry cannot be read or contains no blank line."
  (with-temp-buffer
    ;; Image data is binary, so the buffer must be unibyte.  Use the
    ;; primitive directly: `mm-disable-multibyte' belongs to a library
    ;; that this file does not require.
    (set-buffer-multibyte nil)
    (when (ignore-errors
            (url-cache-extract (url-cache-create-filename url))
            t)
      (when (or (search-forward "\n\n" nil t)
                (search-forward "\r\n\r\n" nil t))
        (buffer-substring (point) (point-max))))))
870409d4 | 358 | |
a41c2e6d G |
(defvar shr-list-mode nil
  "The kind of list being rendered.
`shr-tag-ul' binds this to `ul'; `shr-tag-ol' binds it to the number
of the next item, which `shr-tag-li' increments.")
360 | ||
(defun shr-tag-ul (cont)
  "Render CONT, the contents of a ul element, as a new paragraph.
`shr-list-mode' is bound to `ul' while the items are rendered."
  (shr-ensure-paragraph)
  (let ((shr-list-mode 'ul))
    (shr-generic cont)))
365 | ||
(defun shr-tag-ol (cont)
  "Render CONT, the contents of an ol element, as a new paragraph.
`shr-list-mode' is bound to 1, the number of the first item, while the
items are rendered."
  ;; Like a bulleted list, a numbered list is set off from the text
  ;; before it.
  (shr-ensure-paragraph)
  (let ((shr-list-mode 1))
    (shr-generic cont)))
369 | ||
(defun shr-tag-li (cont)
  "Render the list item contents CONT on a line of its own.
When `shr-list-mode' is a number, the item is prefixed with that
number, which is then incremented; otherwise the prefix is \"* \".
While the item is rendered, `shr-indentation' is increased by the
width of the prefix."
  (shr-ensure-newline)
  (let ((bullet "* "))
    (when (numberp shr-list-mode)
      (setq bullet (format "%d " shr-list-mode)
            shr-list-mode (1+ shr-list-mode)))
    (let ((shr-indentation (+ shr-indentation (length bullet))))
      (insert bullet)
      (shr-generic cont))))
a41c2e6d G |
381 | |
(defun shr-tag-br (cont)
  "Insert a newline, except at the start of the buffer, then render CONT."
  (or (bobp)
      (insert "\n"))
  (shr-generic cont))
386 | ||
(defun shr-tag-h1 (cont)
  "Render the level 1 heading CONT in the `bold' and `underline' faces."
  (shr-heading cont 'bold 'underline))

(defun shr-tag-h2 (cont)
  "Render the level 2 heading CONT in the `bold' face."
  (shr-heading cont 'bold))

(defun shr-tag-h3 (cont)
  "Render the level 3 heading CONT in the `italic' face."
  (shr-heading cont 'italic))

(defun shr-tag-h4 (cont)
  "Render the level 4 heading CONT as a paragraph with no extra face."
  (shr-heading cont))

(defun shr-tag-h5 (cont)
  "Render the level 5 heading CONT as a paragraph with no extra face."
  (shr-heading cont))

(defun shr-tag-h6 (cont)
  "Render the level 6 heading CONT as a paragraph with no extra face."
  (shr-heading cont))
404 | ||
(defun shr-heading (cont &rest types)
  "Render the heading contents CONT as a paragraph with the faces TYPES."
  (shr-ensure-paragraph)
  (apply 'shr-fontize-cont cont types)
  (shr-ensure-paragraph))
409 | ||
71e691a5 G |
(defun shr-tag-table (cont)
  "Render the table whose contents are CONT as a new paragraph.
If CONT has a `tbody' item, its contents are used as the rows.  The
cells are rendered twice: once at widths derived from the column
specifications, to measure how wide each column really gets, and
once more, padded, at those measured widths for insertion."
  (shr-ensure-paragraph)
  (let* ((rows (or (cdr (assq 'tbody cont))
                   cont))
         (specs (shr-column-specs rows))
         (suggested (shr-pro-rate-columns specs))
         (sketch (shr-make-table rows suggested))
         (measured (shr-table-widths sketch (length suggested))))
    (shr-insert-table (shr-make-table rows measured t) measured)))
419 | ||
(defun shr-insert-table (table widths)
  "Insert TABLE, a list of rows of rendered cells, as a text table.
Each cell is a list (WIDTH HEIGHT TEXT) as returned by
`shr-render-td'.  WIDTHS, a vector of column widths, is used for the
horizontal rulers drawn above the table and below every row."
  (shr-insert-table-ruler widths)
  (dolist (row table)
    (let ((row-start (point))
          (row-height (apply 'max 0 (mapcar 'cadr row))))
      ;; Lay out the left border of every line of the row first; the
      ;; cells are then appended to these lines one column at a time.
      (dotimes (i row-height)
        (shr-indent)
        (insert "|\n"))
      (dolist (cell row)
        (goto-char row-start)
        (let ((cell-lines (split-string (nth 2 cell) "\n")))
          (dolist (cell-line cell-lines)
            (unless (zerop (length cell-line))
              (end-of-line)
              (insert cell-line "|")
              (forward-line 1)))
          ;; A cell with fewer lines than the row is padded at the
          ;; bottom with blank lines as wide as its first line.
          (dotimes (i (- row-height (length cell-lines)))
            (end-of-line)
            (insert (make-string (length (car cell-lines)) ?\s) "|")
            (forward-line 1)))))
    (shr-insert-table-ruler widths)))
446 | ||
(defun shr-insert-table-ruler (widths)
  "Insert a horizontal ruler for a table with the column widths WIDTHS.
WIDTHS is a vector of numbers.  The ruler is indented with
`shr-indent', has a \"+\" at every column boundary and ends with a
newline, as in \"+---+--+\"."
  (shr-indent)
  (insert "+"
          (mapconcat (lambda (width)
                       (concat (make-string width ?-) "+"))
                     widths
                     "")
          "\n"))
453 | ||
(defun shr-table-widths (table length)
  "Return a vector with the width of the widest cell in each column.
TABLE is a list of rows, each a list of cells (WIDTH HEIGHT TEXT) as
returned by `shr-render-td'.  LENGTH is the number of columns and the
length of the returned vector; no row may have more cells than that."
  (let ((widths (make-vector length 0)))
    (dolist (row table)
      (let ((i 0))
        (dolist (column row)
          (aset widths i (max (aref widths i)
                              (car column)))
          ;; Plain `setq' rather than `incf': the latter is a macro of
          ;; the cl library, which this file does not load.
          (setq i (1+ i)))))
    widths))
463 | ||
(defun shr-make-table (cont widths &optional fill)
  "Render the cells of the table rows in CONT.
Every `tr' item of CONT yields a list with one element per `td' or
`th' child, rendered by `shr-render-td' at the width found at the
cell's index in the vector WIDTHS; FILL is passed on unchanged.
Return the list of rows in document order."
  (let (rendered-rows)
    (dolist (row cont)
      (when (eq (car row) 'tr)
        (let ((index 0)
              rendered-cells)
          (dolist (cell (cdr row))
            (when (memq (car cell) '(td th))
              (setq rendered-cells
                    (cons (shr-render-td (cdr cell) (aref widths index) fill)
                          rendered-cells)
                    index (1+ index))))
          (setq rendered-rows
                (cons (nreverse rendered-cells) rendered-rows)))))
    (nreverse rendered-rows)))
477 | ||
(defun shr-render-td (cont width fill)
  "Render the table cell contents CONT in a temporary buffer.
The cell is rendered with `shr-width' bound to WIDTH and without
indentation, and newlines followed only by spaces are then removed,
searching backwards from the end.  If FILL is non-nil, every line
shorter than WIDTH is padded with spaces to WIDTH.

Return a list (MAX LINES TEXT): the length of the longest line
before padding, the number of lines, and the rendered text."
  (with-temp-buffer
    (let ((shr-width width)
          (shr-indentation 0))
      (shr-generic cont))
    (while (re-search-backward "\n *$" nil t)
      (delete-region (match-beginning 0) (match-end 0)))
    (let ((longest 0))
      ;; Measure first: padding must not count towards the width.
      (goto-char (point-min))
      (while (not (eobp))
        (end-of-line)
        (setq longest (max longest (current-column)))
        (forward-line 1))
      (when fill
        (goto-char (point-min))
        (while (not (eobp))
          (end-of-line)
          (let ((missing (- width (current-column))))
            (when (> missing 0)
              (insert (make-string missing ?\s))))
          (forward-line 1)))
      (list longest
            (count-lines (point-min) (point-max))
            (buffer-string)))))
499 | ||
(defun shr-pro-rate-columns (columns)
  "Return suggested widths for a table with the column weights COLUMNS.
COLUMNS is a vector of numbers, as returned by `shr-column-specs'.
`shr-width' is shared out among the columns in proportion to their
weights, but every column gets at least 10.  The result is a vector
of integers as long as COLUMNS."
  (let* ((count (length columns))
         (widths (make-vector count 0))
         (total 0)
         scale)
    ;; Plain `setq' rather than `incf': the latter is a macro of the cl
    ;; library, which this file does not load.
    (dotimes (i count)
      (setq total (+ total (aref columns i))))
    ;; When all weights are zero there is nothing to share out; a zero
    ;; scale gives every column the minimum instead of producing a NaN
    ;; that `truncate' cannot handle.
    (setq scale (if (> total 0)
                    (/ 1.0 total)
                  0))
    (dotimes (i count)
      (aset widths i (max (truncate (* (aref columns i)
                                       scale
                                       shr-width))
                          10)))
    widths))
512 | ||
513 | ;; Return a summary of the number and shape of the TDs in the table. | |
(defun shr-column-specs (cont)
  "Return a vector with the relative widths of the table columns in CONT.
CONT is the list of a table's rows.  The vector has one element per
column, as counted by `shr-max-columns', and every element starts out
as 1.  A `td' or `th' cell whose :width attribute is a percentage
sets the element of its column to that fraction, so that \"25%\"
gives 0.25."
  (let ((columns (make-vector (shr-max-columns cont) 1)))
    (dolist (row cont)
      (when (eq (car row) 'tr)
        (let ((i 0))
          (dolist (column (cdr row))
            ;; Only cells occupy a column.  A row also holds attributes
            ;; and the text between cells, which must not advance the
            ;; column index.
            (when (memq (car column) '(td th))
              (let ((width (cdr (assq :width (cdr column)))))
                (when (and width
                           (< i (length columns))
                           (string-match "\\([0-9]+\\)%" width))
                  (aset columns i
                        (/ (string-to-number (match-string 1 width))
                           100.0))))
              (setq i (1+ i)))))))
    columns))
529 | ||
(defun shr-count (cont elem)
  "Return the number of items in CONT whose car is ELEM."
  (length (delq nil
                (mapcar (lambda (sub)
                          (eq (car sub) elem))
                        cont))))
536 | ||
(defun shr-max-columns (cont)
  "Return the largest number of cells in any row of CONT.
CONT is the list of a table's rows; only `tr' items are examined.
Both `td' and `th' children count as cells, which is what
`shr-make-table' and `shr-column-specs' treat as a column."
  (let ((max 0))
    (dolist (row cont)
      (when (eq (car row) 'tr)
        (let ((cells 0))
          (dolist (child (cdr row))
            (when (memq (car child) '(td th))
              (setq cells (1+ cells))))
          (setq max (max max cells)))))
    max))
543 | ||
f3fd95db | 544 | (provide 'shr) |
367f7f81 LMI |
545 | |
546 | ;;; shr.el ends here |