From f635daa1e056a564d13b4ef1ea8d1a4ff5b7401c Mon Sep 17 00:00:00 2001 From: Chong Yidong Date: Thu, 18 Aug 2011 11:53:29 -0400 Subject: [PATCH] Add L and R categories to standard category table, and use them. * lisp/international/characters.el: Add L and R categories. * lisp/subr.el (bidi-string-mark-left-to-right): Rename from string-mark-left-to-right. Use category search. * lisp/buff-menu.el (Buffer-menu-buffer+size): Callers changed. --- etc/NEWS | 13 +++++++------ lisp/ChangeLog | 9 +++++++++ lisp/buff-menu.el | 4 ++-- lisp/emacs-lisp/tabulated-list.el | 2 +- lisp/international/characters.el | 20 ++++++++++++++++++++ lisp/subr.el | 31 ++++++++++++------------------- 6 files changed, 51 insertions(+), 28 deletions(-) diff --git a/etc/NEWS b/etc/NEWS index 7b46259f7d..ec863dacef 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1047,15 +1047,16 @@ of function value which looks like (closure ENV ARGS &rest BODY). declared as dynamically bound. +++ -** New function `string-mark-left-to-right'. +** New function `bidi-string-mark-left-to-right'. Given a string containing right-to-left (RTL) script, this function -returns another string with a terminating LRM (left-to-right mark) -character. If this string is inserted into a buffer, Emacs treats the -LRM as the end of an RTL segment and displays following text as LTR. +returns another string which can be safely inserted into a buffer as a +distinct RTL "segment", without causing any following text to be +displayed as RTL. (This is done by appending a Unicode "left-to-right +mark" character.) This is useful when the buffer has overall left-to-right (LTR) -paragraph direction and you need to insert a string whose contents -(and hence directionality) are not known in advance. +paragraph direction and you need to insert a string whose contents and +directionality are not known in advance. ** pre/post-command-hook are not reset to nil upon error. Instead, the offending function is removed. diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 4f0e832bc6..38c536af62 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,12 @@ +2011-08-18 Chong Yidong + + * international/characters.el: Add L and R categories. + + * subr.el (bidi-string-mark-left-to-right): Rename from + string-mark-left-to-right. Use category search. + + * buff-menu.el (Buffer-menu-buffer+size): Callers changed. + 2011-08-18 Juri Linkov * faces.el (error, warning, success): New faces with definitions diff --git a/lisp/buff-menu.el b/lisp/buff-menu.el index a5b45921d2..2eac33d815 100644 --- a/lisp/buff-menu.el +++ b/lisp/buff-menu.el @@ -681,9 +681,9 @@ For more information, see the function `buffer-menu'." (string-width tail) 2)) Buffer-menu-short-ellipsis - (string-mark-left-to-right tail)))) + (bidi-string-mark-left-to-right tail)))) ;; Don't put properties on (buffer-name). - (setq name (string-mark-left-to-right name))) + (setq name (bidi-string-mark-left-to-right name))) (add-text-properties 0 (length name) name-props name) (add-text-properties 0 (length size) size-props size) (let ((name+space-width (- Buffer-menu-buffer+size-width diff --git a/lisp/emacs-lisp/tabulated-list.el b/lisp/emacs-lisp/tabulated-list.el index 9b485b5860..75c9a01323 100644 --- a/lisp/emacs-lisp/tabulated-list.el +++ b/lisp/emacs-lisp/tabulated-list.el @@ -283,7 +283,7 @@ of column descriptors." (> (length label) width) (setq label (concat (substring label 0 (- width 3)) "..."))) - (setq label (string-mark-left-to-right label)) + (setq label (bidi-string-mark-left-to-right label)) (if (stringp desc) (insert (propertize label 'help-echo help-echo)) (apply 'insert-text-button label (cdr desc))) diff --git a/lisp/international/characters.el b/lisp/international/characters.el index a9657c17b9..47426784e5 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -114,6 +114,16 @@ A character which can't be placed at end of line.") Base characters (Unicode General Category L,N,P,S,Zs)") (define-category ?^ "Combining Combining diacritic or mark (Unicode General Category M)") + +;; bidi types +(define-category ?R "Right-to-left (strong) +Characters with \"strong\" right-to-left directionality, i.e. +with R, AL, RLE, or RLO Unicode bidi character type.") + +(define-category ?L "Left-to-right (strong) +Characters with \"strong\" left-to-right directionality, i.e. +with L, LRE, or LRO Unicode bidi character type.") + ;;; Setting syntax and category. @@ -478,6 +488,16 @@ Combining diacritic or mark (Unicode General Category M)") (modify-category-entry x category)) chars))))) +;; Bidi categories + +(map-char-table (lambda (key val) + (cond + ((memq val '(R AL RLO RLE)) + (modify-category-entry key ?R)) + ((memq val '(L LRE LRO)) + (modify-category-entry key ?L)))) + (unicode-property-table-internal 'bidi-class)) + ;; Latin (modify-category-entry '(#x80 . #x024F) ?l) diff --git a/lisp/subr.el b/lisp/subr.el index a4251b6fee..9aa895b0e9 100644 --- a/lisp/subr.el +++ b/lisp/subr.el @@ -3539,30 +3539,23 @@ to case differences." (eq t (compare-strings str1 nil nil str2 0 (length str1) ignore-case))) -(defun string-mark-left-to-right (str) +(defun bidi-string-mark-left-to-right (str) "Return a string that can be safely inserted in left-to-right text. -If STR contains right-to-left (RTL) script, return a string -consisting of STR followed by a terminating invisible -left-to-right mark (LRM) character. -The LRM character marks the end of an RTL segment, and resets the -display direction of any subsequent text to left-to-right. -\(Otherwise, some of that text might be displayed as part of the -RTL segment, based on the bidirectional display algorithm.) +Normally, inserting a string with right-to-left (RTL) script into +a buffer may cause some subsequent text to be displayed as part +of the RTL segment (usually this affects punctuation characters). +This function returns a string which displays as STR but forces +subsequent text to be displayed as left-to-right. -If STR contains no RTL characters, return STR." +If STR contains any RTL character, this function returns a string +consisting of STR followed by an invisible left-to-right mark +\(LRM) character. Otherwise, it returns STR." (unless (stringp str) (signal 'wrong-type-argument (list 'stringp str))) - (let ((len (length str)) - (n 0) - rtl-found) - (while (and (not rtl-found) (< n len)) - (setq rtl-found (memq (get-char-code-property - (aref str n) 'bidi-class) '(R AL RLO)) - n (1+ n))) - (if rtl-found - (concat str (propertize (string ?\x200e) 'invisible t)) - str))) + (if (string-match "\\cR" str) + (concat str (propertize (string ?\x200e) 'invisible t)) + str)) ;;;; invisibility specs -- 2.20.1