X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/e6c3da2065ac72cc4e1a2bef22d367cd75401892..e4920bc99dfcee02c3bb83b46a761b0893f76626:/src/character.c?ds=sidebyside diff --git a/src/character.c b/src/character.c index 4087e8984d..88b1f11b96 100644 --- a/src/character.c +++ b/src/character.c @@ -35,6 +35,7 @@ along with GNU Emacs. If not, see . */ #include #include +#include #include "lisp.h" #include "character.h" #include "buffer.h" @@ -122,7 +123,7 @@ char_string (unsigned int c, unsigned char *p) if (c & CHAR_MODIFIER_MASK) { - c = (unsigned) char_resolve_modifier_mask ((int) c); + c = char_resolve_modifier_mask (c); /* If C still has any modifier bits, just ignore it. */ c &= ~CHAR_MODIFIER_MASK; } @@ -162,11 +163,11 @@ char_string (unsigned int c, unsigned char *p) } -/* Return a character whose multibyte form is at P. Set LEN is not +/* Return a character whose multibyte form is at P. If LEN is not NULL, it must be a pointer to integer. In that case, set *LEN to - the byte length of the multibyte form. If ADVANCED is not NULL, is + the byte length of the multibyte form. If ADVANCED is not NULL, it must be a pointer to unsigned char. In that case, set *ADVANCED to - the ending address (i.e. the starting address of the next + the ending address (i.e., the starting address of the next character) of the multibyte form. */ int @@ -206,11 +207,10 @@ string_char (const unsigned char *p, const unsigned char **advanced, int *len) } -/* Translate character C by translation table TABLE. If C is - negative, translate a character specified by CHARSET and CODE. If - no translation is found in TABLE, return the untranslated - character. If TABLE is a list, elements are char tables. In this - case, translace C by all tables. */ +/* Translate character C by translation table TABLE. If no translation is + found in TABLE, return the untranslated character. If TABLE is a list, + elements are char tables. In that case, recursively translate C by all the + tables in the list. */ int translate_char (Lisp_Object table, int c) @@ -258,7 +258,8 @@ multibyte_char_to_unibyte_safe (int c) } DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0, - doc: /* Return non-nil if OBJECT is a character. */) + doc: /* Return non-nil if OBJECT is a character. +usage: (characterp OBJECT) */) (Lisp_Object object, Lisp_Object ignore) { return (CHARACTERP (object) ? Qt : Qnil); @@ -325,7 +326,7 @@ usage: (char-width CHAR) */) disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil; if (VECTORP (disp)) - width = ASIZE (disp); + width = sanitize_char_width (ASIZE (disp)); else width = CHAR_WIDTH (c); @@ -357,7 +358,7 @@ c_string_width (const unsigned char *str, EMACS_INT len, int precision, { val = DISP_CHAR_VECTOR (dp, c); if (VECTORP (val)) - thiswidth = XVECTOR (val)->size; + thiswidth = sanitize_char_width (ASIZE (val)); else thiswidth = CHAR_WIDTH (c); } @@ -405,7 +406,7 @@ strwidth (const char *str, EMACS_INT len) in *NCHARS and *NBYTES respectively. */ EMACS_INT -lisp_string_width (Lisp_Object string, int precision, +lisp_string_width (Lisp_Object string, EMACS_INT precision, EMACS_INT *nchars, EMACS_INT *nbytes) { EMACS_INT len = SCHARS (string); @@ -420,9 +421,9 @@ lisp_string_width (Lisp_Object string, int precision, while (i < len) { - int chars, bytes, thiswidth; + EMACS_INT chars, bytes, thiswidth; Lisp_Object val; - int cmp_id; + ptrdiff_t cmp_id; EMACS_INT ignore, end; if (find_composition (i, -1, &ignore, &end, &val, string) @@ -438,7 +439,11 @@ lisp_string_width (Lisp_Object string, int precision, int c; if (multibyte) - c = STRING_CHAR_AND_LENGTH (str + i_byte, bytes); + { + int cbytes; + c = STRING_CHAR_AND_LENGTH (str + i_byte, cbytes); + bytes = cbytes; + } else c = str[i_byte], bytes = 1; chars = 1; @@ -446,7 +451,7 @@ lisp_string_width (Lisp_Object string, int precision, { val = DISP_CHAR_VECTOR (dp, c); if (VECTORP (val)) - thiswidth = XVECTOR (val)->size; + thiswidth = sanitize_char_width (ASIZE (val)); else thiswidth = CHAR_WIDTH (c); } @@ -456,8 +461,14 @@ lisp_string_width (Lisp_Object string, int precision, } } - if (precision > 0 - && (width + thiswidth > precision)) + if (precision <= 0) + { +#ifdef emacs + if (INT_ADD_OVERFLOW (width, thiswidth)) + string_overflow (); +#endif + } + else if (precision - width < thiswidth) { *nchars = i; *nbytes = i_byte; @@ -466,7 +477,7 @@ lisp_string_width (Lisp_Object string, int precision, i += chars; i_byte += bytes; width += thiswidth; - } + } if (precision > 0) { @@ -494,19 +505,6 @@ usage: (string-width STRING) */) return val; } -DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0, - doc: /* Return the direction of CHAR. -The returned value is 0 for left-to-right and 1 for right-to-left. -usage: (char-direction CHAR) */) - (Lisp_Object ch) -{ - int c; - - CHECK_CHARACTER (ch); - c = XINT (ch); - return CHAR_TABLE_REF (Vchar_direction_table, c); -} - /* Return the number of characters in the NBYTES bytes at PTR. This works by looking at the contents and checking for multibyte sequences while assuming that there's no invalid sequence. @@ -590,7 +588,7 @@ parse_str_as_multibyte (const unsigned char *str, EMACS_INT len, } /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text. - It actually converts only such 8-bit characters that don't contruct + It actually converts only such 8-bit characters that don't construct a multibyte sequence to multibyte forms of Latin-1 characters. If NCHARS is nonzero, set *NCHARS to the number of characters in the text. It is assured that we can use LEN bytes at STR as a work @@ -671,22 +669,27 @@ str_as_multibyte (unsigned char *str, EMACS_INT len, EMACS_INT nbytes, } /* Parse unibyte string at STR of LEN bytes, and return the number of - bytes it may ocupy when converted to multibyte string by + bytes it may occupy when converted to multibyte string by `str_to_multibyte'. */ EMACS_INT -parse_str_to_multibyte (const unsigned char *str, EMACS_INT len) +count_size_as_multibyte (const unsigned char *str, EMACS_INT len) { const unsigned char *endp = str + len; EMACS_INT bytes; for (bytes = 0; str < endp; str++) - bytes += (*str < 0x80) ? 1 : 2; + { + int n = *str < 0x80 ? 1 : 2; + if (INT_ADD_OVERFLOW (bytes, n)) + string_overflow (); + bytes += n; + } return bytes; } -/* Convert unibyte text at STR of NBYTES bytes to a multibyte text +/* Convert unibyte text at STR of BYTES bytes to a multibyte text that contains the same single-byte characters. It actually converts all 8-bit characters to multibyte forms. It is assured that we can use LEN bytes at STR as a work area and that is @@ -836,8 +839,8 @@ string_escape_byte8 (Lisp_Object string) if (multibyte) { if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count - || (MOST_POSITIVE_FIXNUM - nbytes) / 2 < byte8_count) - error ("Maximum string size exceeded"); + || (STRING_BYTES_BOUND - nbytes) / 2 < byte8_count) + string_overflow (); /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */ val = make_uninit_multibyte_string (nchars + byte8_count * 3, @@ -845,8 +848,9 @@ string_escape_byte8 (Lisp_Object string) } else { - if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count) - error ("Maximum string size exceeded"); + if ((STRING_BYTES_BOUND - nbytes) / 3 < byte8_count) + string_overflow (); + /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */ val = make_uninit_string (nbytes + byte8_count * 3); } @@ -890,15 +894,15 @@ DEFUN ("string", Fstring, Sstring, 0, MANY, 0, doc: /* Concatenate all the argument characters and make the result a string. usage: (string &rest CHARACTERS) */) - (size_t n, Lisp_Object *args) + (ptrdiff_t n, Lisp_Object *args) { - size_t i; + ptrdiff_t i; int c; unsigned char *buf, *p; Lisp_Object str; USE_SAFE_ALLOCA; - SAFE_ALLOCA (buf, unsigned char *, MAX_MULTIBYTE_LENGTH * n); + SAFE_NALLOCA (buf, MAX_MULTIBYTE_LENGTH, n); p = buf; for (i = 0; i < n; i++) @@ -916,9 +920,9 @@ usage: (string &rest CHARACTERS) */) DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0, doc: /* Concatenate all the argument bytes and make the result a unibyte string. usage: (unibyte-string &rest BYTES) */) - (size_t n, Lisp_Object *args) + (ptrdiff_t n, Lisp_Object *args) { - size_t i; + ptrdiff_t i; int c; unsigned char *buf, *p; Lisp_Object str; @@ -965,7 +969,7 @@ character is a target to get a byte value. In this case, POSITION, if non-nil, is an index of a target character in the string. If the current buffer (or STRING) is multibyte, and the target -character is not ASCII nor 8-bit character, an error is signalled. */) +character is not ASCII nor 8-bit character, an error is signaled. */) (Lisp_Object position, Lisp_Object string) { int c; @@ -1038,7 +1042,6 @@ syms_of_character (void) defsubr (&Smultibyte_char_to_unibyte); defsubr (&Schar_width); defsubr (&Sstring_width); - defsubr (&Schar_direction); defsubr (&Sstring); defsubr (&Sunibyte_string); defsubr (&Schar_resolve_modifiers); @@ -1067,10 +1070,6 @@ A char-table for width (columns) of each character. */); char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR, make_number (4)); - DEFVAR_LISP ("char-direction-table", Vchar_direction_table, - doc: /* A char-table for direction of each character. */); - Vchar_direction_table = Fmake_char_table (Qnil, make_number (1)); - DEFVAR_LISP ("printable-chars", Vprintable_chars, doc: /* A char-table for each printable character. */); Vprintable_chars = Fmake_char_table (Qnil, Qnil);