X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/810ef6bcdf43f657e8f40ff5439fe684173c4b24..1fa2385302f5b2e5bb17c8aad3cb4e320512ca46:/src/charset.c diff --git a/src/charset.c b/src/charset.c index bfebe02f52..6aa6fe46b5 100644 --- a/src/charset.c +++ b/src/charset.c @@ -61,7 +61,7 @@ Lisp_Object Vcharset_hash_table; /* Table of struct charset. */ struct charset *charset_table; -static int charset_table_size; +static ptrdiff_t charset_table_size; static int charset_table_used; Lisp_Object Qcharsetp; @@ -118,24 +118,25 @@ int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; #define CODE_POINT_TO_INDEX(charset, code) \ ((charset)->code_linear_p \ - ? (code) - (charset)->min_code \ + ? (int) ((code) - (charset)->min_code) \ : (((charset)->code_space_mask[(code) >> 24] & 0x8) \ && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \ && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \ && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \ - ? (((((code) >> 24) - (charset)->code_space[12]) \ - * (charset)->code_space[11]) \ - + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ - * (charset)->code_space[7]) \ - + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ - * (charset)->code_space[3]) \ - + (((code) & 0xFF) - (charset)->code_space[0]) \ - - ((charset)->char_index_offset)) \ + ? (int) (((((code) >> 24) - (charset)->code_space[12]) \ + * (charset)->code_space[11]) \ + + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ + * (charset)->code_space[7]) \ + + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ + * (charset)->code_space[3]) \ + + (((code) & 0xFF) - (charset)->code_space[0]) \ + - ((charset)->char_index_offset)) \ : -1) -/* Convert the character index IDX to code-point CODE for CHARSET. - It is assumed that IDX is in a valid range. */ +/* Return the code-point for the character index IDX in CHARSET. 
+ IDX should be an unsigned int variable in a valid range (which is + always in nonnegative int range too). IDX contains garbage afterwards. */ #define INDEX_TO_CODE_POINT(charset, idx) \ ((charset)->code_linear_p \ @@ -363,7 +364,8 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, && CHARSET_COMPACT_CODES_P (charset)) for (; from_index < lim_index; from_index++, from_c++) { - unsigned code = INDEX_TO_CODE_POINT (charset, from_index); + unsigned code = from_index; + code = INDEX_TO_CODE_POINT (charset, code); if (NILP (CHAR_TABLE_REF (table, from_c))) CHAR_TABLE_SET (table, from_c, make_number (code)); @@ -419,7 +421,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, paying attention to comment character '#'. */ static inline unsigned -read_hex (FILE *fp, int *eof) +read_hex (FILE *fp, int *eof, int *overflow) { int c; unsigned n; @@ -441,15 +443,16 @@ read_hex (FILE *fp, int *eof) *eof = 1; return 0; } - *eof = 0; n = 0; - if (c == 'x') - while ((c = getc (fp)) != EOF && isxdigit (c)) + while (isxdigit (c = getc (fp))) + { + if (UINT_MAX >> 4 < n) + *overflow = 1; n = ((n << 4) - | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)); - else - while ((c = getc (fp)) != EOF && isdigit (c)) - n = (n * 10) + c - '0'; + | (c - ('0' <= c && c <= '9' ? '0' + : 'A' <= c && c <= 'F' ? 
'A' - 10 + : 'a' - 10))); + } if (c != EOF) ungetc (c, fp); return n; @@ -479,10 +482,10 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co unsigned max_code = CHARSET_MAX_CODE (charset); int fd; FILE *fp; - int eof; Lisp_Object suffixes; struct charset_map_entries *head, *entries; - int n_entries, count; + int n_entries; + ptrdiff_t count; USE_SAFE_ALLOCA; suffixes = Fcons (build_string (".map"), @@ -504,22 +507,27 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co memset (entries, 0, sizeof (struct charset_map_entries)); n_entries = 0; - eof = 0; while (1) { - unsigned from, to; - int c; + unsigned from, to, c; int idx; + int eof = 0, overflow = 0; - from = read_hex (fp, &eof); + from = read_hex (fp, &eof, &overflow); if (eof) break; if (getc (fp) == '-') - to = read_hex (fp, &eof); + to = read_hex (fp, &eof, &overflow); else to = from; - c = (int) read_hex (fp, &eof); + if (eof) + break; + c = read_hex (fp, &eof, &overflow); + if (eof) + break; + if (overflow) + continue; if (from < min_code || to > max_code || from > to || c > MAX_CHAR) continue; @@ -529,8 +537,9 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co sizeof (struct charset_map_entries)); entries = entries->next; memset (entries, 0, sizeof (struct charset_map_entries)); + n_entries = 0; } - idx = n_entries % 0x10000; + idx = n_entries; entries->entry[idx].from = from; entries->entry[idx].to = to; entries->entry[idx].c = c; @@ -571,7 +580,7 @@ load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int cont { Lisp_Object val, val2; unsigned from, to; - int c; + EMACS_INT c; int idx; val = AREF (vec, i); @@ -579,16 +588,11 @@ load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int cont { val2 = XCDR (val); val = XCAR (val); - CHECK_NATNUM (val); - CHECK_NATNUM (val2); from = XFASTINT (val); to = XFASTINT (val2); } else - { - CHECK_NATNUM (val); - from = to = XFASTINT 
(val); - } + from = to = XFASTINT (val); val = AREF (vec, i + 1); CHECK_NATNUM (val); c = XFASTINT (val); @@ -816,7 +820,6 @@ range of code points (in CHARSET) of target characters. */) from = CHARSET_MIN_CODE (cs); else { - CHECK_NATNUM (from_code); from = XINT (from_code); if (from < CHARSET_MIN_CODE (cs)) from = CHARSET_MIN_CODE (cs); @@ -825,7 +828,6 @@ range of code points (in CHARSET) of target characters. */) to = CHARSET_MAX_CODE (cs); else { - CHECK_NATNUM (to_code); to = XINT (to_code); if (to > CHARSET_MAX_CODE (cs)) to = CHARSET_MAX_CODE (cs); @@ -844,7 +846,7 @@ DEFUN ("define-charset-internal", Fdefine_charset_internal, Sdefine_charset_internal, charset_arg_max, MANY, 0, doc: /* For internal use only. usage: (define-charset-internal ...) */) - (size_t nargs, Lisp_Object *args) + (ptrdiff_t nargs, Lisp_Object *args) { /* Charset attr vector. */ Lisp_Object attrs; @@ -871,12 +873,15 @@ usage: (define-charset-internal ...) */) val = args[charset_arg_code_space]; for (i = 0, dimension = 0, nchars = 1; ; i++) { + Lisp_Object min_byte_obj, max_byte_obj; int min_byte, max_byte; - min_byte = XINT (Faref (val, make_number (i * 2))); - max_byte = XINT (Faref (val, make_number (i * 2 + 1))); - if (min_byte < 0 || min_byte > max_byte || max_byte >= 256) - error ("Invalid :code-space value"); + min_byte_obj = Faref (val, make_number (i * 2)); + max_byte_obj = Faref (val, make_number (i * 2 + 1)); + CHECK_RANGED_INTEGER (0, min_byte_obj, 255); + min_byte = XINT (min_byte_obj); + CHECK_RANGED_INTEGER (min_byte, max_byte_obj, 255); + max_byte = XINT (max_byte_obj); charset.code_space[i * 4] = min_byte; charset.code_space[i * 4 + 1] = max_byte; charset.code_space[i * 4 + 2] = max_byte - min_byte + 1; @@ -893,10 +898,8 @@ usage: (define-charset-internal ...) 
*/) charset.dimension = dimension; else { - CHECK_NATNUM (val); + CHECK_RANGED_INTEGER (1, val, 4); charset.dimension = XINT (val); - if (charset.dimension < 1 || charset.dimension > 4) - args_out_of_range_3 (val, make_number (1), make_number (4)); } charset.code_linear_p @@ -922,31 +925,22 @@ usage: (define-charset-internal ...) */) charset.min_code = (charset.code_space[0] | (charset.code_space[4] << 8) | (charset.code_space[8] << 16) - | (charset.code_space[12] << 24)); + | ((unsigned) charset.code_space[12] << 24)); charset.max_code = (charset.code_space[1] | (charset.code_space[5] << 8) | (charset.code_space[9] << 16) - | (charset.code_space[13] << 24)); + | ((unsigned) charset.code_space[13] << 24)); charset.char_index_offset = 0; val = args[charset_arg_min_code]; if (! NILP (val)) { - unsigned code; + unsigned code = cons_to_unsigned (val, UINT_MAX); - if (INTEGERP (val)) - code = XINT (val); - else - { - CHECK_CONS (val); - CHECK_NUMBER_CAR (val); - CHECK_NUMBER_CDR (val); - code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); - } if (code < charset.min_code || code > charset.max_code) - args_out_of_range_3 (make_number (charset.min_code), - make_number (charset.max_code), val); + args_out_of_range_3 (make_fixnum_or_float (charset.min_code), + make_fixnum_or_float (charset.max_code), val); charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code); charset.min_code = code; } @@ -954,21 +948,12 @@ usage: (define-charset-internal ...) */) val = args[charset_arg_max_code]; if (! 
NILP (val)) { - unsigned code; + unsigned code = cons_to_unsigned (val, UINT_MAX); - if (INTEGERP (val)) - code = XINT (val); - else - { - CHECK_CONS (val); - CHECK_NUMBER_CAR (val); - CHECK_NUMBER_CDR (val); - code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); - } if (code < charset.min_code || code > charset.max_code) - args_out_of_range_3 (make_number (charset.min_code), - make_number (charset.max_code), val); + args_out_of_range_3 (make_fixnum_or_float (charset.min_code), + make_fixnum_or_float (charset.max_code), val); charset.max_code = code; } @@ -981,18 +966,14 @@ usage: (define-charset-internal ...) */) charset.invalid_code = 0; else { - XSETINT (val, charset.max_code + 1); - if (XINT (val) == charset.max_code + 1) + if (charset.max_code < UINT_MAX) charset.invalid_code = charset.max_code + 1; else error ("Attribute :invalid-code must be specified"); } } else - { - CHECK_NATNUM (val); - charset.invalid_code = XFASTINT (val); - } + charset.invalid_code = cons_to_unsigned (val, UINT_MAX); val = args[charset_arg_iso_final]; if (NILP (val)) @@ -1010,9 +991,7 @@ usage: (define-charset-internal ...) */) charset.iso_revision = -1; else { - CHECK_NUMBER (val); - if (XINT (val) > 63) - args_out_of_range (make_number (63), val); + CHECK_RANGED_INTEGER (-1, val, 63); charset.iso_revision = XINT (val); } @@ -1038,17 +1017,17 @@ usage: (define-charset-internal ...) */) if (! 
NILP (args[charset_arg_code_offset])) { val = args[charset_arg_code_offset]; - CHECK_NUMBER (val); + CHECK_CHARACTER (val); charset.method = CHARSET_METHOD_OFFSET; charset.code_offset = XINT (val); - i = CODE_POINT_TO_INDEX (&charset, charset.min_code); - charset.min_char = i + charset.code_offset; i = CODE_POINT_TO_INDEX (&charset, charset.max_code); - charset.max_char = i + charset.code_offset; - if (charset.max_char > MAX_CHAR) + if (MAX_CHAR - charset.code_offset < i) error ("Unsupported max char: %d", charset.max_char); + charset.max_char = i + charset.code_offset; + i = CODE_POINT_TO_INDEX (&charset, charset.min_code); + charset.min_char = i + charset.code_offset; i = (charset.min_char >> 7) << 7; for (; i < 0x10000 && i <= charset.max_char; i += 128) @@ -1119,7 +1098,7 @@ usage: (define-charset-internal ...) */) car_part = XCAR (elt); cdr_part = XCDR (elt); CHECK_CHARSET_GET_ID (car_part, this_id); - CHECK_NUMBER (cdr_part); + CHECK_TYPE_RANGED_INTEGER (int, cdr_part); offset = XINT (cdr_part); } else @@ -1163,13 +1142,25 @@ usage: (define-charset-internal ...) */) hash_code); if (charset_table_used == charset_table_size) { - struct charset *new_table - = (struct charset *) xmalloc (sizeof (struct charset) - * (charset_table_size + 16)); - memcpy (new_table, charset_table, - sizeof (struct charset) * charset_table_size); - charset_table_size += 16; + /* Ensure that charset IDs fit into 'int' as well as into the + restriction imposed by fixnums. Although the 'int' restriction + could be removed, too much other code would need altering; for + example, the IDs are stuffed into struct + coding_system.charbuf[i] entries, which are 'int'. 
*/ + int old_size = charset_table_size; + struct charset *new_table = + xpalloc (0, &charset_table_size, 1, + min (INT_MAX, MOST_POSITIVE_FIXNUM), + sizeof *charset_table); + memcpy (new_table, charset_table, old_size * sizeof *new_table); charset_table = new_table; + /* FIXME: This leaks memory, as the old charset_table becomes + unreachable. If the old charset table is charset_table_init + then this leak is intentional; otherwise, it's unclear. + If the latter memory leak is intentional, a + comment should be added to explain this. If not, the old + charset_table should be freed, by passing it as the 1st argument + to xpalloc and removing the memcpy. */ } id = charset_table_used++; new_definition_p = 1; @@ -1396,8 +1387,8 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */) } else if (CHAR_TABLE_P (Vchar_unify_table)) { - int min_code = CHARSET_MIN_CODE (cs); - int max_code = CHARSET_MAX_CODE (cs); + unsigned min_code = CHARSET_MIN_CODE (cs); + unsigned max_code = CHARSET_MAX_CODE (cs); int min_char = DECODE_CHAR (cs, min_code); int max_char = DECODE_CHAR (cs, max_code); @@ -1512,7 +1503,7 @@ string_xstring_p (Lisp_Object string) It may lookup a translation table TABLE if supplied. */ static void -find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte) +find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars, ptrdiff_t nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte) { const unsigned char *pend = ptr + nbytes; @@ -1559,7 +1550,7 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) (Lisp_Object beg, Lisp_Object end, Lisp_Object table) { Lisp_Object charsets; - EMACS_INT from, from_byte, to, stop, stop_byte; + ptrdiff_t from, from_byte, to, stop, stop_byte; int i; Lisp_Object val; int multibyte = ! 
NILP (BVAR (current_buffer, enable_multibyte_characters)); @@ -1841,7 +1832,7 @@ encode_char (struct charset *charset, int c) } else /* method == CHARSET_METHOD_OFFSET */ { - int code_index = c - CHARSET_CODE_OFFSET (charset); + unsigned code_index = c - CHARSET_CODE_OFFSET (charset); code = INDEX_TO_CODE_POINT (charset, code_index); } @@ -1865,17 +1856,7 @@ and CODE-POINT to a character. Currently not supported and just ignored. */) struct charset *charsetp; CHECK_CHARSET_GET_ID (charset, id); - if (CONSP (code_point)) - { - CHECK_NATNUM_CAR (code_point); - CHECK_NATNUM_CDR (code_point); - code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point))); - } - else - { - CHECK_NATNUM (code_point); - code = XINT (code_point); - } + code = cons_to_unsigned (code_point, UINT_MAX); charsetp = CHARSET_FROM_ID (id); c = DECODE_CHAR (charsetp, code); return (c >= 0 ? make_number (c) : Qnil); @@ -1890,19 +1871,18 @@ Optional argument RESTRICTION specifies a way to map CH to a code-point in CCS. Currently not supported and just ignored. */) (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction) { - int id; + int c, id; unsigned code; struct charset *charsetp; CHECK_CHARSET_GET_ID (charset, id); - CHECK_NATNUM (ch); + CHECK_CHARACTER (ch); + c = XFASTINT (ch); charsetp = CHARSET_FROM_ID (id); - code = ENCODE_CHAR (charsetp, XINT (ch)); + code = ENCODE_CHAR (charsetp, c); if (code == CHARSET_INVALID_CODE (charsetp)) return Qnil; - if (code > 0x7FFFFFF) - return Fcons (make_number (code >> 16), make_number (code & 0xFFFF)); - return make_number (code); + return INTEGER_TO_CONS (code); } @@ -2174,11 +2154,11 @@ DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority, 1, MANY, 0, doc: /* Assign higher priority to the charsets given as arguments. 
usage: (set-charset-priority &rest charsets) */) - (size_t nargs, Lisp_Object *args) + (ptrdiff_t nargs, Lisp_Object *args) { Lisp_Object new_head, old_list, arglist[2]; Lisp_Object list_2022, list_emacs_mule; - size_t i; + ptrdiff_t i; int id; old_list = Fcopy_sequence (Vcharset_ordered_list); @@ -2239,14 +2219,16 @@ struct charset_sort_data { Lisp_Object charset; int id; - int priority; + ptrdiff_t priority; }; static int charset_compare (const void *d1, const void *d2) { const struct charset_sort_data *data1 = d1, *data2 = d2; - return (data1->priority - data2->priority); + if (data1->priority != data2->priority) + return data1->priority < data2->priority ? -1 : 1; + return 0; } DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0, @@ -2256,7 +2238,8 @@ See also `charset-priority-list' and `set-charset-priority'. */) (Lisp_Object charsets) { Lisp_Object len = Flength (charsets); - int n = XFASTINT (len), i, j, done; + ptrdiff_t n = XFASTINT (len), i, j; + int done; Lisp_Object tail, elt, attrs; struct charset_sort_data *sort_data; int id, min_id = INT_MAX, max_id = INT_MIN; @@ -2264,7 +2247,7 @@ See also `charset-priority-list' and `set-charset-priority'. */) if (n == 0) return Qnil; - SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n); + SAFE_NALLOCA (sort_data, 1, n); for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++) { elt = XCAR (tail); @@ -2336,6 +2319,18 @@ init_charset_once (void) #ifdef emacs +/* Allocate an initial charset table that is large enough to handle + Emacs while it is bootstrapping. As of September 2011, the size + needs to be at least 166; make it a bit bigger to allow for future + expansion. 
+ + Don't make the value so small that the table is reallocated during + bootstrapping, as glibc malloc calls larger than just under 64 KiB + during an initial bootstrap wreak havoc after dumping; see the + M_MMAP_THRESHOLD value in alloc.c, plus there is an extra overhead + internal to glibc malloc and perhaps to Emacs malloc debugging. */ +static struct charset charset_table_init[180]; + void syms_of_charset (void) { @@ -2371,9 +2366,8 @@ syms_of_charset (void) Vcharset_hash_table = Fmake_hash_table (2, args); } - charset_table_size = 128; - charset_table = ((struct charset *) - xmalloc (sizeof (struct charset) * charset_table_size)); + charset_table = charset_table_init; + charset_table_size = sizeof charset_table_init / sizeof *charset_table_init; charset_table_used = 0; defsubr (&Scharsetp);