X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/84eb0351d8be4811897c8cf62a69757ff5d14001..1fa2385302f5b2e5bb17c8aad3cb4e320512ca46:/src/charset.c diff --git a/src/charset.c b/src/charset.c index 229f2c2cda..6aa6fe46b5 100644 --- a/src/charset.c +++ b/src/charset.c @@ -29,6 +29,7 @@ along with GNU Emacs. If not, see . */ #include #include #include +#include #include #include #include "lisp.h" @@ -60,24 +61,24 @@ Lisp_Object Vcharset_hash_table; /* Table of struct charset. */ struct charset *charset_table; -static int charset_table_size; +static ptrdiff_t charset_table_size; static int charset_table_used; Lisp_Object Qcharsetp; /* Special charset symbols. */ Lisp_Object Qascii; -Lisp_Object Qeight_bit; -Lisp_Object Qiso_8859_1; -Lisp_Object Qunicode; -Lisp_Object Qemacs; +static Lisp_Object Qeight_bit; +static Lisp_Object Qiso_8859_1; +static Lisp_Object Qunicode; +static Lisp_Object Qemacs; /* The corresponding charsets. */ int charset_ascii; int charset_eight_bit; -int charset_iso_8859_1; +static int charset_iso_8859_1; int charset_unicode; -int charset_emacs; +static int charset_emacs; /* The other special charsets. */ int charset_jisx0201_roman; @@ -86,7 +87,7 @@ int charset_jisx0208; int charset_ksc5601; /* Value of charset attribute `charset-iso-plane'. */ -Lisp_Object Qgl, Qgr; +static Lisp_Object Qgl, Qgr; /* Charset of unibyte characters. */ int charset_unibyte; @@ -117,24 +118,25 @@ int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; #define CODE_POINT_TO_INDEX(charset, code) \ ((charset)->code_linear_p \ - ? (code) - (charset)->min_code \ + ? (int) ((code) - (charset)->min_code) \ : (((charset)->code_space_mask[(code) >> 24] & 0x8) \ && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \ && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \ && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \ - ? (((((code) >> 24) - (charset)->code_space[12]) \ - * (charset)->code_space[11]) \ - + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ - * (charset)->code_space[7]) \ - + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ - * (charset)->code_space[3]) \ - + (((code) & 0xFF) - (charset)->code_space[0]) \ - - ((charset)->char_index_offset)) \ + ? (int) (((((code) >> 24) - (charset)->code_space[12]) \ + * (charset)->code_space[11]) \ + + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ + * (charset)->code_space[7]) \ + + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ + * (charset)->code_space[3]) \ + + (((code) & 0xFF) - (charset)->code_space[0]) \ + - ((charset)->char_index_offset)) \ : -1) -/* Convert the character index IDX to code-point CODE for CHARSET. - It is assumed that IDX is in a valid range. */ +/* Return the code-point for the character index IDX in CHARSET. + IDX should be an unsigned int variable in a valid range (which is + always in nonnegative int range too). IDX contains garbage afterwards. */ #define INDEX_TO_CODE_POINT(charset, idx) \ ((charset)->code_linear_p \ @@ -250,7 +252,7 @@ struct charset_map_entries static void load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag) { - Lisp_Object vec, table; + Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil); unsigned max_code = CHARSET_MAX_CODE (charset); int ascii_compatible_p = charset->ascii_compatible_p; int min_char, max_char, nonascii_min_char; @@ -316,7 +318,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, for (i = 0; i < n_entries; i++) { unsigned from, to; - int from_index, to_index; + int from_index, to_index, lim_index; int from_c, to_c; int idx = i % 0x10000; @@ -338,6 +340,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, } if (from_index < 0 || to_index < 0) continue; + lim_index = to_index + 1; if (to_c > max_char) max_char = to_c; @@ -347,10 +350,10 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, if (control_flag == 1) { if (charset->method == CHARSET_METHOD_MAP) - for (; from_index <= to_index; from_index++, from_c++) + for (; from_index < lim_index; from_index++, from_c++) ASET (vec, from_index, make_number (from_c)); else - for (; from_index <= to_index; from_index++, from_c++) + for (; from_index < lim_index; from_index++, from_c++) CHAR_TABLE_SET (Vchar_unify_table, CHARSET_CODE_OFFSET (charset) + from_index, make_number (from_c)); @@ -359,25 +362,26 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, { if (charset->method == CHARSET_METHOD_MAP && CHARSET_COMPACT_CODES_P (charset)) - for (; from_index <= to_index; from_index++, from_c++) + for (; from_index < lim_index; from_index++, from_c++) { - unsigned code = INDEX_TO_CODE_POINT (charset, from_index); + unsigned code = from_index; + code = INDEX_TO_CODE_POINT (charset, code); if (NILP (CHAR_TABLE_REF (table, from_c))) CHAR_TABLE_SET (table, from_c, make_number (code)); } else - for (; from_index <= to_index; from_index++, from_c++) + for (; from_index < lim_index; from_index++, from_c++) { if (NILP (CHAR_TABLE_REF (table, from_c))) CHAR_TABLE_SET (table, from_c, make_number (from_index)); } } else if (control_flag == 3) - for (; from_index <= to_index; from_index++, from_c++) + for (; from_index < lim_index; from_index++, from_c++) SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index); else if (control_flag == 4) - for (; from_index <= to_index; from_index++, from_c++) + for (; from_index < lim_index; from_index++, from_c++) SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index); else /* control_flag == 0 */ { @@ -416,8 +420,8 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, /* Read a hexadecimal number (preceded by "0x") from the file FP while paying attention to comment character '#'. */ -static INLINE unsigned -read_hex (FILE *fp, int *eof) +static inline unsigned +read_hex (FILE *fp, int *eof, int *overflow) { int c; unsigned n; @@ -439,15 +443,16 @@ read_hex (FILE *fp, int *eof) *eof = 1; return 0; } - *eof = 0; n = 0; - if (c == 'x') - while ((c = getc (fp)) != EOF && isxdigit (c)) + while (isxdigit (c = getc (fp))) + { + if (UINT_MAX >> 4 < n) + *overflow = 1; n = ((n << 4) - | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)); - else - while ((c = getc (fp)) != EOF && isdigit (c)) - n = (n * 10) + c - '0'; + | (c - ('0' <= c && c <= '9' ? '0' + : 'A' <= c && c <= 'F' ? 'A' - 10 + : 'a' - 10))); + } if (c != EOF) ungetc (c, fp); return n; @@ -477,10 +482,10 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co unsigned max_code = CHARSET_MAX_CODE (charset); int fd; FILE *fp; - int eof; Lisp_Object suffixes; struct charset_map_entries *head, *entries; - int n_entries, count; + int n_entries; + ptrdiff_t count; USE_SAFE_ALLOCA; suffixes = Fcons (build_string (".map"), @@ -492,7 +497,7 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co unbind_to (count, Qnil); if (fd < 0 || ! (fp = fdopen (fd, "r"))) - error ("Failure in loading charset map: %S", SDATA (mapfile)); + error ("Failure in loading charset map: %s", SDATA (mapfile)); /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is large (larger than MAX_ALLOCA). */ @@ -502,22 +507,27 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co memset (entries, 0, sizeof (struct charset_map_entries)); n_entries = 0; - eof = 0; while (1) { - unsigned from, to; - int c; + unsigned from, to, c; int idx; + int eof = 0, overflow = 0; - from = read_hex (fp, &eof); + from = read_hex (fp, &eof, &overflow); if (eof) break; if (getc (fp) == '-') - to = read_hex (fp, &eof); + to = read_hex (fp, &eof, &overflow); else to = from; - c = (int) read_hex (fp, &eof); + if (eof) + break; + c = read_hex (fp, &eof, &overflow); + if (eof) + break; + if (overflow) + continue; if (from < min_code || to > max_code || from > to || c > MAX_CHAR) continue; @@ -527,8 +537,9 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co sizeof (struct charset_map_entries)); entries = entries->next; memset (entries, 0, sizeof (struct charset_map_entries)); + n_entries = 0; } - idx = n_entries % 0x10000; + idx = n_entries; entries->entry[idx].from = from; entries->entry[idx].to = to; entries->entry[idx].c = c; @@ -569,7 +580,7 @@ load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int cont { Lisp_Object val, val2; unsigned from, to; - int c; + EMACS_INT c; int idx; val = AREF (vec, i); @@ -577,16 +588,11 @@ load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int cont { val2 = XCDR (val); val = XCAR (val); - CHECK_NATNUM (val); - CHECK_NATNUM (val2); from = XFASTINT (val); to = XFASTINT (val2); } else - { - CHECK_NATNUM (val); - from = to = XFASTINT (val); - } + from = to = XFASTINT (val); val = AREF (vec, i + 1); CHECK_NATNUM (val); c = XFASTINT (val); @@ -629,8 +635,12 @@ load_charset (struct charset *charset, int control_flag) if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP) map = CHARSET_MAP (charset); - else if (CHARSET_UNIFIED_P (charset)) - map = CHARSET_UNIFY_MAP (charset); + else + { + if (! CHARSET_UNIFIED_P (charset)) + abort (); + map = CHARSET_UNIFY_MAP (charset); + } if (STRINGP (map)) load_charset_map_from_file (charset, map, control_flag); else @@ -646,12 +656,10 @@ DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0, } -void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), - Lisp_Object function, Lisp_Object arg, - unsigned from, unsigned to); - -void -map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to) +static void +map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), + Lisp_Object function, Lisp_Object arg, + unsigned int from, unsigned int to) { int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from); int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to); @@ -668,9 +676,9 @@ map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object while (1) { - int index = GET_TEMP_CHARSET_WORK_ENCODER (c); + int idx = GET_TEMP_CHARSET_WORK_ENCODER (c); - if (index >= from_idx && index <= to_idx) + if (idx >= from_idx && idx <= to_idx) { if (NILP (XCAR (range))) XSETCAR (range, make_number (c)); @@ -812,7 +820,6 @@ range of code points (in CHARSET) of target characters. */) from = CHARSET_MIN_CODE (cs); else { - CHECK_NATNUM (from_code); from = XINT (from_code); if (from < CHARSET_MIN_CODE (cs)) from = CHARSET_MIN_CODE (cs); @@ -821,7 +828,6 @@ range of code points (in CHARSET) of target characters. */) to = CHARSET_MAX_CODE (cs); else { - CHECK_NATNUM (to_code); to = XINT (to_code); if (to > CHARSET_MAX_CODE (cs)) to = CHARSET_MAX_CODE (cs); @@ -840,12 +846,12 @@ DEFUN ("define-charset-internal", Fdefine_charset_internal, Sdefine_charset_internal, charset_arg_max, MANY, 0, doc: /* For internal use only. usage: (define-charset-internal ...) */) - (int nargs, Lisp_Object *args) + (ptrdiff_t nargs, Lisp_Object *args) { /* Charset attr vector. */ Lisp_Object attrs; Lisp_Object val; - unsigned hash_code; + EMACS_UINT hash_code; struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table); int i, j; struct charset charset; @@ -865,21 +871,26 @@ usage: (define-charset-internal ...) */) ASET (attrs, charset_name, args[charset_arg_name]); val = args[charset_arg_code_space]; - for (i = 0, dimension = 0, nchars = 1; i < 4; i++) + for (i = 0, dimension = 0, nchars = 1; ; i++) { + Lisp_Object min_byte_obj, max_byte_obj; int min_byte, max_byte; - min_byte = XINT (Faref (val, make_number (i * 2))); - max_byte = XINT (Faref (val, make_number (i * 2 + 1))); - if (min_byte < 0 || min_byte > max_byte || max_byte >= 256) - error ("Invalid :code-space value"); + min_byte_obj = Faref (val, make_number (i * 2)); + max_byte_obj = Faref (val, make_number (i * 2 + 1)); + CHECK_RANGED_INTEGER (0, min_byte_obj, 255); + min_byte = XINT (min_byte_obj); + CHECK_RANGED_INTEGER (min_byte, max_byte_obj, 255); + max_byte = XINT (max_byte_obj); charset.code_space[i * 4] = min_byte; charset.code_space[i * 4 + 1] = max_byte; charset.code_space[i * 4 + 2] = max_byte - min_byte + 1; - nchars *= charset.code_space[i * 4 + 2]; - charset.code_space[i * 4 + 3] = nchars; if (max_byte > 0) dimension = i + 1; + if (i == 3) + break; + nchars *= charset.code_space[i * 4 + 2]; + charset.code_space[i * 4 + 3] = nchars; } val = args[charset_arg_dimension]; @@ -887,10 +898,8 @@ usage: (define-charset-internal ...) */) charset.dimension = dimension; else { - CHECK_NATNUM (val); + CHECK_RANGED_INTEGER (1, val, 4); charset.dimension = XINT (val); - if (charset.dimension < 1 || charset.dimension > 4) - args_out_of_range_3 (val, make_number (1), make_number (4)); } charset.code_linear_p @@ -916,31 +925,22 @@ usage: (define-charset-internal ...) */) charset.min_code = (charset.code_space[0] | (charset.code_space[4] << 8) | (charset.code_space[8] << 16) - | (charset.code_space[12] << 24)); + | ((unsigned) charset.code_space[12] << 24)); charset.max_code = (charset.code_space[1] | (charset.code_space[5] << 8) | (charset.code_space[9] << 16) - | (charset.code_space[13] << 24)); + | ((unsigned) charset.code_space[13] << 24)); charset.char_index_offset = 0; val = args[charset_arg_min_code]; if (! NILP (val)) { - unsigned code; + unsigned code = cons_to_unsigned (val, UINT_MAX); - if (INTEGERP (val)) - code = XINT (val); - else - { - CHECK_CONS (val); - CHECK_NUMBER_CAR (val); - CHECK_NUMBER_CDR (val); - code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); - } if (code < charset.min_code || code > charset.max_code) - args_out_of_range_3 (make_number (charset.min_code), - make_number (charset.max_code), val); + args_out_of_range_3 (make_fixnum_or_float (charset.min_code), + make_fixnum_or_float (charset.max_code), val); charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code); charset.min_code = code; } @@ -948,21 +948,12 @@ usage: (define-charset-internal ...) */) val = args[charset_arg_max_code]; if (! NILP (val)) { - unsigned code; + unsigned code = cons_to_unsigned (val, UINT_MAX); - if (INTEGERP (val)) - code = XINT (val); - else - { - CHECK_CONS (val); - CHECK_NUMBER_CAR (val); - CHECK_NUMBER_CDR (val); - code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); - } if (code < charset.min_code || code > charset.max_code) - args_out_of_range_3 (make_number (charset.min_code), - make_number (charset.max_code), val); + args_out_of_range_3 (make_fixnum_or_float (charset.min_code), + make_fixnum_or_float (charset.max_code), val); charset.max_code = code; } @@ -975,18 +966,14 @@ usage: (define-charset-internal ...) */) charset.invalid_code = 0; else { - XSETINT (val, charset.max_code + 1); - if (XINT (val) == charset.max_code + 1) + if (charset.max_code < UINT_MAX) charset.invalid_code = charset.max_code + 1; else error ("Attribute :invalid-code must be specified"); } } else - { - CHECK_NATNUM (val); - charset.invalid_code = XFASTINT (val); - } + charset.invalid_code = cons_to_unsigned (val, UINT_MAX); val = args[charset_arg_iso_final]; if (NILP (val)) @@ -995,7 +982,7 @@ usage: (define-charset-internal ...) */) { CHECK_NUMBER (val); if (XINT (val) < '0' || XINT (val) > 127) - error ("Invalid iso-final-char: %d", XINT (val)); + error ("Invalid iso-final-char: %"pI"d", XINT (val)); charset.iso_final = XINT (val); } @@ -1004,9 +991,7 @@ usage: (define-charset-internal ...) */) charset.iso_revision = -1; else { - CHECK_NUMBER (val); - if (XINT (val) > 63) - args_out_of_range (make_number (63), val); + CHECK_RANGED_INTEGER (-1, val, 63); charset.iso_revision = XINT (val); } @@ -1017,7 +1002,7 @@ usage: (define-charset-internal ...) */) { CHECK_NATNUM (val); if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256) - error ("Invalid emacs-mule-id: %d", XINT (val)); + error ("Invalid emacs-mule-id: %"pI"d", XINT (val)); charset.emacs_mule_id = XINT (val); } @@ -1032,17 +1017,17 @@ usage: (define-charset-internal ...) */) if (! NILP (args[charset_arg_code_offset])) { val = args[charset_arg_code_offset]; - CHECK_NUMBER (val); + CHECK_CHARACTER (val); charset.method = CHARSET_METHOD_OFFSET; charset.code_offset = XINT (val); - i = CODE_POINT_TO_INDEX (&charset, charset.min_code); - charset.min_char = i + charset.code_offset; i = CODE_POINT_TO_INDEX (&charset, charset.max_code); - charset.max_char = i + charset.code_offset; - if (charset.max_char > MAX_CHAR) + if (MAX_CHAR - charset.code_offset < i) error ("Unsupported max char: %d", charset.max_char); + charset.max_char = i + charset.code_offset; + i = CODE_POINT_TO_INDEX (&charset, charset.min_code); + charset.min_char = i + charset.code_offset; i = (charset.min_char >> 7) << 7; for (; i < 0x10000 && i <= charset.max_char; i += 128) @@ -1113,7 +1098,7 @@ usage: (define-charset-internal ...) */) car_part = XCAR (elt); cdr_part = XCDR (elt); CHECK_CHARSET_GET_ID (car_part, this_id); - CHECK_NUMBER (cdr_part); + CHECK_TYPE_RANGED_INTEGER (int, cdr_part); offset = XINT (cdr_part); } else @@ -1157,13 +1142,25 @@ usage: (define-charset-internal ...) */) hash_code); if (charset_table_used == charset_table_size) { - struct charset *new_table - = (struct charset *) xmalloc (sizeof (struct charset) - * (charset_table_size + 16)); - memcpy (new_table, charset_table, - sizeof (struct charset) * charset_table_size); - charset_table_size += 16; + /* Ensure that charset IDs fit into 'int' as well as into the + restriction imposed by fixnums. Although the 'int' restriction + could be removed, too much other code would need altering; for + example, the IDs are stuffed into struct + coding_system.charbuf[i] entries, which are 'int'. */ + int old_size = charset_table_size; + struct charset *new_table = + xpalloc (0, &charset_table_size, 1, + min (INT_MAX, MOST_POSITIVE_FIXNUM), + sizeof *charset_table); + memcpy (new_table, charset_table, old_size * sizeof *new_table); charset_table = new_table; + /* FIXME: This leaks memory, as the old charset_table becomes + unreachable. If the old charset table is charset_table_init + then this leak is intentional; otherwise, it's unclear. + If the latter memory leak is intentional, a + comment should be added to explain this. If not, the old + charset_table should be freed, by passing it as the 1st argument + to xpalloc and removing the memcpy. */ } id = charset_table_used++; new_definition_p = 1; @@ -1390,8 +1387,8 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */) } else if (CHAR_TABLE_P (Vchar_unify_table)) { - int min_code = CHARSET_MIN_CODE (cs); - int max_code = CHARSET_MAX_CODE (cs); + unsigned min_code = CHARSET_MIN_CODE (cs); + unsigned max_code = CHARSET_MAX_CODE (cs); int min_char = DECODE_CHAR (cs, min_code); int max_char = DECODE_CHAR (cs, max_code); @@ -1432,14 +1429,16 @@ check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Obje { CHECK_NATNUM (dimension); CHECK_NATNUM (chars); - CHECK_NATNUM (final_char); + CHECK_CHARACTER (final_char); if (XINT (dimension) > 3) - error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension)); + error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3", + XINT (dimension)); if (XINT (chars) != 94 && XINT (chars) != 96) - error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars)); + error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars)); if (XINT (final_char) < '0' || XINT (final_char) > '~') - error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars)); + error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", + (int)XINT (final_char)); } @@ -1504,7 +1503,7 @@ string_xstring_p (Lisp_Object string) It may lookup a translation table TABLE if supplied. */ static void -find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte) +find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars, ptrdiff_t nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte) { const unsigned char *pend = ptr + nbytes; @@ -1551,10 +1550,10 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) (Lisp_Object beg, Lisp_Object end, Lisp_Object table) { Lisp_Object charsets; - EMACS_INT from, from_byte, to, stop, stop_byte; + ptrdiff_t from, from_byte, to, stop, stop_byte; int i; Lisp_Object val; - int multibyte = ! NILP (current_buffer->enable_multibyte_characters); + int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); validate_region (&beg, &end); from = XFASTINT (beg); @@ -1629,7 +1628,7 @@ maybe_unify_char (int c, Lisp_Object val) struct charset *charset; if (INTEGERP (val)) - return XINT (val); + return XFASTINT (val); if (NILP (val)) return c; @@ -1639,7 +1638,7 @@ maybe_unify_char (int c, Lisp_Object val) { val = CHAR_TABLE_REF (Vchar_unify_table, c); if (! NILP (val)) - c = XINT (val); + c = XFASTINT (val); } else { @@ -1833,7 +1832,7 @@ encode_char (struct charset *charset, int c) } else /* method == CHARSET_METHOD_OFFSET */ { - int code_index = c - CHARSET_CODE_OFFSET (charset); + unsigned code_index = c - CHARSET_CODE_OFFSET (charset); code = INDEX_TO_CODE_POINT (charset, code_index); } @@ -1857,17 +1856,7 @@ and CODE-POINT to a character. Currently not supported and just ignored. */) struct charset *charsetp; CHECK_CHARSET_GET_ID (charset, id); - if (CONSP (code_point)) - { - CHECK_NATNUM_CAR (code_point); - CHECK_NATNUM_CDR (code_point); - code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point))); - } - else - { - CHECK_NATNUM (code_point); - code = XINT (code_point); - } + code = cons_to_unsigned (code_point, UINT_MAX); charsetp = CHARSET_FROM_ID (id); c = DECODE_CHAR (charsetp, code); return (c >= 0 ? make_number (c) : Qnil); @@ -1882,19 +1871,18 @@ Optional argument RESTRICTION specifies a way to map CH to a code-point in CCS. Currently not supported and just ignored. */) (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction) { - int id; + int c, id; unsigned code; struct charset *charsetp; CHECK_CHARSET_GET_ID (charset, id); - CHECK_NATNUM (ch); + CHECK_CHARACTER (ch); + c = XFASTINT (ch); charsetp = CHARSET_FROM_ID (id); - code = ENCODE_CHAR (charsetp, XINT (ch)); + code = ENCODE_CHAR (charsetp, c); if (code == CHARSET_INVALID_CODE (charsetp)) return Qnil; - if (code > 0x7FFFFFF) - return Fcons (make_number (code >> 16), make_number (code & 0xFFFF)); - return make_number (code); + return INTEGER_TO_CONS (code); } @@ -2066,10 +2054,10 @@ that case, find the charset from what supported by that coding system. */) for (; CONSP (restriction); restriction = XCDR (restriction)) { - struct charset *charset; + struct charset *rcharset; - CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset); - if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset)) + CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset); + if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset)) return XCAR (restriction); } return Qnil; @@ -2133,7 +2121,7 @@ It should be called only from temacs invoked for dumping. */) { if (temp_charset_work) { - free (temp_charset_work); + xfree (temp_charset_work); temp_charset_work = NULL; } @@ -2166,11 +2154,12 @@ DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority, 1, MANY, 0, doc: /* Assign higher priority to the charsets given as arguments. usage: (set-charset-priority &rest charsets) */) - (int nargs, Lisp_Object *args) + (ptrdiff_t nargs, Lisp_Object *args) { Lisp_Object new_head, old_list, arglist[2]; Lisp_Object list_2022, list_emacs_mule; - int i, id; + ptrdiff_t i; + int id; old_list = Fcopy_sequence (Vcharset_ordered_list); new_head = Qnil; @@ -2230,14 +2219,16 @@ struct charset_sort_data { Lisp_Object charset; int id; - int priority; + ptrdiff_t priority; }; static int charset_compare (const void *d1, const void *d2) { const struct charset_sort_data *data1 = d1, *data2 = d2; - return (data1->priority - data2->priority); + if (data1->priority != data2->priority) + return data1->priority < data2->priority ? -1 : 1; + return 0; } DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0, @@ -2247,26 +2238,25 @@ See also `charset-priority-list' and `set-charset-priority'. */) (Lisp_Object charsets) { Lisp_Object len = Flength (charsets); - int n = XFASTINT (len), i, j, done; + ptrdiff_t n = XFASTINT (len), i, j; + int done; Lisp_Object tail, elt, attrs; struct charset_sort_data *sort_data; - int id, min_id, max_id; + int id, min_id = INT_MAX, max_id = INT_MIN; USE_SAFE_ALLOCA; if (n == 0) return Qnil; - SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n); + SAFE_NALLOCA (sort_data, 1, n); for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++) { elt = XCAR (tail); CHECK_CHARSET_GET_ATTR (elt, attrs); sort_data[i].charset = elt; sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs)); - if (i == 0) - min_id = max_id = id; - else if (id < min_id) + if (id < min_id) min_id = id; - else if (id > max_id) + if (id > max_id) max_id = id; } for (done = 0, tail = Vcharset_ordered_list, i = 0; @@ -2329,6 +2319,18 @@ init_charset_once (void) #ifdef emacs +/* Allocate an initial charset table that is large enough to handle + Emacs while it is bootstrapping. As of September 2011, the size + needs to be at least 166; make it a bit bigger to allow for future + expansion. + + Don't make the value so small that the table is reallocated during + bootstrapping, as glibc malloc calls larger than just under 64 KiB + during an initial bootstrap wreak havoc after dumping; see the + M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead + internal to glibc malloc and perhaps to Emacs malloc debugging. */ +static struct charset charset_table_init[180]; + void syms_of_charset (void) { @@ -2364,9 +2366,8 @@ syms_of_charset (void) Vcharset_hash_table = Fmake_hash_table (2, args); } - charset_table_size = 128; - charset_table = ((struct charset *) - xmalloc (sizeof (struct charset) * charset_table_size)); + charset_table = charset_table_init; + charset_table_size = sizeof charset_table_init / sizeof *charset_table_init; charset_table_used = 0; defsubr (&Scharsetp);