/* Basic character set support.
- Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
- 2008 Free Software Foundation, Inc.
+ Copyright (C) 2001-2011 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
#include <stdio.h>
#include <unistd.h>
#include <ctype.h>
+#include <limits.h>
#include <sys/types.h>
+#include <setjmp.h>
#include "lisp.h"
#include "character.h"
#include "charset.h"
*/
-/* List of all charsets. This variable is used only from Emacs
- Lisp. */
-Lisp_Object Vcharset_list;
-
/* Hash table that contains attributes of each charset. Keys are
charset symbols, and values are vectors of charset attributes. */
Lisp_Object Vcharset_hash_table;
/* Table of struct charset. */
struct charset *charset_table;
-static int charset_table_size;
+static ptrdiff_t charset_table_size;
static int charset_table_used;
Lisp_Object Qcharsetp;
/* Special charset symbols. */
Lisp_Object Qascii;
-Lisp_Object Qeight_bit;
-Lisp_Object Qiso_8859_1;
-Lisp_Object Qunicode;
-Lisp_Object Qemacs;
+static Lisp_Object Qeight_bit;
+static Lisp_Object Qiso_8859_1;
+static Lisp_Object Qunicode;
+static Lisp_Object Qemacs;
/* The corresponding charsets. */
int charset_ascii;
int charset_eight_bit;
-int charset_iso_8859_1;
+static int charset_iso_8859_1;
int charset_unicode;
-int charset_emacs;
+static int charset_emacs;
/* The other special charsets. */
int charset_jisx0201_roman;
int charset_jisx0208_1978;
int charset_jisx0208;
+int charset_ksc5601;
/* Value of charset attribute `charset-iso-plane'. */
-Lisp_Object Qgl, Qgr;
+static Lisp_Object Qgl, Qgr;
/* Charset of unibyte characters. */
int charset_unibyte;
/* List of emacs-mule charsets. */
Lisp_Object Vemacs_mule_charset_list;
-struct charset *emacs_mule_charset[256];
+int emacs_mule_charset[256];
/* Mapping table from ISO2022's charset (specified by DIMENSION,
CHARS, and FINAL-CHAR) to Emacs' charset. */
int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
-Lisp_Object Vcharset_map_path;
-
-Lisp_Object Vchar_unified_charset_table;
-
-Lisp_Object Vcurrent_iso639_language;
-
-/* Defined in chartab.c */
-extern void
-map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
- Lisp_Object function, Lisp_Object table,
- Lisp_Object arg, struct charset *charset,
- unsigned from, unsigned to));
-
#define CODE_POINT_TO_INDEX(charset, code) \
((charset)->code_linear_p \
- ? (code) - (charset)->min_code \
+ ? (int) ((code) - (charset)->min_code) \
: (((charset)->code_space_mask[(code) >> 24] & 0x8) \
&& ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
&& ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
&& ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
- ? (((((code) >> 24) - (charset)->code_space[12]) \
- * (charset)->code_space[11]) \
- + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
- * (charset)->code_space[7]) \
- + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
- * (charset)->code_space[3]) \
- + (((code) & 0xFF) - (charset)->code_space[0]) \
- - ((charset)->char_index_offset)) \
+ ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
+ * (charset)->code_space[11]) \
+ + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
+ * (charset)->code_space[7]) \
+ + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
+ * (charset)->code_space[3]) \
+ + (((code) & 0xFF) - (charset)->code_space[0]) \
+ - ((charset)->char_index_offset)) \
: -1)
-/* Convert the character index IDX to code-point CODE for CHARSET.
- It is assumed that IDX is in a valid range. */
+/* Return the code-point for the character index IDX in CHARSET.
+ IDX should be an unsigned int variable in a valid range (which is
+ always in nonnegative int range too). IDX contains garbage afterwards. */
#define INDEX_TO_CODE_POINT(charset, idx) \
((charset)->code_linear_p \
| (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
<< 24))))
+/* Structure to hold mapping tables for a charset. Used by temacs
+ invoked for dumping. */
+static struct
+{
+ /* The current charset for which the following tables are setup. */
+ struct charset *current;
+
+ /* 1 iff the following table is used for encoder. */
+ short for_encoder;
+
+ /* When the following table is used for encoding, mininum and
+ maxinum character of the current charset. */
+ int min_char, max_char;
+
+ /* A Unicode character correspoinding to the code indice 0 (i.e. the
+ minimum code-point) of the current charset, or -1 if the code
+ indice 0 is not a Unicode character. This is checked when
+ table.encoder[CHAR] is zero. */
+ int zero_index_char;
+
+ union {
+ /* Table mapping code-indices (not code-points) of the current
+ charset to Unicode characters. If decoder[CHAR] is -1, CHAR
+ doesn't belong to the current charset. */
+ int decoder[0x10000];
+ /* Table mapping Unicode characters to code-indices of the current
+ charset. The first 0x10000 elements are for BMP (0..0xFFFF),
+ and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
+ (0x20000..0x2FFFF). Note that there is no charset map that
+ uses both SMP and SIP. */
+ unsigned short encoder[0x20000];
+ } table;
+} *temp_charset_work;
+
+#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
+ do { \
+ if ((CODE) == 0) \
+ temp_charset_work->zero_index_char = (C); \
+ else if ((C) < 0x20000) \
+ temp_charset_work->table.encoder[(C)] = (CODE); \
+ else \
+ temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
+ } while (0)
+
+#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
+ ((C) == temp_charset_work->zero_index_char ? 0 \
+ : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
+ ? (int) temp_charset_work->table.encoder[(C)] : -1) \
+ : temp_charset_work->table.encoder[(C) - 0x10000] \
+ ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
+
+#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
+ (temp_charset_work->table.decoder[(CODE)] = (C))
+
+#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
+ (temp_charset_work->table.decoder[(CODE)])
\f
/* Set to 1 to warn that a charset map is loaded and thus a buffer
struct charset_map_entries *next;
};
-/* Load the mapping information for CHARSET from ENTRIES.
+/* Load the mapping information of CHARSET from ENTRIES for
+ initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
+ encoding (CONTROL_FLAG == 2).
+
+ If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
+ and CHARSET->fast_map.
- If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
+ If CONTROL_FLAG is 1, setup the following tables according to
+ CHARSET->method and inhibit_load_charset_map.
- If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
- CHARSET->decoder, and CHARSET->encoder.
+ CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
+ ----------------------+--------------------+---------------------------
+ CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
+ ----------------------+--------------------+---------------------------
+ CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
- If CONTROL_FLAG is 2, setup CHARSET->deunifier and
- Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
- setup it too. */
+ If CONTROL_FLAG is 2, setup the following tables.
+
+ CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
+ ----------------------+--------------------+---------------------------
+ CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
+ ----------------------+--------------------+--------------------------
+ CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
+*/
static void
-load_charset_map (charset, entries, n_entries, control_flag)
- struct charset *charset;
- struct charset_map_entries *entries;
- int n_entries;
- int control_flag;
+load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
{
- Lisp_Object vec, table;
+ Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
unsigned max_code = CHARSET_MAX_CODE (charset);
int ascii_compatible_p = charset->ascii_compatible_p;
int min_char, max_char, nonascii_min_char;
if (n_entries <= 0)
return;
- if (control_flag > 0)
+ if (control_flag)
{
- int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
-
- table = Fmake_char_table (Qnil, Qnil);
- if (control_flag == 1)
- vec = Fmake_vector (make_number (n), make_number (-1));
- else if (! CHAR_TABLE_P (Vchar_unify_table))
- Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
+ if (! inhibit_load_charset_map)
+ {
+ if (control_flag == 1)
+ {
+ if (charset->method == CHARSET_METHOD_MAP)
+ {
+ int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
+ vec = CHARSET_DECODER (charset)
+ = Fmake_vector (make_number (n), make_number (-1));
+ }
+ else
+ {
+ char_table_set_range (Vchar_unify_table,
+ charset->min_char, charset->max_char,
+ Qnil);
+ }
+ }
+ else
+ {
+ table = Fmake_char_table (Qnil, Qnil);
+ if (charset->method == CHARSET_METHOD_MAP)
+ CHARSET_ENCODER (charset) = table;
+ else
+ CHARSET_DEUNIFIER (charset) = table;
+ }
+ }
+ else
+ {
+ if (! temp_charset_work)
+ temp_charset_work = xmalloc (sizeof (*temp_charset_work));
+ if (control_flag == 1)
+ {
+ memset (temp_charset_work->table.decoder, -1,
+ sizeof (int) * 0x10000);
+ }
+ else
+ {
+ memset (temp_charset_work->table.encoder, 0,
+ sizeof (unsigned short) * 0x20000);
+ temp_charset_work->zero_index_char = -1;
+ }
+ temp_charset_work->current = charset;
+ temp_charset_work->for_encoder = (control_flag == 2);
+ control_flag += 2;
+ }
charset_map_loaded = 1;
}
for (i = 0; i < n_entries; i++)
{
unsigned from, to;
- int from_index, to_index;
+ int from_index, to_index, lim_index;
int from_c, to_c;
int idx = i % 0x10000;
}
if (from_index < 0 || to_index < 0)
continue;
+ lim_index = to_index + 1;
- if (control_flag < 2)
- {
- int c;
+ if (to_c > max_char)
+ max_char = to_c;
+ else if (from_c < min_char)
+ min_char = from_c;
- if (to_c > max_char)
- max_char = to_c;
- else if (from_c < min_char)
- min_char = from_c;
+ if (control_flag == 1)
+ {
+ if (charset->method == CHARSET_METHOD_MAP)
+ for (; from_index < lim_index; from_index++, from_c++)
+ ASET (vec, from_index, make_number (from_c));
+ else
+ for (; from_index < lim_index; from_index++, from_c++)
+ CHAR_TABLE_SET (Vchar_unify_table,
+ CHARSET_CODE_OFFSET (charset) + from_index,
+ make_number (from_c));
+ }
+ else if (control_flag == 2)
+ {
+ if (charset->method == CHARSET_METHOD_MAP
+ && CHARSET_COMPACT_CODES_P (charset))
+ for (; from_index < lim_index; from_index++, from_c++)
+ {
+ unsigned code = from_index;
+ code = INDEX_TO_CODE_POINT (charset, code);
+
+ if (NILP (CHAR_TABLE_REF (table, from_c)))
+ CHAR_TABLE_SET (table, from_c, make_number (code));
+ }
+ else
+ for (; from_index < lim_index; from_index++, from_c++)
+ {
+ if (NILP (CHAR_TABLE_REF (table, from_c)))
+ CHAR_TABLE_SET (table, from_c, make_number (from_index));
+ }
+ }
+ else if (control_flag == 3)
+ for (; from_index < lim_index; from_index++, from_c++)
+ SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
+ else if (control_flag == 4)
+ for (; from_index < lim_index; from_index++, from_c++)
+ SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
+ else /* control_flag == 0 */
+ {
if (ascii_compatible_p)
{
if (! ASCII_BYTE_P (from_c))
}
}
- for (c = from_c; c <= to_c; c++)
- CHARSET_FAST_MAP_SET (c, fast_map);
-
- if (control_flag == 1)
- {
- unsigned code = from;
-
- if (CHARSET_COMPACT_CODES_P (charset))
- while (1)
- {
- ASET (vec, from_index, make_number (from_c));
- if (NILP (CHAR_TABLE_REF (table, from_c)))
- CHAR_TABLE_SET (table, from_c, make_number (code));
- if (from_index == to_index)
- break;
- from_index++, from_c++;
- code = INDEX_TO_CODE_POINT (charset, from_index);
- }
- else
- for (; from_index <= to_index; from_index++, from_c++)
- {
- ASET (vec, from_index, make_number (from_c));
- if (NILP (CHAR_TABLE_REF (table, from_c)))
- CHAR_TABLE_SET (table, from_c, make_number (from_index));
- }
- }
- }
- else
- {
- unsigned code = from;
-
- while (1)
- {
- int c1 = DECODE_CHAR (charset, code);
-
- if (c1 >= 0)
- {
- CHAR_TABLE_SET (table, from_c, make_number (c1));
- CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
- if (CHAR_TABLE_P (Vchar_unified_charset_table))
- CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
- CHARSET_NAME (charset));
- }
- if (from_index == to_index)
- break;
- from_index++, from_c++;
- code = INDEX_TO_CODE_POINT (charset, from_index);
- }
+ for (; from_c <= to_c; from_c++)
+ CHARSET_FAST_MAP_SET (from_c, fast_map);
}
}
- if (control_flag < 2)
+ if (control_flag == 0)
{
CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
? nonascii_min_char : min_char);
CHARSET_MAX_CHAR (charset) = max_char;
- if (control_flag == 1)
- {
- CHARSET_DECODER (charset) = vec;
- CHARSET_ENCODER (charset) = table;
- }
}
- else
- CHARSET_DEUNIFIER (charset) = table;
+ else if (control_flag == 4)
+ {
+ temp_charset_work->min_char = min_char;
+ temp_charset_work->max_char = max_char;
+ }
}
/* Read a hexadecimal number (preceded by "0x") from the file FP while
- paying attention to comment charcter '#'. */
+ paying attention to comment character '#'. */
-static INLINE unsigned
-read_hex (fp, eof)
- FILE *fp;
- int *eof;
+static inline unsigned
+read_hex (FILE *fp, int *eof, int *overflow)
{
int c;
unsigned n;
*eof = 1;
return 0;
}
- *eof = 0;
n = 0;
- if (c == 'x')
- while ((c = getc (fp)) != EOF && isxdigit (c))
+ while (isxdigit (c = getc (fp)))
+ {
+ if (UINT_MAX >> 4 < n)
+ *overflow = 1;
n = ((n << 4)
- | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
- else
- while ((c = getc (fp)) != EOF && isdigit (c))
- n = (n * 10) + c - '0';
+ | (c - ('0' <= c && c <= '9' ? '0'
+ : 'A' <= c && c <= 'F' ? 'A' - 10
+ : 'a' - 10)));
+ }
if (c != EOF)
ungetc (c, fp);
return n;
}
-
/* Return a mapping vector for CHARSET loaded from MAPFILE.
Each line of MAPFILE has this form
0xAAAA 0xCCCC
The returned vector has this form:
[ CODE1 CHAR1 CODE2 CHAR2 .... ]
where CODE1 is a code-point or a cons of code-points specifying a
- range. */
+ range.
-extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
+ Note that this function uses `openp' to open MAPFILE but ignores
+ `file-name-handler-alist' to avoid running any Lisp code. */
static void
-load_charset_map_from_file (charset, mapfile, control_flag)
- struct charset *charset;
- Lisp_Object mapfile;
- int control_flag;
+load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
{
unsigned min_code = CHARSET_MIN_CODE (charset);
unsigned max_code = CHARSET_MAX_CODE (charset);
int fd;
FILE *fp;
- int eof;
Lisp_Object suffixes;
struct charset_map_entries *head, *entries;
int n_entries;
+ ptrdiff_t count;
+ USE_SAFE_ALLOCA;
suffixes = Fcons (build_string (".map"),
Fcons (build_string (".TXT"), Qnil));
+ count = SPECPDL_INDEX ();
+ specbind (Qfile_name_handler_alist, Qnil);
fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
+ unbind_to (count, Qnil);
if (fd < 0
|| ! (fp = fdopen (fd, "r")))
- {
- add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
- return;
- }
+ error ("Failure in loading charset map: %s", SDATA (mapfile));
+
+ /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
+ large (larger than MAX_ALLOCA). */
+ SAFE_ALLOCA (head, struct charset_map_entries *,
+ sizeof (struct charset_map_entries));
+ entries = head;
+ memset (entries, 0, sizeof (struct charset_map_entries));
- head = entries = ((struct charset_map_entries *)
- alloca (sizeof (struct charset_map_entries)));
n_entries = 0;
- eof = 0;
while (1)
{
- unsigned from, to;
- int c;
+ unsigned from, to, c;
int idx;
+ int eof = 0, overflow = 0;
- from = read_hex (fp, &eof);
+ from = read_hex (fp, &eof, &overflow);
if (eof)
break;
if (getc (fp) == '-')
- to = read_hex (fp, &eof);
+ to = read_hex (fp, &eof, &overflow);
else
to = from;
- c = (int) read_hex (fp, &eof);
+ if (eof)
+ break;
+ c = read_hex (fp, &eof, &overflow);
+ if (eof)
+ break;
+ if (overflow)
+ continue;
if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
continue;
if (n_entries > 0 && (n_entries % 0x10000) == 0)
{
- entries->next = ((struct charset_map_entries *)
- alloca (sizeof (struct charset_map_entries)));
+ SAFE_ALLOCA (entries->next, struct charset_map_entries *,
+ sizeof (struct charset_map_entries));
entries = entries->next;
+ memset (entries, 0, sizeof (struct charset_map_entries));
+ n_entries = 0;
}
- idx = n_entries % 0x10000;
+ idx = n_entries;
entries->entry[idx].from = from;
entries->entry[idx].to = to;
entries->entry[idx].c = c;
n_entries++;
}
fclose (fp);
- close (fd);
load_charset_map (charset, head, n_entries, control_flag);
+ SAFE_FREE ();
}
static void
-load_charset_map_from_vector (charset, vec, control_flag)
- struct charset *charset;
- Lisp_Object vec;
- int control_flag;
+load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
{
unsigned min_code = CHARSET_MIN_CODE (charset);
unsigned max_code = CHARSET_MAX_CODE (charset);
int n_entries;
int len = ASIZE (vec);
int i;
+ USE_SAFE_ALLOCA;
if (len % 2 == 1)
{
return;
}
- head = entries = ((struct charset_map_entries *)
- alloca (sizeof (struct charset_map_entries)));
+ /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
+ large (larger than MAX_ALLOCA). */
+ SAFE_ALLOCA (head, struct charset_map_entries *,
+ sizeof (struct charset_map_entries));
+ entries = head;
+ memset (entries, 0, sizeof (struct charset_map_entries));
+
n_entries = 0;
for (i = 0; i < len; i += 2)
{
Lisp_Object val, val2;
unsigned from, to;
- int c;
+ EMACS_INT c;
int idx;
val = AREF (vec, i);
{
val2 = XCDR (val);
val = XCAR (val);
- CHECK_NATNUM (val);
- CHECK_NATNUM (val2);
from = XFASTINT (val);
to = XFASTINT (val2);
}
else
- {
- CHECK_NATNUM (val);
- from = to = XFASTINT (val);
- }
+ from = to = XFASTINT (val);
val = AREF (vec, i + 1);
CHECK_NATNUM (val);
c = XFASTINT (val);
if (n_entries > 0 && (n_entries % 0x10000) == 0)
{
- entries->next = ((struct charset_map_entries *)
- alloca (sizeof (struct charset_map_entries)));
+ SAFE_ALLOCA (entries->next, struct charset_map_entries *,
+ sizeof (struct charset_map_entries));
entries = entries->next;
+ memset (entries, 0, sizeof (struct charset_map_entries));
}
idx = n_entries % 0x10000;
entries->entry[idx].from = from;
}
load_charset_map (charset, head, n_entries, control_flag);
+ SAFE_FREE ();
}
+
+/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
+ map it is (see the comment of load_charset_map for the detail). */
+
static void
-load_charset (charset)
- struct charset *charset;
+load_charset (struct charset *charset, int control_flag)
{
- if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
- {
- Lisp_Object map;
+ Lisp_Object map;
- map = CHARSET_MAP (charset);
- if (STRINGP (map))
- load_charset_map_from_file (charset, map, 1);
- else
- load_charset_map_from_vector (charset, map, 1);
- CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
+ if (inhibit_load_charset_map
+ && temp_charset_work
+ && charset == temp_charset_work->current
+ && ((control_flag == 2) == temp_charset_work->for_encoder))
+ return;
+
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
+ map = CHARSET_MAP (charset);
+ else
+ {
+ if (! CHARSET_UNIFIED_P (charset))
+ abort ();
+ map = CHARSET_UNIFY_MAP (charset);
}
+ if (STRINGP (map))
+ load_charset_map_from_file (charset, map, control_flag);
+ else
+ load_charset_map_from_vector (charset, map, control_flag);
}
DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
doc: /* Return non-nil if and only if OBJECT is a charset.*/)
- (object)
- Lisp_Object object;
+ (Lisp_Object object)
{
return (CHARSETP (object) ? Qt : Qnil);
}
-void
-map_charset_chars (c_function, function, arg,
- charset, from, to)
- void (*c_function) P_ ((Lisp_Object, Lisp_Object));
- Lisp_Object function, arg;
- struct charset *charset;
- unsigned from, to;
+static void
+map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
+ Lisp_Object function, Lisp_Object arg,
+ unsigned int from, unsigned int to)
{
+ int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
+ int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
Lisp_Object range;
- int partial;
+ int c, stop;
+ struct gcpro gcpro1;
- if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
- load_charset (charset);
+ range = Fcons (Qnil, Qnil);
+ GCPRO1 (range);
- partial = (from > CHARSET_MIN_CODE (charset)
- || to < CHARSET_MAX_CODE (charset));
+ c = temp_charset_work->min_char;
+ stop = (temp_charset_work->max_char < 0x20000
+ ? temp_charset_work->max_char : 0xFFFF);
- if (CHARSET_UNIFIED_P (charset)
- && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
+ while (1)
{
- map_char_table_for_charset (c_function, function,
- CHARSET_DEUNIFIER (charset), arg,
- partial ? charset : NULL, from, to);
+ int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
+
+ if (idx >= from_idx && idx <= to_idx)
+ {
+ if (NILP (XCAR (range)))
+ XSETCAR (range, make_number (c));
+ }
+ else if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ XSETCAR (range, Qnil);
+ }
+ if (c == stop)
+ {
+ if (c == temp_charset_work->max_char)
+ {
+ if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c));
+ if (c_function)
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ }
+ break;
+ }
+ c = 0x1FFFF;
+ stop = temp_charset_work->max_char;
+ }
+ c++;
}
+ UNGCPRO;
+}
+
+void
+map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
+ Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
+{
+ Lisp_Object range;
+ int partial;
+
+ partial = (from > CHARSET_MIN_CODE (charset)
+ || to < CHARSET_MAX_CODE (charset));
if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
{
int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
+ if (CHARSET_UNIFIED_P (charset))
+ {
+ if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
+ load_charset (charset, 2);
+ if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
+ map_char_table_for_charset (c_function, function,
+ CHARSET_DEUNIFIER (charset), arg,
+ partial ? charset : NULL, from, to);
+ else
+ map_charset_for_dump (c_function, function, arg, from, to);
+ }
+
range = Fcons (make_number (from_c), make_number (to_c));
if (NILP (function))
(*c_function) (arg, range);
else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
{
if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
- return;
- map_char_table_for_charset (c_function, function,
- CHARSET_ENCODER (charset), arg,
- partial ? charset : NULL, from, to);
+ load_charset (charset, 2);
+ if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
+ map_char_table_for_charset (c_function, function,
+ CHARSET_ENCODER (charset), arg,
+ partial ? charset : NULL, from, to);
+ else
+ map_charset_for_dump (c_function, function, arg, from, to);
}
else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
{
charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
offset = XINT (XCDR (XCAR (parents)));
- this_from = from - offset;
- this_to = to - offset;
+ this_from = from > offset ? from - offset : 0;
+ this_to = to > offset ? to - offset : 0;
if (this_from < CHARSET_MIN_CODE (charset))
this_from = CHARSET_MIN_CODE (charset);
if (this_to > CHARSET_MAX_CODE (charset))
characters contained in CHARSET.
The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
-range of code points of target characters. */)
- (function, charset, arg, from_code, to_code)
- Lisp_Object function, charset, arg, from_code, to_code;
+range of code points (in CHARSET) of target characters. */)
+ (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
{
struct charset *cs;
unsigned from, to;
from = CHARSET_MIN_CODE (cs);
else
{
- CHECK_NATNUM (from_code);
from = XINT (from_code);
if (from < CHARSET_MIN_CODE (cs))
from = CHARSET_MIN_CODE (cs);
to = CHARSET_MAX_CODE (cs);
else
{
- CHECK_NATNUM (to_code);
to = XINT (to_code);
if (to > CHARSET_MAX_CODE (cs))
to = CHARSET_MAX_CODE (cs);
Sdefine_charset_internal, charset_arg_max, MANY, 0,
doc: /* For internal use only.
usage: (define-charset-internal ...) */)
- (nargs, args)
- int nargs;
- Lisp_Object *args;
+ (ptrdiff_t nargs, Lisp_Object *args)
{
/* Charset attr vector. */
Lisp_Object attrs;
Lisp_Object val;
- unsigned hash_code;
+ EMACS_UINT hash_code;
struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
int i, j;
struct charset charset;
ASET (attrs, charset_name, args[charset_arg_name]);
val = args[charset_arg_code_space];
- for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
+ for (i = 0, dimension = 0, nchars = 1; ; i++)
{
+ Lisp_Object min_byte_obj, max_byte_obj;
int min_byte, max_byte;
- min_byte = XINT (Faref (val, make_number (i * 2)));
- max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
- if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
- error ("Invalid :code-space value");
+ min_byte_obj = Faref (val, make_number (i * 2));
+ max_byte_obj = Faref (val, make_number (i * 2 + 1));
+ CHECK_RANGED_INTEGER (0, min_byte_obj, 255);
+ min_byte = XINT (min_byte_obj);
+ CHECK_RANGED_INTEGER (min_byte, max_byte_obj, 255);
+ max_byte = XINT (max_byte_obj);
charset.code_space[i * 4] = min_byte;
charset.code_space[i * 4 + 1] = max_byte;
charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
- nchars *= charset.code_space[i * 4 + 2];
- charset.code_space[i * 4 + 3] = nchars;
if (max_byte > 0)
dimension = i + 1;
+ if (i == 3)
+ break;
+ nchars *= charset.code_space[i * 4 + 2];
+ charset.code_space[i * 4 + 3] = nchars;
}
val = args[charset_arg_dimension];
charset.dimension = dimension;
else
{
- CHECK_NATNUM (val);
+ CHECK_RANGED_INTEGER (1, val, 4);
charset.dimension = XINT (val);
- if (charset.dimension < 1 || charset.dimension > 4)
- args_out_of_range_3 (val, make_number (1), make_number (4));
}
charset.code_linear_p
if (! charset.code_linear_p)
{
charset.code_space_mask = (unsigned char *) xmalloc (256);
- bzero (charset.code_space_mask, 256);
+ memset (charset.code_space_mask, 0, 256);
for (i = 0; i < 4; i++)
for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
j++)
charset.min_code = (charset.code_space[0]
| (charset.code_space[4] << 8)
| (charset.code_space[8] << 16)
- | (charset.code_space[12] << 24));
+ | ((unsigned) charset.code_space[12] << 24));
charset.max_code = (charset.code_space[1]
| (charset.code_space[5] << 8)
| (charset.code_space[9] << 16)
- | (charset.code_space[13] << 24));
+ | ((unsigned) charset.code_space[13] << 24));
charset.char_index_offset = 0;
val = args[charset_arg_min_code];
if (! NILP (val))
{
- unsigned code;
+ unsigned code = cons_to_unsigned (val, UINT_MAX);
- if (INTEGERP (val))
- code = XINT (val);
- else
- {
- CHECK_CONS (val);
- CHECK_NUMBER_CAR (val);
- CHECK_NUMBER_CDR (val);
- code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
- }
if (code < charset.min_code
|| code > charset.max_code)
- args_out_of_range_3 (make_number (charset.min_code),
- make_number (charset.max_code), val);
+ args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+ make_fixnum_or_float (charset.max_code), val);
charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
charset.min_code = code;
}
val = args[charset_arg_max_code];
if (! NILP (val))
{
- unsigned code;
+ unsigned code = cons_to_unsigned (val, UINT_MAX);
- if (INTEGERP (val))
- code = XINT (val);
- else
- {
- CHECK_CONS (val);
- CHECK_NUMBER_CAR (val);
- CHECK_NUMBER_CDR (val);
- code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
- }
if (code < charset.min_code
|| code > charset.max_code)
- args_out_of_range_3 (make_number (charset.min_code),
- make_number (charset.max_code), val);
+ args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+ make_fixnum_or_float (charset.max_code), val);
charset.max_code = code;
}
- charset.compact_codes_p = charset.max_code < 0x1000000;
+ charset.compact_codes_p = charset.max_code < 0x10000;
val = args[charset_arg_invalid_code];
if (NILP (val))
charset.invalid_code = 0;
else
{
- XSETINT (val, charset.max_code + 1);
- if (XINT (val) == charset.max_code + 1)
+ if (charset.max_code < UINT_MAX)
charset.invalid_code = charset.max_code + 1;
else
error ("Attribute :invalid-code must be specified");
}
}
else
- {
- CHECK_NATNUM (val);
- charset.invalid_code = XFASTINT (val);
- }
+ charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
val = args[charset_arg_iso_final];
if (NILP (val))
{
CHECK_NUMBER (val);
if (XINT (val) < '0' || XINT (val) > 127)
- error ("Invalid iso-final-char: %d", XINT (val));
+ error ("Invalid iso-final-char: %"pI"d", XINT (val));
charset.iso_final = XINT (val);
}
charset.iso_revision = -1;
else
{
- CHECK_NUMBER (val);
- if (XINT (val) > 63)
- args_out_of_range (make_number (63), val);
+ CHECK_RANGED_INTEGER (-1, val, 63);
charset.iso_revision = XINT (val);
}
{
CHECK_NATNUM (val);
if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
- error ("Invalid emacs-mule-id: %d", XINT (val));
+ error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
charset.emacs_mule_id = XINT (val);
}
charset.unified_p = 0;
- bzero (charset.fast_map, sizeof (charset.fast_map));
+ memset (charset.fast_map, 0, sizeof (charset.fast_map));
if (! NILP (args[charset_arg_code_offset]))
{
val = args[charset_arg_code_offset];
- CHECK_NUMBER (val);
+ CHECK_CHARACTER (val);
charset.method = CHARSET_METHOD_OFFSET;
charset.code_offset = XINT (val);
- i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
- charset.min_char = i + charset.code_offset;
i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
- charset.max_char = i + charset.code_offset;
- if (charset.max_char > MAX_CHAR)
+ if (MAX_CHAR - charset.code_offset < i)
error ("Unsupported max char: %d", charset.max_char);
+ charset.max_char = i + charset.code_offset;
+ i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
+ charset.min_char = i + charset.code_offset;
i = (charset.min_char >> 7) << 7;
for (; i < 0x10000 && i <= charset.max_char; i += 128)
i = (i >> 12) << 12;
for (; i <= charset.max_char; i += 0x1000)
CHARSET_FAST_MAP_SET (i, charset.fast_map);
+ if (charset.code_offset == 0 && charset.max_char >= 0x80)
+ charset.ascii_compatible_p = 1;
}
else if (! NILP (args[charset_arg_map]))
{
val = args[charset_arg_map];
ASET (attrs, charset_map, val);
- if (STRINGP (val))
- load_charset_map_from_file (&charset, val, 0);
- else
- load_charset_map_from_vector (&charset, val, 0);
- charset.method = CHARSET_METHOD_MAP_DEFERRED;
+ charset.method = CHARSET_METHOD_MAP;
}
else if (! NILP (args[charset_arg_subset]))
{
car_part = XCAR (elt);
cdr_part = XCDR (elt);
CHECK_CHARSET_GET_ID (car_part, this_id);
- CHECK_NUMBER (cdr_part);
+ CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
offset = XINT (cdr_part);
}
else
hash_code);
if (charset_table_used == charset_table_size)
{
- struct charset *new_table
- = (struct charset *) xmalloc (sizeof (struct charset)
- * (charset_table_size + 16));
- bcopy (charset_table, new_table,
- sizeof (struct charset) * charset_table_size);
- charset_table_size += 16;
+ /* Ensure that charset IDs fit into 'int' as well as into the
+ restriction imposed by fixnums. Although the 'int' restriction
+ could be removed, too much other code would need altering; for
+ example, the IDs are stuffed into struct
+ coding_system.charbuf[i] entries, which are 'int'. */
+ int old_size = charset_table_size;
+ struct charset *new_table =
+ xpalloc (0, &charset_table_size, 1,
+ min (INT_MAX, MOST_POSITIVE_FIXNUM),
+ sizeof *charset_table);
+ memcpy (new_table, charset_table, old_size * sizeof *new_table);
charset_table = new_table;
+ /* FIXME: This leaks memory, as the old charset_table becomes
+ unreachable. If the old charset table is charset_table_init
+ then this leak is intentional; otherwise, it's unclear.
+ If the latter memory leak is intentional, a
+ comment should be added to explain this. If not, the old
+ charset_table should be freed, by passing it as the 1st argument
+ to xpalloc and removing the memcpy. */
}
id = charset_table_used++;
new_definition_p = 1;
charset.id = id;
charset_table[id] = charset;
+ if (charset.method == CHARSET_METHOD_MAP)
+ {
+ load_charset (&charset, 0);
+ charset_table[id] = charset;
+ }
+
if (charset.iso_final >= 0)
{
ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
charset_jisx0208_1978 = id;
else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
charset_jisx0208 = id;
+ else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
+ charset_ksc5601 = id;
}
if (charset.emacs_mule_id >= 0)
{
- emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
+ emacs_mule_charset[charset.emacs_mule_id] = id;
if (charset.emacs_mule_id < 0xA0)
emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
else
charset. */
static int
-define_charset_internal (name, dimension, code_space, min_code, max_code,
- iso_final, iso_revision, emacs_mule_id,
- ascii_compatible, supplementary,
- code_offset)
- Lisp_Object name;
- int dimension;
- unsigned char *code_space;
- unsigned min_code, max_code;
- int iso_final, iso_revision, emacs_mule_id;
- int ascii_compatible, supplementary;
- int code_offset;
+define_charset_internal (Lisp_Object name,
+ int dimension,
+ const char *code_space_chars,
+ unsigned min_code, unsigned max_code,
+ int iso_final, int iso_revision, int emacs_mule_id,
+ int ascii_compatible, int supplementary,
+ int code_offset)
{
+ const unsigned char *code_space = (const unsigned char *) code_space_chars;
Lisp_Object args[charset_arg_max];
Lisp_Object plist[14];
Lisp_Object val;
args[charset_arg_superset] = Qnil;
args[charset_arg_unify_map] = Qnil;
- plist[0] = intern (":name");
+ plist[0] = intern_c_string (":name");
plist[1] = args[charset_arg_name];
- plist[2] = intern (":dimension");
+ plist[2] = intern_c_string (":dimension");
plist[3] = args[charset_arg_dimension];
- plist[4] = intern (":code-space");
+ plist[4] = intern_c_string (":code-space");
plist[5] = args[charset_arg_code_space];
- plist[6] = intern (":iso-final-char");
+ plist[6] = intern_c_string (":iso-final-char");
plist[7] = args[charset_arg_iso_final];
- plist[8] = intern (":emacs-mule-id");
+ plist[8] = intern_c_string (":emacs-mule-id");
plist[9] = args[charset_arg_emacs_mule_id];
- plist[10] = intern (":ascii-compatible-p");
+ plist[10] = intern_c_string (":ascii-compatible-p");
plist[11] = args[charset_arg_ascii_compatible_p];
- plist[12] = intern (":code-offset");
+ plist[12] = intern_c_string (":code-offset");
plist[13] = args[charset_arg_code_offset];
args[charset_arg_plist] = Flist (14, plist);
DEFUN ("define-charset-alias", Fdefine_charset_alias,
Sdefine_charset_alias, 2, 2, 0,
doc: /* Define ALIAS as an alias for charset CHARSET. */)
- (alias, charset)
- Lisp_Object alias, charset;
+ (Lisp_Object alias, Lisp_Object charset)
{
Lisp_Object attr;
DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
doc: /* Return the property list of CHARSET. */)
- (charset)
- Lisp_Object charset;
+ (Lisp_Object charset)
{
Lisp_Object attrs;
DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
doc: /* Set CHARSET's property list to PLIST. */)
- (charset, plist)
- Lisp_Object charset, plist;
+ (Lisp_Object charset, Lisp_Object plist)
{
Lisp_Object attrs;
`define-charset' (which see).
Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
- (charset, unify_map, deunify)
- Lisp_Object charset, unify_map, deunify;
+ (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
{
int id;
struct charset *cs;
CHECK_CHARSET_GET_ID (charset, id);
cs = CHARSET_FROM_ID (id);
- if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
- load_charset (cs);
if (NILP (deunify)
? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
: ! CHARSET_UNIFIED_P (cs))
CHARSET_UNIFIED_P (cs) = 0;
if (NILP (deunify))
{
- if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
+ if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
+ || CHARSET_CODE_OFFSET (cs) < 0x110000)
error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
if (NILP (unify_map))
unify_map = CHARSET_UNIFY_MAP (cs);
- if (STRINGP (unify_map))
- load_charset_map_from_file (cs, unify_map, 2);
- else if (VECTORP (unify_map))
- load_charset_map_from_vector (cs, unify_map, 2);
- else if (NILP (unify_map))
- error ("No unify-map for charset");
else
- error ("Bad unify-map arg");
+ {
+ if (! STRINGP (unify_map) && ! VECTORP (unify_map))
+ signal_error ("Bad unify-map", unify_map);
+ CHARSET_UNIFY_MAP (cs) = unify_map;
+ }
+ if (NILP (Vchar_unify_table))
+ Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
+ char_table_set_range (Vchar_unify_table,
+ cs->min_char, cs->max_char, charset);
CHARSET_UNIFIED_P (cs) = 1;
}
else if (CHAR_TABLE_P (Vchar_unify_table))
{
- int min_code = CHARSET_MIN_CODE (cs);
- int max_code = CHARSET_MAX_CODE (cs);
+ unsigned min_code = CHARSET_MIN_CODE (cs);
+ unsigned max_code = CHARSET_MAX_CODE (cs);
int min_char = DECODE_CHAR (cs, min_code);
int max_char = DECODE_CHAR (cs, max_code);
This final char is for private use, thus the range is `0' (48) .. `?' (63).
If there's no unused final char for the specified kind of charset,
return nil. */)
- (dimension, chars)
- Lisp_Object dimension, chars;
+ (Lisp_Object dimension, Lisp_Object chars)
{
int final_char;
}
static void
-check_iso_charset_parameter (dimension, chars, final_char)
- Lisp_Object dimension, chars, final_char;
+check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
{
CHECK_NATNUM (dimension);
CHECK_NATNUM (chars);
- CHECK_NATNUM (final_char);
+ CHECK_CHARACTER (final_char);
if (XINT (dimension) > 3)
- error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
+ error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
+ XINT (dimension));
if (XINT (chars) != 94 && XINT (chars) != 96)
- error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
+ error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
if (XINT (final_char) < '0' || XINT (final_char) > '~')
- error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
+ error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
+ (int)XINT (final_char));
}
On decoding by an ISO-2022 base coding system, when a charset
specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
if CHARSET is designated instead. */)
- (dimension, chars, final_char, charset)
- Lisp_Object dimension, chars, final_char, charset;
+ (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
{
int id;
int chars_flag;
*/
int
-string_xstring_p (string)
- Lisp_Object string;
+string_xstring_p (Lisp_Object string)
{
const unsigned char *p = SDATA (string);
const unsigned char *endp = p + SBYTES (string);
It may lookup a translation table TABLE if supplied. */
static void
-find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
- const unsigned char *ptr;
- EMACS_INT nchars, nbytes;
- Lisp_Object charsets, table;
- int multibyte;
+find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars, ptrdiff_t nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
{
const unsigned char *pend = ptr + nbytes;
If the current buffer is unibyte, the returned list may contain
only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
- (beg, end, table)
- Lisp_Object beg, end, table;
+ (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
{
Lisp_Object charsets;
- EMACS_INT from, from_byte, to, stop, stop_byte;
+ ptrdiff_t from, from_byte, to, stop, stop_byte;
int i;
Lisp_Object val;
- int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+ int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
validate_region (&beg, &end);
from = XFASTINT (beg);
If STR is unibyte, the returned list may contain
only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
- (str, table)
- Lisp_Object str, table;
+ (Lisp_Object str, Lisp_Object table)
{
Lisp_Object charsets;
int i;
\f
+/* Return a unified character code for C (>= 0x110000). VAL is a
+ value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
+ charset symbol. */
+int
+maybe_unify_char (int c, Lisp_Object val)
+{
+ struct charset *charset;
+
+ if (INTEGERP (val))
+ return XFASTINT (val);
+ if (NILP (val))
+ return c;
+
+ CHECK_CHARSET_GET_CHARSET (val, charset);
+ load_charset (charset, 1);
+ if (! inhibit_load_charset_map)
+ {
+ val = CHAR_TABLE_REF (Vchar_unify_table, c);
+ if (! NILP (val))
+ c = XFASTINT (val);
+ }
+ else
+ {
+ int code_index = c - CHARSET_CODE_OFFSET (charset);
+ int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
+
+ if (unified > 0)
+ c = unified;
+ }
+ return c;
+}
+
+
/* Return a character correponding to the code-point CODE of
CHARSET. */
int
-decode_char (charset, code)
- struct charset *charset;
- unsigned code;
+decode_char (struct charset *charset, unsigned int code)
{
int c, char_index;
enum charset_method method = CHARSET_METHOD (charset);
if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
return -1;
- if (method == CHARSET_METHOD_MAP_DEFERRED)
- {
- load_charset (charset);
- method = CHARSET_METHOD (charset);
- }
-
if (method == CHARSET_METHOD_SUBSET)
{
Lisp_Object subset_info;
decoder = CHARSET_DECODER (charset);
if (! VECTORP (decoder))
- return -1;
- c = XINT (AREF (decoder, char_index));
+ {
+ load_charset (charset, 1);
+ decoder = CHARSET_DECODER (charset);
+ }
+ if (VECTORP (decoder))
+ c = XINT (AREF (decoder, char_index));
+ else
+ c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
}
- else
+ else /* method == CHARSET_METHOD_OFFSET */
{
c = char_index + CHARSET_CODE_OFFSET (charset);
+ if (CHARSET_UNIFIED_P (charset)
+ && c > MAX_UNICODE_CHAR)
+ MAYBE_UNIFY_CHAR (c);
}
}
- if (CHARSET_UNIFIED_P (charset)
- && c >= 0)
- {
- MAYBE_UNIFY_CHAR (c);
- }
-
return c;
}
use CHARSET's strict_max_char instead of max_char. */
unsigned
-encode_char (charset, c)
- struct charset *charset;
- int c;
+encode_char (struct charset *charset, int c)
{
unsigned code;
enum charset_method method = CHARSET_METHOD (charset);
if (CHARSET_UNIFIED_P (charset))
{
- Lisp_Object deunifier, deunified;
+ Lisp_Object deunifier;
+ int code_index = -1;
deunifier = CHARSET_DEUNIFIER (charset);
if (! CHAR_TABLE_P (deunifier))
{
- Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
+ load_charset (charset, 2);
deunifier = CHARSET_DEUNIFIER (charset);
}
- deunified = CHAR_TABLE_REF (deunifier, c);
- if (! NILP (deunified))
- c = XINT (deunified);
+ if (CHAR_TABLE_P (deunifier))
+ {
+ Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
+
+ if (INTEGERP (deunified))
+ code_index = XINT (deunified);
+ }
+ else
+ {
+ code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
+ }
+ if (code_index >= 0)
+ c = CHARSET_CODE_OFFSET (charset) + code_index;
}
if (method == CHARSET_METHOD_SUBSET)
|| c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
return CHARSET_INVALID_CODE (charset);
- if (method == CHARSET_METHOD_MAP_DEFERRED)
- {
- load_charset (charset);
- method = CHARSET_METHOD (charset);
- }
-
if (method == CHARSET_METHOD_MAP)
{
Lisp_Object encoder;
encoder = CHARSET_ENCODER (charset);
if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
- return CHARSET_INVALID_CODE (charset);
- val = CHAR_TABLE_REF (encoder, c);
- if (NILP (val))
- return CHARSET_INVALID_CODE (charset);
- code = XINT (val);
- if (! CHARSET_COMPACT_CODES_P (charset))
- code = INDEX_TO_CODE_POINT (charset, code);
+ {
+ load_charset (charset, 2);
+ encoder = CHARSET_ENCODER (charset);
+ }
+ if (CHAR_TABLE_P (encoder))
+ {
+ val = CHAR_TABLE_REF (encoder, c);
+ if (NILP (val))
+ return CHARSET_INVALID_CODE (charset);
+ code = XINT (val);
+ if (! CHARSET_COMPACT_CODES_P (charset))
+ code = INDEX_TO_CODE_POINT (charset, code);
+ }
+ else
+ {
+ code = GET_TEMP_CHARSET_WORK_ENCODER (c);
+ code = INDEX_TO_CODE_POINT (charset, code);
+ }
}
else /* method == CHARSET_METHOD_OFFSET */
{
- code = c - CHARSET_CODE_OFFSET (charset);
- code = INDEX_TO_CODE_POINT (charset, code);
+ unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
+
+ code = INDEX_TO_CODE_POINT (charset, code_index);
}
return code;
Optional argument RESTRICTION specifies a way to map the pair of CCS
and CODE-POINT to a character. Currently not supported and just ignored. */)
- (charset, code_point, restriction)
- Lisp_Object charset, code_point, restriction;
+ (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
{
int c, id;
unsigned code;
struct charset *charsetp;
CHECK_CHARSET_GET_ID (charset, id);
- if (CONSP (code_point))
- {
- CHECK_NATNUM_CAR (code_point);
- CHECK_NATNUM_CDR (code_point);
- code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
- }
- else
- {
- CHECK_NATNUM (code_point);
- code = XINT (code_point);
- }
+ code = cons_to_unsigned (code_point, UINT_MAX);
charsetp = CHARSET_FROM_ID (id);
c = DECODE_CHAR (charsetp, code);
return (c >= 0 ? make_number (c) : Qnil);
Optional argument RESTRICTION specifies a way to map CH to a
code-point in CCS. Currently not supported and just ignored. */)
- (ch, charset, restriction)
- Lisp_Object ch, charset, restriction;
+ (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
{
- int id;
+ int c, id;
unsigned code;
struct charset *charsetp;
CHECK_CHARSET_GET_ID (charset, id);
- CHECK_NATNUM (ch);
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
charsetp = CHARSET_FROM_ID (id);
- code = ENCODE_CHAR (charsetp, XINT (ch));
+ code = ENCODE_CHAR (charsetp, c);
if (code == CHARSET_INVALID_CODE (charsetp))
return Qnil;
- if (code > 0x7FFFFFF)
- return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
- return make_number (code);
+ return INTEGER_TO_CONS (code);
}
CODE1 through CODE4 are optional, but if you don't supply sufficient
position codes, it is assumed that the minimum code in each dimension
is specified. */)
- (charset, code1, code2, code3, code4)
- Lisp_Object charset, code1, code2, code3, code4;
+ (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
{
int id, dimension;
struct charset *charsetp;
Vcharset_ordered_list. */
struct charset *
-char_charset (c, charset_list, code_return)
- int c;
- Lisp_Object charset_list;
- unsigned *code_return;
+char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
{
int maybe_null = 0;
return charset;
}
charset_list = XCDR (charset_list);
- if (c <= MAX_UNICODE_CHAR
- && EQ (charset_list, Vcharset_non_preferred_head))
+ if (! maybe_null
+ && c <= MAX_UNICODE_CHAR
+ && EQ (charset_list, Vcharset_non_preferred_head))
return CHARSET_FROM_ID (charset_unicode);
}
return (maybe_null ? NULL
The charset is decided by the current priority order of charsets.
A position-code is a byte value of each dimension of the code-point of
CH in the charset. */)
- (ch)
- Lisp_Object ch;
+ (Lisp_Object ch)
{
struct charset *charset;
int c, dimension;
}
-DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
- doc: /* Return the charset of highest priority that contains CH. */)
- (ch)
- Lisp_Object ch;
+DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
+ doc: /* Return the charset of highest priority that contains CH.
+If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
+from which to find the charset. It may also be a coding system. In
+that case, find the charset from what supported by that coding system. */)
+ (Lisp_Object ch, Lisp_Object restriction)
{
struct charset *charset;
CHECK_CHARACTER (ch);
- charset = CHAR_CHARSET (XINT (ch));
+ if (NILP (restriction))
+ charset = CHAR_CHARSET (XINT (ch));
+ else
+ {
+ if (CONSP (restriction))
+ {
+ int c = XFASTINT (ch);
+
+ for (; CONSP (restriction); restriction = XCDR (restriction))
+ {
+ struct charset *rcharset;
+
+ CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
+ if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
+ return XCAR (restriction);
+ }
+ return Qnil;
+ }
+ restriction = coding_system_charset_list (restriction);
+ charset = char_charset (XINT (ch), restriction, NULL);
+ if (! charset)
+ return Qnil;
+ }
return (CHARSET_NAME (charset));
}
Return charset of a character in the current buffer at position POS.
If POS is nil, it defauls to the current point.
If POS is out of range, the value is nil. */)
- (pos)
- Lisp_Object pos;
+ (Lisp_Object pos)
{
Lisp_Object ch;
struct charset *charset;
whereas Emacs distinguishes them by charset symbol.
See the documentation of the function `charset-info' for the meanings of
DIMENSION, CHARS, and FINAL-CHAR. */)
- (dimension, chars, final_char)
- Lisp_Object dimension, chars, final_char;
+ (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
{
int id;
int chars_flag;
DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
0, 0, 0,
doc: /*
-Clear encoder and decoder of charsets that are loaded from mapfiles. */)
- ()
+Internal use only.
+Clear temporary charset mapping tables.
+It should be called only from temacs invoked for dumping. */)
+ (void)
{
- int i;
- struct charset *charset;
- Lisp_Object attrs;
-
- for (i = 0; i < charset_table_used; i++)
+ if (temp_charset_work)
{
- charset = CHARSET_FROM_ID (i);
- attrs = CHARSET_ATTRIBUTES (charset);
-
- if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
- {
- CHARSET_ATTR_DECODER (attrs) = Qnil;
- CHARSET_ATTR_ENCODER (attrs) = Qnil;
- CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
- }
-
- if (CHARSET_UNIFIED_P (charset))
- CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
+ xfree (temp_charset_work);
+ temp_charset_work = NULL;
}
- if (CHAR_TABLE_P (Vchar_unified_charset_table))
- {
- Foptimize_char_table (Vchar_unified_charset_table, Qnil);
- Vchar_unify_table = Vchar_unified_charset_table;
- Vchar_unified_charset_table = Qnil;
- }
+ if (CHAR_TABLE_P (Vchar_unify_table))
+ Foptimize_char_table (Vchar_unify_table, Qnil);
return Qnil;
}
Scharset_priority_list, 0, 1, 0,
doc: /* Return the list of charsets ordered by priority.
HIGHESTP non-nil means just return the highest priority one. */)
- (highestp)
- Lisp_Object highestp;
+ (Lisp_Object highestp)
{
Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1, MANY, 0,
doc: /* Assign higher priority to the charsets given as arguments.
usage: (set-charset-priority &rest charsets) */)
- (nargs, args)
- int nargs;
- Lisp_Object *args;
+ (ptrdiff_t nargs, Lisp_Object *args)
{
Lisp_Object new_head, old_list, arglist[2];
Lisp_Object list_2022, list_emacs_mule;
- int i, id;
+ ptrdiff_t i;
+ int id;
old_list = Fcopy_sequence (Vcharset_ordered_list);
new_head = Qnil;
Vcharset_ordered_list = Fnconc (2, arglist);
charset_ordered_list_tick++;
+ charset_unibyte = -1;
for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
CONSP (old_list); old_list = XCDR (old_list))
{
list_2022 = Fcons (XCAR (old_list), list_2022);
if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
+ if (charset_unibyte < 0)
+ {
+ struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
+
+ if (CHARSET_DIMENSION (charset) == 1
+ && CHARSET_ASCII_COMPATIBLE_P (charset)
+ && CHARSET_MAX_CHAR (charset) >= 0x80)
+ charset_unibyte = CHARSET_ID (charset);
+ }
}
Viso_2022_charset_list = Fnreverse (list_2022);
Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
+ if (charset_unibyte < 0)
+ charset_unibyte = charset_iso_8859_1;
return Qnil;
}
0, 1, 0,
doc: /* Internal use only.
Return charset identification number of CHARSET. */)
- (charset)
- Lisp_Object charset;
+ (Lisp_Object charset)
{
int id;
return make_number (id);
}
+struct charset_sort_data
+{
+ Lisp_Object charset;
+ int id;
+ ptrdiff_t priority;
+};
+
+static int
+charset_compare (const void *d1, const void *d2)
+{
+ const struct charset_sort_data *data1 = d1, *data2 = d2;
+ if (data1->priority != data2->priority)
+ return data1->priority < data2->priority ? -1 : 1;
+ return 0;
+}
+
+DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
+ doc: /* Sort charset list CHARSETS by a priority of each charset.
+Return the sorted list. CHARSETS is modified by side effects.
+See also `charset-priority-list' and `set-charset-priority'. */)
+ (Lisp_Object charsets)
+{
+ Lisp_Object len = Flength (charsets);
+ ptrdiff_t n = XFASTINT (len), i, j;
+ int done;
+ Lisp_Object tail, elt, attrs;
+ struct charset_sort_data *sort_data;
+ int id, min_id = INT_MAX, max_id = INT_MIN;
+ USE_SAFE_ALLOCA;
+
+ if (n == 0)
+ return Qnil;
+ SAFE_NALLOCA (sort_data, 1, n);
+ for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
+ {
+ elt = XCAR (tail);
+ CHECK_CHARSET_GET_ATTR (elt, attrs);
+ sort_data[i].charset = elt;
+ sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
+ if (id < min_id)
+ min_id = id;
+ if (id > max_id)
+ max_id = id;
+ }
+ for (done = 0, tail = Vcharset_ordered_list, i = 0;
+ done < n && CONSP (tail); tail = XCDR (tail), i++)
+ {
+ elt = XCAR (tail);
+ id = XFASTINT (elt);
+ if (id >= min_id && id <= max_id)
+ for (j = 0; j < n; j++)
+ if (sort_data[j].id == id)
+ {
+ sort_data[j].priority = i;
+ done++;
+ }
+ }
+ qsort (sort_data, n, sizeof *sort_data, charset_compare);
+ for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
+ XSETCAR (tail, sort_data[i].charset);
+ SAFE_FREE ();
+ return charsets;
+}
+
\f
void
-init_charset ()
+init_charset (void)
{
Lisp_Object tempdir;
tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
- if (access (SDATA (tempdir), 0) < 0)
+ if (access (SSDATA (tempdir), 0) < 0)
{
dir_warning ("Error: charsets directory (%s) does not exist.\n\
Emacs will not function correctly without the character map files.\n\
void
-init_charset_once ()
+init_charset_once (void)
{
int i, j, k;
iso_charset_table[i][j][k] = -1;
for (i = 0; i < 256; i++)
- emacs_mule_charset[i] = NULL;
+ emacs_mule_charset[i] = -1;
charset_jisx0201_roman = -1;
charset_jisx0208_1978 = -1;
charset_jisx0208 = -1;
-
- for (i = 0; i < 128; i++)
- unibyte_to_multibyte_table[i] = i;
- for (; i < 256; i++)
- unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
+ charset_ksc5601 = -1;
}
#ifdef emacs
+/* Allocate an initial charset table that is large enough to handle
+ Emacs while it is bootstrapping. As of September 2011, the size
+ needs to be at least 166; make it a bit bigger to allow for future
+ expansion.
+
+ Don't make the value so small that the table is reallocated during
+ bootstrapping, as glibc malloc calls larger than just under 64 KiB
+ during an initial bootstrap wreak havoc after dumping; see the
+ M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
+ internal to glibc malloc and perhaps to Emacs malloc debugging. */
+static struct charset charset_table_init[180];
+
void
-syms_of_charset ()
+syms_of_charset (void)
{
DEFSYM (Qcharsetp, "charsetp");
Vemacs_mule_charset_list = Qnil;
/* Don't staticpro them here. It's done in syms_of_fns. */
- QCtest = intern (":test");
- Qeq = intern ("eq");
+ QCtest = intern_c_string (":test");
+ Qeq = intern_c_string ("eq");
staticpro (&Vcharset_hash_table);
{
Vcharset_hash_table = Fmake_hash_table (2, args);
}
- charset_table_size = 128;
- charset_table = ((struct charset *)
- xmalloc (sizeof (struct charset) * charset_table_size));
+ charset_table = charset_table_init;
+ charset_table_size = sizeof charset_table_init / sizeof *charset_table_init;
charset_table_used = 0;
- staticpro (&Vchar_unified_charset_table);
- Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
-
defsubr (&Scharsetp);
defsubr (&Smap_charset_chars);
defsubr (&Sdefine_charset_internal);
defsubr (&Scharset_priority_list);
defsubr (&Sset_charset_priority);
defsubr (&Scharset_id_internal);
+ defsubr (&Ssort_charsets);
- DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
+ DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
doc: /* *List of directories to search for charset map files. */);
Vcharset_map_path = Qnil;
- DEFVAR_LISP ("charset-list", &Vcharset_list,
+ DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
+ doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
+ inhibit_load_charset_map = 0;
+
+ DEFVAR_LISP ("charset-list", Vcharset_list,
doc: /* List of all charsets ever defined. */);
Vcharset_list = Qnil;
- DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
+ DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
doc: /* ISO639 language mnemonic symbol for the current language environment.
If the current language environment is for multiple languages (e.g. "Latin-1"),
the value may be a list of mnemonics. */);
= define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
128, 255, -1, 0, -1, 0, 1,
MAX_5_BYTE_CHAR + 1);
+ charset_unibyte = charset_iso_8859_1;
}
#endif /* emacs */
-
-/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
- (do not change this comment) */