/* Basic character set support.
Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
+ Licensed to the Free Software Foundation.
Copyright (C) 2001 Free Software Foundation, Inc.
- Copyright (C) 2001, 2002
+ Copyright (C) 2003
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H13PRO009
struct charset *charset_table;
static int charset_table_size;
-int charset_table_used;
+static int charset_table_used;
Lisp_Object Qcharsetp;
CHARS, and FINAL-CHAR) to Emacs' charset. */
int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
-Lisp_Object Vcharset_map_directory;
+Lisp_Object Vcharset_map_path;
Lisp_Object Vchar_unified_charset_table;
while (1)
{
ASET (vec, from_index, make_number (from_c));
- CHAR_TABLE_SET (table, from_c, make_number (code));
+ if (NILP (CHAR_TABLE_REF (table, from_c)))
+ CHAR_TABLE_SET (table, from_c, make_number (code));
if (from_index == to_index)
break;
from_index++, from_c++;
for (; from_index <= to_index; from_index++, from_c++)
{
ASET (vec, from_index, make_number (from_c));
- CHAR_TABLE_SET (table, from_c, make_number (from_index));
+ if (NILP (CHAR_TABLE_REF (table, from_c)))
+ CHAR_TABLE_SET (table, from_c, make_number (from_index));
}
}
}
while (1)
{
int c1 = DECODE_CHAR (charset, code);
-
+
if (c1 >= 0)
{
CHAR_TABLE_SET (table, from_c, make_number (c1));
}
}
else
- CHARSET_DEUNIFIER (charset) = table;
+ CHARSET_DEUNIFIER (charset) = table;
}
if ((c = getc (fp)) == EOF || c == 'x')
break;
}
- }
+ }
if (c == EOF)
{
*eof = 1;
suffixes = Fcons (build_string (".map"),
Fcons (build_string (".TXT"), Qnil));
- fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
- NULL, 0);
+ fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
if (fd < 0
|| ! (fp = fdopen (fd, "r")))
{
Lisp_Object function, arg;
struct charset *charset;
unsigned from, to;
-
{
Lisp_Object range;
int partial;
- if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
load_charset (charset);
partial = (from > CHARSET_MIN_CODE (charset)
range = Fcons (make_number (from_c), make_number (to_c));
if (NILP (function))
- (*c_function) (range, arg);
+ (*c_function) (arg, range);
else
call2 (function, range, arg);
}
XSETCAR (range, make_number (127));
if (NILP (function))
- (*c_function) (range, arg);
+ (*c_function) (arg, range);
else
call2 (function, range, arg);
}
this_from = CHARSET_MIN_CODE (charset);
if (this_to > CHARSET_MAX_CODE (charset))
this_to = CHARSET_MAX_CODE (charset);
- map_charset_chars (c_function, function, arg, charset, from, to);
+ map_charset_chars (c_function, function, arg, charset,
+ this_from, this_to);
}
}
}
-
DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
doc: /* Call FUNCTION for all characters in CHARSET.
characters contained in CHARSET.
The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
-range of code points of targer characters. */)
+range of code points of target characters. */)
(function, charset, arg, from_code, to_code)
Lisp_Object function, charset, arg, from_code, to_code;
{
else
{
CHECK_CONS (val);
- CHECK_NUMBER (XCAR (val));
- CHECK_NUMBER (XCDR (val));
+ CHECK_NUMBER_CAR (val);
+ CHECK_NUMBER_CDR (val);
code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
}
if (code < charset.min_code
else
{
CHECK_CONS (val);
- CHECK_NUMBER (XCAR (val));
- CHECK_NUMBER (XCDR (val));
+ CHECK_NUMBER_CAR (val);
+ CHECK_NUMBER_CDR (val);
code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
}
if (code < charset.min_code
error ("Invalid iso-final-char: %d", XINT (val));
charset.iso_final = XINT (val);
}
-
+
val = args[charset_arg_iso_revision];
if (NILP (val))
charset.iso_revision = -1;
if (new_definition_p)
{
Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
- Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
+ Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
Fcons (make_number (id), Qnil));
charset_ordered_list_tick++;
}
CHECK_CHARSET_GET_CHARSET (charset, cs);
if (! cs->ascii_compatible_p
|| cs->dimension != 1)
- error ("Inappropriate unibyte charset: %s", XSYMBOL (charset)->name->data);
+ error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset)));
charset_unibyte = cs->id;
for (i = 128; i < 256; i++)
{
{
int id;
struct charset *cs;
-
+
CHECK_CHARSET_GET_ID (charset, id);
cs = CHARSET_FROM_ID (id);
if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
if (NILP (deunify))
{
if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
- error ("Can't unify charset: %s", XSYMBOL (charset)->name->data);
+ error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
if (NILP (unify_map))
unify_map = CHARSET_UNIFY_MAP (cs);
if (STRINGP (unify_map))
int max_code = CHARSET_MAX_CODE (cs);
int min_char = DECODE_CHAR (cs, min_code);
int max_char = DECODE_CHAR (cs, max_code);
-
+
char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
}
-
+
return Qnil;
}
string_xstring_p (string)
Lisp_Object string;
{
- const unsigned char *p = XSTRING (string)->data;
- const unsigned char *endp = p + STRING_BYTES (XSTRING (string));
- struct charset *charset;
+ const unsigned char *p = SDATA (string);
+ const unsigned char *endp = p + SBYTES (string);
- if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
+ if (SCHARS (string) == SBYTES (string))
return 0;
- charset = CHARSET_FROM_ID (charset_iso_8859_1);
while (p < endp)
{
int c = STRING_CHAR_ADVANCE (p);
- /* Fixme: comparison of unsigned expression < 0 is always false */
- if (ENCODE_CHAR (charset, c) < 0)
+ if (c >= 0x100)
return 2;
}
return 1;
/* Find charsets in the string at PTR of NCHARS and NBYTES.
- CHARSETS is a vector. Each element is a cons of CHARSET and
- FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
- FOUND-FLAG t (or nil) means that the corresponding charset is
- already found (or not yet found).
+ CHARSETS is a vector. If Nth element is non-nil, it means the
+ charset whose id is N is already found.
It may lookup a translation table TABLE if supplied. */
static void
-find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
+find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
const unsigned char *ptr;
- int nchars, nbytes;
+ EMACS_INT nchars, nbytes;
Lisp_Object charsets, table;
+ int multibyte;
{
const unsigned char *pend = ptr + nbytes;
- int ncharsets = ASIZE (charsets);
if (nchars == nbytes)
- return;
-
- while (ptr < pend)
{
- int c = STRING_CHAR_ADVANCE (ptr);
- int i;
- int all_found = 1;
- Lisp_Object elt;
-
- if (!NILP (table))
- c = translate_char (table, c);
- for (i = 0; i < ncharsets; i++)
+ if (multibyte)
+ ASET (charsets, charset_ascii, Qt);
+ else
+ while (ptr < pend)
+ {
+ int c = *ptr++;
+
+ if (!NILP (table))
+ c = translate_char (table, c);
+ if (ASCII_BYTE_P (c))
+ ASET (charsets, charset_ascii, Qt);
+ else
+ ASET (charsets, charset_eight_bit, Qt);
+ }
+ }
+ else
+ {
+ while (ptr < pend)
{
- elt = AREF (charsets, i);
- if (NILP (XCDR (elt)))
- {
- struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
+ int c = STRING_CHAR_ADVANCE (ptr);
+ struct charset *charset;
- if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
- XCDR (elt) = Qt;
- else
- all_found = 0;
- }
+ if (!NILP (table))
+ c = translate_char (table, c);
+ charset = CHAR_CHARSET (c);
+ ASET (charsets, CHARSET_ID (charset), Qt);
}
- if (all_found)
- break;
}
}
-/* Fixme: returns nil for unibyte. */
DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
2, 3, 0,
doc: /* Return a list of charsets in the region between BEG and END.
Lisp_Object beg, end, table;
{
Lisp_Object charsets;
- int from, from_byte, to, stop, stop_byte, i;
+ EMACS_INT from, from_byte, to, stop, stop_byte;
+ int i;
Lisp_Object val;
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
validate_region (&beg, &end);
from = XFASTINT (beg);
from_byte = CHAR_TO_BYTE (from);
charsets = Fmake_vector (make_number (charset_table_used), Qnil);
- for (i = 0; i < charset_table_used; i++)
- ASET (charsets, i, Fcons (make_number (i), Qnil));
-
while (1)
{
find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
- stop_byte - from_byte, charsets, table);
+ stop_byte - from_byte, charsets, table,
+ multibyte);
if (stop < to)
{
from = stop, from_byte = stop_byte;
val = Qnil;
for (i = charset_table_used - 1; i >= 0; i--)
- if (!NILP (XCDR (AREF (charsets, i))))
+ if (!NILP (AREF (charsets, i)))
val = Fcons (CHARSET_NAME (charset_table + i), val);
return val;
}
-/* Fixme: returns nil for unibyte. */
DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1, 2, 0,
doc: /* Return a list of charsets in STR.
CHECK_STRING (str);
charsets = Fmake_vector (make_number (charset_table_used), Qnil);
- for (i = 0; i < charset_table_used; i++)
- ASET (charsets, i, Fcons (make_number (i), Qnil));
- find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
- STRING_BYTES (XSTRING (str)), charsets, table);
-
+ find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
+ charsets, table,
+ STRING_MULTIBYTE (str));
val = Qnil;
for (i = charset_table_used - 1; i >= 0; i--)
- if (!NILP (XCDR (AREF (charsets, i))))
+ if (!NILP (AREF (charsets, i)))
val = Fcons (CHARSET_NAME (charset_table + i), val);
return val;
}
CHECK_CHARSET_GET_ID (charset, id);
if (CONSP (code_point))
{
- CHECK_NATNUM (XCAR (code_point));
- CHECK_NATNUM (XCDR (code_point));
+ CHECK_NATNUM_CAR (code_point);
+ CHECK_NATNUM_CDR (code_point);
code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
}
else
}
-/* Fixme: `unknown' can't happen now? */
DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
- doc: /*Return list of charset and one to three position-codes of CHAR.
-If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */)
+ doc:
+ /*Return list of charset and one to four position-codes of CHAR.
+The charset is decided by the current priority order of charsets.
+A position-code is a byte value of each dimension of the code-point of
+CHAR in the charset. */)
(ch)
Lisp_Object ch;
{
c = XFASTINT (ch);
charset = CHAR_CHARSET (c);
if (! charset)
- return Fcons (intern ("unknown"), Fcons (ch, Qnil));
-
+ abort ();
code = ENCODE_CHAR (charset, c);
if (code == CHARSET_INVALID_CODE (charset))
abort ();
dimension = CHARSET_DIMENSION (charset);
- val = (dimension == 1 ? Fcons (make_number (code), Qnil)
- : dimension == 2 ? Fcons (make_number (code >> 8),
- Fcons (make_number (code & 0xFF), Qnil))
- : Fcons (make_number (code >> 16),
- Fcons (make_number ((code >> 8) & 0xFF),
- Fcons (make_number (code & 0xFF), Qnil))));
+ for (val = Qnil; dimension > 0; dimension--)
+ {
+ val = Fcons (make_number (code & 0xFF), val);
+ code >>= 8;
+ }
return Fcons (CHARSET_NAME (charset), val);
}
Lisp_Object *args;
{
Lisp_Object new_head, old_list, arglist[2];
+ Lisp_Object list_2022, list_emacs_mule;
int i, id;
old_list = Fcopy_sequence (Vcharset_ordered_list);
Vcharset_ordered_list = Fnconc (2, arglist);
charset_ordered_list_tick++;
- for (old_list = Vcharset_ordered_list, new_head = Qnil;
+ for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
CONSP (old_list); old_list = XCDR (old_list))
{
if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
- new_head = Fcons (XCAR (old_list), new_head);
+ list_2022 = Fcons (XCAR (old_list), list_2022);
+ if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
+ list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
}
- Viso_2022_charset_list = Fnreverse (new_head);
+ Viso_2022_charset_list = Fnreverse (list_2022);
+ Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
return Qnil;
}
void
init_charset ()
{
-
+ Vcharset_map_path
+ = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory),
+ Qnil);
}
void
syms_of_charset ()
{
- char *p;
-
DEFSYM (Qcharsetp, "charsetp");
DEFSYM (Qascii, "ascii");
DEFSYM (Qgl, "gl");
DEFSYM (Qgr, "gr");
- p = (char *) xmalloc (30000);
-
staticpro (&Vcharset_ordered_list);
Vcharset_ordered_list = Qnil;
Vemacs_mule_charset_list = Qnil;
staticpro (&Vcharset_hash_table);
- Vcharset_hash_table = Fmakehash (Qeq);
+ {
+ Lisp_Object args[2];
+ args[0] = QCtest;
+ args[1] = Qeq;
+ Vcharset_hash_table = Fmake_hash_table (2, args);
+ }
charset_table_size = 128;
charset_table = ((struct charset *)
defsubr (&Sset_charset_priority);
defsubr (&Scharset_id_internal);
- DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
- doc: /* Directory of charset map files that come with GNU Emacs.
-The default value is sub-directory "charsets" of `data-directory'. */);
- Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
- Vdata_directory);
+ DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
+ doc: /* *Lisp of directories to search for charset map files. */);
+ Vcharset_map_path = Qnil;
DEFVAR_LISP ("charset-list", &Vcharset_list,
doc: /* List of all charsets ever defined. */);