CODING SYSTEM
- Coding system is an object for a encoding mechanism that contains
- information about how to convert byte sequence to character
+ A coding system is an object for an encoding mechanism that contains
+ information about how to convert byte sequences to character
sequences and vice versa. When we say "decode", it means converting
a byte sequence of a specific coding system into a character
sequence that is represented by Emacs' internal coding system
In Emacs Lisp, a coding system is represented by a Lisp symbol. In
C level, a coding system is represented by a vector of attributes
- stored in the hash table Vcharset_hash_table. The conversion from a
+ stored in the hash table Vcoding_system_hash_table. The conversion from
coding system symbol to attributes vector is done by looking up
Vcoding_system_hash_table by the symbol.
Coding systems are classified into the following types depending on
- the mechanism of encoding. Here's a brief descrition about type.
+ the encoding mechanism. Here's a brief description of the types.
o UTF-8
o Charset-base coding system
A coding system defined by one or more (coded) character sets.
- Decoding and encoding are done by code converter defined for each
+ Decoding and encoding are done by a code converter defined for each
character set.
- o Old Emacs' internal format (emacs-mule)
+ o Old Emacs internal format (emacs-mule)
- The coding system adopted by an old versions of Emacs (20 and 21).
+ The coding system adopted by old versions of Emacs (20 and 21).
o ISO2022-base coding system
o CCL
- If a user wants to decode/encode a text encoded in a coding system
+ If a user wants to decode/encode text encoded in a coding system
not listed above, he can supply a decoder and an encoder for it in
CCL (Code Conversion Language) programs. Emacs executes the CCL
program while decoding/encoding.
o Raw-text
A coding system for a text containing raw eight-bit data. Emacs
- treat each byte of source text as a character (except for
+ treats each byte of source text as a character (except for
end-of-line conversion).
o No-conversion
END-OF-LINE FORMAT
- How end-of-line of a text is encoded depends on a system. For
+ How end-of-line is encoded in text depends on the operating system. For
instance, Unix's format is just one byte of LF (line-feed) code,
whereas DOS's format is a two-byte sequence of `carriage-return' and
`line-feed' codes. MacOS's format is usually one byte of
`carriage-return'.
- Since text characters encoding and end-of-line encoding are
+ Since text character encoding and end-of-line encoding are
independent, any coding system described above can take any format
of end-of-line (except for no-conversion).
Before using a coding system for code conversion (i.e. decoding and
encoding), we set up a structure of type `struct coding_system'.
This structure keeps various information about a specific code
- conversion (e.g. the location of source and destination data).
+ conversion (e.g. the location of source and destination data).
*/
Lisp_Object Vcoding_system_hash_table;
Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
-Lisp_Object Qunix, Qdos, Qmac;
+Lisp_Object Qunix, Qdos;
+extern Lisp_Object Qmac; /* frame.c */
Lisp_Object Qbuffer_file_coding_system;
Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
Lisp_Object Qdefault_char;
to avoid infinite recursive call. */
static int inhibit_pre_post_conversion;
-/* Char-table containing safe coding systems of each character. */
-Lisp_Object Vchar_coding_system_table;
-Lisp_Object Qchar_coding_system;
-
/* Two special coding systems. */
Lisp_Object Vsjis_coding_system;
Lisp_Object Vbig5_coding_system;
error ("Undecodable char found"); \
c = ((c & 1) << 6) | *src++; \
} \
+ consumed_chars++; \
} while (0)
else
{
struct buffer *buf = XBUFFER (coding->src_object);
- EMACS_INT beg_byte = BUF_BEG_BYTE (buf);
EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
unsigned char *beg_addr = BUF_BEG_ADDR (buf);
}
+/* Fixme: deal with surrogates? */
static void
decode_coding_utf_8 (coding)
struct coding_system *coding;
if (! UTF_8_EXTRA_OCTET_P (c2))
goto invalid_code;
if (UTF_8_2_OCTET_LEADING_P (c1))
- c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
+ {
+ c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
+ /* Reject overlong sequences here and below. Encoders
+ producing them are incorrect, they can be misleading,
+ and they mess up read/write invariance. */
+ if (c < 128)
+ goto invalid_code;
+ }
else
{
ONE_MORE_BYTE (c3);
if (! UTF_8_EXTRA_OCTET_P (c3))
goto invalid_code;
if (UTF_8_3_OCTET_LEADING_P (c1))
- c = (((c1 & 0xF) << 12)
- | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
+ {
+ c = (((c1 & 0xF) << 12)
+ | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
+ if (c < 0x800)
+ goto invalid_code;
+ }
else
{
ONE_MORE_BYTE (c4);
if (! UTF_8_EXTRA_OCTET_P (c4))
goto invalid_code;
if (UTF_8_4_OCTET_LEADING_P (c1))
+ {
c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
| ((c3 & 0x3F) << 6) | (c4 & 0x3F));
+ if (c < 0x10000)
+ goto invalid_code;
+ }
else
{
ONE_MORE_BYTE (c5);
c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
| ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
| (c5 & 0x3F));
- if (c > MAX_CHAR)
+ if ((c > MAX_CHAR) || (c < 0x200000))
goto invalid_code;
}
else
int
-emacs_mule_char (coding, composition, nbytes, nchars)
+emacs_mule_char (coding, src, nbytes, nchars)
struct coding_system *coding;
- int composition;
+ unsigned char *src;
int *nbytes, *nchars;
{
- unsigned char *src = coding->source + coding->consumed;
unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
unsigned char *src_base = src;
int consumed_chars = 0;
ONE_MORE_BYTE (c);
- if (composition)
- {
- c -= 0x20;
- if (c == 0x80)
- {
- ONE_MORE_BYTE (c);
- if (c < 0xA0)
- goto invalid_code;
- *nbytes = src - src_base;
- *nchars = consumed_chars;
- return (c - 0x80);
- }
- }
-
switch (emacs_mule_bytes[c])
{
case 2:
if (! (charset = emacs_mule_charset[c]))
goto invalid_code;
ONE_MORE_BYTE (c);
- code = (c & 0x7F) << 7;
+ code = (c & 0x7F) << 8;
ONE_MORE_BYTE (c);
code |= c & 0x7F;
}
break;
case 4:
+ ONE_MORE_BYTE (c);
if (! (charset = emacs_mule_charset[c]))
goto invalid_code;
ONE_MORE_BYTE (c);
- code = (c & 0x7F) << 7;
+ code = (c & 0x7F) << 8;
ONE_MORE_BYTE (c);
code |= c & 0x7F;
break;
\
if (src == src_end) \
break; \
- c = emacs_mule_char (coding, 1, &nbytes, &nchars); \
+ c = emacs_mule_char (coding, src, &nbytes, &nchars); \
if (c < 0) \
{ \
if (c == -2) \
/* Decode a composition rule represented as a component of composition
- sequence of Emacs 20 style at SRC. Set C to the rule. If SRC
- points an invalid byte sequence, set C to -1. */
+ sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
+ and increment BUF. If SRC points to an invalid byte sequence, jump
+ to the label invalid_code. */
-#define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \
+#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
do { \
int c, gref, nref; \
\
- if (src < src_end) \
+ if (src >= src_end) \
goto invalid_code; \
ONE_MORE_BYTE_NO_CHECK (c); \
- c -= 0xA0; \
+ c -= 0x20; \
if (c < 0 || c >= 81) \
goto invalid_code; \
\
} while (0)
+/* Decode a composition rule represented as a component of composition
+ sequence of Emacs 21 style at SRC. The rule occupies two bytes
+ (GREF + 0x20 and NREF + 0x20). Store the decoded rule in *BUF,
+ and increment BUF. If SRC points to an invalid byte sequence, jump
+ to the label invalid_code. */
+
+#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
+ do { \
+ int gref, nref; \
+ \
+ if (src + 1>= src_end) \
+ goto invalid_code; \
+ ONE_MORE_BYTE_NO_CHECK (gref); \
+ gref -= 0x20; \
+ ONE_MORE_BYTE_NO_CHECK (nref); \
+ nref -= 0x20; \
+ if (gref < 0 || gref >= 81 \
+ || nref < 0 || nref >= 81) \
+ goto invalid_code; \
+ *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
+ } while (0)
+
+
#define ADD_COMPOSITION_DATA(buf, method, nchars) \
do { \
*buf++ = -5; \
#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
do { \
/* Emacs 21 style format. The first three bytes at SRC are \
- (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
+ (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
the byte length of this composition information, CHARS is the \
number of characters composed by this composition. */ \
- enum composition_method method = c - 0xF0; \
+ enum composition_method method = c - 0xF2; \
+ int *charbuf_base = charbuf; \
int consumed_chars_limit; \
int nbytes, nchars; \
\
while (consumed_chars < consumed_chars_limit) \
{ \
if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
- DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \
+ DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
else \
DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
+ i++; \
} \
if (consumed_chars < consumed_chars_limit) \
goto invalid_code; \
+ charbuf_base[0] -= i; \
} \
} while (0)
DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
{ \
- DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \
+ DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
} \
if (i < 1 || (buf - components) % 2 == 0) \
if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
break;
ONE_MORE_BYTE (c);
- if (c - 0xF0 >= COMPOSITION_RELATIVE
- && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
+ if (c - 0xF2 >= COMPOSITION_RELATIVE
+ && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
DECODE_EMACS_MULE_21_COMPOSITION (c);
else if (c < 0xC0)
DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
else
goto invalid_code;
+ coding->annotated = 1;
}
else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
{
int nbytes, nchars;
- src--;
- c = emacs_mule_char (coding, 0, &nbytes, &nchars);
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ c = emacs_mule_char (coding, src, &nbytes, &nchars);
if (c < 0)
{
if (c == -2)
goto invalid_code;
}
*charbuf++ = c;
+ src += nbytes;
+ consumed_chars += nchars;
char_offset++;
}
continue;
(CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
static void
-setup_iso_safe_charsets (Lisp_Object attrs)
+setup_iso_safe_charsets (attrs)
+ Lisp_Object attrs;
{
Lisp_Object charset_list, safe_charsets;
Lisp_Object request;
#define DECODE_COMPOSITION_START(c1) \
do { \
if (c1 == '0' \
- && composition_state == COMPOSING_COMPONENT_CHAR) \
+ && composition_state == COMPOSING_COMPONENT_RULE) \
{ \
component_len = component_idx; \
composition_state = COMPOSING_CHAR; \
composition_state--;
continue;
}
- else if (method == COMPOSITION_WITH_RULE)
- composition_state = COMPOSING_RULE;
- else if (method == COMPOSITION_WITH_RULE_ALTCHARS
- && composition_state == COMPOSING_COMPONENT_CHAR)
- composition_state = COMPOSING_COMPONENT_CHAR;
}
if (charset_id_0 < 0
|| ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
- {
- /* This is SPACE or DEL. */
- charset = CHARSET_FROM_ID (charset_ascii);
- break;
- }
- /* This is a graphic character, we fall down ... */
+ /* This is SPACE or DEL. */
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_0);
+ break;
case ISO_graphic_plane_0:
- if (composition_state == COMPOSING_RULE)
+ if (composition_state != COMPOSING_NO)
{
- DECODE_COMPOSITION_RULE (c1);
- components[component_idx++] = c1;
- composition_state = COMPOSING_CHAR;
+ if (composition_state == COMPOSING_RULE
+ || composition_state == COMPOSING_COMPONENT_RULE)
+ {
+ DECODE_COMPOSITION_RULE (c1);
+ components[component_idx++] = c1;
+ composition_state--;
+ continue;
+ }
}
charset = CHARSET_FROM_ID (charset_id_0);
break;
char_offset++;
}
else
- components[component_idx++] = c;
+ {
+ components[component_idx++] = c;
+ if (method == COMPOSITION_WITH_RULE
+ || (method == COMPOSITION_WITH_RULE_ALTCHARS
+ && composition_state == COMPOSING_COMPONENT_CHAR))
+ composition_state++;
+ }
continue;
invalid_code:
int c;
CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ setup_iso_safe_charsets (attrs);
+ coding->safe_charsets
+ = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
if (!charset)
{
- c = coding->default_char;
- charset = char_charset (c, charset_list, NULL);
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
+ {
+ c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ charset = CHARSET_FROM_ID (charset_ascii);
+ }
+ else
+ {
+ c = coding->default_char;
+ charset = char_charset (c, charset_list, NULL);
+ }
}
ENCODE_ISO_CHARACTER (charset, c);
}
if (!charset)
{
- c = coding->default_char;
- charset = char_charset (c, charset_list, &code);
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
+ {
+ code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ charset = CHARSET_FROM_ID (charset_ascii);
+ }
+ else
+ {
+ c = coding->default_char;
+ charset = char_charset (c, charset_list, &code);
+ }
}
if (code == CHARSET_INVALID_CODE (charset))
abort ();
if (! charset)
{
- c = coding->default_char;
- charset = char_charset (c, charset_list, &code);
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
+ {
+ code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ charset = CHARSET_FROM_ID (charset_ascii);
+ }
+ else
+ {
+ c = coding->default_char;
+ charset = char_charset (c, charset_list, &code);
+ }
}
if (code == CHARSET_INVALID_CODE (charset))
abort ();
int *charbuf_end = charbuf + coding->charbuf_size;
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
- struct charset *charset;
- Lisp_Object attrs, eol_type, charset_list;
+ Lisp_Object attrs, eol_type, charset_list, valids;
CODING_GET_INFO (coding, attrs, eol_type, charset_list);
- charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ valids = AREF (attrs, coding_attr_charset_valids);
while (1)
{
- int c, c1;
+ int c;
src_base = src;
consumed_chars_base = consumed_chars;
if (charbuf >= charbuf_end)
break;
- ONE_MORE_BYTE (c1);
+ ONE_MORE_BYTE (c);
if (c == '\r')
{
+ /* Here we assume that no charset maps '\r' to something
+ else. */
if (EQ (eol_type, Qdos))
{
- if (src == src_end)
- goto no_more_source;
- if (*src == '\n')
+ if (src < src_end
+ && *src == '\n')
ONE_MORE_BYTE (c);
}
else if (EQ (eol_type, Qmac))
}
else
{
- CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
+ Lisp_Object val;
+ struct charset *charset;
+ int dim;
+ int len = 1;
+ unsigned code = c;
+
+ val = AREF (valids, c);
+ if (NILP (val))
+ goto invalid_code;
+ if (INTEGERP (val))
+ {
+ charset = CHARSET_FROM_ID (XFASTINT (val));
+ dim = CHARSET_DIMENSION (charset);
+ while (len < dim)
+ {
+ ONE_MORE_BYTE (c);
+ code = (code << 8) | c;
+ len++;
+ }
+ CODING_DECODE_CHAR (coding, src, src_base, src_end,
+ charset, code, c);
+ }
+ else
+ {
+ /* VAL is a list of charset IDs. It is assured that the
+ list is sorted by charset dimensions (smaller one
+ comes first). */
+ while (CONSP (val))
+ {
+ charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
+ dim = CHARSET_DIMENSION (charset);
+ while (len < dim)
+ {
+ ONE_MORE_BYTE (c);
+ code = (code << 8) | c;
+ len++;
+ }
+ CODING_DECODE_CHAR (coding, src, src_base,
+ src_end, charset, code, c);
+ if (c >= 0)
+ break;
+ val = XCDR (val);
+ }
+ }
if (c < 0)
goto invalid_code;
}
unsigned char *dst_end = coding->destination + coding->dst_bytes;
int safe_room = MAX_MULTIBYTE_LENGTH;
int produced_chars = 0;
- struct charset *charset;
Lisp_Object attrs, eol_type, charset_list;
int ascii_compatible;
int c;
CODING_GET_INFO (coding, attrs, eol_type, charset_list);
- charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
while (charbuf < charbuf_end)
{
+ struct charset *charset;
unsigned code;
ASSURE_DESTINATION (safe_room);
c = *charbuf++;
if (ascii_compatible && ASCII_CHAR_P (c))
EMIT_ONE_ASCII_BYTE (c);
- else if ((code = ENCODE_CHAR (charset, c))
- != CHARSET_INVALID_CODE (charset))
- EMIT_ONE_BYTE (code);
else
- EMIT_ONE_BYTE (coding->default_char);
+ {
+ charset = char_charset (c, charset_list, &code);
+ if (charset)
+ {
+ if (CHARSET_DIMENSION (charset) == 1)
+ EMIT_ONE_BYTE (code);
+ else if (CHARSET_DIMENSION (charset) == 2)
+ EMIT_TWO_BYTES (code >> 8, code & 0xFF);
+ else if (CHARSET_DIMENSION (charset) == 3)
+ EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
+ else
+ EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
+ (code >> 8) & 0xFF, code & 0xFF);
+ }
+ else
+ {
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
+ c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ else
+ c = coding->default_char;
+ EMIT_ONE_BYTE (c);
+ }
+ }
}
coding->result = CODING_RESULT_SUCCESS;
Lisp_Object
coding_inherit_eol_type (coding_system, parent)
+ Lisp_Object coding_system, parent;
{
Lisp_Object spec, attrs, eol_type;
EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
Lisp_Object val;
+ TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
GCPRO2 (coding->src_object, coding->dst_object);
val = call1 (CODING_ATTR_POST_READ (attrs),
make_number (coding->produced_char));
if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
{
- Lisp_Object val;
-
coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
set_buffer_internal (XBUFFER (coding->src_object));
if (STRINGP (src_object))
set_buffer_internal (XBUFFER (coding->src_object));
}
- val = call2 (CODING_ATTR_PRE_WRITE (attrs),
- make_number (1), make_number (chars));
- CHECK_NATNUM (val);
+ call2 (CODING_ATTR_PRE_WRITE (attrs),
+ make_number (BEG), make_number (Z));
+ coding->src_object = Fcurrent_buffer ();
if (BEG != GPT)
move_gap_both (BEG, BEG_BYTE);
coding->src_chars = Z - BEG;
else if (EQ (dst_object, Qt))
{
coding->dst_object = Qnil;
- coding->destination = (unsigned char *) xmalloc (coding->src_chars);
coding->dst_bytes = coding->src_chars;
+ if (coding->dst_bytes == 0)
+ coding->dst_bytes = 1;
+ coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
coding->dst_multibyte = 0;
}
else
val = CODING_ATTR_CHARSET_LIST (attrs);
charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
- charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
- charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
+ charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
if (c <= 0x7F)
charset = charset_roman;
}
else
{
- int s1 = c >> 8, s2 = c & 0x7F;
+ int s1 = c >> 8, s2 = c & 0xFF;
if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
|| s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
Sset_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
+ Lisp_Object coding_system;
{
CHECK_SYMBOL (coding_system);
setup_coding_system (Fcheck_coding_system (coding_system),
Sset_safe_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
+ Lisp_Object coding_system;
{
CHECK_SYMBOL (coding_system);
setup_coding_system (Fcheck_coding_system (coding_system),
DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
Sset_coding_system_priority, 1, MANY, 0,
- doc: /* Put higher priority to coding systems of the arguments. */)
+ doc: /* Assign higher priority to the coding systems given as arguments.
+usage: (set-coding-system-priority CODING-SYSTEM ...) */)
(nargs, args)
int nargs;
Lisp_Object *args;
DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
Scoding_system_priority_list, 0, 1, 0,
- doc: /* Return a list of coding systems ordered by their priorities. */)
+ doc: /* Return a list of coding systems ordered by their priorities.
+HIGHESTP non-nil means just return the highest priority one. */)
(highestp)
Lisp_Object highestp;
{
return Fnreverse (val);
}
+static char *suffixes[] = { "-unix", "-dos", "-mac" };
+
static Lisp_Object
make_subsidiaries (base)
Lisp_Object base;
{
Lisp_Object subsidiaries;
- char *suffixes[] = { "-unix", "-dos", "-mac" };
int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
char *buf = (char *) alloca (base_name_len + 6);
int i;
DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
- doc: /* For internal use only. */)
+ doc: /* For internal use only.
+usage: (define-coding-system-internal ...) */)
(nargs, args)
int nargs;
Lisp_Object *args;
if (EQ (coding_type, Qcharset))
{
+ /* Generate a Lisp vector of 256 elements. Each element is nil,
+ an integer, or a list of charset IDs.
+
+ If the Nth element is nil, the byte code N is invalid in this
+ coding system.
+
+ If the Nth element is a number NUM, N is the first byte of a
+ charset whose ID is NUM.
+
+ If the Nth element is a list of charset IDs, N is the first byte
+ of one of them. The list is sorted by the dimensions of the
+ charsets. A charset of smaller dimension comes first.
+ */
val = Fmake_vector (make_number (256), Qnil);
for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
{
- struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
+ struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
+ int dim = CHARSET_DIMENSION (charset);
+ int idx = (dim - 1) * 4;
+
+ for (i = charset->code_space[idx];
+ i <= charset->code_space[idx + 1]; i++)
+ {
+ Lisp_Object tmp, tmp2;
+ int dim2;
- for (i = charset->code_space[0]; i <= charset->code_space[1]; i++)
- if (NILP (AREF (val, i)))
- ASET (val, i, XCAR (tail));
+ tmp = AREF (val, i);
+ if (NILP (tmp))
+ tmp = XCAR (tail);
+ else if (NUMBERP (tmp))
+ {
+ dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
+ if (dim < dim2)
+ tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
+ else
+ tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
+ }
+ else
+ {
+ for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
+ {
+ dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
+ if (dim < dim2)
+ break;
+ }
+ if (NILP (tmp2))
+ tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
+ else
+ {
+ XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
+ XSETCAR (tmp2, XCAR (tail));
+ }
+ }
+ ASET (val, i, tmp);
+ }
}
ASET (attrs, coding_attr_charset_valids, val);
category = coding_category_charset;
else if (EQ (coding_type, Qiso_2022))
{
Lisp_Object initial, reg_usage, request, flags;
- struct charset *charset;
int i, id;
if (nargs < coding_arg_iso2022_max)
make_number (nargs)));
}
+/* Fixme: should this record the alias relationships for
+ diagnostics? */
DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
Sdefine_coding_system_alias, 2, 2, 0,
doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
}
Fputhash (alias, spec, Vcoding_system_hash_table);
- Vcoding_system_alist = Fcons (Fcons (alias, Qnil), Vcoding_system_alist);
+ Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
+ Vcoding_system_alist);
return Qnil;
}
DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
1, 1, 0,
doc: /* Return the base of CODING-SYSTEM.
-Any alias or subsidiary coding systems are not base coding system. */)
+Any alias or subsidiary coding system is not a base coding system. */)
(coding_system)
Lisp_Object coding_system;
{
DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
1, 1, 0,
- doc: /* Return the list of aliases of CODING-SYSTEM.
-A base coding system is what made by `define-coding-system'.
-Any alias nor subsidiary coding systems are not base coding system. */)
+ doc: /* Return the list of aliases of CODING-SYSTEM. */)
(coding_system)
Lisp_Object coding_system;
{
if (NILP (coding_system))
coding_system = Qno_conversion;
CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
- return AREF (spec, 2);
+ return AREF (spec, 1);
}
DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
{
emacs_mule_bytes[i] = 1;
}
+ emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3;
+ emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3;
+ emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4;
+ emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4;
}
#ifdef emacs
DEFSYM (Qeol_type, "eol-type");
DEFSYM (Qunix, "unix");
DEFSYM (Qdos, "dos");
- DEFSYM (Qmac, "mac");
DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
DEFSYM (Qpost_read_conversion, "post-read-conversion");
DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
- DEFSYM (Qchar_coding_system, "char-coding-system");
-
- Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2));
-
DEFSYM (Qvalid_codes, "valid-codes");
DEFSYM (Qemacs_mule, "emacs-mule");
The default value is `select-safe-coding-system' (which see). */);
Vselect_safe_coding_system_function = Qnil;
- DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
- doc: /*
-Char-table containing safe coding systems of each characters.
-Each element doesn't include such generic coding systems that can
-encode any characters. They are in the first extra slot. */);
- Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
-
DEFVAR_BOOL ("inhibit-iso-escape-detection",
&inhibit_iso_escape_detection,
doc: /*