2. Emacs' internal format (emacs-mule) handlers
3. ISO2022 handlers
4. Shift-JIS and BIG5 handlers
- 5. End-of-line handlers
- 6. C library functions
- 7. Emacs Lisp library functions
- 8. Post-amble
+ 5. CCL handlers
+ 6. End-of-line handlers
+ 7. C library functions
+ 8. Emacs Lisp library functions
+ 9. Post-amble
*/
Lisp_Object Qno_conversion, Qundecided;
Lisp_Object Qcoding_system_history;
Lisp_Object Qsafe_charsets;
+Lisp_Object Qvalid_codes;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
Lisp_Object Vlast_coding_system_used;
/* A vector of length 256 which contains information about special
- Latin codes (espepcially for dealing with Microsoft code). */
+ Latin codes (especially for dealing with Microsoft codes). */
Lisp_Object Vlatin_extra_code_table;
/* Flag to inhibit code conversion of end-of-line format. */
/* Coding system of what is sent from terminal keyboard. */
struct coding_system keyboard_coding;
+/* Default coding system to be used to write a file. */
+struct coding_system default_buffer_file_coding;
+
Lisp_Object Vfile_coding_system_alist;
Lisp_Object Vprocess_coding_system_alist;
Lisp_Object Vnetwork_coding_system_alist;
"coding-category-iso-8-2",
"coding-category-iso-7-else",
"coding-category-iso-8-else",
+ "coding-category-ccl",
"coding-category-big5",
"coding-category-raw-text",
"coding-category-binary"
};
-/* Table pointers to coding systems corresponding to each coding
+/* Table of pointers to coding systems corresponding to each of the coding
categories. */
struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
-/* Flag to tell if we look up unification table on character code
+/* Table of coding category masks. Nth element is a mask for a coding
+ category whose priority is Nth. */
+static
+int coding_priorities[CODING_CATEGORY_IDX_MAX];
+
+/* Flag to tell if we look up translation table on character code
conversion. */
-Lisp_Object Venable_character_unification;
-/* Standard unification table to look up on decoding (reading). */
-Lisp_Object Vstandard_character_unification_table_for_decode;
-/* Standard unification table to look up on encoding (writing). */
-Lisp_Object Vstandard_character_unification_table_for_encode;
+Lisp_Object Venable_character_translation;
+/* Standard translation table to look up on decoding (reading). */
+Lisp_Object Vstandard_translation_table_for_decode;
+/* Standard translation table to look up on encoding (writing). */
+Lisp_Object Vstandard_translation_table_for_encode;
-Lisp_Object Qcharacter_unification_table;
-Lisp_Object Qcharacter_unification_table_for_decode;
-Lisp_Object Qcharacter_unification_table_for_encode;
+Lisp_Object Qtranslation_table;
+Lisp_Object Qtranslation_table_id;
+Lisp_Object Qtranslation_table_for_decode;
+Lisp_Object Qtranslation_table_for_encode;
/* Alist of charsets vs revision number. */
Lisp_Object Vcharset_revision_alist;
enum iso_code_class_type iso_code_class[256];
-#define CHARSET_OK(idx, charset) \
- (coding_system_table[idx]->safe_charsets[charset] \
- || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \
- (coding_system_table[idx], charset) \
- != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
+#define CHARSET_OK(idx, charset) /* nonzero if category IDX can take CHARSET */ \
+ (coding_system_table[idx] /* zero when no coding system is set for IDX */ \
+ && (coding_system_table[idx]->safe_charsets[charset] /* entry is nonzero */ \
+ || (CODING_SPEC_ISO_REQUESTED_DESIGNATION /* or some designation is requested */ \
+ (coding_system_table[idx], charset) \
+ != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
#define SHIFT_OUT_OK(idx) \
(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
{
int mask = CODING_CATEGORY_MASK_ISO;
int mask_found = 0;
- int reg[4], shift_out = 0;
+ int reg[4], shift_out = 0, single_shifting = 0;
int c, c1, i, charset;
reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
switch (c)
{
case ISO_CODE_ESC:
+ single_shifting = 0;
if (src >= src_end)
break;
c = *src++;
break;
case ISO_CODE_SO:
+ single_shifting = 0;
if (shift_out == 0
&& (reg[1] >= 0
|| SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
break;
case ISO_CODE_SI:
+ single_shifting = 0;
if (shift_out == 1)
{
/* Locking shift in. */
break;
case ISO_CODE_CSI:
+ single_shifting = 0;
case ISO_CODE_SS2:
case ISO_CODE_SS3:
{
if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
& CODING_FLAG_ISO_SINGLE_SHIFT)
newmask |= CODING_CATEGORY_MASK_ISO_8_2;
+ single_shifting = 1;
}
if (VECTORP (Vlatin_extra_code_table)
&& !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
default:
if (c < 0x80)
- break;
+ {
+ single_shifting = 0;
+ break;
+ }
else if (c < 0xA0)
{
+ single_shifting = 0;
if (VECTORP (Vlatin_extra_code_table)
&& !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
{
mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
| CODING_CATEGORY_MASK_ISO_7_ELSE);
mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
- while (src < src_end && *src >= 0xA0)
- src++;
- if ((src - src_begin - 1) & 1 && src < src_end)
- mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
- else
- mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+ /* Check the length of succeeding codes of the range
+ 0xA0..0xFF. If the byte length is odd, we exclude
+ CODING_CATEGORY_MASK_ISO_8_2. We can check this only
+ when we are not single shifting. */
+ if (!single_shifting)
+ {
+ while (src < src_end && *src >= 0xA0)
+ src++;
+ if ((src - src_begin - 1) & 1 && src < src_end)
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
+ else
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+ }
}
break;
}
*dst++ = 0xFF; \
coding->composing += 2; \
} \
- if ((charset) >= 0) \
+ if (charset_alt >= 0) \
{ \
- if (CHARSET_DIMENSION (charset) == 2) \
+ if (CHARSET_DIMENSION (charset_alt) == 2) \
{ \
ONE_MORE_BYTE (c2); \
if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \
&& iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \
{ \
src--; \
- c2 = ' '; \
+ charset_alt = CHARSET_ASCII; \
} \
} \
- if (!NILP (unification_table) \
- && ((c_alt = unify_char (unification_table, \
- -1, (charset), c1, c2)) >= 0)) \
+ if (!NILP (translation_table) \
+ && ((c_alt = translate_char (translation_table, \
+ -1, charset_alt, c1, c2)) >= 0)) \
SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
} \
if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
Else, if it contains only valid codes, return 0.
Else return the length of the composing sequence. */
-int check_composing_code (coding, src, src_end)
+int
+check_composing_code (coding, src, src_end)
struct coding_system *coding;
unsigned char *src, *src_end;
{
invalid_code_found = 1;
}
}
- return ((coding->mode & CODING_MODE_LAST_BLOCK) ? src_end - src_start : -1);
+ return (invalid_code_found
+ ? src - src_start
+ : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
}
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* Charsets invoked to graphic plane 0 and 1 respectively. */
int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
- Lisp_Object unification_table
- = coding->character_unification_table_for_decode;
+ Lisp_Object translation_table
+ = coding->translation_table_for_decode;
int result = CODING_FINISH_NORMAL;
- if (!NILP (Venable_character_unification) && NILP (unification_table))
- unification_table = Vstandard_character_unification_table_for_decode;
+ if (!NILP (Venable_character_translation) && NILP (translation_table))
+ translation_table = Vstandard_translation_table_for_decode;
coding->produced_char = 0;
coding->fake_multibyte = 0;
result1 = check_composing_code (coding, src, src_end);
if (result1 == 0)
- coding->composing = (c1 == '0'
- ? COMPOSING_NO_RULE_HEAD
- : COMPOSING_WITH_RULE_HEAD);
+ {
+ coding->composing = (c1 == '0'
+ ? COMPOSING_NO_RULE_HEAD
+ : COMPOSING_WITH_RULE_HEAD);
+ coding->produced_char++;
+ }
else if (result1 > 0)
{
if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
case '1': /* end composing */
coding->composing = COMPOSING_NO;
- coding->produced_char++;
break;
case '[': /* specification of direction */
dst = encode_invocation_designation (charset, coding, dst); \
} while (1)
-#define ENCODE_ISO_CHARACTER(charset, c1, c2) \
- do { \
- int c_alt, charset_alt; \
- if (!NILP (unification_table) \
- && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
- >= 0)) \
- SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
- else \
- charset_alt = charset; \
- if (CHARSET_DIMENSION (charset_alt) == 1) \
- { \
- if (charset == CHARSET_ASCII \
- && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
- charset_alt = charset_latin_jisx0201; \
- ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \
- } \
- else \
- { \
- if (charset == charset_jisx0208 \
- && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
- charset_alt = charset_jisx0208_1978; \
- ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
- } \
- if (! COMPOSING_P (coding->composing)) \
- coding->consumed_char++; \
- } while (0)
+#define ENCODE_ISO_CHARACTER(charset, c1, c2) /* uses `coding' and `translation_table' of the expansion site */ \
+ do { \
+ int c_alt, charset_alt; \
+ if (!NILP (translation_table) /* translate first when a table is in effect */ \
+ && ((c_alt = translate_char (translation_table, -1, \
+ charset, c1, c2)) \
+ >= 0)) /* a negative result leaves the character as it is */ \
+ SPLIT_CHAR (c_alt, charset_alt, c1, c2); /* NOTE(review): presumably stores into C1 and C2, so both must be lvalues -- confirm */ \
+ else \
+ charset_alt = charset; \
+ if (CHARSET_DIMENSION (charset_alt) == 1) \
+ { \
+ if (charset == CHARSET_ASCII /* tests the untranslated CHARSET */ \
+ && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
+ charset_alt = charset_latin_jisx0201; /* substitute for ASCII when the flag is set */ \
+ ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \
+ } \
+ else \
+ { \
+ if (charset == charset_jisx0208 /* tests the untranslated CHARSET */ \
+ && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
+ charset_alt = charset_jisx0208_1978; /* substitute for jisx0208 when the flag is set */ \
+ ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
+ } \
+ if (! COMPOSING_P (coding->composing)) /* not counted while composing */ \
+ coding->consumed_char++; \
+ } while (0)
/* Produce designation and invocation codes at a place pointed by DST
to use CHARSET. The element `spec.iso2022' of *CODING is updated.
unsigned char c1, c2;
SPLIT_STRING(src, bytes, charset, c1, c2);
- if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
+ if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
charset = CHAR_CHARSET (c_alt);
}
from DST_END to assure overflow checking is necessary only at the
head of loop. */
unsigned char *adjusted_dst_end = dst_end - 19;
- Lisp_Object unification_table
- = coding->character_unification_table_for_encode;
+ Lisp_Object translation_table
+ = coding->translation_table_for_encode;
int result = CODING_FINISH_NORMAL;
- if (!NILP (Venable_character_unification) && NILP (unification_table))
- unification_table = Vstandard_character_unification_table_for_encode;
+ if (!NILP (Venable_character_translation) && NILP (translation_table))
+ translation_table = Vstandard_translation_table_for_encode;
coding->consumed_char = 0;
coding->fake_multibyte = 0;
&& CODING_SPEC_ISO_BOL (coding))
{
/* We have to produce designation sequences if any now. */
- encode_designation_at_bol (coding, unification_table,
+ encode_designation_at_bol (coding, translation_table,
src, src_end, &dst);
CODING_SPEC_ISO_BOL (coding) = 0;
}
{
/* invalid sequence */
*dst++ = c1;
- *dst++ = c2;
- coding->consumed_char += 2;
+ src--;
+ coding->consumed_char++;
}
else
ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
{
/* invalid sequence */
*dst++ = c1;
- *dst++ = c2;
- *dst++ = c3;
- coding->consumed_char += 3;
+ src -= 2;
+ coding->consumed_char++;
}
else if (c1 < LEADING_CODE_PRIVATE_11)
ENCODE_ISO_CHARACTER (c1, c2, c3);
{
/* invalid sequence */
*dst++ = c1;
- *dst++ = c2;
- *dst++ = c3;
- *dst++ = c4;
- coding->consumed_char += 4;
+ src -= 3;
+ coding->consumed_char++;
}
else
ENCODE_ISO_CHARACTER (c2, c3, c4);
{
/* invalid sequence */
*dst++ = c1;
- *dst++ = c2;
- coding->consumed_char += 2;
+ src--;
+ coding->consumed_char++;
}
else if (c2 == 0xFF)
{
break;
}
- if (src < src_end)
+ if (src < src_end && result == CODING_FINISH_NORMAL)
+ result = CODING_FINISH_INSUFFICIENT_DST;
+
+ /* If this is the last block of the text to be encoded, we must
+ reset graphic planes and registers to the initial state, and
+ flush out the carryover if any. */
+ if (coding->mode & CODING_MODE_LAST_BLOCK)
{
- if (result == CODING_FINISH_NORMAL)
- result = CODING_FINISH_INSUFFICIENT_DST;
- else
- /* If this is the last block of the text to be encoded, we
- must reset graphic planes and registers to the initial
- state, and flush out the carryover if any. */
- if (coding->mode & CODING_MODE_LAST_BLOCK)
- ENCODE_RESET_PLANE_AND_REGISTER;
+ ENCODE_RESET_PLANE_AND_REGISTER;
+ if (COMPOSING_P (coding->composing))
+ ENCODE_COMPOSITION_END;
}
-
coding->consumed = src - source;
coding->produced = coding->produced_char = dst - destination;
return result;
(character set) (range)
ASCII 0x00 .. 0x7F
KATAKANA-JISX0201 0xA0 .. 0xDF
- JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xFF
+ JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF
(2nd byte) 0x40 .. 0xFF
-------------------------------
#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \
do { \
int c_alt, charset_alt = (charset); \
- if (!NILP (unification_table) \
- && ((c_alt = unify_char (unification_table, \
- -1, (charset), c1, c2)) >= 0)) \
+ if (!NILP (translation_table) \
+ && ((c_alt = translate_char (translation_table, \
+ -1, (charset), c1, c2)) >= 0)) \
SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
DECODE_CHARACTER_ASCII (c1); \
DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
} while (0)
-#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \
- do { \
- int c_alt, charset_alt; \
- if (!NILP (unification_table) \
- && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
- >= 0)) \
- SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
- else \
- charset_alt = charset; \
- if (charset_alt == charset_ascii) \
- *dst++ = c1; \
- else if (CHARSET_DIMENSION (charset_alt) == 1) \
- { \
- if (sjis_p && charset_alt == charset_katakana_jisx0201) \
- *dst++ = c1; \
- else \
- { \
- *dst++ = charset_alt, *dst++ = c1; \
- coding->fake_multibyte = 1; \
- } \
- } \
- else \
- { \
- c1 &= 0x7F, c2 &= 0x7F; \
- if (sjis_p && charset_alt == charset_jisx0208) \
- { \
- unsigned char s1, s2; \
- \
- ENCODE_SJIS (c1, c2, s1, s2); \
- *dst++ = s1, *dst++ = s2; \
- coding->fake_multibyte = 1; \
- } \
- else if (!sjis_p \
- && (charset_alt == charset_big5_1 \
- || charset_alt == charset_big5_2)) \
- { \
- unsigned char b1, b2; \
- \
- ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \
- *dst++ = b1, *dst++ = b2; \
- } \
- else \
- { \
- *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \
- coding->fake_multibyte = 1; \
- } \
- } \
- coding->consumed_char++; \
+#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2) /* uses `coding', `dst', `sjis_p' and `translation_table' of the expansion site */ \
+ do { \
+ int c_alt, charset_alt; \
+ if (!NILP (translation_table) /* translate first when a table is in effect */ \
+ && ((c_alt = translate_char (translation_table, -1, \
+ charset, c1, c2)) \
+ >= 0)) /* a negative result leaves the character as it is */ \
+ SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
+ else \
+ charset_alt = charset; \
+ if (charset_alt == charset_ascii) \
+ *dst++ = c1; /* ASCII: one byte, unchanged */ \
+ else if (CHARSET_DIMENSION (charset_alt) == 1) \
+ { \
+ if (sjis_p && charset_alt == charset_katakana_jisx0201) \
+ *dst++ = c1; /* SJIS: the katakana byte is written alone */ \
+ else \
+ { \
+ /* Any other 1-dimension charset: write the charset byte and C1. */ \
+ *dst++ = charset_alt, *dst++ = c1; \
+ coding->fake_multibyte = 1; \
+ } \
+ } \
+ else \
+ { \
+ c1 &= 0x7F, c2 &= 0x7F; /* clear the top bit of both position codes */ \
+ if (sjis_p && charset_alt == charset_jisx0208) \
+ { \
+ unsigned char s1, s2; \
+ \
+ ENCODE_SJIS (c1, c2, s1, s2); \
+ *dst++ = s1, *dst++ = s2; \
+ coding->fake_multibyte = 1; \
+ } \
+ else if (!sjis_p \
+ && (charset_alt == charset_big5_1 \
+ || charset_alt == charset_big5_2)) \
+ { \
+ unsigned char b1, b2; \
+ \
+ ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \
+ *dst++ = b1, *dst++ = b2; \
+ } \
+ else \
+ { \
+ /* Any other 2-dimension charset: write the charset byte, C1 and C2. */ \
+ *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \
+ coding->fake_multibyte = 1; \
+ } \
+ } \
+ coding->consumed_char++; /* one source character consumed per expansion */ \
} while (0)
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
from DST_END to assure overflow checking is necessary only at the
head of loop. */
unsigned char *adjusted_dst_end = dst_end - 3;
- Lisp_Object unification_table
- = coding->character_unification_table_for_decode;
+ Lisp_Object translation_table
+ = coding->translation_table_for_decode;
int result = CODING_FINISH_NORMAL;
- if (!NILP (Venable_character_unification) && NILP (unification_table))
- unification_table = Vstandard_character_unification_table_for_decode;
+ if (!NILP (Venable_character_translation) && NILP (translation_table))
+ translation_table = Vstandard_translation_table_for_decode;
coding->produced_char = 0;
coding->fake_multibyte = 0;
}
else if (c1 < 0x80)
DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
- else if (c1 < 0xA0)
+ else
{
- /* SJIS -> JISX0208 */
if (sjis_p)
{
- ONE_MORE_BYTE (c2);
- if (c2 >= 0x40)
+ if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
{
- DECODE_SJIS (c1, c2, c3, c4);
- DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
+ /* SJIS -> JISX0208 */
+ ONE_MORE_BYTE (c2);
+ if (c2 >= 0x40)
+ {
+ DECODE_SJIS (c1, c2, c3, c4);
+ DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
+ }
+ else
+ goto label_invalid_code_2;
}
+ else if (c1 < 0xE0)
+ /* SJIS -> JISX0201-Kana */
+ DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
+ /* dummy */ c2);
else
- goto label_invalid_code_2;
+ goto label_invalid_code_1;
}
else
- goto label_invalid_code_1;
- }
- else if (c1 < 0xE0)
- {
- /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
- if (sjis_p)
- DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
- /* dummy */ c2);
- else
- {
- int charset;
-
- ONE_MORE_BYTE (c2);
- if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
- {
- DECODE_BIG5 (c1, c2, charset, c3, c4);
- DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
- }
- else
- goto label_invalid_code_2;
- }
- }
- else /* C1 >= 0xE0 */
- {
- /* SJIS -> JISX0208, BIG5 -> Big5 */
- if (sjis_p)
{
- ONE_MORE_BYTE (c2);
- if (c2 >= 0x40)
+ /* BIG5 -> Big5 */
+ if (c1 >= 0xA1 && c1 <= 0xFE)
{
- DECODE_SJIS (c1, c2, c3, c4);
- DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
- }
- else
- goto label_invalid_code_2;
- }
- else
- {
- int charset;
+ ONE_MORE_BYTE (c2);
+ if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
+ {
+ int charset;
- ONE_MORE_BYTE (c2);
- if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
- {
- DECODE_BIG5 (c1, c2, charset, c3, c4);
- DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
+ DECODE_BIG5 (c1, c2, charset, c3, c4);
+ DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
+ }
+ else
+ goto label_invalid_code_2;
}
else
- goto label_invalid_code_2;
+ goto label_invalid_code_1;
}
}
continue;
from DST_END to assure overflow checking is necessary only at the
head of loop. */
unsigned char *adjusted_dst_end = dst_end - 1;
- Lisp_Object unification_table
- = coding->character_unification_table_for_encode;
+ Lisp_Object translation_table
+ = coding->translation_table_for_encode;
int result = CODING_FINISH_NORMAL;
- if (!NILP (Venable_character_unification) && NILP (unification_table))
- unification_table = Vstandard_character_unification_table_for_encode;
+ if (!NILP (Venable_character_translation) && NILP (translation_table))
+ translation_table = Vstandard_translation_table_for_encode;
coding->consumed_char = 0;
coding->fake_multibyte = 0;
}
\f
-/*** 5. End-of-line handlers ***/
+/*** 5. CCL handlers ***/
+
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+   Test whether the bytes from SRC up to (but not including) SRC_END
+   could belong to the coding system assigned to coding-category-ccl,
+   i.e. a coding system whose encoder/decoder are written as CCL
+   programs.  Return CODING_CATEGORY_MASK_CCL if every byte is marked
+   in that coding system's valid_codes table (trivially so for an
+   empty range).  Return 0 if some byte is not marked, or if no
+   coding system is assigned to the category.  */
+
+int
+detect_coding_ccl (src, src_end)
+     unsigned char *src, *src_end;
+{
+  struct coding_system *ccl_coding
+    = coding_system_table[CODING_CATEGORY_IDX_CCL];
+  unsigned char *p;
+
+  /* Without an assigned coding system there is no table to check
+     against.  */
+  if (!ccl_coding)
+    return 0;
+
+  /* The first byte that is not marked valid rules the category
+     out.  */
+  for (p = src; p < src_end; p++)
+    if (!ccl_coding->spec.ccl.valid_codes[*p])
+      return 0;
+
+  return CODING_CATEGORY_MASK_CCL;
+}
+
+\f
+/*** 6. End-of-line handlers ***/
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
This function is called only when `coding->eol_type' is
else
safe_bcopy (source, destination, src_bytes);
src += src_bytes;
- dst += dst_bytes;
+ dst += src_bytes;
coding->fake_multibyte = 1;
break;
}
if (dst_bytes)
bcopy (source, destination, src_bytes);
else
- {
- safe_bcopy (source, destination, src_bytes);
- dst_bytes = src_bytes;
- }
- if (coding->eol_type == CODING_EOL_CRLF)
+ safe_bcopy (source, destination, src_bytes);
+ dst_bytes = src_bytes;
+ if (coding->eol_type == CODING_EOL_CR)
{
while (src_bytes--)
{
if ((c = *dst++) == '\n')
dst[-1] = '\r';
else if (BASE_LEADING_CODE_P (c))
- coding->fake_multibyte = 1;
+ coding->fake_multibyte = 1;
}
}
else
}
\f
-/*** 6. C library functions ***/
+/*** 7. C library functions ***/
/* In Emacs Lisp, coding system is represented by a Lisp symbol which
has a property `coding-system'. The value of this property is a
/* Initialize remaining fields. */
coding->composing = 0;
- coding->character_unification_table_for_decode = Qnil;
- coding->character_unification_table_for_encode = Qnil;
+ coding->translation_table_for_decode = Qnil;
+ coding->translation_table_for_encode = Qnil;
/* Get values of coding system properties:
`post-read-conversion', `pre-write-conversion',
- `character-unification-table-for-decode',
- `character-unification-table-for-encode'. */
+ `translation-table-for-decode', `translation-table-for-encode'. */
plist = XVECTOR (coding_spec)->contents[3];
coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
- val = Fplist_get (plist, Qcharacter_unification_table_for_decode);
+ val = Fplist_get (plist, Qtranslation_table_for_decode);
if (SYMBOLP (val))
- val = Fget (val, Qcharacter_unification_table_for_decode);
- coding->character_unification_table_for_decode
- = CHAR_TABLE_P (val) ? val : Qnil;
- val = Fplist_get (plist, Qcharacter_unification_table_for_encode);
+ val = Fget (val, Qtranslation_table_for_decode);
+ coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
+ val = Fplist_get (plist, Qtranslation_table_for_encode);
if (SYMBOLP (val))
- val = Fget (val, Qcharacter_unification_table_for_encode);
- coding->character_unification_table_for_encode
- = CHAR_TABLE_P (val) ? val : Qnil;
+ val = Fget (val, Qtranslation_table_for_encode);
+ coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
val = Fplist_get (plist, Qcoding_category);
if (!NILP (val))
{
}
else if (CONSP (flags[i]))
{
- Lisp_Object tail = flags[i];
+ Lisp_Object tail;
+ tail = flags[i];
coding->flags |= CODING_FLAG_ISO_DESIGNATION;
if (INTEGERP (XCONS (tail)->car)
coding->common_flags
|= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
{
- Lisp_Object val = XVECTOR (coding_spec)->contents[4];
+ Lisp_Object val;
+ Lisp_Object decoder, encoder;
+
+ val = XVECTOR (coding_spec)->contents[4];
if (CONSP (val)
- && VECTORP (XCONS (val)->car)
- && VECTORP (XCONS (val)->cdr))
+ && SYMBOLP (XCONS (val)->car)
+ && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
+ && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
+ && SYMBOLP (XCONS (val)->cdr)
+ && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
+ && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
{
- setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car);
- setup_ccl_program (&(coding->spec.ccl.encoder), XCONS (val)->cdr);
+ setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
+ setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
}
else
goto label_invalid_coding_system;
+
+ /* Rebuild the table of valid byte codes from the Qvalid_codes
+    property of PLIST.  The table is cleared first; then each list
+    element is either an integer in 0..255, which marks that single
+    byte, or a cons (START . END) of integers, which marks every byte
+    from START through END inclusive.  Elements of any other form, or
+    out of range, are ignored.  */
+ bzero (coding->spec.ccl.valid_codes, 256);
+ val = Fplist_get (plist, Qvalid_codes);
+ for (; CONSP (val); val = XCONS (val)->cdr)
+   {
+     Lisp_Object this;
+
+     this = XCONS (val)->car;
+     if (INTEGERP (this)
+         && XINT (this) >= 0 && XINT (this) < 256)
+       coding->spec.ccl.valid_codes[XINT (this)] = 1;
+     else if (CONSP (this)
+              && INTEGERP (XCONS (this)->car)
+              && INTEGERP (XCONS (this)->cdr))
+       {
+         int start = XINT (XCONS (this)->car);
+         int end = XINT (XCONS (this)->cdr);
+
+         /* END itself belongs to the range: the guard accepts
+            START == END and caps END at 255, so stopping before END
+            would make (N . N) mark nothing and leave byte 255
+            unreachable through a range.  */
+         if (start >= 0 && start <= end && end < 256)
+           while (start <= end)
+             coding->spec.ccl.valid_codes[start++] = 1;
+       }
+   }
}
coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
break;
return -1;
}
+/* Turn CODING into raw-text, keeping the eol_type already stored in
+   it.  CODING must have been set up for some coding system
+   beforehand.  Nothing is changed if CODING is already of type
+   raw-text.  Otherwise its type becomes raw-text and its symbol
+   becomes Qraw_text or, when the eol format is decided and the
+   Qeol_type property of Qraw_text is a three-element vector of
+   subsidiaries, the subsidiary indexed by eol_type.  */
+
+void
+setup_raw_text_coding_system (coding)
+     struct coding_system *coding;
+{
+  Lisp_Object eol_variants;
+
+  /* Already raw-text: leave CODING untouched.  */
+  if (coding->type == coding_type_raw_text)
+    return;
+
+  coding->symbol = Qraw_text;
+  coding->type = coding_type_raw_text;
+
+  /* With the eol format still undecided, the base symbol is the
+     right one.  */
+  if (coding->eol_type == CODING_EOL_UNDECIDED)
+    return;
+
+  /* Otherwise pick the subsidiary matching the decided eol format.  */
+  eol_variants = Fget (Qraw_text, Qeol_type);
+  if (VECTORP (eol_variants) && XVECTOR (eol_variants)->size == 3)
+    coding->symbol = XVECTOR (eol_variants)->contents[coding->eol_type];
+}
+
/* Emacs has a mechanism to automatically detect a coding system if it
is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
it's impossible to distinguish some coding systems accurately
as BIG5. Assigned the coding-system (Lisp symbol)
`cn-big5' by default.
+ o coding-category-ccl
+
+ The category for a coding system of which encoder/decoder is
+ written in CCL programs. The default value is nil, i.e., no
+ coding system is assigned.
+
o coding-category-binary
The category for a coding system not categorized in any of the
*/
+static
+int ascii_skip_code[256];
+
/* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
If it detects possible coding systems, return an integer in which
appropriate flag bits are set. Flag bits are defined by macros
{
register unsigned char c;
unsigned char *src = source, *src_end = source + src_bytes;
- unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT
- | CODING_CATEGORY_MASK_ISO_SHIFT);
+ unsigned int mask;
int i;
/* At first, skip all ASCII characters and control characters except
for three ISO2022 specific control characters. */
+ ascii_skip_code[ISO_CODE_SO] = 0;
+ ascii_skip_code[ISO_CODE_SI] = 0;
+ ascii_skip_code[ISO_CODE_ESC] = 0;
+
label_loop_detect_coding:
- while (src < src_end)
- {
- c = *src;
- if (c >= 0x80
- || ((mask & CODING_CATEGORY_MASK_ISO_7BIT)
- && c == ISO_CODE_ESC)
- || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT)
- && (c == ISO_CODE_SI || c == ISO_CODE_SO)))
- break;
- src++;
- }
+ while (src < src_end && ascii_skip_code[*src]) src++;
*skip = src - source;
if (src >= src_end)
/* We found nothing other than ASCII. There's nothing to do. */
return 0;
+ c = *src;
/* The text seems to be encoded in some multilingual coding system.
Now, try to find in which coding system the text is encoded. */
if (c < 0x80)
{
/* No valid ISO2022 code follows C. Try again. */
src++;
- mask = (c != ISO_CODE_ESC
- ? CODING_CATEGORY_MASK_ISO_7BIT
- : CODING_CATEGORY_MASK_ISO_SHIFT);
+ if (c == ISO_CODE_ESC)
+ ascii_skip_code[ISO_CODE_ESC] = 1;
+ else
+ ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
goto label_loop_detect_coding;
}
if (priorities)
| CODING_CATEGORY_MASK_SJIS
| CODING_CATEGORY_MASK_BIG5);
+ /* Or, we may have to consider the possibility of CCL. */
+ if (coding_system_table[CODING_CATEGORY_IDX_CCL]
+ && (coding_system_table[CODING_CATEGORY_IDX_CCL]
+ ->spec.ccl.valid_codes)[c])
+ try |= CODING_CATEGORY_MASK_CCL;
+
mask = 0;
if (priorities)
{
for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
{
- priorities[i] &= try;
- if (priorities[i] & CODING_CATEGORY_MASK_ISO)
+ if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
mask = detect_coding_iso2022 (src, src_end);
- else if (priorities[i] & CODING_CATEGORY_MASK_SJIS)
+ else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
mask = detect_coding_sjis (src, src_end);
- else if (priorities[i] & CODING_CATEGORY_MASK_BIG5)
+ else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
mask = detect_coding_big5 (src, src_end);
- else if (priorities[i] & CODING_CATEGORY_MASK_EMACS_MULE)
+ else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
mask = detect_coding_emacs_mule (src, src_end);
+ else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
+ mask = detect_coding_ccl (src, src_end);
+ else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
+ mask = CODING_CATEGORY_MASK_RAW_TEXT;
+ else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
+ mask = CODING_CATEGORY_MASK_BINARY;
if (mask)
goto label_return_highest_only;
}
if (try & CODING_CATEGORY_MASK_BIG5)
mask |= detect_coding_big5 (src, src_end);
if (try & CODING_CATEGORY_MASK_EMACS_MULE)
- mask |= detect_coding_emacs_mule (src, src_end);
+ mask |= detect_coding_emacs_mule (src, src_end);
+ if (try & CODING_CATEGORY_MASK_CCL)
+ mask |= detect_coding_ccl (src, src_end);
}
- return (mask | CODING_CATEGORY_MASK_RAW_TEXT);
+ return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
label_return_highest_only:
for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
{
unsigned int idx;
int skip, mask, i;
- int priorities[CODING_CATEGORY_IDX_MAX];
- Lisp_Object val = Vcoding_category_list;
-
- i = 0;
- while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
- {
- if (! SYMBOLP (XCONS (val)->car))
- break;
- idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
- if (idx >= CODING_CATEGORY_IDX_MAX)
- break;
- priorities[i++] = (1 << idx);
- val = XCONS (val)->cdr;
- }
- /* If coding-category-list is valid and contains all coding
- categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
- the following code saves Emacs from craching. */
- while (i < CODING_CATEGORY_IDX_MAX)
- priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
+ Lisp_Object val;
- mask = detect_coding_mask (src, src_bytes, priorities, &skip);
+ val = Vcoding_category_list;
+ mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
coding->heading_ascii = skip;
if (!mask) return;
if (coding->eol_type != CODING_EOL_UNDECIDED)
{
- Lisp_Object tmp = Fget (val, Qeol_type);
+ Lisp_Object tmp;
+ tmp = Fget (val, Qeol_type);
if (VECTORP (tmp))
val = XVECTOR (tmp)->contents[coding->eol_type];
}
= encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
int result;
+ if (encodep)
+ ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
+
coding->produced = ccl_driver (ccl, source, destination,
src_bytes, dst_bytes, &(coding->consumed));
if (encodep)
return;
}
- if (coding->heading_ascii >= 0)
+ eol_conversion = (coding->eol_type != CODING_EOL_LF);
+
+ if ((! eol_conversion) && (coding->heading_ascii >= 0))
/* Detection routine has already found how much we can skip at the
head. */
*beg += coding->heading_ascii;
endp_orig = endp = begp + *end - *beg;
}
- eol_conversion = (coding->eol_type != CODING_EOL_LF);
-
switch (coding->type)
{
case coding_type_emacs_mule:
{
if (coding->heading_ascii < 0)
while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
- while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80)
+ while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
endp--;
+ /* Do not consider LF as ascii if preceded by CR, since that
+ confuses eol decoding. */
+ if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
+ endp++;
}
else
begp = endp;
while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
else
while (begp < endp && endp[-1] < 0x80) endp--;
+ /* Do not consider LF as ascii if preceded by CR, since that
+ confuses eol decoding. */
+ if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
+ endp++;
if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
endp++;
break;
while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
else
while (begp < endp && endp[-1] < 0x80) endp--;
+ /* Do not consider LF as ascii if preceded by CR, since that
+ confuses eol decoding. */
+ if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
+ endp++;
break;
case CODING_CATEGORY_IDX_ISO_7:
while (begp < endp
&& (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
endp--;
+ /* Do not consider LF as ascii if preceded by CR, since that
+ confuses eol decoding. */
+ if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
+ endp++;
if (begp < endp && endp[-1] == ISO_CODE_ESC)
{
if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
int len = to - from, len_byte = to_byte - from_byte;
int require, inserted, inserted_byte;
int head_skip, tail_skip, total_skip;
- Lisp_Object saved_coding_symbol = Qnil;
+ Lisp_Object saved_coding_symbol;
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
int first = 1;
int fake_multibyte = 0;
unsigned char *src, *dst;
- Lisp_Object deletion = Qnil;
+ Lisp_Object deletion;
+
+ deletion = Qnil;
+ saved_coding_symbol = Qnil;
+
+ if (from < PT && PT < to)
+ SET_PT_BOTH (from, from_byte);
if (replace)
{
inserted += len_byte;
inserted_byte += len_byte;
while (len_byte--)
- *src++ = *dst++;
+ *dst++ = *src++;
fake_multibyte = 1;
break;
}
int from = 0, to = XSTRING (str)->size;
int to_byte = STRING_BYTES (XSTRING (str));
struct gcpro gcpro1;
- Lisp_Object saved_coding_symbol = Qnil;
+ Lisp_Object saved_coding_symbol;
int result;
+ saved_coding_symbol = Qnil;
if (encodep && !NILP (coding->pre_write_conversion)
|| !encodep && !NILP (coding->post_read_conversion))
{
if (encodep)
str = make_unibyte_string (buf, len + coding->produced);
else
- str = make_string_from_bytes (buf, len + coding->produced_char,
- len + coding->produced);
+ {
+ int chars= (coding->fake_multibyte
+ ? multibyte_chars_in_text (buf + from, coding->produced)
+ : coding->produced_char);
+ str = make_multibyte_string (buf, len + chars, len + coding->produced);
+ }
+
return str;
}
\f
#ifdef emacs
-/*** 7. Emacs Lisp library functions ***/
+/*** 8. Emacs Lisp library functions ***/
DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
"Return t if OBJECT is nil or a coding-system.\n\
coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
eol_type = detect_eol_type (src, src_bytes, &dummy);
if (eol_type == CODING_EOL_INCONSISTENT)
- eol_type == CODING_EOL_UNDECIDED;
+ eol_type = CODING_EOL_UNDECIDED;
if (!coding_mask)
{
if (VECTORP (val2))
val = XVECTOR (val2)->contents[eol_type];
}
- return val;
+ return (highest ? val : Fcons (val, Qnil));
}
/* At first, gather possible coding systems in VAL. */
if (!highest)
val = Fnreverse (val);
- /* Then, substitute the elements by subsidiary coding systems. */
+ /* Then, replace the elements with subsidiary coding systems. */
for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
{
- if (eol_type != CODING_EOL_UNDECIDED)
+ if (eol_type != CODING_EOL_UNDECIDED
+ && eol_type != CODING_EOL_INCONSISTENT)
{
Lisp_Object eol;
eol = Fget (XCONS (tmp)->car, Qeol_type);
"Detect coding system of the text in the region between START and END.\n\
Return a list of possible coding systems ordered by priority.\n\
\n\
-If only ASCII characters are found, it returns `undecided'\n\
-or its subsidiary coding system according to a detected end-of-line format.\n\
+If only ASCII characters are found, it returns a list of single element\n\
+`undecided' or its subsidiary coding system according to a detected\n\
+end-of-line format.\n\
\n\
If optional argument HIGHEST is non-nil, return the coding system of\n\
highest priority.")
"Detect coding system of the text in STRING.\n\
Return a list of possible coding systems ordered by priority.\n\
\n\
-If only ASCII characters are found, it returns `undecided'\n\
-or its subsidiary coding system according to a detected end-of-line format.\n\
+If only ASCII characters are found, it returns a list of single element\n\
+`undecided' or its subsidiary coding system according to a detected\n\
+end-of-line format.\n\
\n\
If optional argument HIGHEST is non-nil, return the coding system of\n\
highest priority.")
coding.mode |= CODING_MODE_LAST_BLOCK;
code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
&coding, encodep, 1);
+ Vlast_coding_system_used = coding.symbol;
return make_number (coding.produced_char);
}
"Decode the current region by specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
-Return length of decoded text.")
+This function sets `last-coding-system-used' to the precise coding system\n\
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
+not fully specified.)\n\
+It returns the length of the decoded text.")
(start, end, coding_system)
Lisp_Object start, end, coding_system;
{
"Encode the current region by specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
-Return length of encoded text.")
+This function sets `last-coding-system-used' to the precise coding system\n\
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
+not fully specified.)\n\
+It returns the length of the encoded text.")
(start, end, coding_system)
Lisp_Object start, end, coding_system;
{
error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
coding.mode |= CODING_MODE_LAST_BLOCK;
+ Vlast_coding_system_used = coding.symbol;
return code_convert_string (string, &coding, encodep, !NILP (nocopy));
}
2, 3, 0,
"Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
-if the decoding operation is trivial.")
+if the decoding operation is trivial.\n\
+This function sets `last-coding-system-used' to the precise coding system\n\
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
+not fully specified.)")
(string, coding_system, nocopy)
Lisp_Object string, coding_system, nocopy;
{
- return code_convert_string1(string, coding_system, nocopy, 0);
+ return code_convert_string1 (string, coding_system, nocopy, 0);
}
DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
2, 3, 0,
"Encode STRING to CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
-if the encoding operation is trivial.")
+if the encoding operation is trivial.\n\
+This function sets `last-coding-system-used' to the precise coding system\n\
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
+not fully specified.)")
(string, coding_system, nocopy)
Lisp_Object string, coding_system, nocopy;
{
- return code_convert_string1(string, coding_system, nocopy, 1);
+ return code_convert_string1 (string, coding_system, nocopy, 1);
}
+/* Encode or decode STRING according to CODING_SYSTEM and return the
+   result.  ENCODEP non-zero means encode, zero means decode.
+   If CODING_SYSTEM is nil, STRING itself is returned unchanged.
+   Signal an error if CODING_SYSTEM cannot be set up.
+   Unlike code_convert_string1, this function does not set
+   Vlast_coding_system_used.  */
+
+Lisp_Object
+code_convert_string_norecord (string, coding_system, encodep)
+     Lisp_Object string, coding_system;
+     int encodep;
+{
+  struct coding_system coding;
+
+  CHECK_STRING (string, 0);
+  CHECK_SYMBOL (coding_system, 1);
+
+  if (NILP (coding_system))
+    return string;
+
+  if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
+    error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+
+  coding.mode |= CODING_MODE_LAST_BLOCK;
+  /* The last argument of code_convert_string is a C truth value (the
+     other caller passes `!NILP (nocopy)'), so pass 1 rather than the
+     Lisp object Qt.  */
+  return code_convert_string (string, &coding, encodep, 1);
+}
\f
DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
"Decode a JISX0208 character of shift-jis encoding.\n\
return Qnil;
}
-DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
- Supdate_iso_coding_systems, 0, 0, 0,
- "Update internal database for ISO2022 based coding systems.\n\
+DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal,
+ Supdate_coding_systems_internal, 0, 0, 0,
+ "Update internal database for ISO2022 and CCL based coding systems.\n\
When values of the following coding categories are changed, you must\n\
call this function:\n\
coding-category-iso-7, coding-category-iso-7-tight,\n\
coding-category-iso-8-1, coding-category-iso-8-2,\n\
- coding-category-iso-7-else, coding-category-iso-8-else")
+ coding-category-iso-7-else, coding-category-iso-8-else,\n\
+ coding-category-ccl")
()
{
int i;
- for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
- i++)
+ for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++)
{
- if (! coding_system_table[i])
- coding_system_table[i]
- = (struct coding_system *) xmalloc (sizeof (struct coding_system));
- setup_coding_system
- (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
- coding_system_table[i]);
+ Lisp_Object val;
+
+ val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
+ if (!NILP (val))
+ {
+ if (! coding_system_table[i])
+ coding_system_table[i] = ((struct coding_system *)
+ xmalloc (sizeof (struct coding_system)));
+ setup_coding_system (val, coding_system_table[i]);
+ }
+ else if (coding_system_table[i])
+ {
+ xfree (coding_system_table[i]);
+ coding_system_table[i] = NULL;
+ }
}
+
+ return Qnil;
+}
+
+DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
+ Sset_coding_priority_internal, 0, 0, 0,
+ "Update internal database for the current value of `coding-category-list'.\n\
+This function is internal use only.")
+ ()
+{
+ int i = 0, idx; /* I is the next priority slot to fill; IDX is a category index.  */
+ Lisp_Object val;
+
+ val = Vcoding_category_list;
+
+ while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX) /* Never fill more slots than the table has.  */
+ {
+ if (! SYMBOLP (XCONS (val)->car))
+ break; /* Stop at the first element that is not a symbol.  */
+ idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index)); /* Read the index from the symbol's property list.  */
+ if (idx >= CODING_CATEGORY_IDX_MAX)
+ break; /* Stop at an index that is too large to be a category.  */
+ coding_priorities[i++] = (1 << idx); /* Slot I holds the mask bit of the category at priority I.  */
+ val = XCONS (val)->cdr;
+ }
+ /* If coding-category-list is valid and contains all coding
+ categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
+ the following code saves Emacs from crashing: every slot that was
+ left unfilled gets the raw-text mask. */
+ while (i < CODING_CATEGORY_IDX_MAX)
+ coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
+
return Qnil;
}
#endif /* emacs */
\f
-/*** 8. Post-amble ***/
+/*** 9. Post-amble ***/
+
+void
+init_coding ()
+{
+ conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE); /* Allocate the
+ conversion buffer at its minimum size; init_coding_once sets
+ conversion_buffer_size to the same constant.  */
+}
void
init_coding_once ()
iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
- conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
setup_coding_system (Qnil, &keyboard_coding);
setup_coding_system (Qnil, &terminal_coding);
setup_coding_system (Qnil, &safe_terminal_coding);
+ setup_coding_system (Qnil, &default_buffer_file_coding);
bzero (coding_system_table, sizeof coding_system_table);
+ bzero (ascii_skip_code, sizeof ascii_skip_code);
+ for (i = 0; i < 128; i++)
+ ascii_skip_code[i] = 1;
+
#if defined (MSDOS) || defined (WINDOWSNT)
system_eol_type = CODING_EOL_CRLF;
#else
}
}
- Qcharacter_unification_table = intern ("character-unification-table");
- staticpro (&Qcharacter_unification_table);
- Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
- make_number (0));
+ Qtranslation_table = intern ("translation-table");
+ staticpro (&Qtranslation_table);
+ Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
- Qcharacter_unification_table_for_decode
- = intern ("character-unification-table-for-decode");
- staticpro (&Qcharacter_unification_table_for_decode);
+ Qtranslation_table_id = intern ("translation-table-id");
+ staticpro (&Qtranslation_table_id);
- Qcharacter_unification_table_for_encode
- = intern ("character-unification-table-for-encode");
- staticpro (&Qcharacter_unification_table_for_encode);
+ Qtranslation_table_for_decode = intern ("translation-table-for-decode");
+ staticpro (&Qtranslation_table_for_decode);
+
+ Qtranslation_table_for_encode = intern ("translation-table-for-encode");
+ staticpro (&Qtranslation_table_for_encode);
Qsafe_charsets = intern ("safe-charsets");
staticpro (&Qsafe_charsets);
+ Qvalid_codes = intern ("valid-codes");
+ staticpro (&Qvalid_codes);
+
Qemacs_mule = intern ("emacs-mule");
staticpro (&Qemacs_mule);
defsubr (&Sset_keyboard_coding_system_internal);
defsubr (&Skeyboard_coding_system);
defsubr (&Sfind_operation_coding_system);
- defsubr (&Supdate_iso_coding_systems);
+ defsubr (&Supdate_coding_systems_internal);
+ defsubr (&Sset_coding_priority_internal);
DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
"List of coding systems.\n\
If VAL is a function symbol, the function must return a coding system\n\
or a cons of coding systems which are used as above.\n\
\n\
-See also the function `find-operation-coding-system'.");
+See also the function `find-operation-coding-system'.\n\
+and the variable `auto-coding-alist'.");
Vfile_coding_system_alist = Qnil;
DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
"Mnemonic character indicating end-of-line format is not yet decided.");
eol_mnemonic_undecided = ':';
- DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
- "Non-nil means ISO 2022 encoder/decoder do character unification.");
- Venable_character_unification = Qt;
+ DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
+ "*Non-nil enables character translation while encoding and decoding.");
+ Venable_character_translation = Qt;
- DEFVAR_LISP ("standard-character-unification-table-for-decode",
- &Vstandard_character_unification_table_for_decode,
- "Table for unifying characters when reading.");
- Vstandard_character_unification_table_for_decode = Qnil;
+ DEFVAR_LISP ("standard-translation-table-for-decode",
+ &Vstandard_translation_table_for_decode,
+ "Table for translating characters while decoding.");
+ Vstandard_translation_table_for_decode = Qnil;
- DEFVAR_LISP ("standard-character-unification-table-for-encode",
- &Vstandard_character_unification_table_for_encode,
- "Table for unifying characters when writing.");
- Vstandard_character_unification_table_for_encode = Qnil;
+ DEFVAR_LISP ("standard-translation-table-for-encode",
+ &Vstandard_translation_table_for_encode,
+ "Table for translationg characters while encoding.");
+ Vstandard_translation_table_for_encode = Qnil;
DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
"Alist of charsets vs revision numbers.\n\