to avoid infinite recursive call. */
static int inhibit_pre_post_conversion;
-/* Char-table containing safe coding systems of each character. */
-Lisp_Object Vchar_coding_system_table;
Lisp_Object Qchar_coding_system;
/* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check
/* Record one COMPONENT (alternate character or composition rule). */
-#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
- (coding->cmp_data->data[coding->cmp_data->used++] = component)
+#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
+ do { /* Now a statement; the old definition was a single expression.  */ \
+ coding->cmp_data->data[coding->cmp_data->used++] = component; \
+ if (coding->cmp_data->used - coding->cmp_data_start \
+ == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \
+ { /* used - cmp_data_start hit the bunch limit: end the composition now.  */ \
+ CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
+ coding->composing = COMPOSITION_NO; /* Leave composing state after the forced end.  */ \
+ } \
+ } while (0)
/* Get one byte from a data pointed by SRC and increment SRC. If SRC
#define SHIFT_OUT_OK(idx) \
(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
+#define COMPOSITION_OK(idx) /* Nonzero unless coding_system_table[IDX] has composition disabled.  */ \
+ (coding_system_table[idx]->composing != COMPOSITION_DISABLED) /* NOTE(review): dereferences the table entry unchecked -- confirm it is never null here.  */
+
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in ISO2022. If it is, return an
integer in which appropriate flag bits any of:
else if (c >= '0' && c <= '4')
{
/* ESC <Fp> for start/end composition. */
- mask_found |= CODING_CATEGORY_MASK_ISO;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
+ mask_found |= CODING_CATEGORY_MASK_ISO_7;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_7;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
+ mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
+ mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
break;
}
else
/* Instead of encoding character C, produce one or two `?'s. */
-#define ENCODE_UNSAFE_CHARACTER(c) \
- do { \
- ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
- if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
- ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
+#define ENCODE_UNSAFE_CHARACTER(c) \
+ do { \
+ ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); /* Always emit one replacement.  */ \
+ if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) /* Charset width of C is above 1...  */ \
+ ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); /* ...so emit a second replacement.  */ \
} while (0)
Lisp_Object translation_table;
Lisp_Object safe_chars;
+ if (coding->flags & CODING_FLAG_ISO_SAFE)
+ coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
+
safe_chars = coding_safe_chars (coding->symbol);
if (NILP (Venable_character_translation))
}
else
{
- if (coding->flags & CODING_FLAG_ISO_SAFE
+ if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
&& ! CODING_SAFE_CHAR_P (safe_chars, c))
ENCODE_UNSAFE_CHARACTER (c);
else
*dst++ = c;
coding->errors++;
}
- else if (coding->flags & CODING_FLAG_ISO_SAFE
+ else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
&& ! CODING_SAFE_CHAR_P (safe_chars, c))
ENCODE_UNSAFE_CHARACTER (c);
else
int multibytep;
{
unsigned char c1, c2;
- /* Dummy for TWO_MORE_BYTES. */
+ /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */
struct coding_system dummy_coding;
struct coding_system *coding = &dummy_coding;
EMIT_ONE_BYTE (c1 | 0x80);
else if (charset == charset_latin_jisx0201)
EMIT_ONE_BYTE (c1);
+ else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+ {
+ EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ if (CHARSET_WIDTH (charset) > 1)
+ EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ }
else
/* There's no way other than producing the internal
codes as is. */
ENCODE_BIG5 (charset, c1, c2, c1, c2);
EMIT_TWO_BYTES (c1, c2);
}
+ else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+ {
+ EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ if (CHARSET_WIDTH (charset) > 1)
+ EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ }
else
/* There's no way other than producing the internal
codes as is. */
} while (0)
static Lisp_Object
-code_convert_region_unwind (dummy)
- Lisp_Object dummy;
+code_convert_region_unwind (arg)
+ Lisp_Object arg; /* Value to store back into Vlast_coding_system_used.  */
{
inhibit_pre_post_conversion = 0; /* Clear the flag that blocks nested pre-write/post-read conversion calls.  */
+ Vlast_coding_system_used = arg; /* Callers register this handler with the value current at that time, so this restores it.  */
return Qnil; /* Always returns Qnil.  */
}
int len = data[0] - 4, j;
Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
+ if (method == COMPOSITION_WITH_RULE_ALTCHARS
+ && len % 2 == 0)
+ len --;
for (j = 0; j < len; j++)
args[j] = make_number (data[4 + j]);
components = (method == COMPOSITION_WITH_ALTCHARS
struct buffer *prev = current_buffer;
Lisp_Object new;
- record_unwind_protect (code_convert_region_unwind, Qnil);
+ record_unwind_protect (code_convert_region_unwind,
+ Vlast_coding_system_used);
/* We should not call any more pre-write/post-read-conversion
functions while this pre-write-conversion is running. */
inhibit_pre_post_conversion = 1;
&& ! encodep && ! NILP (coding->post_read_conversion))
{
Lisp_Object val;
+ Lisp_Object saved_coding_system;
if (from != PT)
TEMP_SET_PT_BOTH (from, from_byte);
prev_Z = Z;
- record_unwind_protect (code_convert_region_unwind, Qnil);
+ record_unwind_protect (code_convert_region_unwind,
+ Vlast_coding_system_used);
+ saved_coding_system = Vlast_coding_system_used;
+ Vlast_coding_system_used = coding->symbol;
/* We should not call any more pre-write/post-read-conversion
functions while this post-read-conversion is running. */
inhibit_pre_post_conversion = 1;
val = call1 (coding->post_read_conversion, make_number (inserted));
inhibit_pre_post_conversion = 0;
+ coding->symbol = Vlast_coding_system_used;
+ Vlast_coding_system_used = saved_coding_system;
/* Discard the unwind protect. */
specpdl_ptr--;
CHECK_NUMBER (val);
Lisp_Object old_deactivate_mark;
record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
- record_unwind_protect (code_convert_region_unwind, Qnil);
+ record_unwind_protect (code_convert_region_unwind,
+ Vlast_coding_system_used);
/* It is not crucial to specbind this. */
old_deactivate_mark = Vdeactivate_mark;
GCPRO2 (str, old_deactivate_mark);
call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
else
{
+ Vlast_coding_system_used = coding->symbol;
TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
call1 (coding->post_read_conversion, make_number (Z - BEG));
+ coding->symbol = Vlast_coding_system_used;
}
inhibit_pre_post_conversion = 0;
Vdeactivate_mark = old_deactivate_mark;
shrinked_bytes = from + (SBYTES (str) - to_byte);
}
- if (!require_decoding)
+ if (!require_decoding
+ && !(SYMBOLP (coding->post_read_conversion)
+ && !NILP (Ffboundp (coding->post_read_conversion))))
{
coding->consumed = SBYTES (str);
coding->consumed_char = SCHARS (str);
DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2, 3, 0,
- doc: /* Detect coding system of the text in the region between START and END.
-Return a list of possible coding systems ordered by priority.
+ doc: /* Detect how the byte sequence in the region is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in the region between START and END when
+the coding system `undecided' is specified. The list is ordered by
+priority decided in the current language environment.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
1, 2, 0,
- doc: /* Detect coding system of the text in STRING.
-Return a list of possible coding systems ordered by priority.
+ doc: /* Detect how the byte sequence in STRING is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in STRING when the coding system
+`undecided' is specified. The list is ordered by priority decided in
+the current language environment.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
STRING_MULTIBYTE (string));
}
-/* Return an intersection of lists L1 and L2. */
-
-static Lisp_Object
-intersection (l1, l2)
- Lisp_Object l1, l2;
-{
- Lisp_Object val = Fcons (Qnil, Qnil), tail;
-
- for (tail = val; CONSP (l1); l1 = XCDR (l1))
- {
- if (!NILP (Fmemq (XCAR (l1), l2)))
- {
- XSETCDR (tail, Fcons (XCAR (l1), Qnil));
- tail = XCDR (tail);
- }
- }
- return XCDR (val);
-}
-
-
/* Subroutine for Ffind_coding_systems_region_internal.
Return a list of coding systems that safely encode the multibyte
- text between P and PEND. SAFE_CODINGS, if non-nil, is a list of
+ text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of
possible coding systems. If it is nil, it means that we have not
yet found any coding systems.
unsigned char *p, *pend;
Lisp_Object safe_codings, work_table;
int *single_byte_char_found;
-{
- int c, len, idx;
- Lisp_Object val;
-
- while (p < pend)
- {
- c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
- p += len;
- if (ASCII_BYTE_P (c))
- /* We can ignore ASCII characters here. */
- continue;
- if (SINGLE_BYTE_CHAR_P (c))
- *single_byte_char_found = 1;
- if (NILP (safe_codings))
- continue;
- /* Check the safe coding systems for C. */
- val = char_table_ref_and_index (work_table, c, &idx);
- if (EQ (val, Qt))
- /* This element was already checked. Ignore it. */
- continue;
- /* Remember that we checked this element. */
- CHAR_TABLE_SET (work_table, make_number (idx), Qt);
-
- /* If there are some safe coding systems for C and we have
- already found the other set of coding systems for the
- different characters, get the intersection of them. */
- if (!EQ (safe_codings, Qt) && !NILP (val))
- val = intersection (safe_codings, val);
- safe_codings = val;
- }
- return safe_codings;
-}
-
-
-/* Return a list of coding systems that safely encode the text between
- START and END. If the text contains only ASCII or is unibyte,
- return t. */
-
-DEFUN ("find-coding-systems-region-internal",
- Ffind_coding_systems_region_internal,
- Sfind_coding_systems_region_internal, 2, 2, 0,
- doc: /* Internal use only. */)
- (start, end)
- Lisp_Object start, end;
-{
- Lisp_Object work_table, safe_codings;
- int non_ascii_p = 0;
- int single_byte_char_found = 0;
- const unsigned char *p1, *p1end, *p2, *p2end, *p;
-
- if (STRINGP (start))
- {
- if (!STRING_MULTIBYTE (start))
- return Qt;
- p1 = SDATA (start), p1end = p1 + SBYTES (start);
- p2 = p2end = p1end;
- if (SCHARS (start) != SBYTES (start))
- non_ascii_p = 1;
- }
- else
- {
- int from, to, stop;
-
- CHECK_NUMBER_COERCE_MARKER (start);
- CHECK_NUMBER_COERCE_MARKER (end);
- if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
- args_out_of_range (start, end);
- if (NILP (current_buffer->enable_multibyte_characters))
- return Qt;
- from = CHAR_TO_BYTE (XINT (start));
- to = CHAR_TO_BYTE (XINT (end));
- stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
- p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
- if (stop == to)
- p2 = p2end = p1end;
- else
- p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
- if (XINT (end) - XINT (start) != to - from)
- non_ascii_p = 1;
- }
-
- if (!non_ascii_p)
- {
- /* We are sure that the text contains no multibyte character.
- Check if it contains eight-bit-graphic. */
- p = p1;
- for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
- if (p == p1end)
- {
- for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
- if (p == p2end)
- return Qt;
- }
- }
-
- /* The text contains non-ASCII characters. */
- work_table = Fcopy_sequence (Vchar_coding_system_table);
- safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
- &single_byte_char_found);
- if (p2 < p2end)
- safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
- &single_byte_char_found);
-
- if (EQ (safe_codings, Qt))
- ; /* Nothing to be done. */
- else if (!single_byte_char_found)
- {
- /* Append generic coding systems. */
- Lisp_Object args[2];
- args[0] = safe_codings;
- args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
- make_number (0));
- safe_codings = Fappend (2, args);
- }
- else
- safe_codings = Fcons (Qraw_text,
- Fcons (Qemacs_mule,
- Fcons (Qno_conversion, safe_codings)));
- return safe_codings;
-}
-
-
-static Lisp_Object
-find_safe_codings_2 (p, pend, safe_codings, work_table, single_byte_char_found)
- unsigned char *p, *pend;
- Lisp_Object safe_codings, work_table;
- int *single_byte_char_found;
{
int c, len, i;
Lisp_Object val, ch;
if (SINGLE_BYTE_CHAR_P (c))
*single_byte_char_found = 1;
if (NILP (safe_codings))
- /* Already all coding systems are excluded. */
+ /* All coding systems have already been excluded.  Still, we
+ can't leave the loop here: the rest of the text must be scanned
+ so that any non-ASCII single-byte char is recorded.  */
continue;
/* Check the safe coding systems for C. */
ch = make_number (c);
for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
{
- val = XCAR (tail);
- if (NILP (Faref (XCDR (val), ch)))
+ Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
+ int encodable;
+
+ elt = XCAR (tail);
+ if (CONSP (XCDR (elt)))
+ {
+ /* This entry has this format now:
+ ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
+ ACCEPT-LATIN-EXTRA ) */
+ val = XCDR (elt);
+ encodable = ! NILP (Faref (XCAR (val), ch));
+ if (! encodable)
+ {
+ val = XCDR (val);
+ translation_table = XCAR (val);
+ hash_table = XCAR (XCDR (val));
+ accept_latin_extra = XCAR (XCDR (XCDR (val)));
+ }
+ }
+ else
+ {
+ /* This entry has this format now: ( CODING . SAFE-CHARS) */
+ encodable = ! NILP (Faref (XCDR (elt), ch));
+ if (! encodable)
+ {
+ /* Transform the format to:
+ ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
+ ACCEPT-LATIN-EXTRA ) */
+ val = Fget (XCAR (elt), Qcoding_system);
+ translation_table
+ = Fplist_get (AREF (val, 3),
+ Qtranslation_table_for_encode);
+ if (SYMBOLP (translation_table))
+ translation_table = Fget (translation_table,
+ Qtranslation_table);
+ hash_table
+ = (CHAR_TABLE_P (translation_table)
+ ? XCHAR_TABLE (translation_table)->extras[1]
+ : Qnil);
+ accept_latin_extra
+ = ((EQ (AREF (val, 0), make_number (2))
+ && VECTORP (AREF (val, 4)))
+ ? AREF (AREF (val, 4), CODING_FLAG_ISO_LATIN_EXTRA)
+ : Qnil);
+ XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
+ translation_table, hash_table,
+ accept_latin_extra));
+ }
+ }
+
+ if (! encodable
+ && ((CHAR_TABLE_P (translation_table)
+ && ! NILP (Faref (translation_table, ch)))
+ || (HASH_TABLE_P (hash_table)
+ && ! NILP (Fgethash (ch, hash_table, Qnil)))
+ || (SINGLE_BYTE_CHAR_P (c)
+ && ! NILP (accept_latin_extra)
+ && VECTORP (Vlatin_extra_code_table)
+ && ! NILP (AREF (Vlatin_extra_code_table, c)))))
+ encodable = 1;
+ if (encodable)
+ prev = tail;
+ else
{
- /* Exclued this coding system from SAFE_CODINGS. */
+ /* Exclude this coding system from SAFE_CODINGS. */
if (EQ (tail, safe_codings))
safe_codings = XCDR (safe_codings);
else
XSETCDR (prev, XCDR (tail));
}
- else
- prev = tail;
}
}
return safe_codings;
}
-DEFUN ("find-coding-systems-region-internal-2",
- Ffind_coding_systems_region_internal_2,
- Sfind_coding_systems_region_internal_2, 2, 2, 0,
+DEFUN ("find-coding-systems-region-internal",
+ Ffind_coding_systems_region_internal,
+ Sfind_coding_systems_region_internal, 2, 2, 0,
doc: /* Internal use only. */)
(start, end)
Lisp_Object start, end;
work_table = Fmake_char_table (Qchar_coding_system, Qnil);
safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
- safe_codings = find_safe_codings_2 (p1, p1end, safe_codings, work_table,
- &single_byte_char_found);
+ safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
+ &single_byte_char_found);
if (p2 < p2end)
- safe_codings = find_safe_codings_2 (p2, p2end, safe_codings, work_table,
- &single_byte_char_found);
+ safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
+ &single_byte_char_found);
if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
safe_codings = Qt;
else
CHECK_SYMBOL (coding_system);
setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
/* We had better not send unsafe characters to terminal. */
- terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
+ terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
/* Character composition should be disabled. */
terminal_coding.composing = COMPOSITION_DISABLED;
/* Error notification should be suppressed. */
Qtranslation_table = intern ("translation-table");
staticpro (&Qtranslation_table);
- Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
+ Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
Qtranslation_table_id = intern ("translation-table-id");
staticpro (&Qtranslation_table_id);
But don't staticpro it here--that is done in alloc.c. */
Qchar_table_extra_slots = intern ("char-table-extra-slots");
Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
- Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2));
+ Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
Qvalid_codes = intern ("valid-codes");
staticpro (&Qvalid_codes);
defsubr (&Sdetect_coding_region);
defsubr (&Sdetect_coding_string);
defsubr (&Sfind_coding_systems_region_internal);
- defsubr (&Sfind_coding_systems_region_internal_2);
defsubr (&Sunencodable_char_position);
defsubr (&Sdecode_coding_region);
defsubr (&Sencode_coding_region);
Vcoding_system_for_write = Qnil;
DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
- doc: /* Coding system used in the latest file or process I/O. */);
+ doc: /* Coding system used in the latest file or process I/O.
+Also set by `encode-coding-region', `decode-coding-region',
+`encode-coding-string' and `decode-coding-string'. */);
Vlast_coding_system_used = Qnil;
DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
coding_system_require_warning = 0;
- DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
- doc: /* Char-table containing safe coding systems of each characters.
-Each element doesn't include such generic coding systems that can
-encode any characters. They are in the first extra slot. */);
- Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
-
DEFVAR_BOOL ("inhibit-iso-escape-detection",
&inhibit_iso_escape_detection,
doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.