else \
c = *src, bytes = 1; \
if (!NILP (translation_table)) \
- c = translate_char (translation_table, c, 0, 0, 0); \
+ c = translate_char (translation_table, c, -1, 0, 0); \
src += bytes; \
} while (0)
Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
Lisp_Object Qno_conversion, Qundecided;
Lisp_Object Qcoding_system_history;
-Lisp_Object Qsafe_charsets;
+Lisp_Object Qsafe_chars;
Lisp_Object Qvalid_codes;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
/* Flag to inhibit code conversion of end-of-line format. */
int inhibit_eol_conversion;
+/* Flag to inhibit ISO2022 escape sequence detection. */
+int inhibit_iso_escape_detection;
+
/* Flag to make buffer-file-coding-system inherit from process-coding. */
int inherit_process_coding_system;
to avoid infinite recursive call. */
static int inhibit_pre_post_conversion;
+/* Char-table containing safe coding systems of each character. */
+Lisp_Object Vchar_coding_system_table;
+Lisp_Object Qchar_coding_system;
+
+/* Return the `safe-chars' property of coding system CODING: the value
+   stored under Qsafe_chars in the property list kept in element 3 of
+   the coding spec vector found on the Qcoding_system property of
+   CODING->symbol.  If that value is not a char-table, return Qt
+   instead, which CODING_SAFE_CHAR_P accepts for every character.
+   Don't check validity of CODING.  */
+
+Lisp_Object
+coding_safe_chars (coding)
+     struct coding_system *coding;
+{
+  Lisp_Object coding_spec, plist, safe_chars;
+
+  coding_spec = Fget (coding->symbol, Qcoding_system);
+  plist = XVECTOR (coding_spec)->contents[3];
+  /* Look the property up in PLIST rather than indexing the spec
+     vector a second time.  */
+  safe_chars = Fplist_get (plist, Qsafe_chars);
+  return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
+}
+
+/* Nonzero if character C is safe according to SAFE_CHARS, i.e. if
+   SAFE_CHARS is Qt or the char-table SAFE_CHARS has a non-nil element
+   for C.  SAFE_CHARS may be evaluated twice, so it must not have side
+   effects.  */
+#define CODING_SAFE_CHAR_P(safe_chars, c) \
+  (EQ ((safe_chars), Qt) || !NILP (CHAR_TABLE_REF ((safe_chars), (c))))
+
\f
/*** 2. Emacs internal format (emacs-mule) handlers ***/
enum iso_code_class_type iso_code_class[256];
-#define CHARSET_OK(idx, charset) \
- (coding_system_table[idx] \
- && (coding_system_table[idx]->safe_charsets[charset] \
- || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \
- (coding_system_table[idx], charset) \
- != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
+/* Nonzero if all of the following hold: coding_system_table[IDX] is
+ non-null; CHARSET is CHARSET_ASCII or character C satisfies
+ CODING_SAFE_CHAR_P for that coding system's safe-chars; and
+ CODING_SPEC_ISO_REQUESTED_DESIGNATION for CHARSET is not
+ CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.
+
+ For a non-ASCII CHARSET the expansion assigns to a variable named
+ `safe_chars', so a Lisp_Object of that name must be in scope
+ wherever the macro is used. */
+#define CHARSET_OK(idx, charset, c) \
+ (coding_system_table[idx] \
+ && (charset == CHARSET_ASCII \
+ || (safe_chars = coding_safe_chars (coding_system_table[idx]), \
+ CODING_SAFE_CHAR_P (safe_chars, c))) \
+ && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
+ charset) \
+ != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
#define SHIFT_OUT_OK(idx) \
(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
/* Dummy for ONE_MORE_BYTE. */
struct coding_system dummy_coding;
struct coding_system *coding = &dummy_coding;
+ Lisp_Object safe_chars;
reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
while (mask && src < src_end)
switch (c)
{
case ISO_CODE_ESC:
+ if (inhibit_iso_escape_detection)
+ break;
single_shifting = 0;
ONE_MORE_BYTE (c);
if (c >= '(' && c <= '/')
/* We found a valid designation sequence for CHARSET. */
mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
+ c = MAKE_CHAR (charset, 0, 0);
+ if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
mask_found |= CODING_CATEGORY_MASK_ISO_7;
else
mask &= ~CODING_CATEGORY_MASK_ISO_7;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
+ if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
else
mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
+ if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
else
mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
+ if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
else
mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
break;
case ISO_CODE_SO:
+ if (inhibit_iso_escape_detection)
+ break;
single_shifting = 0;
if (shift_out == 0
&& (reg[1] >= 0
break;
case ISO_CODE_SI:
+ if (inhibit_iso_escape_detection)
+ break;
single_shifting = 0;
if (shift_out == 1)
{
{
int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
+ if (inhibit_iso_escape_detection)
+ break;
if (c != ISO_CODE_CSI)
{
if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
/* Set designation state into CODING. */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
do { \
- int charset; \
+ int charset, c; \
\
if (final_char < '0' || final_char >= 128) \
goto label_invalid_code; \
charset = ISO_CHARSET_TABLE (make_number (dimension), \
make_number (chars), \
make_number (final_char)); \
+ c = MAKE_CHAR (charset, 0, 0); \
if (charset >= 0 \
&& (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
- || coding->safe_charsets[charset])) \
+ || CODING_SAFE_CHAR_P (safe_chars, c))) \
{ \
if (coding->spec.iso2022.last_invalid_designation_register == 0 \
&& reg == 0 \
unsigned char *src_base;
int c, charset;
Lisp_Object translation_table;
+ Lisp_Object safe_chars;
+
+ safe_chars = coding_safe_chars (coding);
if (NILP (Venable_character_translation))
translation_table = Qnil;
goto label_invalid_code;
charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
ONE_MORE_BYTE (c1);
+ if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+ goto label_invalid_code;
break;
case 'O': /* invocation of single-shift-3 */
goto label_invalid_code;
charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
ONE_MORE_BYTE (c1);
+ if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+ goto label_invalid_code;
break;
case '0': case '2': case '3': case '4': /* start composition */
*dst++ = c1 | 0x80; \
break; \
} \
- else if (coding->flags & CODING_FLAG_ISO_SAFE \
- && !coding->safe_charsets[charset]) \
- { \
- /* We should not encode this character, instead produce one or \
- two `?'s. */ \
- *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
- if (CHARSET_WIDTH (charset) == 2) \
- *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
- break; \
- } \
else \
/* Since CHARSET is not yet invoked to any graphic planes, we \
must invoke it, or, at first, designate it to some graphic \
*dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
break; \
} \
- else if (coding->flags & CODING_FLAG_ISO_SAFE \
- && !coding->safe_charsets[charset]) \
- { \
- /* We should not encode this character, instead produce one or \
- two `?'s. */ \
- *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
- if (CHARSET_WIDTH (charset) == 2) \
- *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
- break; \
- } \
else \
/* Since CHARSET is not yet invoked to any graphic planes, we \
must invoke it, or, at first, designate it to some graphic \
dst = encode_invocation_designation (charset, coding, dst); \
} while (1)
-#define ENCODE_ISO_CHARACTER(charset, c1, c2) \
+/* Produce codes for character C at DST. DST and CODING are variables
+ of the place where the macro is expanded. C is split into charset,
+ c1 and c2 by SPLIT_CHAR. For a defined charset the work is handed
+ to ENCODE_ISO_CHARACTER_DIMENSION1 or ENCODE_ISO_CHARACTER_DIMENSION2
+ according to CHARSET_DIMENSION, after replacing ASCII by
+ charset_latin_jisx0201 when CODING_FLAG_ISO_USE_ROMAN is set, or
+ charset_jisx0208 by charset_jisx0208_1978 when
+ CODING_FLAG_ISO_USE_OLDJIS is set. For an undefined charset, c1 and
+ (if it is not negative) c2 are stored at DST as they are. */
+#define ENCODE_ISO_CHARACTER(c) \
+ do { \
+ int charset, c1, c2; \
+ \
+ SPLIT_CHAR (c, charset, c1, c2); \
+ if (CHARSET_DEFINED_P (charset)) \
+ { \
+ if (CHARSET_DIMENSION (charset) == 1) \
+ { \
+ if (charset == CHARSET_ASCII \
+ && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
+ charset = charset_latin_jisx0201; \
+ ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1); \
+ } \
+ else \
+ { \
+ if (charset == charset_jisx0208 \
+ && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
+ charset = charset_jisx0208_1978; \
+ ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2); \
+ } \
+ } \
+ else \
+ { \
+ *dst++ = c1; \
+ if (c2 >= 0) \
+ *dst++ = c2; \
+ } \
+ } while (0)
+
+
+/* Instead of encoding character C, produce one or two `?'s.
+ CODING_INHIBIT_CHARACTER_SUBSTITUTION is emitted through
+ ENCODE_ISO_CHARACTER once, and a second time when
+ CHARSET_WIDTH (CHAR_CHARSET (C)) is greater than 1. */
+
+#define ENCODE_UNSAFE_CHARACTER(c) \
do { \
- int alt_charset = charset; \
- \
- if (CHARSET_DEFINED_P (charset)) \
- { \
- if (CHARSET_DIMENSION (charset) == 1) \
- { \
- if (charset == CHARSET_ASCII \
- && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
- alt_charset = charset_latin_jisx0201; \
- ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \
- } \
- else \
- { \
- if (charset == charset_jisx0208 \
- && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
- alt_charset = charset_jisx0208_1978; \
- ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \
- } \
- } \
- else \
- { \
- *dst++ = c1; \
- if (c2 >= 0) \
- *dst++ = c2; \
- } \
+ ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
+ if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
+ ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
} while (0)
+
/* Produce designation and invocation codes at a place pointed by DST
to use CHARSET. The element `spec.iso2022' of *CODING is updated.
Return new DST. */
unsigned char *src_base;
int c;
Lisp_Object translation_table;
+ Lisp_Object safe_chars;
+
+ safe_chars = coding_safe_chars (coding);
if (NILP (Venable_character_translation))
translation_table = Qnil;
coding->errors = 0;
while (1)
{
- int charset, c1, c2;
-
src_base = src;
if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
}
else
{
- SPLIT_CHAR (c, charset, c1, c2);
- ENCODE_ISO_CHARACTER (charset, c1, c2);
+ if (coding->flags & CODING_FLAG_ISO_SAFE
+ && ! CODING_SAFE_CHAR_P (safe_chars, c))
+ ENCODE_UNSAFE_CHARACTER (c);
+ else
+ ENCODE_ISO_CHARACTER (c);
if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
coding->composition_rule_follows = 1;
}
}
}
else if (ASCII_BYTE_P (c))
- ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1);
+ ENCODE_ISO_CHARACTER (c);
else if (SINGLE_BYTE_CHAR_P (c))
{
*dst++ = c;
coding->errors++;
}
+ else if (coding->flags & CODING_FLAG_ISO_SAFE
+ && ! CODING_SAFE_CHAR_P (safe_chars, c))
+ ENCODE_UNSAFE_CHARACTER (c);
else
- {
- SPLIT_CHAR (c, charset, c1, c2);
- ENCODE_ISO_CHARACTER (charset, c1, c2);
- }
+ ENCODE_ISO_CHARACTER (c);
coding->consumed_char++;
}
while (1)
{
ONE_MORE_BYTE (c);
- if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
+ if (c >= 0x81)
{
- ONE_MORE_BYTE (c);
- if (c < 0x40)
+ if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
+ {
+ ONE_MORE_BYTE (c);
+ if (c < 0x40 || c == 0x7F || c > 0xFC)
+ return 0;
+ }
+ else if (c > 0xDF)
return 0;
}
}
translation_table = Qnil;
else
{
- translation_table = coding->translation_table_for_decode;
+ translation_table = coding->translation_table_for_encode;
if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_decode;
+ translation_table = Vstandard_translation_table_for_encode;
}
while (1)
ENCODE_SJIS (c1, c2, c1, c2);
EMIT_TWO_BYTES (c1, c2);
}
+ else if (charset == charset_katakana_jisx0201)
+ EMIT_ONE_BYTE (c1 | 0x80);
else if (charset == charset_latin_jisx0201)
EMIT_ONE_BYTE (c1);
else
}
else
{
- if (src_bytes <= dst_bytes)
+ if (!dst_bytes || src_bytes <= dst_bytes)
{
safe_bcopy (src, dst, src_bytes);
src_base = src_end;
coding->consumed = src_base - source;
coding->produced = dst - destination;
+ coding->produced_char = coding->produced;
}
\f
else
goto label_invalid_coding_system;
- val = Fplist_get (plist, Qsafe_charsets);
- if (EQ (val, Qt))
- {
- for (i = 0; i <= MAX_CHARSET; i++)
- coding->safe_charsets[i] = 1;
- }
- else
- {
- bzero (coding->safe_charsets, MAX_CHARSET + 1);
- while (CONSP (val))
- {
- if ((i = get_charset_id (XCAR (val))) >= 0)
- coding->safe_charsets[i] = 1;
- val = XCDR (val);
- }
- }
-
/* If the coding system has non-nil `composition' property, enable
composition handling. */
val = Fplist_get (plist, Qcomposition);
if (reg_bits)
for (charset = 0; charset <= MAX_CHARSET; charset++)
{
- if (CHARSET_VALID_P (charset))
+ if (CHARSET_VALID_P (charset)
+ && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+ == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
{
/* There exist some default graphic registers to be
- used CHARSET. */
+ used by CHARSET. */
/* We had better avoid designating a charset of
CHARS96 to REG 0 as far as possible. */
return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
}
-#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
-#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
-#endif
+/* Working buffer for code conversion. Set up by
+ allocate_conversion_buffer, grown by extend_conversion_buffer and
+ released by free_conversion_buffer. */
+struct conversion_buffer
+{
+ int size; /* number of bytes allocated for DATA. */
+ int on_stack; /* 1 if DATA was allocated by alloca, 0 if by xmalloc. */
+ unsigned char *data; /* the buffer itself. */
+};
-char *conversion_buffer;
-int conversion_buffer_size;
+/* Don't use alloca for allocating memory space larger than this, lest
+ we overflow the stack. Note that the expansion is not parenthesized,
+ so use this macro only where operator precedence cannot change its
+ value. */
+#define MAX_ALLOCA 16*1024
-/* Return a pointer to a SIZE bytes of buffer to be used for encoding
- or decoding. Sufficient memory is allocated automatically. If we
- run out of memory, return NULL. */
+/* Allocate LEN bytes of memory for BUF (struct conversion_buffer).
+   Requests smaller than MAX_ALLOCA bytes are satisfied with alloca,
+   all others with xmalloc; BUF.on_stack records which one was used and
+   BUF.size is set to LEN.  This must stay a macro, because memory
+   obtained from alloca belongs to the frame of the function in which
+   the macro is expanded.  LEN is evaluated more than once, so it must
+   not have side effects.  */
+#define allocate_conversion_buffer(buf, len) \
+  do { \
+    if ((len) < MAX_ALLOCA) \
+      { \
+        (buf).data = (unsigned char *) alloca (len); \
+        (buf).on_stack = 1; \
+      } \
+    else \
+      { \
+        (buf).data = (unsigned char *) xmalloc (len); \
+        (buf).on_stack = 0; \
+      } \
+    (buf).size = (len); \
+  } while (0)
-char *
-get_conversion_buffer (size)
- int size;
+/* Double the allocated memory for *BUF. If the data currently lives
+ in memory obtained from alloca (BUF->on_stack is nonzero), a new
+ block of twice the size is obtained with xmalloc, the old contents
+ are copied into it and on_stack is cleared; otherwise the existing
+ block is grown with xrealloc. In both cases BUF->size is doubled. */
+static void
+extend_conversion_buffer (buf)
+ struct conversion_buffer *buf;
{
- if (size > conversion_buffer_size)
+ if (buf->on_stack)
+ {
+ unsigned char *save = buf->data;
+ buf->data = (unsigned char *) xmalloc (buf->size * 2);
+ bcopy (save, buf->data, buf->size);
+ buf->on_stack = 0;
+ }
+ else
{
- char *buf;
- int real_size = conversion_buffer_size * 2;
-
- while (real_size < size) real_size *= 2;
- buf = (char *) xmalloc (real_size);
- xfree (conversion_buffer);
- conversion_buffer = buf;
- conversion_buffer_size = real_size;
+ buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
}
- return conversion_buffer;
+ buf->size *= 2;
+}
+
+/* Release the memory held by BUF.  Only memory that is not on the
+   stack (BUF->on_stack is zero) is handed to xfree; memory obtained
+   from alloca is left alone.  */
+static void
+free_conversion_buffer (buf)
+     struct conversion_buffer *buf;
+{
+  if (buf->on_stack)
+    return;
+  xfree (buf->data);
}
int
ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
if (encodep)
ccl->eol_type = coding->eol_type;
+ ccl->multibyte = coding->src_multibyte;
coding->produced = ccl_driver (ccl, source, destination,
src_bytes, dst_bytes, &(coding->consumed));
if (encodep)
switch (ccl->status)
{
case CCL_STAT_SUSPEND_BY_SRC:
- result = CODING_FINISH_INSUFFICIENT_SRC;
+ coding->result = CODING_FINISH_INSUFFICIENT_SRC;
break;
case CCL_STAT_SUSPEND_BY_DST:
- result = CODING_FINISH_INSUFFICIENT_DST;
+ coding->result = CODING_FINISH_INSUFFICIENT_DST;
break;
case CCL_STAT_QUIT:
case CCL_STAT_INVALID_CMD:
- result = CODING_FINISH_INTERRUPT;
+ coding->result = CODING_FINISH_INTERRUPT;
break;
default:
- result = CODING_FINISH_NORMAL;
+ coding->result = CODING_FINISH_NORMAL;
break;
}
- return result;
+ return coding->result;
}
/* Decode EOL format of the text at PTR of BYTES length destructively
unsigned char *dst = destination + coding->produced;
src_bytes -= coding->consumed;
- coding->errors++;
+ coding->errors++;
if (COMPOSING_P (coding))
DECODE_COMPOSITION_END ('1');
while (src_bytes--)
}
coding->consumed = coding->consumed_char = src - source;
coding->produced = dst - destination;
+ coding->result = CODING_FINISH_NORMAL;
}
if (!coding->dst_multibyte)
encode_eol (coding, source, destination, src_bytes, dst_bytes);
}
- if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->consumed == src_bytes)
- coding->result = CODING_FINISH_NORMAL;
-
- if (coding->mode & CODING_MODE_LAST_BLOCK)
+ if (coding->mode & CODING_MODE_LAST_BLOCK
+ && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
{
unsigned char *src = source + coding->consumed;
unsigned char *src_end = src + src_bytes;
coding->consumed = src_bytes;
}
coding->produced = coding->produced_char = dst - destination;
+ coding->result = CODING_FINISH_NORMAL;
}
+ if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
+ && coding->consumed == src_bytes)
+ coding->result = CODING_FINISH_NORMAL;
+
return coding->result;
}
{
int i;
- for (i = 0; i < cmp_data->used; i += cmp_data->data[i])
+ for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
+ i += cmp_data->data[i])
{
int *data = cmp_data->data + i;
enum composition_method method = (enum composition_method) data[3];
if (replace)
{
int saved_from = from;
+ int saved_inhibit_modification_hooks;
prepare_to_modify_buffer (from, to, &from);
if (saved_from != from)
from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
len_byte = to_byte - from_byte;
}
+
+ /* The code conversion routine can not preserve text properties
+ for now. So, we must remove all text properties in the
+ region. Here, we must suppress all modification hooks. */
+ saved_inhibit_modification_hooks = inhibit_modification_hooks;
+ inhibit_modification_hooks = 1;
+ Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
+ inhibit_modification_hooks = saved_inhibit_modification_hooks;
}
if (! encodep && CODING_REQUIRE_DETECTION (coding))
{
detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
if (coding->type == coding_type_undecided)
- /* It seems that the text contains only ASCII, but we
- should not left it undecided because the deeper
- decoding routine (decode_coding) tries to detect the
- encodings again in vain. */
- coding->type = coding_type_emacs_mule;
+ {
+ /* It seems that the text contains only ASCII, but we
+ should not leave it undecided because the deeper
+ decoding routine (decode_coding) tries to detect the
+ encodings again in vain. */
+ coding->type = coding_type_emacs_mule;
+ coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
+ }
}
if (coding->eol_type == CODING_EOL_UNDECIDED
&& coding->type != coding_type_ccl)
}
/* Try to skip the heading and tailing ASCIIs. */
- {
- int from_byte_orig = from_byte, to_byte_orig = to_byte;
-
- if (from < GPT && GPT < to)
- move_gap_both (from, from_byte);
- SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
- if (from_byte == to_byte
- && (encodep || NILP (coding->post_read_conversion))
- && ! CODING_REQUIRE_FLUSHING (coding))
- {
- coding->produced = len_byte;
- coding->produced_char = len;
- if (!replace)
- /* We must record and adjust for this new text now. */
- adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
- return 0;
- }
+ if (coding->type != coding_type_ccl)
+ {
+ int from_byte_orig = from_byte, to_byte_orig = to_byte;
- head_skip = from_byte - from_byte_orig;
- tail_skip = to_byte_orig - to_byte;
- total_skip = head_skip + tail_skip;
- from += head_skip;
- to -= tail_skip;
- len -= total_skip; len_byte -= total_skip;
- }
+ if (from < GPT && GPT < to)
+ move_gap_both (from, from_byte);
+ SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
+ if (from_byte == to_byte
+ && (encodep || NILP (coding->post_read_conversion))
+ && ! CODING_REQUIRE_FLUSHING (coding))
+ {
+ coding->produced = len_byte;
+ coding->produced_char = len;
+ if (!replace)
+ /* We must record and adjust for this new text now. */
+ adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
+ return 0;
+ }
- /* The code conversion routine can not preserve text properties for
- now. So, we must remove all text properties in the region.
- Here, we must suppress all modification hooks. */
- if (replace)
- {
- int saved_inhibit_modification_hooks = inhibit_modification_hooks;
- inhibit_modification_hooks = 1;
- Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
- inhibit_modification_hooks = saved_inhibit_modification_hooks;
+ head_skip = from_byte - from_byte_orig;
+ tail_skip = to_byte_orig - to_byte;
+ total_skip = head_skip + tail_skip;
+ from += head_skip;
+ to -= tail_skip;
+ len -= total_skip; len_byte -= total_skip;
}
/* For converion, we must put the gap before the text in addition to
call1 (coding->post_read_conversion, make_number (Z - BEG));
}
inhibit_pre_post_conversion = 0;
- str = make_buffer_string (BEG, Z, 0);
+ str = make_buffer_string (BEG, Z, 1);
return unbind_to (count, str);
}
int nocopy;
{
int len;
- char *buf;
+ struct conversion_buffer buf;
int from, to, to_byte;
struct gcpro gcpro1;
Lisp_Object saved_coding_symbol;
int result;
+ int require_decoding;
+ int shrinked_bytes = 0;
+ Lisp_Object newstr;
+ int consumed, consumed_char, produced, produced_char;
from = 0;
to = XSTRING (str)->size;
}
}
- if (! CODING_REQUIRE_DECODING (coding))
- {
- if (!STRING_MULTIBYTE (str))
- {
- str = Fstring_as_multibyte (str);
- nocopy = 1;
- }
- return (nocopy ? str : Fcopy_sequence (str));
- }
+ require_decoding = CODING_REQUIRE_DECODING (coding);
if (STRING_MULTIBYTE (str))
{
str = Fstring_as_unibyte (str);
to_byte = STRING_BYTES (XSTRING (str));
nocopy = 1;
- coding->src_multibyte = 0;
}
- coding->dst_multibyte = 1;
-
- if (coding->composing != COMPOSITION_DISABLED)
- coding_allocate_composition_data (coding, from);
+ coding->src_multibyte = 0;
+ coding->dst_multibyte = (coding->type != coding_type_no_conversion
+ && coding->type != coding_type_raw_text);
/* Try to skip the heading and tailing ASCIIs. */
- {
- int from_orig = from;
+ if (require_decoding && coding->type != coding_type_ccl)
+ {
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ 0);
+ if (from == to_byte)
+ require_decoding = 0;
+ shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+ }
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
- 0);
- if (from == to_byte)
+ if (!require_decoding)
+ {
+ coding->consumed = STRING_BYTES (XSTRING (str));
+ coding->consumed_char = XSTRING (str)->size;
+ if (coding->dst_multibyte)
+ {
+ str = Fstring_as_multibyte (str);
+ nocopy = 1;
+ }
+ coding->produced = STRING_BYTES (XSTRING (str));
+ coding->produced_char = XSTRING (str)->size;
return (nocopy ? str : Fcopy_sequence (str));
- }
+ }
+ if (coding->composing != COMPOSITION_DISABLED)
+ coding_allocate_composition_data (coding, from);
len = decoding_buffer_size (coding, to_byte - from);
- len += from + STRING_BYTES (XSTRING (str)) - to_byte;
- GCPRO1 (str);
- buf = get_conversion_buffer (len);
- UNGCPRO;
+ allocate_conversion_buffer (buf, len);
- if (from > 0)
- bcopy (XSTRING (str)->data, buf, from);
- result = decode_coding (coding, XSTRING (str)->data + from,
- buf + from, to_byte - from, len);
- if (result == CODING_FINISH_INCONSISTENT_EOL)
+ consumed = consumed_char = produced = produced_char = 0;
+ while (1)
{
- /* We simply try to decode the whole string again but without
- eol-conversion this time. */
- coding->eol_type = CODING_EOL_LF;
- coding->symbol = saved_coding_symbol;
- coding_free_composition_data (coding);
- return decode_coding_string (str, coding, nocopy);
+ result = decode_coding (coding, XSTRING (str)->data + from + consumed,
+ buf.data + produced, to_byte - from - consumed,
+ buf.size - produced);
+ consumed += coding->consumed;
+ consumed_char += coding->consumed_char;
+ produced += coding->produced;
+ produced_char += coding->produced_char;
+ if (result == CODING_FINISH_NORMAL
+ || (result == CODING_FINISH_INSUFFICIENT_SRC
+ && coding->consumed == 0))
+ break;
+ if (result == CODING_FINISH_INSUFFICIENT_CMP)
+ coding_allocate_composition_data (coding, from + produced_char);
+ else if (result == CODING_FINISH_INSUFFICIENT_DST)
+ extend_conversion_buffer (&buf);
+ else if (result == CODING_FINISH_INCONSISTENT_EOL)
+ {
+ /* Recover the original EOL format. */
+ if (coding->eol_type == CODING_EOL_CR)
+ {
+ unsigned char *p;
+ for (p = buf.data; p < buf.data + produced; p++)
+ if (*p == '\n') *p = '\r';
+ }
+ else if (coding->eol_type == CODING_EOL_CRLF)
+ {
+ int num_eol = 0;
+ unsigned char *p0, *p1;
+ for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
+ if (*p0 == '\n') num_eol++;
+ if (produced + num_eol >= buf.size)
+ extend_conversion_buffer (&buf);
+ for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
+ {
+ *--p1 = *--p0;
+ if (*p0 == '\n') *--p1 = '\r';
+ }
+ produced += num_eol;
+ produced_char += num_eol;
+ }
+ coding->eol_type = CODING_EOL_LF;
+ coding->symbol = saved_coding_symbol;
+ }
}
- bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
- STRING_BYTES (XSTRING (str)) - to_byte);
+ coding->consumed = consumed;
+ coding->consumed_char = consumed_char;
+ coding->produced = produced;
+ coding->produced_char = produced_char;
- len = from + STRING_BYTES (XSTRING (str)) - to_byte;
- str = make_multibyte_string (buf, len + coding->produced_char,
- len + coding->produced);
+ if (coding->dst_multibyte)
+ newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
+ produced + shrinked_bytes);
+ else
+ newstr = make_uninit_string (produced + shrinked_bytes);
+ if (from > 0)
+ bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
+ bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+ if (shrinked_bytes > from)
+ bcopy (XSTRING (str)->data + to_byte,
+ XSTRING (newstr)->data + from + produced,
+ shrinked_bytes - from);
+ free_conversion_buffer (&buf);
if (coding->cmp_data && coding->cmp_data->used)
- coding_restore_composition (coding, str);
+ coding_restore_composition (coding, newstr);
coding_free_composition_data (coding);
if (SYMBOLP (coding->post_read_conversion)
&& !NILP (Ffboundp (coding->post_read_conversion)))
- str = run_pre_post_conversion_on_str (str, coding, 0);
+ newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
- return str;
+ return newstr;
}
Lisp_Object
int nocopy;
{
int len;
- char *buf;
+ struct conversion_buffer buf;
int from, to, to_byte;
struct gcpro gcpro1;
Lisp_Object saved_coding_symbol;
int result;
+ int shrinked_bytes = 0;
+ Lisp_Object newstr;
+ int consumed, consumed_char, produced, produced_char;
if (SYMBOLP (coding->pre_write_conversion)
&& !NILP (Ffboundp (coding->pre_write_conversion)))
saved_coding_symbol = Qnil;
if (! CODING_REQUIRE_ENCODING (coding))
{
+ coding->consumed = STRING_BYTES (XSTRING (str));
+ coding->consumed_char = XSTRING (str)->size;
if (STRING_MULTIBYTE (str))
{
str = Fstring_as_unibyte (str);
nocopy = 1;
}
+ coding->produced = STRING_BYTES (XSTRING (str));
+ coding->produced_char = XSTRING (str)->size;
return (nocopy ? str : Fcopy_sequence (str));
}
coding_save_composition (coding, from, to, str);
/* Try to skip the heading and tailing ASCIIs. */
- {
- int from_orig = from;
-
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
- 1);
- if (from == to_byte)
- return (nocopy ? str : Fcopy_sequence (str));
- }
+ if (coding->type != coding_type_ccl)
+ {
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ 1);
+ if (from == to_byte)
+ return (nocopy ? str : Fcopy_sequence (str));
+ shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+ }
len = encoding_buffer_size (coding, to_byte - from);
- len += from + STRING_BYTES (XSTRING (str)) - to_byte;
- GCPRO1 (str);
- buf = get_conversion_buffer (len);
- UNGCPRO;
+ allocate_conversion_buffer (buf, len);
+ consumed = consumed_char = produced = produced_char = 0;
+ while (1)
+ {
+ result = encode_coding (coding, XSTRING (str)->data + from + consumed,
+ buf.data + produced, to_byte - from - consumed,
+ buf.size - produced);
+ consumed += coding->consumed;
+ consumed_char += coding->consumed_char;
+ produced += coding->produced;
+ produced_char += coding->produced_char;
+ if (result == CODING_FINISH_NORMAL
+ || (result == CODING_FINISH_INSUFFICIENT_SRC
+ && coding->consumed == 0))
+ break;
+ /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */
+ extend_conversion_buffer (&buf);
+ }
+
+ coding->consumed = consumed;
+ coding->consumed_char = consumed_char;
+ coding->produced = produced;
+ coding->produced_char = produced_char;
+
+ newstr = make_uninit_string (produced + shrinked_bytes);
if (from > 0)
- bcopy (XSTRING (str)->data, buf, from);
- result = encode_coding (coding, XSTRING (str)->data + from,
- buf + from, to_byte - from, len);
- bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
- STRING_BYTES (XSTRING (str)) - to_byte);
-
- len = from + STRING_BYTES (XSTRING (str)) - to_byte;
- str = make_unibyte_string (buf, len + coding->produced);
+ bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
+ bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+ if (shrinked_bytes > from)
+ bcopy (XSTRING (str)->data + to_byte,
+ XSTRING (newstr)->data + from + produced,
+ shrinked_bytes - from);
+
+ free_conversion_buffer (&buf);
coding_free_composition_data (coding);
- return str;
+ return newstr;
}
\f
!NILP (highest));
}
+/* Return a list of those elements of list L1 that Fmemq also finds in
+   list L2.  The result is built by consing onto its front, so the
+   elements appear in the reverse of their order in L1.  */
+
+static Lisp_Object
+intersection (l1, l2)
+     Lisp_Object l1, l2;
+{
+  Lisp_Object common = Qnil;
+
+  while (CONSP (l1))
+    {
+      Lisp_Object elt = XCAR (l1);
+
+      if (!NILP (Fmemq (elt, l2)))
+        common = Fcons (elt, common);
+      l1 = XCDR (l1);
+    }
+  return common;
+}
+
+
+/* Subroutine for Ffind_coding_systems_region_internal.
+
+ Return a list of coding systems that safely encode the multibyte
+ text between P and PEND. SAFE_CODINGS is the result accumulated so
+ far: t means that no non-ASCII character has restricted the result
+ yet, a list holds the coding systems that are still possible, and
+ nil means that no coding system listed in the table remains. In the
+ nil case the rest of the text is still scanned, but only to detect
+ single byte chars.
+
+ WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An
+ element of WORK_TABLE is set to t once the element is looked up.
+
+ If a non-ASCII single byte char is found, set
+ *single_byte_char_found to 1. */
+
+static Lisp_Object
+find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
+ unsigned char *p, *pend;
+ Lisp_Object safe_codings, work_table;
+ int *single_byte_char_found;
+{
+ int c, len, idx;
+ Lisp_Object val;
+
+ while (p < pend)
+ {
+ c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+ p += len;
+ if (ASCII_BYTE_P (c))
+ /* We can ignore ASCII characters here. */
+ continue;
+ if (SINGLE_BYTE_CHAR_P (c))
+ *single_byte_char_found = 1;
+ if (NILP (safe_codings))
+ /* Nothing is left to narrow down; keep scanning only for the
+ sake of *single_byte_char_found. */
+ continue;
+ /* Check the safe coding systems for C. */
+ val = char_table_ref_and_index (work_table, c, &idx);
+ if (EQ (val, Qt))
+ /* This element was already checked. Ignore it. */
+ continue;
+ /* Remember that we checked this element. */
+ CHAR_TABLE_SET (work_table, make_number (idx), Qt);
+
+ /* If there are some safe coding systems for C and we have
+ already found the other set of coding systems for the
+ different characters, get the intersection of them.
+ Otherwise VAL itself becomes the result, which is nil when
+ the table has no coding systems for C. */
+ if (!EQ (safe_codings, Qt) && !NILP (val))
+ val = intersection (safe_codings, val);
+ safe_codings = val;
+ }
+ return safe_codings;
+}
+
+
+/* Return a list of coding systems that safely encode the text between
+   START and END.  If START is a string, that string is examined and
+   END is not used.  If the text contains only ASCII or is unibyte,
+   return t.
+
+   The text is scanned as at most two contiguous byte ranges,
+   [P1, P1END) and [P2, P2END); a buffer region that has GPT_BYTE
+   strictly inside it is split there.  When no non-ASCII single byte
+   char was seen, the coding systems kept in extra slot 0 of
+   Vchar_coding_system_table are appended to the result; otherwise
+   Qraw_text and Qemacs_mule are put in front of it.  */
+
+DEFUN ("find-coding-systems-region-internal",
+       Ffind_coding_systems_region_internal,
+       Sfind_coding_systems_region_internal, 2, 2, 0,
+  "Internal use only.")
+  (start, end)
+     Lisp_Object start, end;
+{
+  Lisp_Object work_table, safe_codings;
+  int non_ascii_p = 0;
+  int single_byte_char_found = 0;
+  unsigned char *p1, *p1end, *p2, *p2end, *p;
+
+  if (STRINGP (start))
+    {
+      if (!STRING_MULTIBYTE (start))
+        return Qt;
+      p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
+      p2 = p2end = p1end;
+      /* More bytes than characters means some character is multibyte.  */
+      if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
+        non_ascii_p = 1;
+    }
+  else
+    {
+      int from, to, stop;
+
+      CHECK_NUMBER_COERCE_MARKER (start, 0);
+      CHECK_NUMBER_COERCE_MARKER (end, 1);
+      if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
+        args_out_of_range (start, end);
+      if (NILP (current_buffer->enable_multibyte_characters))
+        return Qt;
+      from = CHAR_TO_BYTE (XINT (start));
+      to = CHAR_TO_BYTE (XINT (end));
+      /* Split the region at GPT_BYTE when GPT_BYTE lies strictly inside it.  */
+      stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
+      p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
+      if (stop == to)
+        p2 = p2end = p1end;
+      else
+        p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
+      if (XINT (end) - XINT (start) != to - from)
+        non_ascii_p = 1;
+    }
+
+  if (!non_ascii_p)
+    {
+      /* We are sure that the text contains no multibyte character.
+         Check if it contains eight-bit-graphic.  */
+      for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++)
+        ;
+      if (p == p1end)
+        {
+          for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++)
+            ;
+          if (p == p2end)
+            return Qt;
+        }
+    }
+
+  /* The text contains non-ASCII characters.  */
+  work_table = Fcopy_sequence (Vchar_coding_system_table);
+  safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
+                                    &single_byte_char_found);
+  if (p2 < p2end)
+    safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
+                                      &single_byte_char_found);
+
+  if (!single_byte_char_found)
+    {
+      /* Append generic coding systems.  */
+      Lisp_Object args[2];
+      args[0] = safe_codings;
+      args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
+                                        make_number (0));
+      safe_codings = Fappend (2, args);
+    }
+  else
+    safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
+  return safe_codings;
+}
+
+
Lisp_Object
code_convert_region1 (start, end, coding_system, encodep)
Lisp_Object start, end, coding_system;
\f
/*** 9. Post-amble ***/
-void
-init_coding ()
-{
- conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
-}
-
void
init_coding_once ()
{
iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
- conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
-
setup_coding_system (Qnil, &keyboard_coding);
setup_coding_system (Qnil, &terminal_coding);
setup_coding_system (Qnil, &safe_terminal_coding);
Qtranslation_table_for_encode = intern ("translation-table-for-encode");
staticpro (&Qtranslation_table_for_encode);
- Qsafe_charsets = intern ("safe-charsets");
- staticpro (&Qsafe_charsets);
+ Qsafe_chars = intern ("safe-chars");
+ staticpro (&Qsafe_chars);
+
+ Qchar_coding_system = intern ("char-coding-system");
+ staticpro (&Qchar_coding_system);
+
+ /* Intern this now in case it isn't already done.
+ Setting this variable twice is harmless.
+ But don't staticpro it here--that is done in alloc.c. */
+ Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
+ Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
Qvalid_codes = intern ("valid-codes");
staticpro (&Qvalid_codes);
defsubr (&Scheck_coding_system);
defsubr (&Sdetect_coding_region);
defsubr (&Sdetect_coding_string);
+ defsubr (&Sfind_coding_systems_region_internal);
defsubr (&Sdecode_coding_region);
defsubr (&Sencode_coding_region);
defsubr (&Sdecode_coding_string);
The default value is `select-safe-coding-system' (which see).");
Vselect_safe_coding_system_function = Qnil;
+ DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
+ "Char-table containing safe coding systems of each characters.\n\
+Each element doesn't include such generic coding systems that can\n\
+encode any characters. They are in the first extra slot.");
+ Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
+
+ DEFVAR_BOOL ("inhibit-iso-escape-detection",
+ &inhibit_iso_escape_detection,
+ "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
+\n\
+By default, on reading a file, Emacs tries to detect how the text is\n\
+encoded. This code detection is sensitive to escape sequences. If\n\
+the sequence is valid as ISO2022, the code is determined as one of\n\
+the ISO2022 encodings, and the file is decoded by the corresponding\n\
+coding system (e.g. `iso-2022-7bit').\n\
+\n\
+However, there may be a case that you want to read escape sequences in\n\
+a file as is. In such a case, you can set this variable to non-nil.\n\
+Then, as the code detection ignores any escape sequences, no file is\n\
+detected as encoded in some ISO2022 encoding. The result is that all\n\
+escape sequences become visible in a buffer.\n\
+\n\
+The default value is nil, and it is strongly recommended not to change\n\
+it. That is because many Emacs Lisp source files that contain\n\
+non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
+in Emacs's distribution, and they won't be decoded correctly on\n\
+reading if you suppress escape sequence detection.\n\
+\n\
+The other way to read escape sequences in a file without decoding is\n\
+to explicitly specify some coding system that doesn't use ISO2022's\n\
+escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
+ inhibit_iso_escape_detection = 0;
}
char *