#include <config.h>
#include <stdio.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+
#include "lisp.h"
#include "character.h"
#include "buffer.h"
Lisp_Object Qstart_process, Qopen_network_stream;
static Lisp_Object Qtarget_idx;
-static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
-static Lisp_Object Qinterrupted, Qinsufficient_memory;
+static Lisp_Object Qinsufficient_source, Qinvalid_source, Qinterrupted;
/* If a symbol has this property, evaluate the value to define the
symbol as a coding system. */
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
+/* Map a three-state Lisp flag onto an integer: nil encodes as -1, t
+   encodes as 1, and any other value encodes as 0.  */
+
+static int
+encode_inhibit_flag (Lisp_Object flag)
+{
+  if (NILP (flag))
+    return -1;
+  return EQ (flag, Qt) ? 1 : 0;
+}
+
+/* Decide whether a flag encoded as ENCODED_FLAG counts as set.
+   A positive ENCODED_FLAG means yes, a negative one means no, and
+   zero defers to VAR, the value of the corresponding user variable.  */
+
+static bool
+inhibit_flag (int encoded_flag, bool var)
+{
+  /* A non-zero encoding is decisive on its own.  */
+  if (encoded_flag != 0)
+    return encoded_flag > 0;
+  return var;
+}
+
#define CODING_GET_INFO(coding, attrs, charset_list) \
do { \
(attrs) = CODING_ID_ATTRS ((coding)->id); \
(charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
} while (0)
+/* Run CHECK_NATNUM on the car of X, then store the checked value back
+   into X.  The check is applied to a local copy rather than to
+   XCAR (X) itself.  */
+static void
+CHECK_NATNUM_CAR (Lisp_Object x)
+{
+  Lisp_Object head;
+
+  head = XCAR (x);
+  CHECK_NATNUM (head);
+  XSETCAR (x, head);
+}
+
+/* Run CHECK_NATNUM on the cdr of X, then store the checked value back
+   into X.  The check is applied to a local copy rather than to
+   XCDR (X) itself.  */
+static void
+CHECK_NATNUM_CDR (Lisp_Object x)
+{
+  Lisp_Object rest;
+
+  rest = XCDR (x);
+  CHECK_NATNUM (rest);
+  XSETCDR (x, rest);
+}
+
/* Safely get one byte from the source text pointed by SRC which ends
at SRC_END, and set C to that byte. If there are not enough bytes
case CODING_RESULT_INSUFFICIENT_SRC:
Vlast_code_conversion_error = Qinsufficient_source;
break;
- case CODING_RESULT_INCONSISTENT_EOL:
- Vlast_code_conversion_error = Qinconsistent_eol;
- break;
case CODING_RESULT_INVALID_SRC:
Vlast_code_conversion_error = Qinvalid_source;
break;
case CODING_RESULT_INTERRUPT:
Vlast_code_conversion_error = Qinterrupted;
break;
- case CODING_RESULT_INSUFFICIENT_MEM:
- Vlast_code_conversion_error = Qinsufficient_memory;
- break;
case CODING_RESULT_INSUFFICIENT_DST:
/* Don't record this error in Vlast_code_conversion_error
because it happens just temporarily and is resolved when the
GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
}
else
- {
- Lisp_Object this_buffer;
-
- this_buffer = Fcurrent_buffer ();
- set_buffer_internal (XBUFFER (coding->dst_object));
- make_gap (bytes);
- set_buffer_internal (XBUFFER (this_buffer));
- }
+ make_gap_1 (XBUFFER (coding->dst_object), bytes);
}
*buf++ = id; \
} while (0)
+
+/* Bitmasks for coding->eol_seen. */
+
+#define EOL_SEEN_NONE 0
+#define EOL_SEEN_LF 1
+#define EOL_SEEN_CR 2
+#define EOL_SEEN_CRLF 4
+
\f
/*** 2. Emacs' internal format (emacs-utf-8) ***/
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
+/* Unlike the other detect_coding_XXX functions, this one also counts
+ the number of characters and checks the EOL format. */
+
static bool
detect_coding_utf_8 (struct coding_system *coding,
struct coding_detection_info *detect_info)
bool multibytep = coding->src_multibyte;
ptrdiff_t consumed_chars = 0;
bool bom_found = 0;
- bool found = 0;
+ int nchars = coding->head_ascii;
+ int eol_seen = coding->eol_seen;
detect_info->checked |= CATEGORY_MASK_UTF_8;
/* A coding system of this category is always ASCII compatible. */
- src += coding->head_ascii;
+ src += nchars;
+
+ if (src == coding->source /* BOM should be at the head. */
+ && src + 3 < src_end /* BOM is 3-byte long. */
+ && src[0] == UTF_8_BOM_1
+ && src[1] == UTF_8_BOM_2
+ && src[2] == UTF_8_BOM_3)
+ {
+ bom_found = 1;
+ src += 3;
+ nchars++;
+ }
while (1)
{
src_base = src;
ONE_MORE_BYTE (c);
if (c < 0 || UTF_8_1_OCTET_P (c))
- continue;
+ {
+ nchars++;
+ if (c == '\r')
+ {
+ if (src < src_end && *src == '\n')
+ {
+ eol_seen |= EOL_SEEN_CRLF;
+ src++;
+ nchars++;
+ }
+ else
+ eol_seen |= EOL_SEEN_CR;
+ }
+ else if (c == '\n')
+ eol_seen |= EOL_SEEN_LF;
+ continue;
+ }
ONE_MORE_BYTE (c1);
if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
break;
if (UTF_8_2_OCTET_LEADING_P (c))
{
- found = 1;
+ nchars++;
continue;
}
ONE_MORE_BYTE (c2);
break;
if (UTF_8_3_OCTET_LEADING_P (c))
{
- found = 1;
- if (src_base == coding->source
- && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
- bom_found = 1;
+ nchars++;
continue;
}
ONE_MORE_BYTE (c3);
break;
if (UTF_8_4_OCTET_LEADING_P (c))
{
- found = 1;
+ nchars++;
continue;
}
ONE_MORE_BYTE (c4);
break;
if (UTF_8_5_OCTET_LEADING_P (c))
{
- found = 1;
+ nchars++;
continue;
}
break;
if (bom_found)
{
/* The first character 0xFFFE doesn't necessarily mean a BOM. */
- detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
+ detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
}
else
{
detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
- if (found)
- detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
+ if (nchars < src_end - coding->source)
+ /* Fewer characters were found than there are source bytes,
+ which means that we found at least one valid non-ASCII
+ character. */
+ detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG;
}
+ coding->detected_utf8_chars = nchars;
return 1;
}
}
if (single_shifting)
break;
- check_extra_latin:
- if (! VECTORP (Vlatin_extra_code_table)
- || NILP (AREF (Vlatin_extra_code_table, c)))
- {
- rejected = CATEGORY_MASK_ISO;
- break;
- }
- if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
- & CODING_ISO_FLAG_LATIN_EXTRA)
- found |= CATEGORY_MASK_ISO_8_1;
- else
- rejected |= CATEGORY_MASK_ISO_8_1;
- rejected |= CATEGORY_MASK_ISO_8_2;
- break;
+ goto check_extra_latin;
default:
if (c < 0)
}
break;
}
+ check_extra_latin:
+ if (! VECTORP (Vlatin_extra_code_table)
+ || NILP (AREF (Vlatin_extra_code_table, c)))
+ {
+ rejected = CATEGORY_MASK_ISO;
+ break;
+ }
+ if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
+ & CODING_ISO_FLAG_LATIN_EXTRA)
+ found |= CATEGORY_MASK_ISO_8_1;
+ else
+ rejected |= CATEGORY_MASK_ISO_8_1;
+ rejected |= CATEGORY_MASK_ISO_8_2;
+ break;
}
}
detect_info->rejected |= CATEGORY_MASK_ISO;
*charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
coding->errors++;
+ /* Reset the invocation and designation status to the safest
+ one; i.e. designate ASCII to the graphic register 0, and
+ invoke that register to the graphic plane 0. This typically
+ helps when a designation sequence for ASCII "ESC ( B" is
+ somehow broken (e.g. broken by a newline). */
+ CODING_ISO_INVOCATION (coding, 0) = 0;
+ CODING_ISO_DESIGNATION (coding, 0) = charset_ascii;
+ charset_id_0 = charset_ascii;
continue;
break_loop:
eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
coding->mode = 0;
- coding->head_ascii = -1;
if (VECTORP (eol_type))
coding->common_flags = (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_DETECTION_MASK);
coding->decoder = decode_coding_raw_text;
coding->encoder = encode_coding_raw_text;
coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
+ coding->spec.undecided.inhibit_nbd
+ = (encode_inhibit_flag
+ (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection)));
+ coding->spec.undecided.inhibit_ied
+ = (encode_inhibit_flag
+ (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)));
+ coding->spec.undecided.prefer_utf_8
+ = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8));
}
else if (EQ (coding_type, Qiso_2022))
{
*/
-#define EOL_SEEN_NONE 0
-#define EOL_SEEN_LF 1
-#define EOL_SEEN_CR 2
-#define EOL_SEEN_CRLF 4
+static Lisp_Object adjust_coding_eol_type (struct coding_system *coding,
+ int eol_seen);
+
+
+/* Count the ASCII bytes at the head of the source and return that
+   count.  As side effects, store the count in coding->head_ascii and
+   merge the line endings met along the way into coding->eol_seen,
+   which is the "logical or" of EOL_SEEN_LF, EOL_SEEN_CR, and
+   EOL_SEEN_CRLF.  The eol_seen value is reliable only when all the
+   source bytes are ASCII.  */
+
+static int
+check_ascii (struct coding_system *coding)
+{
+  const unsigned char *p, *limit;
+  Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
+  int seen = coding->eol_seen;
+
+  coding_set_source (coding);
+  p = coding->source;
+  limit = p + coding->src_bytes;
+
+  if (inhibit_eol_conversion || SYMBOLP (eol_type))
+    {
+      /* The EOL format need not be worked out here, so only LF is
+         noted while skipping over the ASCII bytes.  */
+      for (; p < limit && (*p & 0x80) == 0; p++)
+        if (*p == '\n')
+          seen |= EOL_SEEN_LF;
+    }
+  else
+    {
+      /* Stop the main scan one byte early so that the byte after a CR
+         can always be inspected for "CR LF".  */
+      const unsigned char *last = limit - 1;
+
+      while (p < last)
+        {
+          int ch = *p;
+
+          if ((ch & 0x80) != 0)
+            break;
+          p++;
+          switch (ch)
+            {
+            case '\n':
+              seen |= EOL_SEEN_LF;
+              break;
+
+            case '\r':
+              if (*p == '\n')
+                {
+                  /* Consume the LF half of the pair as well.  */
+                  seen |= EOL_SEEN_CRLF;
+                  p++;
+                }
+              else
+                seen |= EOL_SEEN_CR;
+              break;
+
+            default:
+              break;
+            }
+        }
+
+      /* Every byte before the last one is ASCII; classify the last
+         byte too, provided it is ASCII itself.  */
+      if (p == last && (*p & 0x80) == 0)
+        {
+          if (*p == '\r')
+            seen |= EOL_SEEN_CR;
+          else if (*p == '\n')
+            seen |= EOL_SEEN_LF;
+          p++;
+        }
+    }
+
+  coding->head_ascii = p - coding->source;
+  coding->eol_seen = seen;
+  return coding->head_ascii;
+}
+
+
+/* Return the number of characters in the source if every byte belongs
+   to valid UTF-8 within the Unicode range; otherwise return -1.  On
+   success, also update coding->eol_seen with the "logical or" of
+   EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF; on a -1 return
+   coding->eol_seen is not stored by this function.  The eol_seen
+   value is reliable only when all the source bytes are valid UTF-8.
+
+   The scan is conservative at the end of the source: a multibyte
+   sequence must finish before the last byte, and a last byte that the
+   scan reaches must be a single-octet character, so a source that
+   ends in a multibyte sequence yields -1.  */
+
+static int
+check_utf_8 (struct coding_system *coding)
+{
+  const unsigned char *src, *end;
+  int eol_seen;
+  int nchars;
+
+  if (coding->head_ascii < 0)
+    check_ascii (coding);
+  else
+    coding_set_source (coding);
+  /* Seed the count from head_ascii only now: when it was negative on
+     entry, check_ascii has just computed it, and reading it any
+     earlier would start the count from that negative value.  */
+  nchars = coding->head_ascii;
+  src = coding->source + coding->head_ascii;
+  /* We look ahead one byte for CR LF.  */
+  end = coding->source + coding->src_bytes - 1;
+  eol_seen = coding->eol_seen;
+  while (src < end)
+    {
+      int c = *src;
+
+      if (UTF_8_1_OCTET_P (*src))
+        {
+          src++;
+          /* Only control characters can be line endings.  */
+          if (c < 0x20)
+            {
+              if (c == '\r')
+                {
+                  if (*src == '\n')
+                    {
+                      /* CR LF is two characters: count the LF here
+                         and the CR at the bottom of the loop.  */
+                      eol_seen |= EOL_SEEN_CRLF;
+                      src++;
+                      nchars++;
+                    }
+                  else
+                    eol_seen |= EOL_SEEN_CR;
+                }
+              else if (c == '\n')
+                eol_seen |= EOL_SEEN_LF;
+            }
+        }
+      else if (UTF_8_2_OCTET_LEADING_P (c))
+        {
+          if (c < 0xC2		/* overlong sequence */
+              || src + 1 >= end
+              || ! UTF_8_EXTRA_OCTET_P (src[1]))
+            return -1;
+          src += 2;
+        }
+      else if (UTF_8_3_OCTET_LEADING_P (c))
+        {
+          if (src + 2 >= end
+              || ! (UTF_8_EXTRA_OCTET_P (src[1])
+                    && UTF_8_EXTRA_OCTET_P (src[2])))
+            return -1;
+          /* Rebuild the code point to reject out-of-range values.  */
+          c = (((c & 0xF) << 12)
+               | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
+          if (c < 0x800			  /* overlong sequence */
+              || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
+            return -1;
+          src += 3;
+        }
+      else if (UTF_8_4_OCTET_LEADING_P (c))
+        {
+          if (src + 3 >= end
+              || ! (UTF_8_EXTRA_OCTET_P (src[1])
+                    && UTF_8_EXTRA_OCTET_P (src[2])
+                    && UTF_8_EXTRA_OCTET_P (src[3])))
+            return -1;
+          c = (((c & 0x7) << 18) | ((src[1] & 0x3F) << 12)
+               | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
+          if (c < 0x10000		/* overlong sequence */
+              || c >= 0x110000)	/* non-Unicode character */
+            return -1;
+          src += 4;
+        }
+      else
+        return -1;
+      nchars++;
+    }
+
+  /* SRC can be past END here when the last two bytes were CR LF; in
+     that case there is no final byte left to classify.  */
+  if (src == end)
+    {
+      if (! UTF_8_1_OCTET_P (*src))
+        return -1;
+      nchars++;
+      if (*src == '\r')
+        eol_seen |= EOL_SEEN_CR;
+      else if (*src == '\n')
+        eol_seen |= EOL_SEEN_LF;
+    }
+  coding->eol_seen = eol_seen;
+  return nchars;
+}
+
/* Detect how end-of-line of a text of length SRC_BYTES pointed by
SOURCE is encoded. If CATEGORY is one of
Lisp_Object eol_type;
eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (! VECTORP (eol_type))
+ /* Already adjusted. */
+ return eol_type;
if (eol_seen & EOL_SEEN_LF)
{
coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
{
const unsigned char *src, *src_end;
unsigned int saved_mode = coding->mode;
+ Lisp_Object found = Qnil;
+ Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
coding->consumed = coding->consumed_char = 0;
coding->produced = coding->produced_char = 0;
coding_set_source (coding);
src_end = coding->source + coding->src_bytes;
- coding->head_ascii = 0;
+ coding->eol_seen = EOL_SEEN_NONE;
/* If we have not yet decided the text encoding type, detect it
now. */
if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
int c, i;
struct coding_detection_info detect_info;
bool null_byte_found = 0, eight_bit_found = 0;
+ bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
+ inhibit_null_byte_detection);
+ bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied,
+ inhibit_iso_escape_detection);
+ bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
+ coding->head_ascii = 0;
detect_info.checked = detect_info.found = detect_info.rejected = 0;
for (src = coding->source; src < src_end; src++)
{
else if (c < 0x20)
{
if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
- && ! inhibit_iso_escape_detection
+ && ! inhibit_ied
&& ! detect_info.checked)
{
if (detect_coding_iso_2022 (coding, &detect_info))
break;
}
}
- else if (! c && !inhibit_null_byte_detection)
+ else if (! c && !inhibit_nbd)
{
null_byte_found = 1;
if (eight_bit_found)
break;
}
+ else if (! disable_ascii_optimization
+ && ! inhibit_eol_conversion)
+ {
+ if (c == '\r')
+ {
+ if (src < src_end && src[1] == '\n')
+ {
+ coding->eol_seen |= EOL_SEEN_CRLF;
+ src++;
+ if (! eight_bit_found)
+ coding->head_ascii++;
+ }
+ else
+ coding->eol_seen |= EOL_SEEN_CR;
+ }
+ else if (c == '\n')
+ {
+ coding->eol_seen |= EOL_SEEN_LF;
+ }
+ }
+
if (! eight_bit_found)
coding->head_ascii++;
}
detect_info.checked |= ~CATEGORY_MASK_UTF_16;
detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
}
+ else if (prefer_utf_8
+ && detect_coding_utf_8 (coding, &detect_info))
+ {
+ detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+ detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+ }
for (i = 0; i < coding_category_raw_text; i++)
{
category = coding_priorities[i];
}
else if ((*(this->detector)) (coding, &detect_info)
&& detect_info.found & (1 << category))
- {
- if (category == coding_category_utf_16_auto)
- {
- if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
- category = coding_category_utf_16_le;
- else
- category = coding_category_utf_16_be;
- }
- break;
- }
+ break;
}
}
if (i < coding_category_raw_text)
- setup_coding_system (CODING_ID_NAME (this->id), coding);
+ {
+ if (category == coding_category_utf_8_auto)
+ {
+ Lisp_Object coding_systems;
+
+ coding_systems = AREF (CODING_ID_ATTRS (this->id),
+ coding_attr_utf_bom);
+ if (CONSP (coding_systems))
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
+ found = XCAR (coding_systems);
+ else
+ found = XCDR (coding_systems);
+ }
+ else
+ found = CODING_ID_NAME (this->id);
+ }
+ else if (category == coding_category_utf_16_auto)
+ {
+ Lisp_Object coding_systems;
+
+ coding_systems = AREF (CODING_ID_ATTRS (this->id),
+ coding_attr_utf_bom);
+ if (CONSP (coding_systems))
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ found = XCAR (coding_systems);
+ else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
+ found = XCDR (coding_systems);
+ }
+ else
+ found = CODING_ID_NAME (this->id);
+ }
+ else
+ found = CODING_ID_NAME (this->id);
+ }
else if (null_byte_found)
- setup_coding_system (Qno_conversion, coding);
+ found = Qno_conversion;
else if ((detect_info.rejected & CATEGORY_MASK_ANY)
== CATEGORY_MASK_ANY)
- setup_coding_system (Qraw_text, coding);
+ found = Qraw_text;
else if (detect_info.rejected)
for (i = 0; i < coding_category_raw_text; i++)
if (! (detect_info.rejected & (1 << coding_priorities[i])))
{
this = coding_categories + coding_priorities[i];
- setup_coding_system (CODING_ID_NAME (this->id), coding);
+ found = CODING_ID_NAME (this->id);
break;
}
}
coding_systems
= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
detect_info.found = detect_info.rejected = 0;
- coding->head_ascii = 0;
- if (CONSP (coding_systems)
- && detect_coding_utf_8 (coding, &detect_info))
+ if (check_ascii (coding) == coding->src_bytes)
{
- if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
- setup_coding_system (XCAR (coding_systems), coding);
- else
- setup_coding_system (XCDR (coding_systems), coding);
+ if (CONSP (coding_systems))
+ found = XCDR (coding_systems);
+ }
+ else
+ {
+ if (CONSP (coding_systems)
+ && detect_coding_utf_8 (coding, &detect_info))
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
+ found = XCAR (coding_systems);
+ else
+ found = XCDR (coding_systems);
+ }
}
}
else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
&& detect_coding_utf_16 (coding, &detect_info))
{
if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
- setup_coding_system (XCAR (coding_systems), coding);
+ found = XCAR (coding_systems);
else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
- setup_coding_system (XCDR (coding_systems), coding);
+ found = XCDR (coding_systems);
}
}
+
+ if (! NILP (found))
+ {
+ int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE
+ : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF
+ : EQ (eol_type, Qmac) ? EOL_SEEN_CR
+ : EOL_SEEN_LF);
+
+ setup_coding_system (found, coding);
+ if (specified_eol != EOL_SEEN_NONE)
+ adjust_coding_eol_type (coding, specified_eol);
+ }
+
coding->mode = saved_mode;
}
if (CHAR_TABLE_P (standard))
{
if (CONSP (translation_table))
- translation_table = nconc2 (translation_table,
- Fcons (standard, Qnil));
+ translation_table = nconc2 (translation_table, list1 (standard));
else
- translation_table = Fcons (translation_table,
- Fcons (standard, Qnil));
+ translation_table = list2 (translation_table, standard);
}
}
produced = dst - (coding->destination + coding->produced);
if (BUFFERP (coding->dst_object) && produced_chars > 0)
- insert_from_gap (produced_chars, produced);
+ insert_from_gap (produced_chars, produced, 0);
coding->produced += produced;
coding->produced_char += produced_chars;
return carryover;
#define ALLOC_CONVERSION_WORK_AREA(coding) \
do { \
- int size = CHARBUF_SIZE; \
- \
- coding->charbuf = NULL; \
- while (size > 1024) \
- { \
- coding->charbuf = alloca (sizeof (int) * size); \
- if (coding->charbuf) \
- break; \
- size >>= 1; \
- } \
- if (! coding->charbuf) \
- { \
- record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
- return; \
- } \
- coding->charbuf_size = size; \
+ coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \
+ coding->charbuf_size = CHARBUF_SIZE; \
} while (0)
int carryover;
int i;
+ USE_SAFE_ALLOCA;
+
if (BUFFERP (coding->src_object)
&& coding->src_pos > 0
&& coding->src_pos < GPT
bset_undo_list (current_buffer, undo_list);
record_insert (coding->dst_pos, coding->produced_char);
}
+
+ SAFE_FREE ();
}
int max_lookup;
struct ccl_spec cclspec;
+ USE_SAFE_ALLOCA;
+
attrs = CODING_ID_ATTRS (coding->id);
if (coding->encoder == encode_coding_raw_text)
translation_table = Qnil, max_lookup = 0;
} while (coding->consumed_char < coding->src_chars);
if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
- insert_from_gap (coding->produced_char, coding->produced);
+ insert_from_gap (coding->produced_char, coding->produced, 0);
+
+ SAFE_FREE ();
}
}
-static Lisp_Object
+static void
code_conversion_restore (Lisp_Object arg)
{
Lisp_Object current, workbuf;
}
set_buffer_internal (XBUFFER (current));
UNGCPRO;
- return Qnil;
}
Lisp_Object
ptrdiff_t count = SPECPDL_INDEX ();
Lisp_Object attrs;
- code_conversion_save (0, 0);
-
coding->src_object = Fcurrent_buffer ();
coding->src_chars = chars;
coding->src_bytes = bytes;
coding->dst_pos_byte = PT_BYTE;
coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
+ coding->head_ascii = -1;
+ coding->detected_utf8_chars = -1;
+ coding->eol_seen = EOL_SEEN_NONE;
if (CODING_REQUIRE_DETECTION (coding))
detect_coding (coding);
+ attrs = CODING_ID_ATTRS (coding->id);
+ if (! disable_ascii_optimization
+ && ! coding->src_multibyte
+ && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+ && NILP (CODING_ATTR_POST_READ (attrs))
+ && NILP (get_translation_table (attrs, 0, NULL)))
+ {
+ chars = coding->head_ascii;
+ if (chars < 0)
+ chars = check_ascii (coding);
+ if (chars != bytes)
+ {
+ /* There exists a non-ASCII byte. */
+ if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8))
+ {
+ if (coding->detected_utf8_chars >= 0)
+ chars = coding->detected_utf8_chars;
+ else
+ chars = check_utf_8 (coding);
+ if (CODING_UTF_8_BOM (coding) != utf_without_bom
+ && coding->head_ascii == 0
+ && coding->source[0] == UTF_8_BOM_1
+ && coding->source[1] == UTF_8_BOM_2
+ && coding->source[2] == UTF_8_BOM_3)
+ {
+ chars--;
+ bytes -= 3;
+ coding->src_bytes -= 3;
+ }
+ }
+ else
+ chars = -1;
+ }
+ if (chars >= 0)
+ {
+ Lisp_Object eol_type;
+
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (VECTORP (eol_type))
+ {
+ if (coding->eol_seen != EOL_SEEN_NONE)
+ eol_type = adjust_coding_eol_type (coding, coding->eol_seen);
+ }
+ if (EQ (eol_type, Qmac))
+ {
+ unsigned char *src_end = GAP_END_ADDR;
+ unsigned char *src = src_end - coding->src_bytes;
+
+ while (src < src_end)
+ {
+ if (*src++ == '\r')
+ src[-1] = '\n';
+ }
+ }
+ else if (EQ (eol_type, Qdos))
+ {
+ unsigned char *src = GAP_END_ADDR;
+ unsigned char *src_beg = src - coding->src_bytes;
+ unsigned char *dst = src;
+ ptrdiff_t diff;
+
+ while (src_beg < src)
+ {
+ *--dst = *--src;
+ if (*src == '\n' && src > src_beg && src[-1] == '\r')
+ src--;
+ }
+ diff = dst - src;
+ bytes -= diff;
+ chars -= diff;
+ }
+ coding->produced = bytes;
+ coding->produced_char = chars;
+ insert_from_gap (chars, bytes, 1);
+ return;
+ }
+ }
+ code_conversion_save (0, 0);
coding->mode |= CODING_MODE_LAST_BLOCK;
current_buffer->text->inhibit_shrinking = 1;
decode_coding (coding);
current_buffer->text->inhibit_shrinking = 0;
- attrs = CODING_ID_ATTRS (coding->id);
if (! NILP (CODING_ATTR_POST_READ (attrs)))
{
ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
set_buffer_internal (XBUFFER (coding->dst_object));
if (dst_bytes < coding->produced)
{
+ eassert (coding->produced > 0);
destination = xrealloc (destination, coding->produced);
- if (! destination)
- {
- record_conversion_result (coding,
- CODING_RESULT_INSUFFICIENT_MEM);
- unbind_to (count, Qnil);
- return;
- }
if (BEGV < GPT && GPT < BEGV + coding->produced_char)
move_gap_both (BEGV, BEGV_BYTE);
memcpy (destination, BEGV_ADDR, coding->produced);
return code_convert_string_norecord (str, Qutf_16le, 0);
}
+/* Convert the NUL-terminated wide string WSTR to a Lisp string by
+   wrapping its raw bytes in a unibyte string and handing that to
+   from_unicode.  */
+Lisp_Object
+from_unicode_buffer (const wchar_t *wstr)
+{
+  /* Ask for one byte beyond the characters themselves; we get one of
+     the two final 0 bytes for free.  */
+  size_t nbytes = 1 + sizeof (wchar_t) * wcslen (wstr);
+  Lisp_Object raw = make_unibyte_string ((char *) wstr, nbytes);
+
+  return from_unicode (raw);
+}
+
wchar_t *
to_unicode (Lisp_Object str, Lisp_Object *buf)
{
*buf = code_convert_string_norecord (str, Qutf_16le, 1);
- /* We need to make a another copy (in addition to the one made by
+ /* We need to make another copy (in addition to the one made by
code_convert_string_norecord) to ensure that the final string is
_doubly_ zero terminated --- that is, that the string is
terminated by two zero bytes and one utf-16le null character.
enum coding_category category IF_LINT (= 0);
struct coding_system *this IF_LINT (= NULL);
int c, i;
+ bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd,
+ inhibit_null_byte_detection);
+ bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied,
+ inhibit_iso_escape_detection);
+ bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8;
/* Skip all ASCII bytes except for a few ISO2022 controls. */
for (; src < src_end; src++)
else if (c < 0x20)
{
if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
- && ! inhibit_iso_escape_detection
+ && ! inhibit_ied
&& ! detect_info.checked)
{
if (detect_coding_iso_2022 (&coding, &detect_info))
break;
}
}
- else if (! c && !inhibit_null_byte_detection)
+ else if (! c && !inhibit_nbd)
{
null_byte_found = 1;
if (eight_bit_found)
detect_info.checked |= ~CATEGORY_MASK_UTF_16;
detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
}
+ else if (prefer_utf_8
+ && detect_coding_utf_8 (&coding, &detect_info))
+ {
+ detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+ detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+ }
for (i = 0; i < coding_category_raw_text; i++)
{
category = coding_priorities[i];
{
detect_info.found = CATEGORY_MASK_RAW_TEXT;
id = CODING_SYSTEM_ID (Qno_conversion);
- val = Fcons (make_number (id), Qnil);
+ val = list1 (make_number (id));
}
else if (! detect_info.rejected && ! detect_info.found)
{
detect_info.found = CATEGORY_MASK_ANY;
id = coding_categories[coding_category_undecided].id;
- val = Fcons (make_number (id), Qnil);
+ val = list1 (make_number (id));
}
else if (highest)
{
if (detect_info.found)
{
detect_info.found = 1 << category;
- val = Fcons (make_number (this->id), Qnil);
+ val = list1 (make_number (this->id));
}
else
for (i = 0; i < coding_category_raw_text; i++)
{
detect_info.found = 1 << coding_priorities[i];
id = coding_categories[coding_priorities[i]].id;
- val = Fcons (make_number (id), Qnil);
+ val = list1 (make_number (id));
break;
}
}
found |= 1 << category;
id = coding_categories[category].id;
if (id >= 0)
- val = Fcons (make_number (id), val);
+ val = list1 (make_number (id));
}
}
for (i = coding_category_raw_text - 1; i >= 0; i--)
this = coding_categories + coding_category_utf_8_sig;
else
this = coding_categories + coding_category_utf_8_nosig;
- val = Fcons (make_number (this->id), Qnil);
+ val = list1 (make_number (this->id));
}
}
else if (base_category == coding_category_utf_16_auto)
this = coding_categories + coding_category_utf_16_be_nosig;
else
this = coding_categories + coding_category_utf_16_le_nosig;
- val = Fcons (make_number (this->id), Qnil);
+ val = list1 (make_number (this->id));
}
}
else
{
detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
- val = Fcons (make_number (coding.id), Qnil);
+ val = list1 (make_number (coding.id));
}
/* Then, detect eol-format if necessary. */
Lisp_Object attrs;
attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
- if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
- && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
+ if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)))
{
ASET (attrs, coding_attr_trans_tbl,
get_translation_table (attrs, 1, NULL));
attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
ASET (attrs, coding_attr_trans_tbl,
get_translation_table (attrs, 1, NULL));
- list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
+ list = Fcons (list2 (elt, attrs), list);
}
if (STRINGP (start))
tset_charset_list
(term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
? coding_charset_list (terminal_coding)
- : Fcons (make_number (charset_ascii), Qnil)));
+ : list1 (make_number (charset_ascii))));
return Qnil;
}
int i;
memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
- subsidiaries = Fmake_vector (make_number (3), Qnil);
+ subsidiaries = make_uninit_vector (3);
for (i = 0; i < 3; i++)
{
strcpy (buf + base_name_len, suffixes[i]);
{
dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
if (dim < dim2)
- tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
+ tmp = list2 (XCAR (tail), tmp);
else
- tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
+ tmp = list2 (tmp, XCAR (tail));
}
else
{
break;
}
if (NILP (tmp2))
- tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
+ tmp = nconc2 (tmp, list1 (XCAR (tail)));
else
{
XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
CHECK_VECTOR (initial);
for (i = 0; i < 4; i++)
{
- val = Faref (initial, make_number (i));
+ val = AREF (initial, i);
if (! NILP (val))
{
struct charset *charset;
: coding_category_utf_8_sig);
}
else if (EQ (coding_type, Qundecided))
- category = coding_category_undecided;
+ {
+ if (nargs < coding_arg_undecided_max)
+ goto short_args;
+ ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection,
+ args[coding_arg_undecided_inhibit_null_byte_detection]);
+ ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
+ args[coding_arg_undecided_inhibit_iso_escape_detection]);
+ ASET (attrs, coding_attr_undecided_prefer_utf_8,
+ args[coding_arg_undecided_prefer_utf_8]);
+ category = coding_category_undecided;
+ }
else
error ("Invalid coding system type: %s",
SDATA (SYMBOL_NAME (coding_type)));
&& ! EQ (eol_type, Qmac))
error ("Invalid eol-type");
- aliases = Fcons (name, Qnil);
+ aliases = list1 (name);
if (NILP (eol_type))
{
Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
this_name = AREF (eol_type, i);
- this_aliases = Fcons (this_name, Qnil);
+ this_aliases = list1 (this_name);
this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
- this_spec = Fmake_vector (make_number (3), attrs);
+ this_spec = make_uninit_vector (3);
+ ASET (this_spec, 0, attrs);
ASET (this_spec, 1, this_aliases);
ASET (this_spec, 2, this_eol_type);
Fputhash (this_name, this_spec, Vcoding_system_hash_table);
}
}
- spec_vec = Fmake_vector (make_number (3), attrs);
+ spec_vec = make_uninit_vector (3);
+ ASET (spec_vec, 0, attrs);
ASET (spec_vec, 1, aliases);
ASET (spec_vec, 2, eol_type);
list. */
while (!NILP (XCDR (aliases)))
aliases = XCDR (aliases);
- XSETCDR (aliases, Fcons (alias, Qnil));
+ XSETCDR (aliases, list1 (alias));
eol_type = AREF (spec, 2);
if (VECTORP (eol_type))
intern_c_string ("coding-category-undecided"));
DEFSYM (Qinsufficient_source, "insufficient-source");
- DEFSYM (Qinconsistent_eol, "inconsistent-eol");
DEFSYM (Qinvalid_source, "invalid-source");
DEFSYM (Qinterrupted, "interrupted");
- DEFSYM (Qinsufficient_memory, "insufficient-memory");
DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
defsubr (&Scoding_system_p);
The other way to read escape sequences in a file without decoding is
to explicitly specify some coding system that doesn't use ISO-2022
-escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
+escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument]. */);
inhibit_iso_escape_detection = 0;
DEFVAR_BOOL ("inhibit-null-byte-detection",
decode text as usual. */);
inhibit_null_byte_detection = 0;
+ DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
+ doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
+Internal use only. Removed after the experimental optimizer gets stable. */);
+ disable_ascii_optimization = 0;
+
DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
doc: /* Char table for translating self-inserting characters.
This is applied to the result of input methods, not their input.
Vtranslation_table_for_input = Qnil;
{
- Lisp_Object args[coding_arg_max];
+ Lisp_Object args[coding_arg_undecided_max];
Lisp_Object plist[16];
int i;
- for (i = 0; i < coding_arg_max; i++)
+ for (i = 0; i < coding_arg_undecided_max; i++)
args[i] = Qnil;
plist[0] = intern_c_string (":name");
plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
plist[15] = args[coding_arg_eol_type] = Qnil;
args[coding_arg_plist] = Flist (16, plist);
- Fdefine_coding_system_internal (coding_arg_max, args);
+ args[coding_arg_undecided_inhibit_null_byte_detection] = make_number (0);
+ args[coding_arg_undecided_inhibit_iso_escape_detection] = make_number (0);
+ Fdefine_coding_system_internal (coding_arg_undecided_max, args);
}
setup_coding_system (Qno_conversion, &safe_terminal_coding);