#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
+#define CODING_ISO_FLAG_LEVEL_4 0x20000
+
#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
/* A character to be produced on output if encoding of the original
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
+/* Encode a flag that can be nil, something else, or t as -1, 0, 1. */
+
+static int
+encode_inhibit_flag (Lisp_Object flag)
+{
+ return NILP (flag) ? -1 : EQ (flag, Qt);
+}
+
+/* True if the value of ENCODED_FLAG says a flag should be treated as set.
+ 1 means yes, -1 means no, 0 means ask the user variable VAR. */
+
+static bool
+inhibit_flag (int encoded_flag, bool var)
+{
+ return 0 < encoded_flag + var;
+}
+
#define CODING_GET_INFO(coding, attrs, charset_list) \
do { \
(attrs) = CODING_ID_ATTRS ((coding)->id); \
(charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
} while (0)
+static void
+CHECK_NATNUM_CAR (Lisp_Object x)
+{
+ Lisp_Object tmp = XCAR (x);
+ CHECK_NATNUM (tmp);
+ XSETCAR (x, tmp);
+}
+
+static void
+CHECK_NATNUM_CDR (Lisp_Object x)
+{
+ Lisp_Object tmp = XCDR (x);
+ CHECK_NATNUM (tmp);
+ XSETCDR (x, tmp);
+}
+
/* Safely get one byte from the source text pointed by SRC which ends
at SRC_END, and set C to that byte. If there are not enough bytes
break;
}
+ /* In the simple case, rapidly handle ordinary characters */
+ if (multibytep && ! eol_dos
+ && charbuf < charbuf_end - 6 && src < src_end - 6)
+ {
+ while (charbuf < charbuf_end - 6 && src < src_end - 6)
+ {
+ c1 = *src;
+ if (c1 & 0x80)
+ break;
+ src++;
+ consumed_chars++;
+ *charbuf++ = c1;
+
+ c1 = *src;
+ if (c1 & 0x80)
+ break;
+ src++;
+ consumed_chars++;
+ *charbuf++ = c1;
+
+ c1 = *src;
+ if (c1 & 0x80)
+ break;
+ src++;
+ consumed_chars++;
+ *charbuf++ = c1;
+
+ c1 = *src;
+ if (c1 & 0x80)
+ break;
+ src++;
+ consumed_chars++;
+ *charbuf++ = c1;
+ }
+ /* If we handled at least one character, restart the main loop. */
+ if (src != src_base)
+ continue;
+ }
+
if (byte_after_cr >= 0)
c1 = byte_after_cr, byte_after_cr = -1;
else
else
charset = CHARSET_FROM_ID (charset_id_2);
ONE_MORE_BYTE (c1);
- if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+ if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
+ || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+ && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
+ ? c1 >= 0x80 : c1 < 0x80)))
goto invalid_code;
break;
else
charset = CHARSET_FROM_ID (charset_id_3);
ONE_MORE_BYTE (c1);
- if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+ if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
+ || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+ && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
+ ? c1 >= 0x80 : c1 < 0x80)))
goto invalid_code;
break;
coding->safe_charsets = SDATA (val);
coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
coding->carryover_bytes = 0;
+ coding->raw_destination = 0;
coding_type = CODING_ATTR_TYPE (attrs);
if (EQ (coding_type, Qundecided))
coding->decoder = decode_coding_raw_text;
coding->encoder = encode_coding_raw_text;
coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
+ coding->spec.undecided.inhibit_nbd
+ = (encode_inhibit_flag
+ (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection)));
+ coding->spec.undecided.inhibit_ied
+ = (encode_inhibit_flag
+ (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)));
+ coding->spec.undecided.prefer_utf_8
+ = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8));
}
else if (EQ (coding_type, Qiso_2022))
{
int c, i;
struct coding_detection_info detect_info;
bool null_byte_found = 0, eight_bit_found = 0;
+ bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
+ inhibit_null_byte_detection);
+ bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied,
+ inhibit_iso_escape_detection);
+ bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
coding->head_ascii = 0;
detect_info.checked = detect_info.found = detect_info.rejected = 0;
else if (c < 0x20)
{
if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
- && ! inhibit_iso_escape_detection
+ && ! inhibit_ied
&& ! detect_info.checked)
{
if (detect_coding_iso_2022 (coding, &detect_info))
break;
}
}
- else if (! c && !inhibit_null_byte_detection)
+ else if (! c && !inhibit_nbd)
{
null_byte_found = 1;
if (eight_bit_found)
detect_info.checked |= ~CATEGORY_MASK_UTF_16;
detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
}
+ else if (prefer_utf_8
+ && detect_coding_utf_8 (coding, &detect_info))
+ {
+ detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+ detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+ }
for (i = 0; i < coding_category_raw_text; i++)
{
category = coding_priorities[i];
if (CHAR_TABLE_P (standard))
{
if (CONSP (translation_table))
- translation_table = nconc2 (translation_table,
- Fcons (standard, Qnil));
+ translation_table = nconc2 (translation_table, list1 (standard));
else
- translation_table = Fcons (translation_table,
- Fcons (standard, Qnil));
+ translation_table = list2 (translation_table, standard);
}
}
/* We found a composition. Store the corresponding
annotation data in BUF. */
int *head = buf;
- enum composition_method method = COMPOSITION_METHOD (prop);
+ enum composition_method method = composition_method (prop);
int nchars = COMPOSITION_LENGTH (prop);
ADD_COMPOSITION_DATA (buf, nchars, 0, method);
}
-static Lisp_Object
+static void
code_conversion_restore (Lisp_Object arg)
{
Lisp_Object current, workbuf;
}
set_buffer_internal (XBUFFER (current));
UNGCPRO;
- return Qnil;
}
Lisp_Object
{
if (BUFFERP (coding->dst_object))
coding->dst_object = Fbuffer_string ();
+ else if (coding->raw_destination)
+ /* This is used to avoid creating huge Lisp string.
+ NOTE: caller who sets `raw_destination' is also
+ responsible for freeing `destination' buffer. */
+ coding->dst_object = Qnil;
else
{
coding->dst_object
enum coding_category category IF_LINT (= 0);
struct coding_system *this IF_LINT (= NULL);
int c, i;
+ bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd,
+ inhibit_null_byte_detection);
+ bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied,
+ inhibit_iso_escape_detection);
+ bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8;
/* Skip all ASCII bytes except for a few ISO2022 controls. */
for (; src < src_end; src++)
else if (c < 0x20)
{
if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
- && ! inhibit_iso_escape_detection
+ && ! inhibit_ied
&& ! detect_info.checked)
{
if (detect_coding_iso_2022 (&coding, &detect_info))
break;
}
}
- else if (! c && !inhibit_null_byte_detection)
+ else if (! c && !inhibit_nbd)
{
null_byte_found = 1;
if (eight_bit_found)
detect_info.checked |= ~CATEGORY_MASK_UTF_16;
detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
}
+ else if (prefer_utf_8
+ && detect_coding_utf_8 (&coding, &detect_info))
+ {
+ detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+ detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+ }
for (i = 0; i < coding_category_raw_text; i++)
{
category = coding_priorities[i];
{
detect_info.found = CATEGORY_MASK_RAW_TEXT;
id = CODING_SYSTEM_ID (Qno_conversion);
- val = Fcons (make_number (id), Qnil);
+ val = list1 (make_number (id));
}
else if (! detect_info.rejected && ! detect_info.found)
{
detect_info.found = CATEGORY_MASK_ANY;
id = coding_categories[coding_category_undecided].id;
- val = Fcons (make_number (id), Qnil);
+ val = list1 (make_number (id));
}
else if (highest)
{
if (detect_info.found)
{
detect_info.found = 1 << category;
- val = Fcons (make_number (this->id), Qnil);
+ val = list1 (make_number (this->id));
}
else
for (i = 0; i < coding_category_raw_text; i++)
{
detect_info.found = 1 << coding_priorities[i];
id = coding_categories[coding_priorities[i]].id;
- val = Fcons (make_number (id), Qnil);
+ val = list1 (make_number (id));
break;
}
}
found |= 1 << category;
id = coding_categories[category].id;
if (id >= 0)
- val = Fcons (make_number (id), val);
+ val = list1 (make_number (id));
}
}
for (i = coding_category_raw_text - 1; i >= 0; i--)
this = coding_categories + coding_category_utf_8_sig;
else
this = coding_categories + coding_category_utf_8_nosig;
- val = Fcons (make_number (this->id), Qnil);
+ val = list1 (make_number (this->id));
}
}
else if (base_category == coding_category_utf_16_auto)
this = coding_categories + coding_category_utf_16_be_nosig;
else
this = coding_categories + coding_category_utf_16_le_nosig;
- val = Fcons (make_number (this->id), Qnil);
+ val = list1 (make_number (this->id));
}
}
else
{
detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
- val = Fcons (make_number (coding.id), Qnil);
+ val = list1 (make_number (coding.id));
}
/* Then, detect eol-format if necessary. */
Lisp_Object attrs;
attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
- if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
- && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
+ if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)))
{
ASET (attrs, coding_attr_trans_tbl,
get_translation_table (attrs, 1, NULL));
attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
ASET (attrs, coding_attr_trans_tbl,
get_translation_table (attrs, 1, NULL));
- list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
+ list = Fcons (list2 (elt, attrs), list);
}
if (STRINGP (start))
tset_charset_list
(term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
? coding_charset_list (terminal_coding)
- : Fcons (make_number (charset_ascii), Qnil)));
+ : list1 (make_number (charset_ascii))));
return Qnil;
}
{
dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
if (dim < dim2)
- tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
+ tmp = list2 (XCAR (tail), tmp);
else
- tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
+ tmp = list2 (tmp, XCAR (tail));
}
else
{
break;
}
if (NILP (tmp2))
- tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
+ tmp = nconc2 (tmp, list1 (XCAR (tail)));
else
{
XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
: coding_category_utf_8_sig);
}
else if (EQ (coding_type, Qundecided))
- category = coding_category_undecided;
+ {
+ if (nargs < coding_arg_undecided_max)
+ goto short_args;
+ ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection,
+ args[coding_arg_undecided_inhibit_null_byte_detection]);
+ ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
+ args[coding_arg_undecided_inhibit_iso_escape_detection]);
+ ASET (attrs, coding_attr_undecided_prefer_utf_8,
+ args[coding_arg_undecided_prefer_utf_8]);
+ category = coding_category_undecided;
+ }
else
error ("Invalid coding system type: %s",
SDATA (SYMBOL_NAME (coding_type)));
&& ! EQ (eol_type, Qmac))
error ("Invalid eol-type");
- aliases = Fcons (name, Qnil);
+ aliases = list1 (name);
if (NILP (eol_type))
{
Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
this_name = AREF (eol_type, i);
- this_aliases = Fcons (this_name, Qnil);
+ this_aliases = list1 (this_name);
this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
this_spec = make_uninit_vector (3);
ASET (this_spec, 0, attrs);
list. */
while (!NILP (XCDR (aliases)))
aliases = XCDR (aliases);
- XSETCDR (aliases, Fcons (alias, Qnil));
+ XSETCDR (aliases, list1 (alias));
eol_type = AREF (spec, 2);
if (VECTORP (eol_type))
Fput (Qcoding_system_error, Qerror_message,
build_pure_c_string ("Invalid coding system"));
- /* Intern this now in case it isn't already done.
- Setting this variable twice is harmless.
- But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
-
DEFSYM (Qtranslation_table, "translation-table");
Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
DEFSYM (Qtranslation_table_id, "translation-table-id");
Vtranslation_table_for_input = Qnil;
{
- Lisp_Object args[coding_arg_max];
+ Lisp_Object args[coding_arg_undecided_max];
Lisp_Object plist[16];
int i;
- for (i = 0; i < coding_arg_max; i++)
+ for (i = 0; i < coding_arg_undecided_max; i++)
args[i] = Qnil;
plist[0] = intern_c_string (":name");
plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
plist[15] = args[coding_arg_eol_type] = Qnil;
args[coding_arg_plist] = Flist (16, plist);
- Fdefine_coding_system_internal (coding_arg_max, args);
+ args[coding_arg_undecided_inhibit_null_byte_detection] = make_number (0);
+ args[coding_arg_undecided_inhibit_iso_escape_detection] = make_number (0);
+ Fdefine_coding_system_internal (coding_arg_undecided_max, args);
}
setup_coding_system (Qno_conversion, &safe_terminal_coding);