coding_arg_ccl_max
};
+enum define_coding_undecided_arg_index
+ {
+ coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
+ coding_arg_undecided_inhibit_iso_escape_detection,
+ coding_arg_undecided_prefer_utf_8,
+ coding_arg_undecided_max
+ };
+
/* Hash table for all coding systems. Keys are coding system symbols
and values are spec vectors of the corresponding coding system. A
spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
coding_attr_emacs_mule_full,
+ coding_attr_undecided_inhibit_null_byte_detection,
+ coding_attr_undecided_inhibit_iso_escape_detection,
+ coding_attr_undecided_prefer_utf_8,
+
coding_attr_last_index
};
hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
coding_system_symbol, NULL)
-/* Return 1 if CODING_SYSTEM_SYMBOL is a coding system. */
+/* Return true if CODING_SYSTEM_SYMBOL is a coding system. */
#define CODING_SYSTEM_P(coding_system_symbol) \
(CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
if (CODING_SYSTEM_ID (x) < 0 \
&& NILP (Fcheck_coding_system (x))) \
wrong_type_argument (Qcoding_system_p, (x)); \
- } while (0)
+ } while (false)
/* Check if X is a coding system or not. If it is, set SEPC to the
} \
if (NILP (spec)) \
wrong_type_argument (Qcoding_system_p, (x)); \
- } while (0)
+ } while (false)
/* Check if X is a coding system or not. If it is, set ID to the
} \
if (id < 0) \
wrong_type_argument (Qcoding_system_p, (x)); \
- } while (0)
+ } while (false)
/*** GENERAL section ***/
CODING_RESULT_SUCCESS,
CODING_RESULT_INSUFFICIENT_SRC,
CODING_RESULT_INSUFFICIENT_DST,
- CODING_RESULT_INCONSISTENT_EOL,
CODING_RESULT_INVALID_SRC,
- CODING_RESULT_INTERRUPT,
- CODING_RESULT_INSUFFICIENT_MEM
+ CODING_RESULT_INTERRUPT
};
/* Macros used for the member `mode' of the struct coding_system. */
-/* If set, recover the original CR or LF of the already decoded text
- when the decoding routine encounters an inconsistent eol format. */
-#define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
-
/* If set, the decoding/encoding routines treat the current data as
the last block of the whole text to be converted, and do the
appropriate finishing job. */
-#define CODING_MODE_LAST_BLOCK 0x02
+#define CODING_MODE_LAST_BLOCK 0x01
/* If set, it means that the current source text is in a buffer which
enables selective display. */
-#define CODING_MODE_SELECTIVE_DISPLAY 0x04
+#define CODING_MODE_SELECTIVE_DISPLAY 0x02
/* This flag is used by the decoding/encoding routines on the fly. If
set, it means that right-to-left text is being processed. */
-#define CODING_MODE_DIRECTION 0x08
+#define CODING_MODE_DIRECTION 0x04
-#define CODING_MODE_FIXED_DESTINATION 0x10
+#define CODING_MODE_FIXED_DESTINATION 0x08
/* If set, it means that the encoding routines produces some safe
ASCII characters (usually '?') for unsupported characters. */
-#define CODING_MODE_SAFE_ENCODING 0x20
+#define CODING_MODE_SAFE_ENCODING 0x10
/* For handling composition sequence. */
#include "composite.h"
/* True temporarily only when graphic register 2 or 3 is invoked by
single-shift while encoding. */
- unsigned single_shifting : 1;
+ bool_bf single_shifting : 1;
/* True temporarily only when processing at beginning of line. */
- unsigned bol : 1;
+ bool_bf bol : 1;
/* If true, we are now scanning embedded UTF-8 sequence. */
- unsigned embedded_utf_8 : 1;
+ bool_bf embedded_utf_8 : 1;
/* The current composition. */
struct composition_status cmp_status;
struct composition_status cmp_status;
};
-struct ccl_spec;
+struct undecided_spec
+{
+ /* Inhibit null byte detection. 1 means always inhibit,
+ -1 means do not inhibit, 0 means rely on user variable. */
+ int inhibit_nbd;
+
+ /* Inhibit ISO escape detection. -1, 0, 1 as above. */
+ int inhibit_ied;
+
+ /* Prefer UTF-8 when the input could be other encodings. */
+ bool prefer_utf_8;
+};
enum utf_bom_type
{
struct utf_16_spec utf_16;
enum utf_bom_type utf_8_bom;
struct emacs_mule_spec emacs_mule;
+ struct undecided_spec undecided;
} spec;
int max_charset_id;
unsigned char *safe_charsets;
/* The following two members specify how binary 8-bit code 128..255
- are represented in source and destination text respectively. 1
- means they are represented by 2-byte sequence, 0 means they are
+ are represented in source and destination text respectively. True
+ means they are represented by 2-byte sequence, false means they are
represented by 1-byte as is (see the comment in character.h). */
- unsigned src_multibyte : 1;
- unsigned dst_multibyte : 1;
+ bool_bf src_multibyte : 1;
+ bool_bf dst_multibyte : 1;
/* How may heading bytes we can skip for decoding. This is set to
-1 in setup_coding_system, and updated by detect_coding. So,
when this is equal to the byte length of the text being
- converted, we can skip the actual conversion process. */
+ converted, we can skip the actual conversion process except for
+ the eol format. */
ptrdiff_t head_ascii;
+ ptrdiff_t detected_utf8_chars;
+
+ /* Used internally in coding.c. See the comment of detect_ascii. */
+ int eol_seen;
+
/* The following members are set by encoding/decoding routine. */
ptrdiff_t produced, produced_char, consumed, consumed_char;
/* True if the source of conversion is not in the member
`charbuf', but at `src_object'. */
- unsigned chars_at_source : 1;
+ bool_bf chars_at_source : 1;
+
+ /* Nonzero if the result of conversion is in `destination'
+ buffer rather than in `dst_object'. */
+ bool_bf raw_destination : 1;
- /* Set to 1 if charbuf contains an annotation. */
- unsigned annotated : 1;
+ /* Set to true if charbuf contains an annotation. */
+ bool_bf annotated : 1;
unsigned char carryover[64];
int carryover_bytes;
#define CODING_REQUIRE_DETECTION_MASK 0x1000
#define CODING_RESET_AT_BOL_MASK 0x2000
-/* Return 1 if the coding context CODING requires annotation
+/* Return nonzero if the coding context CODING requires annotation
handling. */
#define CODING_REQUIRE_ANNOTATION(coding) \
((coding)->common_flags & CODING_ANNOTATION_MASK)
-/* Return 1 if the coding context CODING prefers decoding into unibyte. */
+/* Return nonzero if the coding context CODING prefers decoding into
+ unibyte. */
#define CODING_FOR_UNIBYTE(coding) \
((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)
-/* Return 1 if the coding context CODING requires specific code to be
+/* Return nonzero if the coding context CODING requires specific code to be
attached at the tail of converted text. */
#define CODING_REQUIRE_FLUSHING(coding) \
((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
-/* Return 1 if the coding context CODING requires code conversion on
+/* Return nonzero if the coding context CODING requires code conversion on
decoding. */
#define CODING_REQUIRE_DECODING(coding) \
((coding)->dst_multibyte \
|| (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
-/* Return 1 if the coding context CODING requires code conversion on
+/* Return nonzero if the coding context CODING requires code conversion on
encoding.
The non-multibyte part of the condition is to support encoding of
unibyte strings/buffers generated by string-as-unibyte or
|| (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY)
-/* Return 1 if the coding context CODING requires some kind of code
+/* Return nonzero if the coding context CODING requires some kind of code
detection. */
#define CODING_REQUIRE_DETECTION(coding) \
((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
-/* Return 1 if the coding context CODING requires code conversion on
+/* Return nonzero if the coding context CODING requires code conversion on
decoding or some kind of code detection. */
#define CODING_MAY_REQUIRE_DECODING(coding) \
(CODING_REQUIRE_DECODING (coding) \
(j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \
(code) = (j1 << 8) | j2; \
- } while (0)
+ } while (false)
#define SJIS_TO_JIS2(code) \
do { \
j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F)); \
} \
(code) = (j1 << 8) | j2; \
- } while (0)
+ } while (false)
#define JIS_TO_SJIS(code) \
(s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \
s2 = j2 + 0x7E); \
(code) = (s1 << 8) | s2; \
- } while (0)
+ } while (false)
#define JIS_TO_SJIS2(code) \
do { \
s2 = j2 + 0x7E; \
} \
(code) = (s1 << 8) | s2; \
- } while (0)
+ } while (false)
/* Encode the file name NAME using the specified coding system
for file names, if any. */
-#define ENCODE_FILE(name) \
- (! NILP (Vfile_name_coding_system) \
- ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
- : (! NILP (Vdefault_file_name_coding_system) \
- ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
- : name))
-
+#define ENCODE_FILE(NAME) encode_file_name (NAME)
/* Decode the file name NAME using the specified coding system
for file names, if any. */
-#define DECODE_FILE(name) \
- (! NILP (Vfile_name_coding_system) \
- ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
- : (! NILP (Vdefault_file_name_coding_system) \
- ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
- : name))
-
+#define DECODE_FILE(NAME) decode_file_name (NAME)
/* Encode the string STR using the specified coding system
for system functions, if any. */
#define ENCODE_SYSTEM(str) \
(! NILP (Vlocale_coding_system) \
- ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
+ ? code_convert_string_norecord (str, Vlocale_coding_system, true) \
: str)
/* Decode the string STR using the specified coding system
for system functions, if any. */
#define DECODE_SYSTEM(str) \
(! NILP (Vlocale_coding_system) \
- ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
+ ? code_convert_string_norecord (str, Vlocale_coding_system, false) \
: str)
/* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op. */
-#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
+#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, true)
/* Extern declarations. */
extern Lisp_Object code_conversion_save (bool, bool);
Lisp_Object, bool, bool, bool);
extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
bool);
+extern Lisp_Object encode_file_name (Lisp_Object);
+extern Lisp_Object decode_file_name (Lisp_Object);
extern Lisp_Object raw_text_coding_system (Lisp_Object);
extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
extern Lisp_Object complement_process_encoding_system (Lisp_Object);
failure modes. STR itself is not modified. */
extern Lisp_Object from_unicode (Lisp_Object str);
+/* Convert WSTR to an Emacs string. */
+extern Lisp_Object from_unicode_buffer (const wchar_t* wstr);
+
#endif /* WINDOWSNT || CYGWIN */
/* Macros for backward compatibility. */
-#define decode_coding_region(coding, from, to) \
- decode_coding_object (coding, Fcurrent_buffer (), \
- from, CHAR_TO_BYTE (from), \
- to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
-
-
-#define encode_coding_region(coding, from, to) \
- encode_coding_object (coding, Fcurrent_buffer (), \
- from, CHAR_TO_BYTE (from), \
- to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
-
-
-#define decode_coding_string(coding, string, nocopy) \
- decode_coding_object (coding, string, 0, 0, SCHARS (string), \
- SBYTES (string), Qt)
-
#define encode_coding_string(coding, string, nocopy) \
(STRING_MULTIBYTE(string) ? \
(encode_coding_object (coding, string, 0, 0, SCHARS (string), \
(coding)->src_chars = (coding)->src_bytes = (bytes); \
decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
(dst_object)); \
- } while (0)
+ } while (false)
extern Lisp_Object preferred_coding_system (void);