Lisp_Object Qemacs_mule, Qraw_text;
Lisp_Object Qutf_8_emacs;
+#if defined (WINDOWSNT) || defined (CYGWIN)
+static Lisp_Object Qutf_16le;
+#endif
+
/* Coding-systems are handed between Emacs Lisp programs and C internal
routines by the following three variables. */
/* Coding system to be used to encode text for terminal display when
ISO_shift_out, /* ISO_CODE_SO (0x0E) */
ISO_shift_in, /* ISO_CODE_SI (0x0F) */
ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
- ISO_escape, /* ISO_CODE_SO (0x1B) */
+ ISO_escape, /* ISO_CODE_ESC (0x1B) */
ISO_control_1, /* Control codes in the range
0x80..0x9F, except for the
following 3 codes. */
/* Store multibyte form of the character C in P, and advance P to the
- end of the multibyte form. This is like CHAR_STRING_ADVANCE but it
- never calls MAYBE_UNIFY_CHAR. */
-
-#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) \
- do { \
- if ((c) <= MAX_1_BYTE_CHAR) \
- *(p)++ = (c); \
- else if ((c) <= MAX_2_BYTE_CHAR) \
- *(p)++ = (0xC0 | ((c) >> 6)), \
- *(p)++ = (0x80 | ((c) & 0x3F)); \
- else if ((c) <= MAX_3_BYTE_CHAR) \
- *(p)++ = (0xE0 | ((c) >> 12)), \
- *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \
- *(p)++ = (0x80 | ((c) & 0x3F)); \
- else if ((c) <= MAX_4_BYTE_CHAR) \
- *(p)++ = (0xF0 | (c >> 18)), \
- *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \
- *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \
- *(p)++ = (0x80 | (c & 0x3F)); \
- else if ((c) <= MAX_5_BYTE_CHAR) \
- *(p)++ = 0xF8, \
- *(p)++ = (0x80 | ((c >> 18) & 0x0F)), \
- *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \
- *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \
- *(p)++ = (0x80 | (c & 0x3F)); \
- else \
- (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \
- } while (0)
+ end of the multibyte form. This used to be like CHAR_STRING_ADVANCE
+ without ever calling MAYBE_UNIFY_CHAR, but nowadays we don't call
+ MAYBE_UNIFY_CHAR in CHAR_STRING_ADVANCE. */
+#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) CHAR_STRING_ADVANCE(c, p)
/* Return the character code of character whose multibyte form is at
- P, and advance P to the end of the multibyte form. This is like
- STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */
-
-#define STRING_CHAR_ADVANCE_NO_UNIFY(p) \
- (!((p)[0] & 0x80) \
- ? *(p)++ \
- : ! ((p)[0] & 0x20) \
- ? ((p) += 2, \
- ((((p)[-2] & 0x1F) << 6) \
- | ((p)[-1] & 0x3F) \
- | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
- : ! ((p)[0] & 0x10) \
- ? ((p) += 3, \
- ((((p)[-3] & 0x0F) << 12) \
- | (((p)[-2] & 0x3F) << 6) \
- | ((p)[-1] & 0x3F))) \
- : ! ((p)[0] & 0x08) \
- ? ((p) += 4, \
- ((((p)[-4] & 0xF) << 18) \
- | (((p)[-3] & 0x3F) << 12) \
- | (((p)[-2] & 0x3F) << 6) \
- | ((p)[-1] & 0x3F))) \
- : ((p) += 5, \
- ((((p)[-4] & 0x3F) << 18) \
- | (((p)[-3] & 0x3F) << 12) \
- | (((p)[-2] & 0x3F) << 6) \
- | ((p)[-1] & 0x3F))))
+ P, and advance P to the end of the multibyte form. This used to be
+ like STRING_CHAR_ADVANCE without ever calling MAYBE_UNIFY_CHAR, but
+ nowadays STRING_CHAR_ADVANCE doesn't call MAYBE_UNIFY_CHAR. */
+#define STRING_CHAR_ADVANCE_NO_UNIFY(p) STRING_CHAR_ADVANCE(p)
/* Set coding->source from coding->src_object. */
while (1)
{
const unsigned char *p = src;
+ ptrdiff_t offset;
int i = 0;
if (multibytep)
if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
ccl->last_block = 1;
+ /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */
+ charset_map_loaded = 0;
ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
charset_list);
+ if (charset_map_loaded
+ && (offset = coding_change_source (coding)))
+ {
+ p += offset;
+ src += offset;
+ src_end += offset;
+ }
charbuf += ccl->produced;
if (multibytep)
src += source_byteidx[ccl->consumed];
do
{
+ ptrdiff_t offset;
+
+ /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */
+ charset_map_loaded = 0;
ccl_driver (ccl, charbuf, destination_charbuf,
charbuf_end - charbuf, 1024, charset_list);
+ if (charset_map_loaded
+ && (offset = coding_change_destination (coding)))
+ dst += offset;
if (multibytep)
{
ASSURE_DESTINATION (ccl->produced * 2);
{
category = coding_priorities[i];
this = coding_categories + category;
+ /* Some of this->detector (e.g. detect_coding_sjis)
+ require this information. */
+ coding->id = this->id;
if (this->id < 0)
{
/* No coding system of this category is defined. */
[ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
*/
-static inline void
+static void
produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
{
int len;
[ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
*/
-static inline void
+static void
produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
{
ptrdiff_t from = pos - charbuf[2];
position of a composition after POS (if any) or to LIMIT, and
return BUF. */
-static inline int *
+static int *
handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
struct coding_system *coding, int *buf,
ptrdiff_t *stop)
If the property value is nil, set *STOP to the position where the
property value is non-nil (limiting by LIMIT), and return BUF. */
-static inline int *
+static int *
handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit,
struct coding_system *coding, int *buf,
ptrdiff_t *stop)
return CODING_ID_NAME (id);
}
+#if defined (WINDOWSNT) || defined (CYGWIN)
+
+Lisp_Object
+from_unicode (Lisp_Object str)
+{
+ CHECK_STRING (str);
+ if (!STRING_MULTIBYTE (str) &&
+ SBYTES (str) & 1)
+ {
+ str = Fsubstring (str, make_number (0), make_number (-1));
+ }
+
+ return code_convert_string_norecord (str, Qutf_16le, 0);
+}
+
+wchar_t *
+to_unicode (Lisp_Object str, Lisp_Object *buf)
+{
+ *buf = code_convert_string_norecord (str, Qutf_16le, 1);
+ /* We need to make a another copy (in addition to the one made by
+ code_convert_string_norecord) to ensure that the final string is
+ _doubly_ zero terminated --- that is, that the string is
+ terminated by two zero bytes and one utf-16le null character.
+ Because strings are already terminated with a single zero byte,
+ we just add one additional zero. */
+ str = make_uninit_string (SBYTES (*buf) + 1);
+ memcpy (SDATA (str), SDATA (*buf), SBYTES (*buf));
+ SDATA (str) [SBYTES (*buf)] = '\0';
+ *buf = str;
+ return WCSDATA (*buf);
+}
+
+#endif /* WINDOWSNT || CYGWIN */
+
\f
#ifdef emacs
/*** 8. Emacs Lisp library functions ***/
ptrdiff_t from, to;
ptrdiff_t from_byte, to_byte;
- CHECK_NUMBER_COERCE_MARKER (start);
- CHECK_NUMBER_COERCE_MARKER (end);
-
validate_region (&start, &end);
from = XINT (start), to = XINT (end);
from_byte = CHAR_TO_BYTE (from);
}
-static inline bool
+static bool
char_encodable_p (int c, Lisp_Object attrs)
{
Lisp_Object tail;
ptrdiff_t from, from_byte, to, to_byte;
Lisp_Object src_object;
- CHECK_NUMBER_COERCE_MARKER (start);
- CHECK_NUMBER_COERCE_MARKER (end);
if (NILP (coding_system))
coding_system = Qno_conversion;
else
DEFSYM (Qutf_8, "utf-8");
DEFSYM (Qutf_8_emacs, "utf-8-emacs");
+#if defined (WINDOWSNT) || defined (CYGWIN)
+ /* No, not utf-16-le: that one has a BOM. */
+ DEFSYM (Qutf_16le, "utf-16le");
+#endif
+
DEFSYM (Qutf_16, "utf-16");
DEFSYM (Qbig, "big");
DEFSYM (Qlittle, "little");