/* Coding system handler (conversion, detection, etc).
- Copyright (C) 2001-2011 Free Software Foundation, Inc.
+ Copyright (C) 2001-2012 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011
National Institute of Advanced Industrial Science and Technology (AIST)
static void decode_coding_raw_text (struct coding_system *);
static int encode_coding_raw_text (struct coding_system *);
-static void coding_set_source (struct coding_system *);
-static void coding_set_destination (struct coding_system *);
+static EMACS_INT coding_set_source (struct coding_system *);
+static EMACS_INT coding_set_destination (struct coding_system *);
static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
static void coding_alloc_by_making_gap (struct coding_system *,
EMACS_INT, EMACS_INT);
static unsigned char *alloc_destination (struct coding_system *,
EMACS_INT, unsigned char *);
static void setup_iso_safe_charsets (Lisp_Object);
-static unsigned char *encode_designation_at_bol (struct coding_system *,
- int *, unsigned char *);
+static int encode_designation_at_bol (struct coding_system *,
+ int *, int *, unsigned char *);
static int detect_eol (const unsigned char *,
EMACS_INT, enum coding_category);
static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
}
}
-/* This wrapper macro is used to preserve validity of pointers into
- buffer text across calls to decode_char, which could cause
- relocation of buffers if it loads a charset map, because loading a
- charset map allocates large structures. */
+/* These wrapper macros are used to preserve validity of pointers into
+ buffer text across calls to decode_char, encode_char, etc, which
+ could cause relocation of buffers if it loads a charset map,
+ because loading a charset map allocates large structures. */
+
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
do { \
+ EMACS_INT offset; \
+ \
charset_map_loaded = 0; \
c = DECODE_CHAR (charset, code); \
- if (charset_map_loaded) \
+ if (charset_map_loaded \
+ && (offset = coding_set_source (coding))) \
{ \
- const unsigned char *orig = coding->source; \
- EMACS_INT offset; \
- \
- coding_set_source (coding); \
- offset = coding->source - orig; \
src += offset; \
src_base += offset; \
src_end += offset; \
} \
} while (0)
+#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \
+ do { \
+ EMACS_INT offset; \
+ \
+ charset_map_loaded = 0; \
+ code = ENCODE_CHAR (charset, c); \
+ if (charset_map_loaded \
+ && (offset = coding_set_destination (coding))) \
+ { \
+ dst += offset; \
+ dst_end += offset; \
+ } \
+ } while (0)
+
+#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
+ do { \
+ EMACS_INT offset; \
+ \
+ charset_map_loaded = 0; \
+ charset = char_charset (c, charset_list, code_return); \
+ if (charset_map_loaded \
+ && (offset = coding_set_destination (coding))) \
+ { \
+ dst += offset; \
+ dst_end += offset; \
+ } \
+ } while (0)
+
+#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
+ do { \
+ EMACS_INT offset; \
+ \
+ charset_map_loaded = 0; \
+ result = CHAR_CHARSET_P (c, charset); \
+ if (charset_map_loaded \
+ && (offset = coding_set_destination (coding))) \
+ { \
+ dst += offset; \
+ dst_end += offset; \
+ } \
+ } while (0)
+
/* If there are at least BYTES length of room at dst, allocate memory
for coding->destination and update dst and dst_end. We don't have
| ((p)[-1] & 0x3F))))
-static void
+/* Update coding->source from coding->src_object, and return how many
+ bytes coding->source was changed. */
+
+static EMACS_INT
coding_set_source (struct coding_system *coding)
{
+ const unsigned char *orig = coding->source;
+
if (BUFFERP (coding->src_object))
{
struct buffer *buf = XBUFFER (coding->src_object);
/* Otherwise, the source is C string and is never relocated
automatically. Thus we don't have to update anything. */
}
+ return coding->source - orig;
}
-static void
+
+/* Update coding->destination from coding->dst_object, and return how
+ many bytes coding->destination was changed. */
+
+static EMACS_INT
coding_set_destination (struct coding_system *coding)
{
+ const unsigned char *orig = coding->destination;
+
if (BUFFERP (coding->dst_object))
{
- if (coding->src_pos < 0)
+ if (BUFFERP (coding->src_object) && coding->src_pos < 0)
{
coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
coding->dst_bytes = (GAP_END_ADDR
/* Otherwise, the destination is C string and is never relocated
automatically. Thus we don't have to update anything. */
}
+ return coding->destination - orig;
}
if (preferred_charset_id >= 0)
{
+ int result;
+
charset = CHARSET_FROM_ID (preferred_charset_id);
- if (CHAR_CHARSET_P (c, charset))
+ CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
+ if (result)
code = ENCODE_CHAR (charset, c);
else
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
}
else
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
if (! charset)
{
c = coding->default_char;
EMIT_ONE_ASCII_BYTE (c);
continue;
}
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
}
dimension = CHARSET_DIMENSION (charset);
emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
#define ENCODE_ISO_CHARACTER(charset, c) \
do { \
- int code = ENCODE_CHAR ((charset), (c)); \
+ int code; \
+ CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code); \
\
if (CHARSET_DIMENSION (charset) == 1) \
ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
/* Produce designation sequences of charsets in the line started from
- SRC to a place pointed by DST, and return updated DST.
+ CHARBUF to a place pointed by DST, and return the number of
+ produced bytes. DST should not directly point a buffer text area
+ which may be relocated by char_charset call.
If the current block ends before any end-of-line, we may fail to
find all the necessary designations. */
-static unsigned char *
-encode_designation_at_bol (struct coding_system *coding, int *charbuf,
+static int
+encode_designation_at_bol (struct coding_system *coding,
+ int *charbuf, int *charbuf_end,
unsigned char *dst)
{
+ unsigned char *orig = dst;
struct charset *charset;
/* Table of charsets to be designated to each graphic register. */
int r[4];
for (reg = 0; reg < 4; reg++)
r[reg] = -1;
- while (found < 4)
+ while (charbuf < charbuf_end && found < 4)
{
int id;
ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
}
- return dst;
+ return dst - orig;
}
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
if (bol_designation)
{
- unsigned char *dst_prev = dst;
-
/* We have to produce designation sequences if any now. */
- dst = encode_designation_at_bol (coding, charbuf, dst);
- bol_designation = 0;
+ unsigned char desig_buf[16];
+ int nbytes;
+ EMACS_INT offset;
+
+ charset_map_loaded = 0;
+ nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
+ desig_buf);
+ if (charset_map_loaded
+ && (offset = coding_set_destination (coding)))
+ {
+ dst += offset;
+ dst_end += offset;
+ }
+ memcpy (dst, desig_buf, nbytes);
+ dst += nbytes;
/* We are sure that designation sequences are all ASCII bytes. */
- produced_chars += dst - dst_prev;
+ produced_chars += nbytes;
+ bol_designation = 0;
+ ASSURE_DESTINATION (safe_room);
}
c = *charbuf++;
if (preferred_charset_id >= 0)
{
+ int result;
+
charset = CHARSET_FROM_ID (preferred_charset_id);
- if (! CHAR_CHARSET_P (c, charset))
- charset = char_charset (c, charset_list, NULL);
+ CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
+ if (! result)
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ NULL, charset);
}
else
- charset = char_charset (c, charset_list, NULL);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ NULL, charset);
if (!charset)
{
if (coding->mode & CODING_MODE_SAFE_ENCODING)
else
{
c = coding->default_char;
- charset = char_charset (c, charset_list, NULL);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c,
+ charset_list, NULL, charset);
}
}
ENCODE_ISO_CHARACTER (charset, c);
else
{
unsigned code;
- struct charset *charset = char_charset (c, charset_list, &code);
+ struct charset *charset;
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
if (!charset)
{
else
{
c = coding->default_char;
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c,
+ charset_list, &code, charset);
}
}
if (code == CHARSET_INVALID_CODE (charset))
else
{
unsigned code;
- struct charset *charset = char_charset (c, charset_list, &code);
+ struct charset *charset;
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
if (! charset)
{
else
{
c = coding->default_char;
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c,
+ charset_list, &code, charset);
}
}
if (code == CHARSET_INVALID_CODE (charset))
&& coding->mode & CODING_MODE_LAST_BLOCK)
ccl->last_block = 1;
- while (charbuf < charbuf_end)
+ do
{
ccl_driver (ccl, charbuf, destination_charbuf,
charbuf_end - charbuf, 1024, charset_list);
|| ccl->status == CCL_STAT_INVALID_CMD)
break;
}
+ while (charbuf < charbuf_end);
switch (ccl->status)
{
}
else
{
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
+
if (charset)
{
if (CHARSET_DIMENSION (charset) == 1)
Lisp_Object
coding_system_charset_list (Lisp_Object coding_system)
{
- int id;
+ ptrdiff_t id;
Lisp_Object attrs, charset_list;
CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
break;
}
- if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
+ if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
{
+ if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf))
+ / MAX_MULTIBYTE_LENGTH)
+ < to_nchars)
+ memory_full (SIZE_MAX);
dst = alloc_destination (coding,
buf_end - buf
+ MAX_MULTIBYTE_LENGTH * to_nchars,
}
else if (EQ (dst_object, Qt))
{
+ ptrdiff_t dst_bytes = max (1, coding->src_chars);
coding->dst_object = Qnil;
- coding->dst_bytes = coding->src_chars;
- if (coding->dst_bytes == 0)
- coding->dst_bytes = 1;
- coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
+ coding->destination = (unsigned char *) xmalloc (dst_bytes);
+ coding->dst_bytes = dst_bytes;
coding->dst_multibyte = 0;
}
else
Lisp_Object attrs, eol_type;
Lisp_Object val = Qnil;
struct coding_system coding;
- int id;
+ ptrdiff_t id;
struct coding_detection_info detect_info;
enum coding_category base_category;
int null_byte_found = 0, eight_bit_found = 0;
}
positions = Qnil;
+ charset_map_loaded = 0;
while (1)
{
int c;
}
from++;
+ if (charset_map_loaded && NILP (string))
+ {
+ p = CHAR_POS_ADDR (from);
+ pend = CHAR_POS_ADDR (to);
+ if (from < GPT && to >= GPT)
+ stop = GPT_ADDR;
+ else
+ stop = pend;
+ charset_map_loaded = 0;
+ }
}
return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
= TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
- /* For backward compatibility, return nil if it is `undecided'. */
+ /* For backward compatibility, return nil if it is `undecided'. */
return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
}
{
Lisp_Object bom;
- CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
-
if (nargs < coding_arg_utf8_max)
goto short_args;
CHECK_CODING_SYSTEM (val);
}
ASET (attrs, coding_attr_utf_bom, bom);
+ if (NILP (bom))
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
category = (CONSP (bom) ? coding_category_utf_8_auto
: NILP (bom) ? coding_category_utf_8_nosig