character sequence of emacs-utf-8 to a byte sequence of a specific
coding system.
- In Emacs Lisp, a coding system is represented by a Lisp symbol. In
- C level, a coding system is represented by a vector of attributes
+ In Emacs Lisp, a coding system is represented by a Lisp symbol. On
+ the C level, a coding system is represented by a vector of attributes
stored in the hash table Vcharset_hash_table. The conversion from
coding system symbol to attributes vector is done by looking up
Vcharset_hash_table by the symbol.
static void decode_coding_raw_text (struct coding_system *);
static int encode_coding_raw_text (struct coding_system *);
-static void coding_set_source (struct coding_system *);
-static void coding_set_destination (struct coding_system *);
+static EMACS_INT coding_set_source (struct coding_system *);
+static EMACS_INT coding_set_destination (struct coding_system *);
static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
static void coding_alloc_by_making_gap (struct coding_system *,
EMACS_INT, EMACS_INT);
static unsigned char *alloc_destination (struct coding_system *,
EMACS_INT, unsigned char *);
static void setup_iso_safe_charsets (Lisp_Object);
-static unsigned char *encode_designation_at_bol (struct coding_system *,
- int *, unsigned char *);
+static int encode_designation_at_bol (struct coding_system *,
+ int *, int *, unsigned char *);
static int detect_eol (const unsigned char *,
EMACS_INT, enum coding_category);
static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
static Lisp_Object get_translation_table (Lisp_Object, int, int *);
static Lisp_Object get_translation (Lisp_Object, int *, int *);
static int produce_chars (struct coding_system *, Lisp_Object, int);
-static INLINE void produce_charset (struct coding_system *, int *,
+static inline void produce_charset (struct coding_system *, int *,
EMACS_INT);
static void produce_annotation (struct coding_system *, EMACS_INT);
static int decode_coding (struct coding_system *);
-static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT,
+static inline int *handle_composition_annotation (EMACS_INT, EMACS_INT,
struct coding_system *,
int *, EMACS_INT *);
-static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT,
+static inline int *handle_charset_annotation (EMACS_INT, EMACS_INT,
struct coding_system *,
int *, EMACS_INT *);
static void consume_chars (struct coding_system *, Lisp_Object, int);
static int encode_coding (struct coding_system *);
static Lisp_Object make_conversion_work_buffer (int);
static Lisp_Object code_conversion_restore (Lisp_Object);
-static INLINE int char_encodable_p (int, Lisp_Object);
+static inline int char_encodable_p (int, Lisp_Object);
static Lisp_Object make_subsidiaries (Lisp_Object);
static void
}
}
-/* This wrapper macro is used to preserve validity of pointers into
- buffer text across calls to decode_char, which could cause
- relocation of buffers if it loads a charset map, because loading a
- charset map allocates large structures. */
+/* These wrapper macros are used to preserve validity of pointers into
+ buffer text across calls to decode_char, encode_char, etc., which
+ could cause relocation of buffers if they load a charset map,
+ because loading a charset map allocates large structures. */
+
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
do { \
+ EMACS_INT offset; \
+ \
charset_map_loaded = 0; \
c = DECODE_CHAR (charset, code); \
- if (charset_map_loaded) \
+ if (charset_map_loaded \
+ && (offset = coding_set_source (coding))) \
{ \
- const unsigned char *orig = coding->source; \
- EMACS_INT offset; \
- \
- coding_set_source (coding); \
- offset = coding->source - orig; \
src += offset; \
src_base += offset; \
src_end += offset; \
} \
} while (0)
+#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \
+ do { \
+ EMACS_INT offset; \
+ \
+ charset_map_loaded = 0; \
+ code = ENCODE_CHAR (charset, c); \
+ if (charset_map_loaded \
+ && (offset = coding_set_destination (coding))) \
+ { \
+ dst += offset; \
+ dst_end += offset; \
+ } \
+ } while (0)
+
+#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
+ do { \
+ EMACS_INT offset; \
+ \
+ charset_map_loaded = 0; \
+ charset = char_charset (c, charset_list, code_return); \
+ if (charset_map_loaded \
+ && (offset = coding_set_destination (coding))) \
+ { \
+ dst += offset; \
+ dst_end += offset; \
+ } \
+ } while (0)
+
+#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
+ do { \
+ EMACS_INT offset; \
+ \
+ charset_map_loaded = 0; \
+ result = CHAR_CHARSET_P (c, charset); \
+ if (charset_map_loaded \
+ && (offset = coding_set_destination (coding))) \
+ { \
+ dst += offset; \
+ dst_end += offset; \
+ } \
+ } while (0)
+
/* If there are at least BYTES length of room at dst, allocate memory
for coding->destination and update dst and dst_end. We don't have
| ((p)[-1] & 0x3F))))
-static void
+/* Update coding->source from coding->src_object, and return the
+ number of bytes by which coding->source changed. */
+
+static EMACS_INT
coding_set_source (struct coding_system *coding)
{
+ const unsigned char *orig = coding->source;
+
if (BUFFERP (coding->src_object))
{
struct buffer *buf = XBUFFER (coding->src_object);
/* Otherwise, the source is C string and is never relocated
automatically. Thus we don't have to update anything. */
}
+ return coding->source - orig;
}
-static void
+
+/* Update coding->destination from coding->dst_object, and return the
+ number of bytes by which coding->destination changed. */
+
+static EMACS_INT
coding_set_destination (struct coding_system *coding)
{
+ const unsigned char *orig = coding->destination;
+
if (BUFFERP (coding->dst_object))
{
- if (coding->src_pos < 0)
+ if (BUFFERP (coding->src_object) && coding->src_pos < 0)
{
coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
coding->dst_bytes = (GAP_END_ADDR
/* Otherwise, the destination is C string and is never relocated
automatically. Thus we don't have to update anything. */
}
+ return coding->destination - orig;
}
static void
coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
{
- if (coding->dst_bytes >= MOST_POSITIVE_FIXNUM - bytes)
- error ("Maximum size of buffer or string exceeded");
+ if (STRING_BYTES_BOUND - coding->dst_bytes < bytes)
+ string_overflow ();
coding->destination = (unsigned char *) xrealloc (coding->destination,
coding->dst_bytes + bytes);
coding->dst_bytes += bytes;
if (preferred_charset_id >= 0)
{
+ int result;
+
charset = CHARSET_FROM_ID (preferred_charset_id);
- if (CHAR_CHARSET_P (c, charset))
+ CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
+ if (result)
code = ENCODE_CHAR (charset, c);
else
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
}
else
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
if (! charset)
{
c = coding->default_char;
EMIT_ONE_ASCII_BYTE (c);
continue;
}
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
}
dimension = CHARSET_DIMENSION (charset);
emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
#define ENCODE_ISO_CHARACTER(charset, c) \
do { \
- int code = ENCODE_CHAR ((charset), (c)); \
+ int code; \
+ CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code); \
\
if (CHARSET_DIMENSION (charset) == 1) \
ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
/* Produce designation sequences of charsets in the line started from
- SRC to a place pointed by DST, and return updated DST.
+ CHARBUF to the place pointed to by DST, and return the number of
+ produced bytes. DST should not point directly into a buffer text
+ area, which may be relocated by a call to char_charset.
If the current block ends before any end-of-line, we may fail to
find all the necessary designations. */
-static unsigned char *
-encode_designation_at_bol (struct coding_system *coding, int *charbuf,
+static int
+encode_designation_at_bol (struct coding_system *coding,
+ int *charbuf, int *charbuf_end,
unsigned char *dst)
{
+ unsigned char *orig = dst;
struct charset *charset;
/* Table of charsets to be designated to each graphic register. */
int r[4];
for (reg = 0; reg < 4; reg++)
r[reg] = -1;
- while (found < 4)
+ while (charbuf < charbuf_end && found < 4)
{
int id;
ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
}
- return dst;
+ return dst - orig;
}
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
if (bol_designation)
{
- unsigned char *dst_prev = dst;
-
/* We have to produce designation sequences if any now. */
- dst = encode_designation_at_bol (coding, charbuf, dst);
- bol_designation = 0;
+ unsigned char desig_buf[16];
+ int nbytes;
+ EMACS_INT offset;
+
+ charset_map_loaded = 0;
+ nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
+ desig_buf);
+ if (charset_map_loaded
+ && (offset = coding_set_destination (coding)))
+ {
+ dst += offset;
+ dst_end += offset;
+ }
+ memcpy (dst, desig_buf, nbytes);
+ dst += nbytes;
/* We are sure that designation sequences are all ASCII bytes. */
- produced_chars += dst - dst_prev;
+ produced_chars += nbytes;
+ bol_designation = 0;
+ ASSURE_DESTINATION (safe_room);
}
c = *charbuf++;
if (preferred_charset_id >= 0)
{
+ int result;
+
charset = CHARSET_FROM_ID (preferred_charset_id);
- if (! CHAR_CHARSET_P (c, charset))
- charset = char_charset (c, charset_list, NULL);
+ CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
+ if (! result)
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ NULL, charset);
}
else
- charset = char_charset (c, charset_list, NULL);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ NULL, charset);
if (!charset)
{
if (coding->mode & CODING_MODE_SAFE_ENCODING)
else
{
c = coding->default_char;
- charset = char_charset (c, charset_list, NULL);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c,
+ charset_list, NULL, charset);
}
}
ENCODE_ISO_CHARACTER (charset, c);
else
{
unsigned code;
- struct charset *charset = char_charset (c, charset_list, &code);
+ struct charset *charset;
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
if (!charset)
{
else
{
c = coding->default_char;
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c,
+ charset_list, &code, charset);
}
}
if (code == CHARSET_INVALID_CODE (charset))
else
{
unsigned code;
- struct charset *charset = char_charset (c, charset_list, &code);
+ struct charset *charset;
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
if (! charset)
{
else
{
c = coding->default_char;
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c,
+ charset_list, &code, charset);
}
}
if (code == CHARSET_INVALID_CODE (charset))
&& coding->mode & CODING_MODE_LAST_BLOCK)
ccl->last_block = 1;
- while (charbuf < charbuf_end)
+ do
{
ccl_driver (ccl, charbuf, destination_charbuf,
charbuf_end - charbuf, 1024, charset_list);
|| ccl->status == CCL_STAT_INVALID_CMD)
break;
}
+ while (charbuf < charbuf_end);
switch (ccl->status)
{
}
else
{
- charset = char_charset (c, charset_list, &code);
+ CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
+ &code, charset);
+
if (charset)
{
if (CHARSET_DIMENSION (charset) == 1)
Lisp_Object
coding_system_charset_list (Lisp_Object coding_system)
{
- int id;
+ ptrdiff_t id;
Lisp_Object attrs, charset_list;
CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
break;
}
- if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
+ if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
{
+ if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf))
+ / MAX_MULTIBYTE_LENGTH)
+ < to_nchars)
+ memory_full (SIZE_MAX);
dst = alloc_destination (coding,
buf_end - buf
+ MAX_MULTIBYTE_LENGTH * to_nchars,
[ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
*/
-static INLINE void
+static inline void
produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
{
int len;
[ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
*/
-static INLINE void
+static inline void
produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
{
EMACS_INT from = pos - charbuf[2];
position of a composition after POS (if any) or to LIMIT, and
return BUF. */
-static INLINE int *
+static inline int *
handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
struct coding_system *coding, int *buf,
EMACS_INT *stop)
If the property value is nil, set *STOP to the position where the
property value is non-nil (limiting by LIMIT), and return BUF. */
-static INLINE int *
+static inline int *
handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
struct coding_system *coding, int *buf,
EMACS_INT *stop)
}
else if (EQ (dst_object, Qt))
{
+ ptrdiff_t dst_bytes = max (1, coding->src_chars);
coding->dst_object = Qnil;
- coding->dst_bytes = coding->src_chars;
- if (coding->dst_bytes == 0)
- coding->dst_bytes = 1;
- coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
+ coding->destination = (unsigned char *) xmalloc (dst_bytes);
+ coding->dst_bytes = dst_bytes;
coding->dst_multibyte = 0;
}
else
Lisp_Object attrs, eol_type;
Lisp_Object val = Qnil;
struct coding_system coding;
- int id;
+ ptrdiff_t id;
struct coding_detection_info detect_info;
enum coding_category base_category;
int null_byte_found = 0, eight_bit_found = 0;
}
-static INLINE int
+static inline int
char_encodable_p (int c, Lisp_Object attrs)
{
Lisp_Object tail;
(Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
{
return code_convert_string (string, coding_system, buffer,
- 1, ! NILP (nocopy), 1);
+ 1, ! NILP (nocopy), 0);
}
\f
= TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
- /* For backward compatibility, return nil if it is `undecided'. */
+ /* For backward compatibility, return nil if it is `undecided'. */
return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
}
contents of BUFFER instead of reading the file.
usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
- (size_t nargs, Lisp_Object *args)
+ (ptrdiff_t nargs, Lisp_Object *args)
{
Lisp_Object operation, target_idx, target, val;
register Lisp_Object chain;
all but the first one are ignored.
usage: (set-coding-system-priority &rest coding-systems) */)
- (size_t nargs, Lisp_Object *args)
+ (ptrdiff_t nargs, Lisp_Object *args)
{
- size_t i, j;
+ ptrdiff_t i, j;
int changed[coding_category_max];
enum coding_category priorities[coding_category_max];
make_subsidiaries (Lisp_Object base)
{
Lisp_Object subsidiaries;
- int base_name_len = SBYTES (SYMBOL_NAME (base));
+ ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base));
char *buf = (char *) alloca (base_name_len + 6);
int i;
subsidiaries = Fmake_vector (make_number (3), Qnil);
for (i = 0; i < 3; i++)
{
- memcpy (buf + base_name_len, suffixes[i], strlen (suffixes[i]) + 1);
+ strcpy (buf + base_name_len, suffixes[i]);
ASET (subsidiaries, i, intern (buf));
}
return subsidiaries;
Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
doc: /* For internal use only.
usage: (define-coding-system-internal ...) */)
- (size_t nargs, Lisp_Object *args)
+ (ptrdiff_t nargs, Lisp_Object *args)
{
Lisp_Object name;
Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */