/* Coding system handler (conversion, detection, etc).
Copyright (C) 2001, 2002, 2003, 2004, 2005,
- 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
Copyright (C) 2003
while (1)
{
- /* Get one byte from the source. If the souce is exausted, jump
+ /* Get one byte from the source. If the source is exhausted, jump
to no_more_source:. */
ONE_MORE_BYTE (c);
return 0;
no_more_source:
- /* The source exausted successfully. */
+ /* The source exhausted successfully. */
detect_info->found |= found;
return 1;
}
#include <config.h>
#include <stdio.h>
+#include <setjmp.h>
#include "lisp.h"
#include "buffer.h"
on output. */
#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
-/* If set, do not encode unsafe charactes on output. */
+/* If set, do not encode unsafe characters on output. */
#define CODING_ISO_FLAG_SAFE 0x0800
/* If set, extra latin codes (128..159) are accepted as a valid code
static Lisp_Object Vcoding_category_list;
/* Table of coding categories (Lisp symbols). This variable is for
- internal use oly. */
+ internal use only. */
static Lisp_Object Vcoding_category_table;
/* Table of coding-categories ordered by priority. */
} while (0)
-/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
+/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
#define EMIT_TWO_ASCII_BYTES(c1, c2) \
do { \
case CODING_RESULT_INSUFFICIENT_MEM:
Vlast_code_conversion_error = Qinsufficient_memory;
break;
+ case CODING_RESULT_INSUFFICIENT_DST:
+ /* Don't record this error in Vlast_code_conversion_error
+ because it happens just temporarily and is resolved when the
+ whole conversion is finished. */
+ break;
case CODING_RESULT_SUCCESS:
break;
default:
}
}
+/* This wrapper macro is used to preserve validity of pointers into
+ buffer text across calls to decode_char, which could cause
+ relocation of buffers if it loads a charset map, because loading a
+ charset map allocates large structures. */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
do { \
charset_map_loaded = 0; \
METHOD is one of enum composition_method.
- Optionnal COMPOSITION-COMPONENTS are characters and composition
+ Optional COMPOSITION-COMPONENTS are characters and composition
rules.
In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
{
ASSURE_DESTINATION (safe_room);
c = *charbuf++;
- if (c >= MAX_UNICODE_CHAR)
+ if (c > MAX_UNICODE_CHAR)
c = coding->default_char;
if (c < 0x10000)
CHARS is 0xA0 plus a number of characters composed by this
data,
- COMPONENTs are characters of multibye form or composition
+ COMPONENTs are characters of multibyte form or composition
rules encoded by two-byte of ASCII codes.
In addition, for backward compatibility, the following formats are
}
else
{
- int more_bytes = emacs_mule_bytes[*src_base] - 1;
+ int more_bytes = emacs_mule_bytes[c] - 1;
while (more_bytes > 0)
{
/* Parse emacs-mule multibyte sequence at SRC and return the decoded
character. If CMP_STATUS indicates that we must expect MSEQ or
RULE described above, decode it and return the negative value of
- the deocded character or rule. If an invalid byte is found, return
+ the decoded character or rule. If an invalid byte is found, return
-1. If SRC is too short, return -2. */
int
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base = src;
int multibytep = coding->src_multibyte;
- struct charset *charset;
+ int charset_id;
unsigned code;
int c;
int consumed_chars = 0;
if (c < 0)
{
c = -c;
- charset = emacs_mule_charset[0];
+ charset_id = emacs_mule_charset[0];
}
else
{
switch (emacs_mule_bytes[c])
{
case 2:
- if (! (charset = emacs_mule_charset[c]))
+ if ((charset_id = emacs_mule_charset[c]) < 0)
goto invalid_code;
ONE_MORE_BYTE (c);
if (c < 0xA0)
|| c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
{
ONE_MORE_BYTE (c);
- if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
+ if (c < 0xA0 || (charset_id = emacs_mule_charset[c]) < 0)
goto invalid_code;
ONE_MORE_BYTE (c);
if (c < 0xA0)
}
else
{
- if (! (charset = emacs_mule_charset[c]))
+ if ((charset_id = emacs_mule_charset[c]) < 0)
goto invalid_code;
ONE_MORE_BYTE (c);
if (c < 0xA0)
case 4:
ONE_MORE_BYTE (c);
- if (c < 0 || ! (charset = emacs_mule_charset[c]))
+ if (c < 0 || (charset_id = emacs_mule_charset[c]) < 0)
goto invalid_code;
ONE_MORE_BYTE (c);
if (c < 0xA0)
case 1:
code = c;
- charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
- ? charset_ascii : charset_eight_bit);
+ charset_id = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
break;
default:
abort ();
}
- c = DECODE_CHAR (charset, code);
+ CODING_DECODE_CHAR (coding, src, src_base, src_end,
+ CHARSET_FROM_ID (charset_id), code, c);
if (c < 0)
goto invalid_code;
}
*nbytes = src - src_base;
*nchars = consumed_chars;
if (id)
- *id = charset->id;
+ *id = charset_id;
return (mseq_found ? -c : c);
no_more_source:
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
- /* We may produce two annocations (charset and composition) in one
- loop and one more charset annocation at the end. */
+ /* We may produce two annotations (charset and composition) in one
+ loop and one more charset annotation at the end. */
int *charbuf_end
= coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
int consumed_chars = 0, consumed_chars_base;
else
{
int nchars, nbytes;
+ /* emacs_mule_char can load a charset map from a file, which
+ allocates a large structure and might cause buffer text
+ to be relocated as result. Thus, we need to remember the
+ original pointer to buffer text, and fix up all related
+ pointers after the call. */
+ const unsigned char *orig = coding->source;
+ EMACS_INT offset;
c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
cmp_status);
+ offset = coding->source - orig;
+ if (offset)
+ {
+ src += offset;
+ src_base += offset;
+ src_end += offset;
+ }
if (c < 0)
{
if (c == -1)
cmp_status->ncomps -= nchars;
}
- /* Now if C >= 0, we found a normally encoded characer, if C <
+ /* Now if C >= 0, we found a normally encoded character, if C <
0, we found an old-style composition component character or
rule. */
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in one of ISO-2022 based codig systems.
+ Check if a text is encoded in one of ISO-2022 based coding systems.
If it is, return 1, else return 0. */
static int
int i = 1;
while (src < src_end)
{
+ src_base = src;
ONE_MORE_BYTE (c);
if (c < 0xA0)
- break;
+ {
+ src = src_base;
+ break;
+ }
i++;
}
return new_chars;
}
-/* If characers are under composition, finish the composition. */
+/* If characters are under composition, finish the composition. */
#define MAYBE_FINISH_COMPOSITION() \
do { \
if (cmp_status->state != COMPOSING_NO) \
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
- /* We may produce two annocations (charset and composition) in one
- loop and one more charset annocation at the end. */
+ /* We may produce two annotations (charset and composition) in one
+ loop and one more charset annotation at the end. */
int *charbuf_end
= coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
int consumed_chars = 0, consumed_chars_base;
while (1)
{
- int c1, c2;
+ int c1, c2, c3;
src_base = src;
consumed_chars_base = consumed_chars;
continue;
case ISO_single_shift_2_7:
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
+ goto invalid_code;
case ISO_single_shift_2:
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
goto invalid_code;
continue;
case '[': /* specification of direction */
- if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
goto invalid_code;
/* For the moment, nested direction is not supported.
So, `coding->mode & CODING_MODE_DIRECTION' zero means
- left-to-right, and nozero means right-to-left. */
+ left-to-right, and nonzero means right-to-left. */
ONE_MORE_BYTE (c1);
switch (c1)
{
int size;
ONE_MORE_BYTE (dim);
- if (dim < 0 || dim > 4)
+ if (dim < '0' || dim > '4')
goto invalid_code;
ONE_MORE_BYTE (M);
if (M < 128)
}
/* Now we know CHARSET and 1st position code C1 of a character.
- Produce a decoded character while getting 2nd position code
- C2 if necessary. */
- c1 &= 0x7F;
+ Produce a decoded character while getting 2nd and 3rd
+ position codes C2, C3 if necessary. */
if (CHARSET_DIMENSION (charset) > 1)
{
ONE_MORE_BYTE (c2);
- if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
+ if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
+ || ((c1 & 0x80) != (c2 & 0x80)))
/* C2 is not in a valid range. */
goto invalid_code;
- c1 = (c1 << 8) | (c2 & 0x7F);
- if (CHARSET_DIMENSION (charset) > 2)
+ if (CHARSET_DIMENSION (charset) == 2)
+ c1 = (c1 << 8) | c2;
+ else
{
- ONE_MORE_BYTE (c2);
- if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
- /* C2 is not in a valid range. */
+ ONE_MORE_BYTE (c3);
+ if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
+ || ((c1 & 0x80) != (c3 & 0x80)))
+ /* C3 is not in a valid range. */
goto invalid_code;
- c1 = (c1 << 8) | (c2 & 0x7F);
+ c1 = (c1 << 16) | (c2 << 8) | c2;
}
}
-
+ c1 &= 0x7F7F7F;
CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
if (c < 0)
{
charset_list = CODING_ATTR_CHARSET_LIST (attrs);
coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
- ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
+ ascii_compatible
+ = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+ && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
+ | CODING_ISO_FLAG_LOCKING_SHIFT)));
while (charbuf < charbuf_end)
{
int consumed_chars = 0;
int found = 0;
int c;
+ Lisp_Object attrs, charset_list;
+ int max_first_byte_of_2_byte_code;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ max_first_byte_of_2_byte_code
+ = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
detect_info->checked |= CATEGORY_MASK_SJIS;
/* A coding system of this category is always ASCII compatible. */
ONE_MORE_BYTE (c);
if (c < 0x80)
continue;
- if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
+ if ((c >= 0x81 && c <= 0x9F)
+ || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
{
ONE_MORE_BYTE (c);
if (c < 0x40 || c == 0x7F || c > 0xFC)
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
- /* We may produce one charset annocation in one loop and one more at
+ /* We may produce one charset annotation in one loop and one more at
the end. */
int *charbuf_end
= coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
- /* We may produce one charset annocation in one loop and one more at
+ /* We may produce one charset annotation in one loop and one more at
the end. */
int *charbuf_end
= coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
int c1, c2;
c1 = code >> 8;
- if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
+ if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
+ || c1 == 0x28
|| (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
{
JIS_TO_SJIS2 (code);
int *charbuf_end = coding->charbuf + coding->charbuf_size;
int consumed_chars = 0;
int multibytep = coding->src_multibyte;
- struct ccl_program ccl;
+ struct ccl_program *ccl = &coding->spec.ccl->ccl;
int source_charbuf[1024];
- int source_byteidx[1024];
+ int source_byteidx[1025];
Lisp_Object attrs, charset_list;
CODING_GET_INFO (coding, attrs, charset_list);
- setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
- while (src < src_end)
+ while (1)
{
const unsigned char *p = src;
- int *source, *source_end;
int i = 0;
if (multibytep)
- while (i < 1024 && p < src_end)
- {
- source_byteidx[i] = p - src;
- source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
- }
+ {
+ while (i < 1024 && p < src_end)
+ {
+ source_byteidx[i] = p - src;
+ source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
+ }
+ source_byteidx[i] = p - src;
+ }
else
while (i < 1024 && p < src_end)
source_charbuf[i++] = *p++;
if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
- ccl.last_block = 1;
-
- source = source_charbuf;
- source_end = source + i;
- while (source < source_end)
- {
- ccl_driver (&ccl, source, charbuf,
- source_end - source, charbuf_end - charbuf,
- charset_list);
- source += ccl.consumed;
- charbuf += ccl.produced;
- if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
- break;
- }
- if (source < source_end)
- src += source_byteidx[source - source_charbuf];
+ ccl->last_block = 1;
+ ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
+ charset_list);
+ charbuf += ccl->produced;
+ if (multibytep)
+ src += source_byteidx[ccl->consumed];
else
- src = p;
- consumed_chars += source - source_charbuf;
-
- if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
- && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
+ src += ccl->consumed;
+ consumed_chars += ccl->consumed;
+ if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
break;
}
- switch (ccl.status)
+ switch (ccl->status)
{
case CCL_STAT_SUSPEND_BY_SRC:
record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
break;
case CCL_STAT_SUSPEND_BY_DST:
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
break;
case CCL_STAT_QUIT:
case CCL_STAT_INVALID_CMD:
encode_coding_ccl (coding)
struct coding_system *coding;
{
- struct ccl_program ccl;
+ struct ccl_program *ccl = &coding->spec.ccl->ccl;
int multibytep = coding->dst_multibyte;
int *charbuf = coding->charbuf;
int *charbuf_end = charbuf + coding->charbuf_used;
Lisp_Object attrs, charset_list;
CODING_GET_INFO (coding, attrs, charset_list);
- setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
-
- ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
- ccl.dst_multibyte = coding->dst_multibyte;
+ if (coding->consumed_char == coding->src_chars
+ && coding->mode & CODING_MODE_LAST_BLOCK)
+ ccl->last_block = 1;
while (charbuf < charbuf_end)
{
- ccl_driver (&ccl, charbuf, destination_charbuf,
+ ccl_driver (ccl, charbuf, destination_charbuf,
charbuf_end - charbuf, 1024, charset_list);
if (multibytep)
{
- ASSURE_DESTINATION (ccl.produced * 2);
- for (i = 0; i < ccl.produced; i++)
+ ASSURE_DESTINATION (ccl->produced * 2);
+ for (i = 0; i < ccl->produced; i++)
EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
}
else
{
- ASSURE_DESTINATION (ccl.produced);
- for (i = 0; i < ccl.produced; i++)
+ ASSURE_DESTINATION (ccl->produced);
+ for (i = 0; i < ccl->produced; i++)
*dst++ = destination_charbuf[i] & 0xFF;
- produced_chars += ccl.produced;
+ produced_chars += ccl->produced;
}
- charbuf += ccl.consumed;
- if (ccl.status == CCL_STAT_QUIT
- || ccl.status == CCL_STAT_INVALID_CMD)
+ charbuf += ccl->consumed;
+ if (ccl->status == CCL_STAT_QUIT
+ || ccl->status == CCL_STAT_INVALID_CMD)
break;
}
- switch (ccl.status)
+ switch (ccl->status)
{
case CCL_STAT_SUSPEND_BY_SRC:
record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
- /* We may produce one charset annocation in one loop and one more at
+ /* We may produce one charset annotation in one loop and one more at
the end. */
int *charbuf_end
= coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
coding->max_charset_id = SCHARS (val) - 1;
coding->safe_charsets = SDATA (val);
coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
+ coding->carryover_bytes = 0;
coding_type = CODING_ATTR_TYPE (attrs);
if (EQ (coding_type, Qundecided))
}
-/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
- does, return one of the subsidiary that has the same eol-spec as
- PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
- inherit end-of-line format from the system's setting
+/* If CODING_SYSTEM doesn't specify end-of-line format, return one of
+ the subsidiary that has the same eol-spec as PARENT (if it is not
+ nil and specifies end-of-line format) or the system's setting
(system_eol_type). */
Lisp_Object
parent_spec = CODING_SYSTEM_SPEC (parent);
parent_eol_type = AREF (parent_spec, 2);
+ if (VECTORP (parent_eol_type))
+ parent_eol_type = system_eol_type;
}
else
parent_eol_type = system_eol_type;
return coding_system;
}
+
+/* Check if text-conversion and eol-conversion of CODING_SYSTEM are
+ decided for writing to a process. If not, complement them, and
+ return a new coding system. */
+
+Lisp_Object
+complement_process_encoding_system (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object coding_base = Qnil, eol_base = Qnil;
+ Lisp_Object spec, attrs;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ {
+ if (i == 1)
+ coding_system = CDR_SAFE (Vdefault_process_coding_system);
+ else if (i == 2)
+ coding_system = preferred_coding_system ();
+ spec = CODING_SYSTEM_SPEC (coding_system);
+ if (NILP (spec))
+ continue;
+ attrs = AREF (spec, 0);
+ if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
+ coding_base = CODING_ATTR_BASE_NAME (attrs);
+ if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
+ eol_base = coding_system;
+ if (! NILP (coding_base) && ! NILP (eol_base))
+ break;
+ }
+
+ if (i > 0)
+ /* The original CODING_SYSTEM didn't specify text-conversion or
+ eol-conversion. Be sure that we return a fully complemented
+ coding system. */
+ coding_system = coding_inherit_eol_type (coding_base, eol_base);
+ return coding_system;
+}
+
+
/* Emacs has a mechanism to automatically detect a coding system if it
is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
it's impossible to distinguish some coding systems accurately
o coding-category-iso-7-else
The category for a coding system which has the same code range
- as ISO2022 of 7-bit environemnt but uses locking shift or
+ as ISO2022 of 7-bit environment but uses locking shift or
single shift functions. Assigned the coding-system (Lisp
symbol) `iso-2022-7bit-lock' by default.
o coding-category-iso-8-else
The category for a coding system which has the same code range
- as ISO2022 of 8-bit environemnt but uses locking shift or
+ as ISO2022 of 8-bit environment but uses locking shift or
single shift functions. Assigned the coding-system (Lisp
symbol) `iso-2022-8bit-ss2' by default.
Lisp_Object attrs;
Lisp_Object undo_list;
Lisp_Object translation_table;
+ struct ccl_spec cclspec;
int carryover;
int i;
translation_table = get_translation_table (attrs, 0, NULL);
carryover = 0;
+ if (coding->decoder == decode_coding_ccl)
+ {
+ coding->spec.ccl = &cclspec;
+ setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
+ }
do
{
EMACS_INT pos = coding->dst_pos + coding->produced_char;
coding->charbuf[i]
= coding->charbuf[coding->charbuf_used - carryover + i];
}
- while (coding->consumed < coding->src_bytes
- && (coding->result == CODING_RESULT_SUCCESS
- || coding->result == CODING_RESULT_INVALID_SRC));
+ while (coding->result == CODING_RESULT_INSUFFICIENT_DST
+ || (coding->consumed < coding->src_bytes
+ && (coding->result == CODING_RESULT_SUCCESS
+ || coding->result == CODING_RESULT_INVALID_SRC)));
if (carryover > 0)
{
components = COMPOSITION_COMPONENTS (prop);
if (VECTORP (components))
{
- len = XVECTOR (components)->size;
+ len = XVECTOR_SIZE (components);
for (i = 0; i < len; i++)
*buf++ = XINT (AREF (components, i));
}
{
EMACS_INT bytes;
- if (coding->encoder == encode_coding_raw_text)
+ if (coding->encoder == encode_coding_raw_text
+ || coding->encoder == encode_coding_ccl)
c = *src++, pos++;
else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
Lisp_Object attrs;
Lisp_Object translation_table;
int max_lookup;
+ struct ccl_spec cclspec;
attrs = CODING_ID_ATTRS (coding->id);
if (coding->encoder == encode_coding_raw_text)
ALLOC_CONVERSION_WORK_AREA (coding);
+ if (coding->encoder == encode_coding_ccl)
+ {
+ coding->spec.ccl = &cclspec;
+ setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
+ }
do {
coding_set_source (coding);
consume_chars (coding, translation_table, max_lookup);
static int reused_workbuf_in_use;
-/* Return a working buffer of code convesion. MULTIBYTE specifies the
+/* Return a working buffer of code conversion. MULTIBYTE specifies the
multibyteness of returning buffer. */
static Lisp_Object
if (! destination)
{
record_conversion_result (coding,
- CODING_RESULT_INSUFFICIENT_DST);
+ CODING_RESULT_INSUFFICIENT_MEM);
unbind_to (count, Qnil);
return;
}
\f
/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
HIGHEST is nonzero, return the coding system of the highest
- priority among the detected coding systems. Otherwize return a
+ priority among the detected coding systems. Otherwise return a
list of detected coding systems sorted by their priorities. If
MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
multibyte form but contains only ASCII and eight-bit chars.
EMACS_INT start_byte, end_byte;
const unsigned char *p, *pbeg, *pend;
int c;
- Lisp_Object tail, elt;
+ Lisp_Object tail, elt, work_table;
if (STRINGP (start))
{
while (p < pend && ASCII_BYTE_P (*p)) p++;
while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+ work_table = Fmake_char_table (Qnil, Qnil);
while (p < pend)
{
if (ASCII_BYTE_P (*p))
else
{
c = STRING_CHAR_ADVANCE (p);
+ if (!NILP (char_table_ref (work_table, c)))
+ /* This character was already checked. Ignore it. */
+ continue;
charset_map_loaded = 0;
for (tail = coding_attrs_list; CONSP (tail);)
p = pbeg + p_offset;
pend = pbeg + pend_offset;
}
+ char_table_set (work_table, c, Qt);
}
}
setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
/* We had better not send unsafe characters to terminal. */
terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
- /* Characer composition should be disabled. */
+ /* Character composition should be disabled. */
terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
terminal_coding->src_multibyte = 1;
terminal_coding->dst_multibyte = 0;
CHECK_SYMBOL (coding_system);
setup_coding_system (Fcheck_coding_system (coding_system),
&safe_terminal_coding);
- /* Characer composition should be disabled. */
+ /* Character composition should be disabled. */
safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
safe_terminal_coding.src_multibyte = 1;
safe_terminal_coding.dst_multibyte = 0;
{
struct terminal *t = get_terminal (terminal, 1);
CHECK_SYMBOL (coding_system);
- setup_coding_system (Fcheck_coding_system (coding_system),
- TERMINAL_KEYBOARD_CODING (t));
- /* Characer composition should be disabled. */
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ else
+ Fcheck_coding_system (coding_system);
+ setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
+ /* Character composition should be disabled. */
TERMINAL_KEYBOARD_CODING (t)->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK;
return Qnil;
return Fnreverse (val);
}
-static char *suffixes[] = { "-unix", "-dos", "-mac" };
+static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
static Lisp_Object
make_subsidiaries (base)
If Nth element is a list of charset IDs, N is the first byte
of one of them. The list is sorted by dimensions of the
- charsets. A charset of smaller dimension comes firtst. */
+ charsets. A charset of smaller dimension comes first. */
val = Fmake_vector (make_number (256), Qnil);
for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
Vcode_conversion_reused_workbuf = Qnil;
staticpro (&Vcode_conversion_workbuf_name);
- Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
+ Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
reused_workbuf_in_use = 0;
DEFSYM (Qcoding_system_error, "coding-system-error");
Fput (Qcoding_system_error, Qerror_conditions,
- Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
+ pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
Fput (Qcoding_system_error, Qerror_message,
- build_string ("Invalid coding system"));
+ make_pure_c_string ("Invalid coding system"));
/* Intern this now in case it isn't already done.
Setting this variable twice is harmless.
But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
DEFSYM (Qtranslation_table, "translation-table");
Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
staticpro (&Vcoding_category_table);
/* Followings are target of code detection. */
ASET (Vcoding_category_table, coding_category_iso_7,
- intern ("coding-category-iso-7"));
+ intern_c_string ("coding-category-iso-7"));
ASET (Vcoding_category_table, coding_category_iso_7_tight,
- intern ("coding-category-iso-7-tight"));
+ intern_c_string ("coding-category-iso-7-tight"));
ASET (Vcoding_category_table, coding_category_iso_8_1,
- intern ("coding-category-iso-8-1"));
+ intern_c_string ("coding-category-iso-8-1"));
ASET (Vcoding_category_table, coding_category_iso_8_2,
- intern ("coding-category-iso-8-2"));
+ intern_c_string ("coding-category-iso-8-2"));
ASET (Vcoding_category_table, coding_category_iso_7_else,
- intern ("coding-category-iso-7-else"));
+ intern_c_string ("coding-category-iso-7-else"));
ASET (Vcoding_category_table, coding_category_iso_8_else,
- intern ("coding-category-iso-8-else"));
+ intern_c_string ("coding-category-iso-8-else"));
ASET (Vcoding_category_table, coding_category_utf_8_auto,
- intern ("coding-category-utf-8-auto"));
+ intern_c_string ("coding-category-utf-8-auto"));
ASET (Vcoding_category_table, coding_category_utf_8_nosig,
- intern ("coding-category-utf-8"));
+ intern_c_string ("coding-category-utf-8"));
ASET (Vcoding_category_table, coding_category_utf_8_sig,
- intern ("coding-category-utf-8-sig"));
+ intern_c_string ("coding-category-utf-8-sig"));
ASET (Vcoding_category_table, coding_category_utf_16_be,
- intern ("coding-category-utf-16-be"));
+ intern_c_string ("coding-category-utf-16-be"));
ASET (Vcoding_category_table, coding_category_utf_16_auto,
- intern ("coding-category-utf-16-auto"));
+ intern_c_string ("coding-category-utf-16-auto"));
ASET (Vcoding_category_table, coding_category_utf_16_le,
- intern ("coding-category-utf-16-le"));
+ intern_c_string ("coding-category-utf-16-le"));
ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
- intern ("coding-category-utf-16-be-nosig"));
+ intern_c_string ("coding-category-utf-16-be-nosig"));
ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
- intern ("coding-category-utf-16-le-nosig"));
+ intern_c_string ("coding-category-utf-16-le-nosig"));
ASET (Vcoding_category_table, coding_category_charset,
- intern ("coding-category-charset"));
+ intern_c_string ("coding-category-charset"));
ASET (Vcoding_category_table, coding_category_sjis,
- intern ("coding-category-sjis"));
+ intern_c_string ("coding-category-sjis"));
ASET (Vcoding_category_table, coding_category_big5,
- intern ("coding-category-big5"));
+ intern_c_string ("coding-category-big5"));
ASET (Vcoding_category_table, coding_category_ccl,
- intern ("coding-category-ccl"));
+ intern_c_string ("coding-category-ccl"));
ASET (Vcoding_category_table, coding_category_emacs_mule,
- intern ("coding-category-emacs-mule"));
+ intern_c_string ("coding-category-emacs-mule"));
/* Followings are NOT target of code detection. */
ASET (Vcoding_category_table, coding_category_raw_text,
- intern ("coding-category-raw-text"));
+ intern_c_string ("coding-category-raw-text"));
ASET (Vcoding_category_table, coding_category_undecided,
- intern ("coding-category-undecided"));
+ intern_c_string ("coding-category-undecided"));
DEFSYM (Qinsufficient_source, "insufficient-source");
DEFSYM (Qinconsistent_eol, "inconsistent-eol");
DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
doc: /*
*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
- eol_mnemonic_unix = build_string (":");
+ eol_mnemonic_unix = make_pure_c_string (":");
DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
doc: /*
*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
- eol_mnemonic_dos = build_string ("\\");
+ eol_mnemonic_dos = make_pure_c_string ("\\");
DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
doc: /*
*String displayed in mode line for MAC-like (CR) end-of-line format. */);
- eol_mnemonic_mac = build_string ("/");
+ eol_mnemonic_mac = make_pure_c_string ("/");
DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
doc: /*
*String displayed in mode line when end-of-line format is not yet determined. */);
- eol_mnemonic_undecided = build_string (":");
+ eol_mnemonic_undecided = make_pure_c_string (":");
DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
doc: /*
for (i = 0; i < coding_arg_max; i++)
args[i] = Qnil;
- plist[0] = intern (":name");
+ plist[0] = intern_c_string (":name");
plist[1] = args[coding_arg_name] = Qno_conversion;
- plist[2] = intern (":mnemonic");
+ plist[2] = intern_c_string (":mnemonic");
plist[3] = args[coding_arg_mnemonic] = make_number ('=');
- plist[4] = intern (":coding-type");
+ plist[4] = intern_c_string (":coding-type");
plist[5] = args[coding_arg_coding_type] = Qraw_text;
- plist[6] = intern (":ascii-compatible-p");
+ plist[6] = intern_c_string (":ascii-compatible-p");
plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
- plist[8] = intern (":default-char");
+ plist[8] = intern_c_string (":default-char");
plist[9] = args[coding_arg_default_char] = make_number (0);
- plist[10] = intern (":for-unibyte");
+ plist[10] = intern_c_string (":for-unibyte");
plist[11] = args[coding_arg_for_unibyte] = Qt;
- plist[12] = intern (":docstring");
- plist[13] = build_string ("Do no conversion.\n\
+ plist[12] = intern_c_string (":docstring");
+ plist[13] = make_pure_c_string ("Do no conversion.\n\
\n\
When you visit a file with this coding, the file is read into a\n\
unibyte buffer as is, thus each byte of a file is treated as a\n\
character.");
- plist[14] = intern (":eol-type");
+ plist[14] = intern_c_string (":eol-type");
plist[15] = args[coding_arg_eol_type] = Qunix;
args[coding_arg_plist] = Flist (16, plist);
Fdefine_coding_system_internal (coding_arg_max, args);
plist[5] = args[coding_arg_coding_type] = Qundecided;
/* This is already set.
plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
- plist[8] = intern (":charset-list");
+ plist[8] = intern_c_string (":charset-list");
plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
plist[11] = args[coding_arg_for_unibyte] = Qnil;
- plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
+ plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
plist[15] = args[coding_arg_eol_type] = Qnil;
args[coding_arg_plist] = Flist (16, plist);
Fdefine_coding_system_internal (coding_arg_max, args);