/* Coding system handler (conversion, detection, etc).
Copyright (C) 2001, 2002, 2003, 2004, 2005,
- 2006 Free Software Foundation, Inc.
- Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
+ 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
Copyright (C) 2003
decided. */
Lisp_Object eol_mnemonic_undecided;
+/* Format of end-of-line decided by system. This is Qunix on
+ Unix and Mac, Qdos on DOS/Windows.
+ This has an effect only for external encoding (i.e. for output to
+ file and process), not for in-buffer or Lisp string encoding. */
+static Lisp_Object system_eol_type;
+
#ifdef emacs
Lisp_Object Vcoding_system_list, Vcoding_system_alist;
static void coding_set_destination P_ ((struct coding_system *));
static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
static void coding_alloc_by_making_gap P_ ((struct coding_system *,
- EMACS_INT));
+ EMACS_INT, EMACS_INT));
static unsigned char *alloc_destination P_ ((struct coding_system *,
EMACS_INT, unsigned char *));
static void setup_iso_safe_charsets P_ ((Lisp_Object));
}
static void
-coding_alloc_by_making_gap (coding, bytes)
+coding_alloc_by_making_gap (coding, offset, bytes)
struct coding_system *coding;
- EMACS_INT bytes;
+ EMACS_INT offset, bytes;
{
if (BUFFERP (coding->dst_object)
&& EQ (coding->src_object, coding->dst_object))
{
- EMACS_INT add = coding->src_bytes - coding->consumed;
+ EMACS_INT add = offset + (coding->src_bytes - coding->consumed);
+ GPT += offset, GPT_BYTE += offset;
GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
make_gap (bytes);
GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
+ GPT -= offset, GPT_BYTE -= offset;
}
else
{
EMACS_INT offset = dst - coding->destination;
if (BUFFERP (coding->dst_object))
- coding_alloc_by_making_gap (coding, nbytes);
+ coding_alloc_by_making_gap (coding, offset, nbytes);
else
coding_alloc_by_realloc (coding, nbytes);
record_conversion_result (coding, CODING_RESULT_SUCCESS);
}
else
{
+ if (c >= 0xA0)
+ {
+ /* Old style component character of a composition. */
+ if (c == 0xA0)
+ {
+ ONE_MORE_BYTE (c);
+ c -= 0x80;
+ }
+ else
+ c -= 0x20;
+ }
+
switch (emacs_mule_bytes[c])
{
case 2:
if (src >= src_end) \
goto invalid_code; \
ONE_MORE_BYTE_NO_CHECK (c); \
- c -= 0x20; \
+ c -= 0xA0; \
if (c < 0 || c >= 81) \
goto invalid_code; \
\
} while (0)
-#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
- do { \
- /* Emacs 20 style format for relative composition. */ \
- /* Store multibyte form of characters to be composed. */ \
- enum composition_method method = COMPOSITION_RELATIVE; \
- int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
- int *buf = components; \
- int i, j; \
- \
- src = src_base; \
- ONE_MORE_BYTE (c); /* skip 0x80 */ \
- for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
- DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
- if (i < 2) \
- goto invalid_code; \
- ADD_COMPOSITION_DATA (charbuf, i, method); \
- for (j = 0; j < i; j++) \
- *charbuf++ = components[j]; \
+#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
+ do { \
+ /* Emacs 20 style format for relative composition. */ \
+ /* Store multibyte form of characters to be composed. */ \
+ enum composition_method method = COMPOSITION_RELATIVE; \
+ int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
+ int *buf = components; \
+ int i, j; \
+ \
+ src = src_base; \
+ ONE_MORE_BYTE (c); /* skip 0x80 */ \
+ for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \
+ DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
+ if (i < 2) \
+ goto invalid_code; \
+ ADD_COMPOSITION_DATA (charbuf, i, method); \
+ for (j = 0; j < i; j++) \
+ *charbuf++ = components[j]; \
} while (0)
/* Emacs 20 style format for rule-base composition. */ \
/* Store multibyte form of characters to be composed. */ \
enum composition_method method = COMPOSITION_WITH_RULE; \
+ int *charbuf_base = charbuf; \
int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
int *buf = components; \
int i, j; \
- \
+ \
DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
- for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
+ for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \
{ \
+ if (*src < 0xA0) \
+ break; \
DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
} \
- if (i < 1 || (buf - components) % 2 == 0) \
+ if (i <= 1 || (buf - components) % 2 == 0) \
goto invalid_code; \
- if (charbuf + i + (i / 2) + 1 < charbuf_end) \
+ if (charbuf + i + (i / 2) + 1 >= charbuf_end) \
goto no_more_source; \
- ADD_COMPOSITION_DATA (buf, i, method); \
+ ADD_COMPOSITION_DATA (charbuf, i, method); \
+ i = i * 2 - 1; \
for (j = 0; j < i; j++) \
*charbuf++ = components[j]; \
+ charbuf_base[0] -= i; \
for (j = 0; j < i; j += 2) \
*charbuf++ = components[j]; \
} while (0)
consumed_chars += nchars;
char_offset++;
}
+ else
+ goto invalid_code;
continue;
invalid_code:
break; \
if (p == src_end - 1) \
{ \
- if (coding->mode & CODING_MODE_LAST_BLOCK) \
- goto invalid_code; \
+ /* The current composition doesn't end in the current \
+ source. */ \
+ record_conversion_result \
+ (coding, CODING_RESULT_INSUFFICIENT_SRC); \
goto no_more_source; \
} \
\
CODING_GET_INFO (coding, attrs, charset_list);
setup_iso_safe_charsets (attrs);
+ /* Charset list may have been changed. */
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
while (1)
{
setup_iso_safe_charsets (attrs);
/* Charset list may have been changed. */
- charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
int *charbuf_end = charbuf + coding->charbuf_used;
unsigned char *dst = coding->destination + coding->produced;
unsigned char *dst_end = coding->destination + coding->dst_bytes;
- unsigned char *adjusted_dst_end = dst_end - 1;
int destination_charbuf[1024];
int i, produced_chars = 0;
Lisp_Object attrs, charset_list;
ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
ccl.dst_multibyte = coding->dst_multibyte;
- while (charbuf < charbuf_end && dst < adjusted_dst_end)
+ while (charbuf < charbuf_end)
{
- int dst_bytes = dst_end - dst;
- if (dst_bytes > 1024)
- dst_bytes = 1024;
-
ccl_driver (&ccl, charbuf, destination_charbuf,
- charbuf_end - charbuf, dst_bytes, charset_list);
- charbuf += ccl.consumed;
+ charbuf_end - charbuf, 1024, charset_list);
if (multibytep)
- for (i = 0; i < ccl.produced; i++)
- EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
+ {
+ ASSURE_DESTINATION (ccl.produced * 2);
+ for (i = 0; i < ccl.produced; i++)
+ EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
+ }
else
{
+ ASSURE_DESTINATION (ccl.produced);
for (i = 0; i < ccl.produced; i++)
*dst++ = destination_charbuf[i] & 0xFF;
produced_chars += ccl.produced;
}
+ charbuf += ccl.consumed;
+ if (ccl.status == CCL_STAT_QUIT
+ || ccl.status == CCL_STAT_INVALID_CMD)
+ break;
}
switch (ccl.status)
int consumed_chars = 0;
Lisp_Object attrs, valids;
int found = 0;
+ int head_ascii = coding->head_ascii;
detect_info->checked |= CATEGORY_MASK_CHARSET;
valids = AREF (attrs, coding_attr_charset_valids);
if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
- src += coding->head_ascii;
+ src += head_ascii;
while (1)
{
int c;
+ Lisp_Object val;
+ struct charset *charset;
+ int dim, idx;
src_base = src;
ONE_MORE_BYTE (c);
if (c < 0)
continue;
- if (NILP (AREF (valids, c)))
+ val = AREF (valids, c);
+ if (NILP (val))
break;
if (c >= 0x80)
found = CATEGORY_MASK_CHARSET;
+ if (INTEGERP (val))
+ {
+ charset = CHARSET_FROM_ID (XFASTINT (val));
+ dim = CHARSET_DIMENSION (charset);
+ for (idx = 1; idx < dim; idx++)
+ {
+ if (src == src_end)
+ goto too_short;
+ ONE_MORE_BYTE (c);
+ if (c < charset->code_space[(dim - 1 - idx) * 2]
+ || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
+ break;
+ }
+ if (idx < dim)
+ break;
+ }
+ else
+ {
+ idx = 1;
+ for (; CONSP (val); val = XCDR (val))
+ {
+ charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
+ dim = CHARSET_DIMENSION (charset);
+ while (idx < dim)
+ {
+ if (src == src_end)
+ goto too_short;
+ ONE_MORE_BYTE (c);
+ if (c < charset->code_space[(dim - 1 - idx) * 4]
+ || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
+ break;
+ idx++;
+ }
+ if (idx == dim)
+ {
+ val = Qnil;
+ break;
+ }
+ }
+ if (CONSP (val))
+ break;
+ }
}
+ too_short:
detect_info->rejected |= CATEGORY_MASK_CHARSET;
return 0;
coding->mode = 0;
coding->head_ascii = -1;
- coding->common_flags
- = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
+ if (VECTORP (eol_type))
+ coding->common_flags = (CODING_REQUIRE_DECODING_MASK
+ | CODING_REQUIRE_DETECTION_MASK);
+ else if (! EQ (eol_type, Qunix))
+ coding->common_flags = (CODING_REQUIRE_DECODING_MASK
+ | CODING_REQUIRE_ENCODING_MASK);
+ else
+ coding->common_flags = 0;
if (! NILP (CODING_ATTR_POST_READ (attrs)))
coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
does, return one of the subsidiary that has the same eol-spec as
- PARENT. Otherwise, return CODING_SYSTEM. */
+ PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
+ inherit end-of-line format from the system's setting
+ (system_eol_type). */
Lisp_Object
coding_inherit_eol_type (coding_system, parent)
coding_system = Qraw_text;
spec = CODING_SYSTEM_SPEC (coding_system);
eol_type = AREF (spec, 2);
- if (VECTORP (eol_type)
- && ! NILP (parent))
+ if (VECTORP (eol_type))
{
- Lisp_Object parent_spec;
Lisp_Object parent_eol_type;
- parent_spec
- = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
- parent_eol_type = AREF (parent_spec, 2);
+ if (! NILP (parent))
+ {
+ Lisp_Object parent_spec;
+
+ parent_spec = CODING_SYSTEM_SPEC (parent);
+ parent_eol_type = AREF (parent_spec, 2);
+ }
+ else
+ parent_eol_type = system_eol_type;
if (EQ (parent_eol_type, Qunix))
coding_system = AREF (eol_type, 0);
else if (EQ (parent_eol_type, Qdos))
if (NILP (coding->dst_object))
{
+ /* Start deleting '\r' from the tail to minimize the memory
+ movement. */
for (p = pend - 2; p >= pbeg; p--)
if (*p == '\r')
{
}
else
{
- for (p = pend - 2; p >= pbeg; p--)
- if (*p == '\r')
- {
- int pos_byte = coding->dst_pos_byte + (p - pbeg);
- int pos = BYTE_TO_CHAR (pos_byte);
-
- del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
- n++;
- }
+ int pos_byte = coding->dst_pos_byte;
+ int pos = coding->dst_pos;
+ int pos_end = pos + coding->produced_char - 1;
+
+ while (pos < pos_end)
+ {
+ p = BYTE_POS_ADDR (pos_byte);
+ if (*p == '\r' && p[1] == '\n')
+ {
+ del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
+ n++;
+ pos_end--;
+ }
+ pos++;
+ pos_byte += BYTES_BY_CHAR_HEAD (*p);
+ }
}
coding->produced -= n;
coding->produced_char -= n;
for (i = 0; i < len; i++)
{
args[i] = make_number (charbuf[i]);
- if (args[i] < 0)
+ if (charbuf[i] < 0)
return;
}
components = (method == COMPOSITION_WITH_ALTCHARS
same buffer as CODING->dst_object, CODING->src_pos must be
negative.
- If CODING->src_object is a string, CODING->src_pos in an index to
+ If CODING->src_object is a string, CODING->src_pos is an index to
that string.
If CODING->src_object is nil, CODING->source must already point to
= coding->charbuf[coding->charbuf_used - carryover + i];
}
while (coding->consumed < coding->src_bytes
- && ! coding->result);
+ && (coding->result == CODING_RESULT_SUCCESS
+ || coding->result == CODING_RESULT_INVALID_SRC));
if (carryover > 0)
{
coding->consumed = coding->src_bytes;
}
+ if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
+ decode_eol (coding);
if (BUFFERP (coding->dst_object))
{
current_buffer->undo_list = undo_list;
record_insert (coding->dst_pos, coding->produced_char);
}
- if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
- decode_eol (coding);
return coding->result;
}
coding->dst_pos = PT;
coding->dst_pos_byte = PT_BYTE;
coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
- coding->mode |= CODING_MODE_LAST_BLOCK;
if (CODING_REQUIRE_DETECTION (coding))
detect_coding (coding);
+ coding->mode |= CODING_MODE_LAST_BLOCK;
+ current_buffer->text->inhibit_shrinking = 1;
decode_coding (coding);
+ current_buffer->text->inhibit_shrinking = 0;
attrs = CODING_ID_ATTRS (coding->id);
if (! NILP (CODING_ATTR_POST_READ (attrs)))
}
if (!NILP (Fcoding_system_p (coding_system)))
return coding_system;
- while (1)
- Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+ xsignal1 (Qcoding_system_error, coding_system);
}
\f
break;
if (c < 0x20
&& (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
- && inhibit_iso_escape_detection)
+ && ! inhibit_iso_escape_detection)
{
coding.head_ascii = src - coding.source;
if (detect_coding_iso_2022 (&coding, &detect_info))
for (i = 0; i < coding_category_raw_text; i++)
{
category = coding_priorities[i];
+ this = coding_categories + category;
if (detect_info.found & (1 << category))
break;
}
{
found |= 1 << category;
id = coding_categories[category].id;
- val = Fcons (make_number (id), val);
+ if (id >= 0)
+ val = Fcons (make_number (id), val);
}
}
for (i = coding_category_raw_text - 1; i >= 0; i--)
doc: /* Detect coding system of the text in the region between START and END.
Return a list of possible coding systems ordered by priority.
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for such ISO-2022 control
+characters as ESC), it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
end-of-line format.
doc: /* Detect coding system of the text in STRING.
Return a list of possible coding systems ordered by priority.
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for such ISO-2022 control
+characters as ESC), it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
end-of-line format.
DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
- doc: /* Encode a Japanese character CHAR to shift_jis encoding.
+ doc: /* Encode a Japanese character CH to shift_jis encoding.
Return the corresponding code in SJIS. */)
(ch)
Lisp_Object ch;
}
DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
- doc: /* Encode the Big5 character CHAR to BIG5 coding system.
+ doc: /* Encode the Big5 character CH to BIG5 coding system.
Return the corresponding character code in Big5. */)
(ch)
Lisp_Object ch;
coding_system = CODING_ID_NAME (terminal_coding.id);
/* For backward compatibility, return nil if it is `undecided'. */
- return (coding_system != Qundecided ? coding_system : Qnil);
+ return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
}
DEFUN ("set-keyboard-coding-system-internal",
whichever argument specifies the file name is TARGET.
TARGET has a meaning which depends on OPERATION:
- For file I/O, TARGET is a file name.
+ For file I/O, TARGET is a file name (except for the special case below).
For process I/O, TARGET is a process name.
For network I/O, TARGET is a service name or a port number
In the last case, we call the function with one argument,
which is a list of all the arguments given to this function.
+If OPERATION is `insert-file-contents', the argument corresponding to
+TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
+file name to look up, and BUFFER is a buffer that contains the file's
+contents (not yet decoded). If `file-coding-system-alist' specifies a
+function to call for FILENAME, that function should examine the
+contents of BUFFER instead of reading the file.
+
usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
(nargs, args)
int nargs;
SDATA (SYMBOL_NAME (operation)));
target = args[XINT (target_idx) + 1];
if (!(STRINGP (target)
+ || (EQ (operation, Qinsert_file_contents) && CONSP (target)
+ && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
|| (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
error ("Invalid %dth argument", XINT (target_idx) + 1);
+ if (CONSP (target))
+ target = XCAR (target);
chain = ((EQ (operation, Qinsert_file_contents)
|| EQ (operation, Qwrite_region))
return Fcons (val, val);
if (! NILP (Ffboundp (val)))
{
+ /* We use call1 rather than safe_call1
+ so as to get bug reports about functions called here
+ which don't handle the current interface. */
val = call1 (val, Flist (nargs, args));
if (CONSP (val))
return val;
for (i = 0; i < coding_category_max; i++)
Fset (AREF (Vcoding_category_table, i), Qno_conversion);
}
+#if defined (MSDOS) || defined (WINDOWSNT)
+ system_eol_type = Qdos;
+#else
+ system_eol_type = Qunix;
+#endif
+ staticpro (&system_eol_type);
}
char *