/* Coding system handler (conversion, detection, and etc).
- Copyright (C) 1995,97,1998,2002,2003 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
- Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2002, 2003, 2004, 2005,
+ 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H14PRO021
This file is part of GNU Emacs.
GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
any later version.
GNU Emacs is distributed in the hope that it will be useful,
You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
/*** TABLE OF CONTENTS ***
/* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
- form if MULTIBYTEP is nonzero. */
+ form if MULTIBYTEP is nonzero. In addition, if SRC is not less
+ than SRC_END, return with RET. */
-#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \
+#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep, ret) \
do { \
if (src >= src_end) \
{ \
coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
+ return ret; \
} \
c1 = *src++; \
if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \
Lisp_Object Qcoding_system_history;
Lisp_Object Qsafe_chars;
Lisp_Object Qvalid_codes;
+Lisp_Object Qascii_incompatible;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
-Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
+Lisp_Object Qcall_process, Qcall_process_region;
Lisp_Object Qstart_process, Qopen_network_stream;
Lisp_Object Qtarget_idx;
Lisp_Object eol_mnemonic_undecided;
/* Format of end-of-line decided by system. This is CODING_EOL_LF on
- Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac. */
+ Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.
+ This has an effect only for external encoding (i.e. for output to
+ file and process), not for in-buffer or Lisp string encoding. */
int system_eol_type;
#ifdef emacs
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
-
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
+ CODING_CATEGORY_MASK_EMACS_MULE);
if (composing)
{
if (c < 0xA0)
composing = 0;
else if (c == 0xA0)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
c &= 0x7F;
}
else
}
}
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_EMACS_MULE;
}
c = -1; \
else \
{ \
- c -= 0xA0; \
+ c -= 0x80; \
*p++ = c; \
} \
} \
component[ncomponent] = c;
}
}
- else
+ else if (c >= 0x80)
{
/* This may be an old Emacs 20 style format. See the comment at
the section 2 of this file. */
else
return 0;
}
+ else
+ return 0;
if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
{
Lisp_Object safe_chars;
reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
- while (mask && src < src_end)
+ while (mask)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
retry:
switch (c)
{
if (inhibit_iso_escape_detection)
break;
single_shifting = 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
if (c >= '(' && c <= '/')
{
/* Designation sequence for a charset of dimension 1. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, mask & mask_found);
if (c1 < ' ' || c1 >= 0x80
|| (charset = iso_charset_table[0][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
else if (c == '$')
{
/* Designation sequence for a charset of dimension 2. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
if (c >= '@' && c <= 'B')
/* Designation for JISX0208.1978, GB2312, or JISX0208. */
reg[0] = charset = iso_charset_table[1][0][c];
else if (c >= '(' && c <= '/')
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep,
+ mask & mask_found);
if (c1 < ' ' || c1 >= 0x80
|| (charset = iso_charset_table[1][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
c = -1;
while (src < src_end)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
+ mask & mask_found);
if (c < 0xA0)
break;
i++;
break;
}
}
- label_end_of_loop:
return (mask & mask_found);
}
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_SJIS);
if (c < 0x80)
continue;
if (c == 0x80 || c == 0xA0 || c > 0xEF)
return 0;
if (c <= 0x9F || c >= 0xE0)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
if (c < 0x40 || c == 0x7F || c > 0xFC)
return 0;
}
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_SJIS;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_BIG5);
if (c < 0x80)
continue;
if (c < 0xA1 || c > 0xFE)
return 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
return 0;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_BIG5;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_UTF_8);
if (UTF_8_1_OCTET_P (c))
continue;
else if (UTF_8_2_OCTET_LEADING_P (c))
do
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
if (!UTF_8_EXTRA_OCTET_P (c))
return 0;
seq_maybe_bytes--;
}
while (seq_maybe_bytes > 0);
}
-
- label_end_of_loop:
- return CODING_CATEGORY_MASK_UTF_8;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
struct coding_system dummy_coding;
struct coding_system *coding = &dummy_coding;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, 0);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep, 0);
if ((c1 == 0xFF) && (c2 == 0xFE))
return CODING_CATEGORY_MASK_UTF_16_LE;
else if ((c1 == 0xFE) && (c2 == 0xFF))
return CODING_CATEGORY_MASK_UTF_16_BE;
-
- label_end_of_loop:
return 0;
}
valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_CCL);
if (! valid[c])
return 0;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_CCL;
}
\f
{
coding->eol_type = CODING_EOL_UNDECIDED;
coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
+ if (system_eol_type != CODING_EOL_LF)
+ coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
}
else if (XFASTINT (eol_type) == 1)
{
= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
}
else
- coding->eol_type = CODING_EOL_LF;
+ {
+ coding->common_flags = 0;
+ coding->eol_type = CODING_EOL_LF;
+ }
coding_type = XVECTOR (coding_spec)->contents[0];
/* Try short cut. */
if (!NILP (val))
coding->composing = COMPOSITION_NO;
+ /* If the coding system is ascii-incompatible, record it in
+ common_flags. */
+ val = Fplist_get (plist, Qascii_incompatible);
+ if (! NILP (val))
+ coding->common_flags |= CODING_ASCII_INCOMPATIBLE_MASK;
+
switch (XFASTINT (coding_type))
{
case 0:
coding->type = coding_type_no_conversion;
coding->category_idx = CODING_CATEGORY_IDX_BINARY;
coding->common_flags = 0;
- coding->eol_type = CODING_EOL_LF;
+ coding->eol_type = CODING_EOL_UNDECIDED;
coding->pre_write_conversion = coding->post_read_conversion = Qnil;
- return -1;
+ return NILP (coding_system) ? 0 : -1;
}
/* Free memory blocks allocated for storing composition information. */
coding->consumed = coding->consumed_char = 0;
coding->errors = 0;
coding->result = CODING_FINISH_NORMAL;
+ if (coding->eol_type == CODING_EOL_UNDECIDED)
+ coding->eol_type = CODING_EOL_LF;
switch (coding->type)
{
} \
} while (0)
+/* ARG is (CODING BUFFER ...) where CODING is what to be set in
+ Vlast_coding_system_used and the remaining elements are buffers to
+ kill. */
static Lisp_Object
code_convert_region_unwind (arg)
Lisp_Object arg;
{
+ struct gcpro gcpro1;
+ GCPRO1 (arg);
+
inhibit_pre_post_conversion = 0;
- Vlast_coding_system_used = arg;
+ Vlast_coding_system_used = XCAR (arg);
+ for (arg = XCDR (arg); ! NILP (arg); arg = XCDR (arg))
+ Fkill_buffer (XCAR (arg));
+
+ UNGCPRO;
return Qnil;
}
inhibit_modification_hooks = saved_inhibit_modification_hooks;
}
+ coding->heading_ascii = 0;
+
if (! encodep && CODING_REQUIRE_DETECTION (coding))
{
/* We must detect encoding of text and eol format. */
Lisp_Object new;
record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
+ Fcons (Vlast_coding_system_used, Qnil));
/* We should not call any more pre-write/post-read-conversion
functions while this pre-write-conversion is running. */
inhibit_pre_post_conversion = 1;
TEMP_SET_PT_BOTH (from, from_byte);
prev_Z = Z;
record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
+ Fcons (Vlast_coding_system_used, Qnil));
saved_coding_system = Vlast_coding_system_used;
Vlast_coding_system_used = coding->symbol;
/* We should not call any more pre-write/post-read-conversion
/* Set the current buffer to the working buffer prepared for
code-conversion. MULTIBYTE specifies the multibyteness of the
- buffer. */
+ buffer. Return the buffer we set if it must be killed after use.
+ Otherwise return Qnil. */
-static struct buffer *
+static Lisp_Object
set_conversion_work_buffer (multibyte)
int multibyte;
{
- Lisp_Object buffer;
+ Lisp_Object buffer, buffer_to_kill;
struct buffer *buf;
buffer = Fget_buffer_create (Vcode_conversion_workbuf_name);
buf = XBUFFER (buffer);
+ if (buf == current_buffer)
+ {
+ /* As we are already in the work buffer, we must generate a new
+ buffer for the work. */
+ Lisp_Object name;
+
+ name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
+ buffer = buffer_to_kill = Fget_buffer_create (name);
+ buf = XBUFFER (buffer);
+ }
+ else
+ buffer_to_kill = Qnil;
+
delete_all_overlays (buf);
buf->directory = current_buffer->directory;
buf->read_only = Qnil;
Fwiden ();
del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0);
buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
- return buf;
+ return buffer_to_kill;
}
Lisp_Object
int count = SPECPDL_INDEX ();
struct gcpro gcpro1, gcpro2;
int multibyte = STRING_MULTIBYTE (str);
- struct buffer *buf;
Lisp_Object old_deactivate_mark;
+ Lisp_Object buffer_to_kill;
+ Lisp_Object unwind_arg;
record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
- record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
/* It is not crucial to specbind this. */
old_deactivate_mark = Vdeactivate_mark;
GCPRO2 (str, old_deactivate_mark);
/* We must insert the contents of STR as is without
unibyte<->multibyte conversion. For that, we adjust the
multibyteness of the working buffer to that of STR. */
- set_conversion_work_buffer (multibyte);
+ buffer_to_kill = set_conversion_work_buffer (multibyte);
+ if (NILP (buffer_to_kill))
+ unwind_arg = Fcons (Vlast_coding_system_used, Qnil);
+ else
+ unwind_arg = list2 (Vlast_coding_system_used, buffer_to_kill);
+ record_unwind_protect (code_convert_region_unwind, unwind_arg);
insert_from_string (str, 0, 0,
SCHARS (str), SBYTES (str), 0);
UNGCPRO;
inhibit_pre_post_conversion = 1;
if (encodep)
- call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
+ {
+ struct buffer *prev = current_buffer;
+
+ call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
+ if (prev != current_buffer)
+ /* We must kill the current buffer too. */
+ Fsetcdr (unwind_arg, Fcons (Fcurrent_buffer (), XCDR (unwind_arg)));
+ }
else
{
Vlast_coding_system_used = coding->symbol;
{
struct gcpro gcpro1, gcpro2;
struct buffer *cur = current_buffer;
+ struct buffer *prev;
Lisp_Object old_deactivate_mark, old_last_coding_system_used;
Lisp_Object args[3];
+ Lisp_Object buffer_to_kill;
/* It is not crucial to specbind this. */
old_deactivate_mark = Vdeactivate_mark;
/* We must insert the contents of STR as is without
unibyte<->multibyte conversion. For that, we adjust the
multibyteness of the working buffer to that of STR. */
- set_conversion_work_buffer (coding->src_multibyte);
+ buffer_to_kill = set_conversion_work_buffer (coding->src_multibyte);
insert_1_both (*str, nchars, nbytes, 0, 0, 0);
UNGCPRO;
inhibit_pre_post_conversion = 1;
+ prev = current_buffer;
args[0] = coding->pre_write_conversion;
args[1] = make_number (BEG);
args[2] = make_number (Z);
bcopy (BEG_ADDR, *str, coding->produced);
coding->src_multibyte
= ! NILP (current_buffer->enable_multibyte_characters);
+ if (prev != current_buffer)
+ Fkill_buffer (Fcurrent_buffer ());
set_buffer_internal (cur);
+ if (! NILP (buffer_to_kill))
+ Fkill_buffer (buffer_to_kill);
}
saved_coding_symbol = coding->symbol;
coding->src_multibyte = STRING_MULTIBYTE (str);
coding->dst_multibyte = 1;
+ coding->heading_ascii = 0;
+
if (CODING_REQUIRE_DETECTION (coding))
{
/* See the comments in code_convert_region. */
produced += coding->produced;
produced_char += coding->produced_char;
if (result == CODING_FINISH_NORMAL
+ || result == CODING_FINISH_INTERRUPT
|| (result == CODING_FINISH_INSUFFICIENT_SRC
&& coding->consumed == 0))
break;
/* Try to skip the heading and tailing ASCIIs. We can't skip them
if we must run CCL program or there are compositions to
encode. */
+ coding->heading_ascii = 0;
if (coding->type != coding_type_ccl
&& (! coding->cmp_data || coding->cmp_data->used == 0))
{
}
if (!NILP (Fcoding_system_p (coding_system)))
return coding_system;
- while (1)
- Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+ xsignal1 (Qcoding_system_error, coding_system);
}
\f
Lisp_Object
the coding system `undecided' is specified. The list is ordered by
priority decided in the current language environment.
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for such ISO-2022 control
+characters ISO-2022 as ESC), it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
end-of-line format.
`undecided' is specified. The list is ordered by priority decided in
the current language environment.
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for such ISO-2022 control
+characters ISO-2022 as ESC), it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
end-of-line format.
STRING_MULTIBYTE (string));
}
-/* Subroutine for Fsafe_coding_systems_region_internal.
+/* Subroutine for Ffind_coding_systems_region_internal.
Return a list of coding systems that safely encode the multibyte
text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of
}
DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
- doc: /* Encode a Japanese character CHAR to shift_jis encoding.
+ doc: /* Encode a Japanese character CH to shift_jis encoding.
Return the corresponding code in SJIS. */)
(ch)
Lisp_Object ch;
}
DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
- doc: /* Encode the Big5 character CHAR to BIG5 coding system.
+ doc: /* Encode the Big5 character CH to BIG5 coding system.
Return the corresponding character code in Big5. */)
(ch)
Lisp_Object ch;
whichever argument specifies the file name is TARGET.
TARGET has a meaning which depends on OPERATION:
- For file I/O, TARGET is a file name.
+ For file I/O, TARGET is a file name (except for the special case below).
For process I/O, TARGET is a process name.
For network I/O, TARGET is a service name or a port number
or a function symbol to call.
In the last case, we call the function with one argument,
which is a list of all the arguments given to this function.
+If the function can't decide a coding system, it can return
+`undecided' so that the normal code-detection is performed.
+
+If OPERATION is `insert-file-contents', the argument corresponding to
+TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
+file name to look up, and BUFFER is a buffer that contains the file's
+contents (not yet decoded). If `file-coding-system-alist' specifies a
+function to call for FILENAME, that function should examine the
+contents of BUFFER instead of reading the file.
-usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
+usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
(nargs, args)
int nargs;
Lisp_Object *args;
target_idx = make_number (4);
target = args[XINT (target_idx) + 1];
if (!(STRINGP (target)
+ || (EQ (operation, Qinsert_file_contents) && CONSP (target)
+ && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
|| (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
error ("Invalid argument %d", XINT (target_idx) + 1);
+ if (CONSP (target))
+ target = XCAR (target);
chain = ((EQ (operation, Qinsert_file_contents)
|| EQ (operation, Qwrite_region))
return Fcons (val, val);
if (! NILP (Ffboundp (val)))
{
+ /* We use call1 rather than safe_call1
+ so as to get bug reports about functions called here
+ which don't handle the current interface. */
val = call1 (val, Flist (nargs, args));
if (CONSP (val))
return val;
Lisp_Object safe_chars, slot;
if (NILP (Fcheck_coding_system (coding_system)))
- Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+ xsignal1 (Qcoding_system_error, coding_system);
+
safe_chars = coding_safe_chars (coding_system);
if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
error ("No valid safe-chars property for %s",
SDATA (SYMBOL_NAME (coding_system)));
+
if (EQ (safe_chars, Qt))
{
if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
Qvalid_codes = intern ("valid-codes");
staticpro (&Qvalid_codes);
+ Qascii_incompatible = intern ("ascii-incompatible");
+ staticpro (&Qascii_incompatible);
+
Qemacs_mule = intern ("emacs-mule");
staticpro (&Qemacs_mule);
If VAL is a cons of coding systems, the car part is used for decoding,
and the cdr part is used for encoding.
If VAL is a function symbol, the function must return a coding system
-or a cons of coding systems which are used as above. The function gets
-the arguments with which `find-operation-coding-system' was called.
+or a cons of coding systems which are used as above. The function is
+called with an argument that is a list of the arguments with which
+`find-operation-coding-system' was called. If the function can't decide
+a coding system, it can return `undecided' so that the normal
+code-detection is performed.
See also the function `find-operation-coding-system'
and the variable `auto-coding-alist'. */);