X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/a90f2c3549a5173e59755ac834c4f85505b3fcc8..9fb446e3f1fcc75cf018fef830d7d660f425ae46:/src/coding.c diff --git a/src/coding.c b/src/coding.c index 23ccfbce39..b96ec82fc1 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1,7 +1,9 @@ /* Coding system handler (conversion, detection, and etc). - Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. - Copyright (C) 2001 Free Software Foundation, Inc. + Copyright (C) 2001, 2002, 2003, 2004, 2005, + 2006 Free Software Foundation, Inc. + Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H14PRO021 This file is part of GNU Emacs. @@ -17,8 +19,8 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ +the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ /*** TABLE OF CONTENTS *** @@ -147,7 +149,8 @@ detect_coding_emacs_mule (src, src_end, multibytep) static void decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { ... @@ -345,6 +348,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) #include "ccl.h" #include "coding.h" #include "window.h" +#include "intervals.h" #else /* not emacs */ @@ -361,12 +365,18 @@ Lisp_Object Qsafe_chars; Lisp_Object Qvalid_codes; extern Lisp_Object Qinsert_file_contents, Qwrite_region; -Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; +Lisp_Object Qcall_process, Qcall_process_region; Lisp_Object Qstart_process, Qopen_network_stream; Lisp_Object Qtarget_idx; +/* If a symbol has this property, evaluate the value to define the + symbol as a coding system. */ +Lisp_Object Qcoding_system_define_form; + Lisp_Object Vselect_safe_coding_system_function; +int coding_system_require_warning; + /* Mnemonic string for each format of end-of-line. */ Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; /* Mnemonic string to indicate format of end-of-line is not yet @@ -379,6 +389,16 @@ int system_eol_type; #ifdef emacs +/* Information about which coding system is safe for which chars. + The value has the form (GENERIC-LIST . NON-GENERIC-ALIST). + + GENERIC-LIST is a list of generic coding systems which can encode + any characters. + + NON-GENERIC-ALIST is an alist of non generic coding systems vs the + corresponding char table that contains safe chars. */ +Lisp_Object Vcoding_system_safe_chars; + Lisp_Object Vcoding_system_list, Vcoding_system_alist; Lisp_Object Qcoding_system_p, Qcoding_system_error; @@ -387,6 +407,8 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error; end-of-line format. */ Lisp_Object Qemacs_mule, Qraw_text; +Lisp_Object Qutf_8; + /* Coding-systems are handed between Emacs Lisp programs and C internal routines by the following three variables. */ /* Coding-system for reading files and receiving data from process. */ @@ -485,26 +507,27 @@ Lisp_Object Vcharset_revision_alist; /* Default coding systems used for process I/O. */ Lisp_Object Vdefault_process_coding_system; +/* Char table for translating Quail and self-inserting input. */ +Lisp_Object Vtranslation_table_for_input; + /* Global flag to tell that we can't call post-read-conversion and pre-write-conversion functions. Usually the value is zero, but it is set to 1 temporarily while such functions are running. This is to avoid infinite recursive call. */ static int inhibit_pre_post_conversion; -/* Char-table containing safe coding systems of each character. */ -Lisp_Object Vchar_coding_system_table; Lisp_Object Qchar_coding_system; -/* Return `safe-chars' property of coding system CODING. Don't check - validity of CODING. */ +/* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check + its validity. */ Lisp_Object -coding_safe_chars (coding) - struct coding_system *coding; +coding_safe_chars (coding_system) + Lisp_Object coding_system; { Lisp_Object coding_spec, plist, safe_chars; - coding_spec = Fget (coding->symbol, Qcoding_system); + coding_spec = Fget (coding_system, Qcoding_system); plist = XVECTOR (coding_spec)->contents[3]; safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars); return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt); @@ -673,8 +696,16 @@ detect_coding_emacs_mule (src, src_end, multibytep) /* Record one COMPONENT (alternate character or composition rule). */ -#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ - (coding->cmp_data->data[coding->cmp_data->used++] = component) +#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ + do { \ + coding->cmp_data->data[coding->cmp_data->used++] = component; \ + if (coding->cmp_data->used - coding->cmp_data_start \ + == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \ + { \ + CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \ + coding->composing = COMPOSITION_NO; \ + } \ + } while (0) /* Get one byte from a data pointed by SRC and increment SRC. If SRC @@ -691,7 +722,7 @@ detect_coding_emacs_mule (src, src_end, multibytep) #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \ do { \ int bytes; \ - \ + \ c = SAFE_ONE_MORE_BYTE (); \ if (c < 0) \ break; \ @@ -722,7 +753,10 @@ detect_coding_emacs_mule (src, src_end, multibytep) break; \ *p++ = c; \ } \ - if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)) \ + if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes) \ + || (coding->flags /* We are recovering a file. */ \ + && p0[0] == LEADING_CODE_8_BIT_CONTROL \ + && ! CHAR_HEAD_P (p0[1]))) \ c = STRING_CHAR (p0, bytes); \ else \ c = -1; \ @@ -769,12 +803,13 @@ static INLINE int decode_composition_emacs_mule (coding, src, src_end, destination, dst_end, dst_bytes) struct coding_system *coding; - unsigned char *src, *src_end, **destination, *dst_end; + const unsigned char *src, *src_end; + unsigned char **destination, *dst_end; int dst_bytes; { unsigned char *dst = *destination; int method, data_len, nchars; - unsigned char *src_base = src++; + const unsigned char *src_base = src++; /* Store components of composition. */ int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; int ncomponent; @@ -826,7 +861,10 @@ decode_composition_emacs_mule (coding, src, src_end, else { int bytes; - if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) + if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes) + || (coding->flags /* We are recovering a file. */ + && src[0] == LEADING_CODE_8_BIT_CONTROL + && ! CHAR_HEAD_P (src[1]))) c = STRING_CHAR (src, bytes); else c = *src, bytes = 1; @@ -835,7 +873,7 @@ decode_composition_emacs_mule (coding, src, src_end, component[ncomponent] = c; } } - else + else if (c >= 0x80) { /* This may be an old Emacs 20 style format. See the comment at the section 2 of this file. */ @@ -887,6 +925,8 @@ decode_composition_emacs_mule (coding, src, src_end, else return 0; } + else + return 0; if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src)) { @@ -912,23 +952,25 @@ decode_composition_emacs_mule (coding, src, src_end, static void decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* SRC_BASE remembers the start position in source in each loop. The loop will be exited when there's not enough source code, or when there's not enough destination area to produce a character. */ - unsigned char *src_base; + const unsigned char *src_base; coding->produced_char = 0; while ((src_base = src) < src_end) { - unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p; + unsigned char tmp[MAX_MULTIBYTE_LENGTH]; + const unsigned char *p; int bytes; if (*src == '\r') @@ -942,11 +984,6 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) ONE_MORE_BYTE (c); if (c != '\n') { - if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; - } src--; c = '\r'; } @@ -968,7 +1005,7 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) coding->produced_char++; continue; } - else if (*src == 0x80) + else if (*src == 0x80 && coding->cmp_data) { /* Start of composition data. */ int consumed = decode_composition_emacs_mule (coding, src, src_end, @@ -985,16 +1022,36 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) p = tmp; src++; } - else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) + else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes) + || (coding->flags /* We are recovering a file. */ + && src[0] == LEADING_CODE_8_BIT_CONTROL + && ! CHAR_HEAD_P (src[1]))) { p = src; src += bytes; } else { - bytes = CHAR_STRING (*src, tmp); - p = tmp; + int i, c; + + bytes = BYTES_BY_CHAR_HEAD (*src); src++; + for (i = 1; i < bytes; i++) + { + ONE_MORE_BYTE (c); + if (CHAR_HEAD_P (c)) + break; + } + if (i < bytes) + { + bytes = CHAR_STRING (*src_base, tmp); + p = tmp; + src = src_base + 1; + } + else + { + p = src_base; + } } if (dst + bytes >= (dst_bytes ? dst_end : src)) { @@ -1061,20 +1118,21 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) } while (0) -static void encode_eol P_ ((struct coding_system *, unsigned char *, +static void encode_eol P_ ((struct coding_system *, const unsigned char *, unsigned char *, int, int)); static void encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; - unsigned char *src_base; + const unsigned char *src_base; int c; int char_offset; int *data; @@ -1116,7 +1174,22 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) EMIT_ONE_BYTE ('\r'); } else if (SINGLE_BYTE_CHAR_P (c)) - EMIT_ONE_BYTE (c); + { + if (coding->flags && ! ASCII_BYTE_P (c)) + { + /* As we are auto saving, retain the multibyte form for + 8-bit chars. */ + unsigned char buf[MAX_MULTIBYTE_LENGTH]; + int bytes = CHAR_STRING (c, buf); + + if (bytes == 1) + EMIT_ONE_BYTE (buf[0]); + else + EMIT_TWO_BYTES (buf[0], buf[1]); + } + else + EMIT_ONE_BYTE (c); + } else EMIT_BYTES (src_base, src); coding->consumed_char++; @@ -1310,7 +1383,7 @@ enum iso_code_class_type iso_code_class[256]; #define CHARSET_OK(idx, charset, c) \ (coding_system_table[idx] \ && (charset == CHARSET_ASCII \ - || (safe_chars = coding_safe_chars (coding_system_table[idx]), \ + || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \ CODING_SAFE_CHAR_P (safe_chars, c))) \ && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \ charset) \ @@ -1319,6 +1392,9 @@ enum iso_code_class_type iso_code_class[256]; #define SHIFT_OUT_OK(idx) \ (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) +#define COMPOSITION_OK(idx) \ + (coding_system_table[idx]->composing != COMPOSITION_DISABLED) + /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". Check if a text is encoded in ISO2022. If it is, return an integer in which appropriate flag bits any of: @@ -1349,6 +1425,7 @@ detect_coding_iso2022 (src, src_end, multibytep) while (mask && src < src_end) { ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); + retry: switch (c) { case ISO_CODE_ESC: @@ -1395,7 +1472,30 @@ detect_coding_iso2022 (src, src_end, multibytep) else if (c >= '0' && c <= '4') { /* ESC for start/end composition. */ - mask_found |= CODING_CATEGORY_MASK_ISO; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7)) + mask_found |= CODING_CATEGORY_MASK_ISO_7; + else + mask &= ~CODING_CATEGORY_MASK_ISO_7; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT)) + mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; + else + mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1)) + mask_found |= CODING_CATEGORY_MASK_ISO_8_1; + else + mask &= ~CODING_CATEGORY_MASK_ISO_8_1; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2)) + mask_found |= CODING_CATEGORY_MASK_ISO_8_2; + else + mask &= ~CODING_CATEGORY_MASK_ISO_8_2; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)) + mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE; + else + mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)) + mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE; + else + mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; break; } else @@ -1523,6 +1623,8 @@ detect_coding_iso2022 (src, src_end, multibytep) && mask & CODING_CATEGORY_MASK_ISO_8_2) { int i = 1; + + c = -1; while (src < src_end) { ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); @@ -1535,6 +1637,9 @@ detect_coding_iso2022 (src, src_end, multibytep) mask &= ~CODING_CATEGORY_MASK_ISO_8_2; else mask_found |= CODING_CATEGORY_MASK_ISO_8_2; + if (c >= 0) + /* This means that we have read one extra byte. */ + goto retry; } } break; @@ -1610,6 +1715,7 @@ coding_allocate_composition_data (coding, char_offset) coding->cmp_data->next = cmp_data; coding->cmp_data = cmp_data; coding->cmp_data_start = 0; + coding->composing = COMPOSITION_NO; } /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. @@ -1713,11 +1819,12 @@ coding_allocate_composition_data (coding, char_offset) static void decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* Charsets invoked to graphic plane 0 and 1 respectively. */ @@ -1728,12 +1835,12 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) (within macro ONE_MORE_BYTE), or when there's not enough destination area to produce a character (within macro EMIT_CHAR). */ - unsigned char *src_base; + const unsigned char *src_base; int c, charset; Lisp_Object translation_table; Lisp_Object safe_chars; - safe_chars = coding_safe_chars (coding); + safe_chars = coding_safe_chars (coding->symbol); if (NILP (Venable_character_translation)) translation_table = Qnil; @@ -1748,7 +1855,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) while (1) { - int c1, c2; + int c1, c2 = 0; src_base = src; ONE_MORE_BYTE (c1); @@ -1824,11 +1931,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) ONE_MORE_BYTE (c1); if (c1 != ISO_CODE_LF) { - if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; - } src--; c1 = '\r'; } @@ -1993,6 +2095,81 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) } continue; + case '%': + if (COMPOSING_P (coding)) + DECODE_COMPOSITION_END ('1'); + ONE_MORE_BYTE (c1); + if (c1 == '/') + { + /* CTEXT extended segment: + ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES-- + We keep these bytes as is for the moment. + They may be decoded by post-read-conversion. */ + int dim, M, L; + int size, required; + int produced_chars; + + ONE_MORE_BYTE (dim); + ONE_MORE_BYTE (M); + ONE_MORE_BYTE (L); + size = ((M - 128) * 128) + (L - 128); + required = 8 + size * 2; + if (dst + required > (dst_bytes ? dst_end : src)) + goto label_end_of_loop; + *dst++ = ISO_CODE_ESC; + *dst++ = '%'; + *dst++ = '/'; + *dst++ = dim; + produced_chars = 4; + dst += CHAR_STRING (M, dst), produced_chars++; + dst += CHAR_STRING (L, dst), produced_chars++; + while (size-- > 0) + { + ONE_MORE_BYTE (c1); + dst += CHAR_STRING (c1, dst), produced_chars++; + } + coding->produced_char += produced_chars; + } + else if (c1 == 'G') + { + unsigned char *d = dst; + int produced_chars; + + /* XFree86 extension for embedding UTF-8 in CTEXT: + ESC % G --UTF-8-BYTES-- ESC % @ + We keep these bytes as is for the moment. + They may be decoded by post-read-conversion. */ + if (d + 6 > (dst_bytes ? dst_end : src)) + goto label_end_of_loop; + *d++ = ISO_CODE_ESC; + *d++ = '%'; + *d++ = 'G'; + produced_chars = 3; + while (d + 1 < (dst_bytes ? dst_end : src)) + { + ONE_MORE_BYTE (c1); + if (c1 == ISO_CODE_ESC + && src + 1 < src_end + && src[0] == '%' + && src[1] == '@') + { + src += 2; + break; + } + d += CHAR_STRING (c1, d), produced_chars++; + } + if (d + 3 > (dst_bytes ? dst_end : src)) + goto label_end_of_loop; + *d++ = ISO_CODE_ESC; + *d++ = '%'; + *d++ = '@'; + dst = d; + coding->produced_char += produced_chars + 3; + } + else + goto label_invalid_code; + continue; + default: if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION)) goto label_invalid_code; @@ -2035,6 +2212,8 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) DECODE_COMPOSITION_END ('1'); src = src_base; c = *src++; + if (! NILP (translation_table)) + c = translate_char (translation_table, c, 0, 0, 0); EMIT_CHAR (c); } @@ -2258,11 +2437,11 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) /* Instead of encoding character C, produce one or two `?'s. */ -#define ENCODE_UNSAFE_CHARACTER(c) \ - do { \ - ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ - if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \ - ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ +#define ENCODE_UNSAFE_CHARACTER(c) \ + do { \ + ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \ + if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \ + ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \ } while (0) @@ -2429,7 +2608,8 @@ static unsigned char * encode_designation_at_bol (coding, translation_table, src, src_end, dst) struct coding_system *coding; Lisp_Object translation_table; - unsigned char *src, *src_end, *dst; + const unsigned char *src, *src_end; + unsigned char *dst; { int charset, c, found = 0, reg; /* Table of charsets to be designated to each graphic register. */ @@ -2470,11 +2650,12 @@ encode_designation_at_bol (coding, translation_table, src, src_end, dst) static void encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* Since the maximum bytes produced by each loop is 20, we subtract 19 @@ -2486,12 +2667,15 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) analyze multi-byte codes (within macro ONE_MORE_CHAR), or when there's not enough destination area to produce encoded codes (within macro EMIT_BYTES). */ - unsigned char *src_base; + const unsigned char *src_base; int c; Lisp_Object translation_table; Lisp_Object safe_chars; - safe_chars = coding_safe_chars (coding); + if (coding->flags & CODING_FLAG_ISO_SAFE) + coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR; + + safe_chars = coding_safe_chars (coding->symbol); if (NILP (Venable_character_translation)) translation_table = Qnil; @@ -2558,7 +2742,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) } else { - if (coding->flags & CODING_FLAG_ISO_SAFE + if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR && ! CODING_SAFE_CHAR_P (safe_chars, c)) ENCODE_UNSAFE_CHARACTER (c); else @@ -2627,7 +2811,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) *dst++ = c; coding->errors++; } - else if (coding->flags & CODING_FLAG_ISO_SAFE + else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR && ! CODING_SAFE_CHAR_P (safe_chars, c)) ENCODE_UNSAFE_CHARACTER (c); else @@ -2854,7 +3038,7 @@ detect_coding_utf_16 (src, src_end, multibytep) int multibytep; { unsigned char c1, c2; - /* Dummy for TWO_MORE_BYTES. */ + /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */ struct coding_system dummy_coding; struct coding_system *coding = &dummy_coding; @@ -2877,12 +3061,13 @@ static void decode_coding_sjis_big5 (coding, source, destination, src_bytes, dst_bytes, sjis_p) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; int sjis_p; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* SRC_BASE remembers the start position in source in each loop. @@ -2890,7 +3075,7 @@ decode_coding_sjis_big5 (coding, source, destination, (within macro ONE_MORE_BYTE), or when there's not enough destination area to produce a character (within macro EMIT_CHAR). */ - unsigned char *src_base; + const unsigned char *src_base; Lisp_Object translation_table; if (NILP (Venable_character_translation)) @@ -2905,7 +3090,7 @@ decode_coding_sjis_big5 (coding, source, destination, coding->produced_char = 0; while (1) { - int c, charset, c1, c2; + int c, charset, c1, c2 = 0; src_base = src; ONE_MORE_BYTE (c1); @@ -2922,12 +3107,6 @@ decode_coding_sjis_big5 (coding, source, destination, ONE_MORE_BYTE (c2); if (c2 == '\n') c1 = c2; - else if (coding->mode - & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; - } else /* To process C2 again, SRC is subtracted by 1. */ src--; @@ -3076,6 +3255,12 @@ encode_coding_sjis_big5 (coding, source, destination, EMIT_ONE_BYTE (c1 | 0x80); else if (charset == charset_latin_jisx0201) EMIT_ONE_BYTE (c1); + else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR) + { + EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER); + if (CHARSET_WIDTH (charset) > 1) + EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER); + } else /* There's no way other than producing the internal codes as is. */ @@ -3088,6 +3273,12 @@ encode_coding_sjis_big5 (coding, source, destination, ENCODE_BIG5 (charset, c1, c2, c1, c2); EMIT_TWO_BYTES (c1, c2); } + else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR) + { + EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER); + if (CHARSET_WIDTH (charset) > 1) + EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER); + } else /* There's no way other than producing the internal codes as is. */ @@ -3144,12 +3335,13 @@ detect_coding_ccl (src, src_end, multibytep) static void decode_eol (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; + const unsigned char *src = source; unsigned char *dst = destination; - unsigned char *src_end = src + src_bytes; + const unsigned char *src_end = src + src_bytes; unsigned char *dst_end = dst + dst_bytes; Lisp_Object translation_table; /* SRC_BASE remembers the start position in source in each loop. @@ -3157,7 +3349,7 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) (within macro ONE_MORE_BYTE), or when there's not enough destination area to produce a character (within macro EMIT_CHAR). */ - unsigned char *src_base; + const unsigned char *src_base; int c; translation_table = Qnil; @@ -3173,11 +3365,6 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) ONE_MORE_BYTE (c); if (c != '\n') { - if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; - } src--; c = '\r'; } @@ -3236,12 +3423,13 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) static void encode_eol (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; + const unsigned char *src = source; unsigned char *dst = destination; - unsigned char *src_end = src + src_bytes; + const unsigned char *src_end = src + src_bytes; unsigned char *dst_end = dst + dst_bytes; Lisp_Object translation_table; /* SRC_BASE remembers the start position in source in each loop. @@ -3249,7 +3437,8 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) analyze multi-byte codes (within macro ONE_MORE_CHAR), or when there's not enough destination area to produce encoded codes (within macro EMIT_BYTES). */ - unsigned char *src_base; + const unsigned char *src_base; + unsigned char *tmp; int c; int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY; @@ -3299,13 +3488,13 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) } if (coding->eol_type == CODING_EOL_CR) { - for (src = destination; src < dst; src++) - if (*src == '\n') *src = '\r'; + for (tmp = destination; tmp < dst; tmp++) + if (*tmp == '\n') *tmp = '\r'; } else if (selective_display) { - for (src = destination; src < dst; src++) - if (*src == '\r') *src = '\n'; + for (tmp = destination; tmp < dst; tmp++) + if (*tmp == '\r') *tmp = '\n'; } } if (coding->src_multibyte) @@ -3498,7 +3687,6 @@ setup_coding_system (coding_system, coding) coding->type = coding_type_emacs_mule; coding->common_flags |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; - coding->composing = COMPOSITION_NO; if (!NILP (coding->post_read_conversion)) coding->common_flags |= CODING_REQUIRE_DECODING_MASK; if (!NILP (coding->pre_write_conversion)) @@ -3874,7 +4062,7 @@ setup_raw_text_coding_system (coding) o coding-category-utf-8 The category for a coding system which has the same code range - as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp + as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp symbol) `utf-8' by default. o coding-category-utf-16-be @@ -4094,7 +4282,7 @@ detect_coding_mask (source, src_bytes, priorities, skip, multibytep) void detect_coding (coding, src, src_bytes) struct coding_system *coding; - unsigned char *src; + const unsigned char *src; int src_bytes; { unsigned int idx; @@ -4267,7 +4455,7 @@ detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p) void detect_eol (coding, src, src_bytes) struct coding_system *coding; - unsigned char *src; + const unsigned char *src; int src_bytes; { Lisp_Object val; @@ -4359,7 +4547,11 @@ encoding_buffer_size (coding, src_bytes) int magnification; if (coding->type == coding_type_ccl) - magnification = coding->spec.ccl.encoder.buf_magnification; + { + magnification = coding->spec.ccl.encoder.buf_magnification; + if (coding->eol_type == CODING_EOL_CRLF) + magnification *= 2; + } else if (CODING_REQUIRE_ENCODING (coding)) magnification = 3; else @@ -4376,10 +4568,6 @@ struct conversion_buffer unsigned char *data; }; -/* Don't use alloca for allocating memory space larger than this, lest - we overflow their stack. */ -#define MAX_ALLOCA 16*1024 - /* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */ #define allocate_conversion_buffer(buf, len) \ do { \ @@ -4444,7 +4632,10 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) if (ccl->eol_type ==CODING_EOL_UNDECIDED) ccl->eol_type = CODING_EOL_LF; ccl->cr_consumed = coding->spec.ccl.cr_carryover; + ccl->eight_bit_control = coding->dst_multibyte; } + else + ccl->eight_bit_control = 1; ccl->multibyte = coding->src_multibyte; if (coding->spec.ccl.eight_bit_carryover[0] != 0) { @@ -4685,7 +4876,8 @@ decode_eol_post_ccl (coding, ptr, bytes) int decode_coding (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { int extra = 0; @@ -4764,7 +4956,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) if (coding->mode & CODING_MODE_LAST_BLOCK && coding->result == CODING_FINISH_INSUFFICIENT_SRC) { - unsigned char *src = source + coding->consumed; + const unsigned char *src = source + coding->consumed; unsigned char *dst = destination + coding->produced; src_bytes -= coding->consumed; @@ -4798,7 +4990,8 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) int encode_coding (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { coding->produced = coding->produced_char = 0; @@ -4840,7 +5033,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes) if (coding->mode & CODING_MODE_LAST_BLOCK && coding->result == CODING_FINISH_INSUFFICIENT_SRC) { - unsigned char *src = source + coding->consumed; + const unsigned char *src = source + coding->consumed; unsigned char *dst = destination + coding->produced; if (coding->type == coding_type_iso2022) @@ -5164,11 +5357,22 @@ static int shrink_conversion_region_threshhold = 1024; } \ } while (0) +/* ARG is (CODING BUFFER ...) where CODING is what to be set in + Vlast_coding_system_used and the remaining elements are buffers to + kill. */ static Lisp_Object -code_convert_region_unwind (dummy) - Lisp_Object dummy; +code_convert_region_unwind (arg) + Lisp_Object arg; { + struct gcpro gcpro1; + GCPRO1 (arg); + inhibit_pre_post_conversion = 0; + Vlast_coding_system_used = XCAR (arg); + for (arg = XCDR (arg); ! NILP (arg); arg = XCDR (arg)) + Fkill_buffer (XCAR (arg)); + + UNGCPRO; return Qnil; } @@ -5279,6 +5483,10 @@ coding_restore_composition (coding, obj) enum composition_method method = (enum composition_method) data[3]; Lisp_Object components; + if (data[0] < 0 || i + data[0] > cmp_data->used) + /* Invalid composition data. */ + break; + if (method == COMPOSITION_RELATIVE) components = Qnil; else @@ -5286,10 +5494,17 @@ coding_restore_composition (coding, obj) int len = data[0] - 4, j; Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; + if (method == COMPOSITION_WITH_RULE_ALTCHARS + && len % 2 == 0) + len --; + if (len < 1) + /* Invalid composition data. */ + break; for (j = 0; j < len; j++) args[j] = make_number (data[4 + j]); components = (method == COMPOSITION_WITH_ALTCHARS - ? Fstring (len, args) : Fvector (len, args)); + ? Fstring (len, args) + : Fvector (len, args)); } compose_text (data[1], data[2], components, Qnil, obj); } @@ -5409,7 +5624,8 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) struct buffer *prev = current_buffer; Lisp_Object new; - record_unwind_protect (code_convert_region_unwind, Qnil); + record_unwind_protect (code_convert_region_unwind, + Fcons (Vlast_coding_system_used, Qnil)); /* We should not call any more pre-write/post-read-conversion functions while this pre-write-conversion is running. */ inhibit_pre_post_conversion = 1; @@ -5460,8 +5676,11 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) coding_allocate_composition_data (coding, from); } - /* Try to skip the heading and tailing ASCIIs. */ - if (coding->type != coding_type_ccl) + /* Try to skip the heading and tailing ASCIIs. We can't skip them + if we must run CCL program or there are compositions to + encode. */ + if (coding->type != coding_type_ccl + && (! coding->cmp_data || coding->cmp_data->used == 0)) { int from_byte_orig = from_byte, to_byte_orig = to_byte; @@ -5477,6 +5696,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) if (!replace) /* We must record and adjust for this new text now. */ adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); + coding_free_composition_data (coding); return 0; } @@ -5687,9 +5907,19 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG) REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG Here, we are sure that NEW >= ORIG. */ - float ratio = coding->produced - coding->consumed; - ratio /= coding->consumed; - require = len_byte * ratio; + + if (coding->produced <= coding->consumed) + { + /* This happens because of CCL-based coding system with + eol-type CRLF. */ + require = 0; + } + else + { + float ratio = coding->produced - coding->consumed; + ratio /= coding->consumed; + require = len_byte * ratio; + } first = 0; } if ((src - dst) < (require + 2000)) @@ -5757,16 +5987,22 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) && ! encodep && ! NILP (coding->post_read_conversion)) { Lisp_Object val; + Lisp_Object saved_coding_system; if (from != PT) TEMP_SET_PT_BOTH (from, from_byte); prev_Z = Z; - record_unwind_protect (code_convert_region_unwind, Qnil); + record_unwind_protect (code_convert_region_unwind, + Fcons (Vlast_coding_system_used, Qnil)); + saved_coding_system = Vlast_coding_system_used; + Vlast_coding_system_used = coding->symbol; /* We should not call any more pre-write/post-read-conversion functions while this post-read-conversion is running. */ inhibit_pre_post_conversion = 1; val = call1 (coding->post_read_conversion, make_number (inserted)); inhibit_pre_post_conversion = 0; + coding->symbol = Vlast_coding_system_used; + Vlast_coding_system_used = saved_coding_system; /* Discard the unwind protect. */ specpdl_ptr--; CHECK_NUMBER (val); @@ -5798,6 +6034,51 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) return 0; } +/* Name (or base name) of work buffer for code conversion. */ +static Lisp_Object Vcode_conversion_workbuf_name; + +/* Set the current buffer to the working buffer prepared for + code-conversion. MULTIBYTE specifies the multibyteness of the + buffer. Return the buffer we set if it must be killed after use. + Otherwise return Qnil. */ + +static Lisp_Object +set_conversion_work_buffer (multibyte) + int multibyte; +{ + Lisp_Object buffer, buffer_to_kill; + struct buffer *buf; + + buffer = Fget_buffer_create (Vcode_conversion_workbuf_name); + buf = XBUFFER (buffer); + if (buf == current_buffer) + { + /* As we are already in the work buffer, we must generate a new + buffer for the work. */ + Lisp_Object name; + + name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); + buffer = buffer_to_kill = Fget_buffer_create (name); + buf = XBUFFER (buffer); + } + else + buffer_to_kill = Qnil; + + delete_all_overlays (buf); + buf->directory = current_buffer->directory; + buf->read_only = Qnil; + buf->filename = Qnil; + buf->undo_list = Qt; + eassert (buf->overlays_before == NULL); + eassert (buf->overlays_after == NULL); + set_buffer_internal (buf); + if (BEG != BEGV || Z != ZV) + Fwiden (); + del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0); + buf->enable_multibyte_characters = multibyte ? Qt : Qnil; + return buffer_to_kill; +} + Lisp_Object run_pre_post_conversion_on_str (str, coding, encodep) Lisp_Object str; @@ -5805,48 +6086,118 @@ run_pre_post_conversion_on_str (str, coding, encodep) int encodep; { int count = SPECPDL_INDEX (); - struct gcpro gcpro1; + struct gcpro gcpro1, gcpro2; int multibyte = STRING_MULTIBYTE (str); - Lisp_Object buffer; - struct buffer *buf; + Lisp_Object old_deactivate_mark; + Lisp_Object buffer_to_kill; + Lisp_Object unwind_arg; record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); - record_unwind_protect (code_convert_region_unwind, Qnil); - GCPRO1 (str); + /* It is not crucial to specbind this. */ + old_deactivate_mark = Vdeactivate_mark; + GCPRO2 (str, old_deactivate_mark); - buffer = Fget_buffer_create (build_string (" *code-converting-work*")); - buf = XBUFFER (buffer); - - buf->directory = current_buffer->directory; - buf->read_only = Qnil; - buf->filename = Qnil; - buf->undo_list = Qt; - buf->overlays_before = Qnil; - buf->overlays_after = Qnil; - - set_buffer_internal (buf); /* We must insert the contents of STR as is without unibyte<->multibyte conversion. For that, we adjust the multibyteness of the working buffer to that of STR. */ - Ferase_buffer (); - buf->enable_multibyte_characters = multibyte ? Qt : Qnil; + buffer_to_kill = set_conversion_work_buffer (multibyte); + if (NILP (buffer_to_kill)) + unwind_arg = Fcons (Vlast_coding_system_used, Qnil); + else + unwind_arg = list2 (Vlast_coding_system_used, buffer_to_kill); + record_unwind_protect (code_convert_region_unwind, unwind_arg); insert_from_string (str, 0, 0, SCHARS (str), SBYTES (str), 0); UNGCPRO; inhibit_pre_post_conversion = 1; if (encodep) - call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z)); + { + struct buffer *prev = current_buffer; + + call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z)); + if (prev != current_buffer) + /* We must kill the current buffer too. */ + Fsetcdr (unwind_arg, Fcons (Fcurrent_buffer (), XCDR (unwind_arg))); + } else { + Vlast_coding_system_used = coding->symbol; TEMP_SET_PT_BOTH (BEG, BEG_BYTE); call1 (coding->post_read_conversion, make_number (Z - BEG)); + coding->symbol = Vlast_coding_system_used; } inhibit_pre_post_conversion = 0; + Vdeactivate_mark = old_deactivate_mark; str = make_buffer_string (BEG, Z, 1); return unbind_to (count, str); } + +/* Run pre-write-conversion function of CODING on NCHARS/NBYTES + text in *STR. *SIZE is the allocated bytes for STR. As it + is intended that this function is called from encode_terminal_code, + the pre-write-conversion function is run by safe_call and thus + "Error during redisplay: ..." is logged when an error occurs. + + Store the resulting text in *STR and set CODING->produced_char and + CODING->produced to the number of characters and bytes + respectively. If the size of *STR is too small, enlarge it by + xrealloc and update *STR and *SIZE. */ + +void +run_pre_write_conversin_on_c_str (str, size, nchars, nbytes, coding) + unsigned char **str; + int *size, nchars, nbytes; + struct coding_system *coding; +{ + struct gcpro gcpro1, gcpro2; + struct buffer *cur = current_buffer; + struct buffer *prev; + Lisp_Object old_deactivate_mark, old_last_coding_system_used; + Lisp_Object args[3]; + Lisp_Object buffer_to_kill; + + /* It is not crucial to specbind this. */ + old_deactivate_mark = Vdeactivate_mark; + old_last_coding_system_used = Vlast_coding_system_used; + GCPRO2 (old_deactivate_mark, old_last_coding_system_used); + + /* We must insert the contents of STR as is without + unibyte<->multibyte conversion. For that, we adjust the + multibyteness of the working buffer to that of STR. */ + buffer_to_kill = set_conversion_work_buffer (coding->src_multibyte); + insert_1_both (*str, nchars, nbytes, 0, 0, 0); + UNGCPRO; + inhibit_pre_post_conversion = 1; + prev = current_buffer; + args[0] = coding->pre_write_conversion; + args[1] = make_number (BEG); + args[2] = make_number (Z); + safe_call (3, args); + inhibit_pre_post_conversion = 0; + Vdeactivate_mark = old_deactivate_mark; + Vlast_coding_system_used = old_last_coding_system_used; + coding->produced_char = Z - BEG; + coding->produced = Z_BYTE - BEG_BYTE; + if (coding->produced > *size) + { + *size = coding->produced; + *str = xrealloc (*str, *size); + } + if (BEG < GPT && GPT < Z) + move_gap (BEG); + bcopy (BEG_ADDR, *str, coding->produced); + coding->src_multibyte + = ! NILP (current_buffer->enable_multibyte_characters); + if (prev != current_buffer) + Fkill_buffer (Fcurrent_buffer ()); + set_buffer_internal (cur); + if (! NILP (buffer_to_kill)) + Fkill_buffer (buffer_to_kill); +} + + Lisp_Object decode_coding_string (str, coding, nocopy) Lisp_Object str; @@ -5923,7 +6274,9 @@ decode_coding_string (str, coding, nocopy) shrinked_bytes = from + (SBYTES (str) - to_byte); } - if (!require_decoding) + if (!require_decoding + && !(SYMBOLP (coding->post_read_conversion) + && !NILP (Ffboundp (coding->post_read_conversion)))) { coding->consumed = SBYTES (str); coding->consumed_char = SCHARS (str); @@ -5953,6 +6306,7 @@ decode_coding_string (str, coding, nocopy) produced += coding->produced; produced_char += coding->produced_char; if (result == CODING_FINISH_NORMAL + || result == CODING_FINISH_INTERRUPT || (result == CODING_FINISH_INSUFFICIENT_SRC && coding->consumed == 0)) break; @@ -6014,14 +6368,19 @@ decode_coding_string (str, coding, nocopy) else newstr = make_uninit_string (produced + shrinked_bytes); if (from > 0) - bcopy (SDATA (str), SDATA (newstr), from); - bcopy (buf.data, SDATA (newstr) + from, produced); + STRING_COPYIN (newstr, 0, SDATA (str), from); + STRING_COPYIN (newstr, from, buf.data, produced); if (shrinked_bytes > from) - bcopy (SDATA (str) + to_byte, - SDATA (newstr) + from + produced, - shrinked_bytes - from); + STRING_COPYIN (newstr, from + produced, + SDATA (str) + to_byte, + shrinked_bytes - from); free_conversion_buffer (&buf); + coding->consumed += shrinked_bytes; + coding->consumed_char += shrinked_bytes; + coding->produced += shrinked_bytes; + coding->produced_char += shrinked_bytes; + if (coding->cmp_data && coding->cmp_data->used) coding_restore_composition (coding, newstr); coding_free_composition_data (coding); @@ -6049,7 +6408,12 @@ encode_coding_string (str, coding, nocopy) if (SYMBOLP (coding->pre_write_conversion) && !NILP (Ffboundp (coding->pre_write_conversion))) - str = run_pre_post_conversion_on_str (str, coding, 1); + { + str = run_pre_post_conversion_on_str (str, coding, 1); + /* As STR is just newly generated, we don't have to copy it + anymore. */ + nocopy = 1; + } from = 0; to = SCHARS (str); @@ -6057,32 +6421,27 @@ encode_coding_string (str, coding, nocopy) /* Encoding routines determine the multibyteness of the source text by coding->src_multibyte. */ - coding->src_multibyte = STRING_MULTIBYTE (str); + coding->src_multibyte = SCHARS (str) < SBYTES (str); coding->dst_multibyte = 0; if (! CODING_REQUIRE_ENCODING (coding)) - { - coding->consumed = SBYTES (str); - coding->consumed_char = SCHARS (str); - if (STRING_MULTIBYTE (str)) - { - str = Fstring_as_unibyte (str); - nocopy = 1; - } - coding->produced = SBYTES (str); - coding->produced_char = SCHARS (str); - return (nocopy ? str : Fcopy_sequence (str)); - } + goto no_need_of_encoding; if (coding->composing != COMPOSITION_DISABLED) coding_save_composition (coding, from, to, str); - /* Try to skip the heading and tailing ASCIIs. */ - if (coding->type != coding_type_ccl) + /* Try to skip the heading and tailing ASCIIs. We can't skip them + if we must run CCL program or there are compositions to + encode. */ + if (coding->type != coding_type_ccl + && (! coding->cmp_data || coding->cmp_data->used == 0)) { SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str), 1); if (from == to_byte) - return (nocopy ? str : Fcopy_sequence (str)); + { + coding_free_composition_data (coding); + goto no_need_of_encoding; + } shrinked_bytes = from + (SBYTES (str) - to_byte); } @@ -6100,6 +6459,7 @@ encode_coding_string (str, coding, nocopy) produced += coding->produced; produced_char += coding->produced_char; if (result == CODING_FINISH_NORMAL + || result == CODING_FINISH_INTERRUPT || (result == CODING_FINISH_INSUFFICIENT_SRC && coding->consumed == 0)) break; @@ -6114,17 +6474,36 @@ encode_coding_string (str, coding, nocopy) newstr = make_uninit_string (produced + shrinked_bytes); if (from > 0) - bcopy (SDATA (str), SDATA (newstr), from); - bcopy (buf.data, SDATA (newstr) + from, produced); + STRING_COPYIN (newstr, 0, SDATA (str), from); + STRING_COPYIN (newstr, from, buf.data, produced); if (shrinked_bytes > from) - bcopy (SDATA (str) + to_byte, - SDATA (newstr) + from + produced, - shrinked_bytes - from); + STRING_COPYIN (newstr, from + produced, + SDATA (str) + to_byte, + shrinked_bytes - from); free_conversion_buffer (&buf); coding_free_composition_data (coding); return newstr; + + no_need_of_encoding: + coding->consumed = SBYTES (str); + coding->consumed_char = SCHARS (str); + if (STRING_MULTIBYTE (str)) + { + if (nocopy) + /* We are sure that STR doesn't contain a multibyte + character. */ + STRING_SET_UNIBYTE (str); + else + { + str = Fstring_as_unibyte (str); + nocopy = 1; + } + } + coding->produced = SBYTES (str); + coding->produced_char = SCHARS (str); + return (nocopy ? str : Fcopy_sequence (str)); } @@ -6142,6 +6521,8 @@ about coding-system objects. */) return Qt; if (!SYMBOLP (obj)) return Qnil; + if (! NILP (Fget (obj, Qcoding_system_define_form))) + return Qt; /* Get coding-spec vector for OBJ. */ obj = Fget (obj, Qcoding_system); return ((VECTORP (obj) && XVECTOR (obj)->size == 5) @@ -6183,12 +6564,19 @@ DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, 1, 1, 0, doc: /* Check validity of CODING-SYSTEM. If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. -It is valid if it is a symbol with a non-nil `coding-system' property. -The value of property should be a vector of length 5. */) +It is valid if it is nil or a symbol with a non-nil `coding-system' property. +The value of this property should be a vector of length 5. */) (coding_system) Lisp_Object coding_system; { - CHECK_SYMBOL (coding_system); + Lisp_Object define_form; + + define_form = Fget (coding_system, Qcoding_system_define_form); + if (! NILP (define_form)) + { + Fput (coding_system, Qcoding_system_define_form, Qnil); + safe_eval (define_form); + } if (!NILP (Fcoding_system_p (coding_system))) return coding_system; while (1) @@ -6197,7 +6585,7 @@ The value of property should be a vector of length 5. */) Lisp_Object detect_coding_system (src, src_bytes, highest, multibytep) - unsigned char *src; + const unsigned char *src; int src_bytes, highest; int multibytep; { @@ -6260,8 +6648,11 @@ detect_coding_system (src, src_bytes, highest, multibytep) DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, 2, 3, 0, - doc: /* Detect coding system of the text in the region between START and END. -Return a list of possible coding systems ordered by priority. + doc: /* Detect how the byte sequence in the region is encoded. +Return a list of possible coding systems used on decoding a byte +sequence containing the bytes in the region between START and END when +the coding system `undecided' is specified. The list is ordered by +priority decided in the current language environment. If only ASCII characters are found, it returns a list of single element `undecided' or its subsidiary coding system according to a detected @@ -6290,7 +6681,7 @@ highest priority. */) the detecting source. Then code detectors can handle the tailing byte sequence more accurately. - Fix me: This is not an perfect solution. It is better that we + Fix me: This is not a perfect solution. It is better that we add one more argument, say LAST_BLOCK, to all detect_coding_XXX. */ if (to == Z || (to == GPT && GAP_SIZE > 0)) @@ -6304,8 +6695,11 @@ highest priority. */) DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, 1, 2, 0, - doc: /* Detect coding system of the text in STRING. -Return a list of possible coding systems ordered by priority. + doc: /* Detect how the byte sequence in STRING is encoded. +Return a list of possible coding systems used on decoding a byte +sequence containing the bytes in STRING when the coding system +`undecided' is specified. The list is ordered by priority decided in +the current language environment. If only ASCII characters are found, it returns a list of single element `undecided' or its subsidiary coding system according to a detected @@ -6328,35 +6722,15 @@ highest priority. */) STRING_MULTIBYTE (string)); } -/* Return an intersection of lists L1 and L2. */ - -static Lisp_Object -intersection (l1, l2) - Lisp_Object l1, l2; -{ - Lisp_Object val = Fcons (Qnil, Qnil), tail; - - for (tail = val; CONSP (l1); l1 = XCDR (l1)) - { - if (!NILP (Fmemq (XCAR (l1), l2))) - { - XSETCDR (tail, Fcons (XCAR (l1), Qnil)); - tail = XCDR (tail); - } - } - return XCDR (val); -} - - -/* Subroutine for Fsafe_coding_systems_region_internal. +/* Subroutine for Ffind_coding_systems_region_internal. Return a list of coding systems that safely encode the multibyte - text between P and PEND. SAFE_CODINGS, if non-nil, is a list of + text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of possible coding systems. If it is nil, it means that we have not yet found any coding systems. - WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An - element of WORK_TABLE is set to t once the element is looked up. + WORK_TABLE a char-table of which element is set to t once the + element is looked up. If a non-ASCII single byte char is found, set *single_byte_char_found to 1. */ @@ -6367,9 +6741,12 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) Lisp_Object safe_codings, work_table; int *single_byte_char_found; { - int c, len, idx; - Lisp_Object val; + int c, len; + Lisp_Object val, ch; + Lisp_Object prev, tail; + if (NILP (safe_codings)) + goto done_safe_codings; while (p < pend) { c = STRING_CHAR_AND_LENGTH (p, pend - p, len); @@ -6379,31 +6756,113 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) continue; if (SINGLE_BYTE_CHAR_P (c)) *single_byte_char_found = 1; - if (NILP (safe_codings)) - continue; /* Check the safe coding systems for C. */ - val = char_table_ref_and_index (work_table, c, &idx); + ch = make_number (c); + val = Faref (work_table, ch); if (EQ (val, Qt)) /* This element was already checked. Ignore it. */ continue; /* Remember that we checked this element. */ - CHAR_TABLE_SET (work_table, make_number (idx), Qt); + Faset (work_table, ch, Qt); - /* If there are some safe coding systems for C and we have - already found the other set of coding systems for the - different characters, get the intersection of them. */ - if (!EQ (safe_codings, Qt) && !NILP (val)) - val = intersection (safe_codings, val); - safe_codings = val; + for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail)) + { + Lisp_Object elt, translation_table, hash_table, accept_latin_extra; + int encodable; + + elt = XCAR (tail); + if (CONSP (XCDR (elt))) + { + /* This entry has this format now: + ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE + ACCEPT-LATIN-EXTRA ) */ + val = XCDR (elt); + encodable = ! NILP (Faref (XCAR (val), ch)); + if (! encodable) + { + val = XCDR (val); + translation_table = XCAR (val); + hash_table = XCAR (XCDR (val)); + accept_latin_extra = XCAR (XCDR (XCDR (val))); + } + } + else + { + /* This entry has this format now: ( CODING . SAFE-CHARS) */ + encodable = ! NILP (Faref (XCDR (elt), ch)); + if (! encodable) + { + /* Transform the format to: + ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE + ACCEPT-LATIN-EXTRA ) */ + val = Fget (XCAR (elt), Qcoding_system); + translation_table + = Fplist_get (AREF (val, 3), + Qtranslation_table_for_encode); + if (SYMBOLP (translation_table)) + translation_table = Fget (translation_table, + Qtranslation_table); + hash_table + = (CHAR_TABLE_P (translation_table) + ? XCHAR_TABLE (translation_table)->extras[1] + : Qnil); + accept_latin_extra + = ((EQ (AREF (val, 0), make_number (2)) + && VECTORP (AREF (val, 4))) + ? AREF (AREF (val, 4), 16) + : Qnil); + XSETCAR (tail, list5 (XCAR (elt), XCDR (elt), + translation_table, hash_table, + accept_latin_extra)); + } + } + + if (! encodable + && ((CHAR_TABLE_P (translation_table) + && ! NILP (Faref (translation_table, ch))) + || (HASH_TABLE_P (hash_table) + && ! NILP (Fgethash (ch, hash_table, Qnil))) + || (SINGLE_BYTE_CHAR_P (c) + && ! NILP (accept_latin_extra) + && VECTORP (Vlatin_extra_code_table) + && ! NILP (AREF (Vlatin_extra_code_table, c))))) + encodable = 1; + if (encodable) + prev = tail; + else + { + /* Exclude this coding system from SAFE_CODINGS. */ + if (EQ (tail, safe_codings)) + { + safe_codings = XCDR (safe_codings); + if (NILP (safe_codings)) + goto done_safe_codings; + } + else + XSETCDR (prev, XCDR (tail)); + } + } } + + done_safe_codings: + /* If the above loop was terminated before P reaches PEND, it means + SAFE_CODINGS was set to nil. If we have not yet found an + non-ASCII single-byte char, check it now. */ + if (! *single_byte_char_found) + while (p < pend) + { + c = STRING_CHAR_AND_LENGTH (p, pend - p, len); + p += len; + if (! ASCII_BYTE_P (c) + && SINGLE_BYTE_CHAR_P (c)) + { + *single_byte_char_found = 1; + break; + } + } return safe_codings; } - -/* Return a list of coding systems that safely encode the text between - START and END. If the text contains only ASCII or is unibyte, - return t. */ - DEFUN ("find-coding-systems-region-internal", Ffind_coding_systems_region_internal, Sfind_coding_systems_region_internal, 2, 2, 0, @@ -6462,29 +6921,175 @@ DEFUN ("find-coding-systems-region-internal", } /* The text contains non-ASCII characters. */ - work_table = Fcopy_sequence (Vchar_coding_system_table); - safe_codings = find_safe_codings (p1, p1end, Qt, work_table, + + work_table = Fmake_char_table (Qchar_coding_system, Qnil); + safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars)); + + safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table, &single_byte_char_found); if (p2 < p2end) safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table, &single_byte_char_found); + if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars))) + safe_codings = Qt; + else + { + /* Turn safe_codings to a list of coding systems... */ + Lisp_Object val; + + if (single_byte_char_found) + /* ... and append these for eight-bit chars. */ + val = Fcons (Qraw_text, + Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil))); + else + /* ... and append generic coding systems. */ + val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars)); + + for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings)) + val = Fcons (XCAR (XCAR (safe_codings)), val); + safe_codings = val; + } + + return safe_codings; +} + + +/* Search from position POS for such characters that are unencodable + accoding to SAFE_CHARS, and return a list of their positions. P + points where in the memory the character at POS exists. Limit the + search at PEND or when Nth unencodable characters are found. + + If SAFE_CHARS is a char table, an element for an unencodable + character is nil. + + If SAFE_CHARS is nil, all non-ASCII characters are unencodable. + + Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and + eight-bit-graphic characters are unencodable. */ + +static Lisp_Object +unencodable_char_position (safe_chars, pos, p, pend, n) + Lisp_Object safe_chars; + int pos; + unsigned char *p, *pend; + int n; +{ + Lisp_Object pos_list; + + pos_list = Qnil; + while (p < pend) + { + int len; + int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len); + + if (c >= 128 + && (CHAR_TABLE_P (safe_chars) + ? NILP (CHAR_TABLE_REF (safe_chars, c)) + : (NILP (safe_chars) || c < 256))) + { + pos_list = Fcons (make_number (pos), pos_list); + if (--n <= 0) + break; + } + pos++; + p += len; + } + return Fnreverse (pos_list); +} + + +DEFUN ("unencodable-char-position", Funencodable_char_position, + Sunencodable_char_position, 3, 5, 0, + doc: /* +Return position of first un-encodable character in a region. +START and END specfiy the region and CODING-SYSTEM specifies the +encoding to check. Return nil if CODING-SYSTEM does encode the region. - if (EQ (safe_codings, Qt)) - ; /* Nothing to be done. */ - else if (!single_byte_char_found) +If optional 4th argument COUNT is non-nil, it specifies at most how +many un-encodable characters to search. In this case, the value is a +list of positions. + +If optional 5th argument STRING is non-nil, it is a string to search +for un-encodable characters. In that case, START and END are indexes +to the string. */) + (start, end, coding_system, count, string) + Lisp_Object start, end, coding_system, count, string; +{ + int n; + Lisp_Object safe_chars; + struct coding_system coding; + Lisp_Object positions; + int from, to; + unsigned char *p, *pend; + + if (NILP (string)) { - /* Append generic coding systems. */ - Lisp_Object args[2]; - args[0] = safe_codings; - args[1] = Fchar_table_extra_slot (Vchar_coding_system_table, - make_number (0)); - safe_codings = Fappend (2, args); + validate_region (&start, &end); + from = XINT (start); + to = XINT (end); + if (NILP (current_buffer->enable_multibyte_characters)) + return Qnil; + p = CHAR_POS_ADDR (from); + if (to == GPT) + pend = GPT_ADDR; + else + pend = CHAR_POS_ADDR (to); } else - safe_codings = Fcons (Qraw_text, - Fcons (Qemacs_mule, - Fcons (Qno_conversion, safe_codings))); - return safe_codings; + { + CHECK_STRING (string); + CHECK_NATNUM (start); + CHECK_NATNUM (end); + from = XINT (start); + to = XINT (end); + if (from > to + || to > SCHARS (string)) + args_out_of_range_3 (string, start, end); + if (! STRING_MULTIBYTE (string)) + return Qnil; + p = SDATA (string) + string_char_to_byte (string, from); + pend = SDATA (string) + string_char_to_byte (string, to); + } + + setup_coding_system (Fcheck_coding_system (coding_system), &coding); + + if (NILP (count)) + n = 1; + else + { + CHECK_NATNUM (count); + n = XINT (count); + } + + if (coding.type == coding_type_no_conversion + || coding.type == coding_type_raw_text) + return Qnil; + + if (coding.type == coding_type_undecided) + safe_chars = Qnil; + else + safe_chars = coding_safe_chars (coding_system); + + if (STRINGP (string) + || from >= GPT || to <= GPT) + positions = unencodable_char_position (safe_chars, from, p, pend, n); + else + { + Lisp_Object args[2]; + + args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n); + n -= XINT (Flength (args[0])); + if (n <= 0) + positions = args[0]; + else + { + args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR, + pend, n); + positions = Fappend (2, args); + } + } + + return (NILP (count) ? Fcar (positions) : positions); } @@ -6750,8 +7355,7 @@ Return the corresponding character code in Big5. */) return val; } -DEFUN ("set-terminal-coding-system-internal", - Fset_terminal_coding_system_internal, +DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal, Sset_terminal_coding_system_internal, 1, 1, 0, doc: /* Internal use only. */) (coding_system) @@ -6760,7 +7364,7 @@ DEFUN ("set-terminal-coding-system-internal", CHECK_SYMBOL (coding_system); setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); /* We had better not send unsafe characters to terminal. */ - terminal_coding.flags |= CODING_FLAG_ISO_SAFE; + terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR; /* Character composition should be disabled. */ terminal_coding.composing = COMPOSITION_DISABLED; /* Error notification should be suppressed. */ @@ -6770,8 +7374,7 @@ DEFUN ("set-terminal-coding-system-internal", return Qnil; } -DEFUN ("set-safe-terminal-coding-system-internal", - Fset_safe_terminal_coding_system_internal, +DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal, Sset_safe_terminal_coding_system_internal, 1, 1, 0, doc: /* Internal use only. */) (coding_system) @@ -6783,22 +7386,21 @@ DEFUN ("set-safe-terminal-coding-system-internal", /* Character composition should be disabled. */ safe_terminal_coding.composing = COMPOSITION_DISABLED; /* Error notification should be suppressed. */ - terminal_coding.suppress_error = 1; + safe_terminal_coding.suppress_error = 1; safe_terminal_coding.src_multibyte = 1; safe_terminal_coding.dst_multibyte = 0; return Qnil; } -DEFUN ("terminal-coding-system", - Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0, +DEFUN ("terminal-coding-system", Fterminal_coding_system, + Sterminal_coding_system, 0, 0, 0, doc: /* Return coding system specified for terminal output. */) () { return terminal_coding.symbol; } -DEFUN ("set-keyboard-coding-system-internal", - Fset_keyboard_coding_system_internal, +DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal, Sset_keyboard_coding_system_internal, 1, 1, 0, doc: /* Internal use only. */) (coding_system) @@ -6811,8 +7413,8 @@ DEFUN ("set-keyboard-coding-system-internal", return Qnil; } -DEFUN ("keyboard-coding-system", - Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0, +DEFUN ("keyboard-coding-system", Fkeyboard_coding_system, + Skeyboard_coding_system, 0, 0, 0, doc: /* Return coding system specified for decoding keyboard input. */) () { @@ -6868,6 +7470,13 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */) if (nargs < 1 + XINT (target_idx)) error ("Too few arguments for operation: %s", SDATA (SYMBOL_NAME (operation))); + /* For write-region, if the 6th argument (i.e. VISIT, the 5th + argument to write-region) is string, it must be treated as a + target file name. */ + if (EQ (operation, Qwrite_region) + && nargs > 5 + && STRINGP (args[5])) + target_idx = make_number (4); target = args[XINT (target_idx) + 1]; if (!(STRINGP (target) || (EQ (operation, Qopen_network_stream) && INTEGERP (target)))) @@ -6977,6 +7586,40 @@ This function is internal use only. */) return Qnil; } +DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal, + Sdefine_coding_system_internal, 1, 1, 0, + doc: /* Register CODING-SYSTEM as a base coding system. +This function is internal use only. */) + (coding_system) + Lisp_Object coding_system; +{ + Lisp_Object safe_chars, slot; + + if (NILP (Fcheck_coding_system (coding_system))) + Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil)); + safe_chars = coding_safe_chars (coding_system); + if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars)) + error ("No valid safe-chars property for %s", + SDATA (SYMBOL_NAME (coding_system))); + if (EQ (safe_chars, Qt)) + { + if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars)))) + XSETCAR (Vcoding_system_safe_chars, + Fcons (coding_system, XCAR (Vcoding_system_safe_chars))); + } + else + { + slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars)); + if (NILP (slot)) + XSETCDR (Vcoding_system_safe_chars, + nconc2 (XCDR (Vcoding_system_safe_chars), + Fcons (Fcons (coding_system, safe_chars), Qnil))); + else + XSETCDR (slot, safe_chars); + } + return Qnil; +} + #endif /* emacs */ @@ -7047,6 +7690,9 @@ init_coding_once () void syms_of_coding () { + staticpro (&Vcode_conversion_workbuf_name); + Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*"); + Qtarget_idx = intern ("target-idx"); staticpro (&Qtarget_idx); @@ -7130,9 +7776,12 @@ syms_of_coding () } } + Vcoding_system_safe_chars = Fcons (Qnil, Qnil); + staticpro (&Vcoding_system_safe_chars); + Qtranslation_table = intern ("translation-table"); staticpro (&Qtranslation_table); - Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1)); + Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); Qtranslation_table_id = intern ("translation-table-id"); staticpro (&Qtranslation_table_id); @@ -7154,7 +7803,7 @@ syms_of_coding () But don't staticpro it here--that is done in alloc.c. */ Qchar_table_extra_slots = intern ("char-table-extra-slots"); Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0)); - Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2)); + Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0)); Qvalid_codes = intern ("valid-codes"); staticpro (&Qvalid_codes); @@ -7165,6 +7814,12 @@ syms_of_coding () Qraw_text = intern ("raw-text"); staticpro (&Qraw_text); + Qutf_8 = intern ("utf-8"); + staticpro (&Qutf_8); + + Qcoding_system_define_form = intern ("coding-system-define-form"); + staticpro (&Qcoding_system_define_form); + defsubr (&Scoding_system_p); defsubr (&Sread_coding_system); defsubr (&Sread_non_nil_coding_system); @@ -7172,6 +7827,7 @@ syms_of_coding () defsubr (&Sdetect_coding_region); defsubr (&Sdetect_coding_string); defsubr (&Sfind_coding_systems_region_internal); + defsubr (&Sunencodable_char_position); defsubr (&Sdecode_coding_region); defsubr (&Sencode_coding_region); defsubr (&Sdecode_coding_string); @@ -7188,6 +7844,7 @@ syms_of_coding () defsubr (&Sfind_operation_coding_system); defsubr (&Supdate_coding_systems_internal); defsubr (&Sset_coding_priority_internal); + defsubr (&Sdefine_coding_system_internal); DEFVAR_LISP ("coding-system-list", &Vcoding_system_list, doc: /* List of coding systems. @@ -7213,7 +7870,9 @@ updated by the functions `make-coding-system' and On detecting a coding system, Emacs tries code detection algorithms associated with each coding-category one by one in this order. When one algorithm agrees with a byte sequence of source text, the coding -system bound to the corresponding coding-category is selected. */); +system bound to the corresponding coding-category is selected. + +Don't modify this variable directly, but use `set-coding-priority'. */); { int i; @@ -7248,7 +7907,9 @@ the value of `buffer-file-coding-system' is used. */); Vcoding_system_for_write = Qnil; DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used, - doc: /* Coding system used in the latest file or process I/O. */); + doc: /* Coding system used in the latest file or process I/O. +Also set by `encode-coding-region', `decode-coding-region', +`encode-coding-string' and `decode-coding-string'. */); Vlast_coding_system_used = Qnil; DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion, @@ -7382,11 +8043,14 @@ coding system used in each operation can't encode the text. The default value is `select-safe-coding-system' (which see). */); Vselect_safe_coding_system_function = Qnil; - DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table, - doc: /* Char-table containing safe coding systems of each characters. -Each element doesn't include such generic coding systems that can -encode any characters. They are in the first extra slot. */); - Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil); + DEFVAR_BOOL ("coding-system-require-warning", + &coding_system_require_warning, + doc: /* Internal use only. +If non-nil, on writing a file, `select-safe-coding-system-function' is +called even if `coding-system-for-write' is non-nil. The command +`universal-coding-system-argument' binds this variable to t temporarily. */); + coding_system_require_warning = 0; + DEFVAR_BOOL ("inhibit-iso-escape-detection", &inhibit_iso_escape_detection, @@ -7414,6 +8078,12 @@ The other way to read escape sequences in a file without decoding is to explicitly specify some coding system that doesn't use ISO2022's escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); inhibit_iso_escape_detection = 0; + + DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input, + doc: /* Char table for translating self-inserting characters. +This is applied to the result of input methods, not their input. See also +`keyboard-translate-table'. */); + Vtranslation_table_for_input = Qnil; } char * @@ -7438,3 +8108,5 @@ emacs_strerror (error_number) #endif /* emacs */ +/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d + (do not change this comment) */