X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/7f1faf1cc202ec9ee543bd9c6b35d89e162fbe5b..964b0e76b0c609ddd0dd71b7ab7c7c44627ec044:/src/coding.c diff --git a/src/coding.c b/src/coding.c index 6dbf05ce0a..555e662338 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1,8 +1,8 @@ /* Coding system handler (conversion, detection, etc). Copyright (C) 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007, 2008, 2009 + 2005, 2006, 2007, 2008, 2009, 2010, 2011 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H14PRO021 Copyright (C) 2003 @@ -167,7 +167,7 @@ detect_coding_XXX (coding, detect_info) while (1) { - /* Get one byte from the source. If the souce is exausted, jump + /* Get one byte from the source. If the source is exhausted, jump to no_more_source:. */ ONE_MORE_BYTE (c); @@ -181,7 +181,7 @@ detect_coding_XXX (coding, detect_info) return 0; no_more_source: - /* The source exausted successfully. */ + /* The source exhausted successfully. */ detect_info->found |= found; return 1; } @@ -289,6 +289,7 @@ encode_coding_XXX (coding) #include #include +#include #include "lisp.h" #include "buffer.h" @@ -536,7 +537,7 @@ enum iso_code_class_type on output. */ #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400 -/* If set, do not encode unsafe charactes on output. */ +/* If set, do not encode unsafe characters on output. */ #define CODING_ISO_FLAG_SAFE 0x0800 /* If set, extra latin codes (128..159) are accepted as a valid code @@ -692,7 +693,7 @@ enum coding_category static Lisp_Object Vcoding_category_list; /* Table of coding categories (Lisp symbols). This variable is for - internal use oly. */ + internal use only. */ static Lisp_Object Vcoding_category_table; /* Table of coding-categories ordered by priority. */ @@ -824,7 +825,7 @@ static struct coding_system coding_categories[coding_category_max]; } while (0) -/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */ +/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */ #define EMIT_TWO_ASCII_BYTES(c1, c2) \ do { \ @@ -992,6 +993,11 @@ record_conversion_result (struct coding_system *coding, case CODING_RESULT_INSUFFICIENT_MEM: Vlast_code_conversion_error = Qinsufficient_memory; break; + case CODING_RESULT_INSUFFICIENT_DST: + /* Don't record this error in Vlast_code_conversion_error + because it happens just temporarily and is resolved when the + whole conversion is finished. */ + break; case CODING_RESULT_SUCCESS: break; default: @@ -999,6 +1005,10 @@ record_conversion_result (struct coding_system *coding, } } +/* This wrapper macro is used to preserve validity of pointers into + buffer text across calls to decode_char, which could cause + relocation of buffers if it loads a charset map, because loading a + charset map allocates large structures. */ #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ do { \ charset_map_loaded = 0; \ @@ -1231,7 +1241,7 @@ alloc_destination (coding, nbytes, dst) METHOD is one of enum composition_method. - Optionnal COMPOSITION-COMPONENTS are characters and composition + Optional COMPOSITION-COMPONENTS are characters and composition rules. In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID @@ -1858,7 +1868,7 @@ encode_coding_utf_16 (coding) { ASSURE_DESTINATION (safe_room); c = *charbuf++; - if (c >= MAX_UNICODE_CHAR) + if (c > MAX_UNICODE_CHAR) c = coding->default_char; if (c < 0x10000) @@ -1944,7 +1954,7 @@ encode_coding_utf_16 (coding) CHARS is 0xA0 plus a number of characters composed by this data, - COMPONENTs are characters of multibye form or composition + COMPONENTs are characters of multibyte form or composition rules encoded by two-byte of ASCII codes. In addition, for backward compatibility, the following formats are @@ -2021,7 +2031,7 @@ detect_coding_emacs_mule (coding, detect_info) } else { - int more_bytes = emacs_mule_bytes[*src_base] - 1; + int more_bytes = emacs_mule_bytes[c] - 1; while (more_bytes > 0) { @@ -2055,7 +2065,7 @@ detect_coding_emacs_mule (coding, detect_info) /* Parse emacs-mule multibyte sequence at SRC and return the decoded character. If CMP_STATUS indicates that we must expect MSEQ or RULE described above, decode it and return the negative value of - the deocded character or rule. If an invalid byte is found, return + the decoded character or rule. If an invalid byte is found, return -1. If SRC is too short, return -2. */ int @@ -2068,7 +2078,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status) const unsigned char *src_end = coding->source + coding->src_bytes; const unsigned char *src_base = src; int multibytep = coding->src_multibyte; - struct charset *charset; + int charset_id; unsigned code; int c; int consumed_chars = 0; @@ -2078,7 +2088,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status) if (c < 0) { c = -c; - charset = emacs_mule_charset[0]; + charset_id = emacs_mule_charset[0]; } else { @@ -2114,7 +2124,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status) switch (emacs_mule_bytes[c]) { case 2: - if (! (charset = emacs_mule_charset[c])) + if ((charset_id = emacs_mule_charset[c]) < 0) goto invalid_code; ONE_MORE_BYTE (c); if (c < 0xA0) @@ -2127,7 +2137,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status) || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) { ONE_MORE_BYTE (c); - if (c < 0xA0 || ! (charset = emacs_mule_charset[c])) + if (c < 0xA0 || (charset_id = emacs_mule_charset[c]) < 0) goto invalid_code; ONE_MORE_BYTE (c); if (c < 0xA0) @@ -2136,7 +2146,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status) } else { - if (! (charset = emacs_mule_charset[c])) + if ((charset_id = emacs_mule_charset[c]) < 0) goto invalid_code; ONE_MORE_BYTE (c); if (c < 0xA0) @@ -2151,7 +2161,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status) case 4: ONE_MORE_BYTE (c); - if (c < 0 || ! (charset = emacs_mule_charset[c])) + if (c < 0 || (charset_id = emacs_mule_charset[c]) < 0) goto invalid_code; ONE_MORE_BYTE (c); if (c < 0xA0) @@ -2165,21 +2175,21 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status) case 1: code = c; - charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) - ? charset_ascii : charset_eight_bit); + charset_id = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit; break; default: abort (); } - c = DECODE_CHAR (charset, code); + CODING_DECODE_CHAR (coding, src, src_base, src_end, + CHARSET_FROM_ID (charset_id), code, c); if (c < 0) goto invalid_code; } *nbytes = src - src_base; *nchars = consumed_chars; if (id) - *id = charset->id; + *id = charset_id; return (mseq_found ? -c : c); no_more_source: @@ -2445,8 +2455,8 @@ decode_coding_emacs_mule (coding) const unsigned char *src_end = coding->source + coding->src_bytes; const unsigned char *src_base; int *charbuf = coding->charbuf + coding->charbuf_used; - /* We may produce two annocations (charset and composition) in one - loop and one more charset annocation at the end. */ + /* We may produce two annotations (charset and composition) in one + loop and one more charset annotation at the end. */ int *charbuf_end = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); int consumed_chars = 0, consumed_chars_base; @@ -2519,9 +2529,23 @@ decode_coding_emacs_mule (coding) else { int nchars, nbytes; + /* emacs_mule_char can load a charset map from a file, which + allocates a large structure and might cause buffer text + to be relocated as result. Thus, we need to remember the + original pointer to buffer text, and fix up all related + pointers after the call. */ + const unsigned char *orig = coding->source; + EMACS_INT offset; c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id, cmp_status); + offset = coding->source - orig; + if (offset) + { + src += offset; + src_base += offset; + src_end += offset; + } if (c < 0) { if (c == -1) @@ -2535,7 +2559,7 @@ decode_coding_emacs_mule (coding) cmp_status->ncomps -= nchars; } - /* Now if C >= 0, we found a normally encoded characer, if C < + /* Now if C >= 0, we found a normally encoded character, if C < 0, we found an old-style composition component character or rule. */ @@ -3048,7 +3072,7 @@ setup_iso_safe_charsets (attrs) /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". - Check if a text is encoded in one of ISO-2022 based codig systems. + Check if a text is encoded in one of ISO-2022 based coding systems. If it is, return 1, else return 0. */ static int @@ -3238,9 +3262,13 @@ detect_coding_iso_2022 (coding, detect_info) int i = 1; while (src < src_end) { + src_base = src; ONE_MORE_BYTE (c); if (c < 0xA0) - break; + { + src = src_base; + break; + } i++; } @@ -3456,7 +3484,7 @@ finish_composition (charbuf, cmp_status) return new_chars; } -/* If characers are under composition, finish the composition. */ +/* If characters are under composition, finish the composition. */ #define MAYBE_FINISH_COMPOSITION() \ do { \ if (cmp_status->state != COMPOSING_NO) \ @@ -3563,8 +3591,8 @@ decode_coding_iso_2022 (coding) const unsigned char *src_end = coding->source + coding->src_bytes; const unsigned char *src_base; int *charbuf = coding->charbuf + coding->charbuf_used; - /* We may produce two annocations (charset and composition) in one - loop and one more charset annocation at the end. */ + /* We may produce two annotations (charset and composition) in one + loop and one more charset annotation at the end. */ int *charbuf_end = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); int consumed_chars = 0, consumed_chars_base; @@ -3600,7 +3628,7 @@ decode_coding_iso_2022 (coding) while (1) { - int c1, c2; + int c1, c2, c3; src_base = src; consumed_chars_base = consumed_chars; @@ -3725,6 +3753,8 @@ decode_coding_iso_2022 (coding) continue; case ISO_single_shift_2_7: + if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) + goto invalid_code; case ISO_single_shift_2: if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)) goto invalid_code; @@ -3860,11 +3890,11 @@ decode_coding_iso_2022 (coding) continue; case '[': /* specification of direction */ - if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION) + if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)) goto invalid_code; /* For the moment, nested direction is not supported. So, `coding->mode & CODING_MODE_DIRECTION' zero means - left-to-right, and nozero means right-to-left. */ + left-to-right, and nonzero means right-to-left. */ ONE_MORE_BYTE (c1); switch (c1) { @@ -3905,7 +3935,7 @@ decode_coding_iso_2022 (coding) int size; ONE_MORE_BYTE (dim); - if (dim < 0 || dim > 4) + if (dim < '0' || dim > '4') goto invalid_code; ONE_MORE_BYTE (M); if (M < 128) @@ -3984,26 +4014,28 @@ decode_coding_iso_2022 (coding) } /* Now we know CHARSET and 1st position code C1 of a character. - Produce a decoded character while getting 2nd position code - C2 if necessary. */ - c1 &= 0x7F; + Produce a decoded character while getting 2nd and 3rd + position codes C2, C3 if necessary. */ if (CHARSET_DIMENSION (charset) > 1) { ONE_MORE_BYTE (c2); - if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)) + if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0) + || ((c1 & 0x80) != (c2 & 0x80))) /* C2 is not in a valid range. */ goto invalid_code; - c1 = (c1 << 8) | (c2 & 0x7F); - if (CHARSET_DIMENSION (charset) > 2) + if (CHARSET_DIMENSION (charset) == 2) + c1 = (c1 << 8) | c2; + else { - ONE_MORE_BYTE (c2); - if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)) - /* C2 is not in a valid range. */ + ONE_MORE_BYTE (c3); + if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0) + || ((c1 & 0x80) != (c3 & 0x80))) + /* C3 is not in a valid range. */ goto invalid_code; - c1 = (c1 << 8) | (c2 & 0x7F); + c1 = (c1 << 16) | (c2 << 8) | c2; } } - + c1 &= 0x7F7F7F; CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c); if (c < 0) { @@ -4496,7 +4528,10 @@ encode_coding_iso_2022 (coding) charset_list = CODING_ATTR_CHARSET_LIST (attrs); coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs)); - ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); + ascii_compatible + = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) + && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION + | CODING_ISO_FLAG_LOCKING_SHIFT))); while (charbuf < charbuf_end) { @@ -4669,6 +4704,12 @@ detect_coding_sjis (coding, detect_info) int consumed_chars = 0; int found = 0; int c; + Lisp_Object attrs, charset_list; + int max_first_byte_of_2_byte_code; + + CODING_GET_INFO (coding, attrs, charset_list); + max_first_byte_of_2_byte_code + = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF); detect_info->checked |= CATEGORY_MASK_SJIS; /* A coding system of this category is always ASCII compatible. */ @@ -4680,7 +4721,8 @@ detect_coding_sjis (coding, detect_info) ONE_MORE_BYTE (c); if (c < 0x80) continue; - if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) + if ((c >= 0x81 && c <= 0x9F) + || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code)) { ONE_MORE_BYTE (c); if (c < 0x40 || c == 0x7F || c > 0xFC) @@ -4765,7 +4807,7 @@ decode_coding_sjis (coding) const unsigned char *src_end = coding->source + coding->src_bytes; const unsigned char *src_base; int *charbuf = coding->charbuf + coding->charbuf_used; - /* We may produce one charset annocation in one loop and one more at + /* We may produce one charset annotation in one loop and one more at the end. */ int *charbuf_end = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); @@ -4884,7 +4926,7 @@ decode_coding_big5 (coding) const unsigned char *src_end = coding->source + coding->src_bytes; const unsigned char *src_base; int *charbuf = coding->charbuf + coding->charbuf_used; - /* We may produce one charset annocation in one loop and one more at + /* We may produce one charset annotation in one loop and one more at the end. */ int *charbuf_end = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); @@ -5053,7 +5095,8 @@ encode_coding_sjis (coding) int c1, c2; c1 = code >> 8; - if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25) + if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25) + || c1 == 0x28 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E) { JIS_TO_SJIS2 (code); @@ -5203,62 +5246,52 @@ decode_coding_ccl (coding) int *charbuf_end = coding->charbuf + coding->charbuf_size; int consumed_chars = 0; int multibytep = coding->src_multibyte; - struct ccl_program ccl; + struct ccl_program *ccl = &coding->spec.ccl->ccl; int source_charbuf[1024]; - int source_byteidx[1024]; + int source_byteidx[1025]; Lisp_Object attrs, charset_list; CODING_GET_INFO (coding, attrs, charset_list); - setup_ccl_program (&ccl, CODING_CCL_DECODER (coding)); - while (src < src_end) + while (1) { const unsigned char *p = src; - int *source, *source_end; int i = 0; if (multibytep) - while (i < 1024 && p < src_end) - { - source_byteidx[i] = p - src; - source_charbuf[i++] = STRING_CHAR_ADVANCE (p); - } + { + while (i < 1024 && p < src_end) + { + source_byteidx[i] = p - src; + source_charbuf[i++] = STRING_CHAR_ADVANCE (p); + } + source_byteidx[i] = p - src; + } else while (i < 1024 && p < src_end) source_charbuf[i++] = *p++; if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) - ccl.last_block = 1; - - source = source_charbuf; - source_end = source + i; - while (source < source_end) - { - ccl_driver (&ccl, source, charbuf, - source_end - source, charbuf_end - charbuf, - charset_list); - source += ccl.consumed; - charbuf += ccl.produced; - if (ccl.status != CCL_STAT_SUSPEND_BY_DST) - break; - } - if (source < source_end) - src += source_byteidx[source - source_charbuf]; + ccl->last_block = 1; + ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, + charset_list); + charbuf += ccl->produced; + if (multibytep) + src += source_byteidx[ccl->consumed]; else - src = p; - consumed_chars += source - source_charbuf; - - if (ccl.status != CCL_STAT_SUSPEND_BY_SRC - && ccl.status != CODING_RESULT_INSUFFICIENT_SRC) + src += ccl->consumed; + consumed_chars += ccl->consumed; + if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC) break; } - switch (ccl.status) + switch (ccl->status) { case CCL_STAT_SUSPEND_BY_SRC: record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC); break; case CCL_STAT_SUSPEND_BY_DST: + record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST); break; case CCL_STAT_QUIT: case CCL_STAT_INVALID_CMD: @@ -5277,7 +5310,7 @@ static int encode_coding_ccl (coding) struct coding_system *coding; { - struct ccl_program ccl; + struct ccl_program *ccl = &coding->spec.ccl->ccl; int multibytep = coding->dst_multibyte; int *charbuf = coding->charbuf; int *charbuf_end = charbuf + coding->charbuf_used; @@ -5288,35 +5321,34 @@ encode_coding_ccl (coding) Lisp_Object attrs, charset_list; CODING_GET_INFO (coding, attrs, charset_list); - setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding)); - - ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK; - ccl.dst_multibyte = coding->dst_multibyte; + if (coding->consumed_char == coding->src_chars + && coding->mode & CODING_MODE_LAST_BLOCK) + ccl->last_block = 1; while (charbuf < charbuf_end) { - ccl_driver (&ccl, charbuf, destination_charbuf, + ccl_driver (ccl, charbuf, destination_charbuf, charbuf_end - charbuf, 1024, charset_list); if (multibytep) { - ASSURE_DESTINATION (ccl.produced * 2); - for (i = 0; i < ccl.produced; i++) + ASSURE_DESTINATION (ccl->produced * 2); + for (i = 0; i < ccl->produced; i++) EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF); } else { - ASSURE_DESTINATION (ccl.produced); - for (i = 0; i < ccl.produced; i++) + ASSURE_DESTINATION (ccl->produced); + for (i = 0; i < ccl->produced; i++) *dst++ = destination_charbuf[i] & 0xFF; - produced_chars += ccl.produced; + produced_chars += ccl->produced; } - charbuf += ccl.consumed; - if (ccl.status == CCL_STAT_QUIT - || ccl.status == CCL_STAT_INVALID_CMD) + charbuf += ccl->consumed; + if (ccl->status == CCL_STAT_QUIT + || ccl->status == CCL_STAT_INVALID_CMD) break; } - switch (ccl.status) + switch (ccl->status) { case CCL_STAT_SUSPEND_BY_SRC: record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC); @@ -5560,7 +5592,7 @@ decode_coding_charset (coding) const unsigned char *src_end = coding->source + coding->src_bytes; const unsigned char *src_base; int *charbuf = coding->charbuf + coding->charbuf_used; - /* We may produce one charset annocation in one loop and one more at + /* We may produce one charset annotation in one loop and one more at the end. */ int *charbuf_end = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); @@ -5791,6 +5823,7 @@ setup_coding_system (coding_system, coding) coding->max_charset_id = SCHARS (val) - 1; coding->safe_charsets = SDATA (val); coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs)); + coding->carryover_bytes = 0; coding_type = CODING_ATTR_TYPE (attrs); if (EQ (coding_type, Qundecided)) @@ -6040,10 +6073,9 @@ raw_text_coding_system (coding_system) } -/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT - does, return one of the subsidiary that has the same eol-spec as - PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil, - inherit end-of-line format from the system's setting +/* If CODING_SYSTEM doesn't specify end-of-line format, return one of + the subsidiary that has the same eol-spec as PARENT (if it is not + nil and specifies end-of-line format) or the system's setting (system_eol_type). */ Lisp_Object @@ -6066,6 +6098,8 @@ coding_inherit_eol_type (coding_system, parent) parent_spec = CODING_SYSTEM_SPEC (parent); parent_eol_type = AREF (parent_spec, 2); + if (VECTORP (parent_eol_type)) + parent_eol_type = system_eol_type; } else parent_eol_type = system_eol_type; @@ -6079,6 +6113,46 @@ coding_inherit_eol_type (coding_system, parent) return coding_system; } + +/* Check if text-conversion and eol-conversion of CODING_SYSTEM are + decided for writing to a process. If not, complement them, and + return a new coding system. */ + +Lisp_Object +complement_process_encoding_system (coding_system) + Lisp_Object coding_system; +{ + Lisp_Object coding_base = Qnil, eol_base = Qnil; + Lisp_Object spec, attrs; + int i; + + for (i = 0; i < 3; i++) + { + if (i == 1) + coding_system = CDR_SAFE (Vdefault_process_coding_system); + else if (i == 2) + coding_system = preferred_coding_system (); + spec = CODING_SYSTEM_SPEC (coding_system); + if (NILP (spec)) + continue; + attrs = AREF (spec, 0); + if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided)) + coding_base = CODING_ATTR_BASE_NAME (attrs); + if (NILP (eol_base) && ! VECTORP (AREF (spec, 2))) + eol_base = coding_system; + if (! NILP (coding_base) && ! NILP (eol_base)) + break; + } + + if (i > 0) + /* The original CODING_SYSTEM didn't specify text-conversion or + eol-conversion. Be sure that we return a fully complemented + coding system. */ + coding_system = coding_inherit_eol_type (coding_base, eol_base); + return coding_system; +} + + /* Emacs has a mechanism to automatically detect a coding system if it is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, it's impossible to distinguish some coding systems accurately @@ -6129,14 +6203,14 @@ coding_inherit_eol_type (coding_system, parent) o coding-category-iso-7-else The category for a coding system which has the same code range - as ISO2022 of 7-bit environemnt but uses locking shift or + as ISO2022 of 7-bit environment but uses locking shift or single shift functions. Assigned the coding-system (Lisp symbol) `iso-2022-7bit-lock' by default. o coding-category-iso-8-else The category for a coding system which has the same code range - as ISO2022 of 8-bit environemnt but uses locking shift or + as ISO2022 of 8-bit environment but uses locking shift or single shift functions. Assigned the coding-system (Lisp symbol) `iso-2022-8bit-ss2' by default. @@ -7099,6 +7173,7 @@ decode_coding (coding) Lisp_Object attrs; Lisp_Object undo_list; Lisp_Object translation_table; + struct ccl_spec cclspec; int carryover; int i; @@ -7131,6 +7206,11 @@ decode_coding (coding) translation_table = get_translation_table (attrs, 0, NULL); carryover = 0; + if (coding->decoder == decode_coding_ccl) + { + coding->spec.ccl = &cclspec; + setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding)); + } do { EMACS_INT pos = coding->dst_pos + coding->produced_char; @@ -7147,9 +7227,10 @@ decode_coding (coding) coding->charbuf[i] = coding->charbuf[coding->charbuf_used - carryover + i]; } - while (coding->consumed < coding->src_bytes - && (coding->result == CODING_RESULT_SUCCESS - || coding->result == CODING_RESULT_INVALID_SRC)); + while (coding->result == CODING_RESULT_INSUFFICIENT_DST + || (coding->consumed < coding->src_bytes + && (coding->result == CODING_RESULT_SUCCESS + || coding->result == CODING_RESULT_INVALID_SRC))); if (carryover > 0) { @@ -7258,7 +7339,7 @@ handle_composition_annotation (pos, limit, coding, buf, stop) components = COMPOSITION_COMPONENTS (prop); if (VECTORP (components)) { - len = XVECTOR (components)->size; + len = XVECTOR_SIZE (components); for (i = 0; i < len; i++) *buf++ = XINT (AREF (components, i)); } @@ -7399,7 +7480,8 @@ consume_chars (coding, translation_table, max_lookup) { EMACS_INT bytes; - if (coding->encoder == encode_coding_raw_text) + if (coding->encoder == encode_coding_raw_text + || coding->encoder == encode_coding_ccl) c = *src++, pos++; else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0) c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes; @@ -7498,6 +7580,7 @@ encode_coding (coding) Lisp_Object attrs; Lisp_Object translation_table; int max_lookup; + struct ccl_spec cclspec; attrs = CODING_ID_ATTRS (coding->id); if (coding->encoder == encode_coding_raw_text) @@ -7519,6 +7602,11 @@ encode_coding (coding) ALLOC_CONVERSION_WORK_AREA (coding); + if (coding->encoder == encode_coding_ccl) + { + coding->spec.ccl = &cclspec; + setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding)); + } do { coding_set_source (coding); consume_chars (coding, translation_table, max_lookup); @@ -7547,7 +7635,7 @@ static Lisp_Object Vcode_conversion_reused_workbuf; static int reused_workbuf_in_use; -/* Return a working buffer of code convesion. MULTIBYTE specifies the +/* Return a working buffer of code conversion. MULTIBYTE specifies the multibyteness of returning buffer. */ static Lisp_Object @@ -7852,7 +7940,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, if (! destination) { record_conversion_result (coding, - CODING_RESULT_INSUFFICIENT_DST); + CODING_RESULT_INSUFFICIENT_MEM); unbind_to (count, Qnil); return; } @@ -8210,7 +8298,7 @@ function `define-coding-system'. */) /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If HIGHEST is nonzero, return the coding system of the highest - priority among the detected coding systems. Otherwize return a + priority among the detected coding systems. Otherwise return a list of detected coding systems sorted by their priorities. If MULTIBYTEP is nonzero, it is assumed that the bytes are in correct multibyte form but contains only ASCII and eight-bit chars. @@ -8638,7 +8726,7 @@ DEFUN ("find-coding-systems-region-internal", EMACS_INT start_byte, end_byte; const unsigned char *p, *pbeg, *pend; int c; - Lisp_Object tail, elt; + Lisp_Object tail, elt, work_table; if (STRINGP (start)) { @@ -8696,6 +8784,7 @@ DEFUN ("find-coding-systems-region-internal", while (p < pend && ASCII_BYTE_P (*p)) p++; while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--; + work_table = Fmake_char_table (Qnil, Qnil); while (p < pend) { if (ASCII_BYTE_P (*p)) @@ -8703,6 +8792,9 @@ DEFUN ("find-coding-systems-region-internal", else { c = STRING_CHAR_ADVANCE (p); + if (!NILP (char_table_ref (work_table, c))) + /* This character was already checked. Ignore it. */ + continue; charset_map_loaded = 0; for (tail = coding_attrs_list; CONSP (tail);) @@ -8734,6 +8826,7 @@ DEFUN ("find-coding-systems-region-internal", p = pbeg + p_offset; pend = pbeg + pend_offset; } + char_table_set (work_table, c, Qt); } } @@ -9330,7 +9423,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding); /* We had better not send unsafe characters to terminal. */ terminal_coding->mode |= CODING_MODE_SAFE_ENCODING; - /* Characer composition should be disabled. */ + /* Character composition should be disabled. */ terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; terminal_coding->src_multibyte = 1; terminal_coding->dst_multibyte = 0; @@ -9347,7 +9440,7 @@ DEFUN ("set-safe-terminal-coding-system-internal", CHECK_SYMBOL (coding_system); setup_coding_system (Fcheck_coding_system (coding_system), &safe_terminal_coding); - /* Characer composition should be disabled. */ + /* Character composition should be disabled. */ safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; safe_terminal_coding.src_multibyte = 1; safe_terminal_coding.dst_multibyte = 0; @@ -9379,9 +9472,12 @@ DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_intern { struct terminal *t = get_terminal (terminal, 1); CHECK_SYMBOL (coding_system); - setup_coding_system (Fcheck_coding_system (coding_system), - TERMINAL_KEYBOARD_CODING (t)); - /* Characer composition should be disabled. */ + if (NILP (coding_system)) + coding_system = Qno_conversion; + else + Fcheck_coding_system (coding_system); + setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t)); + /* Character composition should be disabled. */ TERMINAL_KEYBOARD_CODING (t)->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; return Qnil; @@ -9601,7 +9697,7 @@ HIGHESTP non-nil means just return the highest priority one. */) return Fnreverse (val); } -static char *suffixes[] = { "-unix", "-dos", "-mac" }; +static const char *const suffixes[] = { "-unix", "-dos", "-mac" }; static Lisp_Object make_subsidiaries (base) @@ -9758,7 +9854,7 @@ usage: (define-coding-system-internal ...) */) If Nth element is a list of charset IDs, N is the first byte of one of them. The list is sorted by dimensions of the - charsets. A charset of smaller dimension comes firtst. */ + charsets. A charset of smaller dimension comes first. */ val = Fmake_vector (make_number (256), Qnil); for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) @@ -10395,7 +10491,7 @@ syms_of_coding () Vcode_conversion_reused_workbuf = Qnil; staticpro (&Vcode_conversion_workbuf_name); - Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*"); + Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*"); reused_workbuf_in_use = 0; @@ -10456,14 +10552,14 @@ syms_of_coding () DEFSYM (Qcoding_system_error, "coding-system-error"); Fput (Qcoding_system_error, Qerror_conditions, - Fcons (Qcoding_system_error, Fcons (Qerror, Qnil))); + pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil))); Fput (Qcoding_system_error, Qerror_message, - build_string ("Invalid coding system")); + make_pure_c_string ("Invalid coding system")); /* Intern this now in case it isn't already done. Setting this variable twice is harmless. But don't staticpro it here--that is done in alloc.c. */ - Qchar_table_extra_slots = intern ("char-table-extra-slots"); + Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots"); DEFSYM (Qtranslation_table, "translation-table"); Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); @@ -10489,48 +10585,48 @@ syms_of_coding () staticpro (&Vcoding_category_table); /* Followings are target of code detection. */ ASET (Vcoding_category_table, coding_category_iso_7, - intern ("coding-category-iso-7")); + intern_c_string ("coding-category-iso-7")); ASET (Vcoding_category_table, coding_category_iso_7_tight, - intern ("coding-category-iso-7-tight")); + intern_c_string ("coding-category-iso-7-tight")); ASET (Vcoding_category_table, coding_category_iso_8_1, - intern ("coding-category-iso-8-1")); + intern_c_string ("coding-category-iso-8-1")); ASET (Vcoding_category_table, coding_category_iso_8_2, - intern ("coding-category-iso-8-2")); + intern_c_string ("coding-category-iso-8-2")); ASET (Vcoding_category_table, coding_category_iso_7_else, - intern ("coding-category-iso-7-else")); + intern_c_string ("coding-category-iso-7-else")); ASET (Vcoding_category_table, coding_category_iso_8_else, - intern ("coding-category-iso-8-else")); + intern_c_string ("coding-category-iso-8-else")); ASET (Vcoding_category_table, coding_category_utf_8_auto, - intern ("coding-category-utf-8-auto")); + intern_c_string ("coding-category-utf-8-auto")); ASET (Vcoding_category_table, coding_category_utf_8_nosig, - intern ("coding-category-utf-8")); + intern_c_string ("coding-category-utf-8")); ASET (Vcoding_category_table, coding_category_utf_8_sig, - intern ("coding-category-utf-8-sig")); + intern_c_string ("coding-category-utf-8-sig")); ASET (Vcoding_category_table, coding_category_utf_16_be, - intern ("coding-category-utf-16-be")); + intern_c_string ("coding-category-utf-16-be")); ASET (Vcoding_category_table, coding_category_utf_16_auto, - intern ("coding-category-utf-16-auto")); + intern_c_string ("coding-category-utf-16-auto")); ASET (Vcoding_category_table, coding_category_utf_16_le, - intern ("coding-category-utf-16-le")); + intern_c_string ("coding-category-utf-16-le")); ASET (Vcoding_category_table, coding_category_utf_16_be_nosig, - intern ("coding-category-utf-16-be-nosig")); + intern_c_string ("coding-category-utf-16-be-nosig")); ASET (Vcoding_category_table, coding_category_utf_16_le_nosig, - intern ("coding-category-utf-16-le-nosig")); + intern_c_string ("coding-category-utf-16-le-nosig")); ASET (Vcoding_category_table, coding_category_charset, - intern ("coding-category-charset")); + intern_c_string ("coding-category-charset")); ASET (Vcoding_category_table, coding_category_sjis, - intern ("coding-category-sjis")); + intern_c_string ("coding-category-sjis")); ASET (Vcoding_category_table, coding_category_big5, - intern ("coding-category-big5")); + intern_c_string ("coding-category-big5")); ASET (Vcoding_category_table, coding_category_ccl, - intern ("coding-category-ccl")); + intern_c_string ("coding-category-ccl")); ASET (Vcoding_category_table, coding_category_emacs_mule, - intern ("coding-category-emacs-mule")); + intern_c_string ("coding-category-emacs-mule")); /* Followings are NOT target of code detection. */ ASET (Vcoding_category_table, coding_category_raw_text, - intern ("coding-category-raw-text")); + intern_c_string ("coding-category-raw-text")); ASET (Vcoding_category_table, coding_category_undecided, - intern ("coding-category-undecided")); + intern_c_string ("coding-category-undecided")); DEFSYM (Qinsufficient_source, "insufficient-source"); DEFSYM (Qinconsistent_eol, "inconsistent-eol"); @@ -10731,22 +10827,22 @@ Also used for decoding keyboard input on X Window system. */); DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix, doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format. */); - eol_mnemonic_unix = build_string (":"); + eol_mnemonic_unix = make_pure_c_string (":"); DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos, doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format. */); - eol_mnemonic_dos = build_string ("\\"); + eol_mnemonic_dos = make_pure_c_string ("\\"); DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac, doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format. */); - eol_mnemonic_mac = build_string ("/"); + eol_mnemonic_mac = make_pure_c_string ("/"); DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided, doc: /* *String displayed in mode line when end-of-line format is not yet determined. */); - eol_mnemonic_undecided = build_string (":"); + eol_mnemonic_undecided = make_pure_c_string (":"); DEFVAR_LISP ("enable-character-translation", &Venable_character_translation, doc: /* @@ -10871,25 +10967,25 @@ internal character representation. */); for (i = 0; i < coding_arg_max; i++) args[i] = Qnil; - plist[0] = intern (":name"); + plist[0] = intern_c_string (":name"); plist[1] = args[coding_arg_name] = Qno_conversion; - plist[2] = intern (":mnemonic"); + plist[2] = intern_c_string (":mnemonic"); plist[3] = args[coding_arg_mnemonic] = make_number ('='); - plist[4] = intern (":coding-type"); + plist[4] = intern_c_string (":coding-type"); plist[5] = args[coding_arg_coding_type] = Qraw_text; - plist[6] = intern (":ascii-compatible-p"); + plist[6] = intern_c_string (":ascii-compatible-p"); plist[7] = args[coding_arg_ascii_compatible_p] = Qt; - plist[8] = intern (":default-char"); + plist[8] = intern_c_string (":default-char"); plist[9] = args[coding_arg_default_char] = make_number (0); - plist[10] = intern (":for-unibyte"); + plist[10] = intern_c_string (":for-unibyte"); plist[11] = args[coding_arg_for_unibyte] = Qt; - plist[12] = intern (":docstring"); - plist[13] = build_string ("Do no conversion.\n\ + plist[12] = intern_c_string (":docstring"); + plist[13] = make_pure_c_string ("Do no conversion.\n\ \n\ When you visit a file with this coding, the file is read into a\n\ unibyte buffer as is, thus each byte of a file is treated as a\n\ character."); - plist[14] = intern (":eol-type"); + plist[14] = intern_c_string (":eol-type"); plist[15] = args[coding_arg_eol_type] = Qunix; args[coding_arg_plist] = Flist (16, plist); Fdefine_coding_system_internal (coding_arg_max, args); @@ -10899,10 +10995,10 @@ character."); plist[5] = args[coding_arg_coding_type] = Qundecided; /* This is already set. plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */ - plist[8] = intern (":charset-list"); + plist[8] = intern_c_string (":charset-list"); plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil); plist[11] = args[coding_arg_for_unibyte] = Qnil; - plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding."); + plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding."); plist[15] = args[coding_arg_eol_type] = Qnil; args[coding_arg_plist] = Flist (16, plist); Fdefine_coding_system_internal (coding_arg_max, args);