X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/ba3189039adc8ec5eba5ed3e21d42019a4616b7c..e0d9c3c9a26ba2982595ec2ec4a1167ee7e39ddb:/src/coding.c diff --git a/src/coding.c b/src/coding.c index e4b9238599..5dbaf96840 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1202,7 +1202,7 @@ detect_coding_utf_8 (struct coding_system *coding, bool multibytep = coding->src_multibyte; ptrdiff_t consumed_chars = 0; bool bom_found = 0; - int nchars = coding->head_ascii; + ptrdiff_t nchars = coding->head_ascii; int eol_seen = coding->eol_seen; detect_info->checked |= CATEGORY_MASK_UTF_8; @@ -1300,6 +1300,7 @@ detect_coding_utf_8 (struct coding_system *coding, means that we found a valid non-ASCII characters. */ detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG; } + coding->detected_utf8_bytes = src_base - coding->source; coding->detected_utf8_chars = nchars; return 1; } @@ -1484,7 +1485,7 @@ decode_coding_utf_8 (struct coding_system *coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c); coding->errors++; } @@ -1548,8 +1549,8 @@ encode_coding_utf_8 (struct coding_system *coding) *dst++ = CHAR_TO_BYTE8 (c); else CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); - produced_chars++; } + produced_chars = dst - (coding->destination + coding->produced); } record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->produced_char += produced_chars; @@ -1724,7 +1725,7 @@ decode_coding_utf_16 (struct coding_system *coding) ONE_MORE_BYTE (c2); if (c2 < 0) { - *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); + *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1); *charbuf++ = -c2; continue; } @@ -2013,7 +2014,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, int charset_ID; unsigned code; int c; - int consumed_chars = 0; + ptrdiff_t consumed_chars = 0; bool mseq_found = 0; ONE_MORE_BYTE (c); @@ -2107,7 +2108,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, case 1: code = c; - charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit; + charset_ID = ASCII_CHAR_P (code) ? charset_ascii : charset_eight_bit; break; default: @@ -2595,7 +2596,7 @@ decode_coding_emacs_mule (struct coding_system *coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c); char_offset++; coding->errors++; } @@ -3190,7 +3191,7 @@ detect_coding_iso_2022 (struct coding_system *coding, if (! single_shifting && ! (rejected & CATEGORY_MASK_ISO_8_2)) { - int len = 1; + ptrdiff_t len = 1; while (src < src_end) { src_base = src; @@ -3572,7 +3573,7 @@ decode_coding_iso_2022 (struct coding_system *coding) if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0) { - *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); + *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1); char_offset++; CODING_ISO_EXTSEGMENT_LEN (coding)--; continue; @@ -3599,7 +3600,7 @@ decode_coding_iso_2022 (struct coding_system *coding) } else { - *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); + *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1); char_offset++; } continue; @@ -3973,7 +3974,7 @@ decode_coding_iso_2022 (struct coding_system *coding) MAYBE_FINISH_COMPOSITION (); for (; src_base < src; src_base++, char_offset++) { - if (ASCII_BYTE_P (*src_base)) + if (ASCII_CHAR_P (*src_base)) *charbuf++ = *src_base; else *charbuf++ = BYTE8_TO_CHAR (*src_base); @@ -4003,7 +4004,7 @@ decode_coding_iso_2022 (struct coding_system *coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c); char_offset++; coding->errors++; /* Reset the invocation and designation status to the safest @@ -4456,7 +4457,7 @@ encode_coding_iso_2022 (struct coding_system *coding) { /* We have to produce designation sequences if any now. */ unsigned char desig_buf[16]; - int nbytes; + ptrdiff_t nbytes; ptrdiff_t offset; charset_map_loaded = 0; @@ -5639,7 +5640,7 @@ decode_coding_charset (struct coding_system *coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c); char_offset++; coding->errors++; } @@ -6211,7 +6212,7 @@ static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is reliable only when all the source bytes are ASCII. */ -static int +static ptrdiff_t check_ascii (struct coding_system *coding) { const unsigned char *src, *end; @@ -6283,12 +6284,12 @@ check_ascii (struct coding_system *coding) the value is reliable only when all the source bytes are valid UTF-8. */ -static int +static ptrdiff_t check_utf_8 (struct coding_system *coding) { const unsigned char *src, *end; int eol_seen; - int nchars = coding->head_ascii; + ptrdiff_t nchars = coding->head_ascii; if (coding->head_ascii < 0) check_ascii (coding); @@ -7264,13 +7265,16 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) coding->dst_object); } +#define MAX_CHARBUF_SIZE 0x4000 +#define MIN_CHARBUF_SIZE 0x10 -#define CHARBUF_SIZE 0x4000 - -#define ALLOC_CONVERSION_WORK_AREA(coding) \ - do { \ - coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \ - coding->charbuf_size = CHARBUF_SIZE; \ +#define ALLOC_CONVERSION_WORK_AREA(coding, size) \ + do { \ + int units = ((size) > MAX_CHARBUF_SIZE ? MAX_CHARBUF_SIZE \ + : (size) < MIN_CHARBUF_SIZE ? MIN_CHARBUF_SIZE \ + : size); \ + coding->charbuf = SAFE_ALLOCA ((units) * sizeof (int)); \ + coding->charbuf_size = (units); \ } while (0) @@ -7372,7 +7376,7 @@ decode_coding (struct coding_system *coding) record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->errors = 0; - ALLOC_CONVERSION_WORK_AREA (coding); + ALLOC_CONVERSION_WORK_AREA (coding, coding->src_bytes); attrs = CODING_ID_ATTRS (coding->id); translation_table = get_translation_table (attrs, 0, NULL); @@ -7414,7 +7418,7 @@ decode_coding (struct coding_system *coding) coding->carryover_bytes = 0; if (coding->consumed < coding->src_bytes) { - int nbytes = coding->src_bytes - coding->consumed; + ptrdiff_t nbytes = coding->src_bytes - coding->consumed; const unsigned char *src; coding_set_source (coding); @@ -7768,7 +7772,7 @@ encode_coding (struct coding_system *coding) record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->errors = 0; - ALLOC_CONVERSION_WORK_AREA (coding); + ALLOC_CONVERSION_WORK_AREA (coding, coding->src_chars); if (coding->encoder == encode_coding_ccl) { @@ -7890,7 +7894,7 @@ decode_coding_gap (struct coding_system *coding, coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); coding->head_ascii = -1; - coding->detected_utf8_chars = -1; + coding->detected_utf8_bytes = coding->detected_utf8_chars = -1; coding->eol_seen = EOL_SEEN_NONE; if (CODING_REQUIRE_DETECTION (coding)) detect_coding (coding); @@ -7907,7 +7911,8 @@ decode_coding_gap (struct coding_system *coding, if (chars != bytes) { /* There exists a non-ASCII byte. */ - if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)) + if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8) + && coding->detected_utf8_bytes == coding->src_bytes) { if (coding->detected_utf8_chars >= 0) chars = coding->detected_utf8_chars; @@ -8441,11 +8446,11 @@ from_unicode (Lisp_Object str) } Lisp_Object -from_unicode_buffer (const wchar_t* wstr) +from_unicode_buffer (const wchar_t *wstr) { return from_unicode ( make_unibyte_string ( - (char*) wstr, + (char *) wstr, /* we get one of the two final 0 bytes for free. */ 1 + sizeof (wchar_t) * wcslen (wstr))); } @@ -9029,13 +9034,13 @@ DEFUN ("find-coding-systems-region-internal", p = pbeg = BYTE_POS_ADDR (start_byte); pend = p + (end_byte - start_byte); - while (p < pend && ASCII_BYTE_P (*p)) p++; - while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--; + while (p < pend && ASCII_CHAR_P (*p)) p++; + while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--; work_table = Fmake_char_table (Qnil, Qnil); while (p < pend) { - if (ASCII_BYTE_P (*p)) + if (ASCII_CHAR_P (*p)) p++; else { @@ -9089,8 +9094,7 @@ DEFUN ("find-coding-systems-region-internal", DEFUN ("unencodable-char-position", Funencodable_char_position, Sunencodable_char_position, 3, 5, 0, - doc: /* -Return position of first un-encodable character in a region. + doc: /* Return position of first un-encodable character in a region. START and END specify the region and CODING-SYSTEM specifies the encoding to check. Return nil if CODING-SYSTEM does encode the region. @@ -9100,8 +9104,9 @@ list of positions. If optional 5th argument STRING is non-nil, it is a string to search for un-encodable characters. In that case, START and END are indexes -to the string. */) - (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string) +to the string and treated as in `substring'. */) + (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, + Lisp_Object count, Lisp_Object string) { EMACS_INT n; struct coding_system coding; @@ -9138,12 +9143,7 @@ to the string. */) else { CHECK_STRING (string); - CHECK_NATNUM (start); - CHECK_NATNUM (end); - if (! (XINT (start) <= XINT (end) && XINT (end) <= SCHARS (string))) - args_out_of_range_3 (string, start, end); - from = XINT (start); - to = XINT (end); + validate_subarray (string, start, end, SCHARS (string), &from, &to); if (! STRING_MULTIBYTE (string)) return Qnil; p = SDATA (string) + string_char_to_byte (string, from); @@ -9167,7 +9167,7 @@ to the string. */) int c; if (ascii_compatible) - while (p < stop && ASCII_BYTE_P (*p)) + while (p < stop && ASCII_CHAR_P (*p)) p++, from++; if (p >= stop) { @@ -9283,12 +9283,12 @@ is nil. */) p = pbeg = BYTE_POS_ADDR (start_byte); pend = p + (end_byte - start_byte); - while (p < pend && ASCII_BYTE_P (*p)) p++, pos++; - while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--; + while (p < pend && ASCII_CHAR_P (*p)) p++, pos++; + while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--; while (p < pend) { - if (ASCII_BYTE_P (*p)) + if (ASCII_CHAR_P (*p)) p++; else { @@ -9596,7 +9596,7 @@ Return the corresponding character. */) CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec); attrs = AREF (spec, 0); - if (ASCII_BYTE_P (ch) + if (ASCII_CHAR_P (ch) && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) return code; @@ -9677,7 +9677,7 @@ Return the corresponding character. */) CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec); attrs = AREF (spec, 0); - if (ASCII_BYTE_P (ch) + if (ASCII_CHAR_P (ch) && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) return code;