(all uppercase), we mean the coding system, and when we write
"Big5" (capitalized), we mean the character set.
- 4. Other
+ 4. Raw text
+
+ A coding system for a text containing random 8-bit code. Emacs
+ does no code conversion on such a text except for end-of-line
+ format.
+
+ 5. Other
If a user wants to read/write a text encoded in a coding system not
listed above, he can supply a decoder and an encoder for it in CCL
Lisp_Object Qcoding_system, Qeol_type;
Lisp_Object Qbuffer_file_coding_system;
Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
+Lisp_Object Qno_conversion, Qundecided;
+Lisp_Object Qcoding_system_history;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
/* Coding-system actually used in the latest I/O. */
Lisp_Object Vlast_coding_system_used;
+/* A vector of length 256 which contains information about special
+ Latin codes (especially for dealing with Microsoft code). */
+Lisp_Object Vlatin_extra_code_table;
+
/* Flag to inhibit code conversion of end-of-line format. */
int inhibit_eol_conversion;
-/* Coding-system of what terminal accept for displaying. */
+/* Coding system to be used to encode text for terminal display. */
struct coding_system terminal_coding;
-/* Coding-system of what is sent from terminal keyboard. */
+/* Coding system to be used to encode text for terminal display when
+ terminal coding system is nil. */
+struct coding_system safe_terminal_coding;
+
+/* Coding system of what is sent from terminal keyboard. */
struct coding_system keyboard_coding;
Lisp_Object Vfile_coding_system_alist;
"coding-category-iso-7",
"coding-category-iso-8-1",
"coding-category-iso-8-2",
- "coding-category-iso-else",
+ "coding-category-iso-7-else",
+ "coding-category-iso-8-else",
"coding-category-big5",
+ "coding-category-raw-text",
"coding-category-binary"
};
function control char escape sequence description
----------------------------------------------------------------------
SI (shift-in) 0x0F none invoke G0 to GL
- SI (shift-out) 0x0E none invoke G1 to GL
+ SO (shift-out) 0x0E none invoke G1 to GL
LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 into GL
CODING_CATEGORY_MASK_ISO_7
CODING_CATEGORY_MASK_ISO_8_1
CODING_CATEGORY_MASK_ISO_8_2
- CODING_CATEGORY_MASK_ISO_ELSE
+ CODING_CATEGORY_MASK_ISO_7_ELSE
+ CODING_CATEGORY_MASK_ISO_8_ELSE
are set. If a code which should never appear in ISO2022 is found,
returns 0. */
int mask = (CODING_CATEGORY_MASK_ISO_7
| CODING_CATEGORY_MASK_ISO_8_1
| CODING_CATEGORY_MASK_ISO_8_2
- | CODING_CATEGORY_MASK_ISO_ELSE);
+ | CODING_CATEGORY_MASK_ISO_7_ELSE
+ | CODING_CATEGORY_MASK_ISO_8_ELSE
+ );
int g1 = 0; /* 1 iff designating to G1. */
int c, i;
+ struct coding_system coding_iso_8_1, coding_iso_8_2;
- while (src < src_end)
+ /* Coding systems of these categories may accept latin extra codes. */
+ setup_coding_system
+ (XSYMBOL (coding_category_table[CODING_CATEGORY_IDX_ISO_8_1])->value,
+ &coding_iso_8_1);
+ setup_coding_system
+ (XSYMBOL (coding_category_table[CODING_CATEGORY_IDX_ISO_8_2])->value,
+ &coding_iso_8_2);
+
+ while (mask && src < src_end)
{
c = *src++;
switch (c)
if (src >= src_end)
break;
c = *src++;
- if (src < src_end
- && ((c >= '(' && c <= '/')
- || c == '$' && ((*src >= '(' && *src <= '/')
- || (*src >= '@' && *src <= 'B'))))
+ if ((c >= '(' && c <= '/'))
+ {
+ /* Designation sequence for a charset of dimension 1. */
+ if (src >= src_end)
+ break;
+ c = *src++;
+ if (c < ' ' || c >= 0x80)
+ /* Invalid designation sequence. */
+ return 0;
+ }
+ else if (c == '$')
{
- /* Valid designation sequence. */
- if (c == ')' || (c == '$' && *src == ')'))
+ /* Designation sequence for a charset of dimension 2. */
+ if (src >= src_end)
+ break;
+ c = *src++;
+ if (c >= '@' && c <= 'B')
+ /* Designation for JISX0208.1978, GB2312, or JISX0208. */
+ ;
+ else if (c >= '(' && c <= '/')
{
- g1 = 1;
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
+ if (src >= src_end)
+ break;
+ c = *src++;
+ if (c < ' ' || c >= 0x80)
+ /* Invalid designation sequence. */
+ return 0;
}
- src++;
- break;
+ else
+ /* Invalid designation sequence. */
+ return 0;
}
else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
- return CODING_CATEGORY_MASK_ISO_ELSE;
+ /* Locking shift. */
+ mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE
+ | CODING_CATEGORY_MASK_ISO_8_ELSE);
+ else if (c == '0' || c == '1' || c == '2')
+ /* Start/end composition. */
+ ;
+ else
+ /* Invalid escape sequence. */
+ return 0;
break;
case ISO_CODE_SO:
- if (g1)
- return CODING_CATEGORY_MASK_ISO_ELSE;
+ mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE
+ | CODING_CATEGORY_MASK_ISO_8_ELSE);
break;
case ISO_CODE_CSI:
case ISO_CODE_SS2:
case ISO_CODE_SS3:
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
+ {
+ int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
+
+ if (VECTORP (Vlatin_extra_code_table)
+ && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+ {
+ if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA)
+ newmask |= CODING_CATEGORY_MASK_ISO_8_1;
+ if (coding_iso_8_2.flags & CODING_FLAG_ISO_LATIN_EXTRA)
+ newmask |= CODING_CATEGORY_MASK_ISO_8_2;
+ }
+ mask &= newmask;
+ }
break;
default:
if (c < 0x80)
break;
else if (c < 0xA0)
- return 0;
+ {
+ if (VECTORP (Vlatin_extra_code_table)
+ && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+ {
+ int newmask = 0;
+
+ if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA)
+ newmask |= CODING_CATEGORY_MASK_ISO_8_1;
+ if (coding_iso_8_2.flags & CODING_FLAG_ISO_LATIN_EXTRA)
+ newmask |= CODING_CATEGORY_MASK_ISO_8_2;
+ mask &= newmask;
+ }
+ else
+ return 0;
+ }
else
{
- int count = 1;
+ unsigned char *src_begin = src;
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
+ mask &= ~(CODING_CATEGORY_MASK_ISO_7
+ | CODING_CATEGORY_MASK_ISO_7_ELSE);
while (src < src_end && *src >= 0xA0)
- count++, src++;
- if (count & 1 && src < src_end)
+ src++;
+ if ((src - src_begin - 1) & 1 && src < src_end)
mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
}
break;
*dst++ = c1 | 0x80; \
break; \
} \
+ else if (coding->flags & CODING_FLAG_ISO_SAFE \
+ && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]) \
+ { \
+ /* We should not encode this character, instead produce one or \
+ two `?'s. */ \
+ *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
+ if (CHARSET_WIDTH (charset) == 2) \
+ *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
+ break; \
+ } \
else \
/* Since CHARSET is not yet invoked to any graphic planes, we \
must invoke it, or, at first, designate it to some graphic \
*dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
break; \
} \
+ else if (coding->flags & CODING_FLAG_ISO_SAFE \
+ && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]) \
+ { \
+ /* We should not encode this character, instead produce one or \
+ two `?'s. */ \
+ *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
+ if (CHARSET_WIDTH (charset) == 2) \
+ *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
+ break; \
+ } \
else \
/* Since CHARSET is not yet invoked to any graphic planes, we \
must invoke it, or, at first, designate it to some graphic \
case EMACS_leading_code_2:
ONE_MORE_BYTE (c2);
- ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
+ if (c2 < 0xA0)
+ {
+ /* invalid sequence */
+ *dst++ = c1;
+ *dst++ = c2;
+ }
+ else
+ ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
break;
case EMACS_leading_code_3:
TWO_MORE_BYTES (c2, c3);
- if (c1 < LEADING_CODE_PRIVATE_11)
+ if (c2 < 0xA0 || c3 < 0xA0)
+ {
+ /* invalid sequence */
+ *dst++ = c1;
+ *dst++ = c2;
+ *dst++ = c3;
+ }
+ else if (c1 < LEADING_CODE_PRIVATE_11)
ENCODE_ISO_CHARACTER (c1, c2, c3);
else
ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
case EMACS_leading_code_4:
THREE_MORE_BYTES (c2, c3, c4);
- ENCODE_ISO_CHARACTER (c2, c3, c4);
+ if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
+ {
+ /* invalid sequence */
+ *dst++ = c1;
+ *dst++ = c2;
+ *dst++ = c3;
+ *dst++ = c4;
+ }
+ else
+ ENCODE_ISO_CHARACTER (c2, c3, c4);
break;
case EMACS_leading_code_composition:
- ONE_MORE_BYTE (c1);
- if (c1 == 0xFF)
+ ONE_MORE_BYTE (c2);
+ if (c2 < 0xA0)
+ {
+ /* invalid sequence */
+ *dst++ = c1;
+ *dst++ = c2;
+ }
+ else if (c2 == 0xFF)
{
coding->composing = COMPOSING_WITH_RULE_HEAD;
ENCODE_COMPOSITION_WITH_RULE_START;
}
continue;
label_end_of_loop:
- coding->carryover_size = src - src_base;
+ /* We reach here because the source data does not end at a
+ character boundary. */
+ coding->carryover_size = src_end - src_base;
bcopy (src_base, coding->carryover, coding->carryover_size);
+ src = src_end;
break;
}
continue;
label_end_of_loop:
- coding->carryover_size = src - src_base;
+ coding->carryover_size = src_end - src_base;
bcopy (src_base, coding->carryover, coding->carryover_size);
- src = src_base;
+ src = src_end;
break;
}
case 2:
coding->type = coding_type_iso2022;
{
- Lisp_Object val = XVECTOR (coding_system)->contents[4];
+ Lisp_Object val;
Lisp_Object *flags;
int i, charset, default_reg_bits = 0;
+ val = XVECTOR (coding_system)->contents[4];
+
if (!VECTORP (val) || XVECTOR (val)->size != 32)
goto label_invalid_coding_system;
| (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
| (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
| (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
- | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL));
+ | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
+ | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
+ | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
+ );
/* Invoke graphic register 0 to plane 0. */
CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
CODING_SPEC_ISO_INVOCATION (coding, 1)
= (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
/* Not single shifting at first. */
- CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0;
+ CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
/* Beginning of buffer should also be regarded as bol. */
- CODING_SPEC_ISO_BOL(coding) = 1;
+ CODING_SPEC_ISO_BOL (coding) = 1;
/* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
FLAGS[REG] can be one of below:
for (charset = 0; charset <= MAX_CHARSET; charset++)
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
= CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
+ bzero (CODING_SPEC_ISO_EXPECTED_CHARSETS (coding), MAX_CHARSET + 1);
for (i = 0; i < 4; i++)
{
if (INTEGERP (flags[i])
{
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
+ CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1;
}
else if (EQ (flags[i], Qt))
{
{
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
+ CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1;
}
else
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
&& (charset = XINT (XCONS (tail)->car),
CHARSET_VALID_P (charset))
|| (charset = get_charset_id (XCONS (tail)->car)) >= 0)
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- = i;
+ {
+ CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+ = i;
+ CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]
+ = 1;
+ }
else if (EQ (XCONS (tail)->car, Qt))
default_reg_bits |= 1 << i;
tail = XCONS (tail)->cdr;
coding->require_flushing = 1;
break;
+ case 5:
+ coding->type = coding_type_raw_text;
+ break;
+
default:
if (EQ (type, Qt))
coding->type = coding_type_undecided;
The category for a coding system which has the same code range
as SJIS. Assigned the coding-system (Lisp
- symbol) `shift-jis' by default.
+ symbol) `japanese-shift-jis' by default.
o coding-category-iso-7
The category for a coding system which has the same code range
- as ISO2022 of 7-bit environment. Assigned the coding-system
- (Lisp symbol) `iso-2022-7' by default.
+ as ISO2022 of 7-bit environment. This doesn't use any locking
+ shift and single shift functions. Assigned the coding-system
+ (Lisp symbol) `iso-2022-7bit' by default.
o coding-category-iso-8-1
The category for a coding system which has the same code range
as ISO2022 of 8-bit environment and graphic plane 1 used only
- for DIMENSION1 charset. Assigned the coding-system (Lisp
- symbol) `iso-8859-1' by default.
+ for DIMENSION1 charset. This doesn't use any locking shift
+ and single shift functions. Assigned the coding-system (Lisp
+ symbol) `iso-latin-1' by default.
o coding-category-iso-8-2
The category for a coding system which has the same code range
as ISO2022 of 8-bit environment and graphic plane 1 used only
- for DIMENSION2 charset. Assigned the coding-system (Lisp
- symbol) `euc-japan' by default.
+ for DIMENSION2 charset. This doesn't use any locking shift
+ and single shift functions. Assigned the coding-system (Lisp
+ symbol) `japanese-iso-8bit' by default.
+
+ o coding-category-iso-7-else
- o coding-category-iso-else
+ The category for a coding system which has the same code range
+ as ISO2022 of 7-bit environment but uses locking shift or
+ single shift functions. Assigned the coding-system (Lisp
+ symbol) `iso-2022-7bit-lock' by default.
+
+ o coding-category-iso-8-else
The category for a coding system which has the same code range
- as ISO2022 but not belongs to any of the above three
- categories. Assigned the coding-system (Lisp symbol)
- `iso-2022-ss2-7' by default.
+ as ISO2022 of 8-bit environment but uses locking shift or
+ single shift functions. Assigned the coding-system (Lisp
+ symbol) `iso-2022-8bit-ss2' by default.
o coding-category-big5
/* C is an ISO2022 specific control code of C0. */
mask = detect_coding_iso2022 (src, src_end);
src++;
- if (mask == CODING_CATEGORY_MASK_ANY)
+ if (mask == 0)
/* No valid ISO2022 code follows C. Try again. */
goto label_loop_detect_coding;
+ mask |= CODING_CATEGORY_MASK_RAW_TEXT;
}
- else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI)
- /* C is an ISO2022 specific control code of C1,
- or the first byte of SJIS's 2-byte character code,
- or a leading code of Emacs. */
- mask = (detect_coding_iso2022 (src, src_end)
- | detect_coding_sjis (src, src_end)
- | detect_coding_emacs_mule (src, src_end));
-
else if (c < 0xA0)
- /* C is the first byte of SJIS character code,
- or a leading-code of Emacs. */
- mask = (detect_coding_sjis (src, src_end)
- | detect_coding_emacs_mule (src, src_end));
+ {
+ /* If C is a special latin extra code,
+ or is an ISO2022 specific control code of C1 (SS2 or SS3),
+ or is an ISO2022 control-sequence-introducer (CSI),
+ we should also consider the possibility of ISO2022 codings. */
+ if ((VECTORP (Vlatin_extra_code_table)
+ && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+ || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
+ || (c == ISO_CODE_CSI
+ && (src < src_end
+ && (*src == ']'
+ || (src + 1 < src_end
+ && src[1] == ']'
+ && (*src == '0' || *src == '1' || *src == '2'))))))
+ mask = (detect_coding_iso2022 (src, src_end)
+ | detect_coding_sjis (src, src_end)
+ | detect_coding_emacs_mule (src, src_end)
+ | CODING_CATEGORY_MASK_RAW_TEXT);
+ else
+ /* C is the first byte of SJIS character code,
+ or a leading-code of Emacs' internal format (emacs-mule). */
+ mask = (detect_coding_sjis (src, src_end)
+ | detect_coding_emacs_mule (src, src_end)
+ | CODING_CATEGORY_MASK_RAW_TEXT);
+ }
else
/* C is a character of ISO2022 in graphic plane right,
or a SJIS's 1-byte character code (i.e. JISX0201),
or the first byte of BIG5's 2-byte code. */
mask = (detect_coding_iso2022 (src, src_end)
| detect_coding_sjis (src, src_end)
- | detect_coding_big5 (src, src_end));
+ | detect_coding_big5 (src, src_end)
+ | CODING_CATEGORY_MASK_RAW_TEXT);
return mask;
}
{
int mask = detect_coding_mask (src, src_bytes);
int idx;
+ Lisp_Object val = Vcoding_category_list;
if (mask == CODING_CATEGORY_MASK_ANY)
/* We found nothing other than ASCII. There's nothing to do. */
return;
- if (!mask)
- /* The source text seems to be encoded in unknown coding system.
- Emacs regards the category of such a kind of coding system as
- `coding-category-binary'. We assume that a user has assigned
- an appropriate coding system for a `coding-category-binary'. */
- idx = CODING_CATEGORY_IDX_BINARY;
- else
- {
- /* We found some plausible coding systems. Let's use a coding
- system of the highest priority. */
- Lisp_Object val = Vcoding_category_list;
+ /* We found some plausible coding systems. Let's use a coding
+ system of the highest priority. */
- if (CONSP (val))
- while (!NILP (val))
- {
- idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
- if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx)))
- break;
- val = XCONS (val)->cdr;
- }
- else
- val = Qnil;
+ if (CONSP (val))
+ while (!NILP (val))
+ {
+ idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
+ if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx)))
+ break;
+ val = XCONS (val)->cdr;
+ }
+ else
+ val = Qnil;
- if (NILP (val))
- {
- /* For unknown reason, `Vcoding_category_list' contains none
- of found categories. Let's use any of them. */
- for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++)
- if (mask & (1 << idx))
- break;
- }
+ if (NILP (val))
+ {
+ /* For unknown reason, `Vcoding_category_list' contains none of
+ found categories. Let's use any of them. */
+ for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++)
+ if (mask & (1 << idx))
+ break;
}
setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding);
}
is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
CODING_EOL_CR, and CODING_EOL_UNDECIDED. */
+#define MAX_EOL_CHECK_COUNT 3
+
int
detect_eol_type (src, src_bytes)
unsigned char *src;
{
unsigned char *src_end = src + src_bytes;
unsigned char c;
+ int total = 0; /* How many end-of-lines are found so far. */
+ int eol_type = CODING_EOL_UNDECIDED;
+ int this_eol_type;
- while (src < src_end)
+ while (src < src_end && total < MAX_EOL_CHECK_COUNT)
{
c = *src++;
- if (c == '\n')
- return CODING_EOL_LF;
- else if (c == '\r')
+ if (c == '\n' || c == '\r')
{
- if (src < src_end && *src == '\n')
- return CODING_EOL_CRLF;
+ total++;
+ if (c == '\n')
+ this_eol_type = CODING_EOL_LF;
+ else if (src >= src_end || *src != '\n')
+ this_eol_type = CODING_EOL_CR;
else
- return CODING_EOL_CR;
+ this_eol_type = CODING_EOL_CRLF, src++;
+
+ if (eol_type == CODING_EOL_UNDECIDED)
+ /* This is the first end-of-line. */
+ eol_type = this_eol_type;
+ else if (eol_type != this_eol_type)
+ /* The type found differs from the one found before.
+ Let's notify the caller about this inconsistency. */
+ return CODING_EOL_INCONSISTENT;
}
}
- return CODING_EOL_UNDECIDED;
+
+ return eol_type;
}
/* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
unsigned char *src;
int src_bytes;
{
- Lisp_Object val;
+ Lisp_Object val, coding_system;
int eol_type = detect_eol_type (src, src_bytes);
if (eol_type == CODING_EOL_UNDECIDED)
/* We found no end-of-line in the source text. */
return;
- val = Fget (coding->symbol, Qeol_type);
+ if (eol_type == CODING_EOL_INCONSISTENT)
+ {
+#if 0
+ /* This code is suppressed until we find a better way to
+ distinguish raw text file and binary file. */
+
+ /* If we have already detected that the coding is raw-text, the
+ coding should actually be no-conversion. */
+ if (coding->type == coding_type_raw_text)
+ {
+ setup_coding_system (Qno_conversion, coding);
+ return;
+ }
+ /* Else, let's decode only text code anyway. */
+#endif /* 0 */
+ eol_type = CODING_EOL_LF;
+ }
+
+ coding_system = coding->symbol;
+ while (!NILP (coding_system)
+ && NILP (val = Fget (coding_system, Qeol_type)))
+ coding_system = Fget (coding_system, Qcoding_system);
if (VECTORP (val) && XVECTOR (val)->size == 3)
setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
}
case coding_type_emacs_mule:
case coding_type_undecided:
+ case coding_type_raw_text:
if (coding->eol_type == CODING_EOL_LF
|| coding->eol_type == CODING_EOL_UNDECIDED)
goto label_no_conversion;
{
int produced;
- coding->carryover_size = 0;
switch (coding->type)
{
case coding_type_no_conversion:
case coding_type_emacs_mule:
case coding_type_undecided:
+ case coding_type_raw_text:
if (coding->eol_type == CODING_EOL_LF
|| coding->eol_type == CODING_EOL_UNDECIDED)
goto label_no_conversion;
do
{
val = Fcompleting_read (prompt, Vobarray, Qcoding_system_spec,
- Qt, Qnil, Qnil, Qnil);
+ Qt, Qnil, Qnil, Qnil, Qnil);
}
while (XSTRING (val)->size == 0);
return (Fintern (val, Qnil));
(prompt)
Lisp_Object prompt;
{
- Lisp_Object val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
- Qt, Qnil, Qnil, Qnil);
+ Lisp_Object val;
+ val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
+ Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
}
DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2, 2, 0,
- "Detect coding-system of the text in the region between START and END.\n\
-Return a list of possible coding-systems ordered by priority.\n\
+ "Detect coding system of the text in the region between START and END.\n\
+Return a list of possible coding systems ordered by priority.\n\
If only ASCII characters are found, it returns `undecided'\n\
- or its subsidiary coding-system according to a detected end-of-line format.")
+ or its subsidiary coding system according to a detected end-of-line format.")
(b, e)
Lisp_Object b, e;
{
if (coding_mask == CODING_CATEGORY_MASK_ANY)
{
- val = intern ("undecided");
- if (eol_type != CODING_EOL_UNDECIDED)
+ val = Qundecided;
+ if (eol_type != CODING_EOL_UNDECIDED
+ && eol_type != CODING_EOL_INCONSISTENT)
{
- Lisp_Object val2 = Fget (val, Qeol_type);
+ Lisp_Object val2;
+ val2 = Fget (Qundecided, Qeol_type);
if (VECTORP (val2))
val = XVECTOR (val2)->contents[eol_type];
}
int idx
= XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index));
if (coding_mask & (1 << idx))
- val = Fcons (Fsymbol_value (XCONS (val2)->car), val);
+ {
+#if 0
+ /* This code is suppressed until we find a better way to
+ distinguish raw text file and binary file. */
+
+ if (idx == CODING_CATEGORY_IDX_RAW_TEXT
+ && eol_type == CODING_EOL_INCONSISTENT)
+ val = Fcons (Qno_conversion, val);
+ else
+#endif /* 0 */
+ val = Fcons (Fsymbol_value (XCONS (val2)->car), val);
+ }
}
/* Then, change the order of the list, while getting subsidiary
coding-systems. */
val2 = val;
val = Qnil;
+ /* An inconsistent end-of-line format cannot select a subsidiary
+ coding system, so treat it as undecided; otherwise it would be
+ used below as an index into the eol-type vector. */
+ if (eol_type == CODING_EOL_INCONSISTENT)
+ eol_type = CODING_EOL_UNDECIDED;
for (; !NILP (val2); val2 = XCONS (val2)->cdr)
{
if (eol_type == CODING_EOL_UNDECIDED)
val = Fcons (XCONS (val2)->car, val);
else
{
- Lisp_Object val3 = Fget (XCONS (val2)->car, Qeol_type);
+ Lisp_Object val3;
+ val3 = Fget (XCONS (val2)->car, Qeol_type);
if (VECTORP (val3))
val = Fcons (XVECTOR (val3)->contents[eol_type], val);
else
case coding_type_no_conversion:
case coding_type_emacs_mule:
case coding_type_undecided:
+ case coding_type_raw_text:
/* We need no conversion. */
*begp = *endp;
return;
*begp = *endp;
return;
case coding_type_emacs_mule:
+ case coding_type_raw_text:
if (coding->eol_type == CODING_EOL_LF)
{
/* We need no conversion. */
{
CHECK_SYMBOL (coding_system, 0);
setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
+ /* We had better not send unexpected characters to terminal. */
+ terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
+
+ return Qnil;
+}
+
+DEFUN ("set-safe-terminal-coding-system-internal",
+ Fset_safe_terminal_coding_system_internal,
+ Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ CHECK_SYMBOL (coding_system, 0);
+ setup_coding_system (Fcheck_coding_system (coding_system),
+ &safe_terminal_coding);
return Qnil;
}
for (; CONSP (chain); chain = XCONS (chain)->cdr)
{
- Lisp_Object elt = XCONS (chain)->car;
+ Lisp_Object elt;
+ elt = XCONS (chain)->car;
if (CONSP (elt)
&& ((STRINGP (target)
return Qnil;
if (! NILP (Fcoding_system_p (val)))
return Fcons (val, val);
- if (!NILP (Fboundp (val)))
+ if (!NILP (Ffboundp (val)))
return call1 (val, Flist (nargs, args));
return Qnil;
}
setup_coding_system (Qnil, &keyboard_coding);
setup_coding_system (Qnil, &terminal_coding);
+ setup_coding_system (Qnil, &safe_terminal_coding);
#if defined (MSDOS) || defined (WINDOWSNT)
system_eol_type = CODING_EOL_CRLF;
Qtarget_idx = intern ("target-idx");
staticpro (&Qtarget_idx);
+ Qcoding_system_history = intern ("coding-system-history");
+ staticpro (&Qcoding_system_history);
+ Fset (Qcoding_system_history, Qnil);
+
/* Target FILENAME is the first argument. */
Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
/* Target FILENAME is the third argument. */
Qpre_write_conversion = intern ("pre-write-conversion");
staticpro (&Qpre_write_conversion);
+ Qno_conversion = intern ("no-conversion");
+ staticpro (&Qno_conversion);
+
+ Qundecided = intern ("undecided");
+ staticpro (&Qundecided);
+
Qcoding_system_spec = intern ("coding-system-spec");
staticpro (&Qcoding_system_spec);
defsubr (&Sdecode_big5_char);
defsubr (&Sencode_big5_char);
defsubr (&Sset_terminal_coding_system_internal);
+ defsubr (&Sset_safe_terminal_coding_system_internal);
defsubr (&Sterminal_coding_system);
defsubr (&Sset_keyboard_coding_system_internal);
defsubr (&Skeyboard_coding_system);
}
DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
- "A variable of internal use only.\n\
+ "Specify the coding system for read operations.\n\
+It is useful to bind this variable with `let', but do not set it globally.\n\
If the value is a coding system, it is used for decoding on read operation.\n\
-If not, an appropriate element in `coding-system-alist' (which see) is used.");
+If not, an appropriate element is used from one of the coding system alists:\n\
+There are three such tables, `file-coding-system-alist',\n\
+`process-coding-system-alist', and `network-coding-system-alist'.");
Vcoding_system_for_read = Qnil;
DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
- "A variable of internal use only.\n\
+ "Specify the coding system for write operations.\n\
+It is useful to bind this variable with `let', but do not set it globally.\n\
If the value is a coding system, it is used for encoding on write operation.\n\
-If not, an appropriate element in `coding-system-alist' (which see) is used.");
+If not, an appropriate element is used from one of the coding system alists:\n\
+There are three such tables, `file-coding-system-alist',\n\
+`process-coding-system-alist', and `network-coding-system-alist'.");
Vcoding_system_for_write = Qnil;
DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
- "Coding-system used in the latest file or process I/O.");
+ "Coding system used in the latest file or process I/O.");
Vlast_coding_system_used = Qnil;
DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
The car part is used for decoding a process output,\n\
the cdr part is used for encoding a text to be sent to a process.");
Vdefault_process_coding_system = Qnil;
+
+ DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
+ "Table of extra Latin codes in the range 128..159 (inclusive).\n\
+This is a vector of length 256.\n\
+If Nth element is non-nil, the existence of code N in a file\n\
+\(or output of subprocess) doesn't prevent it to be detected as\n\
+a coding system of ISO 2022 variant which has a flag\n\
+`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
+or reading output of a subprocess.\n\
+Only 128th through 159th elements has a meaning.");
+ Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
}
#endif /* emacs */