Lisp_Object Vcoding_system_hash_table;
Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
-Lisp_Object Qunix, Qdos, Qmac;
+Lisp_Object Qunix, Qdos;
+extern Lisp_Object Qmac; /* frame.c */
Lisp_Object Qbuffer_file_coding_system;
Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
Lisp_Object Qdefault_char;
}
+/* Fixme: deal with surrogates? */
static void
decode_coding_utf_8 (coding)
struct coding_system *coding;
if (! UTF_8_EXTRA_OCTET_P (c2))
goto invalid_code;
if (UTF_8_2_OCTET_LEADING_P (c1))
- c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
+ {
+ c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
+ /* Reject overlong sequences here and below. Encoders
+ producing them are incorrect; such sequences can be
+ misleading, and they break read/write invariance. */
+ if (c < 128)
+ goto invalid_code;
+ }
else
{
ONE_MORE_BYTE (c3);
if (! UTF_8_EXTRA_OCTET_P (c3))
goto invalid_code;
if (UTF_8_3_OCTET_LEADING_P (c1))
- c = (((c1 & 0xF) << 12)
- | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
+ {
+ c = (((c1 & 0xF) << 12)
+ | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
+ if (c < 0x800)
+ goto invalid_code;
+ }
else
{
ONE_MORE_BYTE (c4);
if (! UTF_8_EXTRA_OCTET_P (c4))
goto invalid_code;
if (UTF_8_4_OCTET_LEADING_P (c1))
+ {
c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
| ((c3 & 0x3F) << 6) | (c4 & 0x3F));
+ if (c < 0x10000)
+ goto invalid_code;
+ }
else
{
ONE_MORE_BYTE (c5);
c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
| ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
| (c5 & 0x3F));
- if (c > MAX_CHAR)
+ if ((c > MAX_CHAR) || (c < 0x200000))
goto invalid_code;
}
else
(CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
static void
-setup_iso_safe_charsets (Lisp_Object attrs)
+setup_iso_safe_charsets (attrs)
+ Lisp_Object attrs;
{
Lisp_Object charset_list, safe_charsets;
Lisp_Object request;
Lisp_Object val;
struct charset *charset;
int dim;
- unsigned code;
- int c1;
+ int len = 1;
+ unsigned code = c;
val = AREF (valids, c);
if (NILP (val))
{
charset = CHARSET_FROM_ID (XFASTINT (val));
dim = CHARSET_DIMENSION (charset);
- code = c;
- if (dim > 1)
+ while (len < dim)
{
- ONE_MORE_BYTE (c1);
- code = (code << 8) | c1;
- if (dim > 2)
- {
- ONE_MORE_BYTE (c1);
- code = (code << 8) | c1;
- if (dim > 3)
- {
- ONE_MORE_BYTE (c1);
- code = (c << 8) | c1;
- }
- }
+ ONE_MORE_BYTE (c);
+ code = (code << 8) | c;
+ len++;
}
CODING_DECODE_CHAR (coding, src, src_base, src_end,
charset, code, c);
/* VAL is a list of charset IDs. It is assured that the
list is sorted by charset dimensions (smaller one
comes first). */
- int b[4];
- int len = 1;
-
- b[0] = c;
- /* VAL is a list of charset IDs. */
while (CONSP (val))
{
charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
dim = CHARSET_DIMENSION (charset);
while (len < dim)
{
- ONE_MORE_BYTE (c1);
- b[len++] = c1;
+ ONE_MORE_BYTE (c);
+ code = (code << 8) | c;
+ len++;
}
- if (dim == 1)
- code = b[0];
- else if (dim == 2)
- code = (b[0] << 8) | b[1];
- else if (dim == 3)
- code = (b[0] << 16) | (b[1] << 8) | b[2];
- else
- code = (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
CODING_DECODE_CHAR (coding, src, src_base,
src_end, charset, code, c);
if (c >= 0)
DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
Sset_coding_system_priority, 1, MANY, 0,
- doc: /* Assign higher priority to coding systems given as arguments.
+ doc: /* Assign higher priority to the coding systems given as arguments.
usage: (set-coding-system-priority CODING-SYSTEM ...) */)
(nargs, args)
int nargs;
DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
Scoding_system_priority_list, 0, 1, 0,
- doc: /* Return a list of coding systems ordered by their priorities. */)
+ doc: /* Return a list of coding systems ordered by their priorities.
+HIGHESTP non-nil means just return the highest priority one. */)
(highestp)
Lisp_Object highestp;
{
return Fnreverse (val);
}
+static char *suffixes[] = { "-unix", "-dos", "-mac" };
+
static Lisp_Object
make_subsidiaries (base)
Lisp_Object base;
{
Lisp_Object subsidiaries;
- char *suffixes[] = { "-unix", "-dos", "-mac" };
int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
char *buf = (char *) alloca (base_name_len + 6);
int i;
{
dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
if (dim < dim2)
- tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
- else
tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
+ else
+ tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
}
else
{
make_number (nargs)));
}
+/* Fixme: should this record the alias relationships for
+ diagnostics? */
DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
Sdefine_coding_system_alias, 2, 2, 0,
doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
1, 1, 0,
doc: /* Return the base of CODING-SYSTEM.
-Any alias or subsidiary coding systems are not base coding system. */)
+Any alias or subsidiary coding system is not a base coding system. */)
(coding_system)
Lisp_Object coding_system;
{
DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
1, 1, 0,
- doc: /* Return the list of aliases of CODING-SYSTEM.
-A base coding system is what made by `define-coding-system'.
-Any alias nor subsidiary coding systems are not base coding system. */)
+ doc: /* Return the list of aliases of CODING-SYSTEM. */)
(coding_system)
Lisp_Object coding_system;
{
if (NILP (coding_system))
coding_system = Qno_conversion;
CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
- return AREF (spec, 2);
+ return AREF (spec, 1);
}
DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
DEFSYM (Qeol_type, "eol-type");
DEFSYM (Qunix, "unix");
DEFSYM (Qdos, "dos");
- DEFSYM (Qmac, "mac");
DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
DEFSYM (Qpost_read_conversion, "post-read-conversion");