unsigned char *_fetch_multibyte_char_p;
int _fetch_multibyte_char_len;
+#define min(X, Y) ((X) < (Y) ? (X) : (Y))
+#define max(X, Y) ((X) > (Y) ? (X) : (Y))
+\f
/* Set STR a pointer to the multi-byte form of the character C. If C
is not a composite character, the multi-byte form is set in WORKBUF
and STR points WORKBUF. The caller should allocate at least 4-byte
}
else
{
- error ("Invalid characer: %d", c);
+ error ("Invalid character: %d", c);
}
}
|| ! CHARSET_DEFINED_P (charset)
|| c1 >= 0 && c1 < 32
|| c2 >= 0 && c2 < 32)
- error ("Invalid characer: %d", c);
+ error ("Invalid character: %d", c);
*str = workbuf;
*workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
int len, *charsets;
Lisp_Object table;
{
- int num = 0;
+ register int num = 0, c;
if (! CHAR_TABLE_P (table))
table = Qnil;
while (len > 0)
{
- int bytes = BYTES_BY_CHAR_HEAD (*str);
- int charset;
+ int bytes, charset;
+ c = *str;
- if (NILP (table))
- charset = CHARSET_AT (str);
- else
+ if (c == LEADING_CODE_COMPOSITION)
{
- int c, charset;
- unsigned char c1, c2;
+ int cmpchar_id = str_cmpchar_id (str, len);
+ GLYPH *glyph;
- SPLIT_STRING(str, bytes, charset, c1, c2);
- if ((c = unify_char (table, -1, charset, c1, c2)) >= 0)
- charset = CHAR_CHARSET (c);
+ if (cmpchar_id > 0)
+ {
+ struct cmpchar_info *cmpcharp = cmpchar_table[cmpchar_id];
+ int i;
+
+ for (i = 0; i < cmpcharp->glyph_len; i++)
+ {
+ c = cmpcharp->glyph[i];
+ if (!NILP (table))
+ {
+ if ((c = unify_char (table, c, 0, 0, 0)) < 0)
+ c = cmpcharp->glyph[i];
+ }
+ if ((charset = CHAR_CHARSET (c)) < 0)
+ charset = CHARSET_ASCII;
+ if (!charsets[charset])
+ {
+ charsets[charset] = 1;
+ num += 1;
+ }
+ }
+ str += cmpcharp->len;
+ len -= cmpcharp->len;
+ continue;
+ }
+
+ charset = CHARSET_ASCII;
+ bytes = 1;
+ }
+ else
+ {
+ c = STRING_CHAR_AND_LENGTH (str, len, bytes);
+ if (! NILP (table))
+ {
+ int c1 = unify_char (table, c, 0, 0, 0);
+ if (c1 >= 0)
+ c = c1;
+ }
+ charset = CHAR_CHARSET (c);
}
if (!charsets[charset])
return CHARSET_SYMBOL (charset);
}
+/* Validity predicate behind the CHAR_VALID_P macro; use that macro
+   rather than calling this function directly.
+
+   Return nonzero iff C is a valid normal character.  When GENERICP
+   is nonzero, a valid generic character is accepted as well.  */
+int
+char_valid_p (c, genericp)
+     int c, genericp;
+{
+  int charset, c1, c2;
+
+  /* Negative codes are never valid; single-byte codes always are.  */
+  if (c < 0)
+    return 0;
+  if (SINGLE_BYTE_CHAR_P (c))
+    return 1;
+
+  SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
+  if (!CHARSET_VALID_P (charset))
+    return 0;
+
+  /* Codes at or above MIN_CHAR_COMPOSITION are valid only while they
+     stay below MIN_CHAR_COMPOSITION + n_cmpchars.  */
+  if (c >= MIN_CHAR_COMPOSITION)
+    return c < MIN_CHAR_COMPOSITION + n_cmpchars;
+
+  if (c & CHAR_FIELD1_MASK)
+    {
+      /* The dimension of C is two: both position codes count.  */
+      if (genericp && c1 == 0 && c2 == 0)
+        return 1;
+      return c1 >= 32 && c2 >= 32;
+    }
+
+  /* Otherwise only the first position code is examined.  */
+  if (genericp && c1 == 0)
+    return 1;
+  return c1 >= 32;
+}
+
+DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
+  "Return t if OBJECT is a valid normal character.\n\
+If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
+a valid generic character.")
+  (object, genericp)
+     Lisp_Object object, genericp;
+{
+  /* An OBJECT that fails NATNUMP is reported as invalid (nil)
+     instead of signaling an error.  */
+  if (! NATNUMP (object))
+    return Qnil;
+
+  /* The Lisp-level GENERICP is converted to a C truth value before
+     it is handed to CHAR_VALID_P.  */
+  return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
+}
+
DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
"Return byte length of multi-byte form of CHAR.")
(ch)
{
Lisp_Object val, disp;
int c;
+ struct Lisp_Char_Table *dp = buffer_display_table ();
CHECK_NUMBER (ch, 0);
c = XINT (ch);
/* Get the way the display table would display it. */
- disp = DISP_CHAR_VECTOR (buffer_display_table (current_buffer), (c));
+ disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
if (VECTORP (disp))
XSETINT (val, XVECTOR (disp)->size);
{
unsigned char *endp = str + len;
int width = 0;
- struct Lisp_Char_Table *dp = buffer_display_table (current_buffer);
+ struct Lisp_Char_Table *dp = buffer_display_table ();
while (str < endp)
{
DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
"Return width of STRING when displayed in the current buffer.\n\
Width is measured by how many columns it occupies on the screen.\n\
-When calculating width of a multi-byte character in STRING,\n\
- only the base leading-code is considered and the validity of\n\
- the following bytes are not checked.")
+When calculating width of a multibyte character in STRING,\n\
+only the base leading-code is considered; the validity of\n\
+the following bytes is not checked.")
(str)
Lisp_Object str;
{
}
DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
- "Return number of characters in STRING.")
+ "Return number of characters in STRING.\n\
+When using multibyte characters, this is not necessarily the same as\n\
+the length of STRING; the length counts a multibyte character as\n\
+several bytes, but this function counts a multibyte character as one\n\
+character.")
(str)
Lisp_Object str;
{
CHECK_STRING (str, 0);
+ if (NILP (current_buffer->enable_multibyte_characters))
+ return make_number (XSTRING (str)->size);
+
p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
chars = 0;
while (p < endp)
return val;
}
+DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
+  "Return number of characters between BEG and END.\n\
+When using multibyte characters, this is not necessarily the same\n\
+as (- END BEG); that subtraction gives you the number of bytes, which\n\
+may be more than the number of characters.")
+  (beg, end)
+     Lisp_Object beg, end;
+{
+  int from, to, stop;
+  int chars = 0;
+  unsigned char *p, *endp;
+
+  validate_region (&beg, &end);
+
+  from = min (XFASTINT (beg), XFASTINT (end));
+  to = max (XFASTINT (beg), XFASTINT (end));
+  p = POS_ADDR (from);
+
+  /* With multibyte characters disabled, the count is simply the
+     difference of the two positions.  */
+  if (NILP (current_buffer->enable_multibyte_characters))
+    return make_number (to - from);
+
+  /* When GPT lies within (FROM, TO], scan in two runs: first up to
+     GPT_ADDR, then from POS_ADDR (GPT) up to POS_ADDR (TO).
+     Otherwise a single run up to POS_ADDR (TO) suffices.  */
+  if (from < GPT && GPT <= to)
+    {
+      stop = GPT;
+      endp = GPT_ADDR;
+    }
+  else
+    {
+      stop = to;
+      endp = POS_ADDR (stop);
+    }
+
+  while (1)
+    {
+      if (p >= endp)
+        {
+          /* The current run is exhausted; finish if it was the last.  */
+          if (stop >= to)
+            break;
+
+          /* Start the second run at the position where the first
+             one stopped.  */
+          p = POS_ADDR (stop);
+          stop = to;
+          endp = POS_ADDR (stop);
+        }
+
+      if (*p == LEADING_CODE_COMPOSITION)
+        {
+          /* Composite character: step over its leading code, then
+             over every following byte that is not a character head.  */
+          p++;
+          while (p < endp && ! CHAR_HEAD_P (p)) p++;
+        }
+      else
+        /* Otherwise the head byte alone determines the byte length.  */
+        p += BYTES_BY_CHAR_HEAD (*p);
+
+      chars++;
+    }
+
+  return make_number (chars);
+}
+
DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
"Return non-nil value if POS is at character boundary of multibyte form.\n\
-The return value is:\n\
+When the value is non-nil, it contains some additional information:\n\
0 if POS is at an ASCII character or at the end of range,\n\
- 1 if POS is at a head of 2-byte length multi-byte form,\n\
- 2 if POS is at a head of 3-byte length multi-byte form,\n\
- 3 if POS is at a head of 4-byte length multi-byte form,\n\
- 4 if POS is at a head of multi-byte form of a composite character.\n\
+ 1 if POS is before a 2-byte length multi-byte form,\n\
+ 2 if POS is before a 3-byte length multi-byte form,\n\
+ 3 if POS is before a 4-byte length multi-byte form,\n\
+ 4 if POS is before a composite character.\n\
If POS is out of range or not at character boundary, return NIL.")
(pos)
Lisp_Object pos;
defsubr (&Ssplit_char);
defsubr (&Schar_charset);
defsubr (&Siso_charset);
+ defsubr (&Schar_valid_p);
defsubr (&Schar_bytes);
defsubr (&Schar_width);
defsubr (&Sstring_width);
defsubr (&Schar_direction);
defsubr (&Schars_in_string);
+ defsubr (&Schars_in_region);
defsubr (&Schar_boundary_p);
defsubr (&Sconcat_chars);
defsubr (&Scmpcharp);