#ifndef EMACS_CHARACTER_H
#define EMACS_CHARACTER_H
+#include <verify.h>
+
/* character code 1st byte byte sequence
-------------- -------- -------------
0-7F 00..7F 0xxxxxxx
/* Map character C to a byte value: when CHAR_BYTE8_P (C) holds, the
   result is C - 0x3FFF00; otherwise the conversion is delegated to
   multibyte_char_to_unibyte.  This change drops the former second
   argument (Qnil) from that call.  C is expanded more than once, so
   it should not have side effects.  */
#define CHAR_TO_BYTE8(c) \
(CHAR_BYTE8_P (c) \
? (c) - 0x3FFF00 \
- : multibyte_char_to_unibyte (c, Qnil))
+ : multibyte_char_to_unibyte (c))
/* Return the raw 8-bit byte for character C,
or -1 if C doesn't correspond to a byte. */
/* make_char (C) simply expands to make_number (C).  (The comment just
   above describes a different macro that is not shown in this excerpt.)  */
#define make_char(c) make_number (c)
/* Nonzero iff C is an ASCII byte. */
/* The test is C < 0x80.  The old form forced an unsigned comparison
   with an explicit (unsigned) cast, so negative C failed the test; the
   new form uses UNSIGNED_CMP instead.  NOTE(review): UNSIGNED_CMP is
   defined elsewhere -- confirm it also treats negative C as out of
   range.  */
-#define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80)
+#define ASCII_BYTE_P(c) UNSIGNED_CMP (c, <, 0x80)
/* Nonzero iff X is a character. */
/* X is a Lisp object: it must satisfy NATNUMP and its XFASTINT value
   must not exceed MAX_CHAR.  X is expanded twice.  */
#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
/* This change removes the unused second parameter (GENERICP), so every
   caller must now pass a single argument.  The test itself is still
   C <= MAX_CHAR, with the explicit (unsigned) cast replaced by
   UNSIGNED_CMP.  NOTE(review): UNSIGNED_CMP is defined elsewhere --
   confirm it rejects negative C as the cast did.  */
-/* Nonzero iff C is valid as a character code. GENERICP is not used. */
-#define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR)
+/* Nonzero iff C is valid as a character code. */
+#define CHAR_VALID_P(c) UNSIGNED_CMP (c, <=, MAX_CHAR)
/* Check if Lisp object X is a character or not. */
#define CHECK_CHARACTER(x) \
} while (0)
/* Nonzero iff C is a character of code less than 0x100. */
/* As with the other range predicates in this change, the explicit
   (unsigned) cast is replaced by UNSIGNED_CMP.  NOTE(review): confirm
   against UNSIGNED_CMP's definition that negative C still fails.  */
-#define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
+#define SINGLE_BYTE_CHAR_P(c) UNSIGNED_CMP (c, <, 0x100)
/* Nonzero if character C has a printable glyph. */
#define CHAR_PRINTABLE_P(c) \
Returns the length of the multibyte form. */
/* Implementation notes for CHAR_STRING (C, P):

   - C <= MAX_1_BYTE_CHAR: P[0] = C; value is 1.
   - C <= MAX_2_BYTE_CHAR: P[0] = 0xC0 | (C >> 6), followed by one
     trailing byte 0x80 | (C & 0x3F); value is 2.
   - C <= MAX_3_BYTE_CHAR: P[0] = 0xE0 | (C >> 12), followed by two
     trailing bytes, each carrying six bits of C under a 0x80 prefix;
     value is 3.
   - Otherwise the work is delegated to char_string, whose return
     value becomes the value of the macro.

   This change replaces the explicit (unsigned) casts in the range
   tests with UNSIGNED_CMP, and replaces the cast in the fallback call
   with verify_expr (sizeof (c) <= sizeof (unsigned), ...).
   NOTE(review): verify_expr presumably comes from the newly included
   <verify.h> and checks its first argument at compile time -- confirm.

   Both C and P are expanded several times, so neither argument should
   have side effects.  */
#define CHAR_STRING(c, p) \
- ((unsigned) (c) <= MAX_1_BYTE_CHAR \
+ (UNSIGNED_CMP (c, <=, MAX_1_BYTE_CHAR) \
? ((p)[0] = (c), \
1) \
- : (unsigned) (c) <= MAX_2_BYTE_CHAR \
+ : UNSIGNED_CMP (c, <=, MAX_2_BYTE_CHAR) \
? ((p)[0] = (0xC0 | ((c) >> 6)), \
(p)[1] = (0x80 | ((c) & 0x3F)), \
2) \
- : (unsigned) (c) <= MAX_3_BYTE_CHAR \
+ : UNSIGNED_CMP (c, <=, MAX_3_BYTE_CHAR) \
? ((p)[0] = (0xE0 | ((c) >> 12)), \
(p)[1] = (0x80 | (((c) >> 6) & 0x3F)), \
(p)[2] = (0x80 | ((c) & 0x3F)), \
3) \
- : char_string ((unsigned) c, p))
+ : verify_expr (sizeof (c) <= sizeof (unsigned), char_string (c, p)))
/* Store multibyte form of byte B in P. The caller should allocate at
least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the
*(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \
*(p)++ = (0x80 | ((c) & 0x3F)); \
else \
- (p) += char_string ((c), (p)); \
+ { \
+ verify (sizeof (c) <= sizeof (unsigned)); \
+ (p) += char_string (c, p); \
+ } \
} while (0)
do { \
if ((p) > (limit)) \
{ \
- const unsigned char *pcb = (p); \
+ const unsigned char *chp = (p); \
do { \
- pcb--; \
- } while (pcb >= limit && ! CHAR_HEAD_P (*pcb)); \
- (p) = (BYTES_BY_CHAR_HEAD (*pcb) == (p) - pcb) ? pcb : (p) - 1; \
+ chp--; \
+ } while (chp >= limit && ! CHAR_HEAD_P (*chp)); \
+ (p) = (BYTES_BY_CHAR_HEAD (*chp) == (p) - chp) ? chp : (p) - 1; \
} \
} while (0)
CHARIDX++; \
if (STRING_MULTIBYTE (STRING)) \
{ \
- unsigned char *string_ptr = &SDATA (STRING)[BYTEIDX]; \
- int string_len; \
+ unsigned char *chp = &SDATA (STRING)[BYTEIDX]; \
+ int chlen; \
\
- OUTPUT = STRING_CHAR_AND_LENGTH (string_ptr, string_len); \
- BYTEIDX += string_len; \
+ OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
+ BYTEIDX += chlen; \
} \
else \
{ \
CHARIDX++; \
if (STRING_MULTIBYTE (STRING)) \
{ \
- unsigned char *ptr = &SDATA (STRING)[BYTEIDX]; \
- int ptrlen; \
+ unsigned char *chp = &SDATA (STRING)[BYTEIDX]; \
+ int chlen; \
\
- OUTPUT = STRING_CHAR_AND_LENGTH (ptr, ptrlen); \
- BYTEIDX += ptrlen; \
+ OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
+ BYTEIDX += chlen; \
} \
else \
{ \
CHARIDX++; \
if (!NILP (BVAR (current_buffer, enable_multibyte_characters))) \
{ \
- unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
- int string_len; \
+ unsigned char *chp = BYTE_POS_ADDR (BYTEIDX); \
+ int chlen; \
\
- OUTPUT= STRING_CHAR_AND_LENGTH (ptr, string_len); \
- BYTEIDX += string_len; \
+ OUTPUT= STRING_CHAR_AND_LENGTH (chp, chlen); \
+ BYTEIDX += chlen; \
} \
else \
{ \
/* Fetch the character located at byte position BYTEIDX (address
   obtained with BYTE_POS_ADDR), store it in OUTPUT, advance BYTEIDX by
   the length reported by STRING_CHAR_AND_LENGTH, and increment
   CHARIDX.  OUTPUT, CHARIDX and BYTEIDX must all be lvalues.

   The only change here is that the block-local temporaries are renamed
   from ptr/len to chp/chlen; the logic is unchanged.  */
#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
do \
{ \
- unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
- int len; \
+ unsigned char *chp = BYTE_POS_ADDR (BYTEIDX); \
+ int chlen; \
\
- OUTPUT = STRING_CHAR_AND_LENGTH (ptr, len); \
- BYTEIDX += len; \
+ OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
+ BYTEIDX += chlen; \
CHARIDX++; \
} \
while (0)
/* Advance the lvalue POS_BYTE by BYTES_BY_CHAR_HEAD of the byte found
   at BYTE_POS_ADDR (POS_BYTE).  The only change is the rename of the
   local temporary from ptr to chp.  */
#define INC_POS(pos_byte) \
do { \
- unsigned char *ptr = BYTE_POS_ADDR (pos_byte); \
- pos_byte += BYTES_BY_CHAR_HEAD (*ptr); \
+ unsigned char *chp = BYTE_POS_ADDR (pos_byte); \
+ pos_byte += BYTES_BY_CHAR_HEAD (*chp); \
} while (0)
/* Move the lvalue POS_BYTE backward: decrement it once, compute the
   address of that byte from BEG_ADDR (adding GAP_SIZE when POS_BYTE is
   not below GPT_BYTE), then keep stepping back one byte at a time
   while the byte there does not satisfy CHAR_HEAD_P.  The only change
   is the rename of the local temporary from ptr to chp.  */
#define DEC_POS(pos_byte) \
do { \
- unsigned char *ptr; \
+ unsigned char *chp; \
\
pos_byte--; \
if (pos_byte < GPT_BYTE) \
- ptr = BEG_ADDR + pos_byte - BEG_BYTE; \
+ chp = BEG_ADDR + pos_byte - BEG_BYTE; \
else \
- ptr = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE; \
- while (!CHAR_HEAD_P (*ptr)) \
+ chp = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE; \
+ while (!CHAR_HEAD_P (*chp)) \
{ \
- ptr--; \
+ chp--; \
pos_byte--; \
} \
} while (0)
/* Like INC_POS, but the byte is looked up in buffer BUF through
   BUF_BYTE_ADDRESS: advance the lvalue POS_BYTE by BYTES_BY_CHAR_HEAD
   of that byte.  The only change is the rename of the local temporary
   from bbp to chp.  */
#define BUF_INC_POS(buf, pos_byte) \
do { \
- unsigned char *bbp = BUF_BYTE_ADDRESS (buf, pos_byte); \
- pos_byte += BYTES_BY_CHAR_HEAD (*bbp); \
+ unsigned char *chp = BUF_BYTE_ADDRESS (buf, pos_byte); \
+ pos_byte += BYTES_BY_CHAR_HEAD (*chp); \
} while (0)
/* Like DEC_POS, but for buffer BUF: decrement the lvalue POS_BYTE,
   compute the byte's address from BUF_BEG_ADDR (BUF) (adding
   BUF_GAP_SIZE (BUF) when POS_BYTE is not below BUF_GPT_BYTE (BUF)),
   then keep stepping back while the byte there does not satisfy
   CHAR_HEAD_P.  The only change is the rename of the local temporary
   from p to chp.  */
#define BUF_DEC_POS(buf, pos_byte) \
do { \
- unsigned char *p; \
+ unsigned char *chp; \
pos_byte--; \
if (pos_byte < BUF_GPT_BYTE (buf)) \
- p = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE; \
+ chp = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE; \
else \
- p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
- while (!CHAR_HEAD_P (*p)) \
+ chp = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
+ while (!CHAR_HEAD_P (*chp)) \
{ \
- p--; \
+ chp--; \
pos_byte--; \
} \
} while (0)
Lisp_Object val; \
val = CHAR_TABLE_REF (Vchar_unify_table, c); \
if (INTEGERP (val)) \
- c = XINT (val); \
+ c = XFASTINT (val); \
else if (! NILP (val)) \
c = maybe_unify_char (c, val); \
} \
: (c) <= 0xDFFF ? 2 \
: 0)
+/* Data type for Unicode general category.
+
+ The order of members must be in sync with the 8th element of the
+ member of unidata-prop-alist (in admin/unidata/unidata-gen.el) for
+ Unicode character property `general-category'. */
+
+typedef enum {
+ UNICODE_CATEGORY_UNKNOWN = 0, /* no category; the only explicit value */
+ UNICODE_CATEGORY_Lu, /* Letter, uppercase */
+ UNICODE_CATEGORY_Ll, /* Letter, lowercase */
+ UNICODE_CATEGORY_Lt, /* Letter, titlecase */
+ UNICODE_CATEGORY_Lm, /* Letter, modifier */
+ UNICODE_CATEGORY_Lo, /* Letter, other */
+ UNICODE_CATEGORY_Mn, /* Mark, nonspacing */
+ UNICODE_CATEGORY_Mc, /* Mark, spacing combining */
+ UNICODE_CATEGORY_Me, /* Mark, enclosing */
+ UNICODE_CATEGORY_Nd, /* Number, decimal digit */
+ UNICODE_CATEGORY_Nl, /* Number, letter */
+ UNICODE_CATEGORY_No, /* Number, other */
+ UNICODE_CATEGORY_Pc, /* Punctuation, connector */
+ UNICODE_CATEGORY_Pd, /* Punctuation, dash */
+ UNICODE_CATEGORY_Ps, /* Punctuation, open */
+ UNICODE_CATEGORY_Pe, /* Punctuation, close */
+ UNICODE_CATEGORY_Pi, /* Punctuation, initial quote */
+ UNICODE_CATEGORY_Pf, /* Punctuation, final quote */
+ UNICODE_CATEGORY_Po, /* Punctuation, other */
+ UNICODE_CATEGORY_Sm, /* Symbol, math */
+ UNICODE_CATEGORY_Sc, /* Symbol, currency */
+ UNICODE_CATEGORY_Sk, /* Symbol, modifier */
+ UNICODE_CATEGORY_So, /* Symbol, other */
+ UNICODE_CATEGORY_Zs, /* Separator, space */
+ UNICODE_CATEGORY_Zl, /* Separator, line */
+ UNICODE_CATEGORY_Zp, /* Separator, paragraph */
+ UNICODE_CATEGORY_Cc, /* Other, control */
+ UNICODE_CATEGORY_Cf, /* Other, format */
+ UNICODE_CATEGORY_Cs, /* Other, surrogate */
+ UNICODE_CATEGORY_Co, /* Other, private use */
+ UNICODE_CATEGORY_Cn /* Other, not assigned */
+} unicode_category_t;
/* Declarations of functions and variables defined outside this header.  */
extern int char_resolve_modifier_mask (int);
/* char_string takes its character argument as `unsigned'; the
   CHAR_STRING macro calls it for characters above MAX_3_BYTE_CHAR.  */
extern int char_string (unsigned, unsigned char *);
extern int char_printable_p (int c);
extern void parse_str_as_multibyte (const unsigned char *,
EMACS_INT, EMACS_INT *, EMACS_INT *);
/* This change removes the declaration of parse_str_to_multibyte and
   declares count_size_as_multibyte with the same signature.  */
-extern EMACS_INT parse_str_to_multibyte (const unsigned char *, EMACS_INT);
+extern EMACS_INT count_size_as_multibyte (const unsigned char *, EMACS_INT);
extern EMACS_INT str_as_multibyte (unsigned char *, EMACS_INT, EMACS_INT,
EMACS_INT *);
extern EMACS_INT str_to_multibyte (unsigned char *, EMACS_INT, EMACS_INT);
extern EMACS_INT strwidth (const char *, EMACS_INT);
extern EMACS_INT c_string_width (const unsigned char *, EMACS_INT, int,
EMACS_INT *, EMACS_INT *);
/* The second parameter of lisp_string_width widens from int to
   EMACS_INT in this change.  */
-extern EMACS_INT lisp_string_width (Lisp_Object, int,
+extern EMACS_INT lisp_string_width (Lisp_Object, EMACS_INT,
EMACS_INT *, EMACS_INT *);
/* Qauto_fill_chars is no longer declared here after this change.  */
-extern Lisp_Object Qcharacterp, Qauto_fill_chars;
+extern Lisp_Object Qcharacterp;
extern Lisp_Object Vchar_unify_table;
extern Lisp_Object string_escape_byte8 (Lisp_Object);