/* Header for multilingual character handler.
- Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+ Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
This file is part of GNU Emacs.
#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
-#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2o f 2-column */
+#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
/* Extended leading-code. */
/* Start of each extended leading-codes. */
extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
-/* Check if STR points the head of multi-byte form, i.e. *STR is an
- ASCII character or a base leading-code. */
-#define CHAR_HEAD_P(str) ((unsigned char) *(str) < 0xA0)
+/* Check if CH is the head of multi-byte form, i.e.,
+ an ASCII character or a base leading-code. */
+#define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0)
/*** GENERAL NOTE on CHARACTER REPRESENTATION ***
((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)
#define MIN_CHAR_COMPOSITION \
(0x1F << 14)
+#define MAX_CHAR_COMPOSITION GLYPH_MASK_CHAR
/* 1 if C is an ASCII character, else 0. */
#define SINGLE_BYTE_CHAR_P(c) ((c) < 0x100)
/* 1 if C is an composite character, else 0. */
#define COMPOSITE_CHAR_P(c) ((c) >= MIN_CHAR_COMPOSITION)
+/* 1 if BYTE is a character in itself, in multibyte mode, i.e. if its
+ value is below 0x80.
+ NOTE(review): BYTE is compared without a cast, so a negative
+ argument (e.g. a plain `char' that is signed) also tests true;
+ callers presumably pass unsigned values -- confirm. */
+#define ASCII_BYTE_P(byte) ((byte) < 0x80)
+
/* A char-table containing information of each character set.
Unlike ordinary char-tables, this doesn't contain any nested table.
? CHAR_FIELD1 (c) + 0x8F \
: ((c) < MIN_CHAR_COMPOSITION \
? CHAR_FIELD1 (c) + 0xE0 \
- : CHARSET_COMPOSITION))))
+ : ((c) <= MAX_CHAR_COMPOSITION \
+ ? CHARSET_COMPOSITION \
+ : CHARSET_ASCII)))))
/* Return charset at the place pointed by P. */
#define CHARSET_AT(p) \
? (c1) \
: MAKE_NON_ASCII_CHAR ((charset), (c1) & 0x7F, (c2) & 0x7F))
-/* 1 if C is in the range of possible character code Emacs can have. */
-#define VALID_CHAR_P(c) \
- ((c) >= 0 \
- && (SINGLE_BYTE_CHAR_P (c) \
- || ((c) < MIN_CHAR_COMPOSITION \
- ? ((c) & CHAR_FIELD1_MASK \
- ? (CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32) \
- : (CHAR_FIELD2 (c) >= 16 && CHAR_FIELD3 (c) >= 32)) \
- : (c) < MIN_CHAR_COMPOSITION + n_cmpchars)))
+/* Validity test for character code C. A negative C is never valid
+ and a single-byte C always is; any other code is judged by
+ char_valid_p, which receives GENERICP unchanged. A nonzero GENERICP
+ asks for normal and generic characters to be accepted, a zero
+ GENERICP for normal characters only. The result is 0 or 1. */
+#define CHAR_VALID_P(c, genericp) \
+ ((c) < 0 \
+ ? 0 \
+ : (SINGLE_BYTE_CHAR_P (c) || char_valid_p (c, genericp)))
/* The charset of non-ASCII character C is stored in CHARSET, and the
position-codes of C are stored in C1 and C2.
#define SPLIT_STRING(str, len, charset, c1, c2) \
((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \
|| BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len \
- || split_non_ascii_string (str, len, &charset, &c1, &c2, 0) < 0) \
+ || split_non_ascii_string (str, len, &charset, &c1, &c2) < 0) \
? c1 = *(str), charset = CHARSET_ASCII \
: charset)
? (actual_len = 1), (unsigned char) *(str) \
: string_to_non_ascii_char (str, len, &actual_len))
+/* Fetch the "next" multibyte character from Lisp string STRING
+ at byte position BYTEIDX, character position CHARIDX.
+ Store it into OUTPUT.
+
+ All the args must be side-effect-free.
+ BYTEIDX and CHARIDX must be lvalues;
+ we increment them past the character fetched: BYTEIDX by the
+ length that STRING_CHAR_AND_LENGTH reports, CHARIDX by one.
+
+ The expansion is an `if (1) { ... } else' statement; the semicolon
+ written after the macro call supplies the (empty) else-arm, so a
+ call must always be followed by a semicolon.
+
+ Every local declared here carries the fetch_string_char_ prefix so
+ that it cannot capture a caller variable named in the arguments. */
+
+#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
+if (1) \
+ { \
+ unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ int fetch_string_char_space_left = XSTRING (STRING)->size_byte - BYTEIDX; \
+ int fetch_string_char_len; \
+ \
+ OUTPUT \
+ = STRING_CHAR_AND_LENGTH (fetch_string_char_ptr, \
+ fetch_string_char_space_left, \
+ fetch_string_char_len); \
+ \
+ BYTEIDX += fetch_string_char_len; \
+ CHARIDX++; \
+ } \
+else
+
/* Return the length of the multi-byte form at string STR of length LEN. */
#define MULTIBYTE_FORM_LENGTH(str, len) \
range checking of POS. */
#define INC_POS(pos) \
do { \
- unsigned char *p = POS_ADDR (pos); \
+ unsigned char *p = BYTE_POS_ADDR (pos); \
pos++; \
if (*p++ >= 0x80) \
- while (!CHAR_HEAD_P (p)) p++, pos++; \
+ while (!CHAR_HEAD_P (*p)) p++, pos++; \
} while (0)
/* Decrease the buffer point POS of the current buffer to the previous
do { \
unsigned char *p, *p_min; \
int pos_saved = --pos; \
- if (pos < GPT) \
+ if (pos < GPT_BYTE) \
p = BEG_ADDR + pos - 1, p_min = BEG_ADDR; \
else \
p = BEG_ADDR + GAP_SIZE + pos - 1, p_min = GAP_END_ADDR; \
- while (p > p_min && !CHAR_HEAD_P (p)) p--, pos--; \
+ while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \
+ if (*p < 0x80 && pos != pos_saved) pos = pos_saved; \
+ } while (0)
+
+/* Step CHARPOS and BYTEPOS forward together over one character.
+ CHARPOS always grows by one. BYTEPOS is advanced with INC_POS
+ unless the current buffer's enable_multibyte_characters is nil, in
+ which case it simply grows by one as well. */
+
+#define INC_BOTH(charpos, bytepos) \
+do \
+ { \
+ (charpos)++; \
+ if (!NILP (current_buffer->enable_multibyte_characters)) \
+ INC_POS ((bytepos)); \
+ else \
+ (bytepos)++; \
+ } \
+while (0)
+
+/* Step CHARPOS and BYTEPOS backward together over one character.
+ CHARPOS always shrinks by one. BYTEPOS is moved back with DEC_POS
+ unless the current buffer's enable_multibyte_characters is nil, in
+ which case it simply shrinks by one as well. */
+
+#define DEC_BOTH(charpos, bytepos) \
+do \
+ { \
+ (charpos)--; \
+ if (!NILP (current_buffer->enable_multibyte_characters)) \
+ DEC_POS ((bytepos)); \
+ else \
+ (bytepos)--; \
+ } \
+while (0)
+
+/* Advance the byte position POS in buffer BUF to the next character
+ boundary. POS always moves forward by one byte; when the byte it
+ started on is 0x80 or above, it keeps moving until it reaches a
+ byte for which CHAR_HEAD_P holds. There is no range checking of
+ POS: the scan depends on an always accessible '\0' byte at the gap
+ start and at the end of the text (cf. *GPT_ADDR and *Z_ADDR) to
+ stop it. */
+#define BUF_INC_POS(buf, pos) \
+ do { \
+ unsigned char *scan = BUF_BYTE_ADDRESS (buf, pos); \
+ int started_on_non_ascii = *scan >= 0x80; \
+ scan++; \
+ pos++; \
+ if (started_on_non_ascii) \
+ for (; !CHAR_HEAD_P (*scan); scan++) \
+ pos++; \
+ } while (0)
+
+/* Decrease the byte position POS in buffer BUF to the previous
+ character boundary. No range checking of POS.
+
+ POS is first decremented by one and that value is remembered in
+ POS_SAVED. P is then set to BUF_BEG_ADDR (buf) + POS - 1, with
+ BUF_GAP_SIZE (buf) added when POS is not below BUF_GPT_BYTE (buf),
+ and is walked backward, decrementing POS in step, for as long as
+ *P is not a character head (see CHAR_HEAD_P) and P is still above
+ P_MIN. P_MIN is the start of BUF's text before the gap, and the
+ end of the gap otherwise. */
+#define BUF_DEC_POS(buf, pos) \
+ do { \
+ unsigned char *p, *p_min; \
+ int pos_saved = --pos; \
+ if (pos < BUF_GPT_BYTE (buf)) \
+ { \
+ p = BUF_BEG_ADDR (buf) + pos - 1; \
+ p_min = BUF_BEG_ADDR (buf); \
+ } \
+ else \
+ { \
+ p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos - 1; \
+ p_min = BUF_GAP_END_ADDR (buf); \
+ } \
+ while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \
+ /* If the scan moved POS but stopped on a byte below 0x80, */ \
+ /* undo the scan and keep the plain one-byte decrement. */ \
if (*p < 0x80 && pos != pos_saved) pos = pos_saved; \
} while (0)
/* Maximum character code currently used. */
#define MAX_CHAR (MIN_CHAR_COMPOSITION + n_cmpchars)
-extern int unify_char ();
+extern int unify_char P_ ((Lisp_Object, int, int, int, int));
+extern int split_non_ascii_string P_ ((unsigned char *, int, int *,
+ unsigned char *, unsigned char *));
+extern int string_to_non_ascii_char P_ ((unsigned char *, int, int *));
+extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **));
+extern int multibyte_form_length P_ ((unsigned char *, int));
+extern int str_cmpchar_id P_ ((unsigned char *, int));
+extern int get_charset_id P_ ((Lisp_Object));
+extern int cmpchar_component P_ ((unsigned int, unsigned int));
+extern int find_charset_in_str P_ ((unsigned char *, int, int *, Lisp_Object));
+extern int strwidth P_ ((unsigned char *, int));
+
+extern Lisp_Object Vcharacter_unification_table_vector;
+/* Yield the cdr of the cons cell stored at index ID of the vector
+ Vcharacter_unification_table_vector. ID is used directly as the
+ index; this macro does no range checking.
+ NOTE(review): the cdr is presumably the unification table
+ registered under ID -- confirm against the code that fills the
+ vector. */
+#define UNIFICATION_ID_TABLE(id) \
+ (XCONS(XVECTOR(Vcharacter_unification_table_vector)->contents[(id)])->cdr)
+
+/* Copy LEN bytes from FROM to TO. This macro should be used only
+ when a caller knows that LEN is short and the obvious copy loop is
+ faster than calling bcopy which has some overhead.
+
+ The argument order is that of bcopy: source first, destination
+ second. FROM, TO and LEN are each evaluated exactly once. Nothing
+ is copied when LEN is zero or negative. The regions are copied
+ byte by byte in ascending order. */
+
+#define BCOPY_SHORT(from, to, len) \
+ do { \
+ int i = (len); \
+ unsigned char *from_p = (from), *to_p = (to); \
+ /* Read through FROM_P, store through TO_P. */ \
+ while (i-- > 0) *to_p++ = *from_p++; \
+ } while (0)
#endif /* _CHARSET_H */