/* Header for multilingual character handler.
- Ver.1.0
- Copyright (C) 1995 Free Software Foundation, Inc.
- Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
+ Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
This file is part of GNU Emacs.
#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
-#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2o f 2-column */
+#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
/* Extended leading-code. */
/* Start of each extended leading-codes. */
#define MIN_CHARSET_PRIVATE_DIMENSION1 LEADING_CODE_EXT_11
#define MIN_CHARSET_PRIVATE_DIMENSION2 LEADING_CODE_EXT_21
+/* Maximum value of overall charset identification number. */
+#define MAX_CHARSET 0xFE
+
/* Definition of special charsets. */
#define CHARSET_ASCII 0
#define CHARSET_COMPOSITION 0x80
extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
-/* Check if STR points the head of multi-byte form, i.e. *STR is an
- ASCII character or a base leading-code. */
-#define CHAR_HEAD_P(str) ((unsigned char) *(str) < 0xA0)
+/* Check if CH is the head of multi-byte form, i.e.,
+ an ASCII character or a base leading-code. */
+#define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0)
/*** GENERAL NOTE on CHARACTER REPRESENTATION ***
((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)
#define MIN_CHAR_COMPOSITION \
(0x1F << 14)
+#define MAX_CHAR_COMPOSITION GLYPH_MASK_CHAR
/* 1 if C is less than 0x100, i.e. a single-byte character, else 0. */
#define SINGLE_BYTE_CHAR_P(c) ((c) < 0x100)
/* 1 if C is a composite character, else 0. */
#define COMPOSITE_CHAR_P(c) ((c) >= MIN_CHAR_COMPOSITION)
+/* 1 if BYTE is a character in itself, in multibyte mode. */
+#define ASCII_BYTE_P(byte) ((byte) < 0x80)
+
/* A char-table containing information of each character set.
Unlike ordinary char-tables, this doesn't contain any nested table.
We provide these macros for efficiency. No range check of CHARSET. */
/* Return entry of CHARSET (lisp integer) in Vcharset_table. */
-#define CHARSET_TABLE_ENTRY(charset) \
- XCHAR_TABLE (Vcharset_table)->contents[charset]
+#define CHARSET_TABLE_ENTRY(charset) \
+ XCHAR_TABLE (Vcharset_table)->contents[((charset) == CHARSET_ASCII \
+ ? 0 : (charset) + 128)]
/* Return information INFO-IDX of CHARSET. */
#define CHARSET_TABLE_INFO(charset, info_idx) \
#define CHARSET_VALID_P(charset) \
((charset) == 0 \
|| ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
- || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) < MAX_CHARSET))
+ || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET))
/* 1 if CHARSET is already defined, else 0. */
#define CHARSET_DEFINED_P(charset) \
- (((charset) >= 0) && ((charset) < MAX_CHARSET) \
+ (((charset) >= 0) && ((charset) <= MAX_CHARSET) \
&& !NILP (CHARSET_TABLE_ENTRY (charset)))
/* Since the information CHARSET-BYTES and CHARSET-WIDTH of
? CHAR_FIELD1 (c) + 0x8F \
: ((c) < MIN_CHAR_COMPOSITION \
? CHAR_FIELD1 (c) + 0xE0 \
- : CHARSET_COMPOSITION))))
+ : ((c) <= MAX_CHAR_COMPOSITION \
+ ? CHARSET_COMPOSITION \
+ : CHARSET_ASCII)))))
/* Return charset at the place pointed by P. */
#define CHARSET_AT(p) \
? (c1) \
: MAKE_NON_ASCII_CHAR ((charset), (c1) & 0x7F, (c2) & 0x7F))
-/* The charset of non-ASCII character C is set to CHARSET, and the
- position-codes of C are set to C1 and C2. C2 of DIMENSION1 character
- is 0. */
+/* Validity test for character code C; yields 1 or 0.
+   A negative C is never valid.  A C accepted by SINGLE_BYTE_CHAR_P is
+   valid without further checks.  Any other C is handed to
+   char_valid_p together with GENERICP: when GENERICP is nonzero,
+   generic characters count as valid in addition to normal ones; when
+   it is zero, only normal characters do.  C may be evaluated more
+   than once.  */
+#define CHAR_VALID_P(c, genericp)				\
+  ((c) < 0							\
+   ? 0								\
+   : (SINGLE_BYTE_CHAR_P (c) || char_valid_p (c, genericp)))
+
+/* The charset of non-ASCII character C is stored in CHARSET, and the
+ position-codes of C are stored in C1 and C2.
+ We store -1 in C2 if the character is just 2 bytes.
+
+ Do not use this macro for an ASCII character. */
+
#define SPLIT_NON_ASCII_CHAR(c, charset, c1, c2) \
((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \
? (charset = CHAR_FIELD2 (c) + 0x70, \
c1 = CHAR_FIELD3 (c), \
- c2 = 0) \
+ c2 = -1) \
: (charset = ((c) < MIN_CHAR_COMPOSITION \
? (CHAR_FIELD1 (c) \
+ ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \
c1 = CHAR_FIELD2 (c), \
c2 = CHAR_FIELD3 (c)))
-/* The charset of character C is set to CHARSET, and the
- position-codes of C are set to C1 and C2. C2 of DIMENSION1 character
- is 0. */
+/* The charset of character C is stored in CHARSET, and the
+ position-codes of C are stored in C1 and C2.
+ We store -1 in C2 if the character is just 2 bytes. */
+
#define SPLIT_CHAR(c, charset, c1, c2) \
(SINGLE_BYTE_CHAR_P (c) \
- ? charset = CHARSET_ASCII, c1 = (c), c2 = 0 \
+ ? charset = CHARSET_ASCII, c1 = (c), c2 = -1 \
: SPLIT_NON_ASCII_CHAR (c, charset, c1, c2))
-/* The charset of the character at STR is set to CHARSET, and the
- position-codes are set to C1 and C2. C2 of DIMENSION1 character is 0.
+/* The charset of the character at STR is stored in CHARSET, and the
+ position-codes are stored in C1 and C2.
+ We store -1 in C2 if the character is just 2 bytes.
+
If the character is a composite character, the upper 7-bit and
lower 7-bit of CMPCHAR-ID are set in C1 and C2 respectively. No
range checking. */
+
#define SPLIT_STRING(str, len, charset, c1, c2) \
((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \
|| BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len \
- || split_non_ascii_string (str, len, &charset, &c1, &c2, 0) < 0) \
+ || split_non_ascii_string (str, len, &charset, &c1, &c2) < 0) \
? c1 = *(str), charset = CHARSET_ASCII \
: charset)
-#define MAX_CHARSET 0xFF
-
/* Mapping table from ISO2022's charset (specified by DIMENSION,
CHARS, and FINAL_CHAR) to Emacs' charset. Should be accessed by
macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */
is not a composite character, the multi-byte form is set in WORKBUF
and STR points WORKBUF. The caller should allocate at least 4-byte
area at WORKBUF in advance. Returns the length of the multi-byte
- form. */
+ form. If C is an invalid character code, signal an error. */
#define CHAR_STRING(c, workbuf, str) \
(SINGLE_BYTE_CHAR_P (c) \
? (actual_len = 1), (unsigned char) *(str) \
: string_to_non_ascii_char (str, len, &actual_len))
+/* Fetch the "next" multibyte character from Lisp string STRING
+ at byte position BYTEIDX, character position CHARIDX.
+ Store it into OUTPUT.
+
+ All the args must be side-effect-free.
+ BYTEIDX and CHARIDX must be lvalues;
+ we increment them past the character fetched:
+ BYTEIDX grows by the length reported by STRING_CHAR_AND_LENGTH,
+ CHARIDX grows by one.  OUTPUT is assigned to, so it must be
+ assignable as well.
+
+ The expansion is an `if (1) { ... } else' statement, so the
+ invocation must be followed by a semicolon (or another statement).
+ The block declares locals named fetch_string_char_ptr,
+ fetch_string_char_space_left and actual_len; the arguments must not
+ refer to variables with those names.  */
+
+#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
+if (1) \
+ { \
+ unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ int fetch_string_char_space_left = XSTRING (STRING)->size_byte - BYTEIDX; /* Bytes from BYTEIDX to the end of STRING's data.  */ \
+ int actual_len; /* Set by STRING_CHAR_AND_LENGTH below.  */ \
+ \
+ OUTPUT \
+ = STRING_CHAR_AND_LENGTH (fetch_string_char_ptr, \
+ fetch_string_char_space_left, actual_len); \
+ \
+ BYTEIDX += actual_len; \
+ CHARIDX++; \
+ } \
+else
+
/* Return the length of the multi-byte form at string STR of length LEN. */
#define MULTIBYTE_FORM_LENGTH(str, len) \
character boundary. This macro relies on the fact that *GPT_ADDR
and *Z_ADDR are always accessible and the values are '\0'. No
range checking of POS. */
-#define INC_POS(pos) \
- do { \
- unsigned char *p = POS_ADDR (pos) + 1; \
- pos++; \
- while (!CHAR_HEAD_P (p)) p++, pos++; \
+#define INC_POS(pos) \
+ do { \
+ unsigned char *p = BYTE_POS_ADDR (pos); \
+ pos++; \
+ if (*p++ >= 0x80) \
+ while (!CHAR_HEAD_P (*p)) p++, pos++; \
} while (0)
/* Decrease the buffer point POS of the current buffer to the previous
#define DEC_POS(pos) \
do { \
unsigned char *p, *p_min; \
- if (--pos < GPT) \
+ int pos_saved = --pos; \
+ if (pos < GPT_BYTE) \
p = BEG_ADDR + pos - 1, p_min = BEG_ADDR; \
else \
p = BEG_ADDR + GAP_SIZE + pos - 1, p_min = GAP_END_ADDR; \
- while (p > p_min && !CHAR_HEAD_P (p)) p--, pos--; \
+ while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \
+ if (*p < 0x80 && pos != pos_saved) pos = pos_saved; \
+ } while (0)
+
+/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
+
+#define INC_BOTH(charpos, bytepos) \
+do \
+ { \
+ (charpos)++; \
+ if (NILP (current_buffer->enable_multibyte_characters)) \
+ (bytepos)++; \
+ else \
+ INC_POS ((bytepos)); \
+ } \
+while (0)
+
+/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */
+
+#define DEC_BOTH(charpos, bytepos) \
+do \
+ { \
+ (charpos)--; \
+ if (NILP (current_buffer->enable_multibyte_characters)) \
+ (bytepos)--; \
+ else \
+ DEC_POS ((bytepos)); \
+ } \
+while (0)
+
+/* Advance position POS in buffer BUF to the next character boundary:
+   step over the byte at POS and, when that byte is 0x80 or above,
+   over every following byte for which CHAR_HEAD_P is false.  POS
+   must be an lvalue.  The scan stops only at a byte satisfying
+   CHAR_HEAD_P; as with INC_POS this presumably relies on the bytes
+   at BUF's gap and end being accessible and '\0' (to be confirmed).
+   No range checking of POS.  */
+#define BUF_INC_POS(buf, pos)						\
+  do {									\
+    unsigned char *p = BUF_BYTE_ADDRESS (buf, pos);			\
+    pos++;								\
+    if (*p >= 0x80)							\
+      for (p++; !CHAR_HEAD_P (*p); p++)				\
+        pos++;								\
+  } while (0)
+
+/* Decrease the position POS in buffer BUF to the previous
+ character boundary.  POS must be an lvalue.  POS is first
+ decremented by one, then moved further back over bytes for which
+ CHAR_HEAD_P is false.  If that scan moved POS but stopped on a byte
+ below 0x80, the scan is undone and POS ends up exactly one less
+ than its starting value.  No range checking of POS. */
+#define BUF_DEC_POS(buf, pos) \
+ do { \
+ unsigned char *p, *p_min; \
+ int pos_saved = --pos; /* Value restored if the scan is undone.  */ \
+ if (pos < BUF_GPT_BYTE (buf)) \
+ { \
+ p = BUF_BEG_ADDR (buf) + pos - 1; /* Below BUF_GPT_BYTE: no gap offset.  */ \
+ p_min = BUF_BEG_ADDR (buf); \
+ } \
+ else \
+ { \
+ p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos - 1; /* Offset by the gap size.  */ \
+ p_min = BUF_GAP_END_ADDR (buf); /* Never scan back past the gap end.  */ \
+ } \
+ while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \
+ if (*p < 0x80 && pos != pos_saved) pos = pos_saved; \
} while (0)
#endif /* emacs */
/* This is the maximum length of multi-byte form. */
#define MAX_LENGTH_OF_MULTI_BYTE_FORM (MAX_COMPONENT_COUNT * 6)
-#endif /* _CHARSET_H */
+/* Maximum character code currently used. */
+#define MAX_CHAR (MIN_CHAR_COMPOSITION + n_cmpchars)
+
+extern int unify_char P_ ((Lisp_Object, int, int, int, int));
+extern int split_non_ascii_string P_ ((unsigned char *, int, int *,
+ unsigned char *, unsigned char *));
+extern int string_to_non_ascii_char P_ ((unsigned char *, int, int *));
+extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **));
+extern int multibyte_form_length P_ ((unsigned char *, int));
+extern int str_cmpchar_id P_ ((unsigned char *, int));
+extern int get_charset_id P_ ((Lisp_Object));
+extern int cmpchar_component P_ ((unsigned int, unsigned int));
+extern int find_charset_in_str P_ ((unsigned char *, int, int *, Lisp_Object));
+extern int strwidth P_ ((unsigned char *, int));
+
+extern Lisp_Object Vcharacter_unification_table_vector;
+#define UNIFICATION_ID_TABLE(id) \
+ (XCONS(XVECTOR(Vcharacter_unification_table_vector)->contents[(id)])->cdr)
+
+/* Copy LEN bytes from FROM to TO.  This macro should be used only
+   when a caller knows that LEN is short and the obvious copy loop is
+   faster than calling bcopy which has some overhead.
+
+   FROM and TO must be `unsigned char *' (or convert to it without a
+   cast).  Each argument is evaluated exactly once.  Bytes are copied
+   one at a time in increasing address order, so the macro is not
+   suitable when TO overlaps FROM at a higher address.  Nothing is
+   copied when LEN is zero or negative.  */
+
+#define BCOPY_SHORT(from, to, len)					\
+  do {									\
+    int bcopy_short_len = (len);					\
+    unsigned char *bcopy_short_from = (from);				\
+    unsigned char *bcopy_short_to = (to);				\
+    /* Destination on the left: data flows from FROM into TO.  */	\
+    while (bcopy_short_len-- > 0)					\
+      *bcopy_short_to++ = *bcopy_short_from++;				\
+  } while (0)
+#endif /* _CHARSET_H */