(try_window_id): Use DEC_POS only for multibyte buffer.
[bpt/emacs.git] / src / charset.h
index d4bd00f..290ee11 100644 (file)
@@ -1,5 +1,5 @@
 /* Header for multilingual character handler.
-   Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+   Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
    Licensed to the Free Software Foundation.
 
 This file is part of GNU Emacs.
@@ -98,7 +98,7 @@ Boston, MA 02111-1307, USA.  */
 #define LEADING_CODE_PRIVATE_11        0x9A /* for private DIMENSION1 of 1-column */
 #define LEADING_CODE_PRIVATE_12        0x9B /* for private DIMENSION1 of 2-column */
 #define LEADING_CODE_PRIVATE_21        0x9C /* for private DIMENSION2 of 1-column */
-#define LEADING_CODE_PRIVATE_22        0x9D /* for private DIMENSION2f 2-column */
+#define LEADING_CODE_PRIVATE_22        0x9D /* for private DIMENSION2 of 2-column */
 
 /* Extended leading-code.  */
 /* Start of each extended leading-codes.  */
@@ -134,9 +134,9 @@ extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
 extern int charset_big5_1;     /* Big5 Level 1 (Chinese Traditional) */
 extern int charset_big5_2;     /* Big5 Level 2 (Chinese Traditional) */
 
-/* Check if STR points the head of multi-byte form, i.e. *STR is an
-   ASCII character or a base leading-code.  */
-#define CHAR_HEAD_P(str) ((unsigned char) *(str) < 0xA0)
+/* Nonzero if byte CH is the head of a multi-byte form, i.e., an
+   ASCII character or a base leading-code (any byte below 0xA0).  */
+#define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0)
 
 /*** GENERAL NOTE on CHARACTER REPRESENTATION ***
 
@@ -244,12 +244,16 @@ extern int charset_big5_2;        /* Big5 Level 2 (Chinese Traditional) */
   ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)
 #define MIN_CHAR_COMPOSITION \
   (0x1F << 14)
+#define MAX_CHAR_COMPOSITION GLYPH_MASK_CHAR
 
 /* 1 if C is an ASCII character, else 0.  */
 #define SINGLE_BYTE_CHAR_P(c) ((c) < 0x100)
 /* 1 if C is an composite character, else 0.  */
 #define COMPOSITE_CHAR_P(c) ((c) >= MIN_CHAR_COMPOSITION)
 
+/* 1 if BYTE is a character in itself, in multibyte mode.  */
+#define ASCII_BYTE_P(byte) ((byte) < 0x80)
+
 /* A char-table containing information of each character set.
 
    Unlike ordinary char-tables, this doesn't contain any nested table.
@@ -401,7 +405,9 @@ extern int width_by_char_head[256];
         ? CHAR_FIELD1 (c) + 0x8F               \
         : ((c) < MIN_CHAR_COMPOSITION          \
            ? CHAR_FIELD1 (c) + 0xE0            \
-           : CHARSET_COMPOSITION))))
+           : ((c) <= MAX_CHAR_COMPOSITION      \
+              ? CHARSET_COMPOSITION            \
+              : CHARSET_ASCII)))))
 
 /* Return charset at the place pointed by P.  */
 #define CHARSET_AT(p)                          \
@@ -462,15 +468,12 @@ extern int width_by_char_head[256];
    ? (c1)                                              \
    : MAKE_NON_ASCII_CHAR ((charset), (c1) & 0x7F, (c2) & 0x7F))
 
-/* 1 if C is in the range of possible character code Emacs can have.  */
-#define VALID_CHAR_P(c)                                                        \
-  ((c) >= 0                                                            \
-   && (SINGLE_BYTE_CHAR_P (c)                                          \
-       || ((c) < MIN_CHAR_COMPOSITION                                  \
-          ? ((c) & CHAR_FIELD1_MASK                                    \
-             ? (CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32)        \
-             : (CHAR_FIELD2 (c) >= 16 && CHAR_FIELD3 (c) >= 32))       \
-          : (c) < MIN_CHAR_COMPOSITION + n_cmpchars)))
+/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
+   generic character.  If GENERICP is zero, return nonzero iff C is a
+   valid normal character.  */
+#define CHAR_VALID_P(c, genericp)      \
+  ((c) >= 0                            \
+   && (SINGLE_BYTE_CHAR_P (c) || char_valid_p (c, genericp)))
 
 /* The charset of non-ASCII character C is stored in CHARSET, and the
    position-codes of C are stored in C1 and C2.
@@ -510,7 +513,7 @@ extern int width_by_char_head[256];
 #define SPLIT_STRING(str, len, charset, c1, c2)                                \
   ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2                    \
     || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len               \
-    || split_non_ascii_string (str, len, &charset, &c1, &c2, 0) < 0)   \
+    || split_non_ascii_string (str, len, &charset, &c1, &c2) < 0)      \
    ? c1 = *(str), charset = CHARSET_ASCII                              \
    : charset)
 
@@ -560,6 +563,30 @@ extern int iso_charset_table[2][2][128];
    ? (actual_len = 1), (unsigned char) *(str)                  \
    : string_to_non_ascii_char (str, len, &actual_len))
 
+/* Fetch the "next" multibyte character from Lisp string STRING
+   at byte position BYTEIDX, character position CHARIDX, and store
+   it into OUTPUT.  The expansion ends in `else'; follow it with `;'.
+
+   All the args must be side-effect-free: STRING and BYTEIDX are
+   expanded more than once.  BYTEIDX and CHARIDX must be lvalues;
+   BYTEIDX grows by the character's byte length, CHARIDX by one.  */
+
+#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)          \
+if (1)                                                                       \
+  {                                                                          \
+    unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX];  \
+    int fetch_string_char_space_left = XSTRING (STRING)->size_byte - BYTEIDX; \
+    int actual_len;                                                          \
+                                                                             \
+    OUTPUT                                                                   \
+      = STRING_CHAR_AND_LENGTH (fetch_string_char_ptr,                       \
+                               fetch_string_char_space_left, actual_len);    \
+                                                                             \
+    BYTEIDX += actual_len;                                                   \
+    CHARIDX++;                                                               \
+  }                                                                          \
+else
+
 /* Return the length of the multi-byte form at string STR of length LEN.  */
 
 #define MULTIBYTE_FORM_LENGTH(str, len)                                \
@@ -599,10 +626,10 @@ extern int iso_charset_table[2][2][128];
    range checking of POS.  */
 #define INC_POS(pos)                           \
   do {                                         \
-    unsigned char *p = POS_ADDR (pos);         \
+    unsigned char *p = BYTE_POS_ADDR (pos);    \
     pos++;                                     \
     if (*p++ >= 0x80)                          \
-      while (!CHAR_HEAD_P (p)) p++, pos++;     \
+      while (!CHAR_HEAD_P (*p)) p++, pos++;    \
   } while (0)
 
 /* Decrease the buffer point POS of the current buffer to the previous
@@ -611,11 +638,69 @@ extern int iso_charset_table[2][2][128];
   do {                                                         \
     unsigned char *p, *p_min;                                  \
     int pos_saved = --pos;                                     \
-    if (pos < GPT)                                             \
+    if (pos < GPT_BYTE)                                                \
       p = BEG_ADDR + pos - 1, p_min = BEG_ADDR;                        \
     else                                                       \
       p = BEG_ADDR + GAP_SIZE + pos - 1, p_min = GAP_END_ADDR; \
-    while (p > p_min && !CHAR_HEAD_P (p)) p--, pos--;          \
+    while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--;         \
+    if (*p < 0x80 && pos != pos_saved) pos = pos_saved;                \
+  } while (0)
+
+/* Step CHARPOS by 1; step BYTEPOS by 1 byte, or via INC_POS if multibyte.  */
+
+#define INC_BOTH(charpos, bytepos)                             \
+do                                                             \
+  {                                                            \
+    (charpos)++;                                               \
+    if (NILP (current_buffer->enable_multibyte_characters))    \
+      (bytepos)++;                                             \
+    else                                                       \
+      INC_POS ((bytepos));                                     \
+  }                                                            \
+while (0)
+
+/* Back CHARPOS up by 1; BYTEPOS by 1 byte, or via DEC_POS if multibyte.  */
+
+#define DEC_BOTH(charpos, bytepos)                             \
+do                                                             \
+  {                                                            \
+    (charpos)--;                                               \
+    if (NILP (current_buffer->enable_multibyte_characters))    \
+      (bytepos)--;                                             \
+    else                                                       \
+      DEC_POS ((bytepos));                                     \
+  }                                                            \
+while (0)
+
+/* Increase the byte position POS in buffer BUF to the next character
+   boundary.  Like INC_POS, this relies on a '\0' byte always being
+   readable at the gap start and at the end of the text (presumably
+   BUF's own, not the current buffer's).  No range checking of POS.  */
+#define BUF_INC_POS(buf, pos)                  \
+  do {                                         \
+    unsigned char *p = BUF_BYTE_ADDRESS (buf, pos);    \
+    pos++;                                     \
+    if (*p++ >= 0x80)                          \
+      while (!CHAR_HEAD_P (*p)) p++, pos++;    \
+  } while (0)
+
+/* Decrease the byte position POS in buffer BUF to the previous
+   character boundary.  No range checking of POS.  */
+#define BUF_DEC_POS(buf, pos)                                  \
+  do {                                                         \
+    unsigned char *p, *p_min;                                  \
+    int pos_saved = --pos;                                     \
+    if (pos < BUF_GPT_BYTE (buf))                              \
+      {                                                                \
+       p = BUF_BEG_ADDR (buf) + pos - 1;                       \
+       p_min = BUF_BEG_ADDR (buf);                             \
+      }                                                                \
+    else                                                       \
+      {                                                                \
+       p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos - 1;  \
+       p_min = BUF_GAP_END_ADDR (buf);                         \
+      }                                                                \
+    while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--;         \
     if (*p < 0x80 && pos != pos_saved) pos = pos_saved;                \
   } while (0)
 
@@ -671,6 +756,31 @@ extern int n_cmpchars;
 /* Maximum character code currently used.  */
 #define MAX_CHAR (MIN_CHAR_COMPOSITION + n_cmpchars)
 
-extern int unify_char ();
+extern int unify_char P_ ((Lisp_Object, int, int, int, int));
+extern int split_non_ascii_string P_ ((unsigned char *, int, int *,
+                                      unsigned char *, unsigned char *));
+extern int string_to_non_ascii_char P_ ((unsigned char *, int, int *));
+extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **));
+extern int multibyte_form_length P_ ((unsigned char *, int));
+extern int str_cmpchar_id P_ ((unsigned char *, int));
+extern int get_charset_id P_ ((Lisp_Object));
+extern int cmpchar_component P_ ((unsigned int, unsigned int));
+extern int find_charset_in_str P_ ((unsigned char *, int, int *, Lisp_Object));
+extern int strwidth P_ ((unsigned char *, int));
+
+extern Lisp_Object Vcharacter_unification_table_vector;
+#define UNIFICATION_ID_TABLE(id) \
+  (XCONS(XVECTOR(Vcharacter_unification_table_vector)->contents[(id)])->cdr)
+
+/* Copy LEN bytes from FROM to TO.  Use this only when the caller
+   knows LEN is short, so the obvious loop beats the overhead of
+   calling bcopy.  A LEN of zero or less copies nothing.  */
+
+#define BCOPY_SHORT(from, to, len)                     \
+  do {                                                 \
+    int i = (len);                                     \
+    unsigned char *from_p = (from), *to_p = (to);      \
+    while (i-- > 0) *to_p++ = *from_p++;               \
+  } while (0)
 
 #endif /* _CHARSET_H */