src/character.h

   1 /* Header for multibyte character handler.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H13PRO009
   7
   8 This file is part of GNU Emacs.
   9
  10 GNU Emacs is free software: you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation, either version 3 of the License, or
  13 (at your option) any later version.
  14
  15 GNU Emacs is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  22
  23 #ifndef EMACS_CHARACTER_H
  24 #define EMACS_CHARACTER_H
  25
  26 /* character code       1st byte   byte sequence
  27    --------------       --------   -------------
  28         0-7F            00..7F     0xxxxxxx
  29        80-7FF           C2..DF     110xxxxx 10xxxxxx
  30       800-FFFF          E0..EF     1110xxxx 10xxxxxx 10xxxxxx
  31     10000-1FFFFF        F0..F7     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  32    200000-3FFF7F        F8         11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
  33    3FFF80-3FFFFF        C0..C1     1100000x 10xxxxxx (for eight-bit-char)
  34    400000-...           invalid
  35
  36    invalid 1st byte     80..BF     10xxxxxx
  37                         F9..FF     11111xxx (xxx != 000)
  38 */
  39
  40 /* Maximum character code ((1 << CHARACTERBITS) - 1).  */
  41 #define MAX_CHAR  0x3FFFFF
  42
  43 /* Maximum Unicode character code.  */
  44 #define MAX_UNICODE_CHAR 0x10FFFF
  45
  46 /* Maximum N-byte character codes.  */
  47 #define MAX_1_BYTE_CHAR 0x7F
  48 #define MAX_2_BYTE_CHAR 0x7FF
  49 #define MAX_3_BYTE_CHAR 0xFFFF
  50 #define MAX_4_BYTE_CHAR 0x1FFFFF
  51 #define MAX_5_BYTE_CHAR 0x3FFF7F
  52
  53 /* Minimum leading code of multibyte characters.  */
  54 #define MIN_MULTIBYTE_LEADING_CODE 0xC0
  55 /* Maximum leading code of multibyte characters.  */
  56 #define MAX_MULTIBYTE_LEADING_CODE 0xF8
  57
  58 /* Nonzero iff C is a character that corresponds to a raw 8-bit
  59    byte.  */
  60 #define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
  61
  62 /* Return the character code for raw 8-bit byte BYTE.  */
  63 #define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
  64
  65 /* Return the raw 8-bit byte for character C.  */
  66 #define CHAR_TO_BYTE8(c)        \
  67   (CHAR_BYTE8_P (c)             \
  68    ? (c) - 0x3FFF00             \
  69    : multibyte_char_to_unibyte (c, Qnil))
  70
  71 /* Return the raw 8-bit byte for character C,
  72    or -1 if C doesn't correspond to a byte.  */
  73 #define CHAR_TO_BYTE_SAFE(c)    \
  74   (CHAR_BYTE8_P (c)             \
  75    ? (c) - 0x3FFF00             \
  76    : multibyte_char_to_unibyte_safe (c))
  77
  78 /* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
  79    that corresponds to a raw 8-bit byte.  */
  80 #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
  81
  82 /* Mapping table from unibyte chars to multibyte chars.  */
  83 extern int unibyte_to_multibyte_table[256];
  84
  85 /* Convert the unibyte character C to the corresponding multibyte
  86    character.  If C can't be converted, return C.  */
  87 #define unibyte_char_to_multibyte(c)    \
  88   ((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
  89
  90 /* Nth element is 1 iff unibyte char N can be mapped to a multibyte
  91    char.  */
  92 extern char unibyte_has_multibyte_table[256];
  93
  94 #define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)])
  95
  96 /* If C is not ASCII, make it unibyte. */
  97 #define MAKE_CHAR_UNIBYTE(c)    \
  98   do {                          \
  99     if (! ASCII_CHAR_P (c))     \
 100       c = CHAR_TO_BYTE8 (c);    \
 101   } while (0)
 102
 103
 104 /* If C is not ASCII, make it multibyte.  It assumes C < 256.  */
 105 #define MAKE_CHAR_MULTIBYTE(c) \
 106   (eassert ((c) >= 0 && (c) < 256), (c) = unibyte_to_multibyte_table[(c)])
 107
 108 /* This is the maximum byte length of multibyte form.  */
 109 #define MAX_MULTIBYTE_LENGTH 5
 110
 111 /* Return a Lisp character whose character code is C.  It assumes C is
 112    a valid character code.  */
 113 #define make_char(c) make_number (c)
 114
 115 /* Nonzero iff C is an ASCII byte.  */
 116 #define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80)
 117
 118 /* Nonzero iff X is a character.  */
 119 #define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
 120
 121 /* Nonzero iff C is valid as a character code.  GENERICP is not used
 122    now.  */
 123 #define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR)
 124
 125 /* Check if Lisp object X is a character or not.  */
 126 #define CHECK_CHARACTER(x) \
 127   CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
 128
 129 #define CHECK_CHARACTER_CAR(x) \
 130   do {                                  \
 131     Lisp_Object tmp = XCAR (x);         \
 132     CHECK_CHARACTER (tmp);              \
 133     XSETCAR ((x), tmp);                 \
 134   } while (0)
 135
 136 #define CHECK_CHARACTER_CDR(x) \
 137   do {                                  \
 138     Lisp_Object tmp = XCDR (x);         \
 139     CHECK_CHARACTER (tmp);              \
 140     XSETCDR ((x), tmp);                 \
 141   } while (0)
 142
 143 /* Nonzero iff C is an ASCII character.  */
 144 #define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
 145
 146 /* Nonzero iff C is a character of code less than 0x100.  */
 147 #define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
 148
 149 /* Nonzero if character C has a printable glyph.  */
 150 #define CHAR_PRINTABLE_P(c)     \
 151   (((c) >= 32 && ((c) < 127)    \
 152     || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c)))))
 153
 154 /* Return byte length of multibyte form for character C.  */
 155 #define CHAR_BYTES(c)                   \
 156   ( (c) <= MAX_1_BYTE_CHAR ? 1          \
 157     : (c) <= MAX_2_BYTE_CHAR ? 2        \
 158     : (c) <= MAX_3_BYTE_CHAR ? 3        \
 159     : (c) <= MAX_4_BYTE_CHAR ? 4        \
 160     : (c) <= MAX_5_BYTE_CHAR ? 5        \
 161     : 2)
 162
 163
 164 /* Return the leading code of multibyte form of C.  */
 165 #define CHAR_LEADING_CODE(c)                            \
 166   ((c) <= MAX_1_BYTE_CHAR ? c                           \
 167    : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6))       \
 168    : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12))      \
 169    : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18))      \
 170    : (c) <= MAX_5_BYTE_CHAR ? 0xF8                      \
 171    : (0xC0 | (((c) >> 6) & 0x01)))
 172
 173
 174 /* Store multibyte form of the character C in P.  The caller should
 175    allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
 176    Returns the length of the multibyte form.  */
 177
 178 #define CHAR_STRING(c, p)                       \
 179   ((unsigned) (c) <= MAX_1_BYTE_CHAR            \
 180    ? ((p)[0] = (c),                             \
 181       1)                                        \
 182    : (unsigned) (c) <= MAX_2_BYTE_CHAR          \
 183    ? ((p)[0] = (0xC0 | ((c) >> 6)),             \
 184       (p)[1] = (0x80 | ((c) & 0x3F)),           \
 185       2)                                        \
 186    : (unsigned) (c) <= MAX_3_BYTE_CHAR          \
 187    ? ((p)[0] = (0xE0 | ((c) >> 12)),            \
 188       (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),    \
 189       (p)[2] = (0x80 | ((c) & 0x3F)),           \
 190       3)                                        \
 191    : char_string ((unsigned) c, p))
 192
 193 /* Store multibyte form of byte B in P.  The caller should allocate at
 194    least MAX_MULTIBYTE_LENGTH bytes area at P in advance.  Returns the
 195    length of the multibyte form.  */
 196
 197 #define BYTE8_STRING(b, p)                      \
 198   ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),       \
 199    (p)[1] = (0x80 | ((b) & 0x3F)),              \
 200    2)
 201
 202
 203 /* Store multibyte form of the character C in P.  The caller should
 204    allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
 205    And, advance P to the end of the multibyte form.  */
 206
 207 #define CHAR_STRING_ADVANCE(c, p)               \
 208   do {                                          \
 209     if ((c) <= MAX_1_BYTE_CHAR)                 \
 210       *(p)++ = (c);                             \
 211     else if ((c) <= MAX_2_BYTE_CHAR)            \
 212       *(p)++ = (0xC0 | ((c) >> 6)),             \
 213         *(p)++ = (0x80 | ((c) & 0x3F));         \
 214     else if ((c) <= MAX_3_BYTE_CHAR)            \
 215       *(p)++ = (0xE0 | ((c) >> 12)),            \
 216         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
 217         *(p)++ = (0x80 | ((c) & 0x3F));         \
 218     else                                        \
 219       (p) += char_string ((c), (p));            \
 220   } while (0)
 221
 222
 223 /* Nonzero iff BYTE starts a non-ASCII character in a multibyte
 224    form.  */
 225 #define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
 226
 227 /* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
 228    multibyte form.  */
 229 #define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
 230
 231 /* Nonzero iff BYTE starts a character in a multibyte form.
 232    This is equivalent to:
 233         (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte))  */
 234 #define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
 235
 236 /* Just kept for backward compatibility.  This macro will be removed
 237    in the future.  */
 238 #define BASE_LEADING_CODE_P LEADING_CODE_P
 239
 240 /* How many bytes a character that starts with BYTE occupies in a
 241    multibyte form.  */
 242 #define BYTES_BY_CHAR_HEAD(byte)        \
 243   (!((byte) & 0x80) ? 1                 \
 244    : !((byte) & 0x20) ? 2               \
 245    : !((byte) & 0x10) ? 3               \
 246    : !((byte) & 0x08) ? 4               \
 247    : 5)
 248
 249
 250 /* Return the length of the multi-byte form at string STR of length
 251    LEN while assuming that STR points a valid multi-byte form.  As
 252    this macro isn't necessary anymore, all callers will be changed to
 253    use BYTES_BY_CHAR_HEAD directly in the future.  */
 254
 255 #define MULTIBYTE_FORM_LENGTH(str, len)         \
 256   BYTES_BY_CHAR_HEAD (*(str))
 257
 258 /* Parse multibyte string STR of length LENGTH and set BYTES to the
 259    byte length of a character at STR while assuming that STR points a
 260    valid multibyte form.  As this macro isn't necessary anymore, all
 261    callers will be changed to use BYTES_BY_CHAR_HEAD directly in the
 262    future.  */
 263
 264 #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
 265   (bytes) = BYTES_BY_CHAR_HEAD (*(str))
 266
 267 /* The byte length of multibyte form at unibyte string P ending at
 268    PEND.  If STR doesn't point a valid multibyte form, return 0.  */
 269
 270 #define MULTIBYTE_LENGTH(p, pend)                               \
 271   (p >= pend ? 0                                                \
 272    : !((p)[0] & 0x80) ? 1                                       \
 273    : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0         \
 274    : ((p)[0] & 0xE0) == 0xC0 ? 2                                \
 275    : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0         \
 276    : ((p)[0] & 0xF0) == 0xE0 ? 3                                \
 277    : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0         \
 278    : ((p)[0] & 0xF8) == 0xF0 ? 4                                \
 279    : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0         \
 280    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5              \
 281    : 0)
 282
 283
 284 /* Like MULTIBYTE_LENGTH but don't check the ending address.  */
 285
 286 #define MULTIBYTE_LENGTH_NO_CHECK(p)                    \
 287   (!((p)[0] & 0x80) ? 1                                 \
 288    : ((p)[1] & 0xC0) != 0x80 ? 0                        \
 289    : ((p)[0] & 0xE0) == 0xC0 ? 2                        \
 290    : ((p)[2] & 0xC0) != 0x80 ? 0                        \
 291    : ((p)[0] & 0xF0) == 0xE0 ? 3                        \
 292    : ((p)[3] & 0xC0) != 0x80 ? 0                        \
 293    : ((p)[0] & 0xF8) == 0xF0 ? 4                        \
 294    : ((p)[4] & 0xC0) != 0x80 ? 0                        \
 295    : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5      \
 296    : 0)
 297
 298 /* If P is before LIMIT, advance P to the next character boundary.  It
 299    assumes that P is already at a character boundary of the sane
 300    mulitbyte form whose end address is LIMIT.  */
 301
 302 #define NEXT_CHAR_BOUNDARY(p, limit)    \
 303   do {                                  \
 304     if ((p) < (limit))                  \
 305       (p) += BYTES_BY_CHAR_HEAD (*(p)); \
 306   } while (0)
 307
 308
 309 /* If P is after LIMIT, advance P to the previous character boundary.
 310    It assumes that P is already at a character boundary of the sane
 311    mulitbyte form whose beginning address is LIMIT.  */
 312
 313 #define PREV_CHAR_BOUNDARY(p, limit)                                    \
 314   do {                                                                  \
 315     if ((p) > (limit))                                                  \
 316       {                                                                 \
 317         const unsigned char *p0 = (p);                                  \
 318         do {                                                            \
 319           p0--;                                                         \
 320         } while (p0 >= limit && ! CHAR_HEAD_P (*p0));                   \
 321         (p) = (BYTES_BY_CHAR_HEAD (*p0) == (p) - p0) ? p0 : (p) - 1;    \
 322       }                                                                 \
 323   } while (0)
 324
 325 /* Return the character code of character whose multibyte form is at
 326    P.  The argument LEN is ignored.  It will be removed in the
 327    future.  */
 328
 329 #define STRING_CHAR(p, len)                                     \
 330   (!((p)[0] & 0x80)                                             \
 331    ? (p)[0]                                                     \
 332    : ! ((p)[0] & 0x20)                                          \
 333    ? (((((p)[0] & 0x1F) << 6)                                   \
 334        | ((p)[1] & 0x3F))                                       \
 335       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))       \
 336    : ! ((p)[0] & 0x10)                                          \
 337    ? ((((p)[0] & 0x0F) << 12)                                   \
 338       | (((p)[1] & 0x3F) << 6)                                  \
 339       | ((p)[2] & 0x3F))                                        \
 340    : string_char ((p), NULL, NULL))
 341
 342
 343 /* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
 344    form.  The argument LEN is ignored.  It will be removed in the
 345    future.  */
 346
 347 #define STRING_CHAR_AND_LENGTH(p, len, actual_len)              \
 348   (!((p)[0] & 0x80)                                             \
 349    ? ((actual_len) = 1, (p)[0])                                 \
 350    : ! ((p)[0] & 0x20)                                          \
 351    ? ((actual_len) = 2,                                         \
 352       (((((p)[0] & 0x1F) << 6)                                  \
 353         | ((p)[1] & 0x3F))                                      \
 354        + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))     \
 355    : ! ((p)[0] & 0x10)                                          \
 356    ? ((actual_len) = 3,                                         \
 357       ((((p)[0] & 0x0F) << 12)                                  \
 358        | (((p)[1] & 0x3F) << 6)                                 \
 359        | ((p)[2] & 0x3F)))                                      \
 360    : string_char ((p), NULL, &actual_len))
 361
 362
 363 /* Like STRING_CHAR but advance P to the end of multibyte form.  */
 364
 365 #define STRING_CHAR_ADVANCE(p)                                  \
 366   (!((p)[0] & 0x80)                                             \
 367    ? *(p)++                                                     \
 368    : ! ((p)[0] & 0x20)                                          \
 369    ? ((p) += 2,                                                 \
 370       ((((p)[-2] & 0x1F) << 6)                                  \
 371        | ((p)[-1] & 0x3F)                                       \
 372        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
 373    : ! ((p)[0] & 0x10)                                          \
 374    ? ((p) += 3,                                                 \
 375       ((((p)[-3] & 0x0F) << 12)                                 \
 376        | (((p)[-2] & 0x3F) << 6)                                \
 377        | ((p)[-1] & 0x3F)))                                     \
 378    : string_char ((p), &(p), NULL))
 379
 380
 381 /* Fetch the "next" character from Lisp string STRING at byte position
 382    BYTEIDX, character position CHARIDX.  Store it into OUTPUT.
 383
 384    All the args must be side-effect-free.
 385    BYTEIDX and CHARIDX must be lvalues;
 386    we increment them past the character fetched.  */
 387
 388 #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)     \
 389   do                                                                    \
 390     {                                                                   \
 391       CHARIDX++;                                                        \
 392       if (STRING_MULTIBYTE (STRING))                                    \
 393         {                                                               \
 394           unsigned char *ptr = &SDATA (STRING)[BYTEIDX];                \
 395           int len;                                                      \
 396                                                                         \
 397           OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);                \
 398           BYTEIDX += len;                                               \
 399         }                                                               \
 400       else                                                              \
 401         {                                                               \
 402           OUTPUT = SREF (STRING, BYTEIDX);                              \
 403           BYTEIDX++;                                                    \
 404         }                                                               \
 405     }                                                                   \
 406   while (0)
 407
 408 /* Like FETCH_STRING_CHAR_ADVANCE but return a multibyte character eve
 409    if STRING is unibyte.  */
 410
 411 #define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 412   do                                                                          \
 413     {                                                                         \
 414       CHARIDX++;                                                              \
 415       if (STRING_MULTIBYTE (STRING))                                          \
 416         {                                                                     \
 417           unsigned char *ptr = &SDATA (STRING)[BYTEIDX];                      \
 418           int len;                                                            \
 419                                                                               \
 420           OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);                      \
 421           BYTEIDX += len;                                                     \
 422         }                                                                     \
 423       else                                                                    \
 424         {                                                                     \
 425           OUTPUT = SREF (STRING, BYTEIDX);                                    \
 426           BYTEIDX++;                                                          \
 427           MAKE_CHAR_MULTIBYTE (OUTPUT);                                       \
 428         }                                                                     \
 429     }                                                                         \
 430   while (0)
 431
 432
 433 /* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte.  */
 434
 435 #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
 436   do                                                                         \
 437     {                                                                        \
 438       unsigned char *ptr = &SDATA (STRING)[BYTEIDX];                         \
 439       int len;                                                               \
 440                                                                              \
 441       OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);                         \
 442       BYTEIDX += len;                                                        \
 443       CHARIDX++;                                                             \
 444     }                                                                        \
 445   while (0)
 446
 447
 448 /* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
 449    buffer.  */
 450
 451 #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)            \
 452   do                                                            \
 453     {                                                           \
 454       CHARIDX++;                                                \
 455       if (!NILP (current_buffer->enable_multibyte_characters))  \
 456         {                                                       \
 457           unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX);         \
 458           int len;                                              \
 459                                                                 \
 460           OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len);         \
 461           BYTEIDX += len;                                       \
 462         }                                                       \
 463       else                                                      \
 464         {                                                       \
 465           OUTPUT = *(BYTE_POS_ADDR (BYTEIDX));                  \
 466           BYTEIDX++;                                            \
 467         }                                                       \
 468     }                                                           \
 469   while (0)
 470
 471
 472 /* Like FETCH_CHAR_ADVANCE but assumes the current buffer is multibyte.  */
 473
 474 #define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)   \
 475   do                                                            \
 476     {                                                           \
 477       unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX);             \
 478       int len;                                                  \
 479                                                                 \
 480       OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len);             \
 481       BYTEIDX += len;                                           \
 482       CHARIDX++;                                                \
 483     }                                                           \
 484   while (0)
 485
 486
 487 /* Increase the buffer byte position POS_BYTE of the current buffer to
 488    the next character boundary.  No range checking of POS.  */
 489
 490 #define INC_POS(pos_byte)                               \
 491   do {                                                  \
 492     unsigned char *p = BYTE_POS_ADDR (pos_byte);        \
 493     pos_byte += BYTES_BY_CHAR_HEAD (*p);                \
 494   } while (0)
 495
 496
 497 /* Decrease the buffer byte position POS_BYTE of the current buffer to
 498    the previous character boundary.  No range checking of POS.  */
 499
 500 #define DEC_POS(pos_byte)                       \
 501   do {                                          \
 502     unsigned char *p;                           \
 503                                                 \
 504     pos_byte--;                                 \
 505     if (pos_byte < GPT_BYTE)                    \
 506       p = BEG_ADDR + pos_byte - BEG_BYTE;       \
 507     else                                        \
 508       p = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE;\
 509     while (!CHAR_HEAD_P (*p))                   \
 510       {                                         \
 511         p--;                                    \
 512         pos_byte--;                             \
 513       }                                         \
 514   } while (0)
 515
 516 /* Increment both CHARPOS and BYTEPOS, each in the appropriate way.  */
 517
 518 #define INC_BOTH(charpos, bytepos)                              \
 519   do                                                            \
 520     {                                                           \
 521       (charpos)++;                                              \
 522       if (NILP (current_buffer->enable_multibyte_characters))   \
 523         (bytepos)++;                                            \
 524       else                                                      \
 525         INC_POS ((bytepos));                                    \
 526     }                                                           \
 527   while (0)
 528
 529
 530 /* Decrement both CHARPOS and BYTEPOS, each in the appropriate way.  */
 531
 532 #define DEC_BOTH(charpos, bytepos)                              \
 533   do                                                            \
 534     {                                                           \
 535       (charpos)--;                                              \
 536       if (NILP (current_buffer->enable_multibyte_characters))   \
 537         (bytepos)--;                                            \
 538       else                                                      \
 539         DEC_POS ((bytepos));                                    \
 540     }                                                           \
 541   while (0)
 542
 543
 544 /* Increase the buffer byte position POS_BYTE of the current buffer to
 545    the next character boundary.  This macro relies on the fact that
 546    *GPT_ADDR and *Z_ADDR are always accessible and the values are
 547    '\0'.  No range checking of POS_BYTE.  */
 548
 549 #define BUF_INC_POS(buf, pos_byte)                              \
 550   do {                                                          \
 551     unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte);        \
 552     pos_byte += BYTES_BY_CHAR_HEAD (*p);                        \
 553   } while (0)
 554
 555
 556 /* Decrease the buffer byte position POS_BYTE of the current buffer to
 557    the previous character boundary.  No range checking of POS_BYTE.  */
 558
 559 #define BUF_DEC_POS(buf, pos_byte)                                      \
 560   do {                                                                  \
 561     unsigned char *p;                                                   \
 562     pos_byte--;                                                         \
 563     if (pos_byte < BUF_GPT_BYTE (buf))                                  \
 564       p = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE;                     \
 565     else                                                                \
 566       p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
 567     while (!CHAR_HEAD_P (*p))                                           \
 568       {                                                                 \
 569         p--;                                                            \
 570         pos_byte--;                                                     \
 571       }                                                                 \
 572   } while (0)
 573
 574
 575 /* If C is a character to be unified with a Unicode character, return
 576    the unified Unicode character.  */
 577
 578 #define MAYBE_UNIFY_CHAR(c)                                     \
 579   if (c > MAX_UNICODE_CHAR                                      \
 580       && CHAR_TABLE_P (Vchar_unify_table))                      \
 581     {                                                           \
 582       Lisp_Object val;                                          \
 583       int unified;                                              \
 584                                                                 \
 585       val = CHAR_TABLE_REF (Vchar_unify_table, c);              \
 586       if (! NILP (val))                                         \
 587         {                                                       \
 588           if (SYMBOLP (val))                                    \
 589             {                                                   \
 590               Funify_charset (val, Qnil, Qnil);                 \
 591               val = CHAR_TABLE_REF (Vchar_unify_table, c);      \
 592             }                                                   \
 593           if ((unified = XINT (val)) >= 0)                      \
 594             c = unified;                                        \
 595         }                                                       \
 596     }                                                           \
 597   else
 598
 599
 600 /* Return the width of ASCII character C.  The width is measured by
 601    how many columns occupied on the screen when displayed in the
 602    current buffer.  */
 603
 604 #define ASCII_CHAR_WIDTH(c)                                             \
 605   (c < 0x20                                                             \
 606    ? (c == '\t'                                                         \
 607       ? XFASTINT (current_buffer->tab_width)                            \
 608       : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))   \
 609    : (c < 0x7f                                                          \
 610       ? 1                                                               \
 611       : ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))
 612
 613 /* Return the width of character C.  The width is measured by how many
 614    columns occupied on the screen when displayed in the current
 615    buffer.  */
 616
 617 #define CHAR_WIDTH(c)           \
 618   (ASCII_CHAR_P (c)             \
 619    ? ASCII_CHAR_WIDTH (c)       \
 620    : XINT (CHAR_TABLE_REF (Vchar_width_table, c)))
 621
 622 extern int char_resolve_modifier_mask P_ ((int));
 623 extern int char_string P_ ((unsigned, unsigned char *));
 624 extern int string_char P_ ((const unsigned char *,
 625                             const unsigned char **, int *));
 626
 627 extern int translate_char P_ ((Lisp_Object, int c));
 628 extern int char_printable_p P_ ((int c));
 629 extern void parse_str_as_multibyte P_ ((const unsigned char *, int, int *,
 630                                         int *));
 631 extern int parse_str_to_multibyte P_ ((unsigned char *, int));
 632 extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
 633 extern int str_to_multibyte P_ ((unsigned char *, int, int));
 634 extern int str_as_unibyte P_ ((unsigned char *, int));
 635 extern EMACS_INT str_to_unibyte P_ ((const unsigned char *, unsigned char *,
 636                                      EMACS_INT, int));
 637 extern int strwidth P_ ((unsigned char *, int));
 638 extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *));
 639 extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
 640
 641 extern Lisp_Object Vprintable_chars;
 642
 643 extern Lisp_Object Qcharacterp, Qauto_fill_chars;
 644 extern Lisp_Object Vtranslation_table_vector;
 645 extern Lisp_Object Vchar_width_table;
 646 extern Lisp_Object Vchar_direction_table;
 647 extern Lisp_Object Vchar_unify_table;
 648
 649 extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object));
 650
 651 /* Return a translation table of id number ID.  */
 652 #define GET_TRANSLATION_TABLE(id) \
 653   (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
 654
 655 /* A char-table for characters which may invoke auto-filling.  */
 656 extern Lisp_Object Vauto_fill_chars;
 657
 658 extern Lisp_Object Vchar_script_table;
 659 extern Lisp_Object Vscript_representative_chars;
 660
 661 /* Copy LEN bytes from FROM to TO.  This macro should be used only
 662    when a caller knows that LEN is short and the obvious copy loop is
 663    faster than calling bcopy which has some overhead.  Copying a
 664    multibyte sequence of a character is the typical case.  */
 665
 666 #define BCOPY_SHORT(from, to, len)              \
 667   do {                                          \
 668     int i = len;                                \
 669     unsigned char *from_p = from, *to_p = to;   \
 670     while (i--) *to_p++ = *from_p++;            \
 671   } while (0)
 672
 673 #define DEFSYM(sym, name)       \
 674   do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
 675
 676 #endif /* EMACS_CHARACTER_H */
 677
 678 /* arch-tag: 4ef86004-2eff-4073-8cea-cfcbcf7188ac
 679    (do not change this comment) */