src/character.h

/* Header for multibyte character handler.
   Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
     Licensed to the Free Software Foundation.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef EMACS_CHARACTER_H
#define EMACS_CHARACTER_H

#include <verify.h>

INLINE_HEADER_BEGIN

/* character code	1st byte   byte sequence
   --------------	--------   -------------
        0-7F		00..7F	   0xxxxxxx
       80-7FF		C2..DF	   110xxxxx 10xxxxxx
      800-FFFF		E0..EF	   1110xxxx 10xxxxxx 10xxxxxx
    10000-1FFFFF	F0..F7	   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
   200000-3FFF7F	F8	   11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
   3FFF80-3FFFFF	C0..C1	   1100000x 10xxxxxx (for eight-bit-char)
   400000-...		invalid

   invalid 1st byte	80..BF	   10xxxxxx
			F9..FF	   11111xxx (xxx != 000)
*/

/* Maximum character code ((1 << CHARACTERBITS) - 1).  Per the
   encoding table above, codes from 0x400000 upward are invalid.  */
#define MAX_CHAR  0x3FFFFF

/* Maximum Unicode character code.  */
#define MAX_UNICODE_CHAR 0x10FFFF

/* Maximum N-byte character codes: the largest character code whose
   multibyte form occupies N bytes.  Codes above MAX_5_BYTE_CHAR
   (3FFF80..3FFFFF) are the eight-bit-char range of the table above and
   use the two-byte C0..C1 form.  */
#define MAX_1_BYTE_CHAR 0x7F
#define MAX_2_BYTE_CHAR 0x7FF
#define MAX_3_BYTE_CHAR 0xFFFF
#define MAX_4_BYTE_CHAR 0x1FFFFF
#define MAX_5_BYTE_CHAR 0x3FFF7F

/* Minimum leading code of multibyte characters.  */
#define MIN_MULTIBYTE_LEADING_CODE 0xC0
/* Maximum leading code of multibyte characters.  */
#define MAX_MULTIBYTE_LEADING_CODE 0xF8

/* Nonzero iff character code C lies above MAX_5_BYTE_CHAR, i.e. C is
   one of the codes that stand for a raw 8-bit byte.  */
#define CHAR_BYTE8_P(c) (MAX_5_BYTE_CHAR < (c))

/* Return the character code for raw 8-bit byte BYTE, obtained by
   adding the fixed offset 0x3FFF00.  */
#define BYTE8_TO_CHAR(byte) (0x3FFF00 + (byte))

/* Return the character code for unibyte BYTE: an ASCII byte maps to
   itself, any other byte goes through BYTE8_TO_CHAR.  BYTE is
   evaluated more than once.  */
#define UNIBYTE_TO_CHAR(byte) \
  (! ASCII_CHAR_P (byte) \
   ? BYTE8_TO_CHAR (byte) \
   : (byte))

/* Return the raw 8-bit byte for character C.  If C is a raw-byte
   character (CHAR_BYTE8_P), undo the 0x3FFF00 offset; otherwise keep
   only the low 8 bits of C.  C is fully parenthesized so that an
   argument such as `a | b' is masked as a whole.  C is evaluated
   more than once.  */
#define CHAR_TO_BYTE8(c) \
  (CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 : ((c) & 0xFF))

/* Return the byte that character C corresponds to: C itself when C
   is ASCII, C minus 0x3FFF00 when C is a raw-byte character, and -1
   when C corresponds to no single byte.  C is evaluated more than
   once.  */
#define CHAR_TO_BYTE_SAFE(c) \
  (ASCII_CHAR_P (c) ? (c) \
   : CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 \
   : -1)

/* Nonzero iff BYTE is 0xC0 or 0xC1, the two leading bytes used by
   the multibyte form of a raw 8-bit byte character.  Clearing the
   lowest bit folds both values onto 0xC0.  */
#define CHAR_BYTE8_HEAD_P(byte) (((byte) & ~1) == 0xC0)

/* Replace lvalue C by its raw byte (CHAR_TO_BYTE8) unless C is
   already ASCII, in which case C is left alone.  */
#define MAKE_CHAR_UNIBYTE(c) \
  do \
    { \
      if (! ASCII_CHAR_P (c)) \
        { \
          (c) = CHAR_TO_BYTE8 (c); \
        } \
    } \
  while (false)


/* Replace lvalue C, a unibyte value, by the corresponding character
   code (UNIBYTE_TO_CHAR); an ASCII value is unchanged.  Asserts via
   eassert that 0 <= C < 256.  The macro is an expression whose value
   is the new C.  */
#define MAKE_CHAR_MULTIBYTE(c) \
  (eassert (0 <= (c) && (c) < 256), \
   (c) = UNIBYTE_TO_CHAR (c))

/* This is the maximum byte length of multibyte form (the five-byte
   F8-led sequence in the encoding table above).  */
#define MAX_MULTIBYTE_LENGTH 5

/* Nonzero iff Lisp object X is a character: X must satisfy NATNUMP
   and its XFASTINT value must not exceed MAX_CHAR.  X is evaluated
   more than once.  */
#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)

/* Nonzero iff C is valid as a character code, i.e. C compares
   <= MAX_CHAR under UNSIGNED_CMP.
   NOTE(review): presumably the unsigned comparison is what rejects
   negative C -- confirm against UNSIGNED_CMP's definition.  */
#define CHAR_VALID_P(c) UNSIGNED_CMP (c, <=, MAX_CHAR)

/* Check, via CHECK_TYPE with predicate symbol Qcharacterp, that Lisp
   object X satisfies CHARACTERP.  */
#define CHECK_CHARACTER(x) \
  CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)

/* Apply CHECK_CHARACTER to the car of cons X, then store the checked
   value back into the car with XSETCAR.  */
#define CHECK_CHARACTER_CAR(x) \
  do \
    { \
      Lisp_Object checked_car = XCAR (x); \
      CHECK_CHARACTER (checked_car); \
      XSETCAR ((x), checked_car); \
    } \
  while (false)

/* Apply CHECK_CHARACTER to the cdr of cons X, then store the checked
   value back into the cdr with XSETCDR.  */
#define CHECK_CHARACTER_CDR(x) \
  do \
    { \
      Lisp_Object checked_cdr = XCDR (x); \
      CHECK_CHARACTER (checked_cdr); \
      XSETCDR ((x), checked_cdr); \
    } \
  while (false)

/* Nonzero iff C is a character of code less than 0x100, compared
   with UNSIGNED_CMP.
   NOTE(review): presumably the unsigned comparison makes negative C
   yield zero -- confirm against UNSIGNED_CMP's definition.  */
#define SINGLE_BYTE_CHAR_P(c) UNSIGNED_CMP (c, <, 0x100)

/* Nonzero if character C has a printable glyph: either C is in the
   range 32..126, or its entry in the char-table Vprintable_chars is
   non-nil.  The table is consulted only when the range test fails.
   C is evaluated more than once.  */
#define CHAR_PRINTABLE_P(c) \
  ((32 <= (c) && (c) < 127) \
   || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))

/* Return byte length of multibyte form for character C.  The chain
   tests from the top range downward: codes above MAX_5_BYTE_CHAR are
   raw-byte characters and take 2 bytes; below that the length follows
   the MAX_N_BYTE_CHAR limits.  C is evaluated more than once.  */
#define CHAR_BYTES(c) \
  ((c) > MAX_5_BYTE_CHAR ? 2 \
   : (c) > MAX_4_BYTE_CHAR ? 5 \
   : (c) > MAX_3_BYTE_CHAR ? 4 \
   : (c) > MAX_2_BYTE_CHAR ? 3 \
   : (c) > MAX_1_BYTE_CHAR ? 2 \
   : 1)


/* Return the leading code (first byte) of the multibyte form of C.
   One-byte characters are their own leading code; two- to four-byte
   forms put the top bits of C under the 0xC0 / 0xE0 / 0xF0 prefix;
   every five-byte form starts with 0xF8; raw-byte characters (above
   MAX_5_BYTE_CHAR) start with 0xC0 or 0xC1 according to bit 6 of C.
   C is evaluated more than once.  */
#define CHAR_LEADING_CODE(c) \
  ((c) <= MAX_1_BYTE_CHAR \
   ? (c) \
   : (c) <= MAX_2_BYTE_CHAR \
   ? (0xC0 | ((c) >> 6)) \
   : (c) <= MAX_3_BYTE_CHAR \
   ? (0xE0 | ((c) >> 12)) \
   : (c) <= MAX_4_BYTE_CHAR \
   ? (0xF0 | ((c) >> 18)) \
   : (c) <= MAX_5_BYTE_CHAR \
   ? 0xF8 \
   : (0xC0 | (((c) >> 6) & 0x01)))


/* Store multibyte form of the character C in P.  The caller should
   allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
   Returns the length of the multibyte form.

   Forms of one, two and three bytes are written inline, selecting the
   length with UNSIGNED_CMP against the MAX_N_BYTE_CHAR limits.  Any
   larger C is handed to char_string, wrapped in a verify_expr that
   requires sizeof (C) <= sizeof (unsigned).  C and P are evaluated
   more than once.  */

#define CHAR_STRING(c, p)			\
  (UNSIGNED_CMP (c, <=, MAX_1_BYTE_CHAR)	\
   ? ((p)[0] = (c),				\
      1)					\
   : UNSIGNED_CMP (c, <=, MAX_2_BYTE_CHAR)	\
   ? ((p)[0] = (0xC0 | ((c) >> 6)),		\
      (p)[1] = (0x80 | ((c) & 0x3F)),		\
      2)					\
   : UNSIGNED_CMP (c, <=, MAX_3_BYTE_CHAR)	\
   ? ((p)[0] = (0xE0 | ((c) >> 12)),		\
      (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),	\
      (p)[2] = (0x80 | ((c) & 0x3F)),		\
      3)					\
   : verify_expr (sizeof (c) <= sizeof (unsigned), char_string (c, p)))

/* Store the two-byte multibyte form of raw byte B at P and return its
   length, which is always 2.  The first byte is 0xC1 when bit 6 of B
   is set and 0xC0 otherwise; the second byte carries the low six bits
   of B under the 0x80 trailing-byte prefix.  Only P[0] and P[1] are
   written, although callers are asked to provide at least
   MAX_MULTIBYTE_LENGTH bytes at P.  B and P are evaluated more than
   once.  */

#define BYTE8_STRING(b, p) \
  ((p)[0] = (((b) & 0x40) ? 0xC1 : 0xC0), \
   (p)[1] = (((b) & 0x3F) | 0x80), \
   2)


/* Store multibyte form of the character C at P and leave P pointing
   just past the bytes written.  The caller should allocate at least
   MAX_MULTIBYTE_LENGTH bytes area at P in advance.

   One- to three-byte forms are emitted inline one byte at a time; any
   larger C goes through char_string, whose return value is added to
   P.  Unlike CHAR_STRING, the range tests here are plain `<='
   comparisons.  C and P are evaluated more than once.  */

#define CHAR_STRING_ADVANCE(c, p) \
  do \
    { \
      if ((c) <= MAX_1_BYTE_CHAR) \
        { \
          *(p)++ = (c); \
        } \
      else if ((c) <= MAX_2_BYTE_CHAR) \
        { \
          *(p)++ = (0xC0 | ((c) >> 6)); \
          *(p)++ = (0x80 | ((c) & 0x3F)); \
        } \
      else if ((c) <= MAX_3_BYTE_CHAR) \
        { \
          *(p)++ = (0xE0 | ((c) >> 12)); \
          *(p)++ = (0x80 | (((c) >> 6) & 0x3F)); \
          *(p)++ = (0x80 | ((c) & 0x3F)); \
        } \
      else \
        { \
          verify (sizeof (c) <= sizeof (unsigned)); \
          (p) += char_string (c, p); \
        } \
    } \
  while (false)


/* Nonzero iff BYTE starts a non-ASCII character in a multibyte form,
   i.e. bits 7 and 6 of BYTE are both set.  Complementing BYTE turns
   "both set" into "both clear".  */
#define LEADING_CODE_P(byte) ((~(byte) & 0xC0) == 0)

/* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
   multibyte form, i.e. bits 7..6 of BYTE are `10'.  Flipping bit 7
   maps exactly that pattern onto `00'.  */
#define TRAILING_CODE_P(byte) ((((byte) ^ 0x80) & 0xC0) == 0)

/* Nonzero iff BYTE starts a character in a multibyte form, i.e. BYTE
   is anything but a trailing code.  This is equivalent to:
	(ASCII_CHAR_P (byte) || LEADING_CODE_P (byte))  */
#define CHAR_HEAD_P(byte) ((((byte) ^ 0x80) & 0xC0) != 0)

/* How many bytes a character that starts with BYTE occupies in a
   multibyte form.  Only bits 7, 5, 4 and 3 of BYTE are examined, in
   that order; the first one found clear decides the length (1, 2, 3
   or 4), and 5 results when all four are set.  Bit 6 is never tested,
   so BYTE is taken on trust to be a character head.  BYTE is
   evaluated more than once.  */
#define BYTES_BY_CHAR_HEAD(byte) \
  (((byte) & 0x80) \
   ? (((byte) & 0x20) \
      ? (((byte) & 0x10) \
         ? (((byte) & 0x08) ? 5 : 4) \
         : 3) \
      : 2) \
   : 1)


/* Return the byte length of the multibyte form that starts at P in a
   byte sequence ending just before PEND.  Return 0 if P is not before
   PEND, if the form would extend to or past PEND, or if the bytes at
   P do not make up a valid multibyte form.

   Each trailing byte is bounds-checked against PEND before it is
   read.  P and PEND are fully parenthesized so that expression
   arguments (e.g. a conditional or a sum) are compared as a whole.
   Both are evaluated more than once.  */

#define MULTIBYTE_LENGTH(p, pend) \
  ((p) >= (pend) ? 0 \
   : !((p)[0] & 0x80) ? 1 \
   : (((p) + 1 >= (pend)) || (((p)[1] & 0xC0) != 0x80)) ? 0 \
   : ((p)[0] & 0xE0) == 0xC0 ? 2 \
   : (((p) + 2 >= (pend)) || (((p)[2] & 0xC0) != 0x80)) ? 0 \
   : ((p)[0] & 0xF0) == 0xE0 ? 3 \
   : (((p) + 3 >= (pend)) || (((p)[3] & 0xC0) != 0x80)) ? 0 \
   : ((p)[0] & 0xF8) == 0xF0 ? 4 \
   : (((p) + 4 >= (pend)) || (((p)[4] & 0xC0) != 0x80)) ? 0 \
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
   : 0)


/* Like MULTIBYTE_LENGTH, but without an end address: trailing bytes
   are read from P unconditionally, so the caller must ensure they are
   readable.  Returns 0 when the bytes at P are not a valid multibyte
   form.  Each nesting level first requires the next byte to be a
   trailing code (10xxxxxx) and then asks whether the head byte
   announces a form of exactly that length.  P is evaluated more than
   once.  */

#define MULTIBYTE_LENGTH_NO_CHECK(p) \
  (((p)[0] & 0x80) == 0 ? 1 \
   : ((p)[1] & 0xC0) == 0x80 \
   ? (((p)[0] & 0xE0) == 0xC0 ? 2 \
      : ((p)[2] & 0xC0) == 0x80 \
      ? (((p)[0] & 0xF0) == 0xE0 ? 3 \
         : ((p)[3] & 0xC0) == 0x80 \
         ? (((p)[0] & 0xF8) == 0xF0 ? 4 \
            : (((p)[4] & 0xC0) == 0x80 \
               && (p)[0] == 0xF8 \
               && ((p)[1] & 0xF0) == 0x80) ? 5 \
            : 0) \
         : 0) \
      : 0) \
   : 0)

/* If P is before LIMIT, advance P by the length that its current
   byte announces (BYTES_BY_CHAR_HEAD), i.e. to the next character
   boundary; otherwise leave P alone.  Assumes that P is already at a
   character boundary of the same multibyte form whose end address is
   LIMIT.  The new P is not compared against LIMIT.  */

#define NEXT_CHAR_BOUNDARY(p, limit) \
  do \
    { \
      if ((limit) > (p)) \
        { \
          (p) += BYTES_BY_CHAR_HEAD (*(p)); \
        } \
    } \
  while (false)


/* If P is after LIMIT, move P back to the previous character
   boundary; otherwise leave P alone.  Assumes that P is already at a
   character boundary of the same multibyte form whose beginning
   address is LIMIT.

   The scan walks backward from P - 1 over trailing codes until it
   meets a character head, but never goes below LIMIT, so no byte
   before LIMIT is read.  If the byte where the scan stops is a head
   whose announced length (BYTES_BY_CHAR_HEAD) reaches exactly to P,
   P moves there; in every other case (no head found at or after
   LIMIT, or a length mismatch) P just steps back one byte.  */

#define PREV_CHAR_BOUNDARY(p, limit) \
  do \
    { \
      if ((p) > (limit)) \
        { \
          const unsigned char *prev_boundary_head = (p); \
          do \
            { \
              prev_boundary_head--; \
            } \
          while (prev_boundary_head > (limit) \
                 && ! CHAR_HEAD_P (*prev_boundary_head)); \
          (p) = ((CHAR_HEAD_P (*prev_boundary_head) \
                  && (BYTES_BY_CHAR_HEAD (*prev_boundary_head) \
                      == (p) - prev_boundary_head)) \
                 ? prev_boundary_head \
                 : (p) - 1); \
        } \
    } \
  while (false)

/* Return the character code of character whose multibyte form is at
   P.  Note that this macro unifies CJK characters whose codepoints
   are in the Private Use Areas (PUAs), so it might return a different
   codepoint from the one actually stored at P.

   One-, two- and three-byte forms are decoded inline; a two-byte form
   whose head byte is below 0xC2 has 0x3FFF80 added, which yields the
   raw-byte character codes.  Longer forms are passed to string_char.
   P is evaluated more than once.  */

#define STRING_CHAR(p) \
  (((p)[0] & 0x80) == 0 \
   ? (p)[0] \
   : ((p)[0] & 0x20) == 0 \
   ? ((((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0) \
      + ((((p)[0] & 0x1F) << 6) \
         | ((p)[1] & 0x3F))) \
   : ((p)[0] & 0x10) == 0 \
   ? (((p)[2] & 0x3F) \
      | (((p)[1] & 0x3F) << 6) \
      | (((p)[0] & 0x0F) << 12)) \
   : string_char ((p), NULL, NULL))


/* Like STRING_CHAR, but also store the byte length of the multibyte
   form at P into lvalue ACTUAL_LEN.  For one- to three-byte forms the
   length is assigned inline; for longer forms the address of
   ACTUAL_LEN is passed to string_char.

   Note: The length stored is that of the sequence as it sits in the
   buffer or string.  The character returned might have a different
   codepoint whose own multibyte sequence has a different length, due
   to possible unification of CJK characters inside string_char.
   Therefore do NOT assume that ACTUAL_LEN equals the length of the
   multibyte sequence of the returned character.  */

#define STRING_CHAR_AND_LENGTH(p, actual_len) \
  (((p)[0] & 0x80) == 0 \
   ? ((actual_len) = 1, \
      (p)[0]) \
   : ((p)[0] & 0x20) == 0 \
   ? ((actual_len) = 2, \
      ((((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0) \
       + ((((p)[0] & 0x1F) << 6) \
          | ((p)[1] & 0x3F)))) \
   : ((p)[0] & 0x10) == 0 \
   ? ((actual_len) = 3, \
      (((p)[2] & 0x3F) \
       | (((p)[1] & 0x3F) << 6) \
       | (((p)[0] & 0x0F) << 12))) \
   : string_char ((p), NULL, &(actual_len)))


/* Like STRING_CHAR, but also advance lvalue P past the multibyte
   form.  For two- and three-byte forms P is bumped first and the
   bytes are then read back through negative indices; for longer forms
   the address of P is handed to string_char.  In the two-byte case
   the 0x3FFF80 raw-byte offset is OR-ed in here, whereas STRING_CHAR
   adds it.  */

#define STRING_CHAR_ADVANCE(p) \
  (((p)[0] & 0x80) == 0 \
   ? *(p)++ \
   : ((p)[0] & 0x20) == 0 \
   ? ((p) += 2, \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0) \
       | (((p)[-2] & 0x1F) << 6) \
       | ((p)[-1] & 0x3F))) \
   : ((p)[0] & 0x10) == 0 \
   ? ((p) += 3, \
      (((p)[-1] & 0x3F) \
       | (((p)[-2] & 0x3F) << 6) \
       | (((p)[-3] & 0x0F) << 12))) \
   : string_char ((p), &(p), NULL))


/* Fetch the "next" character from Lisp string STRING at byte position
   BYTEIDX, character position CHARIDX, and store it into OUTPUT.
   CHARIDX is bumped by one; BYTEIDX is bumped by one for a unibyte
   STRING, or by the length reported by STRING_CHAR_AND_LENGTH for a
   multibyte one.

   All the args must be side-effect-free.
   BYTEIDX and CHARIDX must be lvalues.  */

#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  do \
    { \
      (CHARIDX)++; \
      if (! STRING_MULTIBYTE (STRING)) \
        { \
          (OUTPUT) = SREF (STRING, BYTEIDX); \
          (BYTEIDX)++; \
        } \
      else \
        { \
          int fetched_len; \
          unsigned char *fetched_at = &SDATA (STRING)[BYTEIDX]; \
          \
          (OUTPUT) = STRING_CHAR_AND_LENGTH (fetched_at, fetched_len); \
          (BYTEIDX) += fetched_len; \
        } \
    } \
  while (false)

/* Like FETCH_STRING_CHAR_ADVANCE, but hand back a multibyte character
   even if STRING is unibyte: in that case the byte fetched is run
   through MAKE_CHAR_MULTIBYTE after BYTEIDX has been bumped.  */

#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  do \
    { \
      (CHARIDX)++; \
      if (! STRING_MULTIBYTE (STRING)) \
        { \
          (OUTPUT) = SREF (STRING, BYTEIDX); \
          (BYTEIDX)++; \
          MAKE_CHAR_MULTIBYTE (OUTPUT); \
        } \
      else \
        { \
          int fetched_len; \
          unsigned char *fetched_at = &SDATA (STRING)[BYTEIDX]; \
          \
          (OUTPUT) = STRING_CHAR_AND_LENGTH (fetched_at, fetched_len); \
          (BYTEIDX) += fetched_len; \
        } \
    } \
  while (false)


/* Like FETCH_STRING_CHAR_ADVANCE, but takes it for granted that
   STRING is multibyte: the character is always decoded with
   STRING_CHAR_AND_LENGTH.  BYTEIDX is bumped by the decoded length
   and CHARIDX by one.  */

#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  do \
    { \
      int decoded_len; \
      unsigned char *decode_from = &SDATA (STRING)[BYTEIDX]; \
      \
      (OUTPUT) = STRING_CHAR_AND_LENGTH (decode_from, decoded_len); \
      (BYTEIDX) += decoded_len; \
      (CHARIDX)++; \
    } \
  while (false)


/* Like FETCH_STRING_CHAR_ADVANCE, but fetch the character from the
   current buffer at byte position BYTEIDX.  Whether the buffer is
   treated as multibyte is decided by its enable_multibyte_characters
   slot: nil means a single byte is fetched, anything else means the
   character is decoded with STRING_CHAR_AND_LENGTH.  */

#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
  do \
    { \
      (CHARIDX)++; \
      if (NILP (BVAR (current_buffer, enable_multibyte_characters))) \
        { \
          (OUTPUT) = *(BYTE_POS_ADDR (BYTEIDX)); \
          (BYTEIDX)++; \
        } \
      else \
        { \
          int fetched_len; \
          unsigned char *fetched_at = BYTE_POS_ADDR (BYTEIDX); \
          \
          (OUTPUT) = STRING_CHAR_AND_LENGTH (fetched_at, fetched_len); \
          (BYTEIDX) += fetched_len; \
        } \
    } \
  while (false)


/* Like FETCH_CHAR_ADVANCE, but takes it for granted that the current
   buffer is multibyte: the character at BYTEIDX is always decoded
   with STRING_CHAR_AND_LENGTH.  BYTEIDX is bumped by the decoded
   length and CHARIDX by one.  */

#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
  do \
    { \
      int decoded_len; \
      unsigned char *decode_from = BYTE_POS_ADDR (BYTEIDX); \
      \
      (OUTPUT) = STRING_CHAR_AND_LENGTH (decode_from, decoded_len); \
      (BYTEIDX) += decoded_len; \
      (CHARIDX)++; \
    } \
  while (false)


/* Move buffer byte position POS_BYTE (an lvalue) of the current
   buffer forward to the next character boundary, by adding the length
   announced by the byte at BYTE_POS_ADDR (POS_BYTE).  No range
   checking of POS_BYTE.  */

#define INC_POS(pos_byte) \
  do \
    { \
      unsigned char *inc_pos_head = BYTE_POS_ADDR (pos_byte); \
      (pos_byte) += BYTES_BY_CHAR_HEAD (*inc_pos_head); \
    } \
  while (false)


/* Move buffer byte position POS_BYTE (an lvalue) of the current
   buffer back to the previous character boundary.  POS_BYTE is first
   decremented once; its address is then computed from BEG_ADDR, with
   GAP_SIZE added when the position is not below GPT_BYTE; finally the
   position keeps stepping back while the byte under it is not a
   character head.  No range checking of POS_BYTE.  */

#define DEC_POS(pos_byte) \
  do \
    { \
      unsigned char *dec_pos_at; \
      \
      (pos_byte)--; \
      dec_pos_at = ((pos_byte) < GPT_BYTE \
                    ? BEG_ADDR + (pos_byte) - BEG_BYTE \
                    : BEG_ADDR + GAP_SIZE + (pos_byte) - BEG_BYTE); \
      for (; ! CHAR_HEAD_P (*dec_pos_at); dec_pos_at--) \
        (pos_byte)--; \
    } \
  while (false)

/* Step both CHARPOS and BYTEPOS forward by one character.  CHARPOS
   always grows by one; BYTEPOS grows by one when the current buffer's
   enable_multibyte_characters is nil, and by the character's byte
   length (INC_POS) otherwise.  */

#define INC_BOTH(charpos, bytepos) \
  do \
    { \
      (charpos)++; \
      if (! NILP (BVAR (current_buffer, enable_multibyte_characters))) \
        { \
          INC_POS ((bytepos)); \
        } \
      else \
        { \
          (bytepos)++; \
        } \
    } \
  while (false)


/* Step both CHARPOS and BYTEPOS back by one character.  CHARPOS
   always shrinks by one; BYTEPOS shrinks by one when the current
   buffer's enable_multibyte_characters is nil, and moves to the
   previous character boundary (DEC_POS) otherwise.  */

#define DEC_BOTH(charpos, bytepos) \
  do \
    { \
      (charpos)--; \
      if (! NILP (BVAR (current_buffer, enable_multibyte_characters))) \
        { \
          DEC_POS ((bytepos)); \
        } \
      else \
        { \
          (bytepos)--; \
        } \
    } \
  while (false)


/* Move byte position POS_BYTE (an lvalue) of buffer BUF forward to
   the next character boundary, by adding the length announced by the
   byte at BUF_BYTE_ADDRESS (BUF, POS_BYTE).  This macro relies on the
   fact that *GPT_ADDR and *Z_ADDR are always accessible and the
   values are '\0'.  No range checking of POS_BYTE.  */

#define BUF_INC_POS(buf, pos_byte) \
  do \
    { \
      unsigned char *buf_inc_head = BUF_BYTE_ADDRESS (buf, pos_byte); \
      (pos_byte) += BYTES_BY_CHAR_HEAD (*buf_inc_head); \
    } \
  while (false)


/* Move byte position POS_BYTE (an lvalue) of buffer BUF back to the
   previous character boundary.  POS_BYTE is first decremented once;
   its address is then computed from BUF_BEG_ADDR (BUF), with
   BUF_GAP_SIZE (BUF) added when the position is not below
   BUF_GPT_BYTE (BUF); finally the position keeps stepping back while
   the byte under it is not a character head.  No range checking of
   POS_BYTE.  */

#define BUF_DEC_POS(buf, pos_byte) \
  do \
    { \
      unsigned char *buf_dec_at; \
      \
      (pos_byte)--; \
      buf_dec_at = ((pos_byte) < BUF_GPT_BYTE (buf) \
                    ? BUF_BEG_ADDR (buf) + (pos_byte) - BEG_BYTE \
                    : (BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) \
                       + (pos_byte) - BEG_BYTE)); \
      for (; ! CHAR_HEAD_P (*buf_dec_at); buf_dec_at--) \
        (pos_byte)--; \
    } \
  while (false)


/* Return a non-outlandish value for the tab width of buffer BUF: the
   buffer's tab_width slot, read with XFASTINT and passed through
   sanitize_tab_width.  */

#define SANE_TAB_WIDTH(buf) \
  sanitize_tab_width (XFASTINT (BVAR (buf, tab_width)))

/* Return WIDTH if it lies in 1..1000, and the default of 8
   otherwise.  */
INLINE int
sanitize_tab_width (EMACS_INT width)
{
  if (width < 1 || 1000 < width)
    return 8;
  return width;
}

/* Return the width of ASCII character C.  The width is measured by
   how many columns C will occupy on the screen when displayed in the
   current buffer:

     TAB                   SANE_TAB_WIDTH (current_buffer)
     newline               0
     other C < 0x20        2 if the buffer's ctl_arrow is non-nil, else 4
     0x20 <= C < 0x7F      1
     C >= 0x7F             2 if the buffer's ctl_arrow is non-nil, else 4

   C is fully parenthesized so that an expression argument such as
   `ch & 0x7F' is compared as a whole.  C is evaluated more than
   once.  */

#define ASCII_CHAR_WIDTH(c) \
  ((c) < 0x20 \
   ? ((c) == '\t' \
      ? SANE_TAB_WIDTH (current_buffer) \
      : ((c) == '\n' \
         ? 0 \
         : (NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))) \
   : ((c) < 0x7f \
      ? 1 \
      : ((NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))))

/* Return a non-outlandish value for a character width: WIDTH itself
   when it lies in 0..1000, and 1000 for anything outside that range
   (negative values included).  */

INLINE int
sanitize_char_width (EMACS_INT width)
{
  if (width < 0 || 1000 < width)
    return 1000;
  return width;
}

/* Return the width of character C, measured by how many columns C
   will occupy on the screen when displayed in the current buffer.
   ASCII characters go through ASCII_CHAR_WIDTH; for any other C the
   entry of Vchar_width_table is read with XINT and clamped by
   sanitize_char_width.  C is evaluated more than once.  */

#define CHAR_WIDTH(c) \
  (! ASCII_CHAR_P (c) \
   ? sanitize_char_width (XINT (CHAR_TABLE_REF (Vchar_width_table, c))) \
   : ASCII_CHAR_WIDTH (c))

/* If C is a variation selector, return the index of the variation
   selector (1..256).  Otherwise, return 0.  Codes FE00..FE0F map to
   indices 1..16 and codes E0100..E01EF map to indices 17..256.  C is
   evaluated more than once.  */

#define CHAR_VARIATION_SELECTOR_P(c) \
  (0xFE00 <= (c) && (c) <= 0xFE0F \
   ? (c) - 0xFE00 + 1 \
   : 0xE0100 <= (c) && (c) <= 0xE01EF \
   ? (c) - 0xE0100 + 17 \
   : 0)

/* Classify C as a UTF-16 surrogate code: return 1 if C is a high
   surrogate (D800..DBFF), 2 if C is a low surrogate (DC00..DFFF),
   and 0 otherwise.  C is evaluated more than once.  */

#define CHAR_SURROGATE_PAIR_P(c) \
  ((c) < 0xD800 || 0xDFFF < (c) ? 0 \
   : (c) < 0xDC00 ? 1 \
   : 2)

/* Data type for Unicode general category.

   The order of members must be in sync with the 8th element of the
   member of unidata-prop-alist (in admin/unidata/unidata-gen.el) for
   Unicode character property `general-category'.

   Each member is named after the two-letter General_Category
   abbreviation of the Unicode standard; the first letter gives the
   major class (L letter, M mark, N number, P punctuation, S symbol,
   Z separator, C other).  Do not reorder or insert members without
   updating the Lisp side named above.  */

typedef enum {
  UNICODE_CATEGORY_UNKNOWN = 0,
  /* Letters.  */
  UNICODE_CATEGORY_Lu,
  UNICODE_CATEGORY_Ll,
  UNICODE_CATEGORY_Lt,
  UNICODE_CATEGORY_Lm,
  UNICODE_CATEGORY_Lo,
  /* Marks.  */
  UNICODE_CATEGORY_Mn,
  UNICODE_CATEGORY_Mc,
  UNICODE_CATEGORY_Me,
  /* Numbers.  */
  UNICODE_CATEGORY_Nd,
  UNICODE_CATEGORY_Nl,
  UNICODE_CATEGORY_No,
  /* Punctuation.  */
  UNICODE_CATEGORY_Pc,
  UNICODE_CATEGORY_Pd,
  UNICODE_CATEGORY_Ps,
  UNICODE_CATEGORY_Pe,
  UNICODE_CATEGORY_Pi,
  UNICODE_CATEGORY_Pf,
  UNICODE_CATEGORY_Po,
  /* Symbols.  */
  UNICODE_CATEGORY_Sm,
  UNICODE_CATEGORY_Sc,
  UNICODE_CATEGORY_Sk,
  UNICODE_CATEGORY_So,
  /* Separators.  */
  UNICODE_CATEGORY_Zs,
  UNICODE_CATEGORY_Zl,
  UNICODE_CATEGORY_Zp,
  /* Other.  */
  UNICODE_CATEGORY_Cc,
  UNICODE_CATEGORY_Cf,
  UNICODE_CATEGORY_Cs,
  UNICODE_CATEGORY_Co,
  UNICODE_CATEGORY_Cn
} unicode_category_t;

/* Declarations of functions and variables whose definitions are not
   in this header.  */

extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST;
/* Out-of-line helpers for the macros above: CHAR_STRING and
   CHAR_STRING_ADVANCE call char_string for characters beyond
   MAX_3_BYTE_CHAR, and the STRING_CHAR family calls string_char for
   multibyte forms longer than three bytes.  */
extern int char_string (unsigned, unsigned char *);
extern int string_char (const unsigned char *,
                        const unsigned char **, int *);

extern int translate_char (Lisp_Object, int c);
/* Conversions between unibyte and multibyte byte sequences, and
   string width computations.  */
extern void parse_str_as_multibyte (const unsigned char *,
				    ptrdiff_t, ptrdiff_t *, ptrdiff_t *);
extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
				   ptrdiff_t *);
extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
                                 ptrdiff_t);
extern ptrdiff_t strwidth (const char *, ptrdiff_t);
extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
				 ptrdiff_t *, ptrdiff_t *);
extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t,
				    ptrdiff_t *, ptrdiff_t *);

/* Qcharacterp is the predicate symbol that CHECK_CHARACTER passes to
   CHECK_TYPE.  */
extern Lisp_Object Qcharacterp;
extern Lisp_Object Vchar_unify_table;
extern Lisp_Object string_escape_byte8 (Lisp_Object);

/* Return the translation table of id number ID: the cdr of element
   ID of the vector Vtranslation_table_vector.  ID is not
   range-checked here.  */
#define GET_TRANSLATION_TABLE(id) \
  (XCDR (XVECTOR (Vtranslation_table_vector)->contents[id]))

INLINE_HEADER_END

#endif /* EMACS_CHARACTER_H */
Commit	Line	Data
	1	/* Header for multibyte character handler.
	2	Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
	3	Licensed to the Free Software Foundation.
	4	Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
	5	National Institute of Advanced Industrial Science and Technology (AIST)
	6	Registration Number H13PRO009
	7
	8	This file is part of GNU Emacs.
	9
	10	GNU Emacs is free software: you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
	12	the Free Software Foundation, either version 3 of the License, or
	13	(at your option) any later version.
	14
	15	GNU Emacs is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
	19
	20	You should have received a copy of the GNU General Public License
	21	along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
	22
	23	#ifndef EMACS_CHARACTER_H
	24	#define EMACS_CHARACTER_H
	25
	26	#include <verify.h>
	27
	28	INLINE_HEADER_BEGIN
	29
	30	/* character code 1st byte byte sequence
	31	-------------- -------- -------------
	32	0-7F 00..7F 0xxxxxxx
	33	80-7FF C2..DF 110xxxxx 10xxxxxx
	34	800-FFFF E0..EF 1110xxxx 10xxxxxx 10xxxxxx
	35	10000-1FFFFF F0..F7 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	36	200000-3FFF7F F8 11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
	37	3FFF80-3FFFFF C0..C1 1100000x 10xxxxxx (for eight-bit-char)
	38	400000-... invalid
	39
	40	invalid 1st byte 80..BF 10xxxxxx
	41	F9..FF 11111xxx (xxx != 000)
	42	*/
	43
	44	/* Maximum character code ((1 << CHARACTERBITS) - 1). */
	45	#define MAX_CHAR 0x3FFFFF
	46
	47	/* Maximum Unicode character code. */
	48	#define MAX_UNICODE_CHAR 0x10FFFF
	49
	50	/* Maximum N-byte character codes. */
	51	#define MAX_1_BYTE_CHAR 0x7F
	52	#define MAX_2_BYTE_CHAR 0x7FF
	53	#define MAX_3_BYTE_CHAR 0xFFFF
	54	#define MAX_4_BYTE_CHAR 0x1FFFFF
	55	#define MAX_5_BYTE_CHAR 0x3FFF7F
	56
	57	/* Minimum leading code of multibyte characters. */
	58	#define MIN_MULTIBYTE_LEADING_CODE 0xC0
	59	/* Maximum leading code of multibyte characters. */
	60	#define MAX_MULTIBYTE_LEADING_CODE 0xF8
	61
	62	/* Nonzero iff C is a character that corresponds to a raw 8-bit
	63	byte. */
	64	#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
	65
	66	/* Return the character code for raw 8-bit byte BYTE. */
	67	#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
	68
	69	#define UNIBYTE_TO_CHAR(byte) \
	70	(ASCII_CHAR_P (byte) ? (byte) : BYTE8_TO_CHAR (byte))
	71
	72	/* Return the raw 8-bit byte for character C. */
	73	#define CHAR_TO_BYTE8(c) (CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 : (c & 0xFF))
	74
	75	/* Return the raw 8-bit byte for character C,
	76	or -1 if C doesn't correspond to a byte. */
	77	#define CHAR_TO_BYTE_SAFE(c) \
	78	(ASCII_CHAR_P (c) ? c : (CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 : -1))
	79
	80	/* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
	81	that corresponds to a raw 8-bit byte. */
	82	#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 \|\| (byte) == 0xC1)
	83
	84	/* If C is not ASCII, make it unibyte. */
	85	#define MAKE_CHAR_UNIBYTE(c) \
	86	do { \
	87	if (! ASCII_CHAR_P (c)) \
	88	c = CHAR_TO_BYTE8 (c); \
	89	} while (false)
	90
	91
	92	/* If C is not ASCII, make it multibyte. Assumes C < 256. */
	93	#define MAKE_CHAR_MULTIBYTE(c) \
	94	(eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c))
	95
	96	/* This is the maximum byte length of multibyte form. */
	97	#define MAX_MULTIBYTE_LENGTH 5
	98
	99	/* Nonzero iff X is a character. */
	100	#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
	101
	102	/* Nonzero iff C is valid as a character code. */
	103	#define CHAR_VALID_P(c) UNSIGNED_CMP (c, <=, MAX_CHAR)
	104
	105	/* Check if Lisp object X is a character or not. */
	106	#define CHECK_CHARACTER(x) \
	107	CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
	108
	109	#define CHECK_CHARACTER_CAR(x) \
	110	do { \
	111	Lisp_Object tmp = XCAR (x); \
	112	CHECK_CHARACTER (tmp); \
	113	XSETCAR ((x), tmp); \
	114	} while (false)
	115
	116	#define CHECK_CHARACTER_CDR(x) \
	117	do { \
	118	Lisp_Object tmp = XCDR (x); \
	119	CHECK_CHARACTER (tmp); \
	120	XSETCDR ((x), tmp); \
	121	} while (false)
	122
	123	/* Nonzero iff C is a character of code less than 0x100. */
	124	#define SINGLE_BYTE_CHAR_P(c) UNSIGNED_CMP (c, <, 0x100)
	125
	126	/* Nonzero if character C has a printable glyph. */
	127	#define CHAR_PRINTABLE_P(c) \
	128	(((c) >= 32 && (c) < 127) \
	129	\|\| ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))
	130
	131	/* Return byte length of multibyte form for character C. */
	132	#define CHAR_BYTES(c) \
	133	( (c) <= MAX_1_BYTE_CHAR ? 1 \
	134	: (c) <= MAX_2_BYTE_CHAR ? 2 \
	135	: (c) <= MAX_3_BYTE_CHAR ? 3 \
	136	: (c) <= MAX_4_BYTE_CHAR ? 4 \
	137	: (c) <= MAX_5_BYTE_CHAR ? 5 \
	138	: 2)
	139
	140
	141	/* Return the leading code of multibyte form of C. */
	142	#define CHAR_LEADING_CODE(c) \
	143	((c) <= MAX_1_BYTE_CHAR ? c \
	144	: (c) <= MAX_2_BYTE_CHAR ? (0xC0 \| ((c) >> 6)) \
	145	: (c) <= MAX_3_BYTE_CHAR ? (0xE0 \| ((c) >> 12)) \
	146	: (c) <= MAX_4_BYTE_CHAR ? (0xF0 \| ((c) >> 18)) \
	147	: (c) <= MAX_5_BYTE_CHAR ? 0xF8 \
	148	: (0xC0 \| (((c) >> 6) & 0x01)))
	149
	150
	151	/* Store multibyte form of the character C in P. The caller should
	152	allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
	153	Returns the length of the multibyte form. */
	154
	155	#define CHAR_STRING(c, p) \
	156	(UNSIGNED_CMP (c, <=, MAX_1_BYTE_CHAR) \
	157	? ((p)[0] = (c), \
	158	1) \
	159	: UNSIGNED_CMP (c, <=, MAX_2_BYTE_CHAR) \
	160	? ((p)[0] = (0xC0 \| ((c) >> 6)), \
	161	(p)[1] = (0x80 \| ((c) & 0x3F)), \
	162	2) \
	163	: UNSIGNED_CMP (c, <=, MAX_3_BYTE_CHAR) \
	164	? ((p)[0] = (0xE0 \| ((c) >> 12)), \
	165	(p)[1] = (0x80 \| (((c) >> 6) & 0x3F)), \
	166	(p)[2] = (0x80 \| ((c) & 0x3F)), \
	167	3) \
	168	: verify_expr (sizeof (c) <= sizeof (unsigned), char_string (c, p)))
	169
	170	/* Store multibyte form of byte B in P. The caller should allocate at
	171	least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the
	172	length of the multibyte form. */
	173
	174	#define BYTE8_STRING(b, p) \
	175	((p)[0] = (0xC0 \| (((b) >> 6) & 0x01)), \
	176	(p)[1] = (0x80 \| ((b) & 0x3F)), \
	177	2)
	178
	179
	180	/* Store multibyte form of the character C in P and advance P to the
	181	end of the multibyte form. The caller should allocate at least
	182	MAX_MULTIBYTE_LENGTH bytes area at P in advance. */
	183
	184	#define CHAR_STRING_ADVANCE(c, p) \
	185	do { \
	186	if ((c) <= MAX_1_BYTE_CHAR) \
	187	*(p)++ = (c); \
	188	else if ((c) <= MAX_2_BYTE_CHAR) \
	189	*(p)++ = (0xC0 \| ((c) >> 6)), \
	190	*(p)++ = (0x80 \| ((c) & 0x3F)); \
	191	else if ((c) <= MAX_3_BYTE_CHAR) \
	192	*(p)++ = (0xE0 \| ((c) >> 12)), \
	193	*(p)++ = (0x80 \| (((c) >> 6) & 0x3F)), \
	194	*(p)++ = (0x80 \| ((c) & 0x3F)); \
	195	else \
	196	{ \
	197	verify (sizeof (c) <= sizeof (unsigned)); \
	198	(p) += char_string (c, p); \
	199	} \
	200	} while (false)
	201
	202
	203	/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
	204	form. */
	205	#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
	206
	207	/* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
	208	multibyte form. */
	209	#define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
	210
	211	/* Nonzero iff BYTE starts a character in a multibyte form.
	212	This is equivalent to:
	213	(ASCII_CHAR_P (byte) \|\| LEADING_CODE_P (byte)) */
	214	#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
	215
	216	/* How many bytes a character that starts with BYTE occupies in a
	217	multibyte form. */
	218	#define BYTES_BY_CHAR_HEAD(byte) \
	219	(!((byte) & 0x80) ? 1 \
	220	: !((byte) & 0x20) ? 2 \
	221	: !((byte) & 0x10) ? 3 \
	222	: !((byte) & 0x08) ? 4 \
	223	: 5)
	224
	225
	226	/* The byte length of multibyte form at unibyte string P ending at
	227	PEND. If STR doesn't point to a valid multibyte form, return 0. */
	228
	229	#define MULTIBYTE_LENGTH(p, pend) \
	230	(p >= pend ? 0 \
	231	: !((p)[0] & 0x80) ? 1 \
	232	: ((p + 1 >= pend) \|\| (((p)[1] & 0xC0) != 0x80)) ? 0 \
	233	: ((p)[0] & 0xE0) == 0xC0 ? 2 \
	234	: ((p + 2 >= pend) \|\| (((p)[2] & 0xC0) != 0x80)) ? 0 \
	235	: ((p)[0] & 0xF0) == 0xE0 ? 3 \
	236	: ((p + 3 >= pend) \|\| (((p)[3] & 0xC0) != 0x80)) ? 0 \
	237	: ((p)[0] & 0xF8) == 0xF0 ? 4 \
	238	: ((p + 4 >= pend) \|\| (((p)[4] & 0xC0) != 0x80)) ? 0 \
	239	: (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
	240	: 0)
	241
	242
	243	/* Like MULTIBYTE_LENGTH, but don't check the ending address. */
	244
	245	#define MULTIBYTE_LENGTH_NO_CHECK(p) \
	246	(!((p)[0] & 0x80) ? 1 \
	247	: ((p)[1] & 0xC0) != 0x80 ? 0 \
	248	: ((p)[0] & 0xE0) == 0xC0 ? 2 \
	249	: ((p)[2] & 0xC0) != 0x80 ? 0 \
	250	: ((p)[0] & 0xF0) == 0xE0 ? 3 \
	251	: ((p)[3] & 0xC0) != 0x80 ? 0 \
	252	: ((p)[0] & 0xF8) == 0xF0 ? 4 \
	253	: ((p)[4] & 0xC0) != 0x80 ? 0 \
	254	: (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
	255	: 0)
	256
	257	/* If P is before LIMIT, advance P to the next character boundary.
	258	Assumes that P is already at a character boundary of the same
	259	multibyte form whose end address is LIMIT. */
	260
	261	#define NEXT_CHAR_BOUNDARY(p, limit) \
	262	do { \
	263	if ((p) < (limit)) \
	264	(p) += BYTES_BY_CHAR_HEAD (*(p)); \
	265	} while (false)
	266
	267
	268	/* If P is after LIMIT, advance P to the previous character boundary.
	269	Assumes that P is already at a character boundary of the same
	270	multibyte form whose beginning address is LIMIT. */
	271
	272	#define PREV_CHAR_BOUNDARY(p, limit) \
	273	do { \
	274	if ((p) > (limit)) \
	275	{ \
	276	const unsigned char *chp = (p); \
	277	do { \
	278	chp--; \
	279	} while (chp >= limit && ! CHAR_HEAD_P (*chp)); \
	280	(p) = (BYTES_BY_CHAR_HEAD (*chp) == (p) - chp) ? chp : (p) - 1; \
	281	} \
	282	} while (false)
	283
	284	/* Return the character code of character whose multibyte form is at
	285	P. Note that this macro unifies CJK characters whose codepoints
	286	are in the Private Use Areas (PUAs), so it might return a different
	287	codepoint from the one actually stored at P. */
	288
	289	#define STRING_CHAR(p) \
	290	(!((p)[0] & 0x80) \
	291	? (p)[0] \
	292	: ! ((p)[0] & 0x20) \
	293	? (((((p)[0] & 0x1F) << 6) \
	294	\| ((p)[1] & 0x3F)) \
	295	+ (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)) \
	296	: ! ((p)[0] & 0x10) \
	297	? ((((p)[0] & 0x0F) << 12) \
	298	\| (((p)[1] & 0x3F) << 6) \
	299	\| ((p)[2] & 0x3F)) \
	300	: string_char ((p), NULL, NULL))
	301
	302
	303	/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
	304	form.
	305
	306	Note: This macro returns the actual length of the character's
	307	multibyte sequence as it is stored in a buffer or string. The
	308	character it returns might have a different codepoint that has a
	309	different multibyte sequence of a different length, due to possible
	310	unification of CJK characters inside string_char. Therefore do NOT
	311	assume that the length returned by this macro is identical to the
	312	length of the multibyte sequence of the character it returns. */
	313
	314	#define STRING_CHAR_AND_LENGTH(p, actual_len) \
	315	(!((p)[0] & 0x80) \
	316	? ((actual_len) = 1, (p)[0]) \
	317	: ! ((p)[0] & 0x20) \
	318	? ((actual_len) = 2, \
	319	(((((p)[0] & 0x1F) << 6) \
	320	\| ((p)[1] & 0x3F)) \
	321	+ (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))) \
	322	: ! ((p)[0] & 0x10) \
	323	? ((actual_len) = 3, \
	324	((((p)[0] & 0x0F) << 12) \
	325	\| (((p)[1] & 0x3F) << 6) \
	326	\| ((p)[2] & 0x3F))) \
	327	: string_char ((p), NULL, &actual_len))
	328
	329
	330	/* Like STRING_CHAR, but advance P to the end of multibyte form. */
	331
	332	#define STRING_CHAR_ADVANCE(p) \
	333	(!((p)[0] & 0x80) \
	334	? *(p)++ \
	335	: ! ((p)[0] & 0x20) \
	336	? ((p) += 2, \
	337	((((p)[-2] & 0x1F) << 6) \
	338	\| ((p)[-1] & 0x3F) \
	339	\| ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
	340	: ! ((p)[0] & 0x10) \
	341	? ((p) += 3, \
	342	((((p)[-3] & 0x0F) << 12) \
	343	\| (((p)[-2] & 0x3F) << 6) \
	344	\| ((p)[-1] & 0x3F))) \
	345	: string_char ((p), &(p), NULL))
	346
	347
	348	/* Fetch the "next" character from Lisp string STRING at byte position
	349	BYTEIDX, character position CHARIDX. Store it into OUTPUT.
	350
	351	All the args must be side-effect-free.
	352	BYTEIDX and CHARIDX must be lvalues;
	353	we increment them past the character fetched. */
	354
	355	#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
	356	do \
	357	{ \
	358	CHARIDX++; \
	359	if (STRING_MULTIBYTE (STRING)) \
	360	{ \
	361	unsigned char *chp = &SDATA (STRING)[BYTEIDX]; \
	362	int chlen; \
	363	\
	364	OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
	365	BYTEIDX += chlen; \
	366	} \
	367	else \
	368	{ \
	369	OUTPUT = SREF (STRING, BYTEIDX); \
	370	BYTEIDX++; \
	371	} \
	372	} \
	373	while (false)
	374
	375	/* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
	376	even if STRING is unibyte. */
	377
	378	#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
	379	do \
	380	{ \
	381	CHARIDX++; \
	382	if (STRING_MULTIBYTE (STRING)) \
	383	{ \
	384	unsigned char *chp = &SDATA (STRING)[BYTEIDX]; \
	385	int chlen; \
	386	\
	387	OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
	388	BYTEIDX += chlen; \
	389	} \
	390	else \
	391	{ \
	392	OUTPUT = SREF (STRING, BYTEIDX); \
	393	BYTEIDX++; \
	394	MAKE_CHAR_MULTIBYTE (OUTPUT); \
	395	} \
	396	} \
	397	while (false)
	398
	399
	400	/* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte. */
	401
	402	#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
	403	do \
	404	{ \
	405	unsigned char *fetch_ptr = &SDATA (STRING)[BYTEIDX]; \
	406	int fetch_len; \
	407	\
	408	OUTPUT = STRING_CHAR_AND_LENGTH (fetch_ptr, fetch_len); \
	409	BYTEIDX += fetch_len; \
	410	CHARIDX++; \
	411	} \
	412	while (false)
	413
	414
	415	/* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
	416	buffer. */
	417
	418	#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
	419	do \
	420	{ \
	421	CHARIDX++; \
	422	if (!NILP (BVAR (current_buffer, enable_multibyte_characters))) \
	423	{ \
	424	unsigned char *chp = BYTE_POS_ADDR (BYTEIDX); \
	425	int chlen; \
	426	\
	427	OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
	428	BYTEIDX += chlen; \
	429	} \
	430	else \
	431	{ \
	432	OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
	433	BYTEIDX++; \
	434	} \
	435	} \
	436	while (false)
	437
	438
	439	/* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte. */
	440
	441	#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
	442	do \
	443	{ \
	444	unsigned char *chp = BYTE_POS_ADDR (BYTEIDX); \
	445	int chlen; \
	446	\
	447	OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
	448	BYTEIDX += chlen; \
	449	CHARIDX++; \
	450	} \
	451	while (false)
	452
	453
	454	/* Increment the buffer byte position POS_BYTE of the current buffer to
	455	the next character boundary. No range checking of POS. */
	456
	457	#define INC_POS(pos_byte) \
	458	do { \
	459	unsigned char *chp = BYTE_POS_ADDR (pos_byte); \
	460	pos_byte += BYTES_BY_CHAR_HEAD (*chp); \
	461	} while (false)
	462
	463
	464	/* Decrement the buffer byte position POS_BYTE of the current buffer to
	465	the previous character boundary. No range checking of POS. */
	466
	467	#define DEC_POS(pos_byte) \
	468	do { \
	469	unsigned char *chp; \
	470	\
	471	pos_byte--; \
	472	if (pos_byte < GPT_BYTE) \
	473	chp = BEG_ADDR + pos_byte - BEG_BYTE; \
	474	else \
	475	chp = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE; \
	476	while (!CHAR_HEAD_P (*chp)) \
	477	{ \
	478	chp--; \
	479	pos_byte--; \
	480	} \
	481	} while (false)
	482
	483	/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
	484
	485	#define INC_BOTH(charpos, bytepos) \
	486	do \
	487	{ \
	488	(charpos)++; \
	489	if (NILP (BVAR (current_buffer, enable_multibyte_characters))) \
	490	(bytepos)++; \
	491	else \
	492	INC_POS ((bytepos)); \
	493	} \
	494	while (false)
	495
	496
	497	/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */
	498
	499	#define DEC_BOTH(charpos, bytepos) \
	500	do \
	501	{ \
	502	(charpos)--; \
	503	if (NILP (BVAR (current_buffer, enable_multibyte_characters))) \
	504	(bytepos)--; \
	505	else \
	506	DEC_POS ((bytepos)); \
	507	} \
	508	while (false)
	509
	510
	511	/* Increment the buffer byte position POS_BYTE of the current buffer to
	512	the next character boundary. This macro relies on the fact that
	513	GPT_ADDR and Z_ADDR are always accessible and the values are
	514	'\0'. No range checking of POS_BYTE. */
	515
	516	#define BUF_INC_POS(buf, pos_byte) \
	517	do { \
	518	unsigned char *chp = BUF_BYTE_ADDRESS (buf, pos_byte); \
	519	pos_byte += BYTES_BY_CHAR_HEAD (*chp); \
	520	} while (false)
	521
	522
	523	/* Decrement the buffer byte position POS_BYTE of the current buffer to
	524	the previous character boundary. No range checking of POS_BYTE. */
	525
	526	#define BUF_DEC_POS(buf, pos_byte) \
	527	do { \
	528	unsigned char *chp; \
	529	pos_byte--; \
	530	if (pos_byte < BUF_GPT_BYTE (buf)) \
	531	chp = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE; \
	532	else \
	533	chp = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
	534	while (!CHAR_HEAD_P (*chp)) \
	535	{ \
	536	chp--; \
	537	pos_byte--; \
	538	} \
	539	} while (false)
	540
	541
	542	/* Return a non-outlandish value for the tab width. */
	543
	544	#define SANE_TAB_WIDTH(buf) \
	545	sanitize_tab_width (XFASTINT (BVAR (buf, tab_width)))
	546	INLINE int
	547	sanitize_tab_width (EMACS_INT width)
	548	{
	549	return 0 < width && width <= 1000 ? width : 8;
	550	}
	551
	552	/* Return the width of ASCII character C. The width is measured by
	553	how many columns C will occupy on the screen when displayed in the
	554	current buffer. */
	555
	556	#define ASCII_CHAR_WIDTH(c) \
	557	(c < 0x20 \
	558	? (c == '\t' \
	559	? SANE_TAB_WIDTH (current_buffer) \
	560	: (c == '\n' ? 0 : (NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))) \
	561	: (c < 0x7f \
	562	? 1 \
	563	: ((NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))))
	564
	565	/* Return a non-outlandish value for a character width. */
	566
	567	INLINE int
	568	sanitize_char_width (EMACS_INT width)
	569	{
	570	return 0 <= width && width <= 1000 ? width : 1000;
	571	}
	572
	573	/* Return the width of character C. The width is measured by how many
	574	columns C will occupy on the screen when displayed in the current
	575	buffer. */
	576
	577	#define CHAR_WIDTH(c) \
	578	(ASCII_CHAR_P (c) \
	579	? ASCII_CHAR_WIDTH (c) \
	580	: sanitize_char_width (XINT (CHAR_TABLE_REF (Vchar_width_table, c))))
	581
	582	/* If C is a variation selector, return the index of the
	583	variation selector (1..256). Otherwise, return 0. */
	584
	585	#define CHAR_VARIATION_SELECTOR_P(c) \
	586	((c) < 0xFE00 ? 0 \
	587	: (c) <= 0xFE0F ? (c) - 0xFE00 + 1 \
	588	: (c) < 0xE0100 ? 0 \
	589	: (c) <= 0xE01EF ? (c) - 0xE0100 + 17 \
	590	: 0)
	591
	592	/* If C is a high surrogate, return 1. If C is a low surrogate,
	593	return 2. Otherwise, return 0. */
	594
	595	#define CHAR_SURROGATE_PAIR_P(c) \
	596	((c) < 0xD800 ? 0 \
	597	: (c) <= 0xDBFF ? 1 \
	598	: (c) <= 0xDFFF ? 2 \
	599	: 0)
	600
	601	/* Data type for Unicode general category.
	602
	603	The order of members must be in sync with the 8th element of the
	604	member of unidata-prop-alist (in admin/unidata/unidata-gen.el) for
	605	Unicode character property `general-category'. */
	606
	607	typedef enum {
	608	UNICODE_CATEGORY_UNKNOWN = 0,
	609	UNICODE_CATEGORY_Lu,
	610	UNICODE_CATEGORY_Ll,
	611	UNICODE_CATEGORY_Lt,
	612	UNICODE_CATEGORY_Lm,
	613	UNICODE_CATEGORY_Lo,
	614	UNICODE_CATEGORY_Mn,
	615	UNICODE_CATEGORY_Mc,
	616	UNICODE_CATEGORY_Me,
	617	UNICODE_CATEGORY_Nd,
	618	UNICODE_CATEGORY_Nl,
	619	UNICODE_CATEGORY_No,
	620	UNICODE_CATEGORY_Pc,
	621	UNICODE_CATEGORY_Pd,
	622	UNICODE_CATEGORY_Ps,
	623	UNICODE_CATEGORY_Pe,
	624	UNICODE_CATEGORY_Pi,
	625	UNICODE_CATEGORY_Pf,
	626	UNICODE_CATEGORY_Po,
	627	UNICODE_CATEGORY_Sm,
	628	UNICODE_CATEGORY_Sc,
	629	UNICODE_CATEGORY_Sk,
	630	UNICODE_CATEGORY_So,
	631	UNICODE_CATEGORY_Zs,
	632	UNICODE_CATEGORY_Zl,
	633	UNICODE_CATEGORY_Zp,
	634	UNICODE_CATEGORY_Cc,
	635	UNICODE_CATEGORY_Cf,
	636	UNICODE_CATEGORY_Cs,
	637	UNICODE_CATEGORY_Co,
	638	UNICODE_CATEGORY_Cn
	639	} unicode_category_t;
	640
	641	extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST;
	642	extern int char_string (unsigned, unsigned char *);
	643	extern int string_char (const unsigned char *,
	644	const unsigned char *, int );
	645
	646	extern int translate_char (Lisp_Object, int c);
	647	extern void parse_str_as_multibyte (const unsigned char *,
	648	ptrdiff_t, ptrdiff_t , ptrdiff_t );
	649	extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
	650	extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
	651	ptrdiff_t *);
	652	extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
	653	extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
	654	extern ptrdiff_t str_to_unibyte (const unsigned char , unsigned char ,
	655	ptrdiff_t);
	656	extern ptrdiff_t strwidth (const char *, ptrdiff_t);
	657	extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
	658	ptrdiff_t , ptrdiff_t );
	659	extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t,
	660	ptrdiff_t , ptrdiff_t );
	661
	662	extern Lisp_Object Qcharacterp;
	663	extern Lisp_Object Vchar_unify_table;
	664	extern Lisp_Object string_escape_byte8 (Lisp_Object);
	665
	666	/* Return a translation table of id number ID. */
	667	#define GET_TRANSLATION_TABLE(id) \
	668	(XCDR (XVECTOR (Vtranslation_table_vector)->contents[(id)]))
	669
	670	INLINE_HEADER_END
	671
	672	#endif /* EMACS_CHARACTER_H */