[bpt/emacs.git] / src / character.h

/* Header for multibyte character handler.
   Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   Licensed to the Free Software Foundation.
   Copyright (C) 2001, 2002
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#ifndef EMACS_CHARACTER_H
#define EMACS_CHARACTER_H

/*      0-7F		0xxxxxxx
			00..7F
       80-7FF		110xxxxx 10xxxxxx
			C2..DF   80..BF
      800-FFFF		1110xxxx 10xxxxxx 10xxxxxx
			E0..EF   80..BF   80..BF
    10000-1FFFFF	11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			F0..F7   80..BF   80..BF   80..BF
   200000-3FFF7F	11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
			F8       80..8F   80..BF   80..BF   80..BF
   invalid		11111001
			F9
   invalid		1111101x
			FA..FB
   invalid		111111xx
			FC..FE

   raw-8-bit
   3FFF80-3FFFFF	1100000x 10xxxxxx
			C0..C1   80..BF

*/

/* This is the maximum character code ((1 << CHARACTERBITS) - 1).  */
#define MAX_CHAR  0x3FFFFF

/* The largest character code that is a Unicode code point.  */
#define MAX_UNICODE_CHAR 0x10FFFF

/* The largest character codes whose multibyte forms occupy 1, 2, 3,
   4 and 5 bytes respectively.  Codes above MAX_5_BYTE_CHAR (up to
   MAX_CHAR) are raw 8-bit bytes, which use a 2-byte form.  */
#define MAX_1_BYTE_CHAR 0x7F
#define MAX_2_BYTE_CHAR 0x7FF
#define MAX_3_BYTE_CHAR 0xFFFF
#define MAX_4_BYTE_CHAR 0x1FFFFF
#define MAX_5_BYTE_CHAR 0x3FFF7F

/* A raw 8-bit byte is represented as a character code by adding
   0x3FFF00 to the byte value; these two macros convert in each
   direction.  */
#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
#define CHAR_TO_BYTE8(c) ((c) - 0x3FFF00)
/* Nonzero iff character code C is above MAX_5_BYTE_CHAR, i.e. lies in
   the raw 8-bit range.  */
#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
/* Nonzero iff BYTE is 0xC0 or 0xC1, the leading bytes of the 2-byte
   form of a raw 8-bit character.  */
#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)

/* This is the maximum byte length of the multibyte form of a single
   character.  */
#define MAX_MULTIBYTE_LENGTH 5

/* Return a Lisp character whose code is C.  This is simply
   make_number applied to C.  */
#define make_char(c) make_number (c)

/* Nonzero iff C, converted to unsigned, does not exceed 0x7F, i.e. C
   is an ASCII byte.  */
#define ASCII_BYTE_P(c) ((unsigned) (c) <= 0x7F)

/* Nonzero iff Lisp object X is a character, i.e. it satisfies NATNUMP
   and its integer value does not exceed MAX_CHAR.  X is evaluated
   more than once.  */
#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)

/* Nonzero iff C is valid as a character code.  GENERICP is not used
   now.  It will be removed in the future.  */
#define CHAR_VALID_P(c, genericp) CHARACTERP (c)

/* Verify that Lisp object X is a character.  If it is not, X is
   replaced by whatever wrong_type_argument returns for Qcharacterp
   and X.  X must be an lvalue.  */
#define CHECK_CHARACTER(x)						\
  do									\
    {									\
      if (! CHARACTERP (x))						\
	x = wrong_type_argument (Qcharacterp, (x));			\
    }									\
  while (0)

/* Nonzero iff C is an ASCII character (code 0x7F or lower).  */
#define ASCII_CHAR_P(c) ((unsigned) (c) <= 0x7F)

/* Nonzero iff C is a character whose code fits in a single byte
   (0xFF or lower).  */
#define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) <= 0xFF)

/* Nonzero if character C has a printable glyph: either C lies in the
   range 32..126, or its entry in Vprintable_chars is non-nil.  C is
   evaluated more than once.  */
#define CHAR_PRINTABLE_P(c)					\
  (((c) >= 32 && (c) < 127)					\
   || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))

/* Number of bytes character C occupies in a multibyte buffer.  The
   chain is written from the top of the code space downwards: raw
   8-bit characters (above MAX_5_BYTE_CHAR) take 2 bytes, everything
   else takes as many bytes as the smallest MAX_n_BYTE_CHAR limit it
   fits under.  C is evaluated more than once.  */
#define CHAR_BYTES(c)			\
  ( (c) > MAX_5_BYTE_CHAR ? 2		\
    : (c) > MAX_4_BYTE_CHAR ? 5		\
    : (c) > MAX_3_BYTE_CHAR ? 4		\
    : (c) > MAX_2_BYTE_CHAR ? 3		\
    : (c) > MAX_1_BYTE_CHAR ? 2		\
    : 1)

/* Store the multibyte form of the character C at P.  The caller
   should allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in
   advance.  Returns the length of the multibyte form.

   1-, 2- and 3-byte forms are stored inline.  Other codes up to
   MAX_5_BYTE_CHAR are handed to char_string_with_unification, whose
   return value becomes the value of the macro.  Codes above
   MAX_5_BYTE_CHAR (raw 8-bit characters) are stored as a 2-byte form
   whose leading byte is 0xC0 or 0xC1.  C and P are evaluated more
   than once.  */

#define CHAR_STRING(c, p)			\
  ((unsigned) (c) <= MAX_1_BYTE_CHAR		\
   ? ((p)[0] = (c),				\
      1)					\
   : (unsigned) (c) <= MAX_2_BYTE_CHAR		\
   ? ((p)[0] = (0xC0 | ((c) >> 6)),		\
      (p)[1] = (0x80 | ((c) & 0x3F)),		\
      2)					\
   : (unsigned) (c) <= MAX_3_BYTE_CHAR		\
   ? ((p)[0] = (0xE0 | ((c) >> 12)),		\
      (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),	\
      (p)[2] = (0x80 | ((c) & 0x3F)),		\
      3)					\
   : (unsigned) (c) <= MAX_5_BYTE_CHAR		\
   ? char_string_with_unification (c, p, NULL)	\
   : ((p)[0] = (0xC0 | (((c) >> 6) & 0x01)),	\
      (p)[1] = (0x80 | ((c) & 0x3F)),		\
      2))


/* Like CHAR_STRING, but advance P to the end of the multibyte
   form.  P must be an lvalue: it is incremented inline and its
   address is passed to char_string_with_unification.

   NOTE(review): unlike CHAR_STRING, the inline branches evaluate to
   the last byte stored rather than to a length, so the value of this
   expression should not be relied upon.  C and P are evaluated more
   than once.  */

#define CHAR_STRING_ADVANCE(c, p)		\
  ((unsigned) (c) <= MAX_1_BYTE_CHAR		\
   ? *(p)++ = (c)				\
   : (unsigned) (c) <= MAX_2_BYTE_CHAR		\
   ? (*(p)++ = (0xC0 | ((c) >> 6)),		\
      *(p)++ = (0x80 | ((c) & 0x3F)))		\
   : (unsigned) (c) <= MAX_3_BYTE_CHAR		\
   ? (*(p)++ = (0xE0 | ((c) >> 12)),		\
      *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),	\
      *(p)++ = (0x80 | ((c) & 0x3F)))		\
   : (unsigned) (c) <= MAX_5_BYTE_CHAR		\
   ? char_string_with_unification (c, p, &p)	\
   : (*(p)++ = (0xC0 | (((c) >> 6) & 0x01)),	\
      *(p)++ = (0x80 | ((c) & 0x3F))))


/* Nonzero iff BYTE starts a character in a multibyte form, i.e. BYTE
   is not a trailing byte of the shape 10xxxxxx.  */
#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)

/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
   form, i.e. its two highest bits (of the low eight) are both set.  */
#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)

/* Just kept for backward compatibility.  This macro will be removed
   in the future.  It is an alias of LEADING_CODE_P.  */
#define BASE_LEADING_CODE_P LEADING_CODE_P

/* Length in bytes of the multibyte form whose leading byte is BYTE.
   The length is found by testing bits 0x80, 0x20, 0x10 and 0x08 in
   turn; the first one that is clear decides the answer, and a byte
   with all four set heads a 5-byte form.  BYTE is evaluated more than
   once.  */
#define BYTES_BY_CHAR_HEAD(byte)		\
  (((byte) & 0x80) == 0 ? 1			\
   : ((byte) & 0x20) == 0 ? 2			\
   : ((byte) & 0x10) == 0 ? 3			\
   : ((byte) & 0x08) == 0 ? 4			\
   : 5)


/* Return the length of the multibyte form at string STR, assuming
   that STR points to a valid multibyte form.  The argument LEN is
   ignored.  As this macro isn't necessary anymore, all callers will be
   changed to use BYTES_BY_CHAR_HEAD directly in the future.  */

#define MULTIBYTE_FORM_LENGTH(str, len)		\
  BYTES_BY_CHAR_HEAD (*(str))

/* Parse multibyte string STR of length LENGTH and set BYTES to the
   byte length of the character at STR, assuming that STR points to a
   valid multibyte form.  LENGTH is ignored.  BYTES must be an lvalue.

   The expansion is fully parenthesized so that the macro can be used
   safely inside a larger expression; its value is the length that was
   stored in BYTES.  As this macro isn't necessary anymore, all
   callers will be changed to use BYTES_BY_CHAR_HEAD directly in the
   future.  */

#define PARSE_MULTIBYTE_SEQ(str, length, bytes)	\
  ((bytes) = BYTES_BY_CHAR_HEAD (*(str)))

/* The byte length of the multibyte form at unibyte string P ending at
   PEND.  If P doesn't point to a valid multibyte form that lies
   entirely before PEND, return 0.

   Each trailing byte is bounds-checked against PEND before it is
   read.  A 5-byte form is accepted only when its leading byte is 0xF8
   and its second byte is in the range 0x80..0x8F.  P and PEND are
   evaluated more than once; both are parenthesized so that arbitrary
   pointer expressions may be passed.  */

#define MULTIBYTE_LENGTH(p, pend)				\
  ((p) >= (pend) ? 0						\
   : !((p)[0] & 0x80) ? 1					\
   : (((p) + 1 >= (pend)) || (((p)[1] & 0xC0) != 0x80)) ? 0	\
   : ((p)[0] & 0xE0) == 0xC0 ? 2				\
   : (((p) + 2 >= (pend)) || (((p)[2] & 0xC0) != 0x80)) ? 0	\
   : ((p)[0] & 0xF0) == 0xE0 ? 3				\
   : (((p) + 3 >= (pend)) || (((p)[3] & 0xC0) != 0x80)) ? 0	\
   : ((p)[0] & 0xF8) == 0xF0 ? 4				\
   : (((p) + 4 >= (pend)) || (((p)[4] & 0xC0) != 0x80)) ? 0	\
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5		\
   : 0)


/* Same as MULTIBYTE_LENGTH except that no end address is consulted:
   trailing bytes are read unconditionally, so the caller must make
   sure they are accessible.  Written as nested conditionals: each
   level first requires the next trailing byte to have the shape
   10xxxxxx, then checks whether the leading byte announces a form of
   exactly that length.  Yields 0 for an invalid form.  */

#define MULTIBYTE_LENGTH_NO_CHECK(p)					\
  (((p)[0] & 0x80) == 0 ? 1						\
   : ((p)[1] & 0xC0) == 0x80						\
   ? (((p)[0] & 0xE0) == 0xC0 ? 2					\
      : ((p)[2] & 0xC0) == 0x80						\
      ? (((p)[0] & 0xF0) == 0xE0 ? 3					\
	 : ((p)[3] & 0xC0) == 0x80					\
	 ? (((p)[0] & 0xF8) == 0xF0 ? 4					\
	    : ((p)[4] & 0xC0) == 0x80					\
	    ? ((p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 : 0)	\
	    : 0)							\
	 : 0)								\
      : 0)								\
   : 0)


/* Return the character code of character whose multibyte form is at
   P.  The argument LEN is ignored.  It will be removed in the
   future.

   1-, 2- and 3-byte forms are decoded inline.  A 2-byte form whose
   leading byte is below 0xC2 (i.e. 0xC0 or 0xC1) is a raw 8-bit
   character, so 0x3FFF80 is added to the decoded value.  Longer
   forms are handed to string_char_with_unification.  P is evaluated
   more than once.  */

#define STRING_CHAR(p, len)					\
  (!((p)[0] & 0x80)						\
   ? (p)[0]							\
   : ! ((p)[0] & 0x20)						\
   ? (((((p)[0] & 0x1F) << 6)					\
       | ((p)[1] & 0x3F))					\
      + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))	\
   : ! ((p)[0] & 0x10)						\
   ? ((((p)[0] & 0x0F) << 12)					\
      | (((p)[1] & 0x3F) << 6)					\
      | ((p)[2] & 0x3F))					\
   : string_char_with_unification (p, NULL, NULL))


/* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
   form.  The argument LEN is ignored.  It will be removed in the
   future.

   ACTUAL_LEN must be an lvalue: it is assigned inline for 1-, 2- and
   3-byte forms, and its address is passed to
   string_char_with_unification for longer forms.  P is evaluated
   more than once.  */

#define STRING_CHAR_AND_LENGTH(p, len, actual_len)		\
  (!((p)[0] & 0x80)						\
   ? ((actual_len) = 1, (p)[0])					\
   : ! ((p)[0] & 0x20)						\
   ? ((actual_len) = 2,						\
      (((((p)[0] & 0x1F) << 6)					\
	| ((p)[1] & 0x3F))					\
       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))	\
   : ! ((p)[0] & 0x10)						\
   ? ((actual_len) = 3,						\
      ((((p)[0] & 0x0F) << 12)					\
       | (((p)[1] & 0x3F) << 6)					\
       | ((p)[2] & 0x3F)))					\
   : string_char_with_unification (p, NULL, &actual_len))


/* Like STRING_CHAR but advance P to the end of multibyte form.

   P must be an lvalue: it is advanced inline for 1-, 2- and 3-byte
   forms, and its address is passed to string_char_with_unification
   for longer forms.  In the 2-byte case the raw 8-bit offset 0x3FFF80
   is combined with `|' where STRING_CHAR uses `+'; the results agree
   because the offset is only applied when the leading byte is 0xC0
   or 0xC1, in which case the decoded value fits in the low 7 bits,
   which are all zero in 0x3FFF80.  */

#define STRING_CHAR_ADVANCE(p)					\
  (!((p)[0] & 0x80)						\
   ? *(p)++							\
   : ! ((p)[0] & 0x20)						\
   ? ((p) += 2,							\
      ((((p)[-2] & 0x1F) << 6)					\
       | ((p)[-1] & 0x3F)					\
       | (((unsigned char) (p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))	\
   : ! ((p)[0] & 0x10)						\
   ? ((p) += 3,							\
      ((((p)[-3] & 0x0F) << 12)					\
       | (((p)[-2] & 0x3F) << 6)				\
       | ((p)[-1] & 0x3F)))					\
   : string_char_with_unification (p, &p, NULL))


/* Fetch the "next" character from Lisp string STRING at byte position
   BYTEIDX, character position CHARIDX.  Store it into OUTPUT.

   All the args must be side-effect-free.
   BYTEIDX and CHARIDX must be lvalues;
   we increment them past the character fetched.

   For a multibyte STRING the character is decoded with
   STRING_CHAR_AND_LENGTH and BYTEIDX advances by its byte length;
   otherwise a single byte is fetched and BYTEIDX advances by one.

   The expansion is an `if (1) { ... } else' statement, so the
   invocation must be followed by a semicolon.  The block declares
   locals named `ptr' and `len'; arguments must not refer to caller
   variables with those names.  */

#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)	\
  if (1)								\
    {									\
      CHARIDX++;							\
      if (STRING_MULTIBYTE (STRING))					\
	{								\
	  unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX];	\
	  int len;							\
									\
	  OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);		\
	  BYTEIDX += len;						\
	}								\
      else								\
	OUTPUT = XSTRING (STRING)->data[BYTEIDX++];			\
    }									\
  else


/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte, so
   the STRING_MULTIBYTE test is omitted and the character is always
   decoded with STRING_CHAR_AND_LENGTH.

   The expansion is an `if (1) { ... } else' statement, so the
   invocation must be followed by a semicolon.  The block declares
   locals named `ptr' and `len'; arguments must not refer to caller
   variables with those names.  */

#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  if (1)								     \
    {									     \
      unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX];		     \
      int len;								     \
									     \
      OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);			     \
      BYTEIDX += len;							     \
      CHARIDX++;							     \
    }									     \
  else


/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
   buffer.

   When current_buffer->enable_multibyte_characters is non-nil the
   character at BYTE_POS_ADDR (BYTEIDX) is decoded with
   STRING_CHAR_AND_LENGTH and BYTEIDX advances by its byte length;
   otherwise one byte is fetched and BYTEIDX advances by one.  CHARIDX
   is incremented in both cases.

   The expansion is an `if (1) { ... } else' statement, so the
   invocation must be followed by a semicolon.  The block declares
   locals named `ptr' and `len'; arguments must not refer to caller
   variables with those names.  */

#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)		\
  if (1)							\
    {								\
      CHARIDX++;						\
      if (!NILP (current_buffer->enable_multibyte_characters))	\
	{							\
	  unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX);		\
	  int len;						\
								\
	  OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len);		\
	  BYTEIDX += len;					\
	}							\
      else							\
	{							\
	  OUTPUT = *(BYTE_POS_ADDR (BYTEIDX));			\
	  BYTEIDX++;						\
	}							\
    }								\
  else


/* Like FETCH_CHAR_ADVANCE but assumes the current buffer is
   multibyte: enable_multibyte_characters is not tested and the
   character is always decoded with STRING_CHAR_AND_LENGTH.

   The expansion is an `if (1) { ... } else' statement, so the
   invocation must be followed by a semicolon.  The block declares
   locals named `ptr' and `len'; arguments must not refer to caller
   variables with those names.  */

#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)	\
  if (1)							\
    {								\
      unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX);		\
      int len;							\
								\
      OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len);		\
      BYTEIDX += len;						\
      CHARIDX++;						\
    }								\
  else


/* Increase the buffer byte position POS_BYTE of the current buffer to
   the next character boundary.  No range checking of POS_BYTE.

   POS_BYTE must be an lvalue.  The byte at BYTE_POS_ADDR (POS_BYTE)
   is taken as the head of a character and POS_BYTE is advanced by
   the length BYTES_BY_CHAR_HEAD reports for it.  The temporary uses a
   macro-private name so that it cannot capture a caller variable
   mentioned in POS_BYTE (for example one called `p').  */

#define INC_POS(pos_byte)						\
  do {									\
    unsigned char *inc_pos_ptr_ = BYTE_POS_ADDR (pos_byte);		\
    (pos_byte) += BYTES_BY_CHAR_HEAD (*inc_pos_ptr_);			\
  } while (0)


/* Decrease the buffer byte position POS_BYTE of the current buffer to
   the previous character boundary.  No range checking of POS_BYTE.

   POS_BYTE must be an lvalue.  It is decremented once, the address
   of that byte is computed (adding GAP_SIZE when the position is not
   below GPT_BYTE), and then both the address and POS_BYTE step
   backwards until a byte satisfying CHAR_HEAD_P is reached.

   Every use of POS_BYTE is parenthesized so that arguments such as
   `*posp' are decremented correctly, and the temporary uses a
   macro-private name so that it cannot capture a caller variable.  */

#define DEC_POS(pos_byte)						\
  do {									\
    unsigned char *dec_pos_ptr_;					\
									\
    (pos_byte)--;							\
    if ((pos_byte) < GPT_BYTE)						\
      dec_pos_ptr_ = BEG_ADDR + (pos_byte) - 1;				\
    else								\
      dec_pos_ptr_ = BEG_ADDR + GAP_SIZE + (pos_byte) - 1;		\
    while (!CHAR_HEAD_P (*dec_pos_ptr_))				\
      {									\
	dec_pos_ptr_--;							\
	(pos_byte)--;							\
      }									\
  } while (0)

/* Advance CHARPOS by one character and BYTEPOS by the matching number
   of bytes: in a multibyte current buffer BYTEPOS moves with INC_POS,
   otherwise it moves by exactly one byte.  */

#define INC_BOTH(charpos, bytepos)				\
  do								\
    {								\
      (charpos)++;						\
      if (! NILP (current_buffer->enable_multibyte_characters))	\
	INC_POS ((bytepos));					\
      else							\
	(bytepos)++;						\
    }								\
  while (0)


/* Move CHARPOS back by one character and BYTEPOS back by the matching
   number of bytes: in a multibyte current buffer BYTEPOS moves with
   DEC_POS, otherwise it moves by exactly one byte.  */

#define DEC_BOTH(charpos, bytepos)				\
  do								\
    {								\
      (charpos)--;						\
      if (! NILP (current_buffer->enable_multibyte_characters))	\
	DEC_POS ((bytepos));					\
      else							\
	(bytepos)--;						\
    }								\
  while (0)


/* Increase the buffer byte position POS_BYTE of buffer BUF to
   the next character boundary.  This macro relies on the fact that
   *GPT_ADDR and *Z_ADDR are always accessible and the values are
   '\0'.  No range checking of POS_BYTE.

   POS_BYTE must be an lvalue.  The temporary uses a macro-private
   name so that it cannot capture a caller variable mentioned in BUF
   or POS_BYTE (for example one called `p').  */

#define BUF_INC_POS(buf, pos_byte)					\
  do {									\
    unsigned char *buf_inc_pos_ptr_ = BUF_BYTE_ADDRESS (buf, pos_byte);	\
    (pos_byte) += BYTES_BY_CHAR_HEAD (*buf_inc_pos_ptr_);		\
  } while (0)


/* Decrease the buffer byte position POS_BYTE of buffer BUF to
   the previous character boundary.  No range checking of POS_BYTE.

   POS_BYTE must be an lvalue.  It is decremented once, the address
   of that byte within BUF is computed (adding BUF_GAP_SIZE when the
   position is not below BUF_GPT_BYTE), and then both the address and
   POS_BYTE step backwards until a byte satisfying CHAR_HEAD_P is
   reached.

   Every use of POS_BYTE is parenthesized so that arguments such as
   `*posp' are decremented correctly, and the temporary uses a
   macro-private name so that it cannot capture a caller variable.  */

#define BUF_DEC_POS(buf, pos_byte)					\
  do {									\
    unsigned char *buf_dec_pos_ptr_;					\
    (pos_byte)--;							\
    if ((pos_byte) < BUF_GPT_BYTE (buf))				\
      buf_dec_pos_ptr_ = BUF_BEG_ADDR (buf) + (pos_byte) - 1;		\
    else								\
      buf_dec_pos_ptr_							\
	= BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + (pos_byte) - 1;	\
    while (!CHAR_HEAD_P (*buf_dec_pos_ptr_))				\
      {									\
	buf_dec_pos_ptr_--;						\
	(pos_byte)--;							\
      }									\
  } while (0)


/* If Vchar_unify_table is a char-table, look C up in it and, when
   the entry read as an integer is non-negative, replace C by that
   value.  If the entry is a symbol, Funify_charset is first called
   on it (with Qnil) and C is looked up again.

   C must be an lvalue and is evaluated more than once.  The
   expansion is an `if (...) { ... } else' statement, so the
   invocation must be followed by a semicolon.  */

#define MAYBE_UNIFY_CHAR(c)				\
  if (CHAR_TABLE_P (Vchar_unify_table))			\
    {							\
      Lisp_Object val;					\
      int unified;					\
							\
      val = CHAR_TABLE_REF (Vchar_unify_table, c);	\
      if (SYMBOLP (val))				\
	{						\
	  Funify_charset (val, Qnil);			\
	  val = CHAR_TABLE_REF (Vchar_unify_table, c);	\
	}						\
      if ((unified = XINT (val)) >= 0)			\
	c = unified;					\
    }							\
  else

/* Return the width of ASCII character C.  The width is measured by
   how many columns occupied on the screen when displayed in the
   current buffer.

   A tab has the width stored in current_buffer->tab_width and a
   newline has width 0.  Every other code below 0x20, as well as any
   code that is not below 0x7f, has width 2, or 4 when
   current_buffer->ctl_arrow is nil.  Remaining characters have
   width 1.  C is evaluated more than once; it is parenthesized at
   every use so that expressions such as `ch & 0x7f' are handled
   correctly.  */

#define ASCII_CHAR_WIDTH(c)						\
  ((c) < 0x20								\
   ? ((c) == '\t'							\
      ? XFASTINT (current_buffer->tab_width)				\
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))	\
   : ((c) < 0x7f							\
      ? 1								\
      : ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))

/* Width of character C, in screen columns, when displayed in the
   current buffer.  Non-ASCII characters take their width from
   Vchar_width_table; ASCII characters are delegated to
   ASCII_CHAR_WIDTH.  */

#define CHAR_WIDTH(c)					\
  (! ASCII_CHAR_P (c)					\
   ? XINT (CHAR_TABLE_REF (Vchar_width_table, c))	\
   : ASCII_CHAR_WIDTH (c))

/* Out-of-line helpers called by CHAR_STRING, CHAR_STRING_ADVANCE,
   STRING_CHAR, STRING_CHAR_AND_LENGTH and STRING_CHAR_ADVANCE for the
   cases those macros do not handle inline.  The macros pass NULL for
   the pointer arguments they do not need.  */
extern int char_string_with_unification P_ ((int, unsigned char *,
					     unsigned char **));
extern int string_char_with_unification P_ ((unsigned char *,
					     unsigned char **, int *));

extern int translate_char P_ ((Lisp_Object, int c));
extern int char_printable_p P_ ((int c));
extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *));
extern int parse_str_to_multibyte P_ ((unsigned char *, int));
extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
extern int str_to_multibyte P_ ((unsigned char *, int, int));
extern int str_as_unibyte P_ ((unsigned char *, int));
extern int strwidth P_ ((unsigned char *, int));
extern int c_string_width P_ ((unsigned char *, int, int, int *, int *));
extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));

extern Lisp_Object Vprintable_chars;

extern Lisp_Object Qcharacterp, Qauto_fill_chars;
extern Lisp_Object Vtranslation_table_vector;
extern Lisp_Object Vchar_width_table;
extern Lisp_Object Vchar_direction_table;
extern Lisp_Object Vchar_unify_table;

/* Return a translation table of id number ID: the cdr of element ID
   of the vector Vtranslation_table_vector.  No range checking of
   ID.  */
#define GET_TRANSLATION_TABLE(id) \
  (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))

/* A char-table for characters which may invoke auto-filling.  */
extern Lisp_Object Vauto_fill_chars;

/* Copy LEN bytes from FROM to TO.  This macro should be used only
   when a caller knows that LEN is short and the obvious copy loop is
   faster than calling bcopy which has some overhead.  Copying a
   multibyte sequence of a character is the typical case.

   Each argument is evaluated exactly once, in the order LEN, FROM,
   TO.  Nothing is copied when LEN is zero or negative.  The
   temporaries use macro-private names so that callers may pass
   variables called `i', `from_p' or `to_p'.  */

#define BCOPY_SHORT(from, to, len)				\
  do {								\
    int bcopy_short_n_ = (len);					\
    const unsigned char *bcopy_short_src_ = (from);		\
    unsigned char *bcopy_short_dst_ = (to);			\
    while (bcopy_short_n_-- > 0)				\
      *bcopy_short_dst_++ = *bcopy_short_src_++;		\
  } while (0)

/* Set SYM to the result of interning NAME, then pass the address of
   SYM to staticpro.  SYM must be an lvalue and is evaluated more
   than once.  */
#define DEFSYM(sym, name)	\
  do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)

#endif /* EMACS_CHARACTER_H */
Commit	Line	Data
0168c3d8 KH	1	/* Header for multibyte character handler.
	2	Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
	3	Licensed to the Free Software Foundation.
	4	Copyright (C) 2001, 2002
	5	National Institute of Advanced Industrial Science and Technology (AIST)
	6	Registration Number H13PRO009
	7
	8	This file is part of GNU Emacs.
	9
	10	GNU Emacs is free software; you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
	12	the Free Software Foundation; either version 2, or (at your option)
	13	any later version.
	14
	15	GNU Emacs is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
	19
	20	You should have received a copy of the GNU General Public License
	21	along with GNU Emacs; see the file COPYING. If not, write to
	22	the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	23	Boston, MA 02111-1307, USA. */
	24
	25	#ifndef EMACS_CHARACTER_H
	26	#define EMACS_CHARACTER_H
	27
	28	/* 0-7F 0xxxxxxx
	29	00..7F
	30	80-7FF 110xxxxx 10xxxxxx
	31	C2..DF 80..BF
	32	800-FFFF 1110xxxx 10xxxxxx 10xxxxxx
	33	E0..EF 80..BF 80..BF
	34	10000-1FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	35	F0..F7 80..BF 80..BF 80..BF
	36	200000-3FFF7F 11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
	37	F8 80..8F 80..BF 80..BF 80..BF
	38	invalid 11111001
	39	F9
	40	invalid 1111101x
	41	FA..FB
	42	invalid 111111xx
	43	FC..FE
	44
	45	raw-8-bit
	46	3FFF80-3FFFFF 1100000x 10xxxxxx
	47	C0..C1 80..BF
	48
	49	*/
	50
	51	/* This is the maximum character code ((1 << CHARACTERBITS) - 1). */
	52	#define MAX_CHAR 0x3FFFFF
	53
	54	#define MAX_UNICODE_CHAR 0x10FFFF
	55
	56	#define MAX_1_BYTE_CHAR 0x7F
	57	#define MAX_2_BYTE_CHAR 0x7FF
	58	#define MAX_3_BYTE_CHAR 0xFFFF
	59	#define MAX_4_BYTE_CHAR 0x1FFFFF
	60	#define MAX_5_BYTE_CHAR 0x3FFF7F
	61
	62	#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
	63	#define CHAR_TO_BYTE8(c) ((c) - 0x3FFF00)
	64	#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
65	#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 \|\| (byte) == 0xC1)
66
67	/* This is the maximum byte length of multi-byte sequence. */
68	#define MAX_MULTIBYTE_LENGTH 5
69
70	/* Return a Lisp character whose code is C. */
71	#define make_char(c) make_number (c)
72
73	/* Nonzero iff C is an ASCII byte. */
74	#define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80)
75
76	/* Nonzero iff X is a character. */
77	#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
78
79	/* Nozero iff C is valid as a charater code. GENERICP is not used
80	now. It will be removed in the future. */
81	#define CHAR_VALID_P(c, genericp) CHARACTERP (c)
82
83	/* Check if Lisp object X is a character or not. */
84	#define CHECK_CHARACTER(x) \
85	do { \
86	if (! CHARACTERP(x)) x = wrong_type_argument (Qcharacterp, (x)); \
87	} while (0)
88
89	/* Nonzero iff C is an ASCII character. */
90	#define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
91
92	/* Nonzero iff C is a character of code less than 0x100. */
93	#define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
94
95	/* Nonzero if character C has a printable glyph. */
96	#define CHAR_PRINTABLE_P(c) \
97	(((c) >= 32 && ((c) < 127) \
98	\|\| ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c)))))
99
100	/* How many bytes C occupies in a multibyte buffer. */
101	#define CHAR_BYTES(c) \
102	( (c) <= MAX_1_BYTE_CHAR ? 1 \
103	: (c) <= MAX_2_BYTE_CHAR ? 2 \
104	: (c) <= MAX_3_BYTE_CHAR ? 3 \
105	: (c) <= MAX_4_BYTE_CHAR ? 4 \
106	: (c) <= MAX_5_BYTE_CHAR ? 5 \
107	: 2)
108
109	/* Store multibyte form of the character C in STR. The caller should
110	allocate at least MAX_MULTIBYTE_LENGTH bytes area at STR in
111	advance. Returns the length of the multibyte form. */
112
113	#define CHAR_STRING(c, p) \
114	((unsigned) (c) <= MAX_1_BYTE_CHAR \
115	? ((p)[0] = (c), \
116	1) \
117	: (unsigned) (c) <= MAX_2_BYTE_CHAR \
118	? ((p)[0] = (0xC0 \| ((c) >> 6)), \
119	(p)[1] = (0x80 \| ((c) & 0x3F)), \
120	2) \
121	: (unsigned) (c) <= MAX_3_BYTE_CHAR \
122	? ((p)[0] = (0xE0 \| ((c) >> 12)), \
123	(p)[1] = (0x80 \| (((c) >> 6) & 0x3F)), \
124	(p)[2] = (0x80 \| ((c) & 0x3F)), \
125	3) \
126	: (unsigned) (c) <= MAX_5_BYTE_CHAR \
127	? char_string_with_unification (c, p, NULL) \
128	: ((p)[0] = (0xC0 \| (((c) >> 6) & 0x01)), \
129	(p)[1] = (0x80 \| ((c) & 0x3F)), \
130	2))
131
132
133	/* Like CHAR_STRING, but advance P to the end of the multibyte
134	form. */
135
136	#define CHAR_STRING_ADVANCE(c, p) \
137	((unsigned) (c) <= MAX_1_BYTE_CHAR \
138	? *(p)++ = (c) \
139	: (unsigned) (c) <= MAX_2_BYTE_CHAR \
140	? (*(p)++ = (0xC0 \| ((c) >> 6)), \
141	*(p)++ = (0x80 \| ((c) & 0x3F))) \
142	: (unsigned) (c) <= MAX_3_BYTE_CHAR \
143	? (*(p)++ = (0xE0 \| ((c) >> 12)), \
144	*(p)++ = (0x80 \| (((c) >> 6) & 0x3F)), \
145	*(p)++ = (0x80 \| ((c) & 0x3F))) \
146	: (unsigned) (c) <= MAX_5_BYTE_CHAR \
147	? char_string_with_unification (c, p, &p) \
148	: (*(p)++ = (0xC0 \| (((c) >> 6) & 0x01)), \
149	*(p)++ = (0x80 \| ((c) & 0x3F))))
150
151
152	/* Nonzero iff BYTE starts a character in a multibyte form. */
153	#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
154
155	/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
156	form. */
157	#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
158
159	/* Just kept for backward compatibility. This macro will be removed
160	in the future. */
161	#define BASE_LEADING_CODE_P LEADING_CODE_P
162
163	/* How many bytes a character that starts with BYTE occupies in a
164	multibyte form. */
165	#define BYTES_BY_CHAR_HEAD(byte) \
166	(!((byte) & 0x80) ? 1 \
167	: !((byte) & 0x20) ? 2 \
168	: !((byte) & 0x10) ? 3 \
169	: !((byte) & 0x08) ? 4 \
170	: 5)
171
172
173	/* Return the length of the multi-byte form at string STR of length
174	LEN while assuming that STR points a valid multi-byte form. As
175	this macro isn't necessary anymore, all callers will be changed to
176	use BYTES_BY_CHAR_HEAD directly in the future. */
177
178	#define MULTIBYTE_FORM_LENGTH(str, len) \
179	BYTES_BY_CHAR_HEAD (*(str))
180
181	/* Parse multibyte string STR of length LENGTH and set BYTES to the
182	byte length of a character at STR while assuming that STR points a
183	valid multibyte form. As this macro isn't necessary anymore, all
184	callers will be changed to use BYTES_BY_CHAR_HEAD directly in the
185	future. */
186
187	#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
188	(bytes) = BYTES_BY_CHAR_HEAD (*(str))
189
190	/* The byte length of multibyte form at unibyte string P ending at
191	PEND. If STR doesn't point a valid multibyte form, return 0. */
192
193	#define MULTIBYTE_LENGTH(p, pend) \
194	(p >= pend ? 0 \
195	: !((p)[0] & 0x80) ? 1 \
196	: ((p + 1 >= pend) \|\| (((p)[1] & 0xC0) != 0x80)) ? 0 \
197	: ((p)[0] & 0xE0) == 0xC0 ? 2 \
198	: ((p + 2 >= pend) \|\| (((p)[2] & 0xC0) != 0x80)) ? 0 \
199	: ((p)[0] & 0xF0) == 0xE0 ? 3 \
200	: ((p + 3 >= pend) \|\| (((p)[3] & 0xC0) != 0x80)) ? 0 \
201	: ((p)[0] & 0xF8) == 0xF0 ? 4 \
202	: ((p + 4 >= pend) \|\| (((p)[4] & 0xC0) != 0x80)) ? 0 \
203	: (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
204	: 0)
205
206
207	/* Like MULTIBYTE_LENGTH but don't check the ending address. */
208
209	#define MULTIBYTE_LENGTH_NO_CHECK(p) \
210	(!((p)[0] & 0x80) ? 1 \
211	: ((p)[1] & 0xC0) != 0x80 ? 0 \
212	: ((p)[0] & 0xE0) == 0xC0 ? 2 \
213	: ((p)[2] & 0xC0) != 0x80 ? 0 \
214	: ((p)[0] & 0xF0) == 0xE0 ? 3 \
215	: ((p)[3] & 0xC0) != 0x80 ? 0 \
216	: ((p)[0] & 0xF8) == 0xF0 ? 4 \
217	: ((p)[4] & 0xC0) != 0x80 ? 0 \
218	: (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
219	: 0)
220
221
222	/* Return the character code of character whose multibyte form is at
223	P. The argument LEN is ignored. It will be removed in the
224	future. */
225
226	#define STRING_CHAR(p, len) \
227	(!((p)[0] & 0x80) \
228	? (p)[0] \
229	: ! ((p)[0] & 0x20) \
230	? (((((p)[0] & 0x1F) << 6) \
231	\| ((p)[1] & 0x3F)) \
232	+ (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)) \
233	: ! ((p)[0] & 0x10) \
234	? ((((p)[0] & 0x0F) << 12) \
235	\| (((p)[1] & 0x3F) << 6) \
236	\| ((p)[2] & 0x3F)) \
237	: string_char_with_unification (p, NULL, NULL))
238
239
240	/* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
241	form. The argument LEN is ignored. It will be removed in the
242	future. */
243
244	#define STRING_CHAR_AND_LENGTH(p, len, actual_len) \
245	(!((p)[0] & 0x80) \
246	? ((actual_len) = 1, (p)[0]) \
247	: ! ((p)[0] & 0x20) \
248	? ((actual_len) = 2, \
249	(((((p)[0] & 0x1F) << 6) \
250	\| ((p)[1] & 0x3F)) \
251	+ (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))) \
252	: ! ((p)[0] & 0x10) \
253	? ((actual_len) = 3, \
254	((((p)[0] & 0x0F) << 12) \
255	\| (((p)[1] & 0x3F) << 6) \
256	\| ((p)[2] & 0x3F))) \
257	: string_char_with_unification (p, NULL, &actual_len))
258
259
260	/* Like STRING_CHAR but advacen P to the end of multibyte form. */
261
262	#define STRING_CHAR_ADVANCE(p) \
263	(!((p)[0] & 0x80) \
264	? *(p)++ \
265	: ! ((p)[0] & 0x20) \
266	? ((p) += 2, \
267	((((p)[-2] & 0x1F) << 6) \
268	\| ((p)[-1] & 0x3F) \
269	\| (((unsigned char) (p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
270	: ! ((p)[0] & 0x10) \
271	? ((p) += 3, \
272	((((p)[-3] & 0x0F) << 12) \
273	\| (((p)[-2] & 0x3F) << 6) \
274	\| ((p)[-1] & 0x3F))) \
275	: string_char_with_unification (p, &p, NULL))
276
277
278	/* Fetch the "next" character from Lisp string STRING at byte position
279	BYTEIDX, character position CHARIDX. Store it into OUTPUT.
280
281	All the args must be side-effect-free.
282	BYTEIDX and CHARIDX must be lvalues;
283	we increment them past the character fetched. */
284
285	#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
286	if (1) \
287	{ \
288	CHARIDX++; \
289	if (STRING_MULTIBYTE (STRING)) \
290	{ \
291	unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
292	int len; \
293	\
294	OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
295	BYTEIDX += len; \
296	} \
297	else \
298	OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
299	} \
300	else
301
302
303	/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte. */
304
305	#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
306	if (1) \
307	{ \
308	unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
309	int len; \
310	\
311	OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
312	BYTEIDX += len; \
313	CHARIDX++; \
314	} \
315	else
316
317
318	/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
319	buffer. */
320
321	#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
322	if (1) \
323	{ \
324	CHARIDX++; \
325	if (!NILP (current_buffer->enable_multibyte_characters)) \
326	{ \
327	unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
328	int len; \
329	\
330	OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
331	BYTEIDX += len; \
332	} \
333	else \
334	{ \
335	OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
336	BYTEIDX++; \
337	} \
338	} \
339	else
340
341
342	/* Like FETCH_CHAR_ADVANCE but assumes STRING is multibyte. */
343
344	#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
345	if (1) \
346	{ \
347	unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
348	int len; \
349	\
350	OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
351	BYTEIDX += len; \
352	CHARIDX++; \
353	} \
354	else
355
356
357	/* Increase the buffer byte position POS_BYTE of the current buffer to
358	the next character boundary. No range checking of POS. */
359
360	#define INC_POS(pos_byte) \
361	do { \
362	unsigned char *p = BYTE_POS_ADDR (pos_byte); \
363	pos_byte += BYTES_BY_CHAR_HEAD (*p); \
364	} while (0)
365
366
367	/* Decrease the buffer byte position POS_BYTE of the current buffer to
368	the previous character boundary. No range checking of POS. */
369
370	#define DEC_POS(pos_byte) \
371	do { \
372	unsigned char *p; \
373	\
374	pos_byte--; \
375	if (pos_byte < GPT_BYTE) \
376	p = BEG_ADDR + pos_byte - 1; \
377	else \
378	p = BEG_ADDR + GAP_SIZE + pos_byte - 1; \
379	while (!CHAR_HEAD_P (*p)) \
380	{ \
381	p--; \
382	pos_byte--; \
383	} \
384	} while (0)
385
386	/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
387
388	#define INC_BOTH(charpos, bytepos) \
389	do \
390	{ \
391	(charpos)++; \
392	if (NILP (current_buffer->enable_multibyte_characters)) \
393	(bytepos)++; \
394	else \
395	INC_POS ((bytepos)); \
396	} \
397	while (0)
398
399
400	/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */
401
402	#define DEC_BOTH(charpos, bytepos) \
403	do \
404	{ \
405	(charpos)--; \
406	if (NILP (current_buffer->enable_multibyte_characters)) \
407	(bytepos)--; \
408	else \
409	DEC_POS ((bytepos)); \
410	} \
411	while (0)
412
413
414	/* Increase the buffer byte position POS_BYTE of the current buffer to
415	the next character boundary. This macro relies on the fact that
	416	*GPT_ADDR and *Z_ADDR are always accessible and the values are
417	'\0'. No range checking of POS_BYTE. */
418
419	#define BUF_INC_POS(buf, pos_byte) \
420	do { \
421	unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
422	pos_byte += BYTES_BY_CHAR_HEAD (*p); \
423	} while (0)
424
425
426	/* Decrease the buffer byte position POS_BYTE of the current buffer to
427	the previous character boundary. No range checking of POS_BYTE. */
428
429	#define BUF_DEC_POS(buf, pos_byte) \
430	do { \
431	unsigned char *p; \
432	pos_byte--; \
433	if (pos_byte < BUF_GPT_BYTE (buf)) \
434	p = BUF_BEG_ADDR (buf) + pos_byte - 1; \
435	else \
436	p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
437	while (!CHAR_HEAD_P (*p)) \
438	{ \
439	p--; \
440	pos_byte--; \
441	} \
442	} while (0)
443
444
445	#define MAYBE_UNIFY_CHAR(c) \
446	if (CHAR_TABLE_P (Vchar_unify_table)) \
447	{ \
448	Lisp_Object val; \
449	int unified; \
450	\
451	val = CHAR_TABLE_REF (Vchar_unify_table, c); \
452	if (SYMBOLP (val)) \
453	{ \
454	Funify_charset (val, Qnil); \
455	val = CHAR_TABLE_REF (Vchar_unify_table, c); \
456	} \
457	if ((unified = XINT (val)) >= 0) \
458	c = unified; \
459	} \
460	else
461
462	/* Return the width of ASCII character C. The width is measured by
463	how many columns occupied on the screen when displayed in the
464	current buffer. */
465
466	#define ASCII_CHAR_WIDTH(c) \
467	(c < 0x20 \
468	? (c == '\t' \
469	? XFASTINT (current_buffer->tab_width) \
470	: (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
471	: (c < 0x7f \
472	? 1 \
473	: ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))
474
475	/* Return the width of character C. The width is measured by how many
476	columns occupied on the screen when displayed in the current
477	buffer. */
478
479	#define CHAR_WIDTH(c) \
480	(ASCII_CHAR_P (c) \
481	? ASCII_CHAR_WIDTH (c) \
482	: XINT (CHAR_TABLE_REF (Vchar_width_table, c)))
483
484	extern int char_string_with_unification P_ ((int, unsigned char *,
485	unsigned char **));
486	extern int string_char_with_unification P_ ((unsigned char *,
	487	unsigned char **, int *));
488
489	extern int translate_char P_ ((Lisp_Object, int c));
490	extern int char_printable_p P_ ((int c));
	491	extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *));
492	extern int parse_str_to_multibyte P_ ((unsigned char *, int));
	493	extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
494	extern int str_to_multibyte P_ ((unsigned char *, int, int));
495	extern int str_as_unibyte P_ ((unsigned char *, int));
496	extern int strwidth P_ ((unsigned char *, int));
	497	extern int c_string_width P_ ((unsigned char *, int, int, int *, int *));
	498	extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
499
500	extern Lisp_Object Vprintable_chars;
501
502	extern Lisp_Object Qcharacterp, Qauto_fill_chars;
503	extern Lisp_Object Vtranslation_table_vector;
504	extern Lisp_Object Vchar_width_table;
505	extern Lisp_Object Vchar_direction_table;
506	extern Lisp_Object Vchar_unify_table;
507
508	/* Return a translation table of id number ID. */
509	#define GET_TRANSLATION_TABLE(id) \
510	(XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
511
512	/* A char-table for characters which may invoke auto-filling. */
513	extern Lisp_Object Vauto_fill_chars;
514
515	/* Copy LEN bytes from FROM to TO. This macro should be used only
516	when a caller knows that LEN is short and the obvious copy loop is
517	faster than calling bcopy which has some overhead. Copying a
518	multibyte sequence of a character is the typical case. */
519
520	#define BCOPY_SHORT(from, to, len) \
521	do { \
522	int i = len; \
	523	unsigned char *from_p = from, *to_p = to;
	524	while (i--) *to_p++ = *from_p++;
525	} while (0)
526
527	#define DEFSYM(sym, name) \
528	do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
529
530	#endif /* EMACS_CHARACTER_H */