[bpt/emacs.git] / src / character.h

/* Header for multibyte character handler.
   Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   Licensed to the Free Software Foundation.
   Copyright (C) 2001, 2002
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#ifndef EMACS_CHARACTER_H
#define EMACS_CHARACTER_H

/* character code	1st byte   byte sequence
   --------------	--------   -------------
        0-7F		00..7F	   0xxxxxxx
       80-7FF		C2..DF	   110xxxxx 10xxxxxx
      800-FFFF		E0..EF	   1110xxxx 10xxxxxx 10xxxxxx
    10000-1FFFFF	F0..F7	   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
   200000-3FFF7F	F8	   11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
      invalid		F9..FF

   raw-8-bit
   3FFF80-3FFFFF	C0..C1	   1100000x 10xxxxxx
*/

/* Maximum character code ((1 << CHARACTERBITS) - 1).  This is the
   upper bound tested by CHARACTERP and CHAR_VALID_P.  */
#define MAX_CHAR  0x3FFFFF

/* Maximum Unicode character code.  */
#define MAX_UNICODE_CHAR 0x10FFFF

/* Maximum N-byte character codes, i.e. the largest character code
   whose multibyte form occupies N bytes (see the byte-sequence table
   near the top of this file).  Codes above MAX_5_BYTE_CHAR, up to
   MAX_CHAR, are the raw-8-bit characters; they use a 2-byte form
   whose leading byte is C0 or C1.  */
#define MAX_1_BYTE_CHAR 0x7F
#define MAX_2_BYTE_CHAR 0x7FF
#define MAX_3_BYTE_CHAR 0xFFFF
#define MAX_4_BYTE_CHAR 0x1FFFFF
#define MAX_5_BYTE_CHAR 0x3FFF7F

/* Leading code range of Latin-1 chars.  With the 2-byte layout
   110xxxxx 10xxxxxx, leading bytes C2 and C3 cover the character
   codes 0x80..0xFF.  */
#define LEADING_CODE_LATIN_1_MIN 0xC2
#define LEADING_CODE_LATIN_1_MAX 0xC3

/* True (nonzero) exactly when character code C lies above
   MAX_5_BYTE_CHAR, i.e. when C stands for a raw 8-bit byte.  */
#define CHAR_BYTE8_P(c) (MAX_5_BYTE_CHAR < (c))

/* Map raw 8-bit byte BYTE to its character code by adding the fixed
   offset 0x3FFF00 (so byte 0x80 becomes character 0x3FFF80).  */
#define BYTE8_TO_CHAR(byte) (0x3FFF00 + (byte))

/* Convert character C to a single byte.  A raw-8-bit character (one
   for which CHAR_BYTE8_P holds) yields C minus 0x3FFF00; any other
   character is handed to multibyte_char_to_unibyte (C, Qnil).  */
#define CHAR_TO_BYTE8(c)			\
  (! CHAR_BYTE8_P (c)				\
   ? multibyte_char_to_unibyte (c, Qnil)	\
   : (c) - 0x3FFF00)

/* True (nonzero) exactly when BYTE equals 0xC0 or 0xC1, the two
   leading bytes used by the multibyte form of a raw 8-bit byte.  */
#define CHAR_BYTE8_HEAD_P(byte) (0xC0 == (byte) || 0xC1 == (byte))

/* Table giving, for each unibyte character code 0..255, the
   corresponding multibyte character.  */
extern int unibyte_to_multibyte_table[256];

/* Translate unibyte character C through unibyte_to_multibyte_table.
   A value of C that is 256 or larger cannot be looked up and is
   returned unchanged.  */
#define unibyte_char_to_multibyte(c)	\
  ((c) >= 256 ? (c) : unibyte_to_multibyte_table[(c)])

/* When the value held in lvalue C is not ASCII (0x80 or above when
   viewed as unsigned), overwrite C with CHAR_TO_BYTE8 (C).  An ASCII
   value is left untouched.  */
#define MAKE_CHAR_UNIBYTE(c)		\
  do {					\
    if ((unsigned) (c) >= 0x80)		\
      (c) = CHAR_TO_BYTE8 (c);		\
  } while (0)


/* Replace the value in lvalue C by unibyte_to_multibyte_table[C].
   The lookup is unconditional (no ASCII test is made here), and C is
   used directly as the index, so it assumes 0 <= C < 256.  The macro
   is an expression whose value is the new C.  */
#define MAKE_CHAR_MULTIBYTE(c) ((c) = unibyte_to_multibyte_table[(c)])

/* This is the maximum byte length of multibyte form.  */
#define MAX_MULTIBYTE_LENGTH 5

/* Return a Lisp character whose character code is C.  This is simply
   make_number (C).  */
#define make_char(c) make_number (c)

/* True (nonzero) exactly when byte C, converted to unsigned, is below
   0x80; negative values therefore never qualify.  */
#define ASCII_BYTE_P(c) (0x80 > (unsigned) (c))

/* True (nonzero) exactly when Lisp object X satisfies NATNUMP and its
   XFASTINT value does not exceed MAX_CHAR.  */
#define CHARACTERP(x) (NATNUMP (x) && MAX_CHAR >= XFASTINT (x))

/* True (nonzero) exactly when C, converted to unsigned, is at most
   MAX_CHAR.  The second argument GENERICP is accepted but ignored.  */
#define CHAR_VALID_P(c, genericp) (MAX_CHAR >= (unsigned) (c))

/* Verify that Lisp object X is a character.  When CHARACTERP (X)
   fails, X (which must then be an lvalue) is overwritten with the
   value of wrong_type_argument (Qcharacterp, X).  */
#define CHECK_CHARACTER(x)				\
  do {							\
    if (! CHARACTERP (x))				\
      (x) = wrong_type_argument (Qcharacterp, (x));	\
  } while (0)

/* True (nonzero) exactly when character C, converted to unsigned, is
   below 0x80.  */
#define ASCII_CHAR_P(c) (0x80 > (unsigned) (c))

/* True (nonzero) exactly when character C, converted to unsigned, is
   below 0x100.  */
#define SINGLE_BYTE_CHAR_P(c) (0x100 > (unsigned) (c))

/* Nonzero if character C has a printable glyph: either C lies in the
   range 32..126, or its entry in the char-table Vprintable_chars is
   non-nil.  */
#define CHAR_PRINTABLE_P(c)					\
  ((32 <= (c) && (c) < 127)					\
   || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))

/* Number of bytes in the multibyte form of character C: 1 to 5
   according to which MAX_N_BYTE_CHAR bound C fits under, and 2 for
   anything above MAX_5_BYTE_CHAR (the raw-8-bit characters).  */
#define CHAR_BYTES(c)			\
  (MAX_1_BYTE_CHAR >= (c) ? 1		\
   : MAX_2_BYTE_CHAR >= (c) ? 2		\
   : MAX_3_BYTE_CHAR >= (c) ? 3		\
   : MAX_4_BYTE_CHAR >= (c) ? 4		\
   : MAX_5_BYTE_CHAR >= (c) ? 5		\
   : 2)


/* First byte of the multibyte form of character C.  For a 1-byte
   character that is C itself; for 2-, 3- and 4-byte characters it is
   the length prefix (0xC0, 0xE0, 0xF0) OR-ed with the top bits of C;
   for 5-byte characters it is always 0xF8; and for characters above
   MAX_5_BYTE_CHAR it is 0xC0 or 0xC1, chosen by bit 6 of C.  */
#define CHAR_LEADING_CODE(c)				\
  (MAX_1_BYTE_CHAR >= (c) ? (c)				\
   : MAX_2_BYTE_CHAR >= (c) ? (((c) >> 6) | 0xC0)	\
   : MAX_3_BYTE_CHAR >= (c) ? (((c) >> 12) | 0xE0)	\
   : MAX_4_BYTE_CHAR >= (c) ? (((c) >> 18) | 0xF0)	\
   : MAX_5_BYTE_CHAR >= (c) ? 0xF8			\
   : ((((c) >> 6) & 0x01) | 0xC0))


/* Write the multibyte form of character C into the byte array P and
   yield its length.  Characters of up to three bytes are encoded
   inline; anything larger is delegated to char_string (C, P).  The
   caller must provide at least MAX_MULTIBYTE_LENGTH bytes at P.
   C and P are evaluated more than once.  */

#define CHAR_STRING(c, p)				\
  (MAX_1_BYTE_CHAR >= (unsigned) (c)			\
   ? ((p)[0] = (c),					\
      1)						\
   : MAX_2_BYTE_CHAR >= (unsigned) (c)			\
   ? ((p)[0] = (((c) >> 6) | 0xC0),			\
      (p)[1] = (((c) & 0x3F) | 0x80),			\
      2)						\
   : MAX_3_BYTE_CHAR >= (unsigned) (c)			\
   ? ((p)[0] = (((c) >> 12) | 0xE0),			\
      (p)[1] = ((((c) >> 6) & 0x3F) | 0x80),		\
      (p)[2] = (((c) & 0x3F) | 0x80),			\
      3)						\
   : char_string ((c), (p)))

/* Store the 2-byte multibyte form of raw 8-bit byte B in P and return
   its length, which is always 2.  The first byte is 0xC0 or 0xC1,
   selected by bit 6 of B; the second byte is 0x80 OR-ed with the low
   six bits of B.  The caller should allocate at least
   MAX_MULTIBYTE_LENGTH bytes at P in advance.  B and P are evaluated
   more than once.  */

#define BYTE8_STRING(b, p)			\
  ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),	\
   (p)[1] = (0x80 | ((b) & 0x3F)),		\
   2)


/* Write the multibyte form of character C at pointer P and move P
   just past the bytes written.  Forms of up to three bytes are
   emitted inline; longer ones are produced by char_string, whose
   return value is added to P.  The caller must provide at least
   MAX_MULTIBYTE_LENGTH bytes at P.  C and P are evaluated more than
   once, and P must be a modifiable pointer lvalue.  */

#define CHAR_STRING_ADVANCE(c, p)			\
  do {							\
    if ((c) <= MAX_1_BYTE_CHAR)				\
      {							\
	*(p)++ = (c);					\
      }							\
    else if ((c) <= MAX_2_BYTE_CHAR)			\
      {							\
	*(p)++ = (((c) >> 6) | 0xC0);			\
	*(p)++ = (((c) & 0x3F) | 0x80);			\
      }							\
    else if ((c) <= MAX_3_BYTE_CHAR)			\
      {							\
	*(p)++ = (((c) >> 12) | 0xE0);			\
	*(p)++ = ((((c) >> 6) & 0x3F) | 0x80);		\
	*(p)++ = (((c) & 0x3F) | 0x80);			\
      }							\
    else						\
      {							\
	(p) += char_string ((c), (p));			\
      }							\
  } while (0)


/* True (nonzero) exactly when the two top bits of BYTE are both set,
   i.e. BYTE begins a non-ASCII character in a multibyte form.  */
#define LEADING_CODE_P(byte) (0xC0 == ((byte) & 0xC0))

/* True (nonzero) exactly when the two top bits of BYTE are 10, i.e.
   BYTE is a trailing code of a non-ASCII character in a multibyte
   form.  */
#define TRAILING_CODE_P(byte) (0x80 == ((byte) & 0xC0))

/* True (nonzero) exactly when BYTE is not a trailing code, i.e. BYTE
   begins a character in a multibyte form.  Same result as:
	(ASCII_BYTE_P (byte) || LEADING_CODE_P (byte))  */
#define CHAR_HEAD_P(byte) (0x80 != ((byte) & 0xC0))

/* Old name of LEADING_CODE_P, kept only for backward compatibility.
   This macro will be removed in the future.  */
#define BASE_LEADING_CODE_P LEADING_CODE_P

/* Length in bytes of the multibyte form whose first byte is BYTE.
   The answer is read off the high bits of BYTE alone: bit 0x80 clear
   gives 1, then the first clear bit among 0x20, 0x10, 0x08 gives 2,
   3 or 4, and 5 otherwise.  No validity check is made.  */
#define BYTES_BY_CHAR_HEAD(byte)	\
  (((byte) & 0x80) == 0 ? 1		\
   : ((byte) & 0x20) == 0 ? 2		\
   : ((byte) & 0x10) == 0 ? 3		\
   : ((byte) & 0x08) == 0 ? 4		\
   : 5)


/* Length of the multibyte form starting at STR, assuming STR points
   at a valid multibyte form.  Only the first byte is inspected; LEN
   is ignored.  This macro is no longer necessary: callers are to be
   changed to use BYTES_BY_CHAR_HEAD directly in the future.  */

#define MULTIBYTE_FORM_LENGTH(str, len)		\
  BYTES_BY_CHAR_HEAD ((str)[0])

/* Set BYTES to the byte length of the character at STR, assuming that
   STR points at a valid multibyte form.  Only the first byte of STR
   is inspected; LENGTH is ignored.  The whole expansion is
   parenthesized so that the macro can be used safely as an operand of
   a larger expression; its value is the value assigned to BYTES.  As
   this macro isn't necessary anymore, all callers will be changed to
   use BYTES_BY_CHAR_HEAD directly in the future.  */

#define PARSE_MULTIBYTE_SEQ(str, length, bytes)	\
  ((bytes) = BYTES_BY_CHAR_HEAD (*(str)))

/* The byte length of the multibyte form at P in a unibyte string that
   ends at PEND (PEND points just past the last byte).  Return 0 if P
   doesn't point at a valid multibyte form: P is at or beyond PEND,
   the form would run past PEND, a required trailing byte is missing,
   or the leading byte is not a valid one.  A 5-byte form additionally
   requires the leading byte 0xF8 and a second byte of the form
   1000xxxx.  P and PEND are evaluated more than once; the bytes at P
   are expected to be unsigned.  */

#define MULTIBYTE_LENGTH(p, pend)					\
  ((p) >= (pend) ? 0							\
   : !((p)[0] & 0x80) ? 1						\
   : (((p) + 1 >= (pend)) || (((p)[1] & 0xC0) != 0x80)) ? 0		\
   : ((p)[0] & 0xE0) == 0xC0 ? 2					\
   : (((p) + 2 >= (pend)) || (((p)[2] & 0xC0) != 0x80)) ? 0		\
   : ((p)[0] & 0xF0) == 0xE0 ? 3					\
   : (((p) + 3 >= (pend)) || (((p)[3] & 0xC0) != 0x80)) ? 0		\
   : ((p)[0] & 0xF8) == 0xF0 ? 4					\
   : (((p) + 4 >= (pend)) || (((p)[4] & 0xC0) != 0x80)) ? 0		\
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5			\
   : 0)


/* Byte length of the multibyte form at P, or 0 when the bytes at P do
   not make up a valid form.  Unlike MULTIBYTE_LENGTH no end address
   is consulted: trailing bytes are read from P[1], P[2], ... for as
   long as the preceding ones look valid.  P is evaluated more than
   once; the bytes at P are expected to be unsigned.  */

#define MULTIBYTE_LENGTH_NO_CHECK(p)				\
  (((p)[0] & 0x80) == 0 ? 1					\
   : 0x80 != ((p)[1] & 0xC0) ? 0				\
   : 0xC0 == ((p)[0] & 0xE0) ? 2				\
   : 0x80 != ((p)[2] & 0xC0) ? 0				\
   : 0xE0 == ((p)[0] & 0xF0) ? 3				\
   : 0x80 != ((p)[3] & 0xC0) ? 0				\
   : 0xF0 == ((p)[0] & 0xF8) ? 4				\
   : 0x80 != ((p)[4] & 0xC0) ? 0				\
   : 0xF8 == (p)[0] && 0x80 == ((p)[1] & 0xF0) ? 5		\
   : 0)


/* Decode and return the character whose multibyte form starts at P.
   1-, 2- and 3-byte forms are decoded inline; longer forms are handed
   to string_char.  A 2-byte form whose leading byte is below 0xC2
   gets 0x3FFF80 added, which yields a raw-8-bit character.  The
   argument LEN is ignored and will be removed in the future.  P is
   evaluated more than once.  */

#define STRING_CHAR(p, len)						\
  (((p)[0] & 0x80) == 0							\
   ? (p)[0]								\
   : ((p)[0] & 0x20) == 0						\
   ? ((((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)			\
      + ((((p)[0] & 0x1F) << 6)						\
	 | ((p)[1] & 0x3F)))						\
   : ((p)[0] & 0x10) == 0						\
   ? (((p)[2] & 0x3F)							\
      | (((p)[1] & 0x3F) << 6)						\
      | (((p)[0] & 0x0F) << 12))					\
   : string_char ((p), NULL, NULL))


/* Decode the character at P exactly as STRING_CHAR does, and also
   store the byte length of its multibyte form in ACTUAL_LEN.  For
   forms longer than three bytes both jobs are left to string_char,
   which receives the address of ACTUAL_LEN.  The argument LEN is
   ignored and will be removed in the future.  P is evaluated more
   than once; ACTUAL_LEN must be an lvalue.  */

#define STRING_CHAR_AND_LENGTH(p, len, actual_len)			\
  (((p)[0] & 0x80) == 0							\
   ? ((actual_len) = 1, (p)[0])						\
   : ((p)[0] & 0x20) == 0						\
   ? ((actual_len) = 2,							\
      ((((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)			\
       + ((((p)[0] & 0x1F) << 6)					\
	  | ((p)[1] & 0x3F))))						\
   : ((p)[0] & 0x10) == 0						\
   ? ((actual_len) = 3,							\
      (((p)[2] & 0x3F)							\
       | (((p)[1] & 0x3F) << 6)						\
       | (((p)[0] & 0x0F) << 12)))					\
   : string_char ((p), NULL, &(actual_len)))


/* Like STRING_CHAR but advance P to the end of the multibyte form.
   For 1-, 2- and 3-byte forms P is stepped first and the bytes are
   then read back at negative offsets; longer forms are handed to
   string_char together with the address of P.  Note that the
   raw-8-bit offset 0x3FFF80 is OR-ed in here, whereas STRING_CHAR
   adds it; the two give the same result for the leading bytes 0xC0
   and 0xC1.  P must be a modifiable pointer lvalue and is evaluated
   more than once.  */

#define STRING_CHAR_ADVANCE(p)						\
  (((p)[0] & 0x80) == 0							\
   ? *(p)++								\
   : ((p)[0] & 0x20) == 0						\
   ? ((p) += 2,								\
      ((((unsigned char) (p)[-2]) < 0xC2 ? 0x3FFF80 : 0)		\
       | (((p)[-2] & 0x1F) << 6)					\
       | ((p)[-1] & 0x3F)))						\
   : ((p)[0] & 0x10) == 0						\
   ? ((p) += 3,								\
      (((p)[-1] & 0x3F)							\
       | (((p)[-2] & 0x3F) << 6)					\
       | (((p)[-3] & 0x0F) << 12)))					\
   : string_char ((p), &(p), NULL))


/* Fetch the "next" character from Lisp string STRING at byte position
   BYTEIDX, character position CHARIDX.  Store it into OUTPUT.

   CHARIDX is always incremented by one.  If STRING_MULTIBYTE (STRING)
   holds, the character is decoded with STRING_CHAR_AND_LENGTH and
   BYTEIDX is advanced by the decoded length; otherwise the single
   byte at BYTEIDX is stored and BYTEIDX is incremented by one.

   All the args must be side-effect-free.
   BYTEIDX and CHARIDX must be lvalues;
   we increment them past the character fetched.

   The expansion has the form `if (1) { ... } else', so the invocation
   must be followed by a semicolon (or by a statement, which then
   becomes the never-executed else branch).  The expansion declares
   block-local variables named `ptr' and `len'; arguments must not
   refer to variables with those names.  */

#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)	\
  if (1)								\
    {									\
      CHARIDX++;							\
      if (STRING_MULTIBYTE (STRING))					\
	{								\
	  unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX];	\
	  int len;							\
									\
	  OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);		\
	  BYTEIDX += len;						\
	}								\
      else								\
	OUTPUT = XSTRING (STRING)->data[BYTEIDX++];			\
    }									\
  else

/* Like FETCH_STRING_CHAR_ADVANCE, except that when STRING is not
   multibyte the byte fetched into OUTPUT is additionally passed
   through MAKE_CHAR_MULTIBYTE, so OUTPUT always ends up holding a
   multibyte character.

   The expansion has the form `if (1) { ... } else' and declares
   block-local variables `ptr' and `len'; follow the invocation with a
   semicolon and do not use those names in the arguments.  */

#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  if (1)								      \
    {									      \
      CHARIDX++;							      \
      if (STRING_MULTIBYTE (STRING))					      \
	{								      \
	  unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX];	      \
	  int len;							      \
									      \
	  OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);		      \
	  BYTEIDX += len;						      \
	}								      \
      else								      \
	{								      \
	  OUTPUT = XSTRING (STRING)->data[BYTEIDX++];			      \
	  MAKE_CHAR_MULTIBYTE (OUTPUT);					      \
	}								      \
    }									      \
  else


/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte: no
   STRING_MULTIBYTE test is made and the character is always decoded
   with STRING_CHAR_AND_LENGTH.  BYTEIDX is advanced by the decoded
   length and CHARIDX by one.

   The expansion has the form `if (1) { ... } else' and declares
   block-local variables `ptr' and `len'; follow the invocation with a
   semicolon and do not use those names in the arguments.  */

#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  if (1)								     \
    {									     \
      unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX];		     \
      int len;								     \
									     \
      OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);			     \
      BYTEIDX += len;							     \
      CHARIDX++;							     \
    }									     \
  else


/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
   buffer.  The bytes are located with BYTE_POS_ADDR (BYTEIDX).  When
   current_buffer->enable_multibyte_characters is non-nil the
   character is decoded with STRING_CHAR_AND_LENGTH and BYTEIDX is
   advanced by its length; otherwise one byte is fetched and BYTEIDX
   is incremented by one.  CHARIDX is incremented by one either way.

   The expansion has the form `if (1) { ... } else' and declares
   block-local variables `ptr' and `len'; follow the invocation with a
   semicolon and do not use those names in the arguments.  */

#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)		\
  if (1)							\
    {								\
      CHARIDX++;						\
      if (!NILP (current_buffer->enable_multibyte_characters))	\
	{							\
	  unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX);		\
	  int len;						\
								\
	  OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len);		\
	  BYTEIDX += len;					\
	}							\
      else							\
	{							\
	  OUTPUT = *(BYTE_POS_ADDR (BYTEIDX));			\
	  BYTEIDX++;						\
	}							\
    }								\
  else


/* Like FETCH_CHAR_ADVANCE but assumes the current buffer is
   multibyte: enable_multibyte_characters is not consulted and the
   character at BYTE_POS_ADDR (BYTEIDX) is always decoded with
   STRING_CHAR_AND_LENGTH.  BYTEIDX is advanced by the decoded length
   and CHARIDX by one.

   The expansion has the form `if (1) { ... } else' and declares
   block-local variables `ptr' and `len'; follow the invocation with a
   semicolon and do not use those names in the arguments.  */

#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)	\
  if (1)							\
    {								\
      unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX);		\
      int len;							\
								\
      OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len);		\
      BYTEIDX += len;						\
      CHARIDX++;						\
    }								\
  else


/* Move the byte position POS_BYTE of the current buffer forward to
   the next character boundary: the byte at BYTE_POS_ADDR (POS_BYTE)
   is taken as a character head and its BYTES_BY_CHAR_HEAD length is
   added to POS_BYTE.  POS_BYTE must be an lvalue.  No range checking
   is done.  */

#define INC_POS(pos_byte)					\
  do {								\
    unsigned char *p;						\
								\
    p = BYTE_POS_ADDR (pos_byte);				\
    pos_byte += BYTES_BY_CHAR_HEAD (p[0]);			\
  } while (0)


/* Move the byte position POS_BYTE of the current buffer back to the
   previous character boundary.  POS_BYTE is first decremented by one;
   the address of that byte is computed from BEG_ADDR, adding GAP_SIZE
   when the position is not below GPT_BYTE.  Then, for as long as the
   byte there is not a character head, both the pointer and POS_BYTE
   step back by one.  POS_BYTE must be an lvalue.  No range checking
   is done.  */

#define DEC_POS(pos_byte)						\
  do {									\
    unsigned char *p;							\
									\
    pos_byte--;								\
    p = ((pos_byte < GPT_BYTE ? BEG_ADDR : BEG_ADDR + GAP_SIZE)		\
	 + pos_byte - 1);						\
    for (; !CHAR_HEAD_P (*p); p--)					\
      pos_byte--;							\
  } while (0)

/* Advance CHARPOS by one character and BYTEPOS by the matching number
   of bytes.  When the current buffer has multibyte characters enabled
   BYTEPOS is moved with INC_POS; otherwise it is simply incremented
   by one.  Both arguments must be lvalues.  */

#define INC_BOTH(charpos, bytepos)					\
  do {									\
    (charpos)++;							\
    if (! NILP (current_buffer->enable_multibyte_characters))		\
      INC_POS ((bytepos));						\
    else								\
      (bytepos)++;							\
  } while (0)


/* Step CHARPOS back by one character and BYTEPOS by the matching
   number of bytes.  When the current buffer has multibyte characters
   enabled BYTEPOS is moved with DEC_POS; otherwise it is simply
   decremented by one.  Both arguments must be lvalues.  */

#define DEC_BOTH(charpos, bytepos)					\
  do {									\
    (charpos)--;							\
    if (! NILP (current_buffer->enable_multibyte_characters))		\
      DEC_POS ((bytepos));						\
    else								\
      (bytepos)--;							\
  } while (0)


/* Move the byte position POS_BYTE of buffer BUF forward to the next
   character boundary: the byte at BUF_BYTE_ADDRESS (BUF, POS_BYTE) is
   taken as a character head and its BYTES_BY_CHAR_HEAD length is
   added to POS_BYTE.  This macro relies on the fact that *GPT_ADDR
   and *Z_ADDR are always accessible and the values are '\0'.
   POS_BYTE must be an lvalue.  No range checking of POS_BYTE.  */

#define BUF_INC_POS(buf, pos_byte)				\
  do {								\
    unsigned char *p;						\
								\
    p = BUF_BYTE_ADDRESS (buf, pos_byte);			\
    pos_byte += BYTES_BY_CHAR_HEAD (p[0]);			\
  } while (0)


/* Move the byte position POS_BYTE of buffer BUF back to the previous
   character boundary.  POS_BYTE is first decremented by one; the
   address of that byte is computed from BUF_BEG_ADDR (BUF), adding
   BUF_GAP_SIZE (BUF) when the position is not below
   BUF_GPT_BYTE (BUF).  Then, for as long as the byte there is not a
   character head, both the pointer and POS_BYTE step back by one.
   POS_BYTE must be an lvalue.  No range checking of POS_BYTE.  */

#define BUF_DEC_POS(buf, pos_byte)					\
  do {									\
    unsigned char *p;							\
									\
    pos_byte--;								\
    p = ((pos_byte < BUF_GPT_BYTE (buf)					\
	  ? BUF_BEG_ADDR (buf)						\
	  : BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf))			\
	 + pos_byte - 1);						\
    for (; !CHAR_HEAD_P (*p); p--)					\
      pos_byte--;							\
  } while (0)


/* If character C is above MAX_UNICODE_CHAR and Vchar_unify_table is a
   char-table, look C up in that table.  A nil entry leaves C alone.
   If the entry is a symbol, Funify_charset (entry, Qnil, Qnil) is
   called and the lookup is repeated.  The entry is then read as an
   integer with XINT, and if that integer is non-negative it replaces
   C.

   C must be an lvalue; it is used unparenthesized and evaluated
   several times.  The expansion has the form `if (...) { ... } else',
   so the invocation must be followed by a semicolon, and it declares
   block-local variables `val' and `unified'.

   NOTE(review): after the second lookup the entry is passed to XINT
   without being re-checked for nil -- confirm that Funify_charset
   always leaves an integer entry for C.  */
#define MAYBE_UNIFY_CHAR(c)					\
  if (c > MAX_UNICODE_CHAR					\
      && CHAR_TABLE_P (Vchar_unify_table))			\
    {								\
      Lisp_Object val;						\
      int unified;						\
								\
      val = CHAR_TABLE_REF (Vchar_unify_table, c);		\
      if (! NILP (val))						\
	{							\
	  if (SYMBOLP (val))					\
	    {							\
	      Funify_charset (val, Qnil, Qnil);			\
	      val = CHAR_TABLE_REF (Vchar_unify_table, c);	\
	    }							\
	  if ((unified = XINT (val)) >= 0)			\
	    c = unified;					\
	}							\
    }								\
  else


/* Return the width of ASCII character C.  The width is measured by
   how many columns occupied on the screen when displayed in the
   current buffer:

     - tab: the current buffer's tab_width;
     - newline: 0;
     - any other code below 0x20, and every code from 0x7f up:
       4 if the current buffer's ctl_arrow is nil, otherwise 2;
     - codes 0x20..0x7e: 1.

   C is parenthesized at every use so that an expression argument such
   as `x & 0xFF' is compared as a whole.  C is evaluated more than
   once.  */

#define ASCII_CHAR_WIDTH(c)						\
  ((c) < 0x20								\
   ? ((c) == '\t'							\
      ? XFASTINT (current_buffer->tab_width)				\
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))	\
   : ((c) < 0x7f							\
      ? 1								\
      : ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))

/* Return the width of character C.  The width is measured by how many
   columns occupied on the screen when displayed in the current
   buffer.  ASCII characters are measured by ASCII_CHAR_WIDTH; for all
   others the width is the integer stored for C in the char-table
   Vchar_width_table.  C is evaluated more than once.  */

#define CHAR_WIDTH(c)		\
  (ASCII_CHAR_P (c)		\
   ? ASCII_CHAR_WIDTH (c)	\
   : XINT (CHAR_TABLE_REF (Vchar_width_table, (c))))

/* Functions declared here; their definitions are not part of this
   header.  */

extern int char_resolve_modifier_mask P_ ((int));
/* Called by CHAR_STRING and CHAR_STRING_ADVANCE for characters above
   MAX_3_BYTE_CHAR; its return value is used as the number of bytes
   stored.  */
extern int char_string P_ ((int, unsigned char *));
/* Called by STRING_CHAR, STRING_CHAR_AND_LENGTH and
   STRING_CHAR_ADVANCE for multibyte forms longer than three bytes.
   Those macros pass NULL for whichever of the last two arguments they
   do not need.  */
extern int string_char P_ ((const unsigned char *,
			    const unsigned char **, int *));

extern int translate_char P_ ((Lisp_Object, int c));
extern int char_printable_p P_ ((int c));
extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *));
extern int parse_str_to_multibyte P_ ((unsigned char *, int));
extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
extern int str_to_multibyte P_ ((unsigned char *, int, int));
extern int str_as_unibyte P_ ((unsigned char *, int));
extern int strwidth P_ ((unsigned char *, int));
extern int c_string_width P_ ((unsigned char *, int, int, int *, int *));
extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));

/* Char-table consulted by CHAR_PRINTABLE_P.  */
extern Lisp_Object Vprintable_chars;

extern Lisp_Object Qcharacterp, Qauto_fill_chars;
/* Vector indexed by GET_TRANSLATION_TABLE.  */
extern Lisp_Object Vtranslation_table_vector;
/* Char-table consulted by CHAR_WIDTH for non-ASCII characters.  */
extern Lisp_Object Vchar_width_table;
extern Lisp_Object Vchar_direction_table;
/* Char-table consulted by MAYBE_UNIFY_CHAR.  */
extern Lisp_Object Vchar_unify_table;

extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object));

/* Return a translation table of id number ID: the cdr of element ID
   of the vector Vtranslation_table_vector.  ID is not range-checked
   here.  */
#define GET_TRANSLATION_TABLE(id) \
  (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))

/* A char-table for characters which may invoke auto-filling.  */
extern Lisp_Object Vauto_fill_chars;

extern Lisp_Object Vchar_script_table;

/* Copy LEN bytes from FROM to TO, one byte at a time in ascending
   address order.  Intended only for callers that know LEN is short,
   where the obvious copy loop is faster than calling bcopy, which has
   some overhead; copying the multibyte sequence of a single character
   is the typical case.  The expansion declares block-local variables
   `i', `from_p' and `to_p'; arguments must not refer to variables
   with those names.  */

#define BCOPY_SHORT(from, to, len)		\
  do {						\
    int i = len;				\
    unsigned char *from_p = from;		\
    unsigned char *to_p = to;			\
						\
    for (; i != 0; i--)				\
      *to_p++ = *from_p++;			\
  } while (0)

/* Assign to the variable SYM the result of intern (NAME), then pass
   the address of SYM to staticpro.  SYM must be an lvalue.  */
#define DEFSYM(sym, name)		\
  do {					\
    (sym) = intern ((name));		\
    staticpro (&(sym));			\
  } while (0)

#endif /* EMACS_CHARACTER_H */
Commit	Line	Data
0168c3d8 KH	1	/* Header for multibyte character handler.
	2	Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
	3	Licensed to the Free Software Foundation.
	4	Copyright (C) 2001, 2002
	5	National Institute of Advanced Industrial Science and Technology (AIST)
	6	Registration Number H13PRO009
	7
	8	This file is part of GNU Emacs.
	9
	10	GNU Emacs is free software; you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
	12	the Free Software Foundation; either version 2, or (at your option)
	13	any later version.
	14
	15	GNU Emacs is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
	19
	20	You should have received a copy of the GNU General Public License
	21	along with GNU Emacs; see the file COPYING. If not, write to
	22	the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	23	Boston, MA 02111-1307, USA. */
	24
	25	#ifndef EMACS_CHARACTER_H
	26	#define EMACS_CHARACTER_H
	27
885317d8 KH	28	/* character code 1st byte byte sequence
	29	-------------- -------- -------------
	30	0-7F 00..7F 0xxxxxxx
	31	80-7FF C2..DF 110xxxxx 10xxxxxx
	32	800-FFFF E0..EF 1110xxxx 10xxxxxx 10xxxxxx
	33	10000-1FFFFF F0..F7 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	34	200000-3FFF7F F8 11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
	35	invalid F9..FF
0168c3d8 KH	36
0168c3d8 KH	37	raw-8-bit
885317d8	38	3FFF80-3FFFFF C0..C1 1100000x 10xxxxxx
0168c3d8 KH	39	*/
0168c3d8 KH	40
885317d8	41	/* Maximum character code ((1 << CHARACTERBITS) - 1). */
0168c3d8 KH	42	#define MAX_CHAR 0x3FFFFF
0168c3d8 KH	43
885317d8	44	/* Maximum Unicode character code. */
0168c3d8 KH	45	#define MAX_UNICODE_CHAR 0x10FFFF
0168c3d8 KH	46
885317d8	47	/* Maximum N-byte character codes. */
0168c3d8 KH	48	#define MAX_1_BYTE_CHAR 0x7F
	49	#define MAX_2_BYTE_CHAR 0x7FF
	50	#define MAX_3_BYTE_CHAR 0xFFFF
	51	#define MAX_4_BYTE_CHAR 0x1FFFFF
	52	#define MAX_5_BYTE_CHAR 0x3FFF7F
	53
43c47483 KH	54	/* Leading code range of Latin-1 chars. */
	55	#define LEADING_CODE_LATIN_1_MIN 0xC2
	56	#define LEADING_CODE_LATIN_1_MAX 0xC3
	57
8bc28f69 KH	58	/* Nonzero iff C is a character that corresponds to a raw 8-bit
	59	byte. */
	60	#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
	61
885317d8	62	/* Return the character code for raw 8-bit byte BYTE. */
0168c3d8	63	#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
885317d8 KH	64
885317d8 KH	65	/* Return the raw 8-bit byte for character C. */
8bc28f69 KH	66	#define CHAR_TO_BYTE8(c) \
	67	(CHAR_BYTE8_P (c) \
	68	? (c) - 0x3FFF00 \
	69	: multibyte_char_to_unibyte (c, Qnil))
885317d8 KH	70
	71	/* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
	72	that corresponds to a raw 8-bit byte. */
0168c3d8 KH	73	#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 \|\| (byte) == 0xC1)
0168c3d8 KH	74
43c47483 KH	75	/* Mapping table from unibyte chars to multibyte chars. */
43c47483 KH	76	extern int unibyte_to_multibyte_table[256];
3e411074	77
43c47483 KH	78	/* Convert the unibyte character C to the corresponding multibyte
	79	character. If C can't be converted, return C. */
	80	#define unibyte_char_to_multibyte(c) \
	81	((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
3e411074	82
43c47483 KH	83	/* If C is not ASCII, make it unibyte. */
	84	#define MAKE_CHAR_UNIBYTE(c) \
	85	do { \
	86	if (! ASCII_CHAR_P (c)) \
	87	c = CHAR_TO_BYTE8 (c); \
	88	} while (0)
3e411074	89
3e411074	90
43c47483 KH	91	/* If C is not ASCII, make it multibyte. It assumes C < 256. */
43c47483 KH	92	#define MAKE_CHAR_MULTIBYTE(c) ((c) = unibyte_to_multibyte_table[(c)])
3e411074	93
885317d8	94	/* This is the maximum byte length of multibyte form. */
0168c3d8 KH	95	#define MAX_MULTIBYTE_LENGTH 5
0168c3d8 KH	96
885317d8	97	/* Return a Lisp character whose character code is C. */
0168c3d8 KH	98	#define make_char(c) make_number (c)
	99
	100	/* Nonzero iff C is an ASCII byte. */
	101	#define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80)
	102
	103	/* Nonzero iff X is a character. */
	104	#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
	105
f65c6d94	106	/* Nonzero iff C is valid as a character code. GENERICP is not used
885317d8 KH	107	now. */
885317d8 KH	108	#define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR)
0168c3d8 KH	109
	110	/* Check if Lisp object X is a character or not. */
	111	#define CHECK_CHARACTER(x) \
	112	do { \
	113	if (! CHARACTERP(x)) x = wrong_type_argument (Qcharacterp, (x)); \
	114	} while (0)
	115
	116	/* Nonzero iff C is an ASCII character. */
	117	#define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
	118
	119	/* Nonzero iff C is a character of code less than 0x100. */
	120	#define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
	121
	122	/* Nonzero if character C has a printable glyph. */
	123	#define CHAR_PRINTABLE_P(c) \
	124	(((c) >= 32 && ((c) < 127) \
	125	\|\| ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c)))))
	126
885317d8	127	/* Return byte length of multibyte form for character C. */
0168c3d8 KH	128	#define CHAR_BYTES(c) \
	129	( (c) <= MAX_1_BYTE_CHAR ? 1 \
	130	: (c) <= MAX_2_BYTE_CHAR ? 2 \
	131	: (c) <= MAX_3_BYTE_CHAR ? 3 \
	132	: (c) <= MAX_4_BYTE_CHAR ? 4 \
	133	: (c) <= MAX_5_BYTE_CHAR ? 5 \
	134	: 2)
	135
43c47483 KH	136
	137	/* Return the leading code of multibyte form of C. */
	138	#define CHAR_LEADING_CODE(c) \
	139	((c) <= MAX_1_BYTE_CHAR ? c \
	140	: (c) <= MAX_2_BYTE_CHAR ? (0xC0 \| ((c) >> 6)) \
	141	: (c) <= MAX_3_BYTE_CHAR ? (0xE0 \| ((c) >> 12)) \
	142	: (c) <= MAX_4_BYTE_CHAR ? (0xF0 \| ((c) >> 18)) \
	143	: (c) <= MAX_5_BYTE_CHAR ? 0xF8 \
	144	: (0xC0 \| (((c) >> 6) & 0x01)))
	145
	146
885317d8 KH	147	/* Store multibyte form of the character C in P. The caller should
	148	allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
	149	Returns the length of the multibyte form. */
0168c3d8 KH	150
	151	#define CHAR_STRING(c, p) \
	152	((unsigned) (c) <= MAX_1_BYTE_CHAR \
	153	? ((p)[0] = (c), \
	154	1) \
	155	: (unsigned) (c) <= MAX_2_BYTE_CHAR \
	156	? ((p)[0] = (0xC0 \| ((c) >> 6)), \
	157	(p)[1] = (0x80 \| ((c) & 0x3F)), \
	158	2) \
	159	: (unsigned) (c) <= MAX_3_BYTE_CHAR \
	160	? ((p)[0] = (0xE0 \| ((c) >> 12)), \
	161	(p)[1] = (0x80 \| (((c) >> 6) & 0x3F)), \
	162	(p)[2] = (0x80 \| ((c) & 0x3F)), \
	163	3) \
eb41da4c	164	: char_string (c, p))
0168c3d8	165
eb41da4c KH	166	/* Store multibyte form of byte B in P. The caller should allocate at
	167	least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the
	168	length of the multibyte form. */
1106ea2b KH	169
	170	#define BYTE8_STRING(b, p) \
	171	((p)[0] = (0xC0 \| (((b) >> 6) & 0x01)), \
	172	(p)[1] = (0x80 \| ((c) & 0x3F)), \
	173	2)
	174
0168c3d8	175
885317d8 KH	176	/* Store multibyte form of the character C in P. The caller should
	177	allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
	178	And, advance P to the end of the multibyte form. */
0168c3d8	179
eb41da4c KH	180	#define CHAR_STRING_ADVANCE(c, p) \
	181	do { \
	182	if ((c) <= MAX_1_BYTE_CHAR) \
	183	*(p)++ = (c); \
	184	else if ((c) <= MAX_2_BYTE_CHAR) \
	185	*(p)++ = (0xC0 \| ((c) >> 6)), \
	186	*(p)++ = (0x80 \| ((c) & 0x3F)); \
	187	else if ((c) <= MAX_3_BYTE_CHAR) \
	188	*(p)++ = (0xE0 \| ((c) >> 12)), \
	189	*(p)++ = (0x80 \| (((c) >> 6) & 0x3F)), \
	190	*(p)++ = (0x80 \| ((c) & 0x3F)); \
	191	else \
	192	(p) += char_string ((c), (p)); \
885317d8	193	} while (0)
0168c3d8	194
eb41da4c	195
0168c3d8 KH	196	/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
	197	form. */
	198	#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
	199
b5c7dbe6 KH	200	/* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
	201	multibyte form. */
	202	#define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
	203
885317d8 KH	204	/* Nonzero iff BYTE starts a character in a multibyte form.
	205	This is equivalent to:
	206	(ASCII_BYTE_P (byte) \|\| LEADING_CODE_P (byte)) */
	207	#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
	208
0168c3d8 KH	209	/* Just kept for backward compatibility. This macro will be removed
	210	in the future. */
	211	#define BASE_LEADING_CODE_P LEADING_CODE_P
	212
	213	/* How many bytes a character that starts with BYTE occupies in a
	214	multibyte form. */
	215	#define BYTES_BY_CHAR_HEAD(byte) \
	216	(!((byte) & 0x80) ? 1 \
	217	: !((byte) & 0x20) ? 2 \
	218	: !((byte) & 0x10) ? 3 \
	219	: !((byte) & 0x08) ? 4 \
	220	: 5)
	221
	222
	223	/* Return the length of the multi-byte form at string STR of length
	224	LEN while assuming that STR points a valid multi-byte form. As
	225	this macro isn't necessary anymore, all callers will be changed to
	226	use BYTES_BY_CHAR_HEAD directly in the future. */
	227
	228	#define MULTIBYTE_FORM_LENGTH(str, len) \
	229	BYTES_BY_CHAR_HEAD (*(str))
	230
	231	/* Parse multibyte string STR of length LENGTH and set BYTES to the
	232	byte length of a character at STR while assuming that STR points a
	233	valid multibyte form. As this macro isn't necessary anymore, all
	234	callers will be changed to use BYTES_BY_CHAR_HEAD directly in the
	235	future. */
	236
	237	#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
	238	(bytes) = BYTES_BY_CHAR_HEAD (*(str))
	239
	240	/* The byte length of multibyte form at unibyte string P ending at
	241	PEND. If STR doesn't point a valid multibyte form, return 0. */
	242
	243	#define MULTIBYTE_LENGTH(p, pend) \
	244	(p >= pend ? 0 \
	245	: !((p)[0] & 0x80) ? 1 \
	246	: ((p + 1 >= pend) \|\| (((p)[1] & 0xC0) != 0x80)) ? 0 \
	247	: ((p)[0] & 0xE0) == 0xC0 ? 2 \
	248	: ((p + 2 >= pend) \|\| (((p)[2] & 0xC0) != 0x80)) ? 0 \
	249	: ((p)[0] & 0xF0) == 0xE0 ? 3 \
	250	: ((p + 3 >= pend) \|\| (((p)[3] & 0xC0) != 0x80)) ? 0 \
	251	: ((p)[0] & 0xF8) == 0xF0 ? 4 \
	252	: ((p + 4 >= pend) \|\| (((p)[4] & 0xC0) != 0x80)) ? 0 \
	253	: (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
	254	: 0)
	255
	256
	257	/* Like MULTIBYTE_LENGTH but don't check the ending address. */
	258
	259	#define MULTIBYTE_LENGTH_NO_CHECK(p) \
	260	(!((p)[0] & 0x80) ? 1 \
	261	: ((p)[1] & 0xC0) != 0x80 ? 0 \
	262	: ((p)[0] & 0xE0) == 0xC0 ? 2 \
	263	: ((p)[2] & 0xC0) != 0x80 ? 0 \
	264	: ((p)[0] & 0xF0) == 0xE0 ? 3 \
	265	: ((p)[3] & 0xC0) != 0x80 ? 0 \
	266	: ((p)[0] & 0xF8) == 0xF0 ? 4 \
	267	: ((p)[4] & 0xC0) != 0x80 ? 0 \
	268	: (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
	269	: 0)
	270
	271
	272	/* Return the character code of character whose multibyte form is at
273	P. The argument LEN is ignored. It will be removed in the
274	future. */
275
276	#define STRING_CHAR(p, len) \
277	(!((p)[0] & 0x80) \
278	? (p)[0] \
279	: ! ((p)[0] & 0x20) \
280	? (((((p)[0] & 0x1F) << 6) \
281	\| ((p)[1] & 0x3F)) \
282	+ (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)) \
283	: ! ((p)[0] & 0x10) \
284	? ((((p)[0] & 0x0F) << 12) \
285	\| (((p)[1] & 0x3F) << 6) \
286	\| ((p)[2] & 0x3F)) \
eb41da4c	287	: string_char ((p), NULL, NULL))
0168c3d8 KH	288
	289
	290	/* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
	291	form. The argument LEN is ignored. It will be removed in the
	292	future. */
	293
	294	#define STRING_CHAR_AND_LENGTH(p, len, actual_len) \
	295	(!((p)[0] & 0x80) \
	296	? ((actual_len) = 1, (p)[0]) \
	297	: ! ((p)[0] & 0x20) \
	298	? ((actual_len) = 2, \
	299	(((((p)[0] & 0x1F) << 6) \
	300	\| ((p)[1] & 0x3F)) \
	301	+ (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))) \
	302	: ! ((p)[0] & 0x10) \
	303	? ((actual_len) = 3, \
	304	((((p)[0] & 0x0F) << 12) \
	305	\| (((p)[1] & 0x3F) << 6) \
	306	\| ((p)[2] & 0x3F))) \
eb41da4c	307	: string_char ((p), NULL, &actual_len))
0168c3d8 KH	308
	309
	310	/* Like STRING_CHAR but advacen P to the end of multibyte form. */
	311
	312	#define STRING_CHAR_ADVANCE(p) \
	313	(!((p)[0] & 0x80) \
	314	? *(p)++ \
	315	: ! ((p)[0] & 0x20) \
	316	? ((p) += 2, \
	317	((((p)[-2] & 0x1F) << 6) \
	318	\| ((p)[-1] & 0x3F) \
	319	\| (((unsigned char) (p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
	320	: ! ((p)[0] & 0x10) \
	321	? ((p) += 3, \
	322	((((p)[-3] & 0x0F) << 12) \
	323	\| (((p)[-2] & 0x3F) << 6) \
	324	\| ((p)[-1] & 0x3F))) \
eb41da4c	325	: string_char ((p), &(p), NULL))
0168c3d8 KH	326
	327
	328	/* Fetch the "next" character from Lisp string STRING at byte position
	329	BYTEIDX, character position CHARIDX. Store it into OUTPUT.
	330
	331	All the args must be side-effect-free.
	332	BYTEIDX and CHARIDX must be lvalues;
	333	we increment them past the character fetched. */
	334
	335	#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
	336	if (1) \
	337	{ \
	338	CHARIDX++; \
	339	if (STRING_MULTIBYTE (STRING)) \
	340	{ \
	341	unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
	342	int len; \
	343	\
	344	OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
	345	BYTEIDX += len; \
	346	} \
	347	else \
	348	OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
	349	} \
	350	else
	351
/* Like FETCH_STRING_CHAR_ADVANCE, but when STRING is unibyte the
   fetched byte is additionally passed through MAKE_CHAR_MULTIBYTE
   before being left in OUTPUT.

   All the args must be side-effect-free.  OUTPUT, BYTEIDX and CHARIDX
   must be lvalues.  The expansion is one statement wrapped in
   do ... while (0), so a use must be terminated by a semicolon.  */

#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  do \
    { \
      (CHARIDX)++; \
      if (STRING_MULTIBYTE (STRING)) \
        { \
          unsigned char *fetch_ptr = &XSTRING (STRING)->data[BYTEIDX]; \
          int fetch_len; \
          \
          (OUTPUT) = STRING_CHAR_AND_LENGTH (fetch_ptr, 0, fetch_len); \
          (BYTEIDX) += fetch_len; \
        } \
      else \
        { \
          (OUTPUT) = XSTRING (STRING)->data[(BYTEIDX)++]; \
          MAKE_CHAR_MULTIBYTE (OUTPUT); \
        } \
    } \
  while (0)
	373
0168c3d8 KH	374
	375	/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte. */
	376
	377	#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
	378	if (1) \
	379	{ \
	380	unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
	381	int len; \
	382	\
	383	OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
	384	BYTEIDX += len; \
	385	CHARIDX++; \
	386	} \
	387	else
	388
	389
	390	/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
	391	buffer. */
	392
	393	#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
	394	if (1) \
	395	{ \
	396	CHARIDX++; \
	397	if (!NILP (current_buffer->enable_multibyte_characters)) \
	398	{ \
	399	unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
	400	int len; \
	401	\
	402	OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
	403	BYTEIDX += len; \
	404	} \
	405	else \
	406	{ \
	407	OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
	408	BYTEIDX++; \
	409	} \
	410	} \
	411	else
	412
	413
	414	/* Like FETCH_CHAR_ADVANCE but assumes STRING is multibyte. */
	415
	416	#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
	417	if (1) \
	418	{ \
	419	unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
	420	int len; \
	421	\
	422	OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
	423	BYTEIDX += len; \
	424	CHARIDX++; \
	425	} \
	426	else
	427
	428
	429	/* Increase the buffer byte position POS_BYTE of the current buffer to
	430	the next character boundary. No range checking of POS. */
	431
	432	#define INC_POS(pos_byte) \
	433	do { \
	434	unsigned char *p = BYTE_POS_ADDR (pos_byte); \
	435	pos_byte += BYTES_BY_CHAR_HEAD (*p); \
	436	} while (0)
	437
438
/* Move POS_BYTE, a byte position in the current buffer, back to the
   previous character boundary.  POS_BYTE is first decremented once;
   then, starting from the byte addressed by the new position (taking
   the gap into account), it keeps stepping back while that byte is
   not a character head.  POS_BYTE must be an lvalue.  No range
   checking of POS_BYTE.  */

#define DEC_POS(pos_byte) \
  do \
    { \
      unsigned char *prev; \
      \
      pos_byte--; \
      prev = ((pos_byte < GPT_BYTE) \
              ? BEG_ADDR + pos_byte - 1 \
              : BEG_ADDR + GAP_SIZE + pos_byte - 1); \
      for (; !CHAR_HEAD_P (*prev); prev--) \
        pos_byte--; \
    } \
  while (0)
457
/* Step CHARPOS and BYTEPOS forward by one character in the current
   buffer.  CHARPOS always grows by one; BYTEPOS grows by one in a
   unibyte buffer and is advanced with INC_POS otherwise.  */

#define INC_BOTH(charpos, bytepos) \
  do { \
    (charpos)++; \
    if (! NILP (current_buffer->enable_multibyte_characters)) \
      INC_POS ((bytepos)); \
    else \
      (bytepos)++; \
  } while (0)
470
471
/* Step CHARPOS and BYTEPOS backward by one character in the current
   buffer.  CHARPOS always shrinks by one; BYTEPOS shrinks by one in a
   unibyte buffer and is moved back with DEC_POS otherwise.  */

#define DEC_BOTH(charpos, bytepos) \
  do { \
    (charpos)--; \
    if (! NILP (current_buffer->enable_multibyte_characters)) \
      DEC_POS ((bytepos)); \
    else \
      (bytepos)--; \
  } while (0)
484
485
/* Advance POS_BYTE, a byte position in buffer BUF, to the next
   character boundary.  This macro relies on the fact that GPT_ADDR
   and Z_ADDR are always accessible and the values are '\0'.
   POS_BYTE must be an lvalue.  No range checking of POS_BYTE.  */

#define BUF_INC_POS(buf, pos_byte) \
  do \
    { \
      unsigned char *head; \
      \
      head = BUF_BYTE_ADDRESS (buf, pos_byte); \
      pos_byte += BYTES_BY_CHAR_HEAD (*head); \
    } \
  while (0)
496
497
/* Move POS_BYTE, a byte position in buffer BUF, back to the previous
   character boundary.  POS_BYTE is first decremented once; then,
   starting from the byte addressed by the new position (taking BUF's
   gap into account), it keeps stepping back while that byte is not a
   character head.  POS_BYTE must be an lvalue.  No range checking of
   POS_BYTE.  */

#define BUF_DEC_POS(buf, pos_byte) \
  do \
    { \
      unsigned char *prev; \
      \
      pos_byte--; \
      prev = ((pos_byte < BUF_GPT_BYTE (buf)) \
              ? BUF_BEG_ADDR (buf) + pos_byte - 1 \
              : BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1); \
      for (; !CHAR_HEAD_P (*prev); prev--) \
        pos_byte--; \
    } \
  while (0)
515
516
/* If C is greater than MAX_UNICODE_CHAR and Vchar_unify_table is a
   char-table, look C up in that table.  A nil entry leaves C alone.
   A symbol entry is passed to Funify_charset, after which the entry
   is looked up again.  If the resulting entry, read as an integer, is
   non-negative, it is stored back into C.

   C must be an lvalue and is evaluated more than once.  The expansion
   is one statement wrapped in do ... while (0), so a use must be
   terminated by a semicolon.  */

#define MAYBE_UNIFY_CHAR(c) \
  do \
    { \
      if ((c) > MAX_UNICODE_CHAR \
          && CHAR_TABLE_P (Vchar_unify_table)) \
        { \
          Lisp_Object unify_val; \
          int unified_code; \
          \
          unify_val = CHAR_TABLE_REF (Vchar_unify_table, c); \
          if (! NILP (unify_val)) \
            { \
              if (SYMBOLP (unify_val)) \
                { \
                  Funify_charset (unify_val, Qnil, Qnil); \
                  unify_val = CHAR_TABLE_REF (Vchar_unify_table, c); \
                } \
              /* NOTE(review): presumably the entry is an integer by \
                 now; confirm that Funify_charset guarantees it.  */ \
              if ((unified_code = XINT (unify_val)) >= 0) \
                (c) = unified_code; \
            } \
        } \
    } \
  while (0)
0168c3d8 KH	537
fc9d9d2a	538
/* Return the width of ASCII character C.  The width is measured by
   how many columns are occupied on the screen when C is displayed in
   the current buffer:
     TAB             -> the buffer's tab_width,
     newline         -> 0,
     other C < 0x20  -> 4 if ctl_arrow is nil, else 2,
     0x20 <= C < 0x7f -> 1,
     otherwise       -> 4 if ctl_arrow is nil, else 2.

   C is evaluated more than once, so it must be free of side
   effects.  */

#define ASCII_CHAR_WIDTH(c) \
  ((c) < 0x20 \
   ? ((c) == '\t' \
      ? XFASTINT (current_buffer->tab_width) \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f \
      ? 1 \
      : ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))
	551
	552	/* Return the width of character C. The width is measured by how many
	553	columns occupied on the screen when displayed in the current
	554	buffer. */
	555
	556	#define CHAR_WIDTH(c) \
	557	(ASCII_CHAR_P (c) \
	558	? ASCII_CHAR_WIDTH (c) \
	559	: XINT (CHAR_TABLE_REF (Vchar_width_table, c)))
	560
eb41da4c KH	561	extern int char_resolve_modifier_mask P_ ((int));
	562	extern int char_string P_ ((int, unsigned char *));
	563	extern int string_char P_ ((const unsigned char *,
	564	const unsigned char *, int ));
0168c3d8 KH	565
	566	extern int translate_char P_ ((Lisp_Object, int c));
	567	extern int char_printable_p P_ ((int c));
	568	extern void parse_str_as_multibyte P_ ((unsigned char , int, int , int *));
	569	extern int parse_str_to_multibyte P_ ((unsigned char *, int));
	570	extern int str_as_multibyte P_ ((unsigned char , int, int, int ));
	571	extern int str_to_multibyte P_ ((unsigned char *, int, int));
	572	extern int str_as_unibyte P_ ((unsigned char *, int));
	573	extern int strwidth P_ ((unsigned char *, int));
	574	extern int c_string_width P_ ((unsigned char , int, int, int , int *));
	575	extern int lisp_string_width P_ ((Lisp_Object, int, int , int ));
	576
	577	extern Lisp_Object Vprintable_chars;
	578
	579	extern Lisp_Object Qcharacterp, Qauto_fill_chars;
	580	extern Lisp_Object Vtranslation_table_vector;
	581	extern Lisp_Object Vchar_width_table;
	582	extern Lisp_Object Vchar_direction_table;
	583	extern Lisp_Object Vchar_unify_table;
	584
fac2bdc4 DL	585	extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object));
fac2bdc4 DL	586
/* Return the translation table whose id number is ID, i.e. the cdr
   of element ID of Vtranslation_table_vector.  */
#define GET_TRANSLATION_TABLE(id) \
  (XCDR (XVECTOR (Vtranslation_table_vector)->contents[(id)]))
	590
	591	/* A char-table for characters which may invoke auto-filling. */
	592	extern Lisp_Object Vauto_fill_chars;
	593
e18ef64a	594	extern Lisp_Object Vchar_script_table;
b5c7dbe6	595
/* Copy LEN bytes from FROM to TO.  This macro should be used only
   when a caller knows that LEN is short and the obvious copy loop is
   faster than calling bcopy, which has some overhead.  Copying the
   multibyte sequence of a single character is the typical case.

   FROM and TO are each evaluated once and are not modified; the copy
   runs forward, one byte at a time, through local cursors.  A LEN of
   zero copies nothing.  */

#define BCOPY_SHORT(from, to, len) \
  do { \
    int bcopy_short_n = (len); \
    const unsigned char *bcopy_short_src = (from); \
    unsigned char *bcopy_short_dst = (to); \
    while (bcopy_short_n--) \
      *bcopy_short_dst++ = *bcopy_short_src++; \
  } while (0)
	607
	608	#define DEFSYM(sym, name) \
	609	do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
	610
	611	#endif /* EMACS_CHARACTER_H */