/* Header for multibyte character handler.
Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
- Copyright (C) 2003, 2006
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H13PRO009
This file is part of GNU Emacs.
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
+along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#ifndef EMACS_CHARACTER_H
#define EMACS_CHARACTER_H
/* Return the character code for raw 8-bit byte BYTE. */
#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
+/* Return the character code for unibyte byte BYTE: BYTE itself when
+ ASCII_BYTE_P (BYTE) holds, otherwise BYTE8_TO_CHAR (BYTE), i.e. the
+ character code for the raw 8-bit byte. BYTE is evaluated more than
+ once. */
+#define UNIBYTE_TO_CHAR(byte) \
+ (ASCII_BYTE_P (byte) ? (byte) : BYTE8_TO_CHAR (byte))
+
/* Return the raw 8-bit byte for character C. */
#define CHAR_TO_BYTE8(c) \
(CHAR_BYTE8_P (c) \
? (c) - 0x3FFF00 \
: multibyte_char_to_unibyte (c, Qnil))
+/* Return the raw 8-bit byte for character C,
+ or -1 if C doesn't correspond to a byte. */
+#define CHAR_TO_BYTE_SAFE(c) \
+ (CHAR_BYTE8_P (c) \
+ ? (c) - 0x3FFF00 \
+ : multibyte_char_to_unibyte_safe (c))
+
/* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
that corresponds to a raw 8-bit byte. */
#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
-/* Mapping table from unibyte chars to multibyte chars. */
-extern int unibyte_to_multibyte_table[256];
-
-/* Convert the unibyte character C to the corresponding multibyte
- character. If C can't be converted, return C. */
-#define unibyte_char_to_multibyte(c) \
- ((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
-
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
- char. */
-extern char unibyte_has_multibyte_table[256];
-
-#define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)])
-
/* If C is not ASCII, make it unibyte. */
#define MAKE_CHAR_UNIBYTE(c) \
do { \
} while (0)
-/* If C is not ASCII, make it multibyte. It assumes C < 256. */
-#define MAKE_CHAR_MULTIBYTE(c) ((c) = unibyte_to_multibyte_table[(c)])
+/* If C is not ASCII, make it multibyte. Assumes C < 256. */
+#define MAKE_CHAR_MULTIBYTE(c) \
+ (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c))
/* This is the maximum byte length of multibyte form. */
#define MAX_MULTIBYTE_LENGTH 5
-/* Return a Lisp character whose character code is C. It assumes C is
+/* Return a Lisp character whose character code is C. Assumes C is
a valid character code. */
#define make_char(c) make_number (c)
/* Nonzero iff X is a character. */
#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
-/* Nonzero iff C is valid as a character code. GENERICP is not used
- now. */
+/* Nonzero iff C is valid as a character code. GENERICP is not used. */
#define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR)
/* Check if Lisp object X is a character or not. */
(p)[1] = (0x80 | (((c) >> 6) & 0x3F)), \
(p)[2] = (0x80 | ((c) & 0x3F)), \
3) \
- : char_string (c, p))
+ : char_string ((unsigned) (c), p))
/* Store multibyte form of byte B in P. The caller should allocate at
least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the
2)
-/* Store multibyte form of the character C in P. The caller should
- allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
- And, advance P to the end of the multibyte form. */
+/* Store multibyte form of the character C in P and advance P to the
+ end of the multibyte form. The caller should allocate at least
+ MAX_MULTIBYTE_LENGTH bytes area at P in advance. */
#define CHAR_STRING_ADVANCE(c, p) \
do { \
(ASCII_BYTE_P (byte) || LEADING_CODE_P (byte)) */
#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
-/* Just kept for backward compatibility. This macro will be removed
- in the future. */
+/* Kept for backward compatibility. This macro will be removed in the
+ future. */
#define BASE_LEADING_CODE_P LEADING_CODE_P
/* How many bytes a character that starts with BYTE occupies in a
(bytes) = BYTES_BY_CHAR_HEAD (*(str))
/* The byte length of multibyte form at unibyte string P ending at
- PEND. If STR doesn't point a valid multibyte form, return 0. */
+ PEND. If P doesn't point to a valid multibyte form, return 0. */
#define MULTIBYTE_LENGTH(p, pend) \
(p >= pend ? 0 \
: 0)
-/* Like MULTIBYTE_LENGTH but don't check the ending address. */
+/* Like MULTIBYTE_LENGTH, but don't check the ending address. */
#define MULTIBYTE_LENGTH_NO_CHECK(p) \
(!((p)[0] & 0x80) ? 1 \
: (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
: 0)
-/* If P is before LIMIT, advance P to the next character boundary. It
- assumes that P is already at a character boundary of the sane
+/* If P is before LIMIT, advance P to the next character boundary.
+ Assumes that P is already at a character boundary of the same
multibyte form whose end address is LIMIT. */
#define NEXT_CHAR_BOUNDARY(p, limit) \
/* If P is after LIMIT, advance P to the previous character boundary.
- It assumes that P is already at a character boundary of the sane
+ Assumes that P is already at a character boundary of the same
multibyte form whose beginning address is LIMIT. */
#define PREV_CHAR_BOUNDARY(p, limit) \
: string_char ((p), NULL, NULL))
-/* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
+/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
form. The argument LEN is ignored. It will be removed in the
future. */
: string_char ((p), NULL, &actual_len))
-/* Like STRING_CHAR but advance P to the end of multibyte form. */
+/* Like STRING_CHAR, but advance P to the end of multibyte form. */
#define STRING_CHAR_ADVANCE(p) \
(!((p)[0] & 0x80) \
we increment them past the character fetched. */
#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
- if (1) \
+ do \
{ \
CHARIDX++; \
if (STRING_MULTIBYTE (STRING)) \
{ \
- unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ unsigned char *ptr = &SDATA (STRING)[BYTEIDX]; \
int len; \
\
OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
BYTEIDX += len; \
} \
else \
- OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
+ { \
+ OUTPUT = SREF (STRING, BYTEIDX); \
+ BYTEIDX++; \
+ } \
} \
- else
+ while (0)
-/* Like FETCH_STRING_CHAR_ADVANCE but return a multibyte character eve
- if STRING is unibyte. */
+/* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
+ even if STRING is unibyte. */
#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
- if (1) \
+ do \
{ \
CHARIDX++; \
if (STRING_MULTIBYTE (STRING)) \
{ \
- unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ unsigned char *ptr = &SDATA (STRING)[BYTEIDX]; \
int len; \
\
OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
} \
else \
{ \
- OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
+ OUTPUT = SREF (STRING, BYTEIDX); \
+ BYTEIDX++; \
MAKE_CHAR_MULTIBYTE (OUTPUT); \
} \
} \
- else
+ while (0)
-/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte. */
+/* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte. */
#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
- if (1) \
+ do \
{ \
- unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ unsigned char *ptr = &SDATA (STRING)[BYTEIDX]; \
int len; \
\
OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
BYTEIDX += len; \
CHARIDX++; \
} \
- else
+ while (0)
-/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
+/* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
buffer. */
#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
- if (1) \
+ do \
{ \
CHARIDX++; \
if (!NILP (current_buffer->enable_multibyte_characters)) \
BYTEIDX++; \
} \
} \
- else
+ while (0)
-/* Like FETCH_CHAR_ADVANCE but assumes the current buffer is multibyte. */
+/* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte. */
#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
- if (1) \
+ do \
{ \
unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
int len; \
BYTEIDX += len; \
CHARIDX++; \
} \
- else
+ while (0)
-/* Increase the buffer byte position POS_BYTE of the current buffer to
+/* Increment the buffer byte position POS_BYTE of the current buffer to
the next character boundary. No range checking of POS_BYTE. */
#define INC_POS(pos_byte) \
} while (0)
-/* Decrease the buffer byte position POS_BYTE of the current buffer to
+/* Decrement the buffer byte position POS_BYTE of the current buffer to
the previous character boundary. No range checking of POS_BYTE. */
#define DEC_POS(pos_byte) \
\
pos_byte--; \
if (pos_byte < GPT_BYTE) \
- p = BEG_ADDR + pos_byte - 1; \
+ p = BEG_ADDR + pos_byte - BEG_BYTE; \
else \
- p = BEG_ADDR + GAP_SIZE + pos_byte - 1; \
+ p = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE;\
while (!CHAR_HEAD_P (*p)) \
{ \
p--; \
while (0)
-/* Increase the buffer byte position POS_BYTE of the current buffer to
+/* Increment the buffer byte position POS_BYTE of the current buffer to
the next character boundary. This macro relies on the fact that
*GPT_ADDR and *Z_ADDR are always accessible and the values are
'\0'. No range checking of POS_BYTE. */
} while (0)
-/* Decrease the buffer byte position POS_BYTE of the current buffer to
+/* Decrement the buffer byte position POS_BYTE of the current buffer to
the previous character boundary. No range checking of POS_BYTE. */
#define BUF_DEC_POS(buf, pos_byte) \
unsigned char *p; \
pos_byte--; \
if (pos_byte < BUF_GPT_BYTE (buf)) \
- p = BUF_BEG_ADDR (buf) + pos_byte - 1; \
+ p = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE; \
else \
- p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
+ p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
while (!CHAR_HEAD_P (*p)) \
{ \
p--; \
/* If C is a character to be unified with a Unicode character, return
the unified Unicode character. */
-#define MAYBE_UNIFY_CHAR(c) \
- if (c > MAX_UNICODE_CHAR \
- && CHAR_TABLE_P (Vchar_unify_table)) \
- { \
- Lisp_Object val; \
- int unified; \
- \
- val = CHAR_TABLE_REF (Vchar_unify_table, c); \
- if (! NILP (val)) \
- { \
- if (SYMBOLP (val)) \
- { \
- Funify_charset (val, Qnil, Qnil); \
- val = CHAR_TABLE_REF (Vchar_unify_table, c); \
- } \
- if ((unified = XINT (val)) >= 0) \
- c = unified; \
- } \
- } \
- else
+#define MAYBE_UNIFY_CHAR(c) \
+ do { \
+ if (c > MAX_UNICODE_CHAR && c <= MAX_5_BYTE_CHAR) \
+ { \
+ Lisp_Object val; \
+ val = CHAR_TABLE_REF (Vchar_unify_table, c); \
+ if (INTEGERP (val)) \
+ c = XINT (val); \
+ else if (! NILP (val)) \
+ c = maybe_unify_char (c, val); \
+ } \
+ } while (0)
/* Return the width of ASCII character C. The width is measured by
- how many columns occupied on the screen when displayed in the
+ how many columns C will occupy on the screen when displayed in the
current buffer. */
#define ASCII_CHAR_WIDTH(c) \
: ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))
/* Return the width of character C. The width is measured by how many
- columns occupied on the screen when displayed in the current
+ columns C will occupy on the screen when displayed in the current
buffer. */
#define CHAR_WIDTH(c) \
? ASCII_CHAR_WIDTH (c) \
: XINT (CHAR_TABLE_REF (Vchar_width_table, c)))
+/* If C is a variation selector, return the index number of the
+ variation selector (1..256). Otherwise, return 0. */
+
+#define CHAR_VARIATION_SELECTOR_P(c) \
+ ((c) < 0xFE00 ? 0 \
+ : (c) <= 0xFE0F ? (c) - 0xFE00 + 1 \
+ : (c) < 0xE0100 ? 0 \
+ : (c) <= 0xE01EF ? (c) - 0xE0100 + 17 \
+ : 0)
+
+/* If C is a high surrogate, return 1. If C is a low surrogate,
+ return 2. Otherwise, return 0. */
+
+#define CHAR_SURROGATE_PAIR_P(c) \
+ ((c) < 0xD800 ? 0 \
+ : (c) <= 0xDBFF ? 1 \
+ : (c) <= 0xDFFF ? 2 \
+ : 0)
+
+
extern int char_resolve_modifier_mask P_ ((int));
-extern int char_string P_ ((int, unsigned char *));
+extern int char_string P_ ((unsigned, unsigned char *));
extern int string_char P_ ((const unsigned char *,
const unsigned char **, int *));
extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
extern int str_to_multibyte P_ ((unsigned char *, int, int));
extern int str_as_unibyte P_ ((unsigned char *, int));
+extern EMACS_INT str_to_unibyte P_ ((const unsigned char *, unsigned char *,
+ EMACS_INT, int));
extern int strwidth P_ ((unsigned char *, int));
extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *));
extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
extern Lisp_Object Vchar_width_table;
extern Lisp_Object Vchar_direction_table;
extern Lisp_Object Vchar_unify_table;
+extern Lisp_Object Vunicode_category_table;
extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object));
} while (0)
#define DEFSYM(sym, name) \
- do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
+ do { (sym) = intern_c_string ((name)); staticpro (&(sym)); } while (0)
#endif /* EMACS_CHARACTER_H */