/* Header for multibyte character handler.
Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
- Copyright (C) 2003, 2006
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H13PRO009
This file is part of GNU Emacs.
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
+along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#ifndef EMACS_CHARACTER_H
#define EMACS_CHARACTER_H
? (c) - 0x3FFF00 \
: multibyte_char_to_unibyte (c, Qnil))
+/* Return the raw 8-bit byte for character C,
+ or -1 if C doesn't correspond to a byte.
+ A character satisfying CHAR_BYTE8_P is converted by subtracting
+ 0x3FFF00; any other character is handed to
+ multibyte_char_to_unibyte_safe. C appears more than once in the
+ expansion, so it must not have side effects. */
+#define CHAR_TO_BYTE_SAFE(c) \
+ (CHAR_BYTE8_P (c) \
+ ? (c) - 0x3FFF00 \
+ : multibyte_char_to_unibyte_safe (c))
+
/* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
that corresponds to a raw 8-bit byte, i.e. iff BYTE is 0xC0 or 0xC1.
BYTE may be evaluated twice. */
#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
} while (0)
-/* If C is not ASCII, make it multibyte. It assumes C < 256. */
-#define MAKE_CHAR_MULTIBYTE(c) ((c) = unibyte_to_multibyte_table[(c)])
+/* If C is not ASCII, make it multibyte. Assumes 0 <= C < 256; the
+ range is checked with eassert before the table lookup. C must be an
+ lvalue: it is overwritten with unibyte_to_multibyte_table[C], and
+ the whole expression yields that new value. */
+#define MAKE_CHAR_MULTIBYTE(c) \
+ (eassert ((c) >= 0 && (c) < 256), (c) = unibyte_to_multibyte_table[(c)])
/* This is the maximum byte length of the multibyte form of a single
character. */
#define MAX_MULTIBYTE_LENGTH 5
-/* Return a Lisp character whose character code is C. It assumes C is
+/* Return a Lisp character whose character code is C. Assumes C is
a valid character code; C is passed to make_number unchecked. */
#define make_char(c) make_number (c)
/* Nonzero iff X is a character, i.e. X satisfies NATNUMP and
XFASTINT (X) is no greater than MAX_CHAR. X appears twice in the
expansion, so it must not have side effects. */
#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
-/* Nonzero iff C is valid as a character code. GENERICP is not used
- now. */
+/* Nonzero iff C is valid as a character code. GENERICP is not used.
+ C is cast to unsigned before the comparison, so a negative C becomes
+ a large value; presumably MAX_CHAR is below that range, which lets
+ the single test reject negative codes too (confirm against the
+ definition of MAX_CHAR). */
#define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR)
/* Check if Lisp object X is a character or not. */
2)
-/* Store multibyte form of the character C in P. The caller should
- allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
- And, advance P to the end of the multibyte form. */
+/* Store multibyte form of the character C in P and advance P to the
+ end of the multibyte form. The caller should allocate at least
+ MAX_MULTIBYTE_LENGTH bytes area at P in advance. */
#define CHAR_STRING_ADVANCE(c, p) \
do { \
(ASCII_BYTE_P (byte) || LEADING_CODE_P (byte)) */
#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
-/* Just kept for backward compatibility. This macro will be removed
- in the future. */
+/* Kept for backward compatibility. This macro will be removed in the
+ future. It is a plain alias for LEADING_CODE_P. */
#define BASE_LEADING_CODE_P LEADING_CODE_P
/* How many bytes a character that starts with BYTE occupies in a
(bytes) = BYTES_BY_CHAR_HEAD (*(str))
/* The byte length of multibyte form at unibyte string P ending at
- PEND. If STR doesn't point a valid multibyte form, return 0. */
+ PEND. If P doesn't point to a valid multibyte form, return 0. */
#define MULTIBYTE_LENGTH(p, pend) \
(p >= pend ? 0 \
: 0)
-/* Like MULTIBYTE_LENGTH but don't check the ending address. */
+/* Like MULTIBYTE_LENGTH, but don't check the ending address. */
#define MULTIBYTE_LENGTH_NO_CHECK(p) \
(!((p)[0] & 0x80) ? 1 \
: (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
: 0)
-/* If P is before LIMIT, advance P to the next character boundary. It
- assumes that P is already at a character boundary of the sane
+/* If P is before LIMIT, advance P to the next character boundary.
+ Assumes that P is already at a character boundary of the same
multibyte form whose end address is LIMIT. */
#define NEXT_CHAR_BOUNDARY(p, limit) \
/* If P is after LIMIT, advance P to the previous character boundary.
- It assumes that P is already at a character boundary of the sane
+ Assumes that P is already at a character boundary of the same
multibyte form whose beginning address is LIMIT. */
#define PREV_CHAR_BOUNDARY(p, limit) \
: string_char ((p), NULL, NULL))
-/* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
+/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
form. The argument LEN is ignored. It will be removed in the
future. */
: string_char ((p), NULL, &actual_len))
-/* Like STRING_CHAR but advance P to the end of multibyte form. */
+/* Like STRING_CHAR, but advance P to the end of multibyte form. */
#define STRING_CHAR_ADVANCE(p) \
(!((p)[0] & 0x80) \
we increment them past the character fetched. */
#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
- if (1) \
+ do /* do ... while (0) makes the expansion a single statement */ \
{ \
CHARIDX++; /* one character is consumed on either path */ \
if (STRING_MULTIBYTE (STRING)) \
{ \
- unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ unsigned char *ptr = &SDATA (STRING)[BYTEIDX]; \
int len; /* receives the byte length of the multibyte form */ \
\
OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); /* the 0 is the ignored LEN argument */ \
BYTEIDX += len; /* step over the whole multibyte form */ \
} \
else \
- OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
+ { \
+ OUTPUT = SREF (STRING, BYTEIDX); /* unibyte string: fetch a single byte */ \
+ BYTEIDX++; \
+ } \
} \
- else
+ while (0) /* the caller supplies the terminating semicolon */
-/* Like FETCH_STRING_CHAR_ADVANCE but return a multibyte character eve
- if STRING is unibyte. */
+/* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
+ even if STRING is unibyte. */
#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
- if (1) \
+ do \
{ \
CHARIDX++; \
if (STRING_MULTIBYTE (STRING)) \
{ \
- unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ unsigned char *ptr = &SDATA (STRING)[BYTEIDX]; \
int len; \
\
OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
} \
else \
{ \
- OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
+ OUTPUT = SREF (STRING, BYTEIDX); \
+ BYTEIDX++; \
MAKE_CHAR_MULTIBYTE (OUTPUT); \
} \
} \
- else
+ while (0)
-/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte. */
+/* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte:
+ STRING_MULTIBYTE is not tested, and the character at BYTEIDX is
+ always decoded with STRING_CHAR_AND_LENGTH. */
#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
- if (1) \
+ do /* do ... while (0) makes the expansion a single statement */ \
{ \
- unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ unsigned char *ptr = &SDATA (STRING)[BYTEIDX]; \
int len; /* receives the byte length of the multibyte form */ \
\
OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); /* the 0 is the ignored LEN argument */ \
BYTEIDX += len; \
CHARIDX++; \
} \
- else
+ while (0) /* the caller supplies the terminating semicolon */
-/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
+/* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
buffer. */
#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
- if (1) \
+ do \
{ \
CHARIDX++; \
if (!NILP (current_buffer->enable_multibyte_characters)) \
BYTEIDX++; \
} \
} \
- else
+ while (0)
-/* Like FETCH_CHAR_ADVANCE but assumes the current buffer is multibyte. */
+/* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte. */
#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
- if (1) \
+ do \
{ \
unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
int len; \
BYTEIDX += len; \
CHARIDX++; \
} \
- else
+ while (0)
-/* Increase the buffer byte position POS_BYTE of the current buffer to
+/* Increment the buffer byte position POS_BYTE of the current buffer to
the next character boundary. No range checking of POS_BYTE. */
#define INC_POS(pos_byte) \
} while (0)
-/* Decrease the buffer byte position POS_BYTE of the current buffer to
+/* Decrement the buffer byte position POS_BYTE of the current buffer to
the previous character boundary. No range checking of POS_BYTE. */
#define DEC_POS(pos_byte) \
\
pos_byte--; \
if (pos_byte < GPT_BYTE) \
- p = BEG_ADDR + pos_byte - 1; \
+ p = BEG_ADDR + pos_byte - BEG_BYTE; \
else \
- p = BEG_ADDR + GAP_SIZE + pos_byte - 1; \
+ p = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE;\
while (!CHAR_HEAD_P (*p)) \
{ \
p--; \
while (0)
-/* Increase the buffer byte position POS_BYTE of the current buffer to
+/* Increment the buffer byte position POS_BYTE of the current buffer to
the next character boundary. This macro relies on the fact that
*GPT_ADDR and *Z_ADDR are always accessible and the values are
'\0'. No range checking of POS_BYTE. */
} while (0)
-/* Decrease the buffer byte position POS_BYTE of the current buffer to
+/* Decrement the buffer byte position POS_BYTE of the current buffer to
the previous character boundary. No range checking of POS_BYTE. */
#define BUF_DEC_POS(buf, pos_byte) \
unsigned char *p; \
pos_byte--; \
if (pos_byte < BUF_GPT_BYTE (buf)) \
- p = BUF_BEG_ADDR (buf) + pos_byte - 1; \
+ p = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE; \
else \
- p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
+ p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
while (!CHAR_HEAD_P (*p)) \
{ \
p--; \
/* Return the width of ASCII character C. The width is measured by
- how many columns occupied on the screen when displayed in the
+ how many columns C will occupy on the screen when displayed in the
current buffer. */
#define ASCII_CHAR_WIDTH(c) \
: ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))
/* Return the width of character C. The width is measured by how many
- columns occupied on the screen when displayed in the current
+ columns C will occupy on the screen when displayed in the current
buffer. */
#define CHAR_WIDTH(c) \
extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
extern int str_to_multibyte P_ ((unsigned char *, int, int));
extern int str_as_unibyte P_ ((unsigned char *, int));
+extern EMACS_INT str_to_unibyte P_ ((const unsigned char *, unsigned char *,
+ EMACS_INT, int));
extern int strwidth P_ ((unsigned char *, int));
extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *));
extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
extern Lisp_Object Vchar_width_table;
extern Lisp_Object Vchar_direction_table;
extern Lisp_Object Vchar_unify_table;
+extern Lisp_Object Vunicode_category_table;
extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object));