Update copyright notices for 2013.
[bpt/emacs.git] / src / character.h
CommitLineData
0168c3d8
KH
1/* Header for multibyte character handler.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
8f924df7 3 Licensed to the Free Software Foundation.
5df4f04c 4 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
0168c3d8
KH
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H13PRO009
7
8This file is part of GNU Emacs.
9
b9b1cc14 10GNU Emacs is free software: you can redistribute it and/or modify
0168c3d8 11it under the terms of the GNU General Public License as published by
b9b1cc14
GM
12the Free Software Foundation, either version 3 of the License, or
13(at your option) any later version.
0168c3d8
KH
14
15GNU Emacs is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
b9b1cc14 21along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
0168c3d8
KH
22
23#ifndef EMACS_CHARACTER_H
24#define EMACS_CHARACTER_H
25
13bdea59
PE
26#include <verify.h>
27
f162bcc3
PE
28INLINE_HEADER_BEGIN
29#ifndef CHARACTER_INLINE
30# define CHARACTER_INLINE INLINE
31#endif
32
885317d8
KH
33/* character code 1st byte byte sequence
34 -------------- -------- -------------
35 0-7F 00..7F 0xxxxxxx
36 80-7FF C2..DF 110xxxxx 10xxxxxx
37 800-FFFF E0..EF 1110xxxx 10xxxxxx 10xxxxxx
38 10000-1FFFFF F0..F7 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
39 200000-3FFF7F F8 11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
c43e85a9
KH
40 3FFF80-3FFFFF C0..C1 1100000x 10xxxxxx (for eight-bit-char)
41 400000-... invalid
0168c3d8 42
c43e85a9
KH
43 invalid 1st byte 80..BF 10xxxxxx
44 F9..FF 11111xxx (xxx != 000)
0168c3d8
KH
45*/
46
885317d8 47/* Maximum character code ((1 << CHARACTERBITS) - 1). */
0168c3d8
KH
48#define MAX_CHAR 0x3FFFFF
49
885317d8 50/* Maximum Unicode character code. */
0168c3d8
KH
51#define MAX_UNICODE_CHAR 0x10FFFF
52
885317d8 53/* Maximum N-byte character codes. */
0168c3d8
KH
54#define MAX_1_BYTE_CHAR 0x7F
55#define MAX_2_BYTE_CHAR 0x7FF
56#define MAX_3_BYTE_CHAR 0xFFFF
57#define MAX_4_BYTE_CHAR 0x1FFFFF
58#define MAX_5_BYTE_CHAR 0x3FFF7F
59
3a0a38de
KH
60/* Minimum leading code of multibyte characters. */
61#define MIN_MULTIBYTE_LEADING_CODE 0xC0
62/* Maximum leading code of multibyte characters. */
63#define MAX_MULTIBYTE_LEADING_CODE 0xF8
64
8bc28f69
KH
65/* Nonzero iff C is a character that corresponds to a raw 8-bit
66 byte. */
67#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
68
885317d8 69/* Return the character code for raw 8-bit byte BYTE. */
0168c3d8 70#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
885317d8 71
4c0354d7
SM
72#define UNIBYTE_TO_CHAR(byte) \
73 (ASCII_BYTE_P (byte) ? (byte) : BYTE8_TO_CHAR (byte))
74
885317d8 75/* Return the raw 8-bit byte for character C. */
8bc28f69
KH
76#define CHAR_TO_BYTE8(c) \
77 (CHAR_BYTE8_P (c) \
78 ? (c) - 0x3FFF00 \
461c2ab9 79 : multibyte_char_to_unibyte (c))
885317d8 80
2afc21f5
SM
81/* Return the raw 8-bit byte for character C,
82 or -1 if C doesn't correspond to a byte. */
83#define CHAR_TO_BYTE_SAFE(c) \
84 (CHAR_BYTE8_P (c) \
85 ? (c) - 0x3FFF00 \
455af463 86 : multibyte_char_to_unibyte_safe (c))
2afc21f5 87
885317d8
KH
88/* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
89 that corresponds to a raw 8-bit byte. */
0168c3d8
KH
90#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
91
43c47483
KH
92/* If C is not ASCII, make it unibyte. */
93#define MAKE_CHAR_UNIBYTE(c) \
94 do { \
95 if (! ASCII_CHAR_P (c)) \
96 c = CHAR_TO_BYTE8 (c); \
97 } while (0)
3e411074 98
3e411074 99
18a2979d 100/* If C is not ASCII, make it multibyte. Assumes C < 256. */
c0dc8f64 101#define MAKE_CHAR_MULTIBYTE(c) \
4c0354d7 102 (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c))
3e411074 103
885317d8 104/* This is the maximum byte length of multibyte form. */
0168c3d8
KH
105#define MAX_MULTIBYTE_LENGTH 5
106
18a2979d 107/* Return a Lisp character whose character code is C. Assumes C is
b583cead 108 a valid character code. */
0168c3d8
KH
109#define make_char(c) make_number (c)
110
111/* Nonzero iff C is an ASCII byte. */
ea204efb 112#define ASCII_BYTE_P(c) UNSIGNED_CMP (c, <, 0x80)
0168c3d8
KH
113
114/* Nonzero iff X is a character. */
115#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
116
2638320e
PE
117/* Nonzero iff C is valid as a character code. */
118#define CHAR_VALID_P(c) UNSIGNED_CMP (c, <=, MAX_CHAR)
0168c3d8
KH
119
120/* Check if Lisp object X is a character or not. */
63db3c1b
MB
121#define CHECK_CHARACTER(x) \
122 CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
0168c3d8 123
8f924df7
KH
124#define CHECK_CHARACTER_CAR(x) \
125 do { \
126 Lisp_Object tmp = XCAR (x); \
127 CHECK_CHARACTER (tmp); \
128 XSETCAR ((x), tmp); \
129 } while (0)
130
131#define CHECK_CHARACTER_CDR(x) \
132 do { \
133 Lisp_Object tmp = XCDR (x); \
134 CHECK_CHARACTER (tmp); \
135 XSETCDR ((x), tmp); \
136 } while (0)
137
0168c3d8 138/* Nonzero iff C is a character of code less than 0x100. */
ea204efb 139#define SINGLE_BYTE_CHAR_P(c) UNSIGNED_CMP (c, <, 0x100)
0168c3d8
KH
140
141/* Nonzero if character C has a printable glyph. */
142#define CHAR_PRINTABLE_P(c) \
944c7a26
AS
143 (((c) >= 32 && (c) < 127) \
144 || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))
0168c3d8 145
885317d8 146/* Return byte length of multibyte form for character C. */
0168c3d8
KH
147#define CHAR_BYTES(c) \
148 ( (c) <= MAX_1_BYTE_CHAR ? 1 \
149 : (c) <= MAX_2_BYTE_CHAR ? 2 \
150 : (c) <= MAX_3_BYTE_CHAR ? 3 \
151 : (c) <= MAX_4_BYTE_CHAR ? 4 \
152 : (c) <= MAX_5_BYTE_CHAR ? 5 \
153 : 2)
154
43c47483
KH
155
156/* Return the leading code of multibyte form of C. */
157#define CHAR_LEADING_CODE(c) \
158 ((c) <= MAX_1_BYTE_CHAR ? c \
159 : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6)) \
160 : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12)) \
161 : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18)) \
162 : (c) <= MAX_5_BYTE_CHAR ? 0xF8 \
163 : (0xC0 | (((c) >> 6) & 0x01)))
164
165
885317d8
KH
166/* Store multibyte form of the character C in P. The caller should
167 allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
168 Returns the length of the multibyte form. */
0168c3d8
KH
169
170#define CHAR_STRING(c, p) \
ea204efb 171 (UNSIGNED_CMP (c, <=, MAX_1_BYTE_CHAR) \
0168c3d8
KH
172 ? ((p)[0] = (c), \
173 1) \
ea204efb 174 : UNSIGNED_CMP (c, <=, MAX_2_BYTE_CHAR) \
0168c3d8
KH
175 ? ((p)[0] = (0xC0 | ((c) >> 6)), \
176 (p)[1] = (0x80 | ((c) & 0x3F)), \
177 2) \
ea204efb 178 : UNSIGNED_CMP (c, <=, MAX_3_BYTE_CHAR) \
0168c3d8
KH
179 ? ((p)[0] = (0xE0 | ((c) >> 12)), \
180 (p)[1] = (0x80 | (((c) >> 6) & 0x3F)), \
181 (p)[2] = (0x80 | ((c) & 0x3F)), \
182 3) \
a2a01861 183 : verify_expr (sizeof (c) <= sizeof (unsigned), char_string (c, p)))
0168c3d8 184
eb41da4c
KH
185/* Store multibyte form of byte B in P. The caller should allocate at
186 least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the
187 length of the multibyte form. */
1106ea2b
KH
188
189#define BYTE8_STRING(b, p) \
190 ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)), \
7f464917 191 (p)[1] = (0x80 | ((b) & 0x3F)), \
1106ea2b
KH
192 2)
193
0168c3d8 194
18a2979d
EZ
195/* Store multibyte form of the character C in P and advance P to the
196 end of the multibyte form. The caller should allocate at least
197 MAX_MULTIBYTE_LENGTH bytes area at P in advance. */
0168c3d8 198
eb41da4c
KH
199#define CHAR_STRING_ADVANCE(c, p) \
200 do { \
201 if ((c) <= MAX_1_BYTE_CHAR) \
202 *(p)++ = (c); \
203 else if ((c) <= MAX_2_BYTE_CHAR) \
204 *(p)++ = (0xC0 | ((c) >> 6)), \
205 *(p)++ = (0x80 | ((c) & 0x3F)); \
206 else if ((c) <= MAX_3_BYTE_CHAR) \
207 *(p)++ = (0xE0 | ((c) >> 12)), \
208 *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \
209 *(p)++ = (0x80 | ((c) & 0x3F)); \
210 else \
13bdea59
PE
211 { \
212 verify (sizeof (c) <= sizeof (unsigned)); \
213 (p) += char_string (c, p); \
214 } \
885317d8 215 } while (0)
0168c3d8 216
eb41da4c 217
0168c3d8
KH
218/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
219 form. */
220#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
221
b5c7dbe6
KH
222/* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
223 multibyte form. */
224#define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
225
885317d8
KH
226/* Nonzero iff BYTE starts a character in a multibyte form.
227 This is equivalent to:
228 (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte)) */
229#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
230
0168c3d8
KH
231/* How many bytes a character that starts with BYTE occupies in a
232 multibyte form. */
233#define BYTES_BY_CHAR_HEAD(byte) \
234 (!((byte) & 0x80) ? 1 \
235 : !((byte) & 0x20) ? 2 \
236 : !((byte) & 0x10) ? 3 \
237 : !((byte) & 0x08) ? 4 \
238 : 5)
239
240
/* Return the byte length of the multibyte form that starts at P in a
   byte sequence whose end address is PEND (exclusive).  Return 0 if P
   is not before PEND, or if P does not point to a valid multibyte form
   that fits entirely before PEND.

   P must point to unsigned bytes.  P and PEND are evaluated several
   times, so they must be free of side effects.  Both are fully
   parenthesized in the expansion, so any pointer expression
   (including a conditional expression) may be passed.  */

#define MULTIBYTE_LENGTH(p, pend) \
  ((p) >= (pend) ? 0 \
   : !((p)[0] & 0x80) ? 1 \
   : (((p) + 1 >= (pend)) || (((p)[1] & 0xC0) != 0x80)) ? 0 \
   : ((p)[0] & 0xE0) == 0xC0 ? 2 \
   : (((p) + 2 >= (pend)) || (((p)[2] & 0xC0) != 0x80)) ? 0 \
   : ((p)[0] & 0xF0) == 0xE0 ? 3 \
   : (((p) + 3 >= (pend)) || (((p)[3] & 0xC0) != 0x80)) ? 0 \
   : ((p)[0] & 0xF8) == 0xF0 ? 4 \
   : (((p) + 4 >= (pend)) || (((p)[4] & 0xC0) != 0x80)) ? 0 \
   /* A 5-byte form needs leading byte 0xF8 and a 2nd byte 1000xxxx.  */ \
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
   : 0)
256
257
18a2979d 258/* Like MULTIBYTE_LENGTH, but don't check the ending address. */
0168c3d8
KH
259
260#define MULTIBYTE_LENGTH_NO_CHECK(p) \
261 (!((p)[0] & 0x80) ? 1 \
262 : ((p)[1] & 0xC0) != 0x80 ? 0 \
263 : ((p)[0] & 0xE0) == 0xC0 ? 2 \
264 : ((p)[2] & 0xC0) != 0x80 ? 0 \
265 : ((p)[0] & 0xF0) == 0xE0 ? 3 \
266 : ((p)[3] & 0xC0) != 0x80 ? 0 \
267 : ((p)[0] & 0xF8) == 0xF0 ? 4 \
268 : ((p)[4] & 0xC0) != 0x80 ? 0 \
269 : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
270 : 0)
271
18a2979d
EZ
272/* If P is before LIMIT, advance P to the next character boundary.
273 Assumes that P is already at a character boundary of the same
78edd3b7 274 multibyte form whose end address is LIMIT. */
8f924df7
KH
275
276#define NEXT_CHAR_BOUNDARY(p, limit) \
277 do { \
278 if ((p) < (limit)) \
279 (p) += BYTES_BY_CHAR_HEAD (*(p)); \
280 } while (0)
281
282
/* If P is after LIMIT, move P back to the previous character boundary.
   Assumes that P is already at a character boundary of the same
   multibyte form whose beginning address is LIMIT.

   The macro scans backward from P for the nearest byte satisfying
   CHAR_HEAD_P; the scan also ends once it steps below LIMIT.  If the
   character starting at the byte where the scan ended is exactly as
   long (per BYTES_BY_CHAR_HEAD) as the distance back to P, P is set to
   that byte; otherwise P is simply moved back by one byte.

   P must be an lvalue.  P and LIMIT are evaluated more than once, so
   they must be free of side effects.
   NOTE(review): if no head byte exists at or after LIMIT, the byte
   just before LIMIT is read; presumably callers guarantee that LIMIT
   itself is a character boundary -- confirm.  */

#define PREV_CHAR_BOUNDARY(p, limit) \
  do { \
    if ((p) > (limit)) \
      { \
        const unsigned char *chp = (p); \
        do { \
          chp--; \
        } while (chp >= (limit) && ! CHAR_HEAD_P (*chp)); \
        (p) = (BYTES_BY_CHAR_HEAD (*chp) == (p) - chp) ? chp : (p) - 1; \
      } \
  } while (0)
0168c3d8
KH
298
299/* Return the character code of character whose multibyte form is at
2f8e16b2
EZ
300 P. Note that this macro unifies CJK characters whose codepoints
301 are in the Private Use Areas (PUAs), so it might return a different
302 codepoint from the one actually stored at P. */
0168c3d8 303
62a6e103 304#define STRING_CHAR(p) \
0168c3d8
KH
305 (!((p)[0] & 0x80) \
306 ? (p)[0] \
307 : ! ((p)[0] & 0x20) \
308 ? (((((p)[0] & 0x1F) << 6) \
309 | ((p)[1] & 0x3F)) \
310 + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)) \
311 : ! ((p)[0] & 0x10) \
312 ? ((((p)[0] & 0x0F) << 12) \
313 | (((p)[1] & 0x3F) << 6) \
314 | ((p)[2] & 0x3F)) \
eb41da4c 315 : string_char ((p), NULL, NULL))
0168c3d8
KH
316
317
18a2979d 318/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
2f8e16b2
EZ
319 form.
320
321 Note: This macro returns the actual length of the character's
322 multibyte sequence as it is stored in a buffer or string. The
323 character it returns might have a different codepoint that has a
5ae255c7 324 different multibyte sequence of a different length, due to possible
2f8e16b2
EZ
325 unification of CJK characters inside string_char. Therefore do NOT
326 assume that the length returned by this macro is identical to the
327 length of the multibyte sequence of the character it returns. */
0168c3d8 328
62a6e103 329#define STRING_CHAR_AND_LENGTH(p, actual_len) \
0168c3d8
KH
330 (!((p)[0] & 0x80) \
331 ? ((actual_len) = 1, (p)[0]) \
332 : ! ((p)[0] & 0x20) \
333 ? ((actual_len) = 2, \
334 (((((p)[0] & 0x1F) << 6) \
335 | ((p)[1] & 0x3F)) \
336 + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))) \
337 : ! ((p)[0] & 0x10) \
338 ? ((actual_len) = 3, \
339 ((((p)[0] & 0x0F) << 12) \
340 | (((p)[1] & 0x3F) << 6) \
341 | ((p)[2] & 0x3F))) \
eb41da4c 342 : string_char ((p), NULL, &actual_len))
0168c3d8
KH
343
344
18a2979d 345/* Like STRING_CHAR, but advance P to the end of multibyte form. */
0168c3d8
KH
346
347#define STRING_CHAR_ADVANCE(p) \
348 (!((p)[0] & 0x80) \
349 ? *(p)++ \
350 : ! ((p)[0] & 0x20) \
351 ? ((p) += 2, \
352 ((((p)[-2] & 0x1F) << 6) \
353 | ((p)[-1] & 0x3F) \
8f924df7 354 | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
0168c3d8
KH
355 : ! ((p)[0] & 0x10) \
356 ? ((p) += 3, \
357 ((((p)[-3] & 0x0F) << 12) \
358 | (((p)[-2] & 0x3F) << 6) \
359 | ((p)[-1] & 0x3F))) \
eb41da4c 360 : string_char ((p), &(p), NULL))
0168c3d8
KH
361
362
363/* Fetch the "next" character from Lisp string STRING at byte position
364 BYTEIDX, character position CHARIDX. Store it into OUTPUT.
365
366 All the args must be side-effect-free.
367 BYTEIDX and CHARIDX must be lvalues;
368 we increment them past the character fetched. */
369
370#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
a1c2ac9a 371 do \
0168c3d8
KH
372 { \
373 CHARIDX++; \
374 if (STRING_MULTIBYTE (STRING)) \
375 { \
760fbc2c
PE
376 unsigned char *chp = &SDATA (STRING)[BYTEIDX]; \
377 int chlen; \
0168c3d8 378 \
760fbc2c
PE
379 OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
380 BYTEIDX += chlen; \
0168c3d8
KH
381 } \
382 else \
f1c99628
SM
383 { \
384 OUTPUT = SREF (STRING, BYTEIDX); \
385 BYTEIDX++; \
386 } \
0168c3d8 387 } \
a1c2ac9a 388 while (0)
0168c3d8 389
18a2979d
EZ
390/* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
391 even if STRING is unibyte. */
43c47483
KH
392
393#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
a1c2ac9a 394 do \
43c47483
KH
395 { \
396 CHARIDX++; \
397 if (STRING_MULTIBYTE (STRING)) \
398 { \
760fbc2c
PE
399 unsigned char *chp = &SDATA (STRING)[BYTEIDX]; \
400 int chlen; \
43c47483 401 \
760fbc2c
PE
402 OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
403 BYTEIDX += chlen; \
43c47483
KH
404 } \
405 else \
406 { \
f1c99628
SM
407 OUTPUT = SREF (STRING, BYTEIDX); \
408 BYTEIDX++; \
43c47483
KH
409 MAKE_CHAR_MULTIBYTE (OUTPUT); \
410 } \
411 } \
a1c2ac9a 412 while (0)
43c47483 413
0168c3d8 414
18a2979d 415/* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte. */
0168c3d8
KH
416
417#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
a1c2ac9a 418 do \
0168c3d8 419 { \
a6670b0b
PE
420 unsigned char *fetch_ptr = &SDATA (STRING)[BYTEIDX]; \
421 int fetch_len; \
0168c3d8 422 \
a6670b0b
PE
423 OUTPUT = STRING_CHAR_AND_LENGTH (fetch_ptr, fetch_len); \
424 BYTEIDX += fetch_len; \
0168c3d8
KH
425 CHARIDX++; \
426 } \
a1c2ac9a 427 while (0)
0168c3d8
KH
428
429
18a2979d 430/* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
0168c3d8
KH
431 buffer. */
432
433#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
a1c2ac9a 434 do \
0168c3d8
KH
435 { \
436 CHARIDX++; \
4b4deea2 437 if (!NILP (BVAR (current_buffer, enable_multibyte_characters))) \
0168c3d8 438 { \
760fbc2c
PE
439 unsigned char *chp = BYTE_POS_ADDR (BYTEIDX); \
440 int chlen; \
0168c3d8 441 \
e8d32c7e 442 OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
760fbc2c 443 BYTEIDX += chlen; \
0168c3d8
KH
444 } \
445 else \
446 { \
447 OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
448 BYTEIDX++; \
449 } \
450 } \
a1c2ac9a 451 while (0)
0168c3d8
KH
452
453
18a2979d 454/* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte. */
0168c3d8
KH
455
456#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
a1c2ac9a 457 do \
0168c3d8 458 { \
760fbc2c
PE
459 unsigned char *chp = BYTE_POS_ADDR (BYTEIDX); \
460 int chlen; \
0168c3d8 461 \
760fbc2c
PE
462 OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen); \
463 BYTEIDX += chlen; \
0168c3d8
KH
464 CHARIDX++; \
465 } \
a1c2ac9a 466 while (0)
0168c3d8
KH
467
468
18a2979d 469/* Increment the buffer byte position POS_BYTE of the current buffer to
0168c3d8
KH
470 the next character boundary. No range checking of POS_BYTE. */
471
472#define INC_POS(pos_byte) \
473 do { \
760fbc2c
PE
474 unsigned char *chp = BYTE_POS_ADDR (pos_byte); \
475 pos_byte += BYTES_BY_CHAR_HEAD (*chp); \
0168c3d8
KH
476 } while (0)
477
478
18a2979d 479/* Decrement the buffer byte position POS_BYTE of the current buffer to
0168c3d8
KH
480 the previous character boundary. No range checking of POS_BYTE. */
481
482#define DEC_POS(pos_byte) \
483 do { \
760fbc2c 484 unsigned char *chp; \
0168c3d8
KH
485 \
486 pos_byte--; \
487 if (pos_byte < GPT_BYTE) \
760fbc2c 488 chp = BEG_ADDR + pos_byte - BEG_BYTE; \
0168c3d8 489 else \
760fbc2c
PE
490 chp = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE; \
491 while (!CHAR_HEAD_P (*chp)) \
0168c3d8 492 { \
760fbc2c 493 chp--; \
0168c3d8
KH
494 pos_byte--; \
495 } \
496 } while (0)
497
498/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
499
500#define INC_BOTH(charpos, bytepos) \
501 do \
502 { \
503 (charpos)++; \
4b4deea2 504 if (NILP (BVAR (current_buffer, enable_multibyte_characters))) \
0168c3d8
KH
505 (bytepos)++; \
506 else \
507 INC_POS ((bytepos)); \
508 } \
509 while (0)
510
511
512/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */
513
514#define DEC_BOTH(charpos, bytepos) \
515 do \
516 { \
517 (charpos)--; \
4b4deea2 518 if (NILP (BVAR (current_buffer, enable_multibyte_characters))) \
0168c3d8
KH
519 (bytepos)--; \
520 else \
521 DEC_POS ((bytepos)); \
522 } \
523 while (0)
524
525
18a2979d 526/* Increment the buffer byte position POS_BYTE of buffer BUF to
0168c3d8
KH
527 the next character boundary. This macro relies on the fact that
528 *GPT_ADDR and *Z_ADDR are always accessible and the values are
529 '\0'. No range checking of POS_BYTE. */
530
531#define BUF_INC_POS(buf, pos_byte) \
532 do { \
760fbc2c
PE
533 unsigned char *chp = BUF_BYTE_ADDRESS (buf, pos_byte); \
534 pos_byte += BYTES_BY_CHAR_HEAD (*chp); \
0168c3d8
KH
535 } while (0)
536
537
18a2979d 538/* Decrement the buffer byte position POS_BYTE of buffer BUF to
0168c3d8
KH
539 the previous character boundary. No range checking of POS_BYTE. */
540
541#define BUF_DEC_POS(buf, pos_byte) \
542 do { \
760fbc2c 543 unsigned char *chp; \
0168c3d8
KH
544 pos_byte--; \
545 if (pos_byte < BUF_GPT_BYTE (buf)) \
760fbc2c 546 chp = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE; \
0168c3d8 547 else \
760fbc2c
PE
548 chp = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
549 while (!CHAR_HEAD_P (*chp)) \
0168c3d8 550 { \
760fbc2c 551 chp--; \
0168c3d8
KH
552 pos_byte--; \
553 } \
554 } while (0)
555
556
a2271ba2
PE
557/* Return a non-outlandish value for the tab width. */
558
5637687f
PE
559#define SANE_TAB_WIDTH(buf) \
560 sanitize_tab_width (XFASTINT (BVAR (buf, tab_width)))
f162bcc3 561CHARACTER_INLINE int
5637687f 562sanitize_tab_width (EMACS_INT width)
a2271ba2
PE
563{
564 return 0 < width && width <= 1000 ? width : 8;
565}
566
/* Return the width of ASCII character C.  The width is measured by
   how many columns C will occupy on the screen when displayed in the
   current buffer:

   - TAB yields SANE_TAB_WIDTH (current_buffer).
   - Newline yields 0.
   - Any other character below 0x20, and any character not below 0x7F,
     yields 4 if the current buffer's ctl_arrow is nil, else 2.
   - Characters from 0x20 through 0x7E yield 1.

   C is evaluated several times, so it must be free of side effects.
   It is fully parenthesized in the expansion, so an argument such as
   `ch & 0x7F' is handled correctly.  */

#define ASCII_CHAR_WIDTH(c) \
  ((c) < 0x20 \
   ? ((c) == '\t' \
      ? SANE_TAB_WIDTH (current_buffer) \
      : ((c) == '\n' ? 0 : (NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))) \
   : ((c) < 0x7f \
      ? 1 \
      : ((NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))))
0168c3d8 579
5637687f
PE
580/* Return a non-outlandish value for a character width. */
581
f162bcc3 582CHARACTER_INLINE int
5637687f
PE
583sanitize_char_width (EMACS_INT width)
584{
585 return 0 <= width && width <= 1000 ? width : 1000;
586}
587
0168c3d8 588/* Return the width of character C. The width is measured by how many
18a2979d 589 columns C will occupy on the screen when displayed in the current
0168c3d8
KH
590 buffer. */
591
592#define CHAR_WIDTH(c) \
593 (ASCII_CHAR_P (c) \
594 ? ASCII_CHAR_WIDTH (c) \
5637687f 595 : sanitize_char_width (XINT (CHAR_TABLE_REF (Vchar_width_table, c))))
0168c3d8 596
e1dbe924 597/* If C is a variation selector, return the index of the
c19cab20
KH
598 variation selector (1..256). Otherwise, return 0. */
599
600#define CHAR_VARIATION_SELECTOR_P(c) \
601 ((c) < 0xFE00 ? 0 \
602 : (c) <= 0xFE0F ? (c) - 0xFE00 + 1 \
603 : (c) < 0xE0100 ? 0 \
604 : (c) <= 0xE01EF ? (c) - 0xE0100 + 17 \
605 : 0)
606
607/* If C is a high surrogate, return 1. If C is a low surrogate,
409f2919 608 return 2. Otherwise, return 0. */
c19cab20
KH
609
610#define CHAR_SURROGATE_PAIR_P(c) \
611 ((c) < 0xD800 ? 0 \
612 : (c) <= 0xDBFF ? 1 \
613 : (c) <= 0xDFFF ? 2 \
614 : 0)
615
c805dec0
KH
616/* Data type for Unicode general category.
617
618 The order of members must be in sync with the 8th element of the
619 member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for
620 Unicode character property `general-category'. */
621
622typedef enum {
623 UNICODE_CATEGORY_UNKNOWN = 0,
624 UNICODE_CATEGORY_Lu,
625 UNICODE_CATEGORY_Ll,
626 UNICODE_CATEGORY_Lt,
627 UNICODE_CATEGORY_Lm,
628 UNICODE_CATEGORY_Lo,
629 UNICODE_CATEGORY_Mn,
630 UNICODE_CATEGORY_Mc,
631 UNICODE_CATEGORY_Me,
632 UNICODE_CATEGORY_Nd,
633 UNICODE_CATEGORY_Nl,
634 UNICODE_CATEGORY_No,
635 UNICODE_CATEGORY_Pc,
636 UNICODE_CATEGORY_Pd,
637 UNICODE_CATEGORY_Ps,
638 UNICODE_CATEGORY_Pe,
639 UNICODE_CATEGORY_Pi,
640 UNICODE_CATEGORY_Pf,
641 UNICODE_CATEGORY_Po,
642 UNICODE_CATEGORY_Sm,
643 UNICODE_CATEGORY_Sc,
644 UNICODE_CATEGORY_Sk,
645 UNICODE_CATEGORY_So,
646 UNICODE_CATEGORY_Zs,
647 UNICODE_CATEGORY_Zl,
648 UNICODE_CATEGORY_Zp,
649 UNICODE_CATEGORY_Cc,
650 UNICODE_CATEGORY_Cf,
651 UNICODE_CATEGORY_Cs,
652 UNICODE_CATEGORY_Co,
653 UNICODE_CATEGORY_Cn
654} unicode_category_t;
c19cab20 655
5994c183 656extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST;
383e0970
J
657extern int char_string (unsigned, unsigned char *);
658extern int string_char (const unsigned char *,
659 const unsigned char **, int *);
660
661extern int translate_char (Lisp_Object, int c);
14162469 662extern void parse_str_as_multibyte (const unsigned char *,
d311d28c
PE
663 ptrdiff_t, ptrdiff_t *, ptrdiff_t *);
664extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
665extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
666 ptrdiff_t *);
667extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
668extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
669extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
d5172d4f 670 ptrdiff_t);
d311d28c
PE
671extern ptrdiff_t strwidth (const char *, ptrdiff_t);
672extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
673 ptrdiff_t *, ptrdiff_t *);
674extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t,
675 ptrdiff_t *, ptrdiff_t *);
0168c3d8 676
955cbe7b 677extern Lisp_Object Qcharacterp;
0168c3d8 678extern Lisp_Object Vchar_unify_table;
383e0970 679extern Lisp_Object string_escape_byte8 (Lisp_Object);
fac2bdc4 680
0168c3d8
KH
681/* Return a translation table of id number ID. */
682#define GET_TRANSLATION_TABLE(id) \
683 (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
684
f162bcc3
PE
685INLINE_HEADER_END
686
0168c3d8 687#endif /* EMACS_CHARACTER_H */