use guile conses
[bpt/emacs.git] / src / character.h
CommitLineData
0168c3d8
KH
1/* Header for multibyte character handler.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
8f924df7 3 Licensed to the Free Software Foundation.
5df4f04c 4 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
0168c3d8
KH
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H13PRO009
7
8This file is part of GNU Emacs.
9
b9b1cc14 10GNU Emacs is free software: you can redistribute it and/or modify
0168c3d8 11it under the terms of the GNU General Public License as published by
b9b1cc14
GM
12the Free Software Foundation, either version 3 of the License, or
13(at your option) any later version.
0168c3d8
KH
14
15GNU Emacs is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
b9b1cc14 21along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
0168c3d8
KH
22
23#ifndef EMACS_CHARACTER_H
24#define EMACS_CHARACTER_H
25
13bdea59
PE
26#include <verify.h>
27
f162bcc3 28INLINE_HEADER_BEGIN
f162bcc3 29
/*	character code	1st byte   byte sequence
	--------------	--------   -------------
	     0-7F	00..7F	   0xxxxxxx
	    80-7FF	C2..DF	   110xxxxx 10xxxxxx
	   800-FFFF	E0..EF	   1110xxxx 10xxxxxx 10xxxxxx
	 10000-1FFFFF	F0..F7	   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	200000-3FFF7F	F8	   11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
	3FFF80-3FFFFF	C0..C1	   1100000x 10xxxxxx	(for eight-bit-char)
	400000-...	invalid

	invalid 1st byte	80..BF	   10xxxxxx
				F9..FF	   11111xxx (xxx != 000)
*/
43
/* Maximum character code ((1 << CHARACTERBITS) - 1).  */
#define MAX_CHAR  0x3FFFFF

/* Maximum Unicode character code.  */
#define MAX_UNICODE_CHAR 0x10FFFF

/* Maximum N-byte character codes.  */
#define MAX_1_BYTE_CHAR 0x7F
#define MAX_2_BYTE_CHAR 0x7FF
#define MAX_3_BYTE_CHAR 0xFFFF
#define MAX_4_BYTE_CHAR 0x1FFFFF
#define MAX_5_BYTE_CHAR 0x3FFF7F

/* Minimum leading code of multibyte characters.  */
#define MIN_MULTIBYTE_LEADING_CODE 0xC0
/* Maximum leading code of multibyte characters.  */
#define MAX_MULTIBYTE_LEADING_CODE 0xF8

/* Nonzero iff C is a character that corresponds to a raw 8-bit
   byte, i.e. its code is above MAX_5_BYTE_CHAR.  */
#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)

/* Return the character code for raw 8-bit byte BYTE.  */
#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)

/* Return the character code for unibyte BYTE: an ASCII byte maps to
   itself, any other byte goes through BYTE8_TO_CHAR.  */
#define UNIBYTE_TO_CHAR(byte) \
  (ASCII_CHAR_P (byte) ? (byte) : BYTE8_TO_CHAR (byte))

/* Return the raw 8-bit byte for character C.  If C is not a raw-byte
   character, the result is just the low 8 bits of C.  C is fully
   parenthesized so that an argument such as `a | b' is masked as a
   whole.  */
#define CHAR_TO_BYTE8(c) \
  (CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 : ((c) & 0xFF))

/* Return the raw 8-bit byte for character C,
   or -1 if C doesn't correspond to a byte.  */
#define CHAR_TO_BYTE_SAFE(c) \
  (ASCII_CHAR_P (c) ? (c) : (CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 : -1))

/* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
   that corresponds to a raw 8-bit byte.  */
#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
83
/* If C is not ASCII, replace it with its raw 8-bit byte value
   (CHAR_TO_BYTE8).  C must be an lvalue and is evaluated more than
   once.  */
#define MAKE_CHAR_UNIBYTE(c)	\
  do {				\
    if (! ASCII_CHAR_P (c))	\
      (c) = CHAR_TO_BYTE8 (c);	\
  } while (false)


/* If C is not ASCII, make it multibyte.  Assumes C < 256 (checked by
   eassert).  C must be an lvalue and is evaluated more than once.
   The value of the expression is the new value of C.  */
#define MAKE_CHAR_MULTIBYTE(c)	\
  (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c))
3e411074 95
/* This is the maximum byte length of multibyte form.  */
#define MAX_MULTIBYTE_LENGTH 5
98
/* Nonzero iff Lisp object X is a character, i.e. a non-negative
   integer (NATNUMP) not greater than MAX_CHAR.  */
#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)

/* Nonzero iff C is valid as a character code.  The comparison is done
   through UNSIGNED_CMP.  */
#define CHAR_VALID_P(c) UNSIGNED_CMP (c, <=, MAX_CHAR)

/* Check if Lisp object X is a character or not; the check is
   delegated to CHECK_TYPE with the predicate symbol Qcharacterp.  */
#define CHECK_CHARACTER(x) \
  CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
0168c3d8 108
/* Check that the car of cons X is a character (CHECK_CHARACTER), then
   store the checked value back into the car.  X is evaluated more
   than once and must not be an expression naming `tmp'.  */
#define CHECK_CHARACTER_CAR(x) \
  do {					\
    Lisp_Object tmp = XCAR (x);		\
    CHECK_CHARACTER (tmp);		\
    XSETCAR ((x), tmp);			\
  } while (false)

/* Like CHECK_CHARACTER_CAR, but for the cdr of cons X.  */
#define CHECK_CHARACTER_CDR(x) \
  do {					\
    Lisp_Object tmp = XCDR (x);		\
    CHECK_CHARACTER (tmp);		\
    XSETCDR ((x), tmp);			\
  } while (false)
8f924df7 122
/* Nonzero iff C is a character of code less than 0x100.  */
#define SINGLE_BYTE_CHAR_P(c) UNSIGNED_CMP (c, <, 0x100)
0168c3d8
KH
125
/* Nonzero if character C has a printable glyph: either C is in the
   range 32..126, or its entry in the char-table Vprintable_chars is
   non-nil.  */
#define CHAR_PRINTABLE_P(c)	\
  (((c) >= 32 && (c) < 127)	\
   || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))
0168c3d8 130
/* Return byte length of multibyte form for character C.  Codes above
   MAX_5_BYTE_CHAR (the raw 8-bit byte characters) take 2 bytes.  */
#define CHAR_BYTES(c)			\
  ( (c) <= MAX_1_BYTE_CHAR ? 1		\
    : (c) <= MAX_2_BYTE_CHAR ? 2	\
    : (c) <= MAX_3_BYTE_CHAR ? 3	\
    : (c) <= MAX_4_BYTE_CHAR ? 4	\
    : (c) <= MAX_5_BYTE_CHAR ? 5	\
    : 2)
139
43c47483
KH
140
/* Return the leading code of multibyte form of C.  Codes above
   MAX_5_BYTE_CHAR yield 0xC0 or 0xC1, selected by bit 6 of C.  */
#define CHAR_LEADING_CODE(c)				\
  ((c) <= MAX_1_BYTE_CHAR ? (c)				\
   : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6))	\
   : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12))	\
   : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18))	\
   : (c) <= MAX_5_BYTE_CHAR ? 0xF8			\
   : (0xC0 | (((c) >> 6) & 0x01)))
149
150
/* Store multibyte form of the character C in P.  The caller should
   allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
   Returns the length of the multibyte form.

   Forms of one to three bytes are written inline; anything longer is
   handed to char_string, after a compile-time check that C is no
   wider than `unsigned'.  C and P are evaluated more than once.  */

#define CHAR_STRING(c, p)				\
  (UNSIGNED_CMP (c, <=, MAX_1_BYTE_CHAR)		\
   ? ((p)[0] = (c),					\
      1)						\
   : UNSIGNED_CMP (c, <=, MAX_2_BYTE_CHAR)		\
   ? ((p)[0] = (0xC0 | ((c) >> 6)),			\
      (p)[1] = (0x80 | ((c) & 0x3F)),			\
      2)						\
   : UNSIGNED_CMP (c, <=, MAX_3_BYTE_CHAR)		\
   ? ((p)[0] = (0xE0 | ((c) >> 12)),			\
      (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),		\
      (p)[2] = (0x80 | ((c) & 0x3F)),			\
      3)						\
   : verify_expr (sizeof (c) <= sizeof (unsigned), char_string (c, p)))
0168c3d8 169
/* Store multibyte form of byte B in P.  The caller should allocate at
   least MAX_MULTIBYTE_LENGTH bytes area at P in advance.  Returns the
   length of the multibyte form, which is always 2: a leading 0xC0 or
   0xC1 (chosen by bit 6 of B) followed by 0x80 plus the low six bits
   of B.  */

#define BYTE8_STRING(b, p)			\
  ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),	\
   (p)[1] = (0x80 | ((b) & 0x3F)),		\
   2)
178
0168c3d8 179
/* Store multibyte form of the character C in P and advance P to the
   end of the multibyte form.  The caller should allocate at least
   MAX_MULTIBYTE_LENGTH bytes area at P in advance.

   P must be a modifiable pointer lvalue; C and P are evaluated more
   than once.  Forms longer than three bytes go through char_string.  */

#define CHAR_STRING_ADVANCE(c, p)			\
  do {							\
    if ((c) <= MAX_1_BYTE_CHAR)				\
      {							\
	*(p)++ = (c);					\
      }							\
    else if ((c) <= MAX_2_BYTE_CHAR)			\
      {							\
	*(p)++ = (0xC0 | ((c) >> 6));			\
	*(p)++ = (0x80 | ((c) & 0x3F));			\
      }							\
    else if ((c) <= MAX_3_BYTE_CHAR)			\
      {							\
	*(p)++ = (0xE0 | ((c) >> 12));			\
	*(p)++ = (0x80 | (((c) >> 6) & 0x3F));		\
	*(p)++ = (0x80 | ((c) & 0x3F));			\
      }							\
    else						\
      {							\
	verify (sizeof (c) <= sizeof (unsigned));	\
	(p) += char_string (c, p);			\
      }							\
  } while (false)
0168c3d8 201
eb41da4c 202
/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
   form (its two top bits are both set).  */
#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)

/* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
   multibyte form (bit pattern 10xxxxxx).  */
#define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)

/* Nonzero iff BYTE starts a character in a multibyte form.
   This is equivalent to:
	(ASCII_CHAR_P (byte) || LEADING_CODE_P (byte))  */
#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
215
/* How many bytes a character that starts with BYTE occupies in a
   multibyte form.  Only the high bits of BYTE are examined; BYTE is
   not checked for being a valid leading code.  */
#define BYTES_BY_CHAR_HEAD(byte)	\
  (!((byte) & 0x80) ? 1			\
   : !((byte) & 0x20) ? 2		\
   : !((byte) & 0x10) ? 3		\
   : !((byte) & 0x08) ? 4		\
   : 5)
224
225
/* The byte length of multibyte form at unibyte string P ending at
   PEND.  If P doesn't point to a valid multibyte form, or the form
   would run past PEND, return 0.

   P and PEND are evaluated more than once; both are fully
   parenthesized so that arbitrary pointer expressions may be
   passed.  */

#define MULTIBYTE_LENGTH(p, pend)					\
  ((p) >= (pend) ? 0							\
   : !((p)[0] & 0x80) ? 1						\
   : (((p) + 1 >= (pend)) || (((p)[1] & 0xC0) != 0x80)) ? 0		\
   : ((p)[0] & 0xE0) == 0xC0 ? 2					\
   : (((p) + 2 >= (pend)) || (((p)[2] & 0xC0) != 0x80)) ? 0		\
   : ((p)[0] & 0xF0) == 0xE0 ? 3					\
   : (((p) + 3 >= (pend)) || (((p)[3] & 0xC0) != 0x80)) ? 0		\
   : ((p)[0] & 0xF8) == 0xF0 ? 4					\
   : (((p) + 4 >= (pend)) || (((p)[4] & 0xC0) != 0x80)) ? 0		\
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5			\
   : 0)
241
242
/* Like MULTIBYTE_LENGTH, but don't check the ending address.  The
   bytes following P are read as far as the sequence requires, so the
   caller must guarantee that they are accessible.  */

#define MULTIBYTE_LENGTH_NO_CHECK(p)			\
  (!((p)[0] & 0x80) ? 1					\
   : ((p)[1] & 0xC0) != 0x80 ? 0			\
   : ((p)[0] & 0xE0) == 0xC0 ? 2			\
   : ((p)[2] & 0xC0) != 0x80 ? 0			\
   : ((p)[0] & 0xF0) == 0xE0 ? 3			\
   : ((p)[3] & 0xC0) != 0x80 ? 0			\
   : ((p)[0] & 0xF8) == 0xF0 ? 4			\
   : ((p)[4] & 0xC0) != 0x80 ? 0			\
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5	\
   : 0)
256
/* If P is before LIMIT, advance P to the next character boundary.
   Assumes that P is already at a character boundary of the same
   multibyte form whose end address is LIMIT.  The step size comes
   from BYTES_BY_CHAR_HEAD applied to the byte at P.  */

#define NEXT_CHAR_BOUNDARY(p, limit)	\
  do {					\
    if ((p) < (limit))			\
      (p) += BYTES_BY_CHAR_HEAD (*(p));	\
  } while (false)
8f924df7
KH
266
267
/* If P is after LIMIT, move P back to the previous character boundary.
   Assumes that P is already at a character boundary of the same
   multibyte form whose beginning address is LIMIT.

   The scan walks backward from P - 1 to the nearest character head
   but never goes below LIMIT, so no byte before LIMIT is read.  If
   the head found does not announce exactly the number of bytes
   between it and P, P is moved back by a single byte instead.  */

#define PREV_CHAR_BOUNDARY(p, limit)					\
  do {									\
    if ((p) > (limit))							\
      {									\
	const unsigned char *chp = (p) - 1;				\
	while (chp > (limit) && ! CHAR_HEAD_P (*chp))			\
	  chp--;							\
	(p) = (BYTES_BY_CHAR_HEAD (*chp) == (p) - chp) ? chp : (p) - 1;	\
      }									\
  } while (false)
0168c3d8
KH
283
/* Return the character code of character whose multibyte form is at
   P.  Note that this macro unifies CJK characters whose codepoints
   are in the Private Use Areas (PUAs), so it might return a different
   codepoint from the one actually stored at P.

   One-, two- and three-byte forms are decoded inline; a two-byte form
   whose leading byte is below 0xC2 is offset by 0x3FFF80 (the raw
   8-bit byte range).  Longer forms are decoded by string_char.  */

#define STRING_CHAR(p)						\
  (!((p)[0] & 0x80)						\
   ? (p)[0]							\
   : ! ((p)[0] & 0x20)						\
   ? (((((p)[0] & 0x1F) << 6)					\
       | ((p)[1] & 0x3F))					\
      + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))	\
   : ! ((p)[0] & 0x10)						\
   ? ((((p)[0] & 0x0F) << 12)					\
      | (((p)[1] & 0x3F) << 6)					\
      | ((p)[2] & 0x3F))					\
   : string_char ((p), NULL, NULL))
0168c3d8
KH
301
302
/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
   form.  ACTUAL_LEN must be an int lvalue.

   Note: This macro returns the actual length of the character's
   multibyte sequence as it is stored in a buffer or string.  The
   character it returns might have a different codepoint that has a
   different multibyte sequence of a different length, due to possible
   unification of CJK characters inside string_char.  Therefore do NOT
   assume that the length returned by this macro is identical to the
   length of the multibyte sequence of the character it returns.  */

#define STRING_CHAR_AND_LENGTH(p, actual_len)			\
  (!((p)[0] & 0x80)						\
   ? ((actual_len) = 1, (p)[0])					\
   : ! ((p)[0] & 0x20)						\
   ? ((actual_len) = 2,						\
      (((((p)[0] & 0x1F) << 6)					\
	| ((p)[1] & 0x3F))					\
       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))	\
   : ! ((p)[0] & 0x10)						\
   ? ((actual_len) = 3,						\
      ((((p)[0] & 0x0F) << 12)					\
       | (((p)[1] & 0x3F) << 6)					\
       | ((p)[2] & 0x3F)))					\
   : string_char ((p), NULL, &(actual_len)))
0168c3d8
KH
328
329
/* Like STRING_CHAR, but advance P to the end of multibyte form.  P
   must be a modifiable pointer lvalue.  In the inline branches P is
   advanced first and the bytes are then read back through negative
   indices; forms longer than three bytes let string_char update P.  */

#define STRING_CHAR_ADVANCE(p)					\
  (!((p)[0] & 0x80)						\
   ? *(p)++							\
   : ! ((p)[0] & 0x20)						\
   ? ((p) += 2,							\
      ((((p)[-2] & 0x1F) << 6)					\
       | ((p)[-1] & 0x3F)					\
       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))	\
   : ! ((p)[0] & 0x10)						\
   ? ((p) += 3,							\
      ((((p)[-3] & 0x0F) << 12)					\
       | (((p)[-2] & 0x3F) << 6)				\
       | ((p)[-1] & 0x3F)))					\
   : string_char ((p), &(p), NULL))
0168c3d8
KH
346
347
/* Fetch the "next" character from Lisp string STRING at byte position
   BYTEIDX, character position CHARIDX.  Store it into OUTPUT.

   All the args must be side-effect-free.
   BYTEIDX and CHARIDX must be lvalues;
   we increment them past the character fetched.

   For a multibyte STRING the character is decoded with
   STRING_CHAR_AND_LENGTH; otherwise the single byte at BYTEIDX is
   used as is.  */

#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)	\
  do									\
    {									\
      (CHARIDX)++;							\
      if (STRING_MULTIBYTE (STRING))					\
	{								\
	  unsigned char *chp = &SDATA (STRING)[BYTEIDX];		\
	  int chlen;							\
									\
	  (OUTPUT) = STRING_CHAR_AND_LENGTH (chp, chlen);		\
	  (BYTEIDX) += chlen;						\
	}								\
      else								\
	{								\
	  (OUTPUT) = SREF (STRING, BYTEIDX);				\
	  (BYTEIDX)++;							\
	}								\
    }									\
  while (false)
0168c3d8 374
18a2979d
EZ
/* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
   even if STRING is unibyte: a byte fetched from a unibyte STRING is
   passed through MAKE_CHAR_MULTIBYTE.  OUTPUT, CHARIDX and BYTEIDX
   must be side-effect-free lvalues.  */

#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  do									\
    {									\
      (CHARIDX)++;							\
      if (STRING_MULTIBYTE (STRING))					\
	{								\
	  unsigned char *chp = &SDATA (STRING)[BYTEIDX];		\
	  int chlen;							\
									\
	  (OUTPUT) = STRING_CHAR_AND_LENGTH (chp, chlen);		\
	  (BYTEIDX) += chlen;						\
	}								\
      else								\
	{								\
	  (OUTPUT) = SREF (STRING, BYTEIDX);				\
	  (BYTEIDX)++;							\
	  MAKE_CHAR_MULTIBYTE (OUTPUT);					\
	}								\
    }									\
  while (false)
43c47483 398
0168c3d8 399
/* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte;
   no STRING_MULTIBYTE test is made.  OUTPUT, CHARIDX and BYTEIDX must
   be side-effect-free lvalues.  */

#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
  do									\
    {									\
      unsigned char *fetch_ptr = &SDATA (STRING)[BYTEIDX];		\
      int fetch_len;							\
									\
      (OUTPUT) = STRING_CHAR_AND_LENGTH (fetch_ptr, fetch_len);	\
      (BYTEIDX) += fetch_len;						\
      (CHARIDX)++;							\
    }									\
  while (false)
0168c3d8
KH
413
414
/* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
   buffer.  Whether the text is decoded as multibyte depends on the
   buffer's enable_multibyte_characters being non-nil.  OUTPUT, CHARIDX
   and BYTEIDX must be side-effect-free lvalues.  */

#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)			\
  do									\
    {									\
      (CHARIDX)++;							\
      if (!NILP (BVAR (current_buffer, enable_multibyte_characters)))	\
	{								\
	  unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);			\
	  int chlen;							\
									\
	  (OUTPUT) = STRING_CHAR_AND_LENGTH (chp, chlen);		\
	  (BYTEIDX) += chlen;						\
	}								\
      else								\
	{								\
	  (OUTPUT) = *(BYTE_POS_ADDR (BYTEIDX));			\
	  (BYTEIDX)++;							\
	}								\
    }									\
  while (false)
0168c3d8
KH
437
438
/* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte;
   enable_multibyte_characters is not consulted.  OUTPUT, CHARIDX and
   BYTEIDX must be side-effect-free lvalues.  */

#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)	\
  do								\
    {								\
      unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);		\
      int chlen;						\
								\
      (OUTPUT) = STRING_CHAR_AND_LENGTH (chp, chlen);		\
      (BYTEIDX) += chlen;					\
      (CHARIDX)++;						\
    }								\
  while (false)
0168c3d8
KH
452
453
/* Increment the buffer byte position POS_BYTE of the current buffer to
   the next character boundary.  No range checking of POS_BYTE, which
   must be a side-effect-free lvalue.  */

#define INC_POS(pos_byte)				\
  do {							\
    unsigned char *chp = BYTE_POS_ADDR (pos_byte);	\
    (pos_byte) += BYTES_BY_CHAR_HEAD (*chp);		\
  } while (false)
0168c3d8
KH
462
463
/* Decrement the buffer byte position POS_BYTE of the current buffer to
   the previous character boundary.  No range checking of POS_BYTE,
   which must be a side-effect-free lvalue.

   The address of the byte is computed directly: positions below
   GPT_BYTE are offset from BEG_ADDR, later ones additionally by
   GAP_SIZE.  The scan then steps back until a character head is
   found.  */

#define DEC_POS(pos_byte)					\
  do {								\
    unsigned char *chp;						\
								\
    (pos_byte)--;						\
    if ((pos_byte) < GPT_BYTE)					\
      chp = BEG_ADDR + (pos_byte) - BEG_BYTE;			\
    else							\
      chp = BEG_ADDR + GAP_SIZE + (pos_byte) - BEG_BYTE;	\
    while (!CHAR_HEAD_P (*chp))					\
      {								\
	chp--;							\
	(pos_byte)--;						\
      }								\
  } while (false)
0168c3d8
KH
482
/* Increment both CHARPOS and BYTEPOS, each in the appropriate way:
   CHARPOS by one, BYTEPOS by one byte when the current buffer's
   enable_multibyte_characters is nil and by one whole character
   (INC_POS) otherwise.  */

#define INC_BOTH(charpos, bytepos)				\
  do								\
    {								\
      (charpos)++;						\
      if (NILP (BVAR (current_buffer, enable_multibyte_characters))) \
	(bytepos)++;						\
      else							\
	INC_POS ((bytepos));					\
    }								\
  while (false)
0168c3d8
KH
495
496
/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way:
   CHARPOS by one, BYTEPOS by one byte when the current buffer's
   enable_multibyte_characters is nil and by one whole character
   (DEC_POS) otherwise.  */

#define DEC_BOTH(charpos, bytepos)				\
  do								\
    {								\
      (charpos)--;						\
      if (NILP (BVAR (current_buffer, enable_multibyte_characters))) \
	(bytepos)--;						\
      else							\
	DEC_POS ((bytepos));					\
    }								\
  while (false)
0168c3d8
KH
509
510
/* Increment the buffer byte position POS_BYTE of buffer BUF to
   the next character boundary.  This macro relies on the fact that
   *GPT_ADDR and *Z_ADDR are always accessible and the values are
   '\0'.  No range checking of POS_BYTE, which must be a
   side-effect-free lvalue.  */

#define BUF_INC_POS(buf, pos_byte)				\
  do {								\
    unsigned char *chp = BUF_BYTE_ADDRESS (buf, pos_byte);	\
    (pos_byte) += BYTES_BY_CHAR_HEAD (*chp);			\
  } while (false)
0168c3d8
KH
521
522
/* Decrement the buffer byte position POS_BYTE of buffer BUF to
   the previous character boundary.  No range checking of POS_BYTE,
   which must be a side-effect-free lvalue.

   Positions below BUF's gap position are addressed from
   BUF_BEG_ADDR; later ones are additionally offset by BUF's gap
   size.  The scan then steps back until a character head is found.  */

#define BUF_DEC_POS(buf, pos_byte)					\
  do {									\
    unsigned char *chp;							\
    (pos_byte)--;							\
    if ((pos_byte) < BUF_GPT_BYTE (buf))				\
      chp = BUF_BEG_ADDR (buf) + (pos_byte) - BEG_BYTE;			\
    else								\
      chp = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + (pos_byte) - BEG_BYTE;\
    while (!CHAR_HEAD_P (*chp))						\
      {									\
	chp--;								\
	(pos_byte)--;							\
      }									\
  } while (false)
0168c3d8
KH
540
541
a2271ba2
PE
542/* Return a non-outlandish value for the tab width. */
543
5637687f
PE
544#define SANE_TAB_WIDTH(buf) \
545 sanitize_tab_width (XFASTINT (BVAR (buf, tab_width)))
00382e8b 546INLINE int
5637687f 547sanitize_tab_width (EMACS_INT width)
a2271ba2
PE
548{
549 return 0 < width && width <= 1000 ? width : 8;
550}
551
/* Return the width of ASCII character C.  The width is measured by
   how many columns C will occupy on the screen when displayed in the
   current buffer.

   A tab takes the buffer's sane tab width and a newline takes 0.
   Other control characters (below 0x20) and codes from 0x7f up take
   2 columns, or 4 if the buffer's ctl_arrow is nil.  Everything else
   takes 1.  C is evaluated more than once.  */

#define ASCII_CHAR_WIDTH(c)						\
  ((c) < 0x20								\
   ? ((c) == '\t'							\
      ? SANE_TAB_WIDTH (current_buffer)					\
      : ((c) == '\n' ? 0 : (NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))) \
   : ((c) < 0x7f							\
      ? 1								\
      : ((NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))))
0168c3d8 564
5637687f
PE
565/* Return a non-outlandish value for a character width. */
566
00382e8b 567INLINE int
5637687f
PE
568sanitize_char_width (EMACS_INT width)
569{
570 return 0 <= width && width <= 1000 ? width : 1000;
571}
572
/* Return the width of character C.  The width is measured by how many
   columns C will occupy on the screen when displayed in the current
   buffer.  ASCII characters use ASCII_CHAR_WIDTH; all others are
   looked up in Vchar_width_table and passed through
   sanitize_char_width.  */

#define CHAR_WIDTH(c)		\
  (ASCII_CHAR_P (c)		\
   ? ASCII_CHAR_WIDTH (c)	\
   : sanitize_char_width (XINT (CHAR_TABLE_REF (Vchar_width_table, (c)))))
0168c3d8 581
/* If C is a variation selector, return the index of the
   variation selector (1..256).  Otherwise, return 0.
   U+FE00..U+FE0F map to 1..16 and U+E0100..U+E01EF to 17..256.  */

#define CHAR_VARIATION_SELECTOR_P(c)		\
  ((c) < 0xFE00 ? 0				\
   : (c) <= 0xFE0F ? (c) - 0xFE00 + 1		\
   : (c) < 0xE0100 ? 0				\
   : (c) <= 0xE01EF ? (c) - 0xE0100 + 17	\
   : 0)
591
/* If C is a high surrogate (U+D800..U+DBFF), return 1.  If C is a low
   surrogate (U+DC00..U+DFFF), return 2.  Otherwise, return 0.  */

#define CHAR_SURROGATE_PAIR_P(c)	\
  ((c) < 0xD800 ? 0			\
   : (c) <= 0xDBFF ? 1			\
   : (c) <= 0xDFFF ? 2			\
   : 0)
600
/* Data type for Unicode general category.

   The order of members must be in sync with the 8th element of the
   member of unidata-prop-alist (in admin/unidata/unidata-gen.el) for
   Unicode character property `general-category'.  */

typedef enum {
  UNICODE_CATEGORY_UNKNOWN = 0,
  UNICODE_CATEGORY_Lu,
  UNICODE_CATEGORY_Ll,
  UNICODE_CATEGORY_Lt,
  UNICODE_CATEGORY_Lm,
  UNICODE_CATEGORY_Lo,
  UNICODE_CATEGORY_Mn,
  UNICODE_CATEGORY_Mc,
  UNICODE_CATEGORY_Me,
  UNICODE_CATEGORY_Nd,
  UNICODE_CATEGORY_Nl,
  UNICODE_CATEGORY_No,
  UNICODE_CATEGORY_Pc,
  UNICODE_CATEGORY_Pd,
  UNICODE_CATEGORY_Ps,
  UNICODE_CATEGORY_Pe,
  UNICODE_CATEGORY_Pi,
  UNICODE_CATEGORY_Pf,
  UNICODE_CATEGORY_Po,
  UNICODE_CATEGORY_Sm,
  UNICODE_CATEGORY_Sc,
  UNICODE_CATEGORY_Sk,
  UNICODE_CATEGORY_So,
  UNICODE_CATEGORY_Zs,
  UNICODE_CATEGORY_Zl,
  UNICODE_CATEGORY_Zp,
  UNICODE_CATEGORY_Cc,
  UNICODE_CATEGORY_Cf,
  UNICODE_CATEGORY_Cs,
  UNICODE_CATEGORY_Co,
  UNICODE_CATEGORY_Cn
} unicode_category_t;
c19cab20 640
5994c183 641extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST;
383e0970
J
642extern int char_string (unsigned, unsigned char *);
643extern int string_char (const unsigned char *,
644 const unsigned char **, int *);
645
646extern int translate_char (Lisp_Object, int c);
14162469 647extern void parse_str_as_multibyte (const unsigned char *,
d311d28c
PE
648 ptrdiff_t, ptrdiff_t *, ptrdiff_t *);
649extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
650extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
651 ptrdiff_t *);
652extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
653extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
654extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
d5172d4f 655 ptrdiff_t);
d311d28c
PE
656extern ptrdiff_t strwidth (const char *, ptrdiff_t);
657extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
658 ptrdiff_t *, ptrdiff_t *);
659extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t,
660 ptrdiff_t *, ptrdiff_t *);
0168c3d8 661
955cbe7b 662extern Lisp_Object Qcharacterp;
0168c3d8 663extern Lisp_Object Vchar_unify_table;
383e0970 664extern Lisp_Object string_escape_byte8 (Lisp_Object);
fac2bdc4 665
/* Return a translation table of id number ID: the cdr of element ID
   of the vector Vtranslation_table_vector.  ID is not range-checked
   here.  */
#define GET_TRANSLATION_TABLE(id) \
  (XCDR (XVECTOR (Vtranslation_table_vector)->contents[(id)]))
0168c3d8 669
f162bcc3
PE
670INLINE_HEADER_END
671
0168c3d8 672#endif /* EMACS_CHARACTER_H */