Replace `iff' in comments.
[bpt/emacs.git] / src / charset.c
CommitLineData
75c8c592 1/* Basic multilingual character support.
aaef169d 2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
4e6835db 3 2006, 2007 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4e6835db 5 2005, 2006, 2007
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
1427aa65 13the Free Software Foundation; either version 3, or (at your option)
369314dc 14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
23the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24Boston, MA 02110-1301, USA. */
4ed46869
KH
25
26/* At first, see the document in `charset.h' to understand the code in
27 this file. */
28
68c45bf0
PE
29#ifdef emacs
30#include <config.h>
31#endif
32
4ed46869
KH
33#include <stdio.h>
34
35#ifdef emacs
36
37#include <sys/types.h>
4ed46869
KH
38#include "lisp.h"
39#include "buffer.h"
40#include "charset.h"
3f62427c 41#include "composite.h"
4ed46869 42#include "coding.h"
fc6b09bf 43#include "disptab.h"
4ed46869
KH
44
45#else /* not emacs */
46
47#include "mulelib.h"
48
49#endif /* emacs */
50
2e344af3 51Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
0282eb69 52Lisp_Object Qunknown;
4ed46869
KH
53
54/* Declaration of special leading-codes. */
4bc26a6c
SM
55EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
56EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
57EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
58EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
4ed46869 59
2e344af3
KH
60/* Declaration of special charsets. The values are set by
61 Fsetup_special_charsets. */
4ed46869
KH
62int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
63int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
64int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
65int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
66int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
67int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
68int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
c64582be
KH
69int charset_mule_unicode_0100_24ff;
70int charset_mule_unicode_2500_33ff;
71int charset_mule_unicode_e000_ffff;
4ed46869
KH
72
73Lisp_Object Qcharset_table;
74
75/* A char-table containing information of each character set. */
76Lisp_Object Vcharset_table;
77
78/* A vector of charset symbol indexed by charset-id. This is used
79 only for returning charset symbol from C functions. */
80Lisp_Object Vcharset_symbol_table;
81
82/* A list of charset symbols ever defined. */
83Lisp_Object Vcharset_list;
84
537efd8d
KH
85/* Vector of translation table ever defined.
86 ID of a translation table is used to index this vector. */
87Lisp_Object Vtranslation_table_vector;
b0e3cf2b 88
c1a08b4c
KH
89/* A char-table for characters which may invoke auto-filling. */
90Lisp_Object Vauto_fill_chars;
91
92Lisp_Object Qauto_fill_chars;
93
4ed46869
KH
94/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
95int bytes_by_char_head[256];
96int width_by_char_head[256];
97
98/* Mapping table from ISO2022's charset (specified by DIMENSION,
99 CHARS, and FINAL-CHAR) to Emacs' charset. */
100int iso_charset_table[2][2][128];
101
102/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
103unsigned char *_fetch_multibyte_char_p;
104int _fetch_multibyte_char_len;
105
35e623fb 106/* Offset to add to a non-ASCII value when inserting it. */
31ade731 107EMACS_INT nonascii_insert_offset;
35e623fb 108
4cf9710d
RS
109/* Translation table for converting non-ASCII unibyte characters
110 to multibyte codes, or nil. */
b4e9dd77 111Lisp_Object Vnonascii_translation_table;
4cf9710d 112
8a73a704
KH
113/* List of all possible generic characters. */
114Lisp_Object Vgeneric_character_list;
115
046b1f03 116\f
93bcb785
KH
117void
118invalid_character (c)
119 int c;
120{
85416bda 121 error ("Invalid character: %d, #o%o, #x%x", c, c, c);
93bcb785
KH
122}
123
2e344af3
KH
124/* Parse string STR of length LENGTH and fetch information of a
125 character at STR. Set BYTES to the byte length the character
126 occupies, CHARSET, C1, C2 to proper values of the character. */
127
128#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
129 do { \
130 (c1) = *(str); \
131 (bytes) = BYTES_BY_CHAR_HEAD (c1); \
132 if ((bytes) == 1) \
133 (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
134 else if ((bytes) == 2) \
135 { \
136 if ((c1) == LEADING_CODE_8_BIT_CONTROL) \
137 (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \
138 else \
139 (charset) = (c1), (c1) = (str)[1] & 0x7F; \
140 } \
141 else if ((bytes) == 3) \
142 { \
143 if ((c1) < LEADING_CODE_PRIVATE_11) \
144 (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \
145 else \
146 (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \
147 } \
148 else \
149 (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \
ac4137cc
KH
150 } while (0)
151
d37478d0
EZ
152/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
153 Note that this intentionally allows invalid components, such
154 as 0xA0 0xA0, because there exist many files that contain
155 such invalid byte sequences, especially in EUC-GB. */
44c6492d 156#define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
2e344af3 157 ((charset) == CHARSET_ASCII \
63f4d579 158 ? ((c1) >= 0 && (c1) <= 0x7F) \
2e344af3
KH
159 : ((charset) == CHARSET_8_BIT_CONTROL \
160 ? ((c1) >= 0x80 && (c1) <= 0x9F) \
161 : ((charset) == CHARSET_8_BIT_GRAPHIC \
162 ? ((c1) >= 0x80 && (c1) <= 0xFF) \
163 : (CHARSET_DIMENSION (charset) == 1 \
164 ? ((c1) >= 0x20 && (c1) <= 0x7F) \
165 : ((c1) >= 0x20 && (c1) <= 0x7F \
166 && (c2) >= 0x20 && (c2) <= 0x7F)))))
93bcb785 167
99529c2c
KH
168/* Store multi-byte form of the character C in STR. The caller should
169 allocate at least 4-byte area at STR in advance. Returns the
170 length of the multi-byte form. If C is an invalid character code,
12bcae05 171 return -1. */
4ed46869
KH
172
173int
12bcae05 174char_to_string_1 (c, str)
4ed46869 175 int c;
99529c2c 176 unsigned char *str;
4ed46869 177{
99529c2c
KH
178 unsigned char *p = str;
179
6662e69b 180 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
8ac5a9cc 181 {
6662e69b
KH
182 /* Multibyte character can't have a modifier bit. */
183 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
12bcae05 184 return -1;
6662e69b
KH
185
186 /* For Meta, Shift, and Control modifiers, we need special care. */
8ac5a9cc 187 if (c & CHAR_META)
6662e69b
KH
188 {
189 /* Move the meta bit to the right place for a string. */
190 c = (c & ~CHAR_META) | 0x80;
191 }
192 if (c & CHAR_SHIFT)
193 {
194 /* Shift modifier is valid only with [A-Za-z]. */
195 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
196 c &= ~CHAR_SHIFT;
197 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
198 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
199 }
8ac5a9cc 200 if (c & CHAR_CTL)
6662e69b
KH
201 {
202 /* Simulate the code in lread.c. */
203 /* Allow `\C- ' and `\C-?'. */
204 if (c == (CHAR_CTL | ' '))
205 c = 0;
206 else if (c == (CHAR_CTL | '?'))
207 c = 127;
208 /* ASCII control chars are made from letters (both cases),
209 as well as the non-letters within 0100...0137. */
210 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
211 c &= (037 | (~0177 & ~CHAR_CTL));
212 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
213 c &= (037 | (~0177 & ~CHAR_CTL));
214 }
215
5ef834ea
KH
216 /* If C still has any modifier bits, just ignore it. */
217 c &= ~CHAR_MODIFIER_MASK;
2e344af3 218 }
177c0ea7 219
2e344af3
KH
220 if (SINGLE_BYTE_CHAR_P (c))
221 {
222 if (ASCII_BYTE_P (c) || c >= 0xA0)
223 *p++ = c;
224 else
225 {
226 *p++ = LEADING_CODE_8_BIT_CONTROL;
227 *p++ = c + 0x20;
228 }
8ac5a9cc 229 }
6ce974d4 230 else if (CHAR_VALID_P (c, 0))
4ed46869 231 {
ac4137cc 232 int charset, c1, c2;
4ed46869 233
2e344af3 234 SPLIT_CHAR (c, charset, c1, c2);
99529c2c
KH
235
236 if (charset >= LEADING_CODE_EXT_11)
237 *p++ = (charset < LEADING_CODE_EXT_12
238 ? LEADING_CODE_PRIVATE_11
239 : (charset < LEADING_CODE_EXT_21
240 ? LEADING_CODE_PRIVATE_12
241 : (charset < LEADING_CODE_EXT_22
242 ? LEADING_CODE_PRIVATE_21
243 : LEADING_CODE_PRIVATE_22)));
244 *p++ = charset;
05ffc44b 245 if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
12bcae05 246 return -1;
99529c2c 247 if (c1)
4ed46869 248 {
99529c2c
KH
249 *p++ = c1 | 0x80;
250 if (c2 > 0)
251 *p++ = c2 | 0x80;
4ed46869
KH
252 }
253 }
2e344af3 254 else
12bcae05 255 return -1;
4ed46869 256
2e344af3 257 return (p - str);
4ed46869
KH
258}
259
12bcae05
GM
260
261/* Store multi-byte form of the character C in STR. The caller should
262 allocate at least 4-byte area at STR in advance. Returns the
263 length of the multi-byte form. If C is an invalid character code,
264 signal an error.
265
266 Use macro `CHAR_STRING (C, STR)' instead of calling this function
267 directly if C can be an ASCII character. */
268
269int
270char_to_string (c, str)
271 int c;
272 unsigned char *str;
273{
274 int len;
275 len = char_to_string_1 (c, str);
276 if (len == -1)
277 invalid_character (c);
278 return len;
279}
280
281
44c6492d
KH
282/* Return the non-ASCII character corresponding to multi-byte form at
283 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
284 length of the multibyte form in *ACTUAL_LEN.
537efd8d 285
99529c2c
KH
286 Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
287 this function directly if you want ot handle ASCII characters as
288 well. */
4ed46869 289
dfcf069d 290int
99529c2c 291string_to_char (str, len, actual_len)
8867de67 292 const unsigned char *str;
ac4137cc 293 int len, *actual_len;
4ed46869 294{
ac4137cc 295 int c, bytes, charset, c1, c2;
4ed46869 296
ac4137cc
KH
297 SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
298 c = MAKE_CHAR (charset, c1, c2);
4ed46869 299 if (actual_len)
ac4137cc 300 *actual_len = bytes;
4ed46869
KH
301 return c;
302}
303
44c6492d
KH
304/* Return the length of the multi-byte form at string STR of length LEN.
305 Use the macro MULTIBYTE_FORM_LENGTH instead. */
4ed46869
KH
306int
307multibyte_form_length (str, len)
8867de67 308 const unsigned char *str;
4ed46869
KH
309 int len;
310{
ac4137cc 311 int bytes;
4ed46869 312
ac4137cc 313 PARSE_MULTIBYTE_SEQ (str, len, bytes);
90d7b74e 314 return bytes;
4ed46869
KH
315}
316
ac4137cc
KH
317/* Check multibyte form at string STR of length LEN and set variables
318 pointed by CHARSET, C1, and C2 to charset and position codes of the
319 character at STR, and return 0. If there's no multibyte character,
4ed46869
KH
320 return -1. This should be used only in the macro SPLIT_STRING
321 which checks range of STR in advance. */
322
dfcf069d 323int
99529c2c 324split_string (str, len, charset, c1, c2)
ac4137cc
KH
325 const unsigned char *str;
326 unsigned char *c1, *c2;
327 int len, *charset;
4ed46869 328{
ac4137cc 329 register int bytes, cs, code1, code2 = -1;
4ed46869 330
ac4137cc
KH
331 SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
332 if (cs == CHARSET_ASCII)
4ed46869 333 return -1;
ac4137cc
KH
334 *charset = cs;
335 *c1 = code1;
336 *c2 = code2;
5865af0d 337 return 0;
ac4137cc
KH
338}
339
e0f24100 340/* Return 1 if character C has valid printable glyph.
44c6492d 341 Use the macro CHAR_PRINTABLE_P instead. */
ac4137cc
KH
342int
343char_printable_p (c)
344 int c;
345{
8ebae00c 346 int charset, c1, c2;
ac4137cc 347
2e344af3 348 if (ASCII_BYTE_P (c))
ac4137cc 349 return 1;
2e344af3
KH
350 else if (SINGLE_BYTE_CHAR_P (c))
351 return 0;
352 else if (c >= MAX_CHAR)
99529c2c 353 return 0;
177c0ea7 354
2e344af3 355 SPLIT_CHAR (c, charset, c1, c2);
ac4137cc
KH
356 if (! CHARSET_DEFINED_P (charset))
357 return 0;
358 if (CHARSET_CHARS (charset) == 94
359 ? c1 <= 32 || c1 >= 127
360 : c1 < 32)
361 return 0;
362 if (CHARSET_DIMENSION (charset) == 2
363 && (CHARSET_CHARS (charset) == 94
364 ? c2 <= 32 || c2 >= 127
365 : c2 < 32))
366 return 0;
367 return 1;
4ed46869
KH
368}
369
537efd8d 370/* Translate character C by translation table TABLE. If C
b4e9dd77
KH
371 is negative, translate a character specified by CHARSET, C1, and C2
372 (C1 and C2 are code points of the character). If no translation is
373 found in TABLE, return C. */
dfcf069d 374int
b4e9dd77 375translate_char (table, c, charset, c1, c2)
23d2a7f1
KH
376 Lisp_Object table;
377 int c, charset, c1, c2;
378{
379 Lisp_Object ch;
380 int alt_charset, alt_c1, alt_c2, dimension;
381
0ad3f83d 382 if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
23d2a7f1 383 if (!CHAR_TABLE_P (table)
ac4137cc 384 || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
23d2a7f1
KH
385 return c;
386
387 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
388 dimension = CHARSET_DIMENSION (alt_charset);
05ffc44b 389 if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
23d2a7f1
KH
390 /* CH is not a generic character, just return it. */
391 return XFASTINT (ch);
392
393 /* Since CH is a generic character, we must return a specific
394 charater which has the same position codes as C from CH. */
395 if (charset < 0)
396 SPLIT_CHAR (c, charset, c1, c2);
397 if (dimension != CHARSET_DIMENSION (charset))
398 /* We can't make such a character because of dimension mismatch. */
399 return c;
23d2a7f1
KH
400 return MAKE_CHAR (alt_charset, c1, c2);
401}
402
d2665018 403/* Convert the unibyte character C to multibyte based on
b4e9dd77 404 Vnonascii_translation_table or nonascii_insert_offset. If they can't
d2665018
KH
405 convert C to a valid multibyte character, convert it based on
406 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
35e623fb 407
dfcf069d 408int
35e623fb
RS
409unibyte_char_to_multibyte (c)
410 int c;
411{
543b4f61 412 if (c < 0400 && c >= 0200)
35e623fb 413 {
d2665018
KH
414 int c_save = c;
415
b4e9dd77 416 if (! NILP (Vnonascii_translation_table))
bbf12bb3
KH
417 {
418 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
ac4137cc 419 if (c >= 0400 && ! char_valid_p (c, 0))
bbf12bb3
KH
420 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
421 }
422 else if (c >= 0240 && nonascii_insert_offset > 0)
423 {
424 c += nonascii_insert_offset;
ac4137cc 425 if (c < 0400 || ! char_valid_p (c, 0))
bbf12bb3
KH
426 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
427 }
428 else if (c >= 0240)
d2665018 429 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
35e623fb
RS
430 }
431 return c;
432}
76d7b829
KH
433
434
435/* Convert the multibyte character C to unibyte 8-bit character based
436 on Vnonascii_translation_table or nonascii_insert_offset. If
437 REV_TBL is non-nil, it should be a reverse table of
438 Vnonascii_translation_table, i.e. what given by:
439 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
440
441int
442multibyte_char_to_unibyte (c, rev_tbl)
443 int c;
444 Lisp_Object rev_tbl;
445{
446 if (!SINGLE_BYTE_CHAR_P (c))
447 {
448 int c_save = c;
449
450 if (! CHAR_TABLE_P (rev_tbl)
451 && CHAR_TABLE_P (Vnonascii_translation_table))
452 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
453 make_number (0));
454 if (CHAR_TABLE_P (rev_tbl))
455 {
456 Lisp_Object temp;
457 temp = Faref (rev_tbl, make_number (c));
458 if (INTEGERP (temp))
459 c = XINT (temp);
bbf12bb3
KH
460 if (c >= 256)
461 c = (c_save & 0177) + 0200;
462 }
463 else
464 {
465 if (nonascii_insert_offset > 0)
466 c -= nonascii_insert_offset;
467 if (c < 128 || c >= 256)
468 c = (c_save & 0177) + 0200;
76d7b829 469 }
76d7b829
KH
470 }
471
472 return c;
473}
474
35e623fb 475\f
4ed46869
KH
476/* Update the table Vcharset_table with the given arguments (see the
477 document of `define-charset' for the meaning of each argument).
478 Several other table contents are also updated. The caller should
479 check the validity of CHARSET-ID and the remaining arguments in
480 advance. */
481
482void
483update_charset_table (charset_id, dimension, chars, width, direction,
484 iso_final_char, iso_graphic_plane,
485 short_name, long_name, description)
486 Lisp_Object charset_id, dimension, chars, width, direction;
487 Lisp_Object iso_final_char, iso_graphic_plane;
488 Lisp_Object short_name, long_name, description;
489{
490 int charset = XINT (charset_id);
491 int bytes;
492 unsigned char leading_code_base, leading_code_ext;
493
6dc0722d
KH
494 if (NILP (CHARSET_TABLE_ENTRY (charset)))
495 CHARSET_TABLE_ENTRY (charset)
496 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869 497
d78bc582
KH
498 if (NILP (long_name))
499 long_name = short_name;
500 if (NILP (description))
501 description = long_name;
502
4ed46869
KH
503 /* Get byte length of multibyte form, base leading-code, and
504 extended leading-code of the charset. See the comment under the
505 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
506 bytes = XINT (dimension);
507 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
508 {
509 /* Official charset, it doesn't have an extended leading-code. */
2e344af3 510 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
4ed46869
KH
511 bytes += 1; /* For a base leading-code. */
512 leading_code_base = charset;
513 leading_code_ext = 0;
514 }
515 else
516 {
517 /* Private charset. */
518 bytes += 2; /* For base and extended leading-codes. */
519 leading_code_base
520 = (charset < LEADING_CODE_EXT_12
521 ? LEADING_CODE_PRIVATE_11
522 : (charset < LEADING_CODE_EXT_21
523 ? LEADING_CODE_PRIVATE_12
524 : (charset < LEADING_CODE_EXT_22
525 ? LEADING_CODE_PRIVATE_21
526 : LEADING_CODE_PRIVATE_22)));
527 leading_code_ext = charset;
c83ef371
KH
528 if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
529 error ("Invalid dimension for the charset-ID %d", charset);
530 }
6ef23ebb 531
4ed46869
KH
532 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
533 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
534 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
535 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
536 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
537 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
538 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
539 = make_number (leading_code_base);
540 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
541 = make_number (leading_code_ext);
542 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
543 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
544 = iso_graphic_plane;
545 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
546 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
547 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
548 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
549
550 {
551 /* If we have already defined a charset which has the same
552 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
553 DIRECTION, we must update the entry REVERSE-CHARSET of both
554 charsets. If there's no such charset, the value of the entry
555 is set to nil. */
556 int i;
557
513ee442 558 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
559 if (!NILP (CHARSET_TABLE_ENTRY (i)))
560 {
561 if (CHARSET_DIMENSION (i) == XINT (dimension)
562 && CHARSET_CHARS (i) == XINT (chars)
563 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
564 && CHARSET_DIRECTION (i) != XINT (direction))
565 {
566 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
567 = make_number (i);
568 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
569 break;
570 }
571 }
513ee442 572 if (i > MAX_CHARSET)
4ed46869
KH
573 /* No such a charset. */
574 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
575 = make_number (-1);
576 }
577
c83ef371 578 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
4ed46869
KH
579 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
580 {
c83ef371 581 bytes_by_char_head[leading_code_base] = bytes;
4ed46869
KH
582 width_by_char_head[leading_code_base] = XINT (width);
583
584 /* Update table emacs_code_class. */
585 emacs_code_class[charset] = (bytes == 2
586 ? EMACS_leading_code_2
587 : (bytes == 3
588 ? EMACS_leading_code_3
589 : EMACS_leading_code_4));
590 }
591
592 /* Update table iso_charset_table. */
52e386c2 593 if (XINT (iso_final_char) >= 0
2e344af3 594 && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
4ed46869
KH
595 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
596}
597
598#ifdef emacs
599
600/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
601 is invalid. */
602int
603get_charset_id (charset_symbol)
604 Lisp_Object charset_symbol;
605{
606 Lisp_Object val;
607 int charset;
608
1a45ff10
EZ
609 /* This originally used a ?: operator, but reportedly the HP-UX
610 compiler version HP92453-01 A.10.32.22 miscompiles that. */
611 if (SYMBOLP (charset_symbol)
612 && VECTORP (val = Fget (charset_symbol, Qcharset))
613 && CHARSET_VALID_P (charset =
614 XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
615 return charset;
616 else
617 return -1;
4ed46869
KH
618}
619
620/* Return an identification number for a new private charset of
621 DIMENSION and WIDTH. If there's no more room for the new charset,
622 return 0. */
623Lisp_Object
624get_new_private_charset_id (dimension, width)
625 int dimension, width;
626{
627 int charset, from, to;
628
629 if (dimension == 1)
630 {
3bb7b08b
KH
631 from = LEADING_CODE_EXT_11;
632 to = LEADING_CODE_EXT_21;
4ed46869
KH
633 }
634 else
635 {
3bb7b08b
KH
636 from = LEADING_CODE_EXT_21;
637 to = LEADING_CODE_EXT_MAX + 1;
4ed46869
KH
638 }
639
640 for (charset = from; charset < to; charset++)
641 if (!CHARSET_DEFINED_P (charset)) break;
642
643 return make_number (charset < to ? charset : 0);
644}
645
646DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
fdb82f93
PJ
647 doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
648If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
649 treated as a private charset.
650INFO-VECTOR is a vector of the format:
651 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
652 SHORT-NAME LONG-NAME DESCRIPTION]
653The meanings of each elements is as follows:
654DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
655CHARS (integer) is the number of characters in a dimension: 94 or 96.
656WIDTH (integer) is the number of columns a character in the charset
657occupies on the screen: one of 0, 1, and 2.
658
659DIRECTION (integer) is the rendering direction of characters in the
660charset when rendering. If 0, render from left to right, else
661render from right to left.
662
663ISO-FINAL-CHAR (character) is the final character of the
664corresponding ISO 2022 charset.
665It may be -1 if the charset is internal use only.
666
667ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
668while encoding to variants of ISO 2022 coding system, one of the
669following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
670It may be -1 if the charset is internal use only.
671
672SHORT-NAME (string) is the short name to refer to the charset.
673
674LONG-NAME (string) is the long name to refer to the charset.
675
676DESCRIPTION (string) is the description string of the charset. */)
677 (charset_id, charset_symbol, info_vector)
4ed46869
KH
678 Lisp_Object charset_id, charset_symbol, info_vector;
679{
680 Lisp_Object *vec;
681
682 if (!NILP (charset_id))
b7826503
PJ
683 CHECK_NUMBER (charset_id);
684 CHECK_SYMBOL (charset_symbol);
685 CHECK_VECTOR (info_vector);
4ed46869
KH
686
687 if (! NILP (charset_id))
688 {
689 if (! CHARSET_VALID_P (XINT (charset_id)))
690 error ("Invalid CHARSET: %d", XINT (charset_id));
691 else if (CHARSET_DEFINED_P (XINT (charset_id)))
692 error ("Already defined charset: %d", XINT (charset_id));
693 }
694
695 vec = XVECTOR (info_vector)->contents;
696 if (XVECTOR (info_vector)->size != 9
697 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
698 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
699 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
700 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
2e344af3 701 || !INTEGERP (vec[4])
05ffc44b 702 || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
2e344af3
KH
703 || !INTEGERP (vec[5])
704 || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
4ed46869
KH
705 || !STRINGP (vec[6])
706 || !STRINGP (vec[7])
707 || !STRINGP (vec[8]))
708 error ("Invalid info-vector argument for defining charset %s",
d5db4077 709 SDATA (SYMBOL_NAME (charset_symbol)));
4ed46869
KH
710
711 if (NILP (charset_id))
712 {
713 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
714 if (XINT (charset_id) == 0)
715 error ("There's no room for a new private charset %s",
d5db4077 716 SDATA (SYMBOL_NAME (charset_symbol)));
4ed46869
KH
717 }
718
719 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
720 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 721 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
722 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
723 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
538482ab 724 Fupdate_coding_systems_internal ();
4ed46869
KH
725 return Qnil;
726}
727
8a73a704
KH
728DEFUN ("generic-character-list", Fgeneric_character_list,
729 Sgeneric_character_list, 0, 0, 0,
fdb82f93
PJ
730 doc: /* Return a list of all possible generic characters.
731It includes a generic character for a charset not yet defined. */)
732 ()
8a73a704
KH
733{
734 return Vgeneric_character_list;
735}
736
3fac5a51
KH
737DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
738 Sget_unused_iso_final_char, 2, 2, 0,
bc001814 739 doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
740DIMENSION is the number of bytes to represent a character: 1 or 2.
741CHARS is the number of characters in a dimension: 94 or 96.
742
743This final char is for private use, thus the range is `0' (48) .. `?' (63).
744If there's no unused final char for the specified kind of charset,
745return nil. */)
746 (dimension, chars)
3fac5a51
KH
747 Lisp_Object dimension, chars;
748{
749 int final_char;
750
b7826503
PJ
751 CHECK_NUMBER (dimension);
752 CHECK_NUMBER (chars);
3fac5a51
KH
753 if (XINT (dimension) != 1 && XINT (dimension) != 2)
754 error ("Invalid charset dimension %d, it should be 1 or 2",
755 XINT (dimension));
756 if (XINT (chars) != 94 && XINT (chars) != 96)
757 error ("Invalid charset chars %d, it should be 94 or 96",
758 XINT (chars));
759 for (final_char = '0'; final_char <= '?'; final_char++)
760 {
761 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
762 break;
763 }
764 return (final_char <= '?' ? make_number (final_char) : Qnil);
765}
766
4ed46869
KH
767DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
768 4, 4, 0,
74ac5074
KH
769 doc: /* Declare an equivalent charset for ISO-2022 decoding.
770
771On decoding by an ISO-2022 base coding system, when a charset
772specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
773if CHARSET is designated instead. */)
774 (dimension, chars, final_char, charset)
775 Lisp_Object dimension, chars, final_char, charset;
4ed46869 776{
74ac5074 777 int charset_id;
4ed46869 778
b7826503
PJ
779 CHECK_NUMBER (dimension);
780 CHECK_NUMBER (chars);
781 CHECK_NUMBER (final_char);
74ac5074 782 CHECK_SYMBOL (charset);
4ed46869
KH
783
784 if (XINT (dimension) != 1 && XINT (dimension) != 2)
785 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
786 if (XINT (chars) != 94 && XINT (chars) != 96)
787 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
788 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
789 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
74ac5074
KH
790 if ((charset_id = get_charset_id (charset)) < 0)
791 error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
4ed46869 792
74ac5074 793 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
4ed46869
KH
794 return Qnil;
795}
796
2e344af3
KH
797/* Return information about charsets in the text at PTR of NBYTES
798 bytes, which are NCHARS characters. The value is:
f6302ac9 799
cfe34140 800 0: Each character is represented by one byte. This is always
f6302ac9 801 true for unibyte text.
2e344af3
KH
802 1: No charsets other than ascii eight-bit-control,
803 eight-bit-graphic, and latin-1 are found.
804 2: Otherwise.
1d67c29b 805
2e344af3
KH
806 In addition, if CHARSETS is nonzero, for each found charset N, set
807 CHARSETS[N] to 1. For that, callers should allocate CHARSETS
808 (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
809 table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
810 1 (note that there's no charset whose ID is 1). */
4ed46869
KH
811
812int
2e344af3 813find_charset_in_text (ptr, nchars, nbytes, charsets, table)
fb4f7f50 814 const unsigned char *ptr;
2e344af3 815 int nchars, nbytes, *charsets;
23d2a7f1 816 Lisp_Object table;
4ed46869 817{
2e344af3 818 if (nchars == nbytes)
0282eb69 819 {
2e344af3 820 if (charsets && nbytes > 0)
0282eb69 821 {
fb4f7f50 822 const unsigned char *endp = ptr + nbytes;
2e344af3
KH
823 int maskbits = 0;
824
825 while (ptr < endp && maskbits != 7)
826 {
827 maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
828 ptr++;
177c0ea7 829 }
2e344af3
KH
830
831 if (maskbits & 1)
832 charsets[CHARSET_ASCII] = 1;
833 if (maskbits & 2)
834 charsets[CHARSET_8_BIT_CONTROL] = 1;
835 if (maskbits & 4)
836 charsets[CHARSET_8_BIT_GRAPHIC] = 1;
0282eb69 837 }
2e344af3 838 return 0;
0282eb69 839 }
2e344af3 840 else
4ed46869 841 {
2e344af3 842 int return_val = 1;
99529c2c 843 int bytes, charset, c1, c2;
05505664 844
2e344af3
KH
845 if (! CHAR_TABLE_P (table))
846 table = Qnil;
05505664 847
2e344af3 848 while (nchars-- > 0)
23d2a7f1 849 {
2e344af3
KH
850 SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
851 ptr += bytes;
4ed46869 852
2e344af3
KH
853 if (!CHARSET_DEFINED_P (charset))
854 charset = 1;
855 else if (! NILP (table))
856 {
857 int c = translate_char (table, -1, charset, c1, c2);
858 if (c >= 0)
859 charset = CHAR_CHARSET (c);
860 }
861
862 if (return_val == 1
863 && charset != CHARSET_ASCII
864 && charset != CHARSET_8_BIT_CONTROL
865 && charset != CHARSET_8_BIT_GRAPHIC
866 && charset != charset_latin_iso8859_1)
867 return_val = 2;
868
869 if (charsets)
870 charsets[charset] = 1;
871 else if (return_val == 2)
872 break;
4ed46869 873 }
2e344af3 874 return return_val;
4ed46869 875 }
4ed46869
KH
876}
877
878DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 879 2, 3, 0,
fdb82f93
PJ
880 doc: /* Return a list of charsets in the region between BEG and END.
881BEG and END are buffer positions.
882Optional arg TABLE if non-nil is a translation table to look up.
883
884If the region contains invalid multibyte characters,
885`unknown' is included in the returned list.
886
887If the current buffer is unibyte, the returned list may contain
888only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
889 (beg, end, table)
23d2a7f1 890 Lisp_Object beg, end, table;
4ed46869 891{
028d516b 892 int charsets[MAX_CHARSET + 1];
6ae1f27e 893 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
894 Lisp_Object val;
895
896 validate_region (&beg, &end);
897 from = XFASTINT (beg);
898 stop = to = XFASTINT (end);
6ae1f27e 899
4ed46869 900 if (from < GPT && GPT < to)
6ae1f27e
RS
901 {
902 stop = GPT;
903 stop_byte = GPT_BYTE;
904 }
905 else
906 stop_byte = CHAR_TO_BYTE (stop);
907
908 from_byte = CHAR_TO_BYTE (from);
909
028d516b 910 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
4ed46869
KH
911 while (1)
912 {
2e344af3
KH
913 find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
914 stop_byte - from_byte, charsets, table);
4ed46869 915 if (stop < to)
6ae1f27e
RS
916 {
917 from = stop, from_byte = stop_byte;
918 stop = to, stop_byte = CHAR_TO_BYTE (stop);
919 }
4ed46869
KH
920 else
921 break;
922 }
6ae1f27e 923
4ed46869 924 val = Qnil;
2e344af3 925 if (charsets[1])
0282eb69 926 val = Fcons (Qunknown, val);
2e344af3
KH
927 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
928 if (charsets[i])
929 val = Fcons (CHARSET_SYMBOL (i), val);
930 if (charsets[0])
931 val = Fcons (Qascii, val);
4ed46869
KH
932 return val;
933}
934
935DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 936 1, 2, 0,
fdb82f93
PJ
937 doc: /* Return a list of charsets in STR.
938Optional arg TABLE if non-nil is a translation table to look up.
939
940If the string contains invalid multibyte characters,
941`unknown' is included in the returned list.
942
943If STR is unibyte, the returned list may contain
944only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
945 (str, table)
23d2a7f1 946 Lisp_Object str, table;
4ed46869 947{
a29e3b1b 948 int charsets[MAX_CHARSET + 1];
4ed46869
KH
949 int i;
950 Lisp_Object val;
951
b7826503 952 CHECK_STRING (str);
87b089ad 953
a29e3b1b 954 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
d5db4077
KR
955 find_charset_in_text (SDATA (str), SCHARS (str),
956 SBYTES (str), charsets, table);
2e344af3 957
4ed46869 958 val = Qnil;
2e344af3 959 if (charsets[1])
0282eb69 960 val = Fcons (Qunknown, val);
2e344af3
KH
961 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
962 if (charsets[i])
963 val = Fcons (CHARSET_SYMBOL (i), val);
964 if (charsets[0])
965 val = Fcons (Qascii, val);
4ed46869
KH
966 return val;
967}
2e344af3 968
4ed46869
KH
969\f
970DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
fdb82f93
PJ
971 doc: /* Return a character made from arguments.
972Internal use only. */)
973 (charset, code1, code2)
4ed46869
KH
974 Lisp_Object charset, code1, code2;
975{
ac4137cc
KH
976 int charset_id, c1, c2;
977
b7826503 978 CHECK_NUMBER (charset);
ac4137cc
KH
979 charset_id = XINT (charset);
980 if (!CHARSET_DEFINED_P (charset_id))
981 error ("Invalid charset ID: %d", XINT (charset));
4ed46869
KH
982
983 if (NILP (code1))
ac4137cc 984 c1 = 0;
4ed46869 985 else
ac4137cc 986 {
b7826503 987 CHECK_NUMBER (code1);
ac4137cc
KH
988 c1 = XINT (code1);
989 }
4ed46869 990 if (NILP (code2))
ac4137cc 991 c2 = 0;
4ed46869 992 else
ac4137cc 993 {
b7826503 994 CHECK_NUMBER (code2);
ac4137cc
KH
995 c2 = XINT (code2);
996 }
4ed46869 997
2e344af3
KH
998 if (charset_id == CHARSET_ASCII)
999 {
1000 if (c1 < 0 || c1 > 0x7F)
1001 goto invalid_code_posints;
1002 return make_number (c1);
1003 }
1004 else if (charset_id == CHARSET_8_BIT_CONTROL)
1005 {
30736012
KH
1006 if (NILP (code1))
1007 c1 = 0x80;
1008 else if (c1 < 0x80 || c1 > 0x9F)
2e344af3
KH
1009 goto invalid_code_posints;
1010 return make_number (c1);
1011 }
1012 else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1013 {
30736012
KH
1014 if (NILP (code1))
1015 c1 = 0xA0;
1016 else if (c1 < 0xA0 || c1 > 0xFF)
2e344af3
KH
1017 goto invalid_code_posints;
1018 return make_number (c1);
1019 }
1020 else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1021 goto invalid_code_posints;
ac4137cc
KH
1022 c1 &= 0x7F;
1023 c2 &= 0x7F;
1024 if (c1 == 0
1025 ? c2 != 0
1026 : (c2 == 0
87f67317
KR
1027 ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1028 : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
2e344af3 1029 goto invalid_code_posints;
ac4137cc 1030 return make_number (MAKE_CHAR (charset_id, c1, c2));
2e344af3
KH
1031
1032 invalid_code_posints:
1033 error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
4ed46869
KH
1034}
1035
1036DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
bc001814
JB
1037 doc: /* Return list of charset and one or two position-codes of CH.
1038If CH is invalid as a character code,
1039return a list of symbol `unknown' and CH. */)
fdb82f93 1040 (ch)
4ed46869
KH
1041 Lisp_Object ch;
1042{
0282eb69 1043 int c, charset, c1, c2;
4ed46869 1044
b7826503 1045 CHECK_NUMBER (ch);
0282eb69
KH
1046 c = XFASTINT (ch);
1047 if (!CHAR_VALID_P (c, 1))
1048 return Fcons (Qunknown, Fcons (ch, Qnil));
4ed46869 1049 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 1050 return (c2 >= 0
4ed46869
KH
1051 ? Fcons (CHARSET_SYMBOL (charset),
1052 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1053 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1054}
1055
1056DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
bc001814 1057 doc: /* Return charset of CH. */)
fdb82f93 1058 (ch)
4ed46869
KH
1059 Lisp_Object ch;
1060{
b7826503 1061 CHECK_NUMBER (ch);
4ed46869
KH
1062
1063 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1064}
1065
90d7b74e 1066DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
fdb82f93
PJ
1067 doc: /* Return charset of a character in the current buffer at position POS.
1068If POS is nil, it defauls to the current point.
1069If POS is out of range, the value is nil. */)
1070 (pos)
90d7b74e
KH
1071 Lisp_Object pos;
1072{
2e344af3
KH
1073 Lisp_Object ch;
1074 int charset;
ac4137cc 1075
2e344af3
KH
1076 ch = Fchar_after (pos);
1077 if (! INTEGERP (ch))
1078 return ch;
1079 charset = CHAR_CHARSET (XINT (ch));
90d7b74e
KH
1080 return CHARSET_SYMBOL (charset);
1081}
1082
4ed46869 1083DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
fdb82f93
PJ
1084 doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1085
1086ISO 2022's designation sequence (escape sequence) distinguishes charsets
1087by their DIMENSION, CHARS, and FINAL-CHAR,
1088where as Emacs distinguishes them by charset symbol.
1089See the documentation of the function `charset-info' for the meanings of
1090DIMENSION, CHARS, and FINAL-CHAR. */)
1091 (dimension, chars, final_char)
4ed46869
KH
1092 Lisp_Object dimension, chars, final_char;
1093{
1094 int charset;
1095
b7826503
PJ
1096 CHECK_NUMBER (dimension);
1097 CHECK_NUMBER (chars);
1098 CHECK_NUMBER (final_char);
4ed46869
KH
1099
1100 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1101 return Qnil;
1102 return CHARSET_SYMBOL (charset);
1103}
1104
e0f24100
GM
1105/* If GENERICP is nonzero, return nonzero if C is a valid normal or
1106 generic character. If GENERICP is zero, return nonzero if C is a
9d3d8cba
KH
1107 valid normal character. Do not call this function directly,
1108 instead use macro CHAR_VALID_P. */
1109int
1110char_valid_p (c, genericp)
1111 int c, genericp;
1112{
1113 int charset, c1, c2;
1114
0e235b7e 1115 if (c < 0 || c >= MAX_CHAR)
9d3d8cba
KH
1116 return 0;
1117 if (SINGLE_BYTE_CHAR_P (c))
1118 return 1;
2e344af3 1119 SPLIT_CHAR (c, charset, c1, c2);
ac4137cc
KH
1120 if (genericp)
1121 {
1122 if (c1)
1123 {
1124 if (c2 <= 0) c2 = 0x20;
1125 }
1126 else
1127 {
1128 if (c2 <= 0) c1 = c2 = 0x20;
1129 }
1130 }
1131 return (CHARSET_DEFINED_P (charset)
44c6492d 1132 && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
9d3d8cba
KH
1133}
1134
1135DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
fdb82f93
PJ
1136 doc: /* Return t if OBJECT is a valid normal character.
1137If optional arg GENERICP is non-nil, also return t if OBJECT is
1138a valid generic character. */)
1139 (object, genericp)
9d3d8cba
KH
1140 Lisp_Object object, genericp;
1141{
1142 if (! NATNUMP (object))
1143 return Qnil;
1144 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1145}
1146
d2665018
KH
1147DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1148 Sunibyte_char_to_multibyte, 1, 1, 0,
fdb82f93
PJ
1149 doc: /* Convert the unibyte character CH to multibyte character.
1150The conversion is done based on `nonascii-translation-table' (which see)
1151 or `nonascii-insert-offset' (which see). */)
1152 (ch)
d2665018
KH
1153 Lisp_Object ch;
1154{
1155 int c;
1156
b7826503 1157 CHECK_NUMBER (ch);
d2665018
KH
1158 c = XINT (ch);
1159 if (c < 0 || c >= 0400)
1160 error ("Invalid unibyte character: %d", c);
1161 c = unibyte_char_to_multibyte (c);
1162 if (c < 0)
1163 error ("Can't convert to multibyte character: %d", XINT (ch));
1164 return make_number (c);
1165}
1166
1bcc1567
RS
1167DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1168 Smultibyte_char_to_unibyte, 1, 1, 0,
fdb82f93
PJ
1169 doc: /* Convert the multibyte character CH to unibyte character.
1170The conversion is done based on `nonascii-translation-table' (which see)
1171 or `nonascii-insert-offset' (which see). */)
1172 (ch)
1bcc1567
RS
1173 Lisp_Object ch;
1174{
1175 int c;
1176
b7826503 1177 CHECK_NUMBER (ch);
1bcc1567 1178 c = XINT (ch);
ac4137cc 1179 if (! CHAR_VALID_P (c, 0))
1bcc1567
RS
1180 error ("Invalid multibyte character: %d", c);
1181 c = multibyte_char_to_unibyte (c, Qnil);
1182 if (c < 0)
1183 error ("Can't convert to unibyte character: %d", XINT (ch));
1184 return make_number (c);
1185}
1186
4ed46869 1187DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
bc001814 1188 doc: /* Return 1 regardless of the argument CH. */)
fdb82f93 1189 (ch)
4ed46869
KH
1190 Lisp_Object ch;
1191{
b7826503 1192 CHECK_NUMBER (ch);
9b6a601f
KH
1193 return make_number (1);
1194}
1195
1196/* Return how many bytes C will occupy in a multibyte buffer.
1197 Don't call this function directly, instead use macro CHAR_BYTES. */
1198int
1199char_bytes (c)
1200 int c;
1201{
99529c2c 1202 int charset;
9b6a601f 1203
2e344af3
KH
1204 if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1205 return 1;
1206 if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
8ac5a9cc
KH
1207 return 1;
1208
99529c2c
KH
1209 charset = CHAR_CHARSET (c);
1210 return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
4ed46869
KH
1211}
1212
1213/* Return the width of character of which multi-byte form starts with
1214 C. The width is measured by how many columns occupied on the
1215 screen when displayed in the current buffer. */
1216
1217#define ONE_BYTE_CHAR_WIDTH(c) \
1218 (c < 0x20 \
1219 ? (c == '\t' \
53316e55 1220 ? XFASTINT (current_buffer->tab_width) \
4ed46869
KH
1221 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1222 : (c < 0x7f \
1223 ? 1 \
1224 : (c == 0x7F \
1225 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1226 : ((! NILP (current_buffer->enable_multibyte_characters) \
1227 && BASE_LEADING_CODE_P (c)) \
1228 ? WIDTH_BY_CHAR_HEAD (c) \
b4e9dd77 1229 : 4))))
4ed46869
KH
1230
1231DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
bc001814 1232 doc: /* Return width of CH when displayed in the current buffer.
fdb82f93
PJ
1233The width is measured by how many columns it occupies on the screen.
1234Tab is taken to occupy `tab-width' columns. */)
1235 (ch)
1236 Lisp_Object ch;
4ed46869 1237{
859f2b3c 1238 Lisp_Object val, disp;
4ed46869 1239 int c;
51c4025f 1240 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 1241
b7826503 1242 CHECK_NUMBER (ch);
4ed46869 1243
859f2b3c
RS
1244 c = XINT (ch);
1245
1246 /* Get the way the display table would display it. */
51c4025f 1247 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
859f2b3c
RS
1248
1249 if (VECTORP (disp))
1250 XSETINT (val, XVECTOR (disp)->size);
1251 else if (SINGLE_BYTE_CHAR_P (c))
1252 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
1253 else
1254 {
1255 int charset = CHAR_CHARSET (c);
1256
1257 XSETFASTINT (val, CHARSET_WIDTH (charset));
1258 }
1259 return val;
1260}
1261
1262/* Return width of string STR of length LEN when displayed in the
1263 current buffer. The width is measured by how many columns it
1264 occupies on the screen. */
859f2b3c 1265
4ed46869
KH
1266int
1267strwidth (str, len)
1268 unsigned char *str;
1269 int len;
1270{
beeedaad
KH
1271 return c_string_width (str, len, -1, NULL, NULL);
1272}
1273
1274/* Return width of string STR of length LEN when displayed in the
1275 current buffer. The width is measured by how many columns it
1276 occupies on the screen. If PRECISION > 0, return the width of
1277 longest substring that doesn't exceed PRECISION, and set number of
1278 characters and bytes of the substring in *NCHARS and *NBYTES
1279 respectively. */
1280
1281int
1282c_string_width (str, len, precision, nchars, nbytes)
fb4f7f50 1283 const unsigned char *str;
020ea113 1284 int len, precision, *nchars, *nbytes;
beeedaad
KH
1285{
1286 int i = 0, i_byte = 0;
4ed46869 1287 int width = 0;
beeedaad 1288 int chars;
c4a4e28f 1289 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 1290
beeedaad 1291 while (i_byte < len)
859f2b3c 1292 {
beeedaad
KH
1293 int bytes, thiswidth;
1294 Lisp_Object val;
859f2b3c 1295
99529c2c 1296 if (dp)
beeedaad
KH
1297 {
1298 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1299
1300 chars = 1;
1301 val = DISP_CHAR_VECTOR (dp, c);
1302 if (VECTORP (val))
1303 thiswidth = XVECTOR (val)->size;
1304 else
1305 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1306 }
859f2b3c 1307 else
beeedaad
KH
1308 {
1309 chars = 1;
1310 PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1311 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1312 }
99529c2c 1313
beeedaad
KH
1314 if (precision > 0
1315 && (width + thiswidth > precision))
1316 {
1317 *nchars = i;
1318 *nbytes = i_byte;
1319 return width;
1320 }
1321 i++;
1322 i_byte += bytes;
1323 width += thiswidth;
1324 }
99529c2c 1325
beeedaad
KH
1326 if (precision > 0)
1327 {
1328 *nchars = i;
1329 *nbytes = i_byte;
859f2b3c 1330 }
beeedaad 1331
4ed46869
KH
1332 return width;
1333}
1334
beeedaad
KH
1335/* Return width of Lisp string STRING when displayed in the current
1336 buffer. The width is measured by how many columns it occupies on
1337 the screen while paying attention to compositions. If PRECISION >
1338 0, return the width of longest substring that doesn't exceed
1339 PRECISION, and set number of characters and bytes of the substring
1340 in *NCHARS and *NBYTES respectively. */
1341
3f62427c 1342int
beeedaad
KH
1343lisp_string_width (string, precision, nchars, nbytes)
1344 Lisp_Object string;
1345 int precision, *nchars, *nbytes;
3f62427c 1346{
d5db4077
KR
1347 int len = SCHARS (string);
1348 int len_byte = SBYTES (string);
212cc638
KH
1349 /* This set multibyte to 0 even if STRING is multibyte when it
1350 contains only ascii and eight-bit-graphic, but that's
1351 intentional. */
1352 int multibyte = len < len_byte;
fb4f7f50 1353 const unsigned char *str = SDATA (string);
beeedaad 1354 int i = 0, i_byte = 0;
3f62427c 1355 int width = 0;
beeedaad 1356 struct Lisp_Char_Table *dp = buffer_display_table ();
3f62427c
KH
1357
1358 while (i < len)
1359 {
beeedaad
KH
1360 int chars, bytes, thiswidth;
1361 Lisp_Object val;
1362 int cmp_id;
1363 int ignore, end;
1364
1365 if (find_composition (i, -1, &ignore, &end, &val, string)
1366 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1367 >= 0))
3f62427c 1368 {
beeedaad
KH
1369 thiswidth = composition_table[cmp_id]->width;
1370 chars = end - i;
1371 bytes = string_char_to_byte (string, end) - i_byte;
1372 }
1373 else if (dp)
1374 {
212cc638 1375 int c;
beeedaad 1376
212cc638
KH
1377 if (multibyte)
1378 c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1379 else
1380 c = str[i_byte], bytes = 1;
beeedaad
KH
1381 chars = 1;
1382 val = DISP_CHAR_VECTOR (dp, c);
1383 if (VECTORP (val))
1384 thiswidth = XVECTOR (val)->size;
1385 else
1386 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
3f62427c
KH
1387 }
1388 else
1389 {
beeedaad 1390 chars = 1;
212cc638
KH
1391 if (multibyte)
1392 PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1393 else
1394 bytes = 1;
beeedaad
KH
1395 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1396 }
1397
1398 if (precision > 0
1399 && (width + thiswidth > precision))
1400 {
1401 *nchars = i;
1402 *nbytes = i_byte;
1403 return width;
3f62427c 1404 }
beeedaad
KH
1405 i += chars;
1406 i_byte += bytes;
1407 width += thiswidth;
1408 }
1409
1410 if (precision > 0)
1411 {
1412 *nchars = i;
1413 *nbytes = i_byte;
3f62427c 1414 }
beeedaad 1415
3f62427c
KH
1416 return width;
1417}
1418
4ed46869 1419DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
fdb82f93
PJ
1420 doc: /* Return width of STRING when displayed in the current buffer.
1421Width is measured by how many columns it occupies on the screen.
1422When calculating width of a multibyte character in STRING,
1423only the base leading-code is considered; the validity of
1424the following bytes is not checked. Tabs in STRING are always
1425taken to occupy `tab-width' columns. */)
bc001814
JB
1426 (string)
1427 Lisp_Object string;
4ed46869
KH
1428{
1429 Lisp_Object val;
1430
bc001814
JB
1431 CHECK_STRING (string);
1432 XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
4ed46869
KH
1433 return val;
1434}
1435
1436DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
bc001814 1437 doc: /* Return the direction of CH.
fdb82f93
PJ
1438The returned value is 0 for left-to-right and 1 for right-to-left. */)
1439 (ch)
4ed46869
KH
1440 Lisp_Object ch;
1441{
1442 int charset;
1443
b7826503 1444 CHECK_NUMBER (ch);
4ed46869
KH
1445 charset = CHAR_CHARSET (XFASTINT (ch));
1446 if (!CHARSET_DEFINED_P (charset))
93bcb785 1447 invalid_character (XINT (ch));
4ed46869
KH
1448 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1449}
1450
87b089ad
RS
1451/* Return the number of characters in the NBYTES bytes at PTR.
1452 This works by looking at the contents and checking for multibyte sequences.
1453 However, if the current buffer has enable-multibyte-characters = nil,
1454 we treat each byte as a character. */
1455
6ae1f27e
RS
1456int
1457chars_in_text (ptr, nbytes)
fb4f7f50 1458 const unsigned char *ptr;
6ae1f27e
RS
1459 int nbytes;
1460{
87b089ad
RS
1461 /* current_buffer is null at early stages of Emacs initialization. */
1462 if (current_buffer == 0
1463 || NILP (current_buffer->enable_multibyte_characters))
6ae1f27e 1464 return nbytes;
a8a35e61 1465
ac4137cc 1466 return multibyte_chars_in_text (ptr, nbytes);
046b1f03
RS
1467}
1468
87b089ad
RS
1469/* Return the number of characters in the NBYTES bytes at PTR.
1470 This works by looking at the contents and checking for multibyte sequences.
1471 It ignores enable-multibyte-characters. */
1472
1473int
1474multibyte_chars_in_text (ptr, nbytes)
fb4f7f50 1475 const unsigned char *ptr;
87b089ad
RS
1476 int nbytes;
1477{
fb4f7f50 1478 const unsigned char *endp;
ac4137cc 1479 int chars, bytes;
87b089ad
RS
1480
1481 endp = ptr + nbytes;
1482 chars = 0;
1483
1484 while (ptr < endp)
1485 {
2e344af3
KH
1486 PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
1487 ptr += bytes;
1488 chars++;
1489 }
1490
1491 return chars;
1492}
1493
d8e4f486 1494/* Parse unibyte text at STR of LEN bytes as multibyte text, and
2e344af3 1495 count the numbers of characters and bytes in it. On counting
d8e4f486
DL
1496 bytes, pay attention to the fact that 8-bit characters in the range
1497 0x80..0x9F are represented by 2 bytes in multibyte text. */
2e344af3
KH
1498void
1499parse_str_as_multibyte (str, len, nchars, nbytes)
fb4f7f50 1500 const unsigned char *str;
2e344af3
KH
1501 int len, *nchars, *nbytes;
1502{
fb4f7f50 1503 const unsigned char *endp = str + len;
2e344af3
KH
1504 int n, chars = 0, bytes = 0;
1505
1506 while (str < endp)
1507 {
1508 if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
1509 str += n, bytes += n;
1510 else
1511 str++, bytes += 2;
1512 chars++;
1513 }
1514 *nchars = chars;
1515 *nbytes = bytes;
1516 return;
1517}
1518
d8e4f486 1519/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
2e344af3
KH
1520 It actually converts only 8-bit characters in the range 0x80..0x9F
1521 that don't contruct multibyte characters to multibyte forms. If
1522 NCHARS is nonzero, set *NCHARS to the number of characters in the
1523 text. It is assured that we can use LEN bytes at STR as a work
1524 area and that is enough. Return the number of bytes of the
1525 resulting text. */
1526
1527int
1528str_as_multibyte (str, len, nbytes, nchars)
1529 unsigned char *str;
1530 int len, nbytes, *nchars;
1531{
1532 unsigned char *p = str, *endp = str + nbytes;
1533 unsigned char *to;
1534 int chars = 0;
1535 int n;
1536
1537 while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1538 p += n, chars++;
1539 if (nchars)
1540 *nchars = chars;
1541 if (p == endp)
1542 return nbytes;
1543
1544 to = p;
1545 nbytes = endp - p;
1546 endp = str + len;
1547 safe_bcopy (p, endp - nbytes, nbytes);
1548 p = endp - nbytes;
1549 while (p < endp)
1550 {
1551 if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
ac4137cc 1552 {
2e344af3
KH
1553 while (n--)
1554 *to++ = *p++;
177c0ea7 1555 }
ac4137cc
KH
1556 else
1557 {
2e344af3
KH
1558 *to++ = LEADING_CODE_8_BIT_CONTROL;
1559 *to++ = *p++ + 0x20;
ac4137cc 1560 }
87b089ad
RS
1561 chars++;
1562 }
2e344af3
KH
1563 if (nchars)
1564 *nchars = chars;
1565 return (to - str);
1566}
87b089ad 1567
740f080d
KH
1568/* Parse unibyte string at STR of LEN bytes, and return the number of
1569 bytes it may ocupy when converted to multibyte string by
1570 `str_to_multibyte'. */
1571
1572int
1573parse_str_to_multibyte (str, len)
1574 unsigned char *str;
1575 int len;
1576{
1577 unsigned char *endp = str + len;
1578 int bytes;
1579
1580 for (bytes = 0; str < endp; str++)
1581 bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
1582 return bytes;
1583}
1584
d8e4f486 1585/* Convert unibyte text at STR of NBYTES bytes to multibyte text
2e344af3
KH
1586 that contains the same single-byte characters. It actually
1587 converts all 8-bit characters to multibyte forms. It is assured
1588 that we can use LEN bytes at STR as a work area and that is
1589 enough. */
1590
1591int
1592str_to_multibyte (str, len, bytes)
1593 unsigned char *str;
1594 int len, bytes;
1595{
1596 unsigned char *p = str, *endp = str + bytes;
1597 unsigned char *to;
2e344af3
KH
1598
1599 while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1600 if (p == endp)
1601 return bytes;
1602 to = p;
1603 bytes = endp - p;
1604 endp = str + len;
1605 safe_bcopy (p, endp - bytes, bytes);
1606 p = endp - bytes;
177c0ea7 1607 while (p < endp)
2e344af3
KH
1608 {
1609 if (*p < 0x80 || *p >= 0xA0)
1610 *to++ = *p++;
1611 else
1612 *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1613 }
1614 return (to - str);
87b089ad
RS
1615}
1616
2e344af3
KH
1617/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
1618 actually converts only 8-bit characters in the range 0x80..0x9F to
1619 unibyte forms. */
1620
1621int
1622str_as_unibyte (str, bytes)
1623 unsigned char *str;
1624 int bytes;
1625{
1626 unsigned char *p = str, *endp = str + bytes;
1627 unsigned char *to = str;
1628
1629 while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1630 to = p;
177c0ea7 1631 while (p < endp)
2e344af3
KH
1632 {
1633 if (*p == LEADING_CODE_8_BIT_CONTROL)
1634 *to++ = *(p + 1) - 0x20, p += 2;
1635 else
1636 *to++ = *p++;
1637 }
1638 return (to - str);
1639}
1640
1641\f
4bc26a6c 1642DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
5f5d497e
MB
1643 doc: /* Concatenate all the argument characters and make the result a string.
1644usage: (string &rest CHARACTERS) */)
fdb82f93 1645 (n, args)
53316e55 1646 int n;
4ed46869
KH
1647 Lisp_Object *args;
1648{
4894eddd
KS
1649 int i, bufsize;
1650 unsigned char *buf, *p;
2e344af3 1651 int c;
5729c92f 1652 int multibyte = 0;
4894eddd
KS
1653 Lisp_Object ret;
1654 USE_SAFE_ALLOCA;
1655
1656 bufsize = MAX_MULTIBYTE_LENGTH * n;
1657 SAFE_ALLOCA (buf, unsigned char *, bufsize);
1658 p = buf;
4ed46869
KH
1659
1660 for (i = 0; i < n; i++)
1661 {
b7826503 1662 CHECK_NUMBER (args[i]);
5729c92f
KH
1663 if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1664 multibyte = 1;
1665 }
1666
1667 for (i = 0; i < n; i++)
1668 {
4ed46869 1669 c = XINT (args[i]);
5729c92f
KH
1670 if (multibyte)
1671 p += CHAR_STRING (c, p);
1672 else
555e35d2 1673 *p++ = c;
4ed46869
KH
1674 }
1675
4894eddd 1676 ret = make_string_from_bytes (buf, n, p - buf);
233f3db6 1677 SAFE_FREE ();
4894eddd
KS
1678
1679 return ret;
4ed46869
KH
1680}
1681
1682#endif /* emacs */
1683\f
dfcf069d 1684int
4ed46869
KH
1685charset_id_internal (charset_name)
1686 char *charset_name;
1687{
76d7b829 1688 Lisp_Object val;
4ed46869 1689
76d7b829 1690 val= Fget (intern (charset_name), Qcharset);
4ed46869
KH
1691 if (!VECTORP (val))
1692 error ("Charset %s is not defined", charset_name);
1693
1694 return (XINT (XVECTOR (val)->contents[0]));
1695}
1696
1697DEFUN ("setup-special-charsets", Fsetup_special_charsets,
fdb82f93
PJ
1698 Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only. */)
1699 ()
4ed46869
KH
1700{
1701 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1702 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1703 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1704 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1705 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1706 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1707 charset_big5_2 = charset_id_internal ("chinese-big5-2");
c64582be
KH
1708 charset_mule_unicode_0100_24ff
1709 = charset_id_internal ("mule-unicode-0100-24ff");
1710 charset_mule_unicode_2500_33ff
1711 = charset_id_internal ("mule-unicode-2500-33ff");
1712 charset_mule_unicode_e000_ffff
1713 = charset_id_internal ("mule-unicode-e000-ffff");
4ed46869
KH
1714 return Qnil;
1715}
1716
dfcf069d 1717void
4ed46869
KH
1718init_charset_once ()
1719{
1720 int i, j, k;
1721
1722 staticpro (&Vcharset_table);
1723 staticpro (&Vcharset_symbol_table);
8a73a704 1724 staticpro (&Vgeneric_character_list);
4ed46869
KH
1725
1726 /* This has to be done here, before we call Fmake_char_table. */
1727 Qcharset_table = intern ("charset-table");
1728 staticpro (&Qcharset_table);
1729
1730 /* Intern this now in case it isn't already done.
1731 Setting this variable twice is harmless.
1732 But don't staticpro it here--that is done in alloc.c. */
1733 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1734
1735 /* Now we are ready to set up this property, so we can
1736 create the charset table. */
1737 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1738 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1739
0282eb69
KH
1740 Qunknown = intern ("unknown");
1741 staticpro (&Qunknown);
1742 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1743 Qunknown);
4ed46869
KH
1744
1745 /* Setup tables. */
1746 for (i = 0; i < 2; i++)
1747 for (j = 0; j < 2; j++)
1748 for (k = 0; k < 128; k++)
1749 iso_charset_table [i][j][k] = -1;
1750
60383934 1751 for (i = 0; i < 256; i++)
2e344af3 1752 bytes_by_char_head[i] = 1;
2e344af3
KH
1753 bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1754 bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1755 bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1756 bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
4ed46869
KH
1757
1758 for (i = 0; i < 128; i++)
2e344af3 1759 width_by_char_head[i] = 1;
4ed46869 1760 for (; i < 256; i++)
2e344af3
KH
1761 width_by_char_head[i] = 4;
1762 width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1763 width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1764 width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1765 width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
8a73a704
KH
1766
1767 {
76d7b829 1768 Lisp_Object val;
8a73a704 1769
76d7b829 1770 val = Qnil;
8a73a704
KH
1771 for (i = 0x81; i < 0x90; i++)
1772 val = Fcons (make_number ((i - 0x70) << 7), val);
1773 for (; i < 0x9A; i++)
1774 val = Fcons (make_number ((i - 0x8F) << 14), val);
1775 for (i = 0xA0; i < 0xF0; i++)
1776 val = Fcons (make_number ((i - 0x70) << 7), val);
1777 for (; i < 0xFF; i++)
1778 val = Fcons (make_number ((i - 0xE0) << 14), val);
8a73a704
KH
1779 Vgeneric_character_list = Fnreverse (val);
1780 }
bbf12bb3
KH
1781
1782 nonascii_insert_offset = 0;
1783 Vnonascii_translation_table = Qnil;
4ed46869
KH
1784}
1785
1786#ifdef emacs
1787
dfcf069d 1788void
4ed46869
KH
1789syms_of_charset ()
1790{
2e344af3
KH
1791 Qcharset = intern ("charset");
1792 staticpro (&Qcharset);
1793
4ed46869
KH
1794 Qascii = intern ("ascii");
1795 staticpro (&Qascii);
1796
2e344af3
KH
1797 Qeight_bit_control = intern ("eight-bit-control");
1798 staticpro (&Qeight_bit_control);
1799
1800 Qeight_bit_graphic = intern ("eight-bit-graphic");
1801 staticpro (&Qeight_bit_graphic);
4ed46869 1802
2e344af3
KH
1803 /* Define special charsets ascii, eight-bit-control, and
1804 eight-bit-graphic. */
4ed46869
KH
1805 update_charset_table (make_number (CHARSET_ASCII),
1806 make_number (1), make_number (94),
1807 make_number (1),
1808 make_number (0),
1809 make_number ('B'),
1810 make_number (0),
1811 build_string ("ASCII"),
d78bc582 1812 Qnil, /* same as above */
4ed46869
KH
1813 build_string ("ASCII (ISO646 IRV)"));
1814 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1815 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1816
2e344af3
KH
1817 update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1818 make_number (1), make_number (96),
797a084a 1819 make_number (4),
2e344af3
KH
1820 make_number (0),
1821 make_number (-1),
1822 make_number (-1),
1823 build_string ("8-bit control code (0x80..0x9F)"),
d78bc582
KH
1824 Qnil, /* same as above */
1825 Qnil); /* same as above */
2e344af3
KH
1826 CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1827 Fput (Qeight_bit_control, Qcharset,
1828 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1829
1830 update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1831 make_number (1), make_number (96),
797a084a 1832 make_number (4),
2e344af3
KH
1833 make_number (0),
1834 make_number (-1),
1835 make_number (-1),
2e344af3 1836 build_string ("8-bit graphic char (0xA0..0xFF)"),
d78bc582
KH
1837 Qnil, /* same as above */
1838 Qnil); /* same as above */
2e344af3
KH
1839 CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1840 Fput (Qeight_bit_graphic, Qcharset,
1841 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1842
c1a08b4c
KH
1843 Qauto_fill_chars = intern ("auto-fill-chars");
1844 staticpro (&Qauto_fill_chars);
1845 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1846
4ed46869 1847 defsubr (&Sdefine_charset);
8a73a704 1848 defsubr (&Sgeneric_character_list);
3fac5a51 1849 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1850 defsubr (&Sdeclare_equiv_charset);
1851 defsubr (&Sfind_charset_region);
1852 defsubr (&Sfind_charset_string);
1853 defsubr (&Smake_char_internal);
1854 defsubr (&Ssplit_char);
1855 defsubr (&Schar_charset);
90d7b74e 1856 defsubr (&Scharset_after);
4ed46869 1857 defsubr (&Siso_charset);
9d3d8cba 1858 defsubr (&Schar_valid_p);
d2665018 1859 defsubr (&Sunibyte_char_to_multibyte);
1bcc1567 1860 defsubr (&Smultibyte_char_to_unibyte);
4ed46869
KH
1861 defsubr (&Schar_bytes);
1862 defsubr (&Schar_width);
1863 defsubr (&Sstring_width);
1864 defsubr (&Schar_direction);
87b089ad 1865 defsubr (&Sstring);
4ed46869
KH
1866 defsubr (&Ssetup_special_charsets);
1867
1868 DEFVAR_LISP ("charset-list", &Vcharset_list,
fdb82f93 1869 doc: /* List of charsets ever defined. */);
2e344af3
KH
1870 Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1871 Fcons (Qeight_bit_graphic, Qnil)));
4ed46869 1872
537efd8d 1873 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
fdb82f93
PJ
1874 doc: /* Vector of cons cell of a symbol and translation table ever defined.
1875An ID of a translation table is an index of this vector. */);
537efd8d 1876 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
b0e3cf2b 1877
4ed46869 1878 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
fdb82f93 1879 doc: /* Leading-code of private TYPE9N charset of column-width 1. */);
4ed46869
KH
1880 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1881
1882 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
fdb82f93 1883 doc: /* Leading-code of private TYPE9N charset of column-width 2. */);
4ed46869
KH
1884 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1885
1886 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
fdb82f93 1887 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1. */);
4ed46869
KH
1888 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1889
1890 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
fdb82f93 1891 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2. */);
4ed46869 1892 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
35e623fb
RS
1893
1894 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
fdb82f93
PJ
1895 doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1896This is used for converting unibyte text to multibyte,
1897and for inserting character codes specified by number.
1898
1899This serves to convert a Latin-1 or similar 8-bit character code
1900to the corresponding Emacs multibyte character code.
1901Typically the value should be (- (make-char CHARSET 0) 128),
1902for your choice of character set.
1903If `nonascii-translation-table' is non-nil, it overrides this variable. */);
35e623fb 1904 nonascii_insert_offset = 0;
b0e3cf2b 1905
b4e9dd77 1906 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
fdb82f93
PJ
1907 doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1908This is used for converting unibyte text to multibyte,
1909and for inserting character codes specified by number.
1910
1911Conversion is performed only when multibyte characters are enabled,
1912and it serves to convert a Latin-1 or similar 8-bit character code
1913to the corresponding Emacs character code.
1914
1915If this is nil, `nonascii-insert-offset' is used instead.
1916See also the docstring of `make-translation-table'. */);
b4e9dd77 1917 Vnonascii_translation_table = Qnil;
4cf9710d 1918
c1a08b4c 1919 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
fdb82f93
PJ
1920 doc: /* A char-table for characters which invoke auto-filling.
1921Such characters have value t in this table. */);
c1a08b4c 1922 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
60022cb7
AS
1923 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1924 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
4ed46869
KH
1925}
1926
1927#endif /* emacs */
ab5796a9
MB
1928
1929/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1930 (do not change this comment) */