Fixed copyright notice.
[bpt/emacs.git] / src / charset.c
CommitLineData
75c8c592 1/* Basic multilingual character support.
aaef169d
TTN
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006 Free Software Foundation, Inc.
ce03bf76
KH
4 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
4ed46869 7
369314dc
KH
8This file is part of GNU Emacs.
9
10GNU Emacs is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 2, or (at your option)
13any later version.
4ed46869 14
369314dc
KH
15GNU Emacs is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
4ed46869 19
369314dc
KH
20You should have received a copy of the GNU General Public License
21along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
22the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23Boston, MA 02110-1301, USA. */
4ed46869
KH
24
25/* At first, see the document in `charset.h' to understand the code in
26 this file. */
27
68c45bf0
PE
28#ifdef emacs
29#include <config.h>
30#endif
31
4ed46869
KH
32#include <stdio.h>
33
34#ifdef emacs
35
36#include <sys/types.h>
4ed46869
KH
37#include "lisp.h"
38#include "buffer.h"
39#include "charset.h"
3f62427c 40#include "composite.h"
4ed46869 41#include "coding.h"
fc6b09bf 42#include "disptab.h"
4ed46869
KH
43
44#else /* not emacs */
45
46#include "mulelib.h"
47
48#endif /* emacs */
49
2e344af3 50Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
0282eb69 51Lisp_Object Qunknown;
4ed46869
KH
52
53/* Declaration of special leading-codes. */
4bc26a6c
SM
54EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
55EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
56EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
57EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
4ed46869 58
2e344af3
KH
59/* Declaration of special charsets. The values are set by
60 Fsetup_special_charsets. */
4ed46869
KH
61int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
62int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
63int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
64int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
65int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
66int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
67int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
c64582be
KH
68int charset_mule_unicode_0100_24ff;
69int charset_mule_unicode_2500_33ff;
70int charset_mule_unicode_e000_ffff;
4ed46869
KH
71
72Lisp_Object Qcharset_table;
73
74/* A char-table containing information of each character set. */
75Lisp_Object Vcharset_table;
76
77/* A vector of charset symbol indexed by charset-id. This is used
78 only for returning charset symbol from C functions. */
79Lisp_Object Vcharset_symbol_table;
80
81/* A list of charset symbols ever defined. */
82Lisp_Object Vcharset_list;
83
537efd8d
KH
84/* Vector of translation table ever defined.
85 ID of a translation table is used to index this vector. */
86Lisp_Object Vtranslation_table_vector;
b0e3cf2b 87
c1a08b4c
KH
88/* A char-table for characters which may invoke auto-filling. */
89Lisp_Object Vauto_fill_chars;
90
91Lisp_Object Qauto_fill_chars;
92
4ed46869
KH
93/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
94int bytes_by_char_head[256];
95int width_by_char_head[256];
96
97/* Mapping table from ISO2022's charset (specified by DIMENSION,
98 CHARS, and FINAL-CHAR) to Emacs' charset. */
99int iso_charset_table[2][2][128];
100
101/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
102unsigned char *_fetch_multibyte_char_p;
103int _fetch_multibyte_char_len;
104
35e623fb 105/* Offset to add to a non-ASCII value when inserting it. */
31ade731 106EMACS_INT nonascii_insert_offset;
35e623fb 107
4cf9710d
RS
108/* Translation table for converting non-ASCII unibyte characters
109 to multibyte codes, or nil. */
b4e9dd77 110Lisp_Object Vnonascii_translation_table;
4cf9710d 111
8a73a704
KH
112/* List of all possible generic characters. */
113Lisp_Object Vgeneric_character_list;
114
046b1f03 115\f
93bcb785
KH
116void
117invalid_character (c)
118 int c;
119{
85416bda 120 error ("Invalid character: %d, #o%o, #x%x", c, c, c);
93bcb785
KH
121}
122
2e344af3
KH
123/* Parse string STR of length LENGTH and fetch information of a
124 character at STR. Set BYTES to the byte length the character
125 occupies, CHARSET, C1, C2 to proper values of the character. */
126
127#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
128 do { \
129 (c1) = *(str); \
130 (bytes) = BYTES_BY_CHAR_HEAD (c1); \
131 if ((bytes) == 1) \
132 (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
133 else if ((bytes) == 2) \
134 { \
135 if ((c1) == LEADING_CODE_8_BIT_CONTROL) \
136 (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \
137 else \
138 (charset) = (c1), (c1) = (str)[1] & 0x7F; \
139 } \
140 else if ((bytes) == 3) \
141 { \
142 if ((c1) < LEADING_CODE_PRIVATE_11) \
143 (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \
144 else \
145 (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \
146 } \
147 else \
148 (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \
ac4137cc
KH
149 } while (0)
150
d37478d0
EZ
151/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
152 Note that this intentionally allows invalid components, such
153 as 0xA0 0xA0, because there exist many files that contain
154 such invalid byte sequences, especially in EUC-GB. */
44c6492d 155#define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
2e344af3 156 ((charset) == CHARSET_ASCII \
63f4d579 157 ? ((c1) >= 0 && (c1) <= 0x7F) \
2e344af3
KH
158 : ((charset) == CHARSET_8_BIT_CONTROL \
159 ? ((c1) >= 0x80 && (c1) <= 0x9F) \
160 : ((charset) == CHARSET_8_BIT_GRAPHIC \
161 ? ((c1) >= 0x80 && (c1) <= 0xFF) \
162 : (CHARSET_DIMENSION (charset) == 1 \
163 ? ((c1) >= 0x20 && (c1) <= 0x7F) \
164 : ((c1) >= 0x20 && (c1) <= 0x7F \
165 && (c2) >= 0x20 && (c2) <= 0x7F)))))
93bcb785 166
99529c2c
KH
167/* Store multi-byte form of the character C in STR. The caller should
168 allocate at least 4-byte area at STR in advance. Returns the
169 length of the multi-byte form. If C is an invalid character code,
12bcae05 170 return -1. */
4ed46869
KH
171
172int
12bcae05 173char_to_string_1 (c, str)
4ed46869 174 int c;
99529c2c 175 unsigned char *str;
4ed46869 176{
99529c2c
KH
177 unsigned char *p = str;
178
6662e69b 179 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
8ac5a9cc 180 {
6662e69b
KH
181 /* Multibyte character can't have a modifier bit. */
182 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
12bcae05 183 return -1;
6662e69b
KH
184
185 /* For Meta, Shift, and Control modifiers, we need special care. */
8ac5a9cc 186 if (c & CHAR_META)
6662e69b
KH
187 {
188 /* Move the meta bit to the right place for a string. */
189 c = (c & ~CHAR_META) | 0x80;
190 }
191 if (c & CHAR_SHIFT)
192 {
193 /* Shift modifier is valid only with [A-Za-z]. */
194 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
195 c &= ~CHAR_SHIFT;
196 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
197 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
198 }
8ac5a9cc 199 if (c & CHAR_CTL)
6662e69b
KH
200 {
201 /* Simulate the code in lread.c. */
202 /* Allow `\C- ' and `\C-?'. */
203 if (c == (CHAR_CTL | ' '))
204 c = 0;
205 else if (c == (CHAR_CTL | '?'))
206 c = 127;
207 /* ASCII control chars are made from letters (both cases),
208 as well as the non-letters within 0100...0137. */
209 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
210 c &= (037 | (~0177 & ~CHAR_CTL));
211 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
212 c &= (037 | (~0177 & ~CHAR_CTL));
213 }
214
5ef834ea
KH
215 /* If C still has any modifier bits, just ignore it. */
216 c &= ~CHAR_MODIFIER_MASK;
2e344af3 217 }
177c0ea7 218
2e344af3
KH
219 if (SINGLE_BYTE_CHAR_P (c))
220 {
221 if (ASCII_BYTE_P (c) || c >= 0xA0)
222 *p++ = c;
223 else
224 {
225 *p++ = LEADING_CODE_8_BIT_CONTROL;
226 *p++ = c + 0x20;
227 }
8ac5a9cc 228 }
6ce974d4 229 else if (CHAR_VALID_P (c, 0))
4ed46869 230 {
ac4137cc 231 int charset, c1, c2;
4ed46869 232
2e344af3 233 SPLIT_CHAR (c, charset, c1, c2);
99529c2c
KH
234
235 if (charset >= LEADING_CODE_EXT_11)
236 *p++ = (charset < LEADING_CODE_EXT_12
237 ? LEADING_CODE_PRIVATE_11
238 : (charset < LEADING_CODE_EXT_21
239 ? LEADING_CODE_PRIVATE_12
240 : (charset < LEADING_CODE_EXT_22
241 ? LEADING_CODE_PRIVATE_21
242 : LEADING_CODE_PRIVATE_22)));
243 *p++ = charset;
05ffc44b 244 if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
12bcae05 245 return -1;
99529c2c 246 if (c1)
4ed46869 247 {
99529c2c
KH
248 *p++ = c1 | 0x80;
249 if (c2 > 0)
250 *p++ = c2 | 0x80;
4ed46869
KH
251 }
252 }
2e344af3 253 else
12bcae05 254 return -1;
4ed46869 255
2e344af3 256 return (p - str);
4ed46869
KH
257}
258
12bcae05
GM
259
260/* Store multi-byte form of the character C in STR. The caller should
261 allocate at least 4-byte area at STR in advance. Returns the
262 length of the multi-byte form. If C is an invalid character code,
263 signal an error.
264
265 Use macro `CHAR_STRING (C, STR)' instead of calling this function
266 directly if C can be an ASCII character. */
267
268int
269char_to_string (c, str)
270 int c;
271 unsigned char *str;
272{
273 int len;
274 len = char_to_string_1 (c, str);
275 if (len == -1)
276 invalid_character (c);
277 return len;
278}
279
280
44c6492d
KH
281/* Return the non-ASCII character corresponding to multi-byte form at
282 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
283 length of the multibyte form in *ACTUAL_LEN.
537efd8d 284
99529c2c
KH
285 Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
286 this function directly if you want ot handle ASCII characters as
287 well. */
4ed46869 288
dfcf069d 289int
99529c2c 290string_to_char (str, len, actual_len)
8867de67 291 const unsigned char *str;
ac4137cc 292 int len, *actual_len;
4ed46869 293{
ac4137cc 294 int c, bytes, charset, c1, c2;
4ed46869 295
ac4137cc
KH
296 SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
297 c = MAKE_CHAR (charset, c1, c2);
4ed46869 298 if (actual_len)
ac4137cc 299 *actual_len = bytes;
4ed46869
KH
300 return c;
301}
302
44c6492d
KH
303/* Return the length of the multi-byte form at string STR of length LEN.
304 Use the macro MULTIBYTE_FORM_LENGTH instead. */
4ed46869
KH
305int
306multibyte_form_length (str, len)
8867de67 307 const unsigned char *str;
4ed46869
KH
308 int len;
309{
ac4137cc 310 int bytes;
4ed46869 311
ac4137cc 312 PARSE_MULTIBYTE_SEQ (str, len, bytes);
90d7b74e 313 return bytes;
4ed46869
KH
314}
315
ac4137cc
KH
316/* Check multibyte form at string STR of length LEN and set variables
317 pointed by CHARSET, C1, and C2 to charset and position codes of the
318 character at STR, and return 0. If there's no multibyte character,
4ed46869
KH
319 return -1. This should be used only in the macro SPLIT_STRING
320 which checks range of STR in advance. */
321
dfcf069d 322int
99529c2c 323split_string (str, len, charset, c1, c2)
ac4137cc
KH
324 const unsigned char *str;
325 unsigned char *c1, *c2;
326 int len, *charset;
4ed46869 327{
ac4137cc 328 register int bytes, cs, code1, code2 = -1;
4ed46869 329
ac4137cc
KH
330 SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
331 if (cs == CHARSET_ASCII)
4ed46869 332 return -1;
ac4137cc
KH
333 *charset = cs;
334 *c1 = code1;
335 *c2 = code2;
5865af0d 336 return 0;
ac4137cc
KH
337}
338
44c6492d
KH
339/* Return 1 iff character C has valid printable glyph.
340 Use the macro CHAR_PRINTABLE_P instead. */
ac4137cc
KH
341int
342char_printable_p (c)
343 int c;
344{
8ebae00c 345 int charset, c1, c2;
ac4137cc 346
2e344af3 347 if (ASCII_BYTE_P (c))
ac4137cc 348 return 1;
2e344af3
KH
349 else if (SINGLE_BYTE_CHAR_P (c))
350 return 0;
351 else if (c >= MAX_CHAR)
99529c2c 352 return 0;
177c0ea7 353
2e344af3 354 SPLIT_CHAR (c, charset, c1, c2);
ac4137cc
KH
355 if (! CHARSET_DEFINED_P (charset))
356 return 0;
357 if (CHARSET_CHARS (charset) == 94
358 ? c1 <= 32 || c1 >= 127
359 : c1 < 32)
360 return 0;
361 if (CHARSET_DIMENSION (charset) == 2
362 && (CHARSET_CHARS (charset) == 94
363 ? c2 <= 32 || c2 >= 127
364 : c2 < 32))
365 return 0;
366 return 1;
4ed46869
KH
367}
368
537efd8d 369/* Translate character C by translation table TABLE. If C
b4e9dd77
KH
370 is negative, translate a character specified by CHARSET, C1, and C2
371 (C1 and C2 are code points of the character). If no translation is
372 found in TABLE, return C. */
dfcf069d 373int
b4e9dd77 374translate_char (table, c, charset, c1, c2)
23d2a7f1
KH
375 Lisp_Object table;
376 int c, charset, c1, c2;
377{
378 Lisp_Object ch;
379 int alt_charset, alt_c1, alt_c2, dimension;
380
0ad3f83d 381 if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
23d2a7f1 382 if (!CHAR_TABLE_P (table)
ac4137cc 383 || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
23d2a7f1
KH
384 return c;
385
386 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
387 dimension = CHARSET_DIMENSION (alt_charset);
05ffc44b 388 if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
23d2a7f1
KH
389 /* CH is not a generic character, just return it. */
390 return XFASTINT (ch);
391
392 /* Since CH is a generic character, we must return a specific
393 charater which has the same position codes as C from CH. */
394 if (charset < 0)
395 SPLIT_CHAR (c, charset, c1, c2);
396 if (dimension != CHARSET_DIMENSION (charset))
397 /* We can't make such a character because of dimension mismatch. */
398 return c;
23d2a7f1
KH
399 return MAKE_CHAR (alt_charset, c1, c2);
400}
401
d2665018 402/* Convert the unibyte character C to multibyte based on
b4e9dd77 403 Vnonascii_translation_table or nonascii_insert_offset. If they can't
d2665018
KH
404 convert C to a valid multibyte character, convert it based on
405 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
35e623fb 406
dfcf069d 407int
35e623fb
RS
408unibyte_char_to_multibyte (c)
409 int c;
410{
543b4f61 411 if (c < 0400 && c >= 0200)
35e623fb 412 {
d2665018
KH
413 int c_save = c;
414
b4e9dd77 415 if (! NILP (Vnonascii_translation_table))
bbf12bb3
KH
416 {
417 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
ac4137cc 418 if (c >= 0400 && ! char_valid_p (c, 0))
bbf12bb3
KH
419 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
420 }
421 else if (c >= 0240 && nonascii_insert_offset > 0)
422 {
423 c += nonascii_insert_offset;
ac4137cc 424 if (c < 0400 || ! char_valid_p (c, 0))
bbf12bb3
KH
425 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
426 }
427 else if (c >= 0240)
d2665018 428 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
35e623fb
RS
429 }
430 return c;
431}
76d7b829
KH
432
433
434/* Convert the multibyte character C to unibyte 8-bit character based
435 on Vnonascii_translation_table or nonascii_insert_offset. If
436 REV_TBL is non-nil, it should be a reverse table of
437 Vnonascii_translation_table, i.e. what given by:
438 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
439
440int
441multibyte_char_to_unibyte (c, rev_tbl)
442 int c;
443 Lisp_Object rev_tbl;
444{
445 if (!SINGLE_BYTE_CHAR_P (c))
446 {
447 int c_save = c;
448
449 if (! CHAR_TABLE_P (rev_tbl)
450 && CHAR_TABLE_P (Vnonascii_translation_table))
451 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
452 make_number (0));
453 if (CHAR_TABLE_P (rev_tbl))
454 {
455 Lisp_Object temp;
456 temp = Faref (rev_tbl, make_number (c));
457 if (INTEGERP (temp))
458 c = XINT (temp);
bbf12bb3
KH
459 if (c >= 256)
460 c = (c_save & 0177) + 0200;
461 }
462 else
463 {
464 if (nonascii_insert_offset > 0)
465 c -= nonascii_insert_offset;
466 if (c < 128 || c >= 256)
467 c = (c_save & 0177) + 0200;
76d7b829 468 }
76d7b829
KH
469 }
470
471 return c;
472}
473
35e623fb 474\f
4ed46869
KH
475/* Update the table Vcharset_table with the given arguments (see the
476 document of `define-charset' for the meaning of each argument).
477 Several other table contents are also updated. The caller should
478 check the validity of CHARSET-ID and the remaining arguments in
479 advance. */
480
481void
482update_charset_table (charset_id, dimension, chars, width, direction,
483 iso_final_char, iso_graphic_plane,
484 short_name, long_name, description)
485 Lisp_Object charset_id, dimension, chars, width, direction;
486 Lisp_Object iso_final_char, iso_graphic_plane;
487 Lisp_Object short_name, long_name, description;
488{
489 int charset = XINT (charset_id);
490 int bytes;
491 unsigned char leading_code_base, leading_code_ext;
492
6dc0722d
KH
493 if (NILP (CHARSET_TABLE_ENTRY (charset)))
494 CHARSET_TABLE_ENTRY (charset)
495 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869 496
d78bc582
KH
497 if (NILP (long_name))
498 long_name = short_name;
499 if (NILP (description))
500 description = long_name;
501
4ed46869
KH
502 /* Get byte length of multibyte form, base leading-code, and
503 extended leading-code of the charset. See the comment under the
504 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
505 bytes = XINT (dimension);
506 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
507 {
508 /* Official charset, it doesn't have an extended leading-code. */
2e344af3 509 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
4ed46869
KH
510 bytes += 1; /* For a base leading-code. */
511 leading_code_base = charset;
512 leading_code_ext = 0;
513 }
514 else
515 {
516 /* Private charset. */
517 bytes += 2; /* For base and extended leading-codes. */
518 leading_code_base
519 = (charset < LEADING_CODE_EXT_12
520 ? LEADING_CODE_PRIVATE_11
521 : (charset < LEADING_CODE_EXT_21
522 ? LEADING_CODE_PRIVATE_12
523 : (charset < LEADING_CODE_EXT_22
524 ? LEADING_CODE_PRIVATE_21
525 : LEADING_CODE_PRIVATE_22)));
526 leading_code_ext = charset;
c83ef371
KH
527 if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
528 error ("Invalid dimension for the charset-ID %d", charset);
529 }
6ef23ebb 530
4ed46869
KH
531 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
532 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
533 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
534 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
535 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
536 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
537 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
538 = make_number (leading_code_base);
539 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
540 = make_number (leading_code_ext);
541 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
542 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
543 = iso_graphic_plane;
544 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
545 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
546 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
547 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
548
549 {
550 /* If we have already defined a charset which has the same
551 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
552 DIRECTION, we must update the entry REVERSE-CHARSET of both
553 charsets. If there's no such charset, the value of the entry
554 is set to nil. */
555 int i;
556
513ee442 557 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
558 if (!NILP (CHARSET_TABLE_ENTRY (i)))
559 {
560 if (CHARSET_DIMENSION (i) == XINT (dimension)
561 && CHARSET_CHARS (i) == XINT (chars)
562 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
563 && CHARSET_DIRECTION (i) != XINT (direction))
564 {
565 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
566 = make_number (i);
567 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
568 break;
569 }
570 }
513ee442 571 if (i > MAX_CHARSET)
4ed46869
KH
572 /* No such a charset. */
573 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
574 = make_number (-1);
575 }
576
c83ef371 577 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
4ed46869
KH
578 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
579 {
c83ef371 580 bytes_by_char_head[leading_code_base] = bytes;
4ed46869
KH
581 width_by_char_head[leading_code_base] = XINT (width);
582
583 /* Update table emacs_code_class. */
584 emacs_code_class[charset] = (bytes == 2
585 ? EMACS_leading_code_2
586 : (bytes == 3
587 ? EMACS_leading_code_3
588 : EMACS_leading_code_4));
589 }
590
591 /* Update table iso_charset_table. */
52e386c2 592 if (XINT (iso_final_char) >= 0
2e344af3 593 && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
4ed46869
KH
594 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
595}
596
597#ifdef emacs
598
599/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
600 is invalid. */
601int
602get_charset_id (charset_symbol)
603 Lisp_Object charset_symbol;
604{
605 Lisp_Object val;
606 int charset;
607
1a45ff10
EZ
608 /* This originally used a ?: operator, but reportedly the HP-UX
609 compiler version HP92453-01 A.10.32.22 miscompiles that. */
610 if (SYMBOLP (charset_symbol)
611 && VECTORP (val = Fget (charset_symbol, Qcharset))
612 && CHARSET_VALID_P (charset =
613 XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
614 return charset;
615 else
616 return -1;
4ed46869
KH
617}
618
619/* Return an identification number for a new private charset of
620 DIMENSION and WIDTH. If there's no more room for the new charset,
621 return 0. */
622Lisp_Object
623get_new_private_charset_id (dimension, width)
624 int dimension, width;
625{
626 int charset, from, to;
627
628 if (dimension == 1)
629 {
3bb7b08b
KH
630 from = LEADING_CODE_EXT_11;
631 to = LEADING_CODE_EXT_21;
4ed46869
KH
632 }
633 else
634 {
3bb7b08b
KH
635 from = LEADING_CODE_EXT_21;
636 to = LEADING_CODE_EXT_MAX + 1;
4ed46869
KH
637 }
638
639 for (charset = from; charset < to; charset++)
640 if (!CHARSET_DEFINED_P (charset)) break;
641
642 return make_number (charset < to ? charset : 0);
643}
644
645DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
fdb82f93
PJ
646 doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
647If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
648 treated as a private charset.
649INFO-VECTOR is a vector of the format:
650 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
651 SHORT-NAME LONG-NAME DESCRIPTION]
652The meanings of each elements is as follows:
653DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
654CHARS (integer) is the number of characters in a dimension: 94 or 96.
655WIDTH (integer) is the number of columns a character in the charset
656occupies on the screen: one of 0, 1, and 2.
657
658DIRECTION (integer) is the rendering direction of characters in the
659charset when rendering. If 0, render from left to right, else
660render from right to left.
661
662ISO-FINAL-CHAR (character) is the final character of the
663corresponding ISO 2022 charset.
664It may be -1 if the charset is internal use only.
665
666ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
667while encoding to variants of ISO 2022 coding system, one of the
668following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
669It may be -1 if the charset is internal use only.
670
671SHORT-NAME (string) is the short name to refer to the charset.
672
673LONG-NAME (string) is the long name to refer to the charset.
674
675DESCRIPTION (string) is the description string of the charset. */)
676 (charset_id, charset_symbol, info_vector)
4ed46869
KH
677 Lisp_Object charset_id, charset_symbol, info_vector;
678{
679 Lisp_Object *vec;
680
681 if (!NILP (charset_id))
b7826503
PJ
682 CHECK_NUMBER (charset_id);
683 CHECK_SYMBOL (charset_symbol);
684 CHECK_VECTOR (info_vector);
4ed46869
KH
685
686 if (! NILP (charset_id))
687 {
688 if (! CHARSET_VALID_P (XINT (charset_id)))
689 error ("Invalid CHARSET: %d", XINT (charset_id));
690 else if (CHARSET_DEFINED_P (XINT (charset_id)))
691 error ("Already defined charset: %d", XINT (charset_id));
692 }
693
694 vec = XVECTOR (info_vector)->contents;
695 if (XVECTOR (info_vector)->size != 9
696 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
697 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
698 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
699 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
2e344af3 700 || !INTEGERP (vec[4])
05ffc44b 701 || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
2e344af3
KH
702 || !INTEGERP (vec[5])
703 || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
4ed46869
KH
704 || !STRINGP (vec[6])
705 || !STRINGP (vec[7])
706 || !STRINGP (vec[8]))
707 error ("Invalid info-vector argument for defining charset %s",
d5db4077 708 SDATA (SYMBOL_NAME (charset_symbol)));
4ed46869
KH
709
710 if (NILP (charset_id))
711 {
712 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
713 if (XINT (charset_id) == 0)
714 error ("There's no room for a new private charset %s",
d5db4077 715 SDATA (SYMBOL_NAME (charset_symbol)));
4ed46869
KH
716 }
717
718 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
719 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 720 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
721 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
722 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
538482ab 723 Fupdate_coding_systems_internal ();
4ed46869
KH
724 return Qnil;
725}
726
8a73a704
KH
727DEFUN ("generic-character-list", Fgeneric_character_list,
728 Sgeneric_character_list, 0, 0, 0,
fdb82f93
PJ
729 doc: /* Return a list of all possible generic characters.
730It includes a generic character for a charset not yet defined. */)
731 ()
8a73a704
KH
732{
733 return Vgeneric_character_list;
734}
735
3fac5a51
KH
736DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
737 Sget_unused_iso_final_char, 2, 2, 0,
bc001814 738 doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
739DIMENSION is the number of bytes to represent a character: 1 or 2.
740CHARS is the number of characters in a dimension: 94 or 96.
741
742This final char is for private use, thus the range is `0' (48) .. `?' (63).
743If there's no unused final char for the specified kind of charset,
744return nil. */)
745 (dimension, chars)
3fac5a51
KH
746 Lisp_Object dimension, chars;
747{
748 int final_char;
749
b7826503
PJ
750 CHECK_NUMBER (dimension);
751 CHECK_NUMBER (chars);
3fac5a51
KH
752 if (XINT (dimension) != 1 && XINT (dimension) != 2)
753 error ("Invalid charset dimension %d, it should be 1 or 2",
754 XINT (dimension));
755 if (XINT (chars) != 94 && XINT (chars) != 96)
756 error ("Invalid charset chars %d, it should be 94 or 96",
757 XINT (chars));
758 for (final_char = '0'; final_char <= '?'; final_char++)
759 {
760 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
761 break;
762 }
763 return (final_char <= '?' ? make_number (final_char) : Qnil);
764}
765
4ed46869
KH
766DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
767 4, 4, 0,
74ac5074
KH
768 doc: /* Declare an equivalent charset for ISO-2022 decoding.
769
770On decoding by an ISO-2022 base coding system, when a charset
771specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
772if CHARSET is designated instead. */)
773 (dimension, chars, final_char, charset)
774 Lisp_Object dimension, chars, final_char, charset;
4ed46869 775{
74ac5074 776 int charset_id;
4ed46869 777
b7826503
PJ
778 CHECK_NUMBER (dimension);
779 CHECK_NUMBER (chars);
780 CHECK_NUMBER (final_char);
74ac5074 781 CHECK_SYMBOL (charset);
4ed46869
KH
782
783 if (XINT (dimension) != 1 && XINT (dimension) != 2)
784 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
785 if (XINT (chars) != 94 && XINT (chars) != 96)
786 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
787 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
788 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
74ac5074
KH
789 if ((charset_id = get_charset_id (charset)) < 0)
790 error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
4ed46869 791
74ac5074 792 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
4ed46869
KH
793 return Qnil;
794}
795
2e344af3
KH
796/* Return information about charsets in the text at PTR of NBYTES
797 bytes, which are NCHARS characters. The value is:
f6302ac9 798
cfe34140 799 0: Each character is represented by one byte. This is always
f6302ac9 800 true for unibyte text.
2e344af3
KH
801 1: No charsets other than ascii eight-bit-control,
802 eight-bit-graphic, and latin-1 are found.
803 2: Otherwise.
1d67c29b 804
2e344af3
KH
805 In addition, if CHARSETS is nonzero, for each found charset N, set
806 CHARSETS[N] to 1. For that, callers should allocate CHARSETS
807 (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
808 table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
809 1 (note that there's no charset whose ID is 1). */
4ed46869
KH
810
811int
2e344af3 812find_charset_in_text (ptr, nchars, nbytes, charsets, table)
fb4f7f50 813 const unsigned char *ptr;
2e344af3 814 int nchars, nbytes, *charsets;
23d2a7f1 815 Lisp_Object table;
4ed46869 816{
2e344af3 817 if (nchars == nbytes)
0282eb69 818 {
2e344af3 819 if (charsets && nbytes > 0)
0282eb69 820 {
fb4f7f50 821 const unsigned char *endp = ptr + nbytes;
2e344af3
KH
822 int maskbits = 0;
823
824 while (ptr < endp && maskbits != 7)
825 {
826 maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
827 ptr++;
177c0ea7 828 }
2e344af3
KH
829
830 if (maskbits & 1)
831 charsets[CHARSET_ASCII] = 1;
832 if (maskbits & 2)
833 charsets[CHARSET_8_BIT_CONTROL] = 1;
834 if (maskbits & 4)
835 charsets[CHARSET_8_BIT_GRAPHIC] = 1;
0282eb69 836 }
2e344af3 837 return 0;
0282eb69 838 }
2e344af3 839 else
4ed46869 840 {
2e344af3 841 int return_val = 1;
99529c2c 842 int bytes, charset, c1, c2;
05505664 843
2e344af3
KH
844 if (! CHAR_TABLE_P (table))
845 table = Qnil;
05505664 846
2e344af3 847 while (nchars-- > 0)
23d2a7f1 848 {
2e344af3
KH
849 SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
850 ptr += bytes;
4ed46869 851
2e344af3
KH
852 if (!CHARSET_DEFINED_P (charset))
853 charset = 1;
854 else if (! NILP (table))
855 {
856 int c = translate_char (table, -1, charset, c1, c2);
857 if (c >= 0)
858 charset = CHAR_CHARSET (c);
859 }
860
861 if (return_val == 1
862 && charset != CHARSET_ASCII
863 && charset != CHARSET_8_BIT_CONTROL
864 && charset != CHARSET_8_BIT_GRAPHIC
865 && charset != charset_latin_iso8859_1)
866 return_val = 2;
867
868 if (charsets)
869 charsets[charset] = 1;
870 else if (return_val == 2)
871 break;
4ed46869 872 }
2e344af3 873 return return_val;
4ed46869 874 }
4ed46869
KH
875}
876
877DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 878 2, 3, 0,
fdb82f93
PJ
879 doc: /* Return a list of charsets in the region between BEG and END.
880BEG and END are buffer positions.
881Optional arg TABLE if non-nil is a translation table to look up.
882
883If the region contains invalid multibyte characters,
884`unknown' is included in the returned list.
885
886If the current buffer is unibyte, the returned list may contain
887only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
888 (beg, end, table)
23d2a7f1 889 Lisp_Object beg, end, table;
4ed46869 890{
028d516b 891 int charsets[MAX_CHARSET + 1];
6ae1f27e 892 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
893 Lisp_Object val;
894
895 validate_region (&beg, &end);
896 from = XFASTINT (beg);
897 stop = to = XFASTINT (end);
6ae1f27e 898
4ed46869 899 if (from < GPT && GPT < to)
6ae1f27e
RS
900 {
901 stop = GPT;
902 stop_byte = GPT_BYTE;
903 }
904 else
905 stop_byte = CHAR_TO_BYTE (stop);
906
907 from_byte = CHAR_TO_BYTE (from);
908
028d516b 909 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
4ed46869
KH
910 while (1)
911 {
2e344af3
KH
912 find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
913 stop_byte - from_byte, charsets, table);
4ed46869 914 if (stop < to)
6ae1f27e
RS
915 {
916 from = stop, from_byte = stop_byte;
917 stop = to, stop_byte = CHAR_TO_BYTE (stop);
918 }
4ed46869
KH
919 else
920 break;
921 }
6ae1f27e 922
4ed46869 923 val = Qnil;
2e344af3 924 if (charsets[1])
0282eb69 925 val = Fcons (Qunknown, val);
2e344af3
KH
926 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
927 if (charsets[i])
928 val = Fcons (CHARSET_SYMBOL (i), val);
929 if (charsets[0])
930 val = Fcons (Qascii, val);
4ed46869
KH
931 return val;
932}
933
934DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 935 1, 2, 0,
fdb82f93
PJ
936 doc: /* Return a list of charsets in STR.
937Optional arg TABLE if non-nil is a translation table to look up.
938
939If the string contains invalid multibyte characters,
940`unknown' is included in the returned list.
941
942If STR is unibyte, the returned list may contain
943only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
944 (str, table)
23d2a7f1 945 Lisp_Object str, table;
4ed46869 946{
a29e3b1b 947 int charsets[MAX_CHARSET + 1];
4ed46869
KH
948 int i;
949 Lisp_Object val;
950
b7826503 951 CHECK_STRING (str);
87b089ad 952
a29e3b1b 953 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
d5db4077
KR
954 find_charset_in_text (SDATA (str), SCHARS (str),
955 SBYTES (str), charsets, table);
2e344af3 956
4ed46869 957 val = Qnil;
2e344af3 958 if (charsets[1])
0282eb69 959 val = Fcons (Qunknown, val);
2e344af3
KH
960 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
961 if (charsets[i])
962 val = Fcons (CHARSET_SYMBOL (i), val);
963 if (charsets[0])
964 val = Fcons (Qascii, val);
4ed46869
KH
965 return val;
966}
2e344af3 967
4ed46869
KH
968\f
969DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
fdb82f93
PJ
970 doc: /* Return a character made from arguments.
971Internal use only. */)
972 (charset, code1, code2)
4ed46869
KH
973 Lisp_Object charset, code1, code2;
974{
ac4137cc
KH
975 int charset_id, c1, c2;
976
b7826503 977 CHECK_NUMBER (charset);
ac4137cc
KH
978 charset_id = XINT (charset);
979 if (!CHARSET_DEFINED_P (charset_id))
980 error ("Invalid charset ID: %d", XINT (charset));
4ed46869
KH
981
982 if (NILP (code1))
ac4137cc 983 c1 = 0;
4ed46869 984 else
ac4137cc 985 {
b7826503 986 CHECK_NUMBER (code1);
ac4137cc
KH
987 c1 = XINT (code1);
988 }
4ed46869 989 if (NILP (code2))
ac4137cc 990 c2 = 0;
4ed46869 991 else
ac4137cc 992 {
b7826503 993 CHECK_NUMBER (code2);
ac4137cc
KH
994 c2 = XINT (code2);
995 }
4ed46869 996
2e344af3
KH
997 if (charset_id == CHARSET_ASCII)
998 {
999 if (c1 < 0 || c1 > 0x7F)
1000 goto invalid_code_posints;
1001 return make_number (c1);
1002 }
1003 else if (charset_id == CHARSET_8_BIT_CONTROL)
1004 {
30736012
KH
1005 if (NILP (code1))
1006 c1 = 0x80;
1007 else if (c1 < 0x80 || c1 > 0x9F)
2e344af3
KH
1008 goto invalid_code_posints;
1009 return make_number (c1);
1010 }
1011 else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1012 {
30736012
KH
1013 if (NILP (code1))
1014 c1 = 0xA0;
1015 else if (c1 < 0xA0 || c1 > 0xFF)
2e344af3
KH
1016 goto invalid_code_posints;
1017 return make_number (c1);
1018 }
1019 else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1020 goto invalid_code_posints;
ac4137cc
KH
1021 c1 &= 0x7F;
1022 c2 &= 0x7F;
1023 if (c1 == 0
1024 ? c2 != 0
1025 : (c2 == 0
87f67317
KR
1026 ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1027 : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
2e344af3 1028 goto invalid_code_posints;
ac4137cc 1029 return make_number (MAKE_CHAR (charset_id, c1, c2));
2e344af3
KH
1030
1031 invalid_code_posints:
1032 error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
4ed46869
KH
1033}
1034
1035DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
bc001814
JB
1036 doc: /* Return list of charset and one or two position-codes of CH.
1037If CH is invalid as a character code,
1038return a list of symbol `unknown' and CH. */)
fdb82f93 1039 (ch)
4ed46869
KH
1040 Lisp_Object ch;
1041{
0282eb69 1042 int c, charset, c1, c2;
4ed46869 1043
b7826503 1044 CHECK_NUMBER (ch);
0282eb69
KH
1045 c = XFASTINT (ch);
1046 if (!CHAR_VALID_P (c, 1))
1047 return Fcons (Qunknown, Fcons (ch, Qnil));
4ed46869 1048 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 1049 return (c2 >= 0
4ed46869
KH
1050 ? Fcons (CHARSET_SYMBOL (charset),
1051 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1052 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1053}
1054
1055DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
bc001814 1056 doc: /* Return charset of CH. */)
fdb82f93 1057 (ch)
4ed46869
KH
1058 Lisp_Object ch;
1059{
b7826503 1060 CHECK_NUMBER (ch);
4ed46869
KH
1061
1062 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1063}
1064
90d7b74e 1065DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
fdb82f93
PJ
1066 doc: /* Return charset of a character in the current buffer at position POS.
1067If POS is nil, it defauls to the current point.
1068If POS is out of range, the value is nil. */)
1069 (pos)
90d7b74e
KH
1070 Lisp_Object pos;
1071{
2e344af3
KH
1072 Lisp_Object ch;
1073 int charset;
ac4137cc 1074
2e344af3
KH
1075 ch = Fchar_after (pos);
1076 if (! INTEGERP (ch))
1077 return ch;
1078 charset = CHAR_CHARSET (XINT (ch));
90d7b74e
KH
1079 return CHARSET_SYMBOL (charset);
1080}
1081
4ed46869 1082DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
fdb82f93
PJ
1083 doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1084
1085ISO 2022's designation sequence (escape sequence) distinguishes charsets
1086by their DIMENSION, CHARS, and FINAL-CHAR,
1087where as Emacs distinguishes them by charset symbol.
1088See the documentation of the function `charset-info' for the meanings of
1089DIMENSION, CHARS, and FINAL-CHAR. */)
1090 (dimension, chars, final_char)
4ed46869
KH
1091 Lisp_Object dimension, chars, final_char;
1092{
1093 int charset;
1094
b7826503
PJ
1095 CHECK_NUMBER (dimension);
1096 CHECK_NUMBER (chars);
1097 CHECK_NUMBER (final_char);
4ed46869
KH
1098
1099 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1100 return Qnil;
1101 return CHARSET_SYMBOL (charset);
1102}
1103
9d3d8cba
KH
1104/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1105 generic character. If GENERICP is zero, return nonzero iff C is a
1106 valid normal character. Do not call this function directly,
1107 instead use macro CHAR_VALID_P. */
1108int
1109char_valid_p (c, genericp)
1110 int c, genericp;
1111{
1112 int charset, c1, c2;
1113
0e235b7e 1114 if (c < 0 || c >= MAX_CHAR)
9d3d8cba
KH
1115 return 0;
1116 if (SINGLE_BYTE_CHAR_P (c))
1117 return 1;
2e344af3 1118 SPLIT_CHAR (c, charset, c1, c2);
ac4137cc
KH
1119 if (genericp)
1120 {
1121 if (c1)
1122 {
1123 if (c2 <= 0) c2 = 0x20;
1124 }
1125 else
1126 {
1127 if (c2 <= 0) c1 = c2 = 0x20;
1128 }
1129 }
1130 return (CHARSET_DEFINED_P (charset)
44c6492d 1131 && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
9d3d8cba
KH
1132}
1133
1134DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
fdb82f93
PJ
1135 doc: /* Return t if OBJECT is a valid normal character.
1136If optional arg GENERICP is non-nil, also return t if OBJECT is
1137a valid generic character. */)
1138 (object, genericp)
9d3d8cba
KH
1139 Lisp_Object object, genericp;
1140{
1141 if (! NATNUMP (object))
1142 return Qnil;
1143 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1144}
1145
d2665018
KH
1146DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1147 Sunibyte_char_to_multibyte, 1, 1, 0,
fdb82f93
PJ
1148 doc: /* Convert the unibyte character CH to multibyte character.
1149The conversion is done based on `nonascii-translation-table' (which see)
1150 or `nonascii-insert-offset' (which see). */)
1151 (ch)
d2665018
KH
1152 Lisp_Object ch;
1153{
1154 int c;
1155
b7826503 1156 CHECK_NUMBER (ch);
d2665018
KH
1157 c = XINT (ch);
1158 if (c < 0 || c >= 0400)
1159 error ("Invalid unibyte character: %d", c);
1160 c = unibyte_char_to_multibyte (c);
1161 if (c < 0)
1162 error ("Can't convert to multibyte character: %d", XINT (ch));
1163 return make_number (c);
1164}
1165
1bcc1567
RS
1166DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1167 Smultibyte_char_to_unibyte, 1, 1, 0,
fdb82f93
PJ
1168 doc: /* Convert the multibyte character CH to unibyte character.
1169The conversion is done based on `nonascii-translation-table' (which see)
1170 or `nonascii-insert-offset' (which see). */)
1171 (ch)
1bcc1567
RS
1172 Lisp_Object ch;
1173{
1174 int c;
1175
b7826503 1176 CHECK_NUMBER (ch);
1bcc1567 1177 c = XINT (ch);
ac4137cc 1178 if (! CHAR_VALID_P (c, 0))
1bcc1567
RS
1179 error ("Invalid multibyte character: %d", c);
1180 c = multibyte_char_to_unibyte (c, Qnil);
1181 if (c < 0)
1182 error ("Can't convert to unibyte character: %d", XINT (ch));
1183 return make_number (c);
1184}
1185
4ed46869 1186DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
bc001814 1187 doc: /* Return 1 regardless of the argument CH. */)
fdb82f93 1188 (ch)
4ed46869
KH
1189 Lisp_Object ch;
1190{
b7826503 1191 CHECK_NUMBER (ch);
9b6a601f
KH
1192 return make_number (1);
1193}
1194
1195/* Return how many bytes C will occupy in a multibyte buffer.
1196 Don't call this function directly, instead use macro CHAR_BYTES. */
1197int
1198char_bytes (c)
1199 int c;
1200{
99529c2c 1201 int charset;
9b6a601f 1202
2e344af3
KH
1203 if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1204 return 1;
1205 if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
8ac5a9cc
KH
1206 return 1;
1207
99529c2c
KH
1208 charset = CHAR_CHARSET (c);
1209 return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
4ed46869
KH
1210}
1211
1212/* Return the width of character of which multi-byte form starts with
1213 C. The width is measured by how many columns occupied on the
1214 screen when displayed in the current buffer. */
1215
1216#define ONE_BYTE_CHAR_WIDTH(c) \
1217 (c < 0x20 \
1218 ? (c == '\t' \
53316e55 1219 ? XFASTINT (current_buffer->tab_width) \
4ed46869
KH
1220 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1221 : (c < 0x7f \
1222 ? 1 \
1223 : (c == 0x7F \
1224 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1225 : ((! NILP (current_buffer->enable_multibyte_characters) \
1226 && BASE_LEADING_CODE_P (c)) \
1227 ? WIDTH_BY_CHAR_HEAD (c) \
b4e9dd77 1228 : 4))))
4ed46869
KH
1229
1230DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
bc001814 1231 doc: /* Return width of CH when displayed in the current buffer.
fdb82f93
PJ
1232The width is measured by how many columns it occupies on the screen.
1233Tab is taken to occupy `tab-width' columns. */)
1234 (ch)
1235 Lisp_Object ch;
4ed46869 1236{
859f2b3c 1237 Lisp_Object val, disp;
4ed46869 1238 int c;
51c4025f 1239 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 1240
b7826503 1241 CHECK_NUMBER (ch);
4ed46869 1242
859f2b3c
RS
1243 c = XINT (ch);
1244
1245 /* Get the way the display table would display it. */
51c4025f 1246 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
859f2b3c
RS
1247
1248 if (VECTORP (disp))
1249 XSETINT (val, XVECTOR (disp)->size);
1250 else if (SINGLE_BYTE_CHAR_P (c))
1251 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
1252 else
1253 {
1254 int charset = CHAR_CHARSET (c);
1255
1256 XSETFASTINT (val, CHARSET_WIDTH (charset));
1257 }
1258 return val;
1259}
1260
1261/* Return width of string STR of length LEN when displayed in the
1262 current buffer. The width is measured by how many columns it
1263 occupies on the screen. */
859f2b3c 1264
4ed46869
KH
1265int
1266strwidth (str, len)
1267 unsigned char *str;
1268 int len;
1269{
beeedaad
KH
1270 return c_string_width (str, len, -1, NULL, NULL);
1271}
1272
1273/* Return width of string STR of length LEN when displayed in the
1274 current buffer. The width is measured by how many columns it
1275 occupies on the screen. If PRECISION > 0, return the width of
1276 longest substring that doesn't exceed PRECISION, and set number of
1277 characters and bytes of the substring in *NCHARS and *NBYTES
1278 respectively. */
1279
1280int
1281c_string_width (str, len, precision, nchars, nbytes)
fb4f7f50 1282 const unsigned char *str;
020ea113 1283 int len, precision, *nchars, *nbytes;
beeedaad
KH
1284{
1285 int i = 0, i_byte = 0;
4ed46869 1286 int width = 0;
beeedaad 1287 int chars;
c4a4e28f 1288 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 1289
beeedaad 1290 while (i_byte < len)
859f2b3c 1291 {
beeedaad
KH
1292 int bytes, thiswidth;
1293 Lisp_Object val;
859f2b3c 1294
99529c2c 1295 if (dp)
beeedaad
KH
1296 {
1297 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1298
1299 chars = 1;
1300 val = DISP_CHAR_VECTOR (dp, c);
1301 if (VECTORP (val))
1302 thiswidth = XVECTOR (val)->size;
1303 else
1304 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1305 }
859f2b3c 1306 else
beeedaad
KH
1307 {
1308 chars = 1;
1309 PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1310 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1311 }
99529c2c 1312
beeedaad
KH
1313 if (precision > 0
1314 && (width + thiswidth > precision))
1315 {
1316 *nchars = i;
1317 *nbytes = i_byte;
1318 return width;
1319 }
1320 i++;
1321 i_byte += bytes;
1322 width += thiswidth;
1323 }
99529c2c 1324
beeedaad
KH
1325 if (precision > 0)
1326 {
1327 *nchars = i;
1328 *nbytes = i_byte;
859f2b3c 1329 }
beeedaad 1330
4ed46869
KH
1331 return width;
1332}
1333
beeedaad
KH
1334/* Return width of Lisp string STRING when displayed in the current
1335 buffer. The width is measured by how many columns it occupies on
1336 the screen while paying attention to compositions. If PRECISION >
1337 0, return the width of longest substring that doesn't exceed
1338 PRECISION, and set number of characters and bytes of the substring
1339 in *NCHARS and *NBYTES respectively. */
1340
3f62427c 1341int
beeedaad
KH
1342lisp_string_width (string, precision, nchars, nbytes)
1343 Lisp_Object string;
1344 int precision, *nchars, *nbytes;
3f62427c 1345{
d5db4077
KR
1346 int len = SCHARS (string);
1347 int len_byte = SBYTES (string);
212cc638
KH
1348 /* This set multibyte to 0 even if STRING is multibyte when it
1349 contains only ascii and eight-bit-graphic, but that's
1350 intentional. */
1351 int multibyte = len < len_byte;
fb4f7f50 1352 const unsigned char *str = SDATA (string);
beeedaad 1353 int i = 0, i_byte = 0;
3f62427c 1354 int width = 0;
beeedaad 1355 struct Lisp_Char_Table *dp = buffer_display_table ();
3f62427c
KH
1356
1357 while (i < len)
1358 {
beeedaad
KH
1359 int chars, bytes, thiswidth;
1360 Lisp_Object val;
1361 int cmp_id;
1362 int ignore, end;
1363
1364 if (find_composition (i, -1, &ignore, &end, &val, string)
1365 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1366 >= 0))
3f62427c 1367 {
beeedaad
KH
1368 thiswidth = composition_table[cmp_id]->width;
1369 chars = end - i;
1370 bytes = string_char_to_byte (string, end) - i_byte;
1371 }
1372 else if (dp)
1373 {
212cc638 1374 int c;
beeedaad 1375
212cc638
KH
1376 if (multibyte)
1377 c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1378 else
1379 c = str[i_byte], bytes = 1;
beeedaad
KH
1380 chars = 1;
1381 val = DISP_CHAR_VECTOR (dp, c);
1382 if (VECTORP (val))
1383 thiswidth = XVECTOR (val)->size;
1384 else
1385 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
3f62427c
KH
1386 }
1387 else
1388 {
beeedaad 1389 chars = 1;
212cc638
KH
1390 if (multibyte)
1391 PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1392 else
1393 bytes = 1;
beeedaad
KH
1394 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1395 }
1396
1397 if (precision > 0
1398 && (width + thiswidth > precision))
1399 {
1400 *nchars = i;
1401 *nbytes = i_byte;
1402 return width;
3f62427c 1403 }
beeedaad
KH
1404 i += chars;
1405 i_byte += bytes;
1406 width += thiswidth;
1407 }
1408
1409 if (precision > 0)
1410 {
1411 *nchars = i;
1412 *nbytes = i_byte;
3f62427c 1413 }
beeedaad 1414
3f62427c
KH
1415 return width;
1416}
1417
4ed46869 1418DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
fdb82f93
PJ
1419 doc: /* Return width of STRING when displayed in the current buffer.
1420Width is measured by how many columns it occupies on the screen.
1421When calculating width of a multibyte character in STRING,
1422only the base leading-code is considered; the validity of
1423the following bytes is not checked. Tabs in STRING are always
1424taken to occupy `tab-width' columns. */)
bc001814
JB
1425 (string)
1426 Lisp_Object string;
4ed46869
KH
1427{
1428 Lisp_Object val;
1429
bc001814
JB
1430 CHECK_STRING (string);
1431 XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
4ed46869
KH
1432 return val;
1433}
1434
1435DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
bc001814 1436 doc: /* Return the direction of CH.
fdb82f93
PJ
1437The returned value is 0 for left-to-right and 1 for right-to-left. */)
1438 (ch)
4ed46869
KH
1439 Lisp_Object ch;
1440{
1441 int charset;
1442
b7826503 1443 CHECK_NUMBER (ch);
4ed46869
KH
1444 charset = CHAR_CHARSET (XFASTINT (ch));
1445 if (!CHARSET_DEFINED_P (charset))
93bcb785 1446 invalid_character (XINT (ch));
4ed46869
KH
1447 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1448}
1449
87b089ad
RS
1450/* Return the number of characters in the NBYTES bytes at PTR.
1451 This works by looking at the contents and checking for multibyte sequences.
1452 However, if the current buffer has enable-multibyte-characters = nil,
1453 we treat each byte as a character. */
1454
6ae1f27e
RS
1455int
1456chars_in_text (ptr, nbytes)
fb4f7f50 1457 const unsigned char *ptr;
6ae1f27e
RS
1458 int nbytes;
1459{
87b089ad
RS
1460 /* current_buffer is null at early stages of Emacs initialization. */
1461 if (current_buffer == 0
1462 || NILP (current_buffer->enable_multibyte_characters))
6ae1f27e 1463 return nbytes;
a8a35e61 1464
ac4137cc 1465 return multibyte_chars_in_text (ptr, nbytes);
046b1f03
RS
1466}
1467
87b089ad
RS
1468/* Return the number of characters in the NBYTES bytes at PTR.
1469 This works by looking at the contents and checking for multibyte sequences.
1470 It ignores enable-multibyte-characters. */
1471
1472int
1473multibyte_chars_in_text (ptr, nbytes)
fb4f7f50 1474 const unsigned char *ptr;
87b089ad
RS
1475 int nbytes;
1476{
fb4f7f50 1477 const unsigned char *endp;
ac4137cc 1478 int chars, bytes;
87b089ad
RS
1479
1480 endp = ptr + nbytes;
1481 chars = 0;
1482
1483 while (ptr < endp)
1484 {
2e344af3
KH
1485 PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
1486 ptr += bytes;
1487 chars++;
1488 }
1489
1490 return chars;
1491}
1492
d8e4f486 1493/* Parse unibyte text at STR of LEN bytes as multibyte text, and
2e344af3 1494 count the numbers of characters and bytes in it. On counting
d8e4f486
DL
1495 bytes, pay attention to the fact that 8-bit characters in the range
1496 0x80..0x9F are represented by 2 bytes in multibyte text. */
2e344af3
KH
1497void
1498parse_str_as_multibyte (str, len, nchars, nbytes)
fb4f7f50 1499 const unsigned char *str;
2e344af3
KH
1500 int len, *nchars, *nbytes;
1501{
fb4f7f50 1502 const unsigned char *endp = str + len;
2e344af3
KH
1503 int n, chars = 0, bytes = 0;
1504
1505 while (str < endp)
1506 {
1507 if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
1508 str += n, bytes += n;
1509 else
1510 str++, bytes += 2;
1511 chars++;
1512 }
1513 *nchars = chars;
1514 *nbytes = bytes;
1515 return;
1516}
1517
d8e4f486 1518/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
2e344af3
KH
1519 It actually converts only 8-bit characters in the range 0x80..0x9F
1520 that don't contruct multibyte characters to multibyte forms. If
1521 NCHARS is nonzero, set *NCHARS to the number of characters in the
1522 text. It is assured that we can use LEN bytes at STR as a work
1523 area and that is enough. Return the number of bytes of the
1524 resulting text. */
1525
1526int
1527str_as_multibyte (str, len, nbytes, nchars)
1528 unsigned char *str;
1529 int len, nbytes, *nchars;
1530{
1531 unsigned char *p = str, *endp = str + nbytes;
1532 unsigned char *to;
1533 int chars = 0;
1534 int n;
1535
1536 while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1537 p += n, chars++;
1538 if (nchars)
1539 *nchars = chars;
1540 if (p == endp)
1541 return nbytes;
1542
1543 to = p;
1544 nbytes = endp - p;
1545 endp = str + len;
1546 safe_bcopy (p, endp - nbytes, nbytes);
1547 p = endp - nbytes;
1548 while (p < endp)
1549 {
1550 if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
ac4137cc 1551 {
2e344af3
KH
1552 while (n--)
1553 *to++ = *p++;
177c0ea7 1554 }
ac4137cc
KH
1555 else
1556 {
2e344af3
KH
1557 *to++ = LEADING_CODE_8_BIT_CONTROL;
1558 *to++ = *p++ + 0x20;
ac4137cc 1559 }
87b089ad
RS
1560 chars++;
1561 }
2e344af3
KH
1562 if (nchars)
1563 *nchars = chars;
1564 return (to - str);
1565}
87b089ad 1566
740f080d
KH
1567/* Parse unibyte string at STR of LEN bytes, and return the number of
1568 bytes it may ocupy when converted to multibyte string by
1569 `str_to_multibyte'. */
1570
1571int
1572parse_str_to_multibyte (str, len)
1573 unsigned char *str;
1574 int len;
1575{
1576 unsigned char *endp = str + len;
1577 int bytes;
1578
1579 for (bytes = 0; str < endp; str++)
1580 bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
1581 return bytes;
1582}
1583
d8e4f486 1584/* Convert unibyte text at STR of NBYTES bytes to multibyte text
2e344af3
KH
1585 that contains the same single-byte characters. It actually
1586 converts all 8-bit characters to multibyte forms. It is assured
1587 that we can use LEN bytes at STR as a work area and that is
1588 enough. */
1589
1590int
1591str_to_multibyte (str, len, bytes)
1592 unsigned char *str;
1593 int len, bytes;
1594{
1595 unsigned char *p = str, *endp = str + bytes;
1596 unsigned char *to;
2e344af3
KH
1597
1598 while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1599 if (p == endp)
1600 return bytes;
1601 to = p;
1602 bytes = endp - p;
1603 endp = str + len;
1604 safe_bcopy (p, endp - bytes, bytes);
1605 p = endp - bytes;
177c0ea7 1606 while (p < endp)
2e344af3
KH
1607 {
1608 if (*p < 0x80 || *p >= 0xA0)
1609 *to++ = *p++;
1610 else
1611 *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1612 }
1613 return (to - str);
87b089ad
RS
1614}
1615
2e344af3
KH
1616/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
1617 actually converts only 8-bit characters in the range 0x80..0x9F to
1618 unibyte forms. */
1619
1620int
1621str_as_unibyte (str, bytes)
1622 unsigned char *str;
1623 int bytes;
1624{
1625 unsigned char *p = str, *endp = str + bytes;
1626 unsigned char *to = str;
1627
1628 while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1629 to = p;
177c0ea7 1630 while (p < endp)
2e344af3
KH
1631 {
1632 if (*p == LEADING_CODE_8_BIT_CONTROL)
1633 *to++ = *(p + 1) - 0x20, p += 2;
1634 else
1635 *to++ = *p++;
1636 }
1637 return (to - str);
1638}
1639
1640\f
4bc26a6c 1641DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
5f5d497e
MB
1642 doc: /* Concatenate all the argument characters and make the result a string.
1643usage: (string &rest CHARACTERS) */)
fdb82f93 1644 (n, args)
53316e55 1645 int n;
4ed46869
KH
1646 Lisp_Object *args;
1647{
4894eddd
KS
1648 int i, bufsize;
1649 unsigned char *buf, *p;
2e344af3 1650 int c;
5729c92f 1651 int multibyte = 0;
4894eddd
KS
1652 Lisp_Object ret;
1653 USE_SAFE_ALLOCA;
1654
1655 bufsize = MAX_MULTIBYTE_LENGTH * n;
1656 SAFE_ALLOCA (buf, unsigned char *, bufsize);
1657 p = buf;
4ed46869
KH
1658
1659 for (i = 0; i < n; i++)
1660 {
b7826503 1661 CHECK_NUMBER (args[i]);
5729c92f
KH
1662 if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1663 multibyte = 1;
1664 }
1665
1666 for (i = 0; i < n; i++)
1667 {
4ed46869 1668 c = XINT (args[i]);
5729c92f
KH
1669 if (multibyte)
1670 p += CHAR_STRING (c, p);
1671 else
555e35d2 1672 *p++ = c;
4ed46869
KH
1673 }
1674
4894eddd 1675 ret = make_string_from_bytes (buf, n, p - buf);
233f3db6 1676 SAFE_FREE ();
4894eddd
KS
1677
1678 return ret;
4ed46869
KH
1679}
1680
1681#endif /* emacs */
1682\f
dfcf069d 1683int
4ed46869
KH
1684charset_id_internal (charset_name)
1685 char *charset_name;
1686{
76d7b829 1687 Lisp_Object val;
4ed46869 1688
76d7b829 1689 val= Fget (intern (charset_name), Qcharset);
4ed46869
KH
1690 if (!VECTORP (val))
1691 error ("Charset %s is not defined", charset_name);
1692
1693 return (XINT (XVECTOR (val)->contents[0]));
1694}
1695
1696DEFUN ("setup-special-charsets", Fsetup_special_charsets,
fdb82f93
PJ
1697 Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only. */)
1698 ()
4ed46869
KH
1699{
1700 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1701 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1702 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1703 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1704 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1705 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1706 charset_big5_2 = charset_id_internal ("chinese-big5-2");
c64582be
KH
1707 charset_mule_unicode_0100_24ff
1708 = charset_id_internal ("mule-unicode-0100-24ff");
1709 charset_mule_unicode_2500_33ff
1710 = charset_id_internal ("mule-unicode-2500-33ff");
1711 charset_mule_unicode_e000_ffff
1712 = charset_id_internal ("mule-unicode-e000-ffff");
4ed46869
KH
1713 return Qnil;
1714}
1715
dfcf069d 1716void
4ed46869
KH
1717init_charset_once ()
1718{
1719 int i, j, k;
1720
1721 staticpro (&Vcharset_table);
1722 staticpro (&Vcharset_symbol_table);
8a73a704 1723 staticpro (&Vgeneric_character_list);
4ed46869
KH
1724
1725 /* This has to be done here, before we call Fmake_char_table. */
1726 Qcharset_table = intern ("charset-table");
1727 staticpro (&Qcharset_table);
1728
1729 /* Intern this now in case it isn't already done.
1730 Setting this variable twice is harmless.
1731 But don't staticpro it here--that is done in alloc.c. */
1732 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1733
1734 /* Now we are ready to set up this property, so we can
1735 create the charset table. */
1736 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1737 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1738
0282eb69
KH
1739 Qunknown = intern ("unknown");
1740 staticpro (&Qunknown);
1741 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1742 Qunknown);
4ed46869
KH
1743
1744 /* Setup tables. */
1745 for (i = 0; i < 2; i++)
1746 for (j = 0; j < 2; j++)
1747 for (k = 0; k < 128; k++)
1748 iso_charset_table [i][j][k] = -1;
1749
60383934 1750 for (i = 0; i < 256; i++)
2e344af3 1751 bytes_by_char_head[i] = 1;
2e344af3
KH
1752 bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1753 bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1754 bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1755 bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
4ed46869
KH
1756
1757 for (i = 0; i < 128; i++)
2e344af3 1758 width_by_char_head[i] = 1;
4ed46869 1759 for (; i < 256; i++)
2e344af3
KH
1760 width_by_char_head[i] = 4;
1761 width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1762 width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1763 width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1764 width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
8a73a704
KH
1765
1766 {
76d7b829 1767 Lisp_Object val;
8a73a704 1768
76d7b829 1769 val = Qnil;
8a73a704
KH
1770 for (i = 0x81; i < 0x90; i++)
1771 val = Fcons (make_number ((i - 0x70) << 7), val);
1772 for (; i < 0x9A; i++)
1773 val = Fcons (make_number ((i - 0x8F) << 14), val);
1774 for (i = 0xA0; i < 0xF0; i++)
1775 val = Fcons (make_number ((i - 0x70) << 7), val);
1776 for (; i < 0xFF; i++)
1777 val = Fcons (make_number ((i - 0xE0) << 14), val);
8a73a704
KH
1778 Vgeneric_character_list = Fnreverse (val);
1779 }
bbf12bb3
KH
1780
1781 nonascii_insert_offset = 0;
1782 Vnonascii_translation_table = Qnil;
4ed46869
KH
1783}
1784
1785#ifdef emacs
1786
dfcf069d 1787void
4ed46869
KH
1788syms_of_charset ()
1789{
2e344af3
KH
1790 Qcharset = intern ("charset");
1791 staticpro (&Qcharset);
1792
4ed46869
KH
1793 Qascii = intern ("ascii");
1794 staticpro (&Qascii);
1795
2e344af3
KH
1796 Qeight_bit_control = intern ("eight-bit-control");
1797 staticpro (&Qeight_bit_control);
1798
1799 Qeight_bit_graphic = intern ("eight-bit-graphic");
1800 staticpro (&Qeight_bit_graphic);
4ed46869 1801
2e344af3
KH
1802 /* Define special charsets ascii, eight-bit-control, and
1803 eight-bit-graphic. */
4ed46869
KH
1804 update_charset_table (make_number (CHARSET_ASCII),
1805 make_number (1), make_number (94),
1806 make_number (1),
1807 make_number (0),
1808 make_number ('B'),
1809 make_number (0),
1810 build_string ("ASCII"),
d78bc582 1811 Qnil, /* same as above */
4ed46869
KH
1812 build_string ("ASCII (ISO646 IRV)"));
1813 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1814 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1815
2e344af3
KH
1816 update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1817 make_number (1), make_number (96),
797a084a 1818 make_number (4),
2e344af3
KH
1819 make_number (0),
1820 make_number (-1),
1821 make_number (-1),
1822 build_string ("8-bit control code (0x80..0x9F)"),
d78bc582
KH
1823 Qnil, /* same as above */
1824 Qnil); /* same as above */
2e344af3
KH
1825 CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1826 Fput (Qeight_bit_control, Qcharset,
1827 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1828
1829 update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1830 make_number (1), make_number (96),
797a084a 1831 make_number (4),
2e344af3
KH
1832 make_number (0),
1833 make_number (-1),
1834 make_number (-1),
2e344af3 1835 build_string ("8-bit graphic char (0xA0..0xFF)"),
d78bc582
KH
1836 Qnil, /* same as above */
1837 Qnil); /* same as above */
2e344af3
KH
1838 CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1839 Fput (Qeight_bit_graphic, Qcharset,
1840 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1841
c1a08b4c
KH
1842 Qauto_fill_chars = intern ("auto-fill-chars");
1843 staticpro (&Qauto_fill_chars);
1844 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1845
4ed46869 1846 defsubr (&Sdefine_charset);
8a73a704 1847 defsubr (&Sgeneric_character_list);
3fac5a51 1848 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1849 defsubr (&Sdeclare_equiv_charset);
1850 defsubr (&Sfind_charset_region);
1851 defsubr (&Sfind_charset_string);
1852 defsubr (&Smake_char_internal);
1853 defsubr (&Ssplit_char);
1854 defsubr (&Schar_charset);
90d7b74e 1855 defsubr (&Scharset_after);
4ed46869 1856 defsubr (&Siso_charset);
9d3d8cba 1857 defsubr (&Schar_valid_p);
d2665018 1858 defsubr (&Sunibyte_char_to_multibyte);
1bcc1567 1859 defsubr (&Smultibyte_char_to_unibyte);
4ed46869
KH
1860 defsubr (&Schar_bytes);
1861 defsubr (&Schar_width);
1862 defsubr (&Sstring_width);
1863 defsubr (&Schar_direction);
87b089ad 1864 defsubr (&Sstring);
4ed46869
KH
1865 defsubr (&Ssetup_special_charsets);
1866
1867 DEFVAR_LISP ("charset-list", &Vcharset_list,
fdb82f93 1868 doc: /* List of charsets ever defined. */);
2e344af3
KH
1869 Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1870 Fcons (Qeight_bit_graphic, Qnil)));
4ed46869 1871
537efd8d 1872 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
fdb82f93
PJ
1873 doc: /* Vector of cons cell of a symbol and translation table ever defined.
1874An ID of a translation table is an index of this vector. */);
537efd8d 1875 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
b0e3cf2b 1876
4ed46869 1877 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
fdb82f93 1878 doc: /* Leading-code of private TYPE9N charset of column-width 1. */);
4ed46869
KH
1879 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1880
1881 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
fdb82f93 1882 doc: /* Leading-code of private TYPE9N charset of column-width 2. */);
4ed46869
KH
1883 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1884
1885 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
fdb82f93 1886 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1. */);
4ed46869
KH
1887 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1888
1889 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
fdb82f93 1890 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2. */);
4ed46869 1891 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
35e623fb
RS
1892
1893 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
fdb82f93
PJ
1894 doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1895This is used for converting unibyte text to multibyte,
1896and for inserting character codes specified by number.
1897
1898This serves to convert a Latin-1 or similar 8-bit character code
1899to the corresponding Emacs multibyte character code.
1900Typically the value should be (- (make-char CHARSET 0) 128),
1901for your choice of character set.
1902If `nonascii-translation-table' is non-nil, it overrides this variable. */);
35e623fb 1903 nonascii_insert_offset = 0;
b0e3cf2b 1904
b4e9dd77 1905 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
fdb82f93
PJ
1906 doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1907This is used for converting unibyte text to multibyte,
1908and for inserting character codes specified by number.
1909
1910Conversion is performed only when multibyte characters are enabled,
1911and it serves to convert a Latin-1 or similar 8-bit character code
1912to the corresponding Emacs character code.
1913
1914If this is nil, `nonascii-insert-offset' is used instead.
1915See also the docstring of `make-translation-table'. */);
b4e9dd77 1916 Vnonascii_translation_table = Qnil;
4cf9710d 1917
c1a08b4c 1918 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
fdb82f93
PJ
1919 doc: /* A char-table for characters which invoke auto-filling.
1920Such characters have value t in this table. */);
c1a08b4c 1921 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
60022cb7
AS
1922 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1923 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
4ed46869
KH
1924}
1925
1926#endif /* emacs */
ab5796a9
MB
1927
1928/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1929 (do not change this comment) */