(openp): Initialized encoded_fn before GCPRO it.
[bpt/emacs.git] / src / charset.c
CommitLineData
75c8c592 1/* Basic multilingual character support.
0b5538bd 2 Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
ce03bf76
KH
3 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
4ed46869 6
369314dc
KH
7This file is part of GNU Emacs.
8
9GNU Emacs is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
4ed46869 13
369314dc
KH
14GNU Emacs is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
4ed46869 18
369314dc
KH
19You should have received a copy of the GNU General Public License
20along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
21the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22Boston, MA 02110-1301, USA. */
4ed46869
KH
23
24/* At first, see the document in `charset.h' to understand the code in
25 this file. */
26
68c45bf0
PE
27#ifdef emacs
28#include <config.h>
29#endif
30
4ed46869
KH
31#include <stdio.h>
32
33#ifdef emacs
34
35#include <sys/types.h>
4ed46869
KH
36#include "lisp.h"
37#include "buffer.h"
38#include "charset.h"
3f62427c 39#include "composite.h"
4ed46869 40#include "coding.h"
fc6b09bf 41#include "disptab.h"
4ed46869
KH
42
43#else /* not emacs */
44
45#include "mulelib.h"
46
47#endif /* emacs */
48
2e344af3 49Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
0282eb69 50Lisp_Object Qunknown;
4ed46869
KH
51
52/* Declaration of special leading-codes. */
4bc26a6c
SM
53EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
54EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
55EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
56EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
4ed46869 57
2e344af3
KH
58/* Declaration of special charsets. The values are set by
59 Fsetup_special_charsets. */
4ed46869
KH
60int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
61int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
62int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
63int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
64int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
65int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
66int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
c64582be
KH
67int charset_mule_unicode_0100_24ff;
68int charset_mule_unicode_2500_33ff;
69int charset_mule_unicode_e000_ffff;
4ed46869
KH
70
71Lisp_Object Qcharset_table;
72
73/* A char-table containing information of each character set. */
74Lisp_Object Vcharset_table;
75
76/* A vector of charset symbol indexed by charset-id. This is used
77 only for returning charset symbol from C functions. */
78Lisp_Object Vcharset_symbol_table;
79
80/* A list of charset symbols ever defined. */
81Lisp_Object Vcharset_list;
82
537efd8d
KH
83/* Vector of translation table ever defined.
84 ID of a translation table is used to index this vector. */
85Lisp_Object Vtranslation_table_vector;
b0e3cf2b 86
c1a08b4c
KH
87/* A char-table for characters which may invoke auto-filling. */
88Lisp_Object Vauto_fill_chars;
89
90Lisp_Object Qauto_fill_chars;
91
4ed46869
KH
92/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
93int bytes_by_char_head[256];
94int width_by_char_head[256];
95
96/* Mapping table from ISO2022's charset (specified by DIMENSION,
97 CHARS, and FINAL-CHAR) to Emacs' charset. */
98int iso_charset_table[2][2][128];
99
100/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
101unsigned char *_fetch_multibyte_char_p;
102int _fetch_multibyte_char_len;
103
35e623fb 104/* Offset to add to a non-ASCII value when inserting it. */
31ade731 105EMACS_INT nonascii_insert_offset;
35e623fb 106
4cf9710d
RS
107/* Translation table for converting non-ASCII unibyte characters
108 to multibyte codes, or nil. */
b4e9dd77 109Lisp_Object Vnonascii_translation_table;
4cf9710d 110
8a73a704
KH
111/* List of all possible generic characters. */
112Lisp_Object Vgeneric_character_list;
113
046b1f03 114\f
93bcb785
KH
115void
116invalid_character (c)
117 int c;
118{
85416bda 119 error ("Invalid character: %d, #o%o, #x%x", c, c, c);
93bcb785
KH
120}
121
2e344af3
KH
122/* Parse string STR of length LENGTH and fetch information of a
123 character at STR. Set BYTES to the byte length the character
124 occupies, CHARSET, C1, C2 to proper values of the character. */
125
126#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
127 do { \
128 (c1) = *(str); \
129 (bytes) = BYTES_BY_CHAR_HEAD (c1); \
130 if ((bytes) == 1) \
131 (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
132 else if ((bytes) == 2) \
133 { \
134 if ((c1) == LEADING_CODE_8_BIT_CONTROL) \
135 (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \
136 else \
137 (charset) = (c1), (c1) = (str)[1] & 0x7F; \
138 } \
139 else if ((bytes) == 3) \
140 { \
141 if ((c1) < LEADING_CODE_PRIVATE_11) \
142 (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \
143 else \
144 (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \
145 } \
146 else \
147 (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \
ac4137cc
KH
148 } while (0)
149
d37478d0
EZ
150/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
151 Note that this intentionally allows invalid components, such
152 as 0xA0 0xA0, because there exist many files that contain
153 such invalid byte sequences, especially in EUC-GB. */
44c6492d 154#define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
2e344af3 155 ((charset) == CHARSET_ASCII \
63f4d579 156 ? ((c1) >= 0 && (c1) <= 0x7F) \
2e344af3
KH
157 : ((charset) == CHARSET_8_BIT_CONTROL \
158 ? ((c1) >= 0x80 && (c1) <= 0x9F) \
159 : ((charset) == CHARSET_8_BIT_GRAPHIC \
160 ? ((c1) >= 0x80 && (c1) <= 0xFF) \
161 : (CHARSET_DIMENSION (charset) == 1 \
162 ? ((c1) >= 0x20 && (c1) <= 0x7F) \
163 : ((c1) >= 0x20 && (c1) <= 0x7F \
164 && (c2) >= 0x20 && (c2) <= 0x7F)))))
93bcb785 165
99529c2c
KH
166/* Store multi-byte form of the character C in STR. The caller should
167 allocate at least 4-byte area at STR in advance. Returns the
168 length of the multi-byte form. If C is an invalid character code,
12bcae05 169 return -1. */
4ed46869
KH
170
171int
12bcae05 172char_to_string_1 (c, str)
4ed46869 173 int c;
99529c2c 174 unsigned char *str;
4ed46869 175{
99529c2c
KH
176 unsigned char *p = str;
177
6662e69b 178 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
8ac5a9cc 179 {
6662e69b
KH
180 /* Multibyte character can't have a modifier bit. */
181 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
12bcae05 182 return -1;
6662e69b
KH
183
184 /* For Meta, Shift, and Control modifiers, we need special care. */
8ac5a9cc 185 if (c & CHAR_META)
6662e69b
KH
186 {
187 /* Move the meta bit to the right place for a string. */
188 c = (c & ~CHAR_META) | 0x80;
189 }
190 if (c & CHAR_SHIFT)
191 {
192 /* Shift modifier is valid only with [A-Za-z]. */
193 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
194 c &= ~CHAR_SHIFT;
195 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
196 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
197 }
8ac5a9cc 198 if (c & CHAR_CTL)
6662e69b
KH
199 {
200 /* Simulate the code in lread.c. */
201 /* Allow `\C- ' and `\C-?'. */
202 if (c == (CHAR_CTL | ' '))
203 c = 0;
204 else if (c == (CHAR_CTL | '?'))
205 c = 127;
206 /* ASCII control chars are made from letters (both cases),
207 as well as the non-letters within 0100...0137. */
208 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
209 c &= (037 | (~0177 & ~CHAR_CTL));
210 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
211 c &= (037 | (~0177 & ~CHAR_CTL));
212 }
213
5ef834ea
KH
214 /* If C still has any modifier bits, just ignore it. */
215 c &= ~CHAR_MODIFIER_MASK;
2e344af3 216 }
177c0ea7 217
2e344af3
KH
218 if (SINGLE_BYTE_CHAR_P (c))
219 {
220 if (ASCII_BYTE_P (c) || c >= 0xA0)
221 *p++ = c;
222 else
223 {
224 *p++ = LEADING_CODE_8_BIT_CONTROL;
225 *p++ = c + 0x20;
226 }
8ac5a9cc 227 }
6ce974d4 228 else if (CHAR_VALID_P (c, 0))
4ed46869 229 {
ac4137cc 230 int charset, c1, c2;
4ed46869 231
2e344af3 232 SPLIT_CHAR (c, charset, c1, c2);
99529c2c
KH
233
234 if (charset >= LEADING_CODE_EXT_11)
235 *p++ = (charset < LEADING_CODE_EXT_12
236 ? LEADING_CODE_PRIVATE_11
237 : (charset < LEADING_CODE_EXT_21
238 ? LEADING_CODE_PRIVATE_12
239 : (charset < LEADING_CODE_EXT_22
240 ? LEADING_CODE_PRIVATE_21
241 : LEADING_CODE_PRIVATE_22)));
242 *p++ = charset;
05ffc44b 243 if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
12bcae05 244 return -1;
99529c2c 245 if (c1)
4ed46869 246 {
99529c2c
KH
247 *p++ = c1 | 0x80;
248 if (c2 > 0)
249 *p++ = c2 | 0x80;
4ed46869
KH
250 }
251 }
2e344af3 252 else
12bcae05 253 return -1;
4ed46869 254
2e344af3 255 return (p - str);
4ed46869
KH
256}
257
12bcae05
GM
258
259/* Store multi-byte form of the character C in STR. The caller should
260 allocate at least 4-byte area at STR in advance. Returns the
261 length of the multi-byte form. If C is an invalid character code,
262 signal an error.
263
264 Use macro `CHAR_STRING (C, STR)' instead of calling this function
265 directly if C can be an ASCII character. */
266
267int
268char_to_string (c, str)
269 int c;
270 unsigned char *str;
271{
272 int len;
273 len = char_to_string_1 (c, str);
274 if (len == -1)
275 invalid_character (c);
276 return len;
277}
278
279
44c6492d
KH
280/* Return the non-ASCII character corresponding to multi-byte form at
281 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
282 length of the multibyte form in *ACTUAL_LEN.
537efd8d 283
99529c2c
KH
284 Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
285 this function directly if you want ot handle ASCII characters as
286 well. */
4ed46869 287
dfcf069d 288int
99529c2c 289string_to_char (str, len, actual_len)
8867de67 290 const unsigned char *str;
ac4137cc 291 int len, *actual_len;
4ed46869 292{
ac4137cc 293 int c, bytes, charset, c1, c2;
4ed46869 294
ac4137cc
KH
295 SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
296 c = MAKE_CHAR (charset, c1, c2);
4ed46869 297 if (actual_len)
ac4137cc 298 *actual_len = bytes;
4ed46869
KH
299 return c;
300}
301
44c6492d
KH
302/* Return the length of the multi-byte form at string STR of length LEN.
303 Use the macro MULTIBYTE_FORM_LENGTH instead. */
4ed46869
KH
304int
305multibyte_form_length (str, len)
8867de67 306 const unsigned char *str;
4ed46869
KH
307 int len;
308{
ac4137cc 309 int bytes;
4ed46869 310
ac4137cc 311 PARSE_MULTIBYTE_SEQ (str, len, bytes);
90d7b74e 312 return bytes;
4ed46869
KH
313}
314
ac4137cc
KH
315/* Check multibyte form at string STR of length LEN and set variables
316 pointed by CHARSET, C1, and C2 to charset and position codes of the
317 character at STR, and return 0. If there's no multibyte character,
4ed46869
KH
318 return -1. This should be used only in the macro SPLIT_STRING
319 which checks range of STR in advance. */
320
dfcf069d 321int
99529c2c 322split_string (str, len, charset, c1, c2)
ac4137cc
KH
323 const unsigned char *str;
324 unsigned char *c1, *c2;
325 int len, *charset;
4ed46869 326{
ac4137cc 327 register int bytes, cs, code1, code2 = -1;
4ed46869 328
ac4137cc
KH
329 SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
330 if (cs == CHARSET_ASCII)
4ed46869 331 return -1;
ac4137cc
KH
332 *charset = cs;
333 *c1 = code1;
334 *c2 = code2;
5865af0d 335 return 0;
ac4137cc
KH
336}
337
44c6492d
KH
338/* Return 1 iff character C has valid printable glyph.
339 Use the macro CHAR_PRINTABLE_P instead. */
ac4137cc
KH
340int
341char_printable_p (c)
342 int c;
343{
8ebae00c 344 int charset, c1, c2;
ac4137cc 345
2e344af3 346 if (ASCII_BYTE_P (c))
ac4137cc 347 return 1;
2e344af3
KH
348 else if (SINGLE_BYTE_CHAR_P (c))
349 return 0;
350 else if (c >= MAX_CHAR)
99529c2c 351 return 0;
177c0ea7 352
2e344af3 353 SPLIT_CHAR (c, charset, c1, c2);
ac4137cc
KH
354 if (! CHARSET_DEFINED_P (charset))
355 return 0;
356 if (CHARSET_CHARS (charset) == 94
357 ? c1 <= 32 || c1 >= 127
358 : c1 < 32)
359 return 0;
360 if (CHARSET_DIMENSION (charset) == 2
361 && (CHARSET_CHARS (charset) == 94
362 ? c2 <= 32 || c2 >= 127
363 : c2 < 32))
364 return 0;
365 return 1;
4ed46869
KH
366}
367
537efd8d 368/* Translate character C by translation table TABLE. If C
b4e9dd77
KH
369 is negative, translate a character specified by CHARSET, C1, and C2
370 (C1 and C2 are code points of the character). If no translation is
371 found in TABLE, return C. */
dfcf069d 372int
b4e9dd77 373translate_char (table, c, charset, c1, c2)
23d2a7f1
KH
374 Lisp_Object table;
375 int c, charset, c1, c2;
376{
377 Lisp_Object ch;
378 int alt_charset, alt_c1, alt_c2, dimension;
379
0ad3f83d 380 if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
23d2a7f1 381 if (!CHAR_TABLE_P (table)
ac4137cc 382 || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
23d2a7f1
KH
383 return c;
384
385 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
386 dimension = CHARSET_DIMENSION (alt_charset);
05ffc44b 387 if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
23d2a7f1
KH
388 /* CH is not a generic character, just return it. */
389 return XFASTINT (ch);
390
391 /* Since CH is a generic character, we must return a specific
392 charater which has the same position codes as C from CH. */
393 if (charset < 0)
394 SPLIT_CHAR (c, charset, c1, c2);
395 if (dimension != CHARSET_DIMENSION (charset))
396 /* We can't make such a character because of dimension mismatch. */
397 return c;
23d2a7f1
KH
398 return MAKE_CHAR (alt_charset, c1, c2);
399}
400
d2665018 401/* Convert the unibyte character C to multibyte based on
b4e9dd77 402 Vnonascii_translation_table or nonascii_insert_offset. If they can't
d2665018
KH
403 convert C to a valid multibyte character, convert it based on
404 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
35e623fb 405
dfcf069d 406int
35e623fb
RS
407unibyte_char_to_multibyte (c)
408 int c;
409{
543b4f61 410 if (c < 0400 && c >= 0200)
35e623fb 411 {
d2665018
KH
412 int c_save = c;
413
b4e9dd77 414 if (! NILP (Vnonascii_translation_table))
bbf12bb3
KH
415 {
416 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
ac4137cc 417 if (c >= 0400 && ! char_valid_p (c, 0))
bbf12bb3
KH
418 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
419 }
420 else if (c >= 0240 && nonascii_insert_offset > 0)
421 {
422 c += nonascii_insert_offset;
ac4137cc 423 if (c < 0400 || ! char_valid_p (c, 0))
bbf12bb3
KH
424 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
425 }
426 else if (c >= 0240)
d2665018 427 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
35e623fb
RS
428 }
429 return c;
430}
76d7b829
KH
431
432
433/* Convert the multibyte character C to unibyte 8-bit character based
434 on Vnonascii_translation_table or nonascii_insert_offset. If
435 REV_TBL is non-nil, it should be a reverse table of
436 Vnonascii_translation_table, i.e. what given by:
437 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
438
439int
440multibyte_char_to_unibyte (c, rev_tbl)
441 int c;
442 Lisp_Object rev_tbl;
443{
444 if (!SINGLE_BYTE_CHAR_P (c))
445 {
446 int c_save = c;
447
448 if (! CHAR_TABLE_P (rev_tbl)
449 && CHAR_TABLE_P (Vnonascii_translation_table))
450 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
451 make_number (0));
452 if (CHAR_TABLE_P (rev_tbl))
453 {
454 Lisp_Object temp;
455 temp = Faref (rev_tbl, make_number (c));
456 if (INTEGERP (temp))
457 c = XINT (temp);
bbf12bb3
KH
458 if (c >= 256)
459 c = (c_save & 0177) + 0200;
460 }
461 else
462 {
463 if (nonascii_insert_offset > 0)
464 c -= nonascii_insert_offset;
465 if (c < 128 || c >= 256)
466 c = (c_save & 0177) + 0200;
76d7b829 467 }
76d7b829
KH
468 }
469
470 return c;
471}
472
35e623fb 473\f
4ed46869
KH
474/* Update the table Vcharset_table with the given arguments (see the
475 document of `define-charset' for the meaning of each argument).
476 Several other table contents are also updated. The caller should
477 check the validity of CHARSET-ID and the remaining arguments in
478 advance. */
479
480void
481update_charset_table (charset_id, dimension, chars, width, direction,
482 iso_final_char, iso_graphic_plane,
483 short_name, long_name, description)
484 Lisp_Object charset_id, dimension, chars, width, direction;
485 Lisp_Object iso_final_char, iso_graphic_plane;
486 Lisp_Object short_name, long_name, description;
487{
488 int charset = XINT (charset_id);
489 int bytes;
490 unsigned char leading_code_base, leading_code_ext;
491
6dc0722d
KH
492 if (NILP (CHARSET_TABLE_ENTRY (charset)))
493 CHARSET_TABLE_ENTRY (charset)
494 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869 495
d78bc582
KH
496 if (NILP (long_name))
497 long_name = short_name;
498 if (NILP (description))
499 description = long_name;
500
4ed46869
KH
501 /* Get byte length of multibyte form, base leading-code, and
502 extended leading-code of the charset. See the comment under the
503 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
504 bytes = XINT (dimension);
505 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
506 {
507 /* Official charset, it doesn't have an extended leading-code. */
2e344af3 508 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
4ed46869
KH
509 bytes += 1; /* For a base leading-code. */
510 leading_code_base = charset;
511 leading_code_ext = 0;
512 }
513 else
514 {
515 /* Private charset. */
516 bytes += 2; /* For base and extended leading-codes. */
517 leading_code_base
518 = (charset < LEADING_CODE_EXT_12
519 ? LEADING_CODE_PRIVATE_11
520 : (charset < LEADING_CODE_EXT_21
521 ? LEADING_CODE_PRIVATE_12
522 : (charset < LEADING_CODE_EXT_22
523 ? LEADING_CODE_PRIVATE_21
524 : LEADING_CODE_PRIVATE_22)));
525 leading_code_ext = charset;
c83ef371
KH
526 if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
527 error ("Invalid dimension for the charset-ID %d", charset);
528 }
6ef23ebb 529
4ed46869
KH
530 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
531 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
532 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
533 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
534 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
535 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
536 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
537 = make_number (leading_code_base);
538 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
539 = make_number (leading_code_ext);
540 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
541 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
542 = iso_graphic_plane;
543 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
544 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
545 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
546 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
547
548 {
549 /* If we have already defined a charset which has the same
550 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
551 DIRECTION, we must update the entry REVERSE-CHARSET of both
552 charsets. If there's no such charset, the value of the entry
553 is set to nil. */
554 int i;
555
513ee442 556 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
557 if (!NILP (CHARSET_TABLE_ENTRY (i)))
558 {
559 if (CHARSET_DIMENSION (i) == XINT (dimension)
560 && CHARSET_CHARS (i) == XINT (chars)
561 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
562 && CHARSET_DIRECTION (i) != XINT (direction))
563 {
564 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
565 = make_number (i);
566 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
567 break;
568 }
569 }
513ee442 570 if (i > MAX_CHARSET)
4ed46869
KH
571 /* No such a charset. */
572 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
573 = make_number (-1);
574 }
575
c83ef371 576 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
4ed46869
KH
577 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
578 {
c83ef371 579 bytes_by_char_head[leading_code_base] = bytes;
4ed46869
KH
580 width_by_char_head[leading_code_base] = XINT (width);
581
582 /* Update table emacs_code_class. */
583 emacs_code_class[charset] = (bytes == 2
584 ? EMACS_leading_code_2
585 : (bytes == 3
586 ? EMACS_leading_code_3
587 : EMACS_leading_code_4));
588 }
589
590 /* Update table iso_charset_table. */
52e386c2 591 if (XINT (iso_final_char) >= 0
2e344af3 592 && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
4ed46869
KH
593 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
594}
595
596#ifdef emacs
597
598/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
599 is invalid. */
600int
601get_charset_id (charset_symbol)
602 Lisp_Object charset_symbol;
603{
604 Lisp_Object val;
605 int charset;
606
1a45ff10
EZ
607 /* This originally used a ?: operator, but reportedly the HP-UX
608 compiler version HP92453-01 A.10.32.22 miscompiles that. */
609 if (SYMBOLP (charset_symbol)
610 && VECTORP (val = Fget (charset_symbol, Qcharset))
611 && CHARSET_VALID_P (charset =
612 XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
613 return charset;
614 else
615 return -1;
4ed46869
KH
616}
617
618/* Return an identification number for a new private charset of
619 DIMENSION and WIDTH. If there's no more room for the new charset,
620 return 0. */
621Lisp_Object
622get_new_private_charset_id (dimension, width)
623 int dimension, width;
624{
625 int charset, from, to;
626
627 if (dimension == 1)
628 {
3bb7b08b
KH
629 from = LEADING_CODE_EXT_11;
630 to = LEADING_CODE_EXT_21;
4ed46869
KH
631 }
632 else
633 {
3bb7b08b
KH
634 from = LEADING_CODE_EXT_21;
635 to = LEADING_CODE_EXT_MAX + 1;
4ed46869
KH
636 }
637
638 for (charset = from; charset < to; charset++)
639 if (!CHARSET_DEFINED_P (charset)) break;
640
641 return make_number (charset < to ? charset : 0);
642}
643
644DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
fdb82f93
PJ
645 doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
646If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
647 treated as a private charset.
648INFO-VECTOR is a vector of the format:
649 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
650 SHORT-NAME LONG-NAME DESCRIPTION]
651The meanings of each elements is as follows:
652DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
653CHARS (integer) is the number of characters in a dimension: 94 or 96.
654WIDTH (integer) is the number of columns a character in the charset
655occupies on the screen: one of 0, 1, and 2.
656
657DIRECTION (integer) is the rendering direction of characters in the
658charset when rendering. If 0, render from left to right, else
659render from right to left.
660
661ISO-FINAL-CHAR (character) is the final character of the
662corresponding ISO 2022 charset.
663It may be -1 if the charset is internal use only.
664
665ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
666while encoding to variants of ISO 2022 coding system, one of the
667following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
668It may be -1 if the charset is internal use only.
669
670SHORT-NAME (string) is the short name to refer to the charset.
671
672LONG-NAME (string) is the long name to refer to the charset.
673
674DESCRIPTION (string) is the description string of the charset. */)
675 (charset_id, charset_symbol, info_vector)
4ed46869
KH
676 Lisp_Object charset_id, charset_symbol, info_vector;
677{
678 Lisp_Object *vec;
679
680 if (!NILP (charset_id))
b7826503
PJ
681 CHECK_NUMBER (charset_id);
682 CHECK_SYMBOL (charset_symbol);
683 CHECK_VECTOR (info_vector);
4ed46869
KH
684
685 if (! NILP (charset_id))
686 {
687 if (! CHARSET_VALID_P (XINT (charset_id)))
688 error ("Invalid CHARSET: %d", XINT (charset_id));
689 else if (CHARSET_DEFINED_P (XINT (charset_id)))
690 error ("Already defined charset: %d", XINT (charset_id));
691 }
692
693 vec = XVECTOR (info_vector)->contents;
694 if (XVECTOR (info_vector)->size != 9
695 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
696 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
697 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
698 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
2e344af3 699 || !INTEGERP (vec[4])
05ffc44b 700 || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
2e344af3
KH
701 || !INTEGERP (vec[5])
702 || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
4ed46869
KH
703 || !STRINGP (vec[6])
704 || !STRINGP (vec[7])
705 || !STRINGP (vec[8]))
706 error ("Invalid info-vector argument for defining charset %s",
d5db4077 707 SDATA (SYMBOL_NAME (charset_symbol)));
4ed46869
KH
708
709 if (NILP (charset_id))
710 {
711 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
712 if (XINT (charset_id) == 0)
713 error ("There's no room for a new private charset %s",
d5db4077 714 SDATA (SYMBOL_NAME (charset_symbol)));
4ed46869
KH
715 }
716
717 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
718 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 719 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
720 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
721 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
538482ab 722 Fupdate_coding_systems_internal ();
4ed46869
KH
723 return Qnil;
724}
725
8a73a704
KH
726DEFUN ("generic-character-list", Fgeneric_character_list,
727 Sgeneric_character_list, 0, 0, 0,
fdb82f93
PJ
728 doc: /* Return a list of all possible generic characters.
729It includes a generic character for a charset not yet defined. */)
730 ()
8a73a704
KH
731{
732 return Vgeneric_character_list;
733}
734
3fac5a51
KH
735DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
736 Sget_unused_iso_final_char, 2, 2, 0,
bc001814 737 doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
738DIMENSION is the number of bytes to represent a character: 1 or 2.
739CHARS is the number of characters in a dimension: 94 or 96.
740
741This final char is for private use, thus the range is `0' (48) .. `?' (63).
742If there's no unused final char for the specified kind of charset,
743return nil. */)
744 (dimension, chars)
3fac5a51
KH
745 Lisp_Object dimension, chars;
746{
747 int final_char;
748
b7826503
PJ
749 CHECK_NUMBER (dimension);
750 CHECK_NUMBER (chars);
3fac5a51
KH
751 if (XINT (dimension) != 1 && XINT (dimension) != 2)
752 error ("Invalid charset dimension %d, it should be 1 or 2",
753 XINT (dimension));
754 if (XINT (chars) != 94 && XINT (chars) != 96)
755 error ("Invalid charset chars %d, it should be 94 or 96",
756 XINT (chars));
757 for (final_char = '0'; final_char <= '?'; final_char++)
758 {
759 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
760 break;
761 }
762 return (final_char <= '?' ? make_number (final_char) : Qnil);
763}
764
4ed46869
KH
765DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
766 4, 4, 0,
74ac5074
KH
767 doc: /* Declare an equivalent charset for ISO-2022 decoding.
768
769On decoding by an ISO-2022 base coding system, when a charset
770specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
771if CHARSET is designated instead. */)
772 (dimension, chars, final_char, charset)
773 Lisp_Object dimension, chars, final_char, charset;
4ed46869 774{
74ac5074 775 int charset_id;
4ed46869 776
b7826503
PJ
777 CHECK_NUMBER (dimension);
778 CHECK_NUMBER (chars);
779 CHECK_NUMBER (final_char);
74ac5074 780 CHECK_SYMBOL (charset);
4ed46869
KH
781
782 if (XINT (dimension) != 1 && XINT (dimension) != 2)
783 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
784 if (XINT (chars) != 94 && XINT (chars) != 96)
785 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
786 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
787 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
74ac5074
KH
788 if ((charset_id = get_charset_id (charset)) < 0)
789 error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
4ed46869 790
74ac5074 791 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
4ed46869
KH
792 return Qnil;
793}
794
2e344af3
KH
795/* Return information about charsets in the text at PTR of NBYTES
796 bytes, which are NCHARS characters. The value is:
f6302ac9 797
cfe34140 798 0: Each character is represented by one byte. This is always
f6302ac9 799 true for unibyte text.
2e344af3
KH
800 1: No charsets other than ascii eight-bit-control,
801 eight-bit-graphic, and latin-1 are found.
802 2: Otherwise.
1d67c29b 803
2e344af3
KH
804 In addition, if CHARSETS is nonzero, for each found charset N, set
805 CHARSETS[N] to 1. For that, callers should allocate CHARSETS
806 (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
807 table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
808 1 (note that there's no charset whose ID is 1). */
4ed46869
KH
809
810int
2e344af3 811find_charset_in_text (ptr, nchars, nbytes, charsets, table)
fb4f7f50 812 const unsigned char *ptr;
2e344af3 813 int nchars, nbytes, *charsets;
23d2a7f1 814 Lisp_Object table;
4ed46869 815{
2e344af3 816 if (nchars == nbytes)
0282eb69 817 {
2e344af3 818 if (charsets && nbytes > 0)
0282eb69 819 {
fb4f7f50 820 const unsigned char *endp = ptr + nbytes;
2e344af3
KH
821 int maskbits = 0;
822
823 while (ptr < endp && maskbits != 7)
824 {
825 maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
826 ptr++;
177c0ea7 827 }
2e344af3
KH
828
829 if (maskbits & 1)
830 charsets[CHARSET_ASCII] = 1;
831 if (maskbits & 2)
832 charsets[CHARSET_8_BIT_CONTROL] = 1;
833 if (maskbits & 4)
834 charsets[CHARSET_8_BIT_GRAPHIC] = 1;
0282eb69 835 }
2e344af3 836 return 0;
0282eb69 837 }
2e344af3 838 else
4ed46869 839 {
2e344af3 840 int return_val = 1;
99529c2c 841 int bytes, charset, c1, c2;
05505664 842
2e344af3
KH
843 if (! CHAR_TABLE_P (table))
844 table = Qnil;
05505664 845
2e344af3 846 while (nchars-- > 0)
23d2a7f1 847 {
2e344af3
KH
848 SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
849 ptr += bytes;
4ed46869 850
2e344af3
KH
851 if (!CHARSET_DEFINED_P (charset))
852 charset = 1;
853 else if (! NILP (table))
854 {
855 int c = translate_char (table, -1, charset, c1, c2);
856 if (c >= 0)
857 charset = CHAR_CHARSET (c);
858 }
859
860 if (return_val == 1
861 && charset != CHARSET_ASCII
862 && charset != CHARSET_8_BIT_CONTROL
863 && charset != CHARSET_8_BIT_GRAPHIC
864 && charset != charset_latin_iso8859_1)
865 return_val = 2;
866
867 if (charsets)
868 charsets[charset] = 1;
869 else if (return_val == 2)
870 break;
4ed46869 871 }
2e344af3 872 return return_val;
4ed46869 873 }
4ed46869
KH
874}
875
876DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 877 2, 3, 0,
fdb82f93
PJ
878 doc: /* Return a list of charsets in the region between BEG and END.
879BEG and END are buffer positions.
880Optional arg TABLE if non-nil is a translation table to look up.
881
882If the region contains invalid multibyte characters,
883`unknown' is included in the returned list.
884
885If the current buffer is unibyte, the returned list may contain
886only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
887 (beg, end, table)
23d2a7f1 888 Lisp_Object beg, end, table;
4ed46869 889{
028d516b 890 int charsets[MAX_CHARSET + 1];
6ae1f27e 891 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
892 Lisp_Object val;
893
894 validate_region (&beg, &end);
895 from = XFASTINT (beg);
896 stop = to = XFASTINT (end);
6ae1f27e 897
4ed46869 898 if (from < GPT && GPT < to)
6ae1f27e
RS
899 {
900 stop = GPT;
901 stop_byte = GPT_BYTE;
902 }
903 else
904 stop_byte = CHAR_TO_BYTE (stop);
905
906 from_byte = CHAR_TO_BYTE (from);
907
028d516b 908 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
4ed46869
KH
909 while (1)
910 {
2e344af3
KH
911 find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
912 stop_byte - from_byte, charsets, table);
4ed46869 913 if (stop < to)
6ae1f27e
RS
914 {
915 from = stop, from_byte = stop_byte;
916 stop = to, stop_byte = CHAR_TO_BYTE (stop);
917 }
4ed46869
KH
918 else
919 break;
920 }
6ae1f27e 921
4ed46869 922 val = Qnil;
2e344af3 923 if (charsets[1])
0282eb69 924 val = Fcons (Qunknown, val);
2e344af3
KH
925 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
926 if (charsets[i])
927 val = Fcons (CHARSET_SYMBOL (i), val);
928 if (charsets[0])
929 val = Fcons (Qascii, val);
4ed46869
KH
930 return val;
931}
932
933DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 934 1, 2, 0,
fdb82f93
PJ
935 doc: /* Return a list of charsets in STR.
936Optional arg TABLE if non-nil is a translation table to look up.
937
938If the string contains invalid multibyte characters,
939`unknown' is included in the returned list.
940
941If STR is unibyte, the returned list may contain
942only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
943 (str, table)
23d2a7f1 944 Lisp_Object str, table;
4ed46869 945{
a29e3b1b 946 int charsets[MAX_CHARSET + 1];
4ed46869
KH
947 int i;
948 Lisp_Object val;
949
b7826503 950 CHECK_STRING (str);
87b089ad 951
a29e3b1b 952 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
d5db4077
KR
953 find_charset_in_text (SDATA (str), SCHARS (str),
954 SBYTES (str), charsets, table);
2e344af3 955
4ed46869 956 val = Qnil;
2e344af3 957 if (charsets[1])
0282eb69 958 val = Fcons (Qunknown, val);
2e344af3
KH
959 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
960 if (charsets[i])
961 val = Fcons (CHARSET_SYMBOL (i), val);
962 if (charsets[0])
963 val = Fcons (Qascii, val);
4ed46869
KH
964 return val;
965}
2e344af3 966
4ed46869
KH
967\f
968DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
fdb82f93
PJ
969 doc: /* Return a character made from arguments.
970Internal use only. */)
971 (charset, code1, code2)
4ed46869
KH
972 Lisp_Object charset, code1, code2;
973{
ac4137cc
KH
974 int charset_id, c1, c2;
975
b7826503 976 CHECK_NUMBER (charset);
ac4137cc
KH
977 charset_id = XINT (charset);
978 if (!CHARSET_DEFINED_P (charset_id))
979 error ("Invalid charset ID: %d", XINT (charset));
4ed46869
KH
980
981 if (NILP (code1))
ac4137cc 982 c1 = 0;
4ed46869 983 else
ac4137cc 984 {
b7826503 985 CHECK_NUMBER (code1);
ac4137cc
KH
986 c1 = XINT (code1);
987 }
4ed46869 988 if (NILP (code2))
ac4137cc 989 c2 = 0;
4ed46869 990 else
ac4137cc 991 {
b7826503 992 CHECK_NUMBER (code2);
ac4137cc
KH
993 c2 = XINT (code2);
994 }
4ed46869 995
2e344af3
KH
996 if (charset_id == CHARSET_ASCII)
997 {
998 if (c1 < 0 || c1 > 0x7F)
999 goto invalid_code_posints;
1000 return make_number (c1);
1001 }
1002 else if (charset_id == CHARSET_8_BIT_CONTROL)
1003 {
30736012
KH
1004 if (NILP (code1))
1005 c1 = 0x80;
1006 else if (c1 < 0x80 || c1 > 0x9F)
2e344af3
KH
1007 goto invalid_code_posints;
1008 return make_number (c1);
1009 }
1010 else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1011 {
30736012
KH
1012 if (NILP (code1))
1013 c1 = 0xA0;
1014 else if (c1 < 0xA0 || c1 > 0xFF)
2e344af3
KH
1015 goto invalid_code_posints;
1016 return make_number (c1);
1017 }
1018 else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1019 goto invalid_code_posints;
ac4137cc
KH
1020 c1 &= 0x7F;
1021 c2 &= 0x7F;
1022 if (c1 == 0
1023 ? c2 != 0
1024 : (c2 == 0
87f67317
KR
1025 ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1026 : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
2e344af3 1027 goto invalid_code_posints;
ac4137cc 1028 return make_number (MAKE_CHAR (charset_id, c1, c2));
2e344af3
KH
1029
1030 invalid_code_posints:
1031 error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
4ed46869
KH
1032}
1033
1034DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
bc001814
JB
1035 doc: /* Return list of charset and one or two position-codes of CH.
1036If CH is invalid as a character code,
1037return a list of symbol `unknown' and CH. */)
fdb82f93 1038 (ch)
4ed46869
KH
1039 Lisp_Object ch;
1040{
0282eb69 1041 int c, charset, c1, c2;
4ed46869 1042
b7826503 1043 CHECK_NUMBER (ch);
0282eb69
KH
1044 c = XFASTINT (ch);
1045 if (!CHAR_VALID_P (c, 1))
1046 return Fcons (Qunknown, Fcons (ch, Qnil));
4ed46869 1047 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 1048 return (c2 >= 0
4ed46869
KH
1049 ? Fcons (CHARSET_SYMBOL (charset),
1050 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1051 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1052}
1053
1054DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
bc001814 1055 doc: /* Return charset of CH. */)
fdb82f93 1056 (ch)
4ed46869
KH
1057 Lisp_Object ch;
1058{
b7826503 1059 CHECK_NUMBER (ch);
4ed46869
KH
1060
1061 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1062}
1063
90d7b74e 1064DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
fdb82f93
PJ
1065 doc: /* Return charset of a character in the current buffer at position POS.
1066If POS is nil, it defauls to the current point.
1067If POS is out of range, the value is nil. */)
1068 (pos)
90d7b74e
KH
1069 Lisp_Object pos;
1070{
2e344af3
KH
1071 Lisp_Object ch;
1072 int charset;
ac4137cc 1073
2e344af3
KH
1074 ch = Fchar_after (pos);
1075 if (! INTEGERP (ch))
1076 return ch;
1077 charset = CHAR_CHARSET (XINT (ch));
90d7b74e
KH
1078 return CHARSET_SYMBOL (charset);
1079}
1080
4ed46869 1081DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
fdb82f93
PJ
1082 doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1083
1084ISO 2022's designation sequence (escape sequence) distinguishes charsets
1085by their DIMENSION, CHARS, and FINAL-CHAR,
1086where as Emacs distinguishes them by charset symbol.
1087See the documentation of the function `charset-info' for the meanings of
1088DIMENSION, CHARS, and FINAL-CHAR. */)
1089 (dimension, chars, final_char)
4ed46869
KH
1090 Lisp_Object dimension, chars, final_char;
1091{
1092 int charset;
1093
b7826503
PJ
1094 CHECK_NUMBER (dimension);
1095 CHECK_NUMBER (chars);
1096 CHECK_NUMBER (final_char);
4ed46869
KH
1097
1098 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1099 return Qnil;
1100 return CHARSET_SYMBOL (charset);
1101}
1102
9d3d8cba
KH
1103/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1104 generic character. If GENERICP is zero, return nonzero iff C is a
1105 valid normal character. Do not call this function directly,
1106 instead use macro CHAR_VALID_P. */
1107int
1108char_valid_p (c, genericp)
1109 int c, genericp;
1110{
1111 int charset, c1, c2;
1112
0e235b7e 1113 if (c < 0 || c >= MAX_CHAR)
9d3d8cba
KH
1114 return 0;
1115 if (SINGLE_BYTE_CHAR_P (c))
1116 return 1;
2e344af3 1117 SPLIT_CHAR (c, charset, c1, c2);
ac4137cc
KH
1118 if (genericp)
1119 {
1120 if (c1)
1121 {
1122 if (c2 <= 0) c2 = 0x20;
1123 }
1124 else
1125 {
1126 if (c2 <= 0) c1 = c2 = 0x20;
1127 }
1128 }
1129 return (CHARSET_DEFINED_P (charset)
44c6492d 1130 && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
9d3d8cba
KH
1131}
1132
1133DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
fdb82f93
PJ
1134 doc: /* Return t if OBJECT is a valid normal character.
1135If optional arg GENERICP is non-nil, also return t if OBJECT is
1136a valid generic character. */)
1137 (object, genericp)
9d3d8cba
KH
1138 Lisp_Object object, genericp;
1139{
1140 if (! NATNUMP (object))
1141 return Qnil;
1142 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1143}
1144
d2665018
KH
1145DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1146 Sunibyte_char_to_multibyte, 1, 1, 0,
fdb82f93
PJ
1147 doc: /* Convert the unibyte character CH to multibyte character.
1148The conversion is done based on `nonascii-translation-table' (which see)
1149 or `nonascii-insert-offset' (which see). */)
1150 (ch)
d2665018
KH
1151 Lisp_Object ch;
1152{
1153 int c;
1154
b7826503 1155 CHECK_NUMBER (ch);
d2665018
KH
1156 c = XINT (ch);
1157 if (c < 0 || c >= 0400)
1158 error ("Invalid unibyte character: %d", c);
1159 c = unibyte_char_to_multibyte (c);
1160 if (c < 0)
1161 error ("Can't convert to multibyte character: %d", XINT (ch));
1162 return make_number (c);
1163}
1164
1bcc1567
RS
1165DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1166 Smultibyte_char_to_unibyte, 1, 1, 0,
fdb82f93
PJ
1167 doc: /* Convert the multibyte character CH to unibyte character.
1168The conversion is done based on `nonascii-translation-table' (which see)
1169 or `nonascii-insert-offset' (which see). */)
1170 (ch)
1bcc1567
RS
1171 Lisp_Object ch;
1172{
1173 int c;
1174
b7826503 1175 CHECK_NUMBER (ch);
1bcc1567 1176 c = XINT (ch);
ac4137cc 1177 if (! CHAR_VALID_P (c, 0))
1bcc1567
RS
1178 error ("Invalid multibyte character: %d", c);
1179 c = multibyte_char_to_unibyte (c, Qnil);
1180 if (c < 0)
1181 error ("Can't convert to unibyte character: %d", XINT (ch));
1182 return make_number (c);
1183}
1184
4ed46869 1185DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
bc001814 1186 doc: /* Return 1 regardless of the argument CH. */)
fdb82f93 1187 (ch)
4ed46869
KH
1188 Lisp_Object ch;
1189{
b7826503 1190 CHECK_NUMBER (ch);
9b6a601f
KH
1191 return make_number (1);
1192}
1193
1194/* Return how many bytes C will occupy in a multibyte buffer.
1195 Don't call this function directly, instead use macro CHAR_BYTES. */
1196int
1197char_bytes (c)
1198 int c;
1199{
99529c2c 1200 int charset;
9b6a601f 1201
2e344af3
KH
1202 if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1203 return 1;
1204 if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
8ac5a9cc
KH
1205 return 1;
1206
99529c2c
KH
1207 charset = CHAR_CHARSET (c);
1208 return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
4ed46869
KH
1209}
1210
1211/* Return the width of character of which multi-byte form starts with
1212 C. The width is measured by how many columns occupied on the
1213 screen when displayed in the current buffer. */
1214
1215#define ONE_BYTE_CHAR_WIDTH(c) \
1216 (c < 0x20 \
1217 ? (c == '\t' \
53316e55 1218 ? XFASTINT (current_buffer->tab_width) \
4ed46869
KH
1219 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1220 : (c < 0x7f \
1221 ? 1 \
1222 : (c == 0x7F \
1223 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1224 : ((! NILP (current_buffer->enable_multibyte_characters) \
1225 && BASE_LEADING_CODE_P (c)) \
1226 ? WIDTH_BY_CHAR_HEAD (c) \
b4e9dd77 1227 : 4))))
4ed46869
KH
1228
1229DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
bc001814 1230 doc: /* Return width of CH when displayed in the current buffer.
fdb82f93
PJ
1231The width is measured by how many columns it occupies on the screen.
1232Tab is taken to occupy `tab-width' columns. */)
1233 (ch)
1234 Lisp_Object ch;
4ed46869 1235{
859f2b3c 1236 Lisp_Object val, disp;
4ed46869 1237 int c;
51c4025f 1238 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 1239
b7826503 1240 CHECK_NUMBER (ch);
4ed46869 1241
859f2b3c
RS
1242 c = XINT (ch);
1243
1244 /* Get the way the display table would display it. */
51c4025f 1245 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
859f2b3c
RS
1246
1247 if (VECTORP (disp))
1248 XSETINT (val, XVECTOR (disp)->size);
1249 else if (SINGLE_BYTE_CHAR_P (c))
1250 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
1251 else
1252 {
1253 int charset = CHAR_CHARSET (c);
1254
1255 XSETFASTINT (val, CHARSET_WIDTH (charset));
1256 }
1257 return val;
1258}
1259
1260/* Return width of string STR of length LEN when displayed in the
1261 current buffer. The width is measured by how many columns it
1262 occupies on the screen. */
859f2b3c 1263
4ed46869
KH
1264int
1265strwidth (str, len)
1266 unsigned char *str;
1267 int len;
1268{
beeedaad
KH
1269 return c_string_width (str, len, -1, NULL, NULL);
1270}
1271
1272/* Return width of string STR of length LEN when displayed in the
1273 current buffer. The width is measured by how many columns it
1274 occupies on the screen. If PRECISION > 0, return the width of
1275 longest substring that doesn't exceed PRECISION, and set number of
1276 characters and bytes of the substring in *NCHARS and *NBYTES
1277 respectively. */
1278
1279int
1280c_string_width (str, len, precision, nchars, nbytes)
fb4f7f50 1281 const unsigned char *str;
020ea113 1282 int len, precision, *nchars, *nbytes;
beeedaad
KH
1283{
1284 int i = 0, i_byte = 0;
4ed46869 1285 int width = 0;
beeedaad 1286 int chars;
c4a4e28f 1287 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 1288
beeedaad 1289 while (i_byte < len)
859f2b3c 1290 {
beeedaad
KH
1291 int bytes, thiswidth;
1292 Lisp_Object val;
859f2b3c 1293
99529c2c 1294 if (dp)
beeedaad
KH
1295 {
1296 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1297
1298 chars = 1;
1299 val = DISP_CHAR_VECTOR (dp, c);
1300 if (VECTORP (val))
1301 thiswidth = XVECTOR (val)->size;
1302 else
1303 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1304 }
859f2b3c 1305 else
beeedaad
KH
1306 {
1307 chars = 1;
1308 PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1309 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1310 }
99529c2c 1311
beeedaad
KH
1312 if (precision > 0
1313 && (width + thiswidth > precision))
1314 {
1315 *nchars = i;
1316 *nbytes = i_byte;
1317 return width;
1318 }
1319 i++;
1320 i_byte += bytes;
1321 width += thiswidth;
1322 }
99529c2c 1323
beeedaad
KH
1324 if (precision > 0)
1325 {
1326 *nchars = i;
1327 *nbytes = i_byte;
859f2b3c 1328 }
beeedaad 1329
4ed46869
KH
1330 return width;
1331}
1332
beeedaad
KH
1333/* Return width of Lisp string STRING when displayed in the current
1334 buffer. The width is measured by how many columns it occupies on
1335 the screen while paying attention to compositions. If PRECISION >
1336 0, return the width of longest substring that doesn't exceed
1337 PRECISION, and set number of characters and bytes of the substring
1338 in *NCHARS and *NBYTES respectively. */
1339
3f62427c 1340int
beeedaad
KH
1341lisp_string_width (string, precision, nchars, nbytes)
1342 Lisp_Object string;
1343 int precision, *nchars, *nbytes;
3f62427c 1344{
d5db4077
KR
1345 int len = SCHARS (string);
1346 int len_byte = SBYTES (string);
212cc638
KH
1347 /* This set multibyte to 0 even if STRING is multibyte when it
1348 contains only ascii and eight-bit-graphic, but that's
1349 intentional. */
1350 int multibyte = len < len_byte;
fb4f7f50 1351 const unsigned char *str = SDATA (string);
beeedaad 1352 int i = 0, i_byte = 0;
3f62427c 1353 int width = 0;
beeedaad 1354 struct Lisp_Char_Table *dp = buffer_display_table ();
3f62427c
KH
1355
1356 while (i < len)
1357 {
beeedaad
KH
1358 int chars, bytes, thiswidth;
1359 Lisp_Object val;
1360 int cmp_id;
1361 int ignore, end;
1362
1363 if (find_composition (i, -1, &ignore, &end, &val, string)
1364 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1365 >= 0))
3f62427c 1366 {
beeedaad
KH
1367 thiswidth = composition_table[cmp_id]->width;
1368 chars = end - i;
1369 bytes = string_char_to_byte (string, end) - i_byte;
1370 }
1371 else if (dp)
1372 {
212cc638 1373 int c;
beeedaad 1374
212cc638
KH
1375 if (multibyte)
1376 c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1377 else
1378 c = str[i_byte], bytes = 1;
beeedaad
KH
1379 chars = 1;
1380 val = DISP_CHAR_VECTOR (dp, c);
1381 if (VECTORP (val))
1382 thiswidth = XVECTOR (val)->size;
1383 else
1384 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
3f62427c
KH
1385 }
1386 else
1387 {
beeedaad 1388 chars = 1;
212cc638
KH
1389 if (multibyte)
1390 PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1391 else
1392 bytes = 1;
beeedaad
KH
1393 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1394 }
1395
1396 if (precision > 0
1397 && (width + thiswidth > precision))
1398 {
1399 *nchars = i;
1400 *nbytes = i_byte;
1401 return width;
3f62427c 1402 }
beeedaad
KH
1403 i += chars;
1404 i_byte += bytes;
1405 width += thiswidth;
1406 }
1407
1408 if (precision > 0)
1409 {
1410 *nchars = i;
1411 *nbytes = i_byte;
3f62427c 1412 }
beeedaad 1413
3f62427c
KH
1414 return width;
1415}
1416
4ed46869 1417DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
fdb82f93
PJ
1418 doc: /* Return width of STRING when displayed in the current buffer.
1419Width is measured by how many columns it occupies on the screen.
1420When calculating width of a multibyte character in STRING,
1421only the base leading-code is considered; the validity of
1422the following bytes is not checked. Tabs in STRING are always
1423taken to occupy `tab-width' columns. */)
bc001814
JB
1424 (string)
1425 Lisp_Object string;
4ed46869
KH
1426{
1427 Lisp_Object val;
1428
bc001814
JB
1429 CHECK_STRING (string);
1430 XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
4ed46869
KH
1431 return val;
1432}
1433
1434DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
bc001814 1435 doc: /* Return the direction of CH.
fdb82f93
PJ
1436The returned value is 0 for left-to-right and 1 for right-to-left. */)
1437 (ch)
4ed46869
KH
1438 Lisp_Object ch;
1439{
1440 int charset;
1441
b7826503 1442 CHECK_NUMBER (ch);
4ed46869
KH
1443 charset = CHAR_CHARSET (XFASTINT (ch));
1444 if (!CHARSET_DEFINED_P (charset))
93bcb785 1445 invalid_character (XINT (ch));
4ed46869
KH
1446 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1447}
1448
87b089ad
RS
1449/* Return the number of characters in the NBYTES bytes at PTR.
1450 This works by looking at the contents and checking for multibyte sequences.
1451 However, if the current buffer has enable-multibyte-characters = nil,
1452 we treat each byte as a character. */
1453
6ae1f27e
RS
1454int
1455chars_in_text (ptr, nbytes)
fb4f7f50 1456 const unsigned char *ptr;
6ae1f27e
RS
1457 int nbytes;
1458{
87b089ad
RS
1459 /* current_buffer is null at early stages of Emacs initialization. */
1460 if (current_buffer == 0
1461 || NILP (current_buffer->enable_multibyte_characters))
6ae1f27e 1462 return nbytes;
a8a35e61 1463
ac4137cc 1464 return multibyte_chars_in_text (ptr, nbytes);
046b1f03
RS
1465}
1466
87b089ad
RS
1467/* Return the number of characters in the NBYTES bytes at PTR.
1468 This works by looking at the contents and checking for multibyte sequences.
1469 It ignores enable-multibyte-characters. */
1470
1471int
1472multibyte_chars_in_text (ptr, nbytes)
fb4f7f50 1473 const unsigned char *ptr;
87b089ad
RS
1474 int nbytes;
1475{
fb4f7f50 1476 const unsigned char *endp;
ac4137cc 1477 int chars, bytes;
87b089ad
RS
1478
1479 endp = ptr + nbytes;
1480 chars = 0;
1481
1482 while (ptr < endp)
1483 {
2e344af3
KH
1484 PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
1485 ptr += bytes;
1486 chars++;
1487 }
1488
1489 return chars;
1490}
1491
d8e4f486 1492/* Parse unibyte text at STR of LEN bytes as multibyte text, and
2e344af3 1493 count the numbers of characters and bytes in it. On counting
d8e4f486
DL
1494 bytes, pay attention to the fact that 8-bit characters in the range
1495 0x80..0x9F are represented by 2 bytes in multibyte text. */
2e344af3
KH
1496void
1497parse_str_as_multibyte (str, len, nchars, nbytes)
fb4f7f50 1498 const unsigned char *str;
2e344af3
KH
1499 int len, *nchars, *nbytes;
1500{
fb4f7f50 1501 const unsigned char *endp = str + len;
2e344af3
KH
1502 int n, chars = 0, bytes = 0;
1503
1504 while (str < endp)
1505 {
1506 if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
1507 str += n, bytes += n;
1508 else
1509 str++, bytes += 2;
1510 chars++;
1511 }
1512 *nchars = chars;
1513 *nbytes = bytes;
1514 return;
1515}
1516
d8e4f486 1517/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
2e344af3
KH
1518 It actually converts only 8-bit characters in the range 0x80..0x9F
1519 that don't contruct multibyte characters to multibyte forms. If
1520 NCHARS is nonzero, set *NCHARS to the number of characters in the
1521 text. It is assured that we can use LEN bytes at STR as a work
1522 area and that is enough. Return the number of bytes of the
1523 resulting text. */
1524
1525int
1526str_as_multibyte (str, len, nbytes, nchars)
1527 unsigned char *str;
1528 int len, nbytes, *nchars;
1529{
1530 unsigned char *p = str, *endp = str + nbytes;
1531 unsigned char *to;
1532 int chars = 0;
1533 int n;
1534
1535 while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1536 p += n, chars++;
1537 if (nchars)
1538 *nchars = chars;
1539 if (p == endp)
1540 return nbytes;
1541
1542 to = p;
1543 nbytes = endp - p;
1544 endp = str + len;
1545 safe_bcopy (p, endp - nbytes, nbytes);
1546 p = endp - nbytes;
1547 while (p < endp)
1548 {
1549 if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
ac4137cc 1550 {
2e344af3
KH
1551 while (n--)
1552 *to++ = *p++;
177c0ea7 1553 }
ac4137cc
KH
1554 else
1555 {
2e344af3
KH
1556 *to++ = LEADING_CODE_8_BIT_CONTROL;
1557 *to++ = *p++ + 0x20;
ac4137cc 1558 }
87b089ad
RS
1559 chars++;
1560 }
2e344af3
KH
1561 if (nchars)
1562 *nchars = chars;
1563 return (to - str);
1564}
87b089ad 1565
740f080d
KH
1566/* Parse unibyte string at STR of LEN bytes, and return the number of
1567 bytes it may ocupy when converted to multibyte string by
1568 `str_to_multibyte'. */
1569
1570int
1571parse_str_to_multibyte (str, len)
1572 unsigned char *str;
1573 int len;
1574{
1575 unsigned char *endp = str + len;
1576 int bytes;
1577
1578 for (bytes = 0; str < endp; str++)
1579 bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
1580 return bytes;
1581}
1582
d8e4f486 1583/* Convert unibyte text at STR of NBYTES bytes to multibyte text
2e344af3
KH
1584 that contains the same single-byte characters. It actually
1585 converts all 8-bit characters to multibyte forms. It is assured
1586 that we can use LEN bytes at STR as a work area and that is
1587 enough. */
1588
1589int
1590str_to_multibyte (str, len, bytes)
1591 unsigned char *str;
1592 int len, bytes;
1593{
1594 unsigned char *p = str, *endp = str + bytes;
1595 unsigned char *to;
2e344af3
KH
1596
1597 while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1598 if (p == endp)
1599 return bytes;
1600 to = p;
1601 bytes = endp - p;
1602 endp = str + len;
1603 safe_bcopy (p, endp - bytes, bytes);
1604 p = endp - bytes;
177c0ea7 1605 while (p < endp)
2e344af3
KH
1606 {
1607 if (*p < 0x80 || *p >= 0xA0)
1608 *to++ = *p++;
1609 else
1610 *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1611 }
1612 return (to - str);
87b089ad
RS
1613}
1614
2e344af3
KH
1615/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
1616 actually converts only 8-bit characters in the range 0x80..0x9F to
1617 unibyte forms. */
1618
1619int
1620str_as_unibyte (str, bytes)
1621 unsigned char *str;
1622 int bytes;
1623{
1624 unsigned char *p = str, *endp = str + bytes;
1625 unsigned char *to = str;
1626
1627 while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1628 to = p;
177c0ea7 1629 while (p < endp)
2e344af3
KH
1630 {
1631 if (*p == LEADING_CODE_8_BIT_CONTROL)
1632 *to++ = *(p + 1) - 0x20, p += 2;
1633 else
1634 *to++ = *p++;
1635 }
1636 return (to - str);
1637}
1638
1639\f
4bc26a6c 1640DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
5f5d497e
MB
1641 doc: /* Concatenate all the argument characters and make the result a string.
1642usage: (string &rest CHARACTERS) */)
fdb82f93 1643 (n, args)
53316e55 1644 int n;
4ed46869
KH
1645 Lisp_Object *args;
1646{
4894eddd
KS
1647 int i, bufsize;
1648 unsigned char *buf, *p;
2e344af3 1649 int c;
5729c92f 1650 int multibyte = 0;
4894eddd
KS
1651 Lisp_Object ret;
1652 USE_SAFE_ALLOCA;
1653
1654 bufsize = MAX_MULTIBYTE_LENGTH * n;
1655 SAFE_ALLOCA (buf, unsigned char *, bufsize);
1656 p = buf;
4ed46869
KH
1657
1658 for (i = 0; i < n; i++)
1659 {
b7826503 1660 CHECK_NUMBER (args[i]);
5729c92f
KH
1661 if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1662 multibyte = 1;
1663 }
1664
1665 for (i = 0; i < n; i++)
1666 {
4ed46869 1667 c = XINT (args[i]);
5729c92f
KH
1668 if (multibyte)
1669 p += CHAR_STRING (c, p);
1670 else
555e35d2 1671 *p++ = c;
4ed46869
KH
1672 }
1673
4894eddd 1674 ret = make_string_from_bytes (buf, n, p - buf);
233f3db6 1675 SAFE_FREE ();
4894eddd
KS
1676
1677 return ret;
4ed46869
KH
1678}
1679
1680#endif /* emacs */
1681\f
dfcf069d 1682int
4ed46869
KH
1683charset_id_internal (charset_name)
1684 char *charset_name;
1685{
76d7b829 1686 Lisp_Object val;
4ed46869 1687
76d7b829 1688 val= Fget (intern (charset_name), Qcharset);
4ed46869
KH
1689 if (!VECTORP (val))
1690 error ("Charset %s is not defined", charset_name);
1691
1692 return (XINT (XVECTOR (val)->contents[0]));
1693}
1694
1695DEFUN ("setup-special-charsets", Fsetup_special_charsets,
fdb82f93
PJ
1696 Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only. */)
1697 ()
4ed46869
KH
1698{
1699 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1700 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1701 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1702 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1703 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1704 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1705 charset_big5_2 = charset_id_internal ("chinese-big5-2");
c64582be
KH
1706 charset_mule_unicode_0100_24ff
1707 = charset_id_internal ("mule-unicode-0100-24ff");
1708 charset_mule_unicode_2500_33ff
1709 = charset_id_internal ("mule-unicode-2500-33ff");
1710 charset_mule_unicode_e000_ffff
1711 = charset_id_internal ("mule-unicode-e000-ffff");
4ed46869
KH
1712 return Qnil;
1713}
1714
dfcf069d 1715void
4ed46869
KH
1716init_charset_once ()
1717{
1718 int i, j, k;
1719
1720 staticpro (&Vcharset_table);
1721 staticpro (&Vcharset_symbol_table);
8a73a704 1722 staticpro (&Vgeneric_character_list);
4ed46869
KH
1723
1724 /* This has to be done here, before we call Fmake_char_table. */
1725 Qcharset_table = intern ("charset-table");
1726 staticpro (&Qcharset_table);
1727
1728 /* Intern this now in case it isn't already done.
1729 Setting this variable twice is harmless.
1730 But don't staticpro it here--that is done in alloc.c. */
1731 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1732
1733 /* Now we are ready to set up this property, so we can
1734 create the charset table. */
1735 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1736 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1737
0282eb69
KH
1738 Qunknown = intern ("unknown");
1739 staticpro (&Qunknown);
1740 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1741 Qunknown);
4ed46869
KH
1742
1743 /* Setup tables. */
1744 for (i = 0; i < 2; i++)
1745 for (j = 0; j < 2; j++)
1746 for (k = 0; k < 128; k++)
1747 iso_charset_table [i][j][k] = -1;
1748
60383934 1749 for (i = 0; i < 256; i++)
2e344af3 1750 bytes_by_char_head[i] = 1;
2e344af3
KH
1751 bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1752 bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1753 bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1754 bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
4ed46869
KH
1755
1756 for (i = 0; i < 128; i++)
2e344af3 1757 width_by_char_head[i] = 1;
4ed46869 1758 for (; i < 256; i++)
2e344af3
KH
1759 width_by_char_head[i] = 4;
1760 width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1761 width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1762 width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1763 width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
8a73a704
KH
1764
1765 {
76d7b829 1766 Lisp_Object val;
8a73a704 1767
76d7b829 1768 val = Qnil;
8a73a704
KH
1769 for (i = 0x81; i < 0x90; i++)
1770 val = Fcons (make_number ((i - 0x70) << 7), val);
1771 for (; i < 0x9A; i++)
1772 val = Fcons (make_number ((i - 0x8F) << 14), val);
1773 for (i = 0xA0; i < 0xF0; i++)
1774 val = Fcons (make_number ((i - 0x70) << 7), val);
1775 for (; i < 0xFF; i++)
1776 val = Fcons (make_number ((i - 0xE0) << 14), val);
8a73a704
KH
1777 Vgeneric_character_list = Fnreverse (val);
1778 }
bbf12bb3
KH
1779
1780 nonascii_insert_offset = 0;
1781 Vnonascii_translation_table = Qnil;
4ed46869
KH
1782}
1783
1784#ifdef emacs
1785
dfcf069d 1786void
4ed46869
KH
1787syms_of_charset ()
1788{
2e344af3
KH
1789 Qcharset = intern ("charset");
1790 staticpro (&Qcharset);
1791
4ed46869
KH
1792 Qascii = intern ("ascii");
1793 staticpro (&Qascii);
1794
2e344af3
KH
1795 Qeight_bit_control = intern ("eight-bit-control");
1796 staticpro (&Qeight_bit_control);
1797
1798 Qeight_bit_graphic = intern ("eight-bit-graphic");
1799 staticpro (&Qeight_bit_graphic);
4ed46869 1800
2e344af3
KH
1801 /* Define special charsets ascii, eight-bit-control, and
1802 eight-bit-graphic. */
4ed46869
KH
1803 update_charset_table (make_number (CHARSET_ASCII),
1804 make_number (1), make_number (94),
1805 make_number (1),
1806 make_number (0),
1807 make_number ('B'),
1808 make_number (0),
1809 build_string ("ASCII"),
d78bc582 1810 Qnil, /* same as above */
4ed46869
KH
1811 build_string ("ASCII (ISO646 IRV)"));
1812 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1813 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1814
2e344af3
KH
1815 update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1816 make_number (1), make_number (96),
797a084a 1817 make_number (4),
2e344af3
KH
1818 make_number (0),
1819 make_number (-1),
1820 make_number (-1),
1821 build_string ("8-bit control code (0x80..0x9F)"),
d78bc582
KH
1822 Qnil, /* same as above */
1823 Qnil); /* same as above */
2e344af3
KH
1824 CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1825 Fput (Qeight_bit_control, Qcharset,
1826 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1827
1828 update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1829 make_number (1), make_number (96),
797a084a 1830 make_number (4),
2e344af3
KH
1831 make_number (0),
1832 make_number (-1),
1833 make_number (-1),
2e344af3 1834 build_string ("8-bit graphic char (0xA0..0xFF)"),
d78bc582
KH
1835 Qnil, /* same as above */
1836 Qnil); /* same as above */
2e344af3
KH
1837 CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1838 Fput (Qeight_bit_graphic, Qcharset,
1839 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1840
c1a08b4c
KH
1841 Qauto_fill_chars = intern ("auto-fill-chars");
1842 staticpro (&Qauto_fill_chars);
1843 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1844
4ed46869 1845 defsubr (&Sdefine_charset);
8a73a704 1846 defsubr (&Sgeneric_character_list);
3fac5a51 1847 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1848 defsubr (&Sdeclare_equiv_charset);
1849 defsubr (&Sfind_charset_region);
1850 defsubr (&Sfind_charset_string);
1851 defsubr (&Smake_char_internal);
1852 defsubr (&Ssplit_char);
1853 defsubr (&Schar_charset);
90d7b74e 1854 defsubr (&Scharset_after);
4ed46869 1855 defsubr (&Siso_charset);
9d3d8cba 1856 defsubr (&Schar_valid_p);
d2665018 1857 defsubr (&Sunibyte_char_to_multibyte);
1bcc1567 1858 defsubr (&Smultibyte_char_to_unibyte);
4ed46869
KH
1859 defsubr (&Schar_bytes);
1860 defsubr (&Schar_width);
1861 defsubr (&Sstring_width);
1862 defsubr (&Schar_direction);
87b089ad 1863 defsubr (&Sstring);
4ed46869
KH
1864 defsubr (&Ssetup_special_charsets);
1865
1866 DEFVAR_LISP ("charset-list", &Vcharset_list,
fdb82f93 1867 doc: /* List of charsets ever defined. */);
2e344af3
KH
1868 Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1869 Fcons (Qeight_bit_graphic, Qnil)));
4ed46869 1870
537efd8d 1871 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
fdb82f93
PJ
1872 doc: /* Vector of cons cell of a symbol and translation table ever defined.
1873An ID of a translation table is an index of this vector. */);
537efd8d 1874 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
b0e3cf2b 1875
4ed46869 1876 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
fdb82f93 1877 doc: /* Leading-code of private TYPE9N charset of column-width 1. */);
4ed46869
KH
1878 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1879
1880 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
fdb82f93 1881 doc: /* Leading-code of private TYPE9N charset of column-width 2. */);
4ed46869
KH
1882 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1883
1884 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
fdb82f93 1885 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1. */);
4ed46869
KH
1886 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1887
1888 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
fdb82f93 1889 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2. */);
4ed46869 1890 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
35e623fb
RS
1891
1892 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
fdb82f93
PJ
1893 doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1894This is used for converting unibyte text to multibyte,
1895and for inserting character codes specified by number.
1896
1897This serves to convert a Latin-1 or similar 8-bit character code
1898to the corresponding Emacs multibyte character code.
1899Typically the value should be (- (make-char CHARSET 0) 128),
1900for your choice of character set.
1901If `nonascii-translation-table' is non-nil, it overrides this variable. */);
35e623fb 1902 nonascii_insert_offset = 0;
b0e3cf2b 1903
b4e9dd77 1904 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
fdb82f93
PJ
1905 doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1906This is used for converting unibyte text to multibyte,
1907and for inserting character codes specified by number.
1908
1909Conversion is performed only when multibyte characters are enabled,
1910and it serves to convert a Latin-1 or similar 8-bit character code
1911to the corresponding Emacs character code.
1912
1913If this is nil, `nonascii-insert-offset' is used instead.
1914See also the docstring of `make-translation-table'. */);
b4e9dd77 1915 Vnonascii_translation_table = Qnil;
4cf9710d 1916
c1a08b4c 1917 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
fdb82f93
PJ
1918 doc: /* A char-table for characters which invoke auto-filling.
1919Such characters have value t in this table. */);
c1a08b4c 1920 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
60022cb7
AS
1921 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1922 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
4ed46869
KH
1923}
1924
1925#endif /* emacs */
ab5796a9
MB
1926
1927/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1928 (do not change this comment) */