(parse_str_to_multibyte): Extern it.
[bpt/emacs.git] / src / charset.c
CommitLineData
75c8c592 1/* Basic multilingual character support.
35e623fb 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
4ed46869 4
369314dc
KH
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
4ed46869 11
369314dc
KH
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
4ed46869 16
369314dc
KH
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
4ed46869
KH
21
/* First, read the documentation in `charset.h' to understand the code
   in this file.  */
24
68c45bf0
PE
25#ifdef emacs
26#include <config.h>
27#endif
28
4ed46869
KH
29#include <stdio.h>
30
31#ifdef emacs
32
33#include <sys/types.h>
4ed46869
KH
34#include "lisp.h"
35#include "buffer.h"
36#include "charset.h"
3f62427c 37#include "composite.h"
4ed46869 38#include "coding.h"
fc6b09bf 39#include "disptab.h"
4ed46869
KH
40
41#else /* not emacs */
42
43#include "mulelib.h"
44
45#endif /* emacs */
46
2e344af3 47Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
0282eb69 48Lisp_Object Qunknown;
4ed46869
KH
49
50/* Declaration of special leading-codes. */
4ed46869
KH
51int leading_code_private_11; /* for private DIMENSION1 of 1-column */
52int leading_code_private_12; /* for private DIMENSION1 of 2-column */
53int leading_code_private_21; /* for private DIMENSION2 of 1-column */
54int leading_code_private_22; /* for private DIMENSION2 of 2-column */
55
2e344af3
KH
56/* Declaration of special charsets. The values are set by
57 Fsetup_special_charsets. */
4ed46869
KH
58int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
59int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
60int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
61int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
62int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
63int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
64int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
65
66Lisp_Object Qcharset_table;
67
68/* A char-table containing information of each character set. */
69Lisp_Object Vcharset_table;
70
71/* A vector of charset symbol indexed by charset-id. This is used
72 only for returning charset symbol from C functions. */
73Lisp_Object Vcharset_symbol_table;
74
75/* A list of charset symbols ever defined. */
76Lisp_Object Vcharset_list;
77
537efd8d
KH
78/* Vector of translation table ever defined.
79 ID of a translation table is used to index this vector. */
80Lisp_Object Vtranslation_table_vector;
b0e3cf2b 81
c1a08b4c
KH
82/* A char-table for characters which may invoke auto-filling. */
83Lisp_Object Vauto_fill_chars;
84
85Lisp_Object Qauto_fill_chars;
86
4ed46869
KH
87/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
88int bytes_by_char_head[256];
89int width_by_char_head[256];
90
91/* Mapping table from ISO2022's charset (specified by DIMENSION,
92 CHARS, and FINAL-CHAR) to Emacs' charset. */
93int iso_charset_table[2][2][128];
94
95/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
96unsigned char *_fetch_multibyte_char_p;
97int _fetch_multibyte_char_len;
98
35e623fb
RS
99/* Offset to add to a non-ASCII value when inserting it. */
100int nonascii_insert_offset;
101
4cf9710d
RS
102/* Translation table for converting non-ASCII unibyte characters
103 to multibyte codes, or nil. */
b4e9dd77 104Lisp_Object Vnonascii_translation_table;
4cf9710d 105
8a73a704
KH
106/* List of all possible generic characters. */
107Lisp_Object Vgeneric_character_list;
108
046b1f03
RS
109#define min(X, Y) ((X) < (Y) ? (X) : (Y))
110#define max(X, Y) ((X) > (Y) ? (X) : (Y))
111\f
93bcb785
KH
/* Signal an error saying that C is an invalid character.  The
   message shows C in octal, decimal and hexadecimal.  */
void
invalid_character (c)
     int c;
{
  error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
}
118
2e344af3
KH
/* Parse the multibyte sequence starting at STR and fetch information
   about the character found there.  Set BYTES to the number of bytes
   the character occupies (as given by BYTES_BY_CHAR_HEAD for the
   first byte), and set CHARSET and C1 to the charset and the first
   position code.

   C2 is assigned only for 3-byte sequences whose leading byte is
   below LEADING_CODE_PRIVATE_11 and for 4-byte sequences; in all
   other cases it is left untouched, so callers should initialize it.

   LENGTH is accepted for symmetry with the callers but is not
   referenced by this macro; no bounds checking is done here.  */

#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)	\
  do {									\
    (c1) = *(str);							\
    (bytes) = BYTES_BY_CHAR_HEAD (c1);					\
    if ((bytes) == 1)							\
      {									\
	if (ASCII_BYTE_P (c1))						\
	  (charset) = CHARSET_ASCII;					\
	else								\
	  (charset) = CHARSET_8_BIT_GRAPHIC;				\
      }									\
    else if ((bytes) == 2)						\
      {									\
	if ((c1) == LEADING_CODE_8_BIT_CONTROL)				\
	  {								\
	    (charset) = CHARSET_8_BIT_CONTROL;				\
	    (c1) = (str)[1] - 0x20;					\
	  }								\
	else								\
	  {								\
	    (charset) = (c1);						\
	    (c1) = (str)[1] & 0x7F;					\
	  }								\
      }									\
    else if ((bytes) == 3)						\
      {									\
	if ((c1) < LEADING_CODE_PRIVATE_11)				\
	  {								\
	    (charset) = (c1);						\
	    (c1) = (str)[1] & 0x7F;					\
	    (c2) = (str)[2] & 0x7F;					\
	  }								\
	else								\
	  {								\
	    (charset) = (str)[1];					\
	    (c1) = (str)[2] & 0x7F;					\
	  }								\
      }									\
    else								\
      {									\
	(charset) = (str)[1];						\
	(c1) = (str)[2] & 0x7F;						\
	(c2) = (str)[3] & 0x7F;						\
      }									\
  } while (0)
146
/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
   The accepted ranges are:
     CHARSET_ASCII:		C1 in 0x00..0x7F
     CHARSET_8_BIT_CONTROL:	C1 in 0x80..0x9F
     CHARSET_8_BIT_GRAPHIC:	C1 in 0x80..0xFF
     other, dimension 1:	C1 in 0x20..0x7F
     other, otherwise:		C1 and C2 both in 0x20..0x7F
   The arguments are evaluated more than once.  */
#define CHAR_COMPONENTS_VALID_P(charset, c1, c2)			\
  ((charset) == CHARSET_ASCII						\
   ? ((c1) >= 0 && (c1) <= 0x7F)					\
   : (charset) == CHARSET_8_BIT_CONTROL					\
   ? ((c1) >= 0x80 && (c1) <= 0x9F)					\
   : (charset) == CHARSET_8_BIT_GRAPHIC					\
   ? ((c1) >= 0x80 && (c1) <= 0xFF)					\
   : CHARSET_DIMENSION (charset) == 1					\
   ? ((c1) >= 0x20 && (c1) <= 0x7F)					\
   : (((c1) >= 0x20 && (c1) <= 0x7F)					\
      && ((c2) >= 0x20 && (c2) <= 0x7F)))
93bcb785 159
99529c2c
KH
160/* Store multi-byte form of the character C in STR. The caller should
161 allocate at least 4-byte area at STR in advance. Returns the
162 length of the multi-byte form. If C is an invalid character code,
163 signal an error.
4ed46869 164
99529c2c
KH
165 Use macro `CHAR_STRING (C, STR)' instead of calling this function
166 directly if C can be an ASCII character. */
4ed46869
KH
167
168int
99529c2c 169char_to_string (c, str)
4ed46869 170 int c;
99529c2c 171 unsigned char *str;
4ed46869 172{
99529c2c
KH
173 unsigned char *p = str;
174
6662e69b 175 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
8ac5a9cc 176 {
6662e69b
KH
177 /* Multibyte character can't have a modifier bit. */
178 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
179 invalid_character (c);
180
181 /* For Meta, Shift, and Control modifiers, we need special care. */
8ac5a9cc 182 if (c & CHAR_META)
6662e69b
KH
183 {
184 /* Move the meta bit to the right place for a string. */
185 c = (c & ~CHAR_META) | 0x80;
186 }
187 if (c & CHAR_SHIFT)
188 {
189 /* Shift modifier is valid only with [A-Za-z]. */
190 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
191 c &= ~CHAR_SHIFT;
192 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
193 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
194 }
8ac5a9cc 195 if (c & CHAR_CTL)
6662e69b
KH
196 {
197 /* Simulate the code in lread.c. */
198 /* Allow `\C- ' and `\C-?'. */
199 if (c == (CHAR_CTL | ' '))
200 c = 0;
201 else if (c == (CHAR_CTL | '?'))
202 c = 127;
203 /* ASCII control chars are made from letters (both cases),
204 as well as the non-letters within 0100...0137. */
205 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
206 c &= (037 | (~0177 & ~CHAR_CTL));
207 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
208 c &= (037 | (~0177 & ~CHAR_CTL));
209 }
210
211 /* If C still has any modifier bits, it is an invalid character. */
212 if (c & CHAR_MODIFIER_MASK)
213 invalid_character (c);
2e344af3
KH
214 }
215 if (SINGLE_BYTE_CHAR_P (c))
216 {
217 if (ASCII_BYTE_P (c) || c >= 0xA0)
218 *p++ = c;
219 else
220 {
221 *p++ = LEADING_CODE_8_BIT_CONTROL;
222 *p++ = c + 0x20;
223 }
8ac5a9cc 224 }
6ce974d4 225 else if (CHAR_VALID_P (c, 0))
4ed46869 226 {
ac4137cc 227 int charset, c1, c2;
4ed46869 228
2e344af3 229 SPLIT_CHAR (c, charset, c1, c2);
99529c2c
KH
230
231 if (charset >= LEADING_CODE_EXT_11)
232 *p++ = (charset < LEADING_CODE_EXT_12
233 ? LEADING_CODE_PRIVATE_11
234 : (charset < LEADING_CODE_EXT_21
235 ? LEADING_CODE_PRIVATE_12
236 : (charset < LEADING_CODE_EXT_22
237 ? LEADING_CODE_PRIVATE_21
238 : LEADING_CODE_PRIVATE_22)));
239 *p++ = charset;
240 if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
241 invalid_character (c);
242 if (c1)
4ed46869 243 {
99529c2c
KH
244 *p++ = c1 | 0x80;
245 if (c2 > 0)
246 *p++ = c2 | 0x80;
4ed46869
KH
247 }
248 }
2e344af3
KH
249 else
250 invalid_character (c);
4ed46869 251
2e344af3 252 return (p - str);
4ed46869
KH
253}
254
44c6492d
KH
/* Return the non-ASCII character corresponding to the multibyte form
   at STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
   length of the multibyte form in *ACTUAL_LEN.

   LEN is handed to SPLIT_MULTIBYTE_SEQ, which does not reference it,
   so no bounds checking against LEN is done here.

   Use the macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of
   calling this function directly if you want to handle ASCII
   characters as well.  */

int
string_to_char (str, len, actual_len)
     const unsigned char *str;
     int len, *actual_len;
{
  int c, bytes, charset, c1;
  /* SPLIT_MULTIBYTE_SEQ assigns the second position code only for
     the longer sequences, so give it a defined value before it is
     passed on to MAKE_CHAR.  */
  int c2 = 0;

  SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
  c = MAKE_CHAR (charset, c1, c2);
  if (actual_len)
    *actual_len = bytes;
  return c;
}
276
44c6492d
KH
/* Return the length in bytes of the multibyte form at string STR of
   length LEN, as computed by PARSE_MULTIBYTE_SEQ.
   Use the macro MULTIBYTE_FORM_LENGTH instead.  */
int
multibyte_form_length (str, len)
     const unsigned char *str;
     int len;
{
  int form_bytes;

  PARSE_MULTIBYTE_SEQ (str, len, form_bytes);
  return form_bytes;
}
289
ac4137cc
KH
290/* Check multibyte form at string STR of length LEN and set variables
291 pointed by CHARSET, C1, and C2 to charset and position codes of the
292 character at STR, and return 0. If there's no multibyte character,
4ed46869
KH
293 return -1. This should be used only in the macro SPLIT_STRING
294 which checks range of STR in advance. */
295
dfcf069d 296int
99529c2c 297split_string (str, len, charset, c1, c2)
ac4137cc
KH
298 const unsigned char *str;
299 unsigned char *c1, *c2;
300 int len, *charset;
4ed46869 301{
ac4137cc 302 register int bytes, cs, code1, code2 = -1;
4ed46869 303
ac4137cc
KH
304 SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
305 if (cs == CHARSET_ASCII)
4ed46869 306 return -1;
ac4137cc
KH
307 *charset = cs;
308 *c1 = code1;
309 *c2 = code2;
5865af0d 310 return 0;
ac4137cc
KH
311}
312
44c6492d
KH
313/* Return 1 iff character C has valid printable glyph.
314 Use the macro CHAR_PRINTABLE_P instead. */
ac4137cc
KH
315int
316char_printable_p (c)
317 int c;
318{
8ebae00c 319 int charset, c1, c2;
ac4137cc 320
2e344af3 321 if (ASCII_BYTE_P (c))
ac4137cc 322 return 1;
2e344af3
KH
323 else if (SINGLE_BYTE_CHAR_P (c))
324 return 0;
325 else if (c >= MAX_CHAR)
99529c2c 326 return 0;
ac4137cc 327
2e344af3 328 SPLIT_CHAR (c, charset, c1, c2);
ac4137cc
KH
329 if (! CHARSET_DEFINED_P (charset))
330 return 0;
331 if (CHARSET_CHARS (charset) == 94
332 ? c1 <= 32 || c1 >= 127
333 : c1 < 32)
334 return 0;
335 if (CHARSET_DIMENSION (charset) == 2
336 && (CHARSET_CHARS (charset) == 94
337 ? c2 <= 32 || c2 >= 127
338 : c2 < 32))
339 return 0;
340 return 1;
4ed46869
KH
341}
342
537efd8d 343/* Translate character C by translation table TABLE. If C
b4e9dd77
KH
344 is negative, translate a character specified by CHARSET, C1, and C2
345 (C1 and C2 are code points of the character). If no translation is
346 found in TABLE, return C. */
dfcf069d 347int
b4e9dd77 348translate_char (table, c, charset, c1, c2)
23d2a7f1
KH
349 Lisp_Object table;
350 int c, charset, c1, c2;
351{
352 Lisp_Object ch;
353 int alt_charset, alt_c1, alt_c2, dimension;
354
0ad3f83d 355 if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
23d2a7f1 356 if (!CHAR_TABLE_P (table)
ac4137cc 357 || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
23d2a7f1
KH
358 return c;
359
360 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
361 dimension = CHARSET_DIMENSION (alt_charset);
362 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
363 /* CH is not a generic character, just return it. */
364 return XFASTINT (ch);
365
366 /* Since CH is a generic character, we must return a specific
367 charater which has the same position codes as C from CH. */
368 if (charset < 0)
369 SPLIT_CHAR (c, charset, c1, c2);
370 if (dimension != CHARSET_DIMENSION (charset))
371 /* We can't make such a character because of dimension mismatch. */
372 return c;
23d2a7f1
KH
373 return MAKE_CHAR (alt_charset, c1, c2);
374}
375
d2665018 376/* Convert the unibyte character C to multibyte based on
b4e9dd77 377 Vnonascii_translation_table or nonascii_insert_offset. If they can't
d2665018
KH
378 convert C to a valid multibyte character, convert it based on
379 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
35e623fb 380
dfcf069d 381int
35e623fb
RS
382unibyte_char_to_multibyte (c)
383 int c;
384{
543b4f61 385 if (c < 0400 && c >= 0200)
35e623fb 386 {
d2665018
KH
387 int c_save = c;
388
b4e9dd77 389 if (! NILP (Vnonascii_translation_table))
bbf12bb3
KH
390 {
391 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
ac4137cc 392 if (c >= 0400 && ! char_valid_p (c, 0))
bbf12bb3
KH
393 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
394 }
395 else if (c >= 0240 && nonascii_insert_offset > 0)
396 {
397 c += nonascii_insert_offset;
ac4137cc 398 if (c < 0400 || ! char_valid_p (c, 0))
bbf12bb3
KH
399 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
400 }
401 else if (c >= 0240)
d2665018 402 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
35e623fb
RS
403 }
404 return c;
405}
76d7b829
KH
406
407
408/* Convert the multibyte character C to unibyte 8-bit character based
409 on Vnonascii_translation_table or nonascii_insert_offset. If
410 REV_TBL is non-nil, it should be a reverse table of
411 Vnonascii_translation_table, i.e. what given by:
412 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
413
414int
415multibyte_char_to_unibyte (c, rev_tbl)
416 int c;
417 Lisp_Object rev_tbl;
418{
419 if (!SINGLE_BYTE_CHAR_P (c))
420 {
421 int c_save = c;
422
423 if (! CHAR_TABLE_P (rev_tbl)
424 && CHAR_TABLE_P (Vnonascii_translation_table))
425 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
426 make_number (0));
427 if (CHAR_TABLE_P (rev_tbl))
428 {
429 Lisp_Object temp;
430 temp = Faref (rev_tbl, make_number (c));
431 if (INTEGERP (temp))
432 c = XINT (temp);
bbf12bb3
KH
433 if (c >= 256)
434 c = (c_save & 0177) + 0200;
435 }
436 else
437 {
438 if (nonascii_insert_offset > 0)
439 c -= nonascii_insert_offset;
440 if (c < 128 || c >= 256)
441 c = (c_save & 0177) + 0200;
76d7b829 442 }
76d7b829
KH
443 }
444
445 return c;
446}
447
35e623fb 448\f
4ed46869
KH
449/* Update the table Vcharset_table with the given arguments (see the
450 document of `define-charset' for the meaning of each argument).
451 Several other table contents are also updated. The caller should
452 check the validity of CHARSET-ID and the remaining arguments in
453 advance. */
454
455void
456update_charset_table (charset_id, dimension, chars, width, direction,
457 iso_final_char, iso_graphic_plane,
458 short_name, long_name, description)
459 Lisp_Object charset_id, dimension, chars, width, direction;
460 Lisp_Object iso_final_char, iso_graphic_plane;
461 Lisp_Object short_name, long_name, description;
462{
463 int charset = XINT (charset_id);
464 int bytes;
465 unsigned char leading_code_base, leading_code_ext;
466
6dc0722d
KH
467 if (NILP (CHARSET_TABLE_ENTRY (charset)))
468 CHARSET_TABLE_ENTRY (charset)
469 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869 470
d78bc582
KH
471 if (NILP (long_name))
472 long_name = short_name;
473 if (NILP (description))
474 description = long_name;
475
4ed46869
KH
476 /* Get byte length of multibyte form, base leading-code, and
477 extended leading-code of the charset. See the comment under the
478 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
479 bytes = XINT (dimension);
480 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
481 {
482 /* Official charset, it doesn't have an extended leading-code. */
2e344af3 483 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
4ed46869
KH
484 bytes += 1; /* For a base leading-code. */
485 leading_code_base = charset;
486 leading_code_ext = 0;
487 }
488 else
489 {
490 /* Private charset. */
491 bytes += 2; /* For base and extended leading-codes. */
492 leading_code_base
493 = (charset < LEADING_CODE_EXT_12
494 ? LEADING_CODE_PRIVATE_11
495 : (charset < LEADING_CODE_EXT_21
496 ? LEADING_CODE_PRIVATE_12
497 : (charset < LEADING_CODE_EXT_22
498 ? LEADING_CODE_PRIVATE_21
499 : LEADING_CODE_PRIVATE_22)));
500 leading_code_ext = charset;
c83ef371
KH
501 if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
502 error ("Invalid dimension for the charset-ID %d", charset);
503 }
6ef23ebb 504
4ed46869
KH
505 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
506 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
507 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
508 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
509 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
510 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
511 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
512 = make_number (leading_code_base);
513 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
514 = make_number (leading_code_ext);
515 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
516 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
517 = iso_graphic_plane;
518 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
519 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
520 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
521 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
522
523 {
524 /* If we have already defined a charset which has the same
525 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
526 DIRECTION, we must update the entry REVERSE-CHARSET of both
527 charsets. If there's no such charset, the value of the entry
528 is set to nil. */
529 int i;
530
513ee442 531 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
532 if (!NILP (CHARSET_TABLE_ENTRY (i)))
533 {
534 if (CHARSET_DIMENSION (i) == XINT (dimension)
535 && CHARSET_CHARS (i) == XINT (chars)
536 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
537 && CHARSET_DIRECTION (i) != XINT (direction))
538 {
539 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
540 = make_number (i);
541 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
542 break;
543 }
544 }
513ee442 545 if (i > MAX_CHARSET)
4ed46869
KH
546 /* No such a charset. */
547 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
548 = make_number (-1);
549 }
550
c83ef371 551 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
4ed46869
KH
552 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
553 {
c83ef371 554 bytes_by_char_head[leading_code_base] = bytes;
4ed46869
KH
555 width_by_char_head[leading_code_base] = XINT (width);
556
557 /* Update table emacs_code_class. */
558 emacs_code_class[charset] = (bytes == 2
559 ? EMACS_leading_code_2
560 : (bytes == 3
561 ? EMACS_leading_code_3
562 : EMACS_leading_code_4));
563 }
564
565 /* Update table iso_charset_table. */
52e386c2 566 if (XINT (iso_final_char) >= 0
2e344af3 567 && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
4ed46869
KH
568 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
569}
570
571#ifdef emacs
572
573/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
574 is invalid. */
575int
576get_charset_id (charset_symbol)
577 Lisp_Object charset_symbol;
578{
579 Lisp_Object val;
580 int charset;
581
582 return ((SYMBOLP (charset_symbol)
583 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
584 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
585 CHARSET_VALID_P (charset)))
586 ? charset : -1);
587}
588
589/* Return an identification number for a new private charset of
590 DIMENSION and WIDTH. If there's no more room for the new charset,
591 return 0. */
592Lisp_Object
593get_new_private_charset_id (dimension, width)
594 int dimension, width;
595{
596 int charset, from, to;
597
598 if (dimension == 1)
599 {
3bb7b08b
KH
600 from = LEADING_CODE_EXT_11;
601 to = LEADING_CODE_EXT_21;
4ed46869
KH
602 }
603 else
604 {
3bb7b08b
KH
605 from = LEADING_CODE_EXT_21;
606 to = LEADING_CODE_EXT_MAX + 1;
4ed46869
KH
607 }
608
609 for (charset = from; charset < to; charset++)
610 if (!CHARSET_DEFINED_P (charset)) break;
611
612 return make_number (charset < to ? charset : 0);
613}
614
615DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
616 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
23d2a7f1 617If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
4ed46869
KH
618 treated as a private charset.\n\
619INFO-VECTOR is a vector of the format:\n\
620 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
621 SHORT-NAME LONG-NAME DESCRIPTION]\n\
622The meanings of each elements is as follows:\n\
623DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
624CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
625WIDTH (integer) is the number of columns a character in the charset\n\
626occupies on the screen: one of 0, 1, and 2.\n\
627\n\
628DIRECTION (integer) is the rendering direction of characters in the\n\
277576f6
KH
629charset when rendering. If 0, render from left to right, else\n\
630render from right to left.\n\
4ed46869
KH
631\n\
632ISO-FINAL-CHAR (character) is the final character of the\n\
633corresponding ISO 2022 charset.\n\
2e344af3 634It may be -1 if the charset is internal use only.\n\
4ed46869
KH
635\n\
636ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
637while encoding to variants of ISO 2022 coding system, one of the\n\
638following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
2e344af3 639It may be -1 if the charset is internal use only.\n\
4ed46869
KH
640\n\
641SHORT-NAME (string) is the short name to refer to the charset.\n\
642\n\
643LONG-NAME (string) is the long name to refer to the charset.\n\
644\n\
645DESCRIPTION (string) is the description string of the charset.")
646 (charset_id, charset_symbol, info_vector)
647 Lisp_Object charset_id, charset_symbol, info_vector;
648{
649 Lisp_Object *vec;
650
651 if (!NILP (charset_id))
652 CHECK_NUMBER (charset_id, 0);
653 CHECK_SYMBOL (charset_symbol, 1);
654 CHECK_VECTOR (info_vector, 2);
655
656 if (! NILP (charset_id))
657 {
658 if (! CHARSET_VALID_P (XINT (charset_id)))
659 error ("Invalid CHARSET: %d", XINT (charset_id));
660 else if (CHARSET_DEFINED_P (XINT (charset_id)))
661 error ("Already defined charset: %d", XINT (charset_id));
662 }
663
664 vec = XVECTOR (info_vector)->contents;
665 if (XVECTOR (info_vector)->size != 9
666 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
667 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
668 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
669 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
2e344af3
KH
670 || !INTEGERP (vec[4])
671 || !(XINT (vec[4]) == -1 || XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
672 || !INTEGERP (vec[5])
673 || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
4ed46869
KH
674 || !STRINGP (vec[6])
675 || !STRINGP (vec[7])
676 || !STRINGP (vec[8]))
677 error ("Invalid info-vector argument for defining charset %s",
678 XSYMBOL (charset_symbol)->name->data);
679
680 if (NILP (charset_id))
681 {
682 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
683 if (XINT (charset_id) == 0)
684 error ("There's no room for a new private charset %s",
685 XSYMBOL (charset_symbol)->name->data);
686 }
687
688 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
689 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 690 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
691 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
692 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
693 return Qnil;
694}
695
8a73a704
KH
696DEFUN ("generic-character-list", Fgeneric_character_list,
697 Sgeneric_character_list, 0, 0, 0,
698 "Return a list of all possible generic characters.\n\
699It includes a generic character for a charset not yet defined.")
700 ()
701{
702 return Vgeneric_character_list;
703}
704
3fac5a51
KH
705DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
706 Sget_unused_iso_final_char, 2, 2, 0,
707 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
708DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
709CHARS is the number of characters in a dimension: 94 or 96.\n\
710\n\
711This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
712If there's no unused final char for the specified kind of charset,\n\
713return nil.")
714 (dimension, chars)
715 Lisp_Object dimension, chars;
716{
717 int final_char;
718
719 CHECK_NUMBER (dimension, 0);
720 CHECK_NUMBER (chars, 1);
721 if (XINT (dimension) != 1 && XINT (dimension) != 2)
722 error ("Invalid charset dimension %d, it should be 1 or 2",
723 XINT (dimension));
724 if (XINT (chars) != 94 && XINT (chars) != 96)
725 error ("Invalid charset chars %d, it should be 94 or 96",
726 XINT (chars));
727 for (final_char = '0'; final_char <= '?'; final_char++)
728 {
729 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
730 break;
731 }
732 return (final_char <= '?' ? make_number (final_char) : Qnil);
733}
734
4ed46869
KH
735DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
736 4, 4, 0,
737 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
738CHARSET should be defined by `defined-charset' in advance.")
739 (dimension, chars, final_char, charset_symbol)
740 Lisp_Object dimension, chars, final_char, charset_symbol;
741{
742 int charset;
743
744 CHECK_NUMBER (dimension, 0);
745 CHECK_NUMBER (chars, 1);
746 CHECK_NUMBER (final_char, 2);
747 CHECK_SYMBOL (charset_symbol, 3);
748
749 if (XINT (dimension) != 1 && XINT (dimension) != 2)
750 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
751 if (XINT (chars) != 94 && XINT (chars) != 96)
752 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
753 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
754 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
755 if ((charset = get_charset_id (charset_symbol)) < 0)
756 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
757
758 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
759 return Qnil;
760}
761
2e344af3
KH
762/* Return information about charsets in the text at PTR of NBYTES
763 bytes, which are NCHARS characters. The value is:
f6302ac9 764
cfe34140 765 0: Each character is represented by one byte. This is always
f6302ac9 766 true for unibyte text.
2e344af3
KH
767 1: No charsets other than ascii eight-bit-control,
768 eight-bit-graphic, and latin-1 are found.
769 2: Otherwise.
1d67c29b 770
2e344af3
KH
771 In addition, if CHARSETS is nonzero, for each found charset N, set
772 CHARSETS[N] to 1. For that, callers should allocate CHARSETS
773 (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
774 table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
775 1 (note that there's no charset whose ID is 1). */
4ed46869
KH
776
777int
2e344af3
KH
778find_charset_in_text (ptr, nchars, nbytes, charsets, table)
779 unsigned char *ptr;
780 int nchars, nbytes, *charsets;
23d2a7f1 781 Lisp_Object table;
4ed46869 782{
2e344af3 783 if (nchars == nbytes)
0282eb69 784 {
2e344af3 785 if (charsets && nbytes > 0)
0282eb69 786 {
2e344af3
KH
787 unsigned char *endp = ptr + nbytes;
788 int maskbits = 0;
789
790 while (ptr < endp && maskbits != 7)
791 {
792 maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
793 ptr++;
794 }
795
796 if (maskbits & 1)
797 charsets[CHARSET_ASCII] = 1;
798 if (maskbits & 2)
799 charsets[CHARSET_8_BIT_CONTROL] = 1;
800 if (maskbits & 4)
801 charsets[CHARSET_8_BIT_GRAPHIC] = 1;
0282eb69 802 }
2e344af3 803 return 0;
0282eb69 804 }
2e344af3 805 else
4ed46869 806 {
2e344af3 807 int return_val = 1;
99529c2c 808 int bytes, charset, c1, c2;
05505664 809
2e344af3
KH
810 if (! CHAR_TABLE_P (table))
811 table = Qnil;
05505664 812
2e344af3 813 while (nchars-- > 0)
23d2a7f1 814 {
2e344af3
KH
815 SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
816 ptr += bytes;
4ed46869 817
2e344af3
KH
818 if (!CHARSET_DEFINED_P (charset))
819 charset = 1;
820 else if (! NILP (table))
821 {
822 int c = translate_char (table, -1, charset, c1, c2);
823 if (c >= 0)
824 charset = CHAR_CHARSET (c);
825 }
826
827 if (return_val == 1
828 && charset != CHARSET_ASCII
829 && charset != CHARSET_8_BIT_CONTROL
830 && charset != CHARSET_8_BIT_GRAPHIC
831 && charset != charset_latin_iso8859_1)
832 return_val = 2;
833
834 if (charsets)
835 charsets[charset] = 1;
836 else if (return_val == 2)
837 break;
4ed46869 838 }
2e344af3 839 return return_val;
4ed46869 840 }
4ed46869
KH
841}
842
843DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 844 2, 3, 0,
4ed46869 845 "Return a list of charsets in the region between BEG and END.\n\
23d2a7f1 846BEG and END are buffer positions.\n\
0282eb69
KH
847Optional arg TABLE if non-nil is a translation table to look up.\n\
848\n\
365dd325 849If the region contains invalid multibyte characters,\n\
38f02ede 850`unknown' is included in the returned list.\n\
0282eb69 851\n\
2e344af3
KH
852If the current buffer is unibyte, the returned list may contain\n\
853only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
23d2a7f1
KH
854 (beg, end, table)
855 Lisp_Object beg, end, table;
4ed46869 856{
028d516b 857 int charsets[MAX_CHARSET + 1];
6ae1f27e 858 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
859 Lisp_Object val;
860
861 validate_region (&beg, &end);
862 from = XFASTINT (beg);
863 stop = to = XFASTINT (end);
6ae1f27e 864
4ed46869 865 if (from < GPT && GPT < to)
6ae1f27e
RS
866 {
867 stop = GPT;
868 stop_byte = GPT_BYTE;
869 }
870 else
871 stop_byte = CHAR_TO_BYTE (stop);
872
873 from_byte = CHAR_TO_BYTE (from);
874
028d516b 875 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
4ed46869
KH
876 while (1)
877 {
2e344af3
KH
878 find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
879 stop_byte - from_byte, charsets, table);
4ed46869 880 if (stop < to)
6ae1f27e
RS
881 {
882 from = stop, from_byte = stop_byte;
883 stop = to, stop_byte = CHAR_TO_BYTE (stop);
884 }
4ed46869
KH
885 else
886 break;
887 }
6ae1f27e 888
4ed46869 889 val = Qnil;
2e344af3 890 if (charsets[1])
0282eb69 891 val = Fcons (Qunknown, val);
2e344af3
KH
892 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
893 if (charsets[i])
894 val = Fcons (CHARSET_SYMBOL (i), val);
895 if (charsets[0])
896 val = Fcons (Qascii, val);
4ed46869
KH
897 return val;
898}
899
900DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1
KH
901 1, 2, 0,
902 "Return a list of charsets in STR.\n\
0282eb69
KH
903Optional arg TABLE if non-nil is a translation table to look up.\n\
904\n\
365dd325 905If the string contains invalid multibyte characters,\n\
0282eb69
KH
906`unknown' is included in the returned list.\n\
907\n\
2e344af3
KH
908If STR is unibyte, the returned list may contain\n\
909only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
23d2a7f1
KH
910 (str, table)
911 Lisp_Object str, table;
4ed46869 912{
a29e3b1b 913 int charsets[MAX_CHARSET + 1];
4ed46869
KH
914 int i;
915 Lisp_Object val;
916
917 CHECK_STRING (str, 0);
87b089ad 918
a29e3b1b 919 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
2e344af3
KH
920 find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
921 STRING_BYTES (XSTRING (str)), charsets, table);
922
4ed46869 923 val = Qnil;
2e344af3 924 if (charsets[1])
0282eb69 925 val = Fcons (Qunknown, val);
2e344af3
KH
926 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
927 if (charsets[i])
928 val = Fcons (CHARSET_SYMBOL (i), val);
929 if (charsets[0])
930 val = Fcons (Qascii, val);
4ed46869
KH
931 return val;
932}
2e344af3 933
4ed46869
KH
934\f
935DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
513ee442 936 "")
4ed46869
KH
937 (charset, code1, code2)
938 Lisp_Object charset, code1, code2;
939{
ac4137cc
KH
940 int charset_id, c1, c2;
941
4ed46869 942 CHECK_NUMBER (charset, 0);
ac4137cc
KH
943 charset_id = XINT (charset);
944 if (!CHARSET_DEFINED_P (charset_id))
945 error ("Invalid charset ID: %d", XINT (charset));
4ed46869
KH
946
947 if (NILP (code1))
ac4137cc 948 c1 = 0;
4ed46869 949 else
ac4137cc
KH
950 {
951 CHECK_NUMBER (code1, 1);
952 c1 = XINT (code1);
953 }
4ed46869 954 if (NILP (code2))
ac4137cc 955 c2 = 0;
4ed46869 956 else
ac4137cc
KH
957 {
958 CHECK_NUMBER (code2, 2);
959 c2 = XINT (code2);
960 }
4ed46869 961
2e344af3
KH
962 if (charset_id == CHARSET_ASCII)
963 {
964 if (c1 < 0 || c1 > 0x7F)
965 goto invalid_code_posints;
966 return make_number (c1);
967 }
968 else if (charset_id == CHARSET_8_BIT_CONTROL)
969 {
30736012
KH
970 if (NILP (code1))
971 c1 = 0x80;
972 else if (c1 < 0x80 || c1 > 0x9F)
2e344af3
KH
973 goto invalid_code_posints;
974 return make_number (c1);
975 }
976 else if (charset_id == CHARSET_8_BIT_GRAPHIC)
977 {
30736012
KH
978 if (NILP (code1))
979 c1 = 0xA0;
980 else if (c1 < 0xA0 || c1 > 0xFF)
2e344af3
KH
981 goto invalid_code_posints;
982 return make_number (c1);
983 }
984 else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
985 goto invalid_code_posints;
ac4137cc
KH
986 c1 &= 0x7F;
987 c2 &= 0x7F;
988 if (c1 == 0
989 ? c2 != 0
990 : (c2 == 0
87f67317
KR
991 ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
992 : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
2e344af3 993 goto invalid_code_posints;
ac4137cc 994 return make_number (MAKE_CHAR (charset_id, c1, c2));
2e344af3
KH
995
996 invalid_code_posints:
997 error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
4ed46869
KH
998}
999
1000DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
0282eb69
KH
1001 "Return list of charset and one or two position-codes of CHAR.\n\
1002If CHAR is invalid as a character code,\n\
1003return a list of symbol `unknown' and CHAR.")
4ed46869
KH
1004 (ch)
1005 Lisp_Object ch;
1006{
0282eb69 1007 int c, charset, c1, c2;
4ed46869
KH
1008
1009 CHECK_NUMBER (ch, 0);
0282eb69
KH
1010 c = XFASTINT (ch);
1011 if (!CHAR_VALID_P (c, 1))
1012 return Fcons (Qunknown, Fcons (ch, Qnil));
4ed46869 1013 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 1014 return (c2 >= 0
4ed46869
KH
1015 ? Fcons (CHARSET_SYMBOL (charset),
1016 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1017 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1018}
1019
1020DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1021 "Return charset of CHAR.")
1022 (ch)
1023 Lisp_Object ch;
1024{
1025 CHECK_NUMBER (ch, 0);
1026
1027 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1028}
1029
90d7b74e 1030DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
ac4137cc 1031 "Return charset of a character in the current buffer at position POS.\n\
e6e114f2
KH
1032If POS is nil, it defauls to the current point.\n\
1033If POS is out of range, the value is nil.")
90d7b74e
KH
1034 (pos)
1035 Lisp_Object pos;
1036{
2e344af3
KH
1037 Lisp_Object ch;
1038 int charset;
ac4137cc 1039
2e344af3
KH
1040 ch = Fchar_after (pos);
1041 if (! INTEGERP (ch))
1042 return ch;
1043 charset = CHAR_CHARSET (XINT (ch));
90d7b74e
KH
1044 return CHARSET_SYMBOL (charset);
1045}
1046
4ed46869 1047DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2b71bb78
KH
1048 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1049\n\
1050ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1051by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1052where as Emacs distinguishes them by charset symbol.\n\
1053See the documentation of the function `charset-info' for the meanings of\n\
1054DIMENSION, CHARS, and FINAL-CHAR.")
4ed46869
KH
1055 (dimension, chars, final_char)
1056 Lisp_Object dimension, chars, final_char;
1057{
1058 int charset;
1059
1060 CHECK_NUMBER (dimension, 0);
1061 CHECK_NUMBER (chars, 1);
1062 CHECK_NUMBER (final_char, 2);
1063
1064 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1065 return Qnil;
1066 return CHARSET_SYMBOL (charset);
1067}
1068
9d3d8cba
KH
1069/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1070 generic character. If GENERICP is zero, return nonzero iff C is a
1071 valid normal character. Do not call this function directly,
1072 instead use macro CHAR_VALID_P. */
1073int
1074char_valid_p (c, genericp)
1075 int c, genericp;
1076{
1077 int charset, c1, c2;
1078
0e235b7e 1079 if (c < 0 || c >= MAX_CHAR)
9d3d8cba
KH
1080 return 0;
1081 if (SINGLE_BYTE_CHAR_P (c))
1082 return 1;
2e344af3 1083 SPLIT_CHAR (c, charset, c1, c2);
ac4137cc
KH
1084 if (genericp)
1085 {
1086 if (c1)
1087 {
1088 if (c2 <= 0) c2 = 0x20;
1089 }
1090 else
1091 {
1092 if (c2 <= 0) c1 = c2 = 0x20;
1093 }
1094 }
1095 return (CHARSET_DEFINED_P (charset)
44c6492d 1096 && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
9d3d8cba
KH
1097}
1098
1099DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
a9d02884
DL
1100 "Return t if OBJECT is a valid normal character.\n\
1101If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
9d3d8cba
KH
1102a valid generic character.")
1103 (object, genericp)
1104 Lisp_Object object, genericp;
1105{
1106 if (! NATNUMP (object))
1107 return Qnil;
1108 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1109}
1110
d2665018
KH
1111DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1112 Sunibyte_char_to_multibyte, 1, 1, 0,
1113 "Convert the unibyte character CH to multibyte character.\n\
537efd8d 1114The conversion is done based on `nonascii-translation-table' (which see)\n\
340b8d58 1115 or `nonascii-insert-offset' (which see).")
d2665018
KH
1116 (ch)
1117 Lisp_Object ch;
1118{
1119 int c;
1120
1121 CHECK_NUMBER (ch, 0);
1122 c = XINT (ch);
1123 if (c < 0 || c >= 0400)
1124 error ("Invalid unibyte character: %d", c);
1125 c = unibyte_char_to_multibyte (c);
1126 if (c < 0)
1127 error ("Can't convert to multibyte character: %d", XINT (ch));
1128 return make_number (c);
1129}
1130
1bcc1567
RS
1131DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1132 Smultibyte_char_to_unibyte, 1, 1, 0,
1133 "Convert the multibyte character CH to unibyte character.\n\
1134The conversion is done based on `nonascii-translation-table' (which see)\n\
1135 or `nonascii-insert-offset' (which see).")
1136 (ch)
1137 Lisp_Object ch;
1138{
1139 int c;
1140
1141 CHECK_NUMBER (ch, 0);
1142 c = XINT (ch);
ac4137cc 1143 if (! CHAR_VALID_P (c, 0))
1bcc1567
RS
1144 error ("Invalid multibyte character: %d", c);
1145 c = multibyte_char_to_unibyte (c, Qnil);
1146 if (c < 0)
1147 error ("Can't convert to unibyte character: %d", XINT (ch));
1148 return make_number (c);
1149}
1150
4ed46869 1151DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
f78643ef 1152 "Return 1 regardless of the argument CHAR.\n\
60022cb7 1153This is now an obsolete function. We keep it just for backward compatibility.")
4ed46869
KH
1154 (ch)
1155 Lisp_Object ch;
1156{
4ed46869 1157 CHECK_NUMBER (ch, 0);
9b6a601f
KH
1158 return make_number (1);
1159}
1160
1161/* Return how many bytes C will occupy in a multibyte buffer.
1162 Don't call this function directly, instead use macro CHAR_BYTES. */
1163int
1164char_bytes (c)
1165 int c;
1166{
99529c2c 1167 int charset;
9b6a601f 1168
2e344af3
KH
1169 if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1170 return 1;
1171 if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
8ac5a9cc
KH
1172 return 1;
1173
99529c2c
KH
1174 charset = CHAR_CHARSET (c);
1175 return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
4ed46869
KH
1176}
1177
/* Return the width of the character whose multibyte form starts with
   the byte C.  The width is the number of columns it occupies on the
   screen when displayed in the current buffer:
     - TAB takes `tab_width' columns and newline takes none;
     - other codes below 0x20, and 0x7F, take 2 or 4 columns depending
       on whether `ctl_arrow' is non-nil;
     - other codes below 0x7F take 1 column;
     - anything else takes WIDTH_BY_CHAR_HEAD (C) when multibyte
       characters are enabled and C is a base leading code, else 4.
   C is evaluated more than once, so it must have no side effects.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
4ed46869
KH
1195
1196DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1197 "Return width of CHAR when displayed in the current buffer.\n\
4245f3d2
KH
1198The width is measured by how many columns it occupies on the screen.\n\
1199Tab is taken to occupy `tab-width' columns.")
4ed46869
KH
1200 (ch)
1201 Lisp_Object ch;
1202{
859f2b3c 1203 Lisp_Object val, disp;
4ed46869 1204 int c;
51c4025f 1205 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869
KH
1206
1207 CHECK_NUMBER (ch, 0);
1208
859f2b3c
RS
1209 c = XINT (ch);
1210
1211 /* Get the way the display table would display it. */
51c4025f 1212 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
859f2b3c
RS
1213
1214 if (VECTORP (disp))
1215 XSETINT (val, XVECTOR (disp)->size);
1216 else if (SINGLE_BYTE_CHAR_P (c))
1217 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
1218 else
1219 {
1220 int charset = CHAR_CHARSET (c);
1221
1222 XSETFASTINT (val, CHARSET_WIDTH (charset));
1223 }
1224 return val;
1225}
1226
1227/* Return width of string STR of length LEN when displayed in the
1228 current buffer. The width is measured by how many columns it
1229 occupies on the screen. */
859f2b3c 1230
4ed46869
KH
1231int
1232strwidth (str, len)
1233 unsigned char *str;
1234 int len;
1235{
beeedaad
KH
1236 return c_string_width (str, len, -1, NULL, NULL);
1237}
1238
1239/* Return width of string STR of length LEN when displayed in the
1240 current buffer. The width is measured by how many columns it
1241 occupies on the screen. If PRECISION > 0, return the width of
1242 longest substring that doesn't exceed PRECISION, and set number of
1243 characters and bytes of the substring in *NCHARS and *NBYTES
1244 respectively. */
1245
1246int
1247c_string_width (str, len, precision, nchars, nbytes)
1248 unsigned char *str;
1249 int precision, *nchars, *nbytes;
1250{
1251 int i = 0, i_byte = 0;
4ed46869 1252 int width = 0;
beeedaad 1253 int chars;
c4a4e28f 1254 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 1255
beeedaad 1256 while (i_byte < len)
859f2b3c 1257 {
beeedaad
KH
1258 int bytes, thiswidth;
1259 Lisp_Object val;
859f2b3c 1260
99529c2c 1261 if (dp)
beeedaad
KH
1262 {
1263 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1264
1265 chars = 1;
1266 val = DISP_CHAR_VECTOR (dp, c);
1267 if (VECTORP (val))
1268 thiswidth = XVECTOR (val)->size;
1269 else
1270 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1271 }
859f2b3c 1272 else
beeedaad
KH
1273 {
1274 chars = 1;
1275 PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1276 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1277 }
99529c2c 1278
beeedaad
KH
1279 if (precision > 0
1280 && (width + thiswidth > precision))
1281 {
1282 *nchars = i;
1283 *nbytes = i_byte;
1284 return width;
1285 }
1286 i++;
1287 i_byte += bytes;
1288 width += thiswidth;
1289 }
99529c2c 1290
beeedaad
KH
1291 if (precision > 0)
1292 {
1293 *nchars = i;
1294 *nbytes = i_byte;
859f2b3c 1295 }
beeedaad 1296
4ed46869
KH
1297 return width;
1298}
1299
beeedaad
KH
1300/* Return width of Lisp string STRING when displayed in the current
1301 buffer. The width is measured by how many columns it occupies on
1302 the screen while paying attention to compositions. If PRECISION >
1303 0, return the width of longest substring that doesn't exceed
1304 PRECISION, and set number of characters and bytes of the substring
1305 in *NCHARS and *NBYTES respectively. */
1306
3f62427c 1307int
beeedaad
KH
1308lisp_string_width (string, precision, nchars, nbytes)
1309 Lisp_Object string;
1310 int precision, *nchars, *nbytes;
3f62427c 1311{
beeedaad
KH
1312 int len = XSTRING (string)->size;
1313 int len_byte = STRING_BYTES (XSTRING (string));
1314 unsigned char *str = XSTRING (string)->data;
1315 int i = 0, i_byte = 0;
3f62427c 1316 int width = 0;
beeedaad 1317 struct Lisp_Char_Table *dp = buffer_display_table ();
3f62427c
KH
1318
1319 while (i < len)
1320 {
beeedaad
KH
1321 int chars, bytes, thiswidth;
1322 Lisp_Object val;
1323 int cmp_id;
1324 int ignore, end;
1325
1326 if (find_composition (i, -1, &ignore, &end, &val, string)
1327 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1328 >= 0))
3f62427c 1329 {
beeedaad
KH
1330 thiswidth = composition_table[cmp_id]->width;
1331 chars = end - i;
1332 bytes = string_char_to_byte (string, end) - i_byte;
1333 }
1334 else if (dp)
1335 {
1336 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1337
1338 chars = 1;
1339 val = DISP_CHAR_VECTOR (dp, c);
1340 if (VECTORP (val))
1341 thiswidth = XVECTOR (val)->size;
1342 else
1343 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
3f62427c
KH
1344 }
1345 else
1346 {
beeedaad
KH
1347 chars = 1;
1348 PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1349 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1350 }
1351
1352 if (precision > 0
1353 && (width + thiswidth > precision))
1354 {
1355 *nchars = i;
1356 *nbytes = i_byte;
1357 return width;
3f62427c 1358 }
beeedaad
KH
1359 i += chars;
1360 i_byte += bytes;
1361 width += thiswidth;
1362 }
1363
1364 if (precision > 0)
1365 {
1366 *nchars = i;
1367 *nbytes = i_byte;
3f62427c 1368 }
beeedaad 1369
3f62427c
KH
1370 return width;
1371}
1372
4ed46869
KH
1373DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1374 "Return width of STRING when displayed in the current buffer.\n\
1375Width is measured by how many columns it occupies on the screen.\n\
046b1f03
RS
1376When calculating width of a multibyte character in STRING,\n\
1377only the base leading-code is considered; the validity of\n\
5af47d5f
GM
1378the following bytes is not checked. Tabs in STRING are always\n\
1379taken to occupy `tab-width' columns.")
4ed46869
KH
1380 (str)
1381 Lisp_Object str;
1382{
1383 Lisp_Object val;
1384
1385 CHECK_STRING (str, 0);
beeedaad 1386 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
4ed46869
KH
1387 return val;
1388}
1389
1390DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1391 "Return the direction of CHAR.\n\
1392The returned value is 0 for left-to-right and 1 for right-to-left.")
1393 (ch)
1394 Lisp_Object ch;
1395{
1396 int charset;
1397
1398 CHECK_NUMBER (ch, 0);
1399 charset = CHAR_CHARSET (XFASTINT (ch));
1400 if (!CHARSET_DEFINED_P (charset))
93bcb785 1401 invalid_character (XINT (ch));
4ed46869
KH
1402 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1403}
1404
af4fecb4 1405DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
6ae1f27e 1406 "Return number of characters between BEG and END.")
046b1f03
RS
1407 (beg, end)
1408 Lisp_Object beg, end;
1409{
6ae1f27e 1410 int from, to;
046b1f03 1411
17e7ef1b
RS
1412 CHECK_NUMBER_COERCE_MARKER (beg, 0);
1413 CHECK_NUMBER_COERCE_MARKER (end, 1);
1414
046b1f03 1415 from = min (XFASTINT (beg), XFASTINT (end));
a8a35e61 1416 to = max (XFASTINT (beg), XFASTINT (end));
046b1f03 1417
a8c21066 1418 return make_number (to - from);
6ae1f27e 1419}
9036eb45 1420
87b089ad
RS
1421/* Return the number of characters in the NBYTES bytes at PTR.
1422 This works by looking at the contents and checking for multibyte sequences.
1423 However, if the current buffer has enable-multibyte-characters = nil,
1424 we treat each byte as a character. */
1425
6ae1f27e
RS
1426int
1427chars_in_text (ptr, nbytes)
1428 unsigned char *ptr;
1429 int nbytes;
1430{
87b089ad
RS
1431 /* current_buffer is null at early stages of Emacs initialization. */
1432 if (current_buffer == 0
1433 || NILP (current_buffer->enable_multibyte_characters))
6ae1f27e 1434 return nbytes;
a8a35e61 1435
ac4137cc 1436 return multibyte_chars_in_text (ptr, nbytes);
046b1f03
RS
1437}
1438
87b089ad
RS
/* Return the number of characters in the NBYTES bytes at PTR, found
   by parsing the contents as multibyte sequences.  Unlike
   chars_in_text, this ignores enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count = 0;
  int seq_len;

  /* Each iteration consumes one sequence of SEQ_LEN bytes.  */
  for (; ptr < limit; ptr += seq_len)
    {
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seq_len);
      count++;
    }

  return count;
}
1463
/* Parse the unibyte text of LEN bytes at STR as if it were multibyte
   text, and store the resulting character count in *NCHARS and byte
   count in *NBYTES.  A byte that does not start a sequence accepted
   by UNIBYTE_STR_AS_MULTIBYTE_P is counted as two bytes, because
   8-bit characters in the range 0x80..0x9F are represented by 2
   bytes in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     unsigned char *str;
     int len, *nchars, *nbytes;
{
  unsigned char *limit = str + len;
  int seq_len;
  int char_count = 0, byte_count = 0;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, seq_len))
        {
          str += seq_len;
          byte_count += seq_len;
        }
      else
        {
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1488
d8e4f486 1489/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
2e344af3
KH
1490 It actually converts only 8-bit characters in the range 0x80..0x9F
1491 that don't contruct multibyte characters to multibyte forms. If
1492 NCHARS is nonzero, set *NCHARS to the number of characters in the
1493 text. It is assured that we can use LEN bytes at STR as a work
1494 area and that is enough. Return the number of bytes of the
1495 resulting text. */
1496
1497int
1498str_as_multibyte (str, len, nbytes, nchars)
1499 unsigned char *str;
1500 int len, nbytes, *nchars;
1501{
1502 unsigned char *p = str, *endp = str + nbytes;
1503 unsigned char *to;
1504 int chars = 0;
1505 int n;
1506
1507 while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1508 p += n, chars++;
1509 if (nchars)
1510 *nchars = chars;
1511 if (p == endp)
1512 return nbytes;
1513
1514 to = p;
1515 nbytes = endp - p;
1516 endp = str + len;
1517 safe_bcopy (p, endp - nbytes, nbytes);
1518 p = endp - nbytes;
1519 while (p < endp)
1520 {
1521 if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
ac4137cc 1522 {
2e344af3
KH
1523 while (n--)
1524 *to++ = *p++;
1525 }
ac4137cc
KH
1526 else
1527 {
2e344af3
KH
1528 *to++ = LEADING_CODE_8_BIT_CONTROL;
1529 *to++ = *p++ + 0x20;
ac4137cc 1530 }
87b089ad
RS
1531 chars++;
1532 }
2e344af3
KH
1533 if (nchars)
1534 *nchars = chars;
1535 return (to - str);
1536}
87b089ad 1537
d8e4f486 1538/* Convert unibyte text at STR of NBYTES bytes to multibyte text
2e344af3
KH
1539 that contains the same single-byte characters. It actually
1540 converts all 8-bit characters to multibyte forms. It is assured
1541 that we can use LEN bytes at STR as a work area and that is
1542 enough. */
1543
1544int
1545str_to_multibyte (str, len, bytes)
1546 unsigned char *str;
1547 int len, bytes;
1548{
1549 unsigned char *p = str, *endp = str + bytes;
1550 unsigned char *to;
2e344af3
KH
1551
1552 while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1553 if (p == endp)
1554 return bytes;
1555 to = p;
1556 bytes = endp - p;
1557 endp = str + len;
1558 safe_bcopy (p, endp - bytes, bytes);
1559 p = endp - bytes;
1560 while (p < endp)
1561 {
1562 if (*p < 0x80 || *p >= 0xA0)
1563 *to++ = *p++;
1564 else
1565 *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1566 }
1567 return (to - str);
87b089ad
RS
1568}
1569
2e344af3
KH
1570/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
1571 actually converts only 8-bit characters in the range 0x80..0x9F to
1572 unibyte forms. */
1573
1574int
1575str_as_unibyte (str, bytes)
1576 unsigned char *str;
1577 int bytes;
1578{
1579 unsigned char *p = str, *endp = str + bytes;
1580 unsigned char *to = str;
1581
1582 while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1583 to = p;
1584 while (p < endp)
1585 {
1586 if (*p == LEADING_CODE_8_BIT_CONTROL)
1587 *to++ = *(p + 1) - 0x20, p += 2;
1588 else
1589 *to++ = *p++;
1590 }
1591 return (to - str);
1592}
1593
1594\f
87b089ad 1595DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
4ed46869 1596 "Concatenate all the argument characters and make the result a string.")
53316e55
KH
1597 (n, args)
1598 int n;
4ed46869
KH
1599 Lisp_Object *args;
1600{
53316e55 1601 int i;
99529c2c 1602 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
4ed46869 1603 unsigned char *p = buf;
2e344af3 1604 int c;
4ed46869
KH
1605
1606 for (i = 0; i < n; i++)
1607 {
d0c037d8 1608 CHECK_NUMBER (args[i], 0);
4ed46869 1609 c = XINT (args[i]);
99529c2c 1610 p += CHAR_STRING (c, p);
4ed46869
KH
1611 }
1612
2e344af3 1613 return make_string_from_bytes (buf, n, p - buf);
4ed46869
KH
1614}
1615
1616#endif /* emacs */
1617\f
dfcf069d 1618int
4ed46869
KH
1619charset_id_internal (charset_name)
1620 char *charset_name;
1621{
76d7b829 1622 Lisp_Object val;
4ed46869 1623
76d7b829 1624 val= Fget (intern (charset_name), Qcharset);
4ed46869
KH
1625 if (!VECTORP (val))
1626 error ("Charset %s is not defined", charset_name);
1627
1628 return (XINT (XVECTOR (val)->contents[0]));
1629}
1630
1631DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1632 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1633 ()
1634{
1635 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1636 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1637 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1638 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1639 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1640 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1641 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1642 return Qnil;
1643}
1644
dfcf069d 1645void
4ed46869
KH
1646init_charset_once ()
1647{
1648 int i, j, k;
1649
1650 staticpro (&Vcharset_table);
1651 staticpro (&Vcharset_symbol_table);
8a73a704 1652 staticpro (&Vgeneric_character_list);
4ed46869
KH
1653
1654 /* This has to be done here, before we call Fmake_char_table. */
1655 Qcharset_table = intern ("charset-table");
1656 staticpro (&Qcharset_table);
1657
1658 /* Intern this now in case it isn't already done.
1659 Setting this variable twice is harmless.
1660 But don't staticpro it here--that is done in alloc.c. */
1661 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1662
1663 /* Now we are ready to set up this property, so we can
1664 create the charset table. */
1665 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1666 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1667
0282eb69
KH
1668 Qunknown = intern ("unknown");
1669 staticpro (&Qunknown);
1670 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1671 Qunknown);
4ed46869
KH
1672
1673 /* Setup tables. */
1674 for (i = 0; i < 2; i++)
1675 for (j = 0; j < 2; j++)
1676 for (k = 0; k < 128; k++)
1677 iso_charset_table [i][j][k] = -1;
1678
60383934 1679 for (i = 0; i < 256; i++)
2e344af3 1680 bytes_by_char_head[i] = 1;
2e344af3
KH
1681 bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1682 bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1683 bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1684 bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
4ed46869
KH
1685
1686 for (i = 0; i < 128; i++)
2e344af3 1687 width_by_char_head[i] = 1;
4ed46869 1688 for (; i < 256; i++)
2e344af3
KH
1689 width_by_char_head[i] = 4;
1690 width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1691 width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1692 width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1693 width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
8a73a704
KH
1694
1695 {
76d7b829 1696 Lisp_Object val;
8a73a704 1697
76d7b829 1698 val = Qnil;
8a73a704
KH
1699 for (i = 0x81; i < 0x90; i++)
1700 val = Fcons (make_number ((i - 0x70) << 7), val);
1701 for (; i < 0x9A; i++)
1702 val = Fcons (make_number ((i - 0x8F) << 14), val);
1703 for (i = 0xA0; i < 0xF0; i++)
1704 val = Fcons (make_number ((i - 0x70) << 7), val);
1705 for (; i < 0xFF; i++)
1706 val = Fcons (make_number ((i - 0xE0) << 14), val);
8a73a704
KH
1707 Vgeneric_character_list = Fnreverse (val);
1708 }
bbf12bb3
KH
1709
1710 nonascii_insert_offset = 0;
1711 Vnonascii_translation_table = Qnil;
4ed46869
KH
1712}
1713
1714#ifdef emacs
1715
dfcf069d 1716void
4ed46869
KH
1717syms_of_charset ()
1718{
2e344af3
KH
1719 Qcharset = intern ("charset");
1720 staticpro (&Qcharset);
1721
4ed46869
KH
1722 Qascii = intern ("ascii");
1723 staticpro (&Qascii);
1724
2e344af3
KH
1725 Qeight_bit_control = intern ("eight-bit-control");
1726 staticpro (&Qeight_bit_control);
1727
1728 Qeight_bit_graphic = intern ("eight-bit-graphic");
1729 staticpro (&Qeight_bit_graphic);
4ed46869 1730
2e344af3
KH
1731 /* Define special charsets ascii, eight-bit-control, and
1732 eight-bit-graphic. */
4ed46869
KH
1733 update_charset_table (make_number (CHARSET_ASCII),
1734 make_number (1), make_number (94),
1735 make_number (1),
1736 make_number (0),
1737 make_number ('B'),
1738 make_number (0),
1739 build_string ("ASCII"),
d78bc582 1740 Qnil, /* same as above */
4ed46869
KH
1741 build_string ("ASCII (ISO646 IRV)"));
1742 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1743 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1744
2e344af3
KH
1745 update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1746 make_number (1), make_number (96),
797a084a 1747 make_number (4),
2e344af3
KH
1748 make_number (0),
1749 make_number (-1),
1750 make_number (-1),
1751 build_string ("8-bit control code (0x80..0x9F)"),
d78bc582
KH
1752 Qnil, /* same as above */
1753 Qnil); /* same as above */
2e344af3
KH
1754 CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1755 Fput (Qeight_bit_control, Qcharset,
1756 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1757
1758 update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1759 make_number (1), make_number (96),
797a084a 1760 make_number (4),
2e344af3
KH
1761 make_number (0),
1762 make_number (-1),
1763 make_number (-1),
2e344af3 1764 build_string ("8-bit graphic char (0xA0..0xFF)"),
d78bc582
KH
1765 Qnil, /* same as above */
1766 Qnil); /* same as above */
2e344af3
KH
1767 CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1768 Fput (Qeight_bit_graphic, Qcharset,
1769 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1770
c1a08b4c
KH
1771 Qauto_fill_chars = intern ("auto-fill-chars");
1772 staticpro (&Qauto_fill_chars);
1773 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1774
4ed46869 1775 defsubr (&Sdefine_charset);
8a73a704 1776 defsubr (&Sgeneric_character_list);
3fac5a51 1777 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1778 defsubr (&Sdeclare_equiv_charset);
1779 defsubr (&Sfind_charset_region);
1780 defsubr (&Sfind_charset_string);
1781 defsubr (&Smake_char_internal);
1782 defsubr (&Ssplit_char);
1783 defsubr (&Schar_charset);
90d7b74e 1784 defsubr (&Scharset_after);
4ed46869 1785 defsubr (&Siso_charset);
9d3d8cba 1786 defsubr (&Schar_valid_p);
d2665018 1787 defsubr (&Sunibyte_char_to_multibyte);
1bcc1567 1788 defsubr (&Smultibyte_char_to_unibyte);
4ed46869
KH
1789 defsubr (&Schar_bytes);
1790 defsubr (&Schar_width);
1791 defsubr (&Sstring_width);
1792 defsubr (&Schar_direction);
af4fecb4 1793 defsubr (&Schars_in_region);
87b089ad 1794 defsubr (&Sstring);
4ed46869
KH
1795 defsubr (&Ssetup_special_charsets);
1796
1797 DEFVAR_LISP ("charset-list", &Vcharset_list,
1798 "List of charsets ever defined.");
2e344af3
KH
1799 Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1800 Fcons (Qeight_bit_graphic, Qnil)));
4ed46869 1801
537efd8d 1802 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
b4e9dd77
KH
1803 "Vector of cons cell of a symbol and translation table ever defined.\n\
1804An ID of a translation table is an index of this vector.");
537efd8d 1805 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
b0e3cf2b 1806
4ed46869
KH
1807 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1808 "Leading-code of private TYPE9N charset of column-width 1.");
1809 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1810
1811 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1812 "Leading-code of private TYPE9N charset of column-width 2.");
1813 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1814
1815 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1816 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1817 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1818
1819 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1820 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1821 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
35e623fb
RS
1822
1823 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
d2665018 1824 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
4cf9710d
RS
1825This is used for converting unibyte text to multibyte,\n\
1826and for inserting character codes specified by number.\n\n\
3e8ceaac
RS
1827This serves to convert a Latin-1 or similar 8-bit character code\n\
1828to the corresponding Emacs multibyte character code.\n\
1829Typically the value should be (- (make-char CHARSET 0) 128),\n\
1830for your choice of character set.\n\
537efd8d 1831If `nonascii-translation-table' is non-nil, it overrides this variable.");
35e623fb 1832 nonascii_insert_offset = 0;
b0e3cf2b 1833
b4e9dd77 1834 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
537efd8d 1835 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
4cf9710d
RS
1836This is used for converting unibyte text to multibyte,\n\
1837and for inserting character codes specified by number.\n\n\
1838Conversion is performed only when multibyte characters are enabled,\n\
1839and it serves to convert a Latin-1 or similar 8-bit character code\n\
1840to the corresponding Emacs character code.\n\n\
da4d65af 1841If this is nil, `nonascii-insert-offset' is used instead.\n\
b4e9dd77
KH
1842See also the docstring of `make-translation-table'.");
1843 Vnonascii_translation_table = Qnil;
4cf9710d 1844
c1a08b4c
KH
1845 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1846 "A char-table for characters which invoke auto-filling.\n\
8dd61baf 1847Such characters have value t in this table.");
c1a08b4c 1848 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
60022cb7
AS
1849 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1850 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
4ed46869
KH
1851}
1852
1853#endif /* emacs */