(encoded-kbd-iso2022-designation-map): Fix previous change.
[bpt/emacs.git] / src / charset.c
CommitLineData
75c8c592 1/* Basic multilingual character support.
35e623fb 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
4ed46869 4
369314dc
KH
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
4ed46869 11
369314dc
KH
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
4ed46869 16
369314dc
KH
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
4ed46869
KH
21
22/* At first, see the document in `charset.h' to understand the code in
23 this file. */
24
68c45bf0
PE
25#ifdef emacs
26#include <config.h>
27#endif
28
4ed46869
KH
29#include <stdio.h>
30
31#ifdef emacs
32
33#include <sys/types.h>
4ed46869
KH
34#include "lisp.h"
35#include "buffer.h"
36#include "charset.h"
37#include "coding.h"
fc6b09bf 38#include "disptab.h"
4ed46869
KH
39
40#else /* not emacs */
41
42#include "mulelib.h"
43
44#endif /* emacs */
45
/* Lisp objects used as well-known symbols in this file.  Qcharset is
   the property under which a charset symbol stores its info vector;
   Qascii and Qunknown are consed onto the lists returned by the
   find-charset functions.  */
Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
Lisp_Object Qunknown;

/* Declaration of special leading-codes.  */
int leading_code_private_11; /* for private DIMENSION1 of 1-column */
int leading_code_private_12; /* for private DIMENSION1 of 2-column */
int leading_code_private_21; /* for private DIMENSION2 of 1-column */
int leading_code_private_22; /* for private DIMENSION2 of 2-column */

/* Declaration of special charsets.  The values are set by
   Fsetup_special_charsets.  */
int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */

Lisp_Object Qcharset_table;

/* A char-table containing information about each character set.  */
Lisp_Object Vcharset_table;

/* A vector of charset symbols indexed by charset-id.  This is used
   only for returning a charset symbol from C functions.  */
Lisp_Object Vcharset_symbol_table;

/* A list of all charset symbols ever defined.  */
Lisp_Object Vcharset_list;

/* Vector of all translation tables ever defined.
   The ID of a translation table is used to index this vector.  */
Lisp_Object Vtranslation_table_vector;

/* A char-table for characters which may invoke auto-filling.  */
Lisp_Object Vauto_fill_chars;

Lisp_Object Qauto_fill_chars;

/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
int bytes_by_char_head[256];
int width_by_char_head[256];

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL-CHAR) to Emacs' charset.  */
int iso_charset_table[2][2][128];

/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
unsigned char *_fetch_multibyte_char_p;
int _fetch_multibyte_char_len;

/* Offset to add to a non-ASCII value when inserting it.  */
int nonascii_insert_offset;

/* Translation table for converting non-ASCII unibyte characters
   to multibyte codes, or nil.  */
Lisp_Object Vnonascii_translation_table;

/* List of all possible generic characters.  */
Lisp_Object Vgeneric_character_list;

/* Note that both macros evaluate each argument twice, so arguments
   must not have side effects.  */
#define min(X, Y) ((X) < (Y) ? (X) : (Y))
#define max(X, Y) ((X) > (Y) ? (X) : (Y))
110\f
93bcb785
KH
/* Report C as an invalid character code by calling `error'.  The
   message shows C in octal, decimal, and hexadecimal.  */
void
invalid_character (c)
     int c;
{
  error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
}
117
2e344af3
KH
/* Decode the character whose multibyte form starts at STR.  BYTES is
   set to BYTES_BY_CHAR_HEAD of the first byte, CHARSET to the charset
   of the character, and C1 (plus C2 where the form carries a second
   position code) to its position codes.

   LENGTH is part of the interface but is not referenced by the
   expansion.  C2 is assigned only for a 3-byte form whose first byte
   is below LEADING_CODE_PRIVATE_11 and for a 4-byte form; otherwise
   it is left untouched.  */

#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)        \
  do {                                                                  \
    (c1) = *(str);                                                      \
    (bytes) = BYTES_BY_CHAR_HEAD (c1);                                  \
    if ((bytes) == 1)                                                   \
      {                                                                 \
        if (ASCII_BYTE_P (c1))                                          \
          (charset) = CHARSET_ASCII;                                    \
        else                                                            \
          (charset) = CHARSET_8_BIT_GRAPHIC;                            \
      }                                                                 \
    else if ((bytes) == 2)                                              \
      {                                                                 \
        if ((c1) == LEADING_CODE_8_BIT_CONTROL)                         \
          {                                                             \
            (charset) = CHARSET_8_BIT_CONTROL;                          \
            (c1) = (str)[1] - 0x20;                                     \
          }                                                             \
        else                                                            \
          {                                                             \
            (charset) = (c1);                                           \
            (c1) = (str)[1] & 0x7F;                                     \
          }                                                             \
      }                                                                 \
    else if ((bytes) == 3)                                              \
      {                                                                 \
        if ((c1) < LEADING_CODE_PRIVATE_11)                             \
          {                                                             \
            (charset) = (c1);                                           \
            (c1) = (str)[1] & 0x7F;                                     \
            (c2) = (str)[2] & 0x7F;                                     \
          }                                                             \
        else                                                            \
          {                                                             \
            (charset) = (str)[1];                                       \
            (c1) = (str)[2] & 0x7F;                                     \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        (charset) = (str)[1];                                           \
        (c1) = (str)[2] & 0x7F;                                         \
        (c2) = (str)[3] & 0x7F;                                         \
      }                                                                 \
  } while (0)
145
/* Nonzero if CHARSET with position codes C1 and C2 forms a valid
   character, zero otherwise.  C2 is examined only when CHARSET is not
   one of ASCII, eight-bit-control, eight-bit-graphic and its
   dimension is not 1.  Arguments may be evaluated more than once.  */
#define CHAR_COMPONENTS_VALID_P(charset, c1, c2)                        \
  ((charset) == CHARSET_ASCII                                           \
   ? (0 <= (c1) && (c1) <= 0x7F)                                        \
   : (charset) == CHARSET_8_BIT_CONTROL                                 \
   ? (0x80 <= (c1) && (c1) <= 0x9F)                                     \
   : (charset) == CHARSET_8_BIT_GRAPHIC                                 \
   ? (0x80 <= (c1) && (c1) <= 0xFF)                                     \
   : CHARSET_DIMENSION (charset) == 1                                   \
   ? (0x20 <= (c1) && (c1) <= 0x7F)                                     \
   : (0x20 <= (c1) && (c1) <= 0x7F                                      \
      && 0x20 <= (c2) && (c2) <= 0x7F))
93bcb785 158
99529c2c
KH
159/* Store multi-byte form of the character C in STR. The caller should
160 allocate at least 4-byte area at STR in advance. Returns the
161 length of the multi-byte form. If C is an invalid character code,
162 signal an error.
4ed46869 163
99529c2c
KH
164 Use macro `CHAR_STRING (C, STR)' instead of calling this function
165 directly if C can be an ASCII character. */
4ed46869
KH
166
167int
99529c2c 168char_to_string (c, str)
4ed46869 169 int c;
99529c2c 170 unsigned char *str;
4ed46869 171{
99529c2c
KH
172 unsigned char *p = str;
173
6662e69b 174 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
8ac5a9cc 175 {
6662e69b
KH
176 /* Multibyte character can't have a modifier bit. */
177 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
178 invalid_character (c);
179
180 /* For Meta, Shift, and Control modifiers, we need special care. */
8ac5a9cc 181 if (c & CHAR_META)
6662e69b
KH
182 {
183 /* Move the meta bit to the right place for a string. */
184 c = (c & ~CHAR_META) | 0x80;
185 }
186 if (c & CHAR_SHIFT)
187 {
188 /* Shift modifier is valid only with [A-Za-z]. */
189 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
190 c &= ~CHAR_SHIFT;
191 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
192 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
193 }
8ac5a9cc 194 if (c & CHAR_CTL)
6662e69b
KH
195 {
196 /* Simulate the code in lread.c. */
197 /* Allow `\C- ' and `\C-?'. */
198 if (c == (CHAR_CTL | ' '))
199 c = 0;
200 else if (c == (CHAR_CTL | '?'))
201 c = 127;
202 /* ASCII control chars are made from letters (both cases),
203 as well as the non-letters within 0100...0137. */
204 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
205 c &= (037 | (~0177 & ~CHAR_CTL));
206 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
207 c &= (037 | (~0177 & ~CHAR_CTL));
208 }
209
210 /* If C still has any modifier bits, it is an invalid character. */
211 if (c & CHAR_MODIFIER_MASK)
212 invalid_character (c);
2e344af3
KH
213 }
214 if (SINGLE_BYTE_CHAR_P (c))
215 {
216 if (ASCII_BYTE_P (c) || c >= 0xA0)
217 *p++ = c;
218 else
219 {
220 *p++ = LEADING_CODE_8_BIT_CONTROL;
221 *p++ = c + 0x20;
222 }
8ac5a9cc 223 }
99529c2c 224 else if (c < MAX_CHAR)
4ed46869 225 {
ac4137cc 226 int charset, c1, c2;
4ed46869 227
2e344af3 228 SPLIT_CHAR (c, charset, c1, c2);
99529c2c
KH
229
230 if (charset >= LEADING_CODE_EXT_11)
231 *p++ = (charset < LEADING_CODE_EXT_12
232 ? LEADING_CODE_PRIVATE_11
233 : (charset < LEADING_CODE_EXT_21
234 ? LEADING_CODE_PRIVATE_12
235 : (charset < LEADING_CODE_EXT_22
236 ? LEADING_CODE_PRIVATE_21
237 : LEADING_CODE_PRIVATE_22)));
238 *p++ = charset;
239 if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
240 invalid_character (c);
241 if (c1)
4ed46869 242 {
99529c2c
KH
243 *p++ = c1 | 0x80;
244 if (c2 > 0)
245 *p++ = c2 | 0x80;
4ed46869
KH
246 }
247 }
2e344af3
KH
248 else
249 invalid_character (c);
4ed46869 250
2e344af3 251 return (p - str);
4ed46869
KH
252}
253
44c6492d
KH
/* Return the character encoded by the multi-byte form at STR, whose
   available length is LEN.  When ACTUAL_LEN is non-null, the byte
   length of that multibyte form is stored in *ACTUAL_LEN.

   Use the macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of
   calling this function directly if you want to handle ASCII
   characters as well.  */

int
string_to_char (str, len, actual_len)
     const unsigned char *str;
     int len, *actual_len;
{
  int ch, nbytes, charset, c1, c2;

  SPLIT_MULTIBYTE_SEQ (str, len, nbytes, charset, c1, c2);
  ch = MAKE_CHAR (charset, c1, c2);
  if (actual_len)
    *actual_len = nbytes;

  return ch;
}
275
44c6492d
KH
/* Return the length in bytes of the multi-byte form found at STR,
   where LEN bytes are available.  The value is whatever
   PARSE_MULTIBYTE_SEQ stores in its third argument.
   Use the macro MULTIBYTE_FORM_LENGTH instead.  */
int
multibyte_form_length (str, len)
     const unsigned char *str;
     int len;
{
  int bytes;

  PARSE_MULTIBYTE_SEQ (str, len, bytes);
  return bytes;
}
288
ac4137cc
KH
289/* Check multibyte form at string STR of length LEN and set variables
290 pointed by CHARSET, C1, and C2 to charset and position codes of the
291 character at STR, and return 0. If there's no multibyte character,
4ed46869
KH
292 return -1. This should be used only in the macro SPLIT_STRING
293 which checks range of STR in advance. */
294
dfcf069d 295int
99529c2c 296split_string (str, len, charset, c1, c2)
ac4137cc
KH
297 const unsigned char *str;
298 unsigned char *c1, *c2;
299 int len, *charset;
4ed46869 300{
ac4137cc 301 register int bytes, cs, code1, code2 = -1;
4ed46869 302
ac4137cc
KH
303 SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
304 if (cs == CHARSET_ASCII)
4ed46869 305 return -1;
ac4137cc
KH
306 *charset = cs;
307 *c1 = code1;
308 *c2 = code2;
5865af0d 309 return 0;
ac4137cc
KH
310}
311
44c6492d
KH
312/* Return 1 iff character C has valid printable glyph.
313 Use the macro CHAR_PRINTABLE_P instead. */
ac4137cc
KH
314int
315char_printable_p (c)
316 int c;
317{
318 int charset, c1, c2, chars;
319
2e344af3 320 if (ASCII_BYTE_P (c))
ac4137cc 321 return 1;
2e344af3
KH
322 else if (SINGLE_BYTE_CHAR_P (c))
323 return 0;
324 else if (c >= MAX_CHAR)
99529c2c 325 return 0;
ac4137cc 326
2e344af3 327 SPLIT_CHAR (c, charset, c1, c2);
ac4137cc
KH
328 if (! CHARSET_DEFINED_P (charset))
329 return 0;
330 if (CHARSET_CHARS (charset) == 94
331 ? c1 <= 32 || c1 >= 127
332 : c1 < 32)
333 return 0;
334 if (CHARSET_DIMENSION (charset) == 2
335 && (CHARSET_CHARS (charset) == 94
336 ? c2 <= 32 || c2 >= 127
337 : c2 < 32))
338 return 0;
339 return 1;
4ed46869
KH
340}
341
537efd8d 342/* Translate character C by translation table TABLE. If C
b4e9dd77
KH
343 is negative, translate a character specified by CHARSET, C1, and C2
344 (C1 and C2 are code points of the character). If no translation is
345 found in TABLE, return C. */
dfcf069d 346int
b4e9dd77 347translate_char (table, c, charset, c1, c2)
23d2a7f1
KH
348 Lisp_Object table;
349 int c, charset, c1, c2;
350{
351 Lisp_Object ch;
352 int alt_charset, alt_c1, alt_c2, dimension;
353
0ad3f83d 354 if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
23d2a7f1 355 if (!CHAR_TABLE_P (table)
ac4137cc 356 || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
23d2a7f1
KH
357 return c;
358
359 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
360 dimension = CHARSET_DIMENSION (alt_charset);
361 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
362 /* CH is not a generic character, just return it. */
363 return XFASTINT (ch);
364
365 /* Since CH is a generic character, we must return a specific
366 charater which has the same position codes as C from CH. */
367 if (charset < 0)
368 SPLIT_CHAR (c, charset, c1, c2);
369 if (dimension != CHARSET_DIMENSION (charset))
370 /* We can't make such a character because of dimension mismatch. */
371 return c;
23d2a7f1
KH
372 return MAKE_CHAR (alt_charset, c1, c2);
373}
374
d2665018 375/* Convert the unibyte character C to multibyte based on
b4e9dd77 376 Vnonascii_translation_table or nonascii_insert_offset. If they can't
d2665018
KH
377 convert C to a valid multibyte character, convert it based on
378 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
35e623fb 379
dfcf069d 380int
35e623fb
RS
381unibyte_char_to_multibyte (c)
382 int c;
383{
543b4f61 384 if (c < 0400 && c >= 0200)
35e623fb 385 {
d2665018
KH
386 int c_save = c;
387
b4e9dd77 388 if (! NILP (Vnonascii_translation_table))
bbf12bb3
KH
389 {
390 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
ac4137cc 391 if (c >= 0400 && ! char_valid_p (c, 0))
bbf12bb3
KH
392 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
393 }
394 else if (c >= 0240 && nonascii_insert_offset > 0)
395 {
396 c += nonascii_insert_offset;
ac4137cc 397 if (c < 0400 || ! char_valid_p (c, 0))
bbf12bb3
KH
398 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
399 }
400 else if (c >= 0240)
d2665018 401 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
35e623fb
RS
402 }
403 return c;
404}
76d7b829
KH
405
406
407/* Convert the multibyte character C to unibyte 8-bit character based
408 on Vnonascii_translation_table or nonascii_insert_offset. If
409 REV_TBL is non-nil, it should be a reverse table of
410 Vnonascii_translation_table, i.e. what given by:
411 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
412
413int
414multibyte_char_to_unibyte (c, rev_tbl)
415 int c;
416 Lisp_Object rev_tbl;
417{
418 if (!SINGLE_BYTE_CHAR_P (c))
419 {
420 int c_save = c;
421
422 if (! CHAR_TABLE_P (rev_tbl)
423 && CHAR_TABLE_P (Vnonascii_translation_table))
424 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
425 make_number (0));
426 if (CHAR_TABLE_P (rev_tbl))
427 {
428 Lisp_Object temp;
429 temp = Faref (rev_tbl, make_number (c));
430 if (INTEGERP (temp))
431 c = XINT (temp);
bbf12bb3
KH
432 if (c >= 256)
433 c = (c_save & 0177) + 0200;
434 }
435 else
436 {
437 if (nonascii_insert_offset > 0)
438 c -= nonascii_insert_offset;
439 if (c < 128 || c >= 256)
440 c = (c_save & 0177) + 0200;
76d7b829 441 }
76d7b829
KH
442 }
443
444 return c;
445}
446
35e623fb 447\f
4ed46869
KH
448/* Update the table Vcharset_table with the given arguments (see the
449 document of `define-charset' for the meaning of each argument).
450 Several other table contents are also updated. The caller should
451 check the validity of CHARSET-ID and the remaining arguments in
452 advance. */
453
454void
455update_charset_table (charset_id, dimension, chars, width, direction,
456 iso_final_char, iso_graphic_plane,
457 short_name, long_name, description)
458 Lisp_Object charset_id, dimension, chars, width, direction;
459 Lisp_Object iso_final_char, iso_graphic_plane;
460 Lisp_Object short_name, long_name, description;
461{
462 int charset = XINT (charset_id);
463 int bytes;
464 unsigned char leading_code_base, leading_code_ext;
465
6dc0722d
KH
466 if (NILP (CHARSET_TABLE_ENTRY (charset)))
467 CHARSET_TABLE_ENTRY (charset)
468 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
4ed46869 469
d78bc582
KH
470 if (NILP (long_name))
471 long_name = short_name;
472 if (NILP (description))
473 description = long_name;
474
4ed46869
KH
475 /* Get byte length of multibyte form, base leading-code, and
476 extended leading-code of the charset. See the comment under the
477 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
478 bytes = XINT (dimension);
479 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
480 {
481 /* Official charset, it doesn't have an extended leading-code. */
2e344af3 482 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
4ed46869
KH
483 bytes += 1; /* For a base leading-code. */
484 leading_code_base = charset;
485 leading_code_ext = 0;
486 }
487 else
488 {
489 /* Private charset. */
490 bytes += 2; /* For base and extended leading-codes. */
491 leading_code_base
492 = (charset < LEADING_CODE_EXT_12
493 ? LEADING_CODE_PRIVATE_11
494 : (charset < LEADING_CODE_EXT_21
495 ? LEADING_CODE_PRIVATE_12
496 : (charset < LEADING_CODE_EXT_22
497 ? LEADING_CODE_PRIVATE_21
498 : LEADING_CODE_PRIVATE_22)));
499 leading_code_ext = charset;
500 }
501
2e344af3
KH
502 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
503 &&BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
6ef23ebb
KH
504 error ("Invalid dimension for the charset-ID %d", charset);
505
4ed46869
KH
506 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
507 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
508 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
509 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
510 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
511 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
512 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
513 = make_number (leading_code_base);
514 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
515 = make_number (leading_code_ext);
516 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
517 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
518 = iso_graphic_plane;
519 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
520 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
521 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
522 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
523
524 {
525 /* If we have already defined a charset which has the same
526 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
527 DIRECTION, we must update the entry REVERSE-CHARSET of both
528 charsets. If there's no such charset, the value of the entry
529 is set to nil. */
530 int i;
531
513ee442 532 for (i = 0; i <= MAX_CHARSET; i++)
4ed46869
KH
533 if (!NILP (CHARSET_TABLE_ENTRY (i)))
534 {
535 if (CHARSET_DIMENSION (i) == XINT (dimension)
536 && CHARSET_CHARS (i) == XINT (chars)
537 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
538 && CHARSET_DIRECTION (i) != XINT (direction))
539 {
540 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
541 = make_number (i);
542 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
543 break;
544 }
545 }
513ee442 546 if (i > MAX_CHARSET)
4ed46869
KH
547 /* No such a charset. */
548 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
549 = make_number (-1);
550 }
551
552 if (charset != CHARSET_ASCII
553 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
554 {
4ed46869
KH
555 width_by_char_head[leading_code_base] = XINT (width);
556
557 /* Update table emacs_code_class. */
558 emacs_code_class[charset] = (bytes == 2
559 ? EMACS_leading_code_2
560 : (bytes == 3
561 ? EMACS_leading_code_3
562 : EMACS_leading_code_4));
563 }
564
565 /* Update table iso_charset_table. */
2e344af3
KH
566 if (iso_final_char >= 0
567 && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
4ed46869
KH
568 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
569}
570
571#ifdef emacs
572
573/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
574 is invalid. */
575int
576get_charset_id (charset_symbol)
577 Lisp_Object charset_symbol;
578{
579 Lisp_Object val;
580 int charset;
581
582 return ((SYMBOLP (charset_symbol)
583 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
584 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
585 CHARSET_VALID_P (charset)))
586 ? charset : -1);
587}
588
589/* Return an identification number for a new private charset of
590 DIMENSION and WIDTH. If there's no more room for the new charset,
591 return 0. */
592Lisp_Object
593get_new_private_charset_id (dimension, width)
594 int dimension, width;
595{
596 int charset, from, to;
597
598 if (dimension == 1)
599 {
600 if (width == 1)
601 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
602 else
603 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
604 }
605 else
606 {
607 if (width == 1)
608 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
609 else
b0e3cf2b 610 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX + 1;
4ed46869
KH
611 }
612
613 for (charset = from; charset < to; charset++)
614 if (!CHARSET_DEFINED_P (charset)) break;
615
616 return make_number (charset < to ? charset : 0);
617}
618
619DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
620 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
23d2a7f1 621If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
4ed46869
KH
622 treated as a private charset.\n\
623INFO-VECTOR is a vector of the format:\n\
624 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
625 SHORT-NAME LONG-NAME DESCRIPTION]\n\
626The meanings of each elements is as follows:\n\
627DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
628CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
629WIDTH (integer) is the number of columns a character in the charset\n\
630occupies on the screen: one of 0, 1, and 2.\n\
631\n\
632DIRECTION (integer) is the rendering direction of characters in the\n\
277576f6
KH
633charset when rendering. If 0, render from left to right, else\n\
634render from right to left.\n\
4ed46869
KH
635\n\
636ISO-FINAL-CHAR (character) is the final character of the\n\
637corresponding ISO 2022 charset.\n\
2e344af3 638It may be -1 if the charset is internal use only.\n\
4ed46869
KH
639\n\
640ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
641while encoding to variants of ISO 2022 coding system, one of the\n\
642following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
2e344af3 643It may be -1 if the charset is internal use only.\n\
4ed46869
KH
644\n\
645SHORT-NAME (string) is the short name to refer to the charset.\n\
646\n\
647LONG-NAME (string) is the long name to refer to the charset.\n\
648\n\
649DESCRIPTION (string) is the description string of the charset.")
650 (charset_id, charset_symbol, info_vector)
651 Lisp_Object charset_id, charset_symbol, info_vector;
652{
653 Lisp_Object *vec;
654
655 if (!NILP (charset_id))
656 CHECK_NUMBER (charset_id, 0);
657 CHECK_SYMBOL (charset_symbol, 1);
658 CHECK_VECTOR (info_vector, 2);
659
660 if (! NILP (charset_id))
661 {
662 if (! CHARSET_VALID_P (XINT (charset_id)))
663 error ("Invalid CHARSET: %d", XINT (charset_id));
664 else if (CHARSET_DEFINED_P (XINT (charset_id)))
665 error ("Already defined charset: %d", XINT (charset_id));
666 }
667
668 vec = XVECTOR (info_vector)->contents;
669 if (XVECTOR (info_vector)->size != 9
670 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
671 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
672 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
673 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
2e344af3
KH
674 || !INTEGERP (vec[4])
675 || !(XINT (vec[4]) == -1 || XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
676 || !INTEGERP (vec[5])
677 || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
4ed46869
KH
678 || !STRINGP (vec[6])
679 || !STRINGP (vec[7])
680 || !STRINGP (vec[8]))
681 error ("Invalid info-vector argument for defining charset %s",
682 XSYMBOL (charset_symbol)->name->data);
683
684 if (NILP (charset_id))
685 {
686 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
687 if (XINT (charset_id) == 0)
688 error ("There's no room for a new private charset %s",
689 XSYMBOL (charset_symbol)->name->data);
690 }
691
692 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
693 vec[4], vec[5], vec[6], vec[7], vec[8]);
6dc0722d 694 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
4ed46869
KH
695 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
696 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
697 return Qnil;
698}
699
8a73a704
KH
/* Lisp accessor that simply returns the current value of
   Vgeneric_character_list.  */
DEFUN ("generic-character-list", Fgeneric_character_list,
       Sgeneric_character_list, 0, 0, 0,
  "Return a list of all possible generic characters.\n\
It includes a generic character for a charset not yet defined.")
  ()
{
  return Vgeneric_character_list;
}
708
3fac5a51
KH
709DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
710 Sget_unused_iso_final_char, 2, 2, 0,
711 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
712DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
713CHARS is the number of characters in a dimension: 94 or 96.\n\
714\n\
715This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
716If there's no unused final char for the specified kind of charset,\n\
717return nil.")
718 (dimension, chars)
719 Lisp_Object dimension, chars;
720{
721 int final_char;
722
723 CHECK_NUMBER (dimension, 0);
724 CHECK_NUMBER (chars, 1);
725 if (XINT (dimension) != 1 && XINT (dimension) != 2)
726 error ("Invalid charset dimension %d, it should be 1 or 2",
727 XINT (dimension));
728 if (XINT (chars) != 94 && XINT (chars) != 96)
729 error ("Invalid charset chars %d, it should be 94 or 96",
730 XINT (chars));
731 for (final_char = '0'; final_char <= '?'; final_char++)
732 {
733 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
734 break;
735 }
736 return (final_char <= '?' ? make_number (final_char) : Qnil);
737}
738
4ed46869
KH
739DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
740 4, 4, 0,
741 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
742CHARSET should be defined by `defined-charset' in advance.")
743 (dimension, chars, final_char, charset_symbol)
744 Lisp_Object dimension, chars, final_char, charset_symbol;
745{
746 int charset;
747
748 CHECK_NUMBER (dimension, 0);
749 CHECK_NUMBER (chars, 1);
750 CHECK_NUMBER (final_char, 2);
751 CHECK_SYMBOL (charset_symbol, 3);
752
753 if (XINT (dimension) != 1 && XINT (dimension) != 2)
754 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
755 if (XINT (chars) != 94 && XINT (chars) != 96)
756 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
757 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
758 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
759 if ((charset = get_charset_id (charset_symbol)) < 0)
760 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
761
762 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
763 return Qnil;
764}
765
2e344af3
KH
766/* Return information about charsets in the text at PTR of NBYTES
767 bytes, which are NCHARS characters. The value is:
f6302ac9 768
cfe34140 769 0: Each character is represented by one byte. This is always
f6302ac9 770 true for unibyte text.
2e344af3
KH
771 1: No charsets other than ascii eight-bit-control,
772 eight-bit-graphic, and latin-1 are found.
773 2: Otherwise.
1d67c29b 774
2e344af3
KH
775 In addition, if CHARSETS is nonzero, for each found charset N, set
776 CHARSETS[N] to 1. For that, callers should allocate CHARSETS
777 (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
778 table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
779 1 (note that there's no charset whose ID is 1). */
4ed46869
KH
780
781int
2e344af3
KH
782find_charset_in_text (ptr, nchars, nbytes, charsets, table)
783 unsigned char *ptr;
784 int nchars, nbytes, *charsets;
23d2a7f1 785 Lisp_Object table;
4ed46869 786{
2e344af3 787 if (nchars == nbytes)
0282eb69 788 {
2e344af3 789 if (charsets && nbytes > 0)
0282eb69 790 {
2e344af3
KH
791 unsigned char *endp = ptr + nbytes;
792 int maskbits = 0;
793
794 while (ptr < endp && maskbits != 7)
795 {
796 maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
797 ptr++;
798 }
799
800 if (maskbits & 1)
801 charsets[CHARSET_ASCII] = 1;
802 if (maskbits & 2)
803 charsets[CHARSET_8_BIT_CONTROL] = 1;
804 if (maskbits & 4)
805 charsets[CHARSET_8_BIT_GRAPHIC] = 1;
0282eb69 806 }
2e344af3 807 return 0;
0282eb69 808 }
2e344af3 809 else
4ed46869 810 {
2e344af3 811 int return_val = 1;
99529c2c 812 int bytes, charset, c1, c2;
05505664 813
2e344af3
KH
814 if (! CHAR_TABLE_P (table))
815 table = Qnil;
05505664 816
2e344af3 817 while (nchars-- > 0)
23d2a7f1 818 {
2e344af3
KH
819 SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
820 ptr += bytes;
4ed46869 821
2e344af3
KH
822 if (!CHARSET_DEFINED_P (charset))
823 charset = 1;
824 else if (! NILP (table))
825 {
826 int c = translate_char (table, -1, charset, c1, c2);
827 if (c >= 0)
828 charset = CHAR_CHARSET (c);
829 }
830
831 if (return_val == 1
832 && charset != CHARSET_ASCII
833 && charset != CHARSET_8_BIT_CONTROL
834 && charset != CHARSET_8_BIT_GRAPHIC
835 && charset != charset_latin_iso8859_1)
836 return_val = 2;
837
838 if (charsets)
839 charsets[charset] = 1;
840 else if (return_val == 2)
841 break;
4ed46869 842 }
2e344af3 843 return return_val;
4ed46869 844 }
4ed46869
KH
845}
846
847DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 848 2, 3, 0,
4ed46869 849 "Return a list of charsets in the region between BEG and END.\n\
23d2a7f1 850BEG and END are buffer positions.\n\
0282eb69
KH
851Optional arg TABLE if non-nil is a translation table to look up.\n\
852\n\
853If the region contains invalid multiybte characters,\n\
38f02ede 854`unknown' is included in the returned list.\n\
0282eb69 855\n\
2e344af3
KH
856If the current buffer is unibyte, the returned list may contain\n\
857only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
23d2a7f1
KH
858 (beg, end, table)
859 Lisp_Object beg, end, table;
4ed46869 860{
028d516b 861 int charsets[MAX_CHARSET + 1];
6ae1f27e 862 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
863 Lisp_Object val;
864
865 validate_region (&beg, &end);
866 from = XFASTINT (beg);
867 stop = to = XFASTINT (end);
6ae1f27e 868
4ed46869 869 if (from < GPT && GPT < to)
6ae1f27e
RS
870 {
871 stop = GPT;
872 stop_byte = GPT_BYTE;
873 }
874 else
875 stop_byte = CHAR_TO_BYTE (stop);
876
877 from_byte = CHAR_TO_BYTE (from);
878
028d516b 879 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
4ed46869
KH
880 while (1)
881 {
2e344af3
KH
882 find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
883 stop_byte - from_byte, charsets, table);
4ed46869 884 if (stop < to)
6ae1f27e
RS
885 {
886 from = stop, from_byte = stop_byte;
887 stop = to, stop_byte = CHAR_TO_BYTE (stop);
888 }
4ed46869
KH
889 else
890 break;
891 }
6ae1f27e 892
4ed46869 893 val = Qnil;
2e344af3 894 if (charsets[1])
0282eb69 895 val = Fcons (Qunknown, val);
2e344af3
KH
896 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
897 if (charsets[i])
898 val = Fcons (CHARSET_SYMBOL (i), val);
899 if (charsets[0])
900 val = Fcons (Qascii, val);
4ed46869
KH
901 return val;
902}
903
904DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1
KH
905 1, 2, 0,
906 "Return a list of charsets in STR.\n\
0282eb69
KH
907Optional arg TABLE if non-nil is a translation table to look up.\n\
908\n\
909If the region contains invalid multiybte characters,\n\
910`unknown' is included in the returned list.\n\
911\n\
2e344af3
KH
912If STR is unibyte, the returned list may contain\n\
913only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
23d2a7f1
KH
914 (str, table)
915 Lisp_Object str, table;
4ed46869 916{
a29e3b1b 917 int charsets[MAX_CHARSET + 1];
4ed46869
KH
918 int i;
919 Lisp_Object val;
920
921 CHECK_STRING (str, 0);
87b089ad 922
a29e3b1b 923 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
2e344af3
KH
924 find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
925 STRING_BYTES (XSTRING (str)), charsets, table);
926
4ed46869 927 val = Qnil;
2e344af3 928 if (charsets[1])
0282eb69 929 val = Fcons (Qunknown, val);
2e344af3
KH
930 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
931 if (charsets[i])
932 val = Fcons (CHARSET_SYMBOL (i), val);
933 if (charsets[0])
934 val = Fcons (Qascii, val);
4ed46869
KH
935 return val;
936}
2e344af3 937
4ed46869
KH
938\f
939DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
513ee442 940 "")
4ed46869
KH
941 (charset, code1, code2)
942 Lisp_Object charset, code1, code2;
943{
ac4137cc
KH
944 int charset_id, c1, c2;
945
4ed46869 946 CHECK_NUMBER (charset, 0);
ac4137cc
KH
947 charset_id = XINT (charset);
948 if (!CHARSET_DEFINED_P (charset_id))
949 error ("Invalid charset ID: %d", XINT (charset));
4ed46869
KH
950
951 if (NILP (code1))
ac4137cc 952 c1 = 0;
4ed46869 953 else
ac4137cc
KH
954 {
955 CHECK_NUMBER (code1, 1);
956 c1 = XINT (code1);
957 }
4ed46869 958 if (NILP (code2))
ac4137cc 959 c2 = 0;
4ed46869 960 else
ac4137cc
KH
961 {
962 CHECK_NUMBER (code2, 2);
963 c2 = XINT (code2);
964 }
4ed46869 965
2e344af3
KH
966 if (charset_id == CHARSET_ASCII)
967 {
968 if (c1 < 0 || c1 > 0x7F)
969 goto invalid_code_posints;
970 return make_number (c1);
971 }
972 else if (charset_id == CHARSET_8_BIT_CONTROL)
973 {
974 if (c1 < 0x80 || c1 > 0x9F)
975 goto invalid_code_posints;
976 return make_number (c1);
977 }
978 else if (charset_id == CHARSET_8_BIT_GRAPHIC)
979 {
980 if (c1 < 0xA0 || c1 > 0xFF)
981 goto invalid_code_posints;
982 return make_number (c1);
983 }
984 else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
985 goto invalid_code_posints;
ac4137cc
KH
986 c1 &= 0x7F;
987 c2 &= 0x7F;
988 if (c1 == 0
989 ? c2 != 0
990 : (c2 == 0
87f67317
KR
991 ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
992 : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
2e344af3 993 goto invalid_code_posints;
ac4137cc 994 return make_number (MAKE_CHAR (charset_id, c1, c2));
2e344af3
KH
995
996 invalid_code_posints:
997 error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
4ed46869
KH
998}
999
1000DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
0282eb69
KH
1001 "Return list of charset and one or two position-codes of CHAR.\n\
1002If CHAR is invalid as a character code,\n\
1003return a list of symbol `unknown' and CHAR.")
4ed46869
KH
1004 (ch)
1005 Lisp_Object ch;
1006{
1007 Lisp_Object val;
0282eb69 1008 int c, charset, c1, c2;
4ed46869
KH
1009
1010 CHECK_NUMBER (ch, 0);
0282eb69
KH
1011 c = XFASTINT (ch);
1012 if (!CHAR_VALID_P (c, 1))
1013 return Fcons (Qunknown, Fcons (ch, Qnil));
4ed46869 1014 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6dc0722d 1015 return (c2 >= 0
4ed46869
KH
1016 ? Fcons (CHARSET_SYMBOL (charset),
1017 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1018 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1019}
1020
1021DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1022 "Return charset of CHAR.")
1023 (ch)
1024 Lisp_Object ch;
1025{
1026 CHECK_NUMBER (ch, 0);
1027
1028 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1029}
1030
90d7b74e 1031DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
ac4137cc 1032 "Return charset of a character in the current buffer at position POS.\n\
e6e114f2
KH
1033If POS is nil, it defauls to the current point.\n\
1034If POS is out of range, the value is nil.")
90d7b74e
KH
1035 (pos)
1036 Lisp_Object pos;
1037{
2e344af3
KH
1038 Lisp_Object ch;
1039 int charset;
ac4137cc 1040
2e344af3
KH
1041 ch = Fchar_after (pos);
1042 if (! INTEGERP (ch))
1043 return ch;
1044 charset = CHAR_CHARSET (XINT (ch));
90d7b74e
KH
1045 return CHARSET_SYMBOL (charset);
1046}
1047
4ed46869 1048DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2b71bb78
KH
1049 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1050\n\
1051ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1052by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1053where as Emacs distinguishes them by charset symbol.\n\
1054See the documentation of the function `charset-info' for the meanings of\n\
1055DIMENSION, CHARS, and FINAL-CHAR.")
4ed46869
KH
1056 (dimension, chars, final_char)
1057 Lisp_Object dimension, chars, final_char;
1058{
1059 int charset;
1060
1061 CHECK_NUMBER (dimension, 0);
1062 CHECK_NUMBER (chars, 1);
1063 CHECK_NUMBER (final_char, 2);
1064
1065 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1066 return Qnil;
1067 return CHARSET_SYMBOL (charset);
1068}
1069
9d3d8cba
KH
/* Return nonzero iff C is a valid character code.  With GENERICP
   zero only normal characters are accepted; with GENERICP nonzero
   generic characters are accepted as well.  Callers should go through
   the macro CHAR_VALID_P rather than calling this function directly.  */
int
char_valid_p (c, genericp)
     int c, genericp;
{
  int charset, code1, code2;

  if (c < 0)
    return 0;
  if (SINGLE_BYTE_CHAR_P (c))
    return 1;

  SPLIT_CHAR (c, charset, code1, code2);

  /* For a generic character, stand in 0x20 for each missing position
     code so that the component check below can be applied.  */
  if (genericp && code2 <= 0)
    {
      if (!code1)
	code1 = 0x20;
      code2 = 0x20;
    }

  if (!CHARSET_DEFINED_P (charset))
    return 0;
  return CHAR_COMPONENTS_VALID_P (charset, code1, code2);
}
1099
1100DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
a9d02884
DL
1101 "Return t if OBJECT is a valid normal character.\n\
1102If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
9d3d8cba
KH
1103a valid generic character.")
1104 (object, genericp)
1105 Lisp_Object object, genericp;
1106{
1107 if (! NATNUMP (object))
1108 return Qnil;
1109 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1110}
1111
d2665018
KH
1112DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1113 Sunibyte_char_to_multibyte, 1, 1, 0,
1114 "Convert the unibyte character CH to multibyte character.\n\
537efd8d 1115The conversion is done based on `nonascii-translation-table' (which see)\n\
340b8d58 1116 or `nonascii-insert-offset' (which see).")
d2665018
KH
1117 (ch)
1118 Lisp_Object ch;
1119{
1120 int c;
1121
1122 CHECK_NUMBER (ch, 0);
1123 c = XINT (ch);
1124 if (c < 0 || c >= 0400)
1125 error ("Invalid unibyte character: %d", c);
1126 c = unibyte_char_to_multibyte (c);
1127 if (c < 0)
1128 error ("Can't convert to multibyte character: %d", XINT (ch));
1129 return make_number (c);
1130}
1131
1bcc1567
RS
1132DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1133 Smultibyte_char_to_unibyte, 1, 1, 0,
1134 "Convert the multibyte character CH to unibyte character.\n\
1135The conversion is done based on `nonascii-translation-table' (which see)\n\
1136 or `nonascii-insert-offset' (which see).")
1137 (ch)
1138 Lisp_Object ch;
1139{
1140 int c;
1141
1142 CHECK_NUMBER (ch, 0);
1143 c = XINT (ch);
ac4137cc 1144 if (! CHAR_VALID_P (c, 0))
1bcc1567
RS
1145 error ("Invalid multibyte character: %d", c);
1146 c = multibyte_char_to_unibyte (c, Qnil);
1147 if (c < 0)
1148 error ("Can't convert to unibyte character: %d", XINT (ch));
1149 return make_number (c);
1150}
1151
4ed46869 1152DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
f78643ef 1153 "Return 1 regardless of the argument CHAR.\n\
60022cb7 1154This is now an obsolete function. We keep it just for backward compatibility.")
4ed46869
KH
1155 (ch)
1156 Lisp_Object ch;
1157{
1158 Lisp_Object val;
4ed46869
KH
1159
1160 CHECK_NUMBER (ch, 0);
9b6a601f
KH
1161 return make_number (1);
1162}
1163
1164/* Return how many bytes C will occupy in a multibyte buffer.
1165 Don't call this function directly, instead use macro CHAR_BYTES. */
1166int
1167char_bytes (c)
1168 int c;
1169{
99529c2c 1170 int charset;
9b6a601f 1171
2e344af3
KH
1172 if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1173 return 1;
1174 if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
8ac5a9cc
KH
1175 return 1;
1176
99529c2c
KH
1177 charset = CHAR_CHARSET (c);
1178 return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
4ed46869
KH
1179}
1180
/* Return the width of character of which multi-byte form starts with
   C.  The width is measured by how many columns occupied on the
   screen when displayed in the current buffer.

   The result is: the buffer's tab width for a tab, 0 for a newline,
   4 or 2 (depending on whether the buffer's `ctl_arrow' is nil) for
   the other codes below 0x20 and for 0x7F, 1 for the remaining codes
   below 0x7F, WIDTH_BY_CHAR_HEAD (C) for a base leading code when
   multibyte characters are enabled in the buffer, and 4 otherwise.

   C is evaluated more than once, so the argument must not have side
   effects.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                           \
  ((c) < 0x20                                                            \
   ? ((c) == '\t'                                                        \
      ? XFASTINT (current_buffer->tab_width)                             \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                         \
      ? 1                                                                \
      : ((c) == 0x7F                                                     \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                    \
         : ((! NILP (current_buffer->enable_multibyte_characters)        \
             && BASE_LEADING_CODE_P (c))                                 \
            ? WIDTH_BY_CHAR_HEAD (c)                                     \
            : 4))))
4ed46869
KH
1198
1199DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1200 "Return width of CHAR when displayed in the current buffer.\n\
1201The width is measured by how many columns it occupies on the screen.")
1202 (ch)
1203 Lisp_Object ch;
1204{
859f2b3c 1205 Lisp_Object val, disp;
4ed46869 1206 int c;
51c4025f 1207 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869
KH
1208
1209 CHECK_NUMBER (ch, 0);
1210
859f2b3c
RS
1211 c = XINT (ch);
1212
1213 /* Get the way the display table would display it. */
51c4025f 1214 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
859f2b3c
RS
1215
1216 if (VECTORP (disp))
1217 XSETINT (val, XVECTOR (disp)->size);
1218 else if (SINGLE_BYTE_CHAR_P (c))
1219 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
4ed46869
KH
1220 else
1221 {
1222 int charset = CHAR_CHARSET (c);
1223
1224 XSETFASTINT (val, CHARSET_WIDTH (charset));
1225 }
1226 return val;
1227}
1228
1229/* Return width of string STR of length LEN when displayed in the
1230 current buffer. The width is measured by how many columns it
1231 occupies on the screen. */
859f2b3c 1232
4ed46869
KH
1233int
1234strwidth (str, len)
1235 unsigned char *str;
1236 int len;
1237{
1238 unsigned char *endp = str + len;
1239 int width = 0;
c4a4e28f 1240 struct Lisp_Char_Table *dp = buffer_display_table ();
4ed46869 1241
859f2b3c
RS
1242 while (str < endp)
1243 {
99529c2c
KH
1244 Lisp_Object disp;
1245 int thislen;
1246 int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen);
859f2b3c 1247
99529c2c
KH
1248 /* Get the way the display table would display it. */
1249 if (dp)
1250 disp = DISP_CHAR_VECTOR (dp, c);
859f2b3c 1251 else
99529c2c
KH
1252 disp = Qnil;
1253
1254 if (VECTORP (disp))
1255 width += XVECTOR (disp)->size;
1256 else
1257 width += ONE_BYTE_CHAR_WIDTH (*str);
1258
1259 str += thislen;
859f2b3c 1260 }
4ed46869
KH
1261 return width;
1262}
1263
1264DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1265 "Return width of STRING when displayed in the current buffer.\n\
1266Width is measured by how many columns it occupies on the screen.\n\
046b1f03
RS
1267When calculating width of a multibyte character in STRING,\n\
1268only the base leading-code is considered; the validity of\n\
1269the following bytes is not checked.")
4ed46869
KH
1270 (str)
1271 Lisp_Object str;
1272{
1273 Lisp_Object val;
1274
1275 CHECK_STRING (str, 0);
fc932ac6
RS
1276 XSETFASTINT (val, strwidth (XSTRING (str)->data,
1277 STRING_BYTES (XSTRING (str))));
4ed46869
KH
1278 return val;
1279}
1280
1281DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1282 "Return the direction of CHAR.\n\
1283The returned value is 0 for left-to-right and 1 for right-to-left.")
1284 (ch)
1285 Lisp_Object ch;
1286{
1287 int charset;
1288
1289 CHECK_NUMBER (ch, 0);
1290 charset = CHAR_CHARSET (XFASTINT (ch));
1291 if (!CHARSET_DEFINED_P (charset))
93bcb785 1292 invalid_character (XINT (ch));
4ed46869
KH
1293 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1294}
1295
af4fecb4 1296DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
6ae1f27e 1297 "Return number of characters between BEG and END.")
046b1f03
RS
1298 (beg, end)
1299 Lisp_Object beg, end;
1300{
6ae1f27e 1301 int from, to;
046b1f03 1302
17e7ef1b
RS
1303 CHECK_NUMBER_COERCE_MARKER (beg, 0);
1304 CHECK_NUMBER_COERCE_MARKER (end, 1);
1305
046b1f03 1306 from = min (XFASTINT (beg), XFASTINT (end));
a8a35e61 1307 to = max (XFASTINT (beg), XFASTINT (end));
046b1f03 1308
a8c21066 1309 return make_number (to - from);
6ae1f27e 1310}
9036eb45 1311
87b089ad
RS
1312/* Return the number of characters in the NBYTES bytes at PTR.
1313 This works by looking at the contents and checking for multibyte sequences.
1314 However, if the current buffer has enable-multibyte-characters = nil,
1315 we treat each byte as a character. */
1316
6ae1f27e
RS
1317int
1318chars_in_text (ptr, nbytes)
1319 unsigned char *ptr;
1320 int nbytes;
1321{
87b089ad
RS
1322 /* current_buffer is null at early stages of Emacs initialization. */
1323 if (current_buffer == 0
1324 || NILP (current_buffer->enable_multibyte_characters))
6ae1f27e 1325 return nbytes;
a8a35e61 1326
ac4137cc 1327 return multibyte_chars_in_text (ptr, nbytes);
046b1f03
RS
1328}
1329
87b089ad
RS
/* Return the number of characters in the NBYTES bytes at PTR, always
   examining the bytes for multibyte sequences.  Unlike chars_in_text,
   this pays no attention to enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count, seq_len;

  for (count = 0; ptr < limit; count++)
    {
      /* SEQ_LEN is set to the byte length of the sequence at PTR.  */
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seq_len);
      ptr += seq_len;
    }

  return count;
}
1354
/* Scan the LEN bytes of unibyte text at STR as if they were multibyte
   text, storing the number of characters in *NCHARS and the number of
   bytes in *NBYTES.  A byte that does not take part in a multibyte
   sequence is counted as two bytes, because 8-bit characters in the
   range 0x80..0x9F take two bytes in a multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     unsigned char *str;
     int len, *nchars, *nbytes;
{
  unsigned char *limit = str + len;
  int seq_len, char_count, byte_count = 0;

  for (char_count = 0; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, seq_len))
	{
	  str += seq_len;
	  byte_count += seq_len;
	}
      else
	{
	  str += 1;
	  byte_count += 2;
	}
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1379
1380/* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
1381 It actually converts only 8-bit characters in the range 0x80..0x9F
1382 that don't contruct multibyte characters to multibyte forms. If
1383 NCHARS is nonzero, set *NCHARS to the number of characters in the
1384 text. It is assured that we can use LEN bytes at STR as a work
1385 area and that is enough. Return the number of bytes of the
1386 resulting text. */
1387
1388int
1389str_as_multibyte (str, len, nbytes, nchars)
1390 unsigned char *str;
1391 int len, nbytes, *nchars;
1392{
1393 unsigned char *p = str, *endp = str + nbytes;
1394 unsigned char *to;
1395 int chars = 0;
1396 int n;
1397
1398 while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1399 p += n, chars++;
1400 if (nchars)
1401 *nchars = chars;
1402 if (p == endp)
1403 return nbytes;
1404
1405 to = p;
1406 nbytes = endp - p;
1407 endp = str + len;
1408 safe_bcopy (p, endp - nbytes, nbytes);
1409 p = endp - nbytes;
1410 while (p < endp)
1411 {
1412 if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
ac4137cc 1413 {
2e344af3
KH
1414 while (n--)
1415 *to++ = *p++;
1416 }
ac4137cc
KH
1417 else
1418 {
2e344af3
KH
1419 *to++ = LEADING_CODE_8_BIT_CONTROL;
1420 *to++ = *p++ + 0x20;
ac4137cc 1421 }
87b089ad
RS
1422 chars++;
1423 }
2e344af3
KH
1424 if (nchars)
1425 *nchars = chars;
1426 return (to - str);
1427}
87b089ad 1428
2e344af3
KH
1429/* Convert unibyte text at STR of NBYTES bytes to a multibyte text
1430 that contains the same single-byte characters. It actually
1431 converts all 8-bit characters to multibyte forms. It is assured
1432 that we can use LEN bytes at STR as a work area and that is
1433 enough. */
1434
1435int
1436str_to_multibyte (str, len, bytes)
1437 unsigned char *str;
1438 int len, bytes;
1439{
1440 unsigned char *p = str, *endp = str + bytes;
1441 unsigned char *to;
1442 int c;
1443
1444 while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1445 if (p == endp)
1446 return bytes;
1447 to = p;
1448 bytes = endp - p;
1449 endp = str + len;
1450 safe_bcopy (p, endp - bytes, bytes);
1451 p = endp - bytes;
1452 while (p < endp)
1453 {
1454 if (*p < 0x80 || *p >= 0xA0)
1455 *to++ = *p++;
1456 else
1457 *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1458 }
1459 return (to - str);
87b089ad
RS
1460}
1461
2e344af3
KH
1462/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
1463 actually converts only 8-bit characters in the range 0x80..0x9F to
1464 unibyte forms. */
1465
1466int
1467str_as_unibyte (str, bytes)
1468 unsigned char *str;
1469 int bytes;
1470{
1471 unsigned char *p = str, *endp = str + bytes;
1472 unsigned char *to = str;
1473
1474 while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1475 to = p;
1476 while (p < endp)
1477 {
1478 if (*p == LEADING_CODE_8_BIT_CONTROL)
1479 *to++ = *(p + 1) - 0x20, p += 2;
1480 else
1481 *to++ = *p++;
1482 }
1483 return (to - str);
1484}
1485
1486\f
87b089ad 1487DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
4ed46869 1488 "Concatenate all the argument characters and make the result a string.")
53316e55
KH
1489 (n, args)
1490 int n;
4ed46869
KH
1491 Lisp_Object *args;
1492{
53316e55 1493 int i;
99529c2c 1494 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
4ed46869 1495 unsigned char *p = buf;
2e344af3 1496 int c;
4ed46869
KH
1497
1498 for (i = 0; i < n; i++)
1499 {
d0c037d8 1500 CHECK_NUMBER (args[i], 0);
4ed46869 1501 c = XINT (args[i]);
99529c2c 1502 p += CHAR_STRING (c, p);
4ed46869
KH
1503 }
1504
2e344af3 1505 return make_string_from_bytes (buf, n, p - buf);
4ed46869
KH
1506}
1507
1508#endif /* emacs */
1509\f
dfcf069d 1510int
4ed46869
KH
1511charset_id_internal (charset_name)
1512 char *charset_name;
1513{
76d7b829 1514 Lisp_Object val;
4ed46869 1515
76d7b829 1516 val= Fget (intern (charset_name), Qcharset);
4ed46869
KH
1517 if (!VECTORP (val))
1518 error ("Charset %s is not defined", charset_name);
1519
1520 return (XINT (XVECTOR (val)->contents[0]));
1521}
1522
1523DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1524 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1525 ()
1526{
1527 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1528 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1529 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1530 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1531 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1532 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1533 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1534 return Qnil;
1535}
1536
dfcf069d 1537void
4ed46869
KH
1538init_charset_once ()
1539{
1540 int i, j, k;
1541
1542 staticpro (&Vcharset_table);
1543 staticpro (&Vcharset_symbol_table);
8a73a704 1544 staticpro (&Vgeneric_character_list);
4ed46869
KH
1545
1546 /* This has to be done here, before we call Fmake_char_table. */
1547 Qcharset_table = intern ("charset-table");
1548 staticpro (&Qcharset_table);
1549
1550 /* Intern this now in case it isn't already done.
1551 Setting this variable twice is harmless.
1552 But don't staticpro it here--that is done in alloc.c. */
1553 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1554
1555 /* Now we are ready to set up this property, so we can
1556 create the charset table. */
1557 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1558 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1559
0282eb69
KH
1560 Qunknown = intern ("unknown");
1561 staticpro (&Qunknown);
1562 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1563 Qunknown);
4ed46869
KH
1564
1565 /* Setup tables. */
1566 for (i = 0; i < 2; i++)
1567 for (j = 0; j < 2; j++)
1568 for (k = 0; k < 128; k++)
1569 iso_charset_table [i][j][k] = -1;
1570
60383934 1571 for (i = 0; i < 256; i++)
2e344af3 1572 bytes_by_char_head[i] = 1;
a5236564 1573 for (i = 128; i < MIN_CHARSET_OFFICIAL_DIMENSION2; i++)
2e344af3 1574 bytes_by_char_head[i] = 2;
a5236564 1575 for (; i <= MAX_CHARSET_OFFICIAL_DIMENSION2; i++)
2e344af3 1576 bytes_by_char_head[i] = 3;
a5236564
KH
1577 for (; i < 160; i++)
1578 bytes_by_char_head[i] = 2;
2e344af3
KH
1579 bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1580 bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1581 bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1582 bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
4ed46869
KH
1583
1584 for (i = 0; i < 128; i++)
2e344af3 1585 width_by_char_head[i] = 1;
4ed46869 1586 for (; i < 256; i++)
2e344af3
KH
1587 width_by_char_head[i] = 4;
1588 width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1589 width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1590 width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1591 width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
8a73a704
KH
1592
1593 {
76d7b829 1594 Lisp_Object val;
8a73a704 1595
76d7b829 1596 val = Qnil;
8a73a704
KH
1597 for (i = 0x81; i < 0x90; i++)
1598 val = Fcons (make_number ((i - 0x70) << 7), val);
1599 for (; i < 0x9A; i++)
1600 val = Fcons (make_number ((i - 0x8F) << 14), val);
1601 for (i = 0xA0; i < 0xF0; i++)
1602 val = Fcons (make_number ((i - 0x70) << 7), val);
1603 for (; i < 0xFF; i++)
1604 val = Fcons (make_number ((i - 0xE0) << 14), val);
8a73a704
KH
1605 Vgeneric_character_list = Fnreverse (val);
1606 }
bbf12bb3
KH
1607
1608 nonascii_insert_offset = 0;
1609 Vnonascii_translation_table = Qnil;
4ed46869
KH
1610}
1611
1612#ifdef emacs
1613
dfcf069d 1614void
4ed46869
KH
1615syms_of_charset ()
1616{
2e344af3
KH
1617 Qcharset = intern ("charset");
1618 staticpro (&Qcharset);
1619
4ed46869
KH
1620 Qascii = intern ("ascii");
1621 staticpro (&Qascii);
1622
2e344af3
KH
1623 Qeight_bit_control = intern ("eight-bit-control");
1624 staticpro (&Qeight_bit_control);
1625
1626 Qeight_bit_graphic = intern ("eight-bit-graphic");
1627 staticpro (&Qeight_bit_graphic);
4ed46869 1628
2e344af3
KH
1629 /* Define special charsets ascii, eight-bit-control, and
1630 eight-bit-graphic. */
4ed46869
KH
1631 update_charset_table (make_number (CHARSET_ASCII),
1632 make_number (1), make_number (94),
1633 make_number (1),
1634 make_number (0),
1635 make_number ('B'),
1636 make_number (0),
1637 build_string ("ASCII"),
d78bc582 1638 Qnil, /* same as above */
4ed46869
KH
1639 build_string ("ASCII (ISO646 IRV)"));
1640 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1641 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1642
2e344af3
KH
1643 update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1644 make_number (1), make_number (96),
cd550f91 1645 make_number (1),
2e344af3
KH
1646 make_number (0),
1647 make_number (-1),
1648 make_number (-1),
1649 build_string ("8-bit control code (0x80..0x9F)"),
d78bc582
KH
1650 Qnil, /* same as above */
1651 Qnil); /* same as above */
2e344af3
KH
1652 CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1653 Fput (Qeight_bit_control, Qcharset,
1654 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1655
1656 update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1657 make_number (1), make_number (96),
cd550f91 1658 make_number (1),
2e344af3
KH
1659 make_number (0),
1660 make_number (-1),
1661 make_number (-1),
2e344af3 1662 build_string ("8-bit graphic char (0xA0..0xFF)"),
d78bc582
KH
1663 Qnil, /* same as above */
1664 Qnil); /* same as above */
2e344af3
KH
1665 CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1666 Fput (Qeight_bit_graphic, Qcharset,
1667 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1668
c1a08b4c
KH
1669 Qauto_fill_chars = intern ("auto-fill-chars");
1670 staticpro (&Qauto_fill_chars);
1671 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1672
4ed46869 1673 defsubr (&Sdefine_charset);
8a73a704 1674 defsubr (&Sgeneric_character_list);
3fac5a51 1675 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1676 defsubr (&Sdeclare_equiv_charset);
1677 defsubr (&Sfind_charset_region);
1678 defsubr (&Sfind_charset_string);
1679 defsubr (&Smake_char_internal);
1680 defsubr (&Ssplit_char);
1681 defsubr (&Schar_charset);
90d7b74e 1682 defsubr (&Scharset_after);
4ed46869 1683 defsubr (&Siso_charset);
9d3d8cba 1684 defsubr (&Schar_valid_p);
d2665018 1685 defsubr (&Sunibyte_char_to_multibyte);
1bcc1567 1686 defsubr (&Smultibyte_char_to_unibyte);
4ed46869
KH
1687 defsubr (&Schar_bytes);
1688 defsubr (&Schar_width);
1689 defsubr (&Sstring_width);
1690 defsubr (&Schar_direction);
af4fecb4 1691 defsubr (&Schars_in_region);
87b089ad 1692 defsubr (&Sstring);
4ed46869
KH
1693 defsubr (&Ssetup_special_charsets);
1694
1695 DEFVAR_LISP ("charset-list", &Vcharset_list,
1696 "List of charsets ever defined.");
2e344af3
KH
1697 Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1698 Fcons (Qeight_bit_graphic, Qnil)));
4ed46869 1699
537efd8d 1700 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
b4e9dd77
KH
1701 "Vector of cons cell of a symbol and translation table ever defined.\n\
1702An ID of a translation table is an index of this vector.");
537efd8d 1703 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
b0e3cf2b 1704
4ed46869
KH
1705 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1706 "Leading-code of private TYPE9N charset of column-width 1.");
1707 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1708
1709 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1710 "Leading-code of private TYPE9N charset of column-width 2.");
1711 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1712
1713 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1714 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1715 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1716
1717 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1718 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1719 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
35e623fb
RS
1720
1721 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
d2665018 1722 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
4cf9710d
RS
1723This is used for converting unibyte text to multibyte,\n\
1724and for inserting character codes specified by number.\n\n\
3e8ceaac
RS
1725This serves to convert a Latin-1 or similar 8-bit character code\n\
1726to the corresponding Emacs multibyte character code.\n\
1727Typically the value should be (- (make-char CHARSET 0) 128),\n\
1728for your choice of character set.\n\
537efd8d 1729If `nonascii-translation-table' is non-nil, it overrides this variable.");
35e623fb 1730 nonascii_insert_offset = 0;
b0e3cf2b 1731
b4e9dd77 1732 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
537efd8d 1733 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
4cf9710d
RS
1734This is used for converting unibyte text to multibyte,\n\
1735and for inserting character codes specified by number.\n\n\
1736Conversion is performed only when multibyte characters are enabled,\n\
1737and it serves to convert a Latin-1 or similar 8-bit character code\n\
1738to the corresponding Emacs character code.\n\n\
da4d65af 1739If this is nil, `nonascii-insert-offset' is used instead.\n\
b4e9dd77
KH
1740See also the docstring of `make-translation-table'.");
1741 Vnonascii_translation_table = Qnil;
4cf9710d 1742
c1a08b4c
KH
1743 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1744 "A char-table for characters which invoke auto-filling.\n\
8dd61baf 1745Such characters have value t in this table.");
c1a08b4c 1746 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
60022cb7
AS
1747 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1748 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
4ed46869
KH
1749}
1750
1751#endif /* emacs */