Copyright fixed
[bpt/emacs.git] / src / charset.c
1 /* Basic multilingual character support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
6
7 This file is part of GNU Emacs.
8
9 GNU Emacs is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GNU Emacs is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GNU Emacs; see the file COPYING. If not, write to
21 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
23
/* Before reading the code in this file, see the documentation in
   `charset.h'.  */
26
27 #ifdef emacs
28 #include <config.h>
29 #endif
30
31 #include <stdio.h>
32
33 #ifdef emacs
34
35 #include <sys/types.h>
36 #include "lisp.h"
37 #include "buffer.h"
38 #include "charset.h"
39 #include "composite.h"
40 #include "coding.h"
41 #include "disptab.h"
42
43 #else /* not emacs */
44
45 #include "mulelib.h"
46
47 #endif /* emacs */
48
49 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
50 Lisp_Object Qunknown;
51
52 /* Declaration of special leading-codes. */
53 EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
54 EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
55 EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
56 EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
57
58 /* Declaration of special charsets. The values are set by
59 Fsetup_special_charsets. */
60 int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
61 int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
62 int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
63 int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
64 int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
65 int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
66 int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
67 int charset_mule_unicode_0100_24ff;
68 int charset_mule_unicode_2500_33ff;
69 int charset_mule_unicode_e000_ffff;
70
71 Lisp_Object Qcharset_table;
72
73 /* A char-table containing information of each character set. */
74 Lisp_Object Vcharset_table;
75
76 /* A vector of charset symbol indexed by charset-id. This is used
77 only for returning charset symbol from C functions. */
78 Lisp_Object Vcharset_symbol_table;
79
80 /* A list of charset symbols ever defined. */
81 Lisp_Object Vcharset_list;
82
83 /* Vector of translation table ever defined.
84 ID of a translation table is used to index this vector. */
85 Lisp_Object Vtranslation_table_vector;
86
87 /* A char-table for characters which may invoke auto-filling. */
88 Lisp_Object Vauto_fill_chars;
89
90 Lisp_Object Qauto_fill_chars;
91
92 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
93 int bytes_by_char_head[256];
94 int width_by_char_head[256];
95
96 /* Mapping table from ISO2022's charset (specified by DIMENSION,
97 CHARS, and FINAL-CHAR) to Emacs' charset. */
98 int iso_charset_table[2][2][128];
99
100 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
101 unsigned char *_fetch_multibyte_char_p;
102 int _fetch_multibyte_char_len;
103
104 /* Offset to add to a non-ASCII value when inserting it. */
105 EMACS_INT nonascii_insert_offset;
106
107 /* Translation table for converting non-ASCII unibyte characters
108 to multibyte codes, or nil. */
109 Lisp_Object Vnonascii_translation_table;
110
111 /* List of all possible generic characters. */
112 Lisp_Object Vgeneric_character_list;
113
114 \f
/* Report C as an invalid character by calling error () with a
   message that shows the code in decimal, octal and hexadecimal.  */

void
invalid_character (c)
     int c;
{
  error ("Invalid character: %d, #o%o, #x%x", c, c, c);
}
121
/* Decode the multibyte sequence starting at STR.  Store in BYTES the
   number of bytes the sequence occupies (BYTES_BY_CHAR_HEAD of the
   first byte) and store in CHARSET, C1 and C2 the charset and the
   position codes of the character.  C2 is assigned only for
   sequences that carry two position codes; otherwise it is left
   untouched.  LENGTH is not referenced by the expansion.  STR and the
   output arguments are evaluated more than once.  */

#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)       \
  do                                                                    \
    {                                                                   \
      (c1) = *(str);                                                    \
      (bytes) = BYTES_BY_CHAR_HEAD (c1);                                \
      switch (bytes)                                                    \
        {                                                               \
        case 1:                                                         \
          (charset) = (ASCII_BYTE_P (c1)                                \
                       ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC);        \
          break;                                                        \
        case 2:                                                         \
          if ((c1) == LEADING_CODE_8_BIT_CONTROL)                       \
            {                                                           \
              (charset) = CHARSET_8_BIT_CONTROL;                        \
              (c1) = (str)[1] - 0x20;                                   \
            }                                                           \
          else                                                          \
            {                                                           \
              (charset) = (c1);                                         \
              (c1) = (str)[1] & 0x7F;                                   \
            }                                                           \
          break;                                                        \
        case 3:                                                         \
          if ((c1) < LEADING_CODE_PRIVATE_11)                           \
            {                                                           \
              /* Official two-dimensional charset.  */                  \
              (charset) = (c1);                                         \
              (c1) = (str)[1] & 0x7F;                                   \
              (c2) = (str)[2] & 0x7F;                                   \
            }                                                           \
          else                                                          \
            {                                                           \
              /* Private charset: second byte is the charset.  */       \
              (charset) = (str)[1];                                     \
              (c1) = (str)[2] & 0x7F;                                   \
            }                                                           \
          break;                                                        \
        default:                                                        \
          (charset) = (str)[1];                                         \
          (c1) = (str)[2] & 0x7F;                                       \
          (c2) = (str)[3] & 0x7F;                                       \
          break;                                                        \
        }                                                               \
    }                                                                   \
  while (0)
149
/* Nonzero if CHARSET, C1 and C2 compose a valid character, else 0.
   The ranges are deliberately loose: components such as 0xA0 0xA0
   are accepted because many existing files contain such invalid
   byte sequences, especially in EUC-GB.  C2 is examined only when
   CHARSET is not one of the three single-byte charsets and its
   dimension is not 1.  */
#define CHAR_COMPONENTS_VALID_P(charset, c1, c2)                        \
  ((charset) == CHARSET_ASCII                                           \
   ? (0 <= (c1) && (c1) <= 0x7F)                                        \
   : (charset) == CHARSET_8_BIT_CONTROL                                 \
   ? (0x80 <= (c1) && (c1) <= 0x9F)                                     \
   : (charset) == CHARSET_8_BIT_GRAPHIC                                 \
   ? (0x80 <= (c1) && (c1) <= 0xFF)                                     \
   : ((CHARSET_DIMENSION (charset) == 1                                 \
       || (0x20 <= (c2) && (c2) <= 0x7F))                               \
      && (0x20 <= (c1) && (c1) <= 0x7F)))
165
166 /* Store multi-byte form of the character C in STR. The caller should
167 allocate at least 4-byte area at STR in advance. Returns the
168 length of the multi-byte form. If C is an invalid character code,
169 return -1. */
170
171 int
172 char_to_string_1 (c, str)
173 int c;
174 unsigned char *str;
175 {
176 unsigned char *p = str;
177
178 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
179 {
180 /* Multibyte character can't have a modifier bit. */
181 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
182 return -1;
183
184 /* For Meta, Shift, and Control modifiers, we need special care. */
185 if (c & CHAR_META)
186 {
187 /* Move the meta bit to the right place for a string. */
188 c = (c & ~CHAR_META) | 0x80;
189 }
190 if (c & CHAR_SHIFT)
191 {
192 /* Shift modifier is valid only with [A-Za-z]. */
193 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
194 c &= ~CHAR_SHIFT;
195 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
196 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
197 }
198 if (c & CHAR_CTL)
199 {
200 /* Simulate the code in lread.c. */
201 /* Allow `\C- ' and `\C-?'. */
202 if (c == (CHAR_CTL | ' '))
203 c = 0;
204 else if (c == (CHAR_CTL | '?'))
205 c = 127;
206 /* ASCII control chars are made from letters (both cases),
207 as well as the non-letters within 0100...0137. */
208 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
209 c &= (037 | (~0177 & ~CHAR_CTL));
210 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
211 c &= (037 | (~0177 & ~CHAR_CTL));
212 }
213
214 /* If C still has any modifier bits, just ignore it. */
215 c &= ~CHAR_MODIFIER_MASK;
216 }
217
218 if (SINGLE_BYTE_CHAR_P (c))
219 {
220 if (ASCII_BYTE_P (c) || c >= 0xA0)
221 *p++ = c;
222 else
223 {
224 *p++ = LEADING_CODE_8_BIT_CONTROL;
225 *p++ = c + 0x20;
226 }
227 }
228 else if (CHAR_VALID_P (c, 0))
229 {
230 int charset, c1, c2;
231
232 SPLIT_CHAR (c, charset, c1, c2);
233
234 if (charset >= LEADING_CODE_EXT_11)
235 *p++ = (charset < LEADING_CODE_EXT_12
236 ? LEADING_CODE_PRIVATE_11
237 : (charset < LEADING_CODE_EXT_21
238 ? LEADING_CODE_PRIVATE_12
239 : (charset < LEADING_CODE_EXT_22
240 ? LEADING_CODE_PRIVATE_21
241 : LEADING_CODE_PRIVATE_22)));
242 *p++ = charset;
243 if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
244 return -1;
245 if (c1)
246 {
247 *p++ = c1 | 0x80;
248 if (c2 > 0)
249 *p++ = c2 | 0x80;
250 }
251 }
252 else
253 return -1;
254
255 return (p - str);
256 }
257
258
/* Write the multibyte form of character C into STR, which must have
   room for at least 4 bytes, and return the number of bytes written.
   Unlike char_to_string_1, an invalid character code is reported
   through invalid_character.

   If C can be an ASCII character, use the macro
   `CHAR_STRING (C, STR)' rather than calling this directly.  */

int
char_to_string (c, str)
     int c;
     unsigned char *str;
{
  int nbytes = char_to_string_1 (c, str);

  if (nbytes != -1)
    return nbytes;

  invalid_character (c);
  return nbytes;
}
278
279
/* Return the non-ASCII character whose multibyte form starts at STR
   (a string of length LEN).  If ACTUAL_LEN is not NULL, store the
   byte length of that multibyte form in *ACTUAL_LEN.

   Use the macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of
   calling this function directly if you want to handle ASCII
   characters as well.  */

int
string_to_char (str, len, actual_len)
     const unsigned char *str;
     int len, *actual_len;
{
  int nbytes, charset, code1, code2;

  SPLIT_MULTIBYTE_SEQ (str, len, nbytes, charset, code1, code2);

  if (actual_len != NULL)
    *actual_len = nbytes;

  return MAKE_CHAR (charset, code1, code2);
}
301
/* Return the byte length of the multibyte form at STR, a string of
   length LEN, as determined by PARSE_MULTIBYTE_SEQ.  Callers should
   use the macro MULTIBYTE_FORM_LENGTH instead.  */
int
multibyte_form_length (str, len)
     const unsigned char *str;
     int len;
{
  int nbytes;

  PARSE_MULTIBYTE_SEQ (str, len, nbytes);

  return nbytes;
}
314
315 /* Check multibyte form at string STR of length LEN and set variables
316 pointed by CHARSET, C1, and C2 to charset and position codes of the
317 character at STR, and return 0. If there's no multibyte character,
318 return -1. This should be used only in the macro SPLIT_STRING
319 which checks range of STR in advance. */
320
321 int
322 split_string (str, len, charset, c1, c2)
323 const unsigned char *str;
324 unsigned char *c1, *c2;
325 int len, *charset;
326 {
327 register int bytes, cs, code1, code2 = -1;
328
329 SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
330 if (cs == CHARSET_ASCII)
331 return -1;
332 *charset = cs;
333 *c1 = code1;
334 *c2 = code2;
335 return 0;
336 }
337
338 /* Return 1 iff character C has valid printable glyph.
339 Use the macro CHAR_PRINTABLE_P instead. */
340 int
341 char_printable_p (c)
342 int c;
343 {
344 int charset, c1, c2;
345
346 if (ASCII_BYTE_P (c))
347 return 1;
348 else if (SINGLE_BYTE_CHAR_P (c))
349 return 0;
350 else if (c >= MAX_CHAR)
351 return 0;
352
353 SPLIT_CHAR (c, charset, c1, c2);
354 if (! CHARSET_DEFINED_P (charset))
355 return 0;
356 if (CHARSET_CHARS (charset) == 94
357 ? c1 <= 32 || c1 >= 127
358 : c1 < 32)
359 return 0;
360 if (CHARSET_DIMENSION (charset) == 2
361 && (CHARSET_CHARS (charset) == 94
362 ? c2 <= 32 || c2 >= 127
363 : c2 < 32))
364 return 0;
365 return 1;
366 }
367
368 /* Translate character C by translation table TABLE. If C
369 is negative, translate a character specified by CHARSET, C1, and C2
370 (C1 and C2 are code points of the character). If no translation is
371 found in TABLE, return C. */
372 int
373 translate_char (table, c, charset, c1, c2)
374 Lisp_Object table;
375 int c, charset, c1, c2;
376 {
377 Lisp_Object ch;
378 int alt_charset, alt_c1, alt_c2, dimension;
379
380 if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
381 if (!CHAR_TABLE_P (table)
382 || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
383 return c;
384
385 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
386 dimension = CHARSET_DIMENSION (alt_charset);
387 if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
388 /* CH is not a generic character, just return it. */
389 return XFASTINT (ch);
390
391 /* Since CH is a generic character, we must return a specific
392 charater which has the same position codes as C from CH. */
393 if (charset < 0)
394 SPLIT_CHAR (c, charset, c1, c2);
395 if (dimension != CHARSET_DIMENSION (charset))
396 /* We can't make such a character because of dimension mismatch. */
397 return c;
398 return MAKE_CHAR (alt_charset, c1, c2);
399 }
400
401 /* Convert the unibyte character C to multibyte based on
402 Vnonascii_translation_table or nonascii_insert_offset. If they can't
403 convert C to a valid multibyte character, convert it based on
404 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
405
406 int
407 unibyte_char_to_multibyte (c)
408 int c;
409 {
410 if (c < 0400 && c >= 0200)
411 {
412 int c_save = c;
413
414 if (! NILP (Vnonascii_translation_table))
415 {
416 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
417 if (c >= 0400 && ! char_valid_p (c, 0))
418 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
419 }
420 else if (c >= 0240 && nonascii_insert_offset > 0)
421 {
422 c += nonascii_insert_offset;
423 if (c < 0400 || ! char_valid_p (c, 0))
424 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
425 }
426 else if (c >= 0240)
427 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
428 }
429 return c;
430 }
431
432
433 /* Convert the multibyte character C to unibyte 8-bit character based
434 on Vnonascii_translation_table or nonascii_insert_offset. If
435 REV_TBL is non-nil, it should be a reverse table of
436 Vnonascii_translation_table, i.e. what given by:
437 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
438
439 int
440 multibyte_char_to_unibyte (c, rev_tbl)
441 int c;
442 Lisp_Object rev_tbl;
443 {
444 if (!SINGLE_BYTE_CHAR_P (c))
445 {
446 int c_save = c;
447
448 if (! CHAR_TABLE_P (rev_tbl)
449 && CHAR_TABLE_P (Vnonascii_translation_table))
450 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
451 make_number (0));
452 if (CHAR_TABLE_P (rev_tbl))
453 {
454 Lisp_Object temp;
455 temp = Faref (rev_tbl, make_number (c));
456 if (INTEGERP (temp))
457 c = XINT (temp);
458 if (c >= 256)
459 c = (c_save & 0177) + 0200;
460 }
461 else
462 {
463 if (nonascii_insert_offset > 0)
464 c -= nonascii_insert_offset;
465 if (c < 128 || c >= 256)
466 c = (c_save & 0177) + 0200;
467 }
468 }
469
470 return c;
471 }
472
473 \f
474 /* Update the table Vcharset_table with the given arguments (see the
475 document of `define-charset' for the meaning of each argument).
476 Several other table contents are also updated. The caller should
477 check the validity of CHARSET-ID and the remaining arguments in
478 advance. */
479
480 void
481 update_charset_table (charset_id, dimension, chars, width, direction,
482 iso_final_char, iso_graphic_plane,
483 short_name, long_name, description)
484 Lisp_Object charset_id, dimension, chars, width, direction;
485 Lisp_Object iso_final_char, iso_graphic_plane;
486 Lisp_Object short_name, long_name, description;
487 {
488 int charset = XINT (charset_id);
489 int bytes;
490 unsigned char leading_code_base, leading_code_ext;
491
492 if (NILP (CHARSET_TABLE_ENTRY (charset)))
493 CHARSET_TABLE_ENTRY (charset)
494 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
495
496 if (NILP (long_name))
497 long_name = short_name;
498 if (NILP (description))
499 description = long_name;
500
501 /* Get byte length of multibyte form, base leading-code, and
502 extended leading-code of the charset. See the comment under the
503 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
504 bytes = XINT (dimension);
505 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
506 {
507 /* Official charset, it doesn't have an extended leading-code. */
508 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
509 bytes += 1; /* For a base leading-code. */
510 leading_code_base = charset;
511 leading_code_ext = 0;
512 }
513 else
514 {
515 /* Private charset. */
516 bytes += 2; /* For base and extended leading-codes. */
517 leading_code_base
518 = (charset < LEADING_CODE_EXT_12
519 ? LEADING_CODE_PRIVATE_11
520 : (charset < LEADING_CODE_EXT_21
521 ? LEADING_CODE_PRIVATE_12
522 : (charset < LEADING_CODE_EXT_22
523 ? LEADING_CODE_PRIVATE_21
524 : LEADING_CODE_PRIVATE_22)));
525 leading_code_ext = charset;
526 if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
527 error ("Invalid dimension for the charset-ID %d", charset);
528 }
529
530 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
531 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
532 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
533 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
534 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
535 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
536 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
537 = make_number (leading_code_base);
538 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
539 = make_number (leading_code_ext);
540 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
541 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
542 = iso_graphic_plane;
543 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
544 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
545 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
546 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
547
548 {
549 /* If we have already defined a charset which has the same
550 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
551 DIRECTION, we must update the entry REVERSE-CHARSET of both
552 charsets. If there's no such charset, the value of the entry
553 is set to nil. */
554 int i;
555
556 for (i = 0; i <= MAX_CHARSET; i++)
557 if (!NILP (CHARSET_TABLE_ENTRY (i)))
558 {
559 if (CHARSET_DIMENSION (i) == XINT (dimension)
560 && CHARSET_CHARS (i) == XINT (chars)
561 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
562 && CHARSET_DIRECTION (i) != XINT (direction))
563 {
564 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
565 = make_number (i);
566 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
567 break;
568 }
569 }
570 if (i > MAX_CHARSET)
571 /* No such a charset. */
572 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
573 = make_number (-1);
574 }
575
576 if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
577 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
578 {
579 bytes_by_char_head[leading_code_base] = bytes;
580 width_by_char_head[leading_code_base] = XINT (width);
581
582 /* Update table emacs_code_class. */
583 emacs_code_class[charset] = (bytes == 2
584 ? EMACS_leading_code_2
585 : (bytes == 3
586 ? EMACS_leading_code_3
587 : EMACS_leading_code_4));
588 }
589
590 /* Update table iso_charset_table. */
591 if (XINT (iso_final_char) >= 0
592 && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
593 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
594 }
595
596 #ifdef emacs
597
598 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
599 is invalid. */
600 int
601 get_charset_id (charset_symbol)
602 Lisp_Object charset_symbol;
603 {
604 Lisp_Object val;
605 int charset;
606
607 /* This originally used a ?: operator, but reportedly the HP-UX
608 compiler version HP92453-01 A.10.32.22 miscompiles that. */
609 if (SYMBOLP (charset_symbol)
610 && VECTORP (val = Fget (charset_symbol, Qcharset))
611 && CHARSET_VALID_P (charset =
612 XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
613 return charset;
614 else
615 return -1;
616 }
617
618 /* Return an identification number for a new private charset of
619 DIMENSION and WIDTH. If there's no more room for the new charset,
620 return 0. */
621 Lisp_Object
622 get_new_private_charset_id (dimension, width)
623 int dimension, width;
624 {
625 int charset, from, to;
626
627 if (dimension == 1)
628 {
629 from = LEADING_CODE_EXT_11;
630 to = LEADING_CODE_EXT_21;
631 }
632 else
633 {
634 from = LEADING_CODE_EXT_21;
635 to = LEADING_CODE_EXT_MAX + 1;
636 }
637
638 for (charset = from; charset < to; charset++)
639 if (!CHARSET_DEFINED_P (charset)) break;
640
641 return make_number (charset < to ? charset : 0);
642 }
643
644 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
645 doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
646 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
647 treated as a private charset.
648 INFO-VECTOR is a vector of the format:
649 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
650 SHORT-NAME LONG-NAME DESCRIPTION]
651 The meanings of each elements is as follows:
652 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
653 CHARS (integer) is the number of characters in a dimension: 94 or 96.
654 WIDTH (integer) is the number of columns a character in the charset
655 occupies on the screen: one of 0, 1, and 2.
656
657 DIRECTION (integer) is the rendering direction of characters in the
658 charset when rendering. If 0, render from left to right, else
659 render from right to left.
660
661 ISO-FINAL-CHAR (character) is the final character of the
662 corresponding ISO 2022 charset.
663 It may be -1 if the charset is internal use only.
664
665 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
666 while encoding to variants of ISO 2022 coding system, one of the
667 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
668 It may be -1 if the charset is internal use only.
669
670 SHORT-NAME (string) is the short name to refer to the charset.
671
672 LONG-NAME (string) is the long name to refer to the charset.
673
674 DESCRIPTION (string) is the description string of the charset. */)
675 (charset_id, charset_symbol, info_vector)
676 Lisp_Object charset_id, charset_symbol, info_vector;
677 {
678 Lisp_Object *vec;
679
680 if (!NILP (charset_id))
681 CHECK_NUMBER (charset_id);
682 CHECK_SYMBOL (charset_symbol);
683 CHECK_VECTOR (info_vector);
684
685 if (! NILP (charset_id))
686 {
687 if (! CHARSET_VALID_P (XINT (charset_id)))
688 error ("Invalid CHARSET: %d", XINT (charset_id));
689 else if (CHARSET_DEFINED_P (XINT (charset_id)))
690 error ("Already defined charset: %d", XINT (charset_id));
691 }
692
693 vec = XVECTOR (info_vector)->contents;
694 if (XVECTOR (info_vector)->size != 9
695 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
696 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
697 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
698 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
699 || !INTEGERP (vec[4])
700 || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
701 || !INTEGERP (vec[5])
702 || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
703 || !STRINGP (vec[6])
704 || !STRINGP (vec[7])
705 || !STRINGP (vec[8]))
706 error ("Invalid info-vector argument for defining charset %s",
707 SDATA (SYMBOL_NAME (charset_symbol)));
708
709 if (NILP (charset_id))
710 {
711 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
712 if (XINT (charset_id) == 0)
713 error ("There's no room for a new private charset %s",
714 SDATA (SYMBOL_NAME (charset_symbol)));
715 }
716
717 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
718 vec[4], vec[5], vec[6], vec[7], vec[8]);
719 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
720 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
721 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
722 Fupdate_coding_systems_internal ();
723 return Qnil;
724 }
725
726 DEFUN ("generic-character-list", Fgeneric_character_list,
727 Sgeneric_character_list, 0, 0, 0,
728 doc: /* Return a list of all possible generic characters.
729 It includes a generic character for a charset not yet defined. */)
730 ()
731 {
732 return Vgeneric_character_list;
733 }
734
735 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
736 Sget_unused_iso_final_char, 2, 2, 0,
737 doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
738 DIMENSION is the number of bytes to represent a character: 1 or 2.
739 CHARS is the number of characters in a dimension: 94 or 96.
740
741 This final char is for private use, thus the range is `0' (48) .. `?' (63).
742 If there's no unused final char for the specified kind of charset,
743 return nil. */)
744 (dimension, chars)
745 Lisp_Object dimension, chars;
746 {
747 int final_char;
748
749 CHECK_NUMBER (dimension);
750 CHECK_NUMBER (chars);
751 if (XINT (dimension) != 1 && XINT (dimension) != 2)
752 error ("Invalid charset dimension %d, it should be 1 or 2",
753 XINT (dimension));
754 if (XINT (chars) != 94 && XINT (chars) != 96)
755 error ("Invalid charset chars %d, it should be 94 or 96",
756 XINT (chars));
757 for (final_char = '0'; final_char <= '?'; final_char++)
758 {
759 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
760 break;
761 }
762 return (final_char <= '?' ? make_number (final_char) : Qnil);
763 }
764
765 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
766 4, 4, 0,
767 doc: /* Declare an equivalent charset for ISO-2022 decoding.
768
769 On decoding by an ISO-2022 base coding system, when a charset
770 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
771 if CHARSET is designated instead. */)
772 (dimension, chars, final_char, charset)
773 Lisp_Object dimension, chars, final_char, charset;
774 {
775 int charset_id;
776
777 CHECK_NUMBER (dimension);
778 CHECK_NUMBER (chars);
779 CHECK_NUMBER (final_char);
780 CHECK_SYMBOL (charset);
781
782 if (XINT (dimension) != 1 && XINT (dimension) != 2)
783 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
784 if (XINT (chars) != 94 && XINT (chars) != 96)
785 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
786 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
787 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
788 if ((charset_id = get_charset_id (charset)) < 0)
789 error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
790
791 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
792 return Qnil;
793 }
794
795 /* Return information about charsets in the text at PTR of NBYTES
796 bytes, which are NCHARS characters. The value is:
797
798 0: Each character is represented by one byte. This is always
799 true for unibyte text.
800 1: No charsets other than ascii eight-bit-control,
801 eight-bit-graphic, and latin-1 are found.
802 2: Otherwise.
803
804 In addition, if CHARSETS is nonzero, for each found charset N, set
805 CHARSETS[N] to 1. For that, callers should allocate CHARSETS
806 (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
807 table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
808 1 (note that there's no charset whose ID is 1). */
809
810 int
811 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
812 const unsigned char *ptr;
813 int nchars, nbytes, *charsets;
814 Lisp_Object table;
815 {
816 if (nchars == nbytes)
817 {
818 if (charsets && nbytes > 0)
819 {
820 const unsigned char *endp = ptr + nbytes;
821 int maskbits = 0;
822
823 while (ptr < endp && maskbits != 7)
824 {
825 maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
826 ptr++;
827 }
828
829 if (maskbits & 1)
830 charsets[CHARSET_ASCII] = 1;
831 if (maskbits & 2)
832 charsets[CHARSET_8_BIT_CONTROL] = 1;
833 if (maskbits & 4)
834 charsets[CHARSET_8_BIT_GRAPHIC] = 1;
835 }
836 return 0;
837 }
838 else
839 {
840 int return_val = 1;
841 int bytes, charset, c1, c2;
842
843 if (! CHAR_TABLE_P (table))
844 table = Qnil;
845
846 while (nchars-- > 0)
847 {
848 SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
849 ptr += bytes;
850
851 if (!CHARSET_DEFINED_P (charset))
852 charset = 1;
853 else if (! NILP (table))
854 {
855 int c = translate_char (table, -1, charset, c1, c2);
856 if (c >= 0)
857 charset = CHAR_CHARSET (c);
858 }
859
860 if (return_val == 1
861 && charset != CHARSET_ASCII
862 && charset != CHARSET_8_BIT_CONTROL
863 && charset != CHARSET_8_BIT_GRAPHIC
864 && charset != charset_latin_iso8859_1)
865 return_val = 2;
866
867 if (charsets)
868 charsets[charset] = 1;
869 else if (return_val == 2)
870 break;
871 }
872 return return_val;
873 }
874 }
875
876 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
877 2, 3, 0,
878 doc: /* Return a list of charsets in the region between BEG and END.
879 BEG and END are buffer positions.
880 Optional arg TABLE if non-nil is a translation table to look up.
881
882 If the region contains invalid multibyte characters,
883 `unknown' is included in the returned list.
884
885 If the current buffer is unibyte, the returned list may contain
886 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
887 (beg, end, table)
888 Lisp_Object beg, end, table;
889 {
890 int charsets[MAX_CHARSET + 1];
891 int from, from_byte, to, stop, stop_byte, i;
892 Lisp_Object val;
893
894 validate_region (&beg, &end);
895 from = XFASTINT (beg);
896 stop = to = XFASTINT (end);
897
898 if (from < GPT && GPT < to)
899 {
900 stop = GPT;
901 stop_byte = GPT_BYTE;
902 }
903 else
904 stop_byte = CHAR_TO_BYTE (stop);
905
906 from_byte = CHAR_TO_BYTE (from);
907
908 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
909 while (1)
910 {
911 find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
912 stop_byte - from_byte, charsets, table);
913 if (stop < to)
914 {
915 from = stop, from_byte = stop_byte;
916 stop = to, stop_byte = CHAR_TO_BYTE (stop);
917 }
918 else
919 break;
920 }
921
922 val = Qnil;
923 if (charsets[1])
924 val = Fcons (Qunknown, val);
925 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
926 if (charsets[i])
927 val = Fcons (CHARSET_SYMBOL (i), val);
928 if (charsets[0])
929 val = Fcons (Qascii, val);
930 return val;
931 }
932
933 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
934 1, 2, 0,
935 doc: /* Return a list of charsets in STR.
936 Optional arg TABLE if non-nil is a translation table to look up.
937
938 If the string contains invalid multibyte characters,
939 `unknown' is included in the returned list.
940
941 If STR is unibyte, the returned list may contain
942 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
943 (str, table)
944 Lisp_Object str, table;
945 {
946 int charsets[MAX_CHARSET + 1];
947 int i;
948 Lisp_Object val;
949
950 CHECK_STRING (str);
951
952 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
953 find_charset_in_text (SDATA (str), SCHARS (str),
954 SBYTES (str), charsets, table);
955
956 val = Qnil;
957 if (charsets[1])
958 val = Fcons (Qunknown, val);
959 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
960 if (charsets[i])
961 val = Fcons (CHARSET_SYMBOL (i), val);
962 if (charsets[0])
963 val = Fcons (Qascii, val);
964 return val;
965 }
966
967 \f
968 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
969 doc: /* Return a character made from arguments.
970 Internal use only. */)
971 (charset, code1, code2)
972 Lisp_Object charset, code1, code2;
973 {
974 int charset_id, c1, c2;
975
976 CHECK_NUMBER (charset);
977 charset_id = XINT (charset);
978 if (!CHARSET_DEFINED_P (charset_id))
979 error ("Invalid charset ID: %d", XINT (charset));
980
981 if (NILP (code1))
982 c1 = 0;
983 else
984 {
985 CHECK_NUMBER (code1);
986 c1 = XINT (code1);
987 }
988 if (NILP (code2))
989 c2 = 0;
990 else
991 {
992 CHECK_NUMBER (code2);
993 c2 = XINT (code2);
994 }
995
996 if (charset_id == CHARSET_ASCII)
997 {
998 if (c1 < 0 || c1 > 0x7F)
999 goto invalid_code_posints;
1000 return make_number (c1);
1001 }
1002 else if (charset_id == CHARSET_8_BIT_CONTROL)
1003 {
1004 if (NILP (code1))
1005 c1 = 0x80;
1006 else if (c1 < 0x80 || c1 > 0x9F)
1007 goto invalid_code_posints;
1008 return make_number (c1);
1009 }
1010 else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1011 {
1012 if (NILP (code1))
1013 c1 = 0xA0;
1014 else if (c1 < 0xA0 || c1 > 0xFF)
1015 goto invalid_code_posints;
1016 return make_number (c1);
1017 }
1018 else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1019 goto invalid_code_posints;
1020 c1 &= 0x7F;
1021 c2 &= 0x7F;
1022 if (c1 == 0
1023 ? c2 != 0
1024 : (c2 == 0
1025 ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1026 : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1027 goto invalid_code_posints;
1028 return make_number (MAKE_CHAR (charset_id, c1, c2));
1029
1030 invalid_code_posints:
1031 error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1032 }
1033
1034 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1035 doc: /* Return list of charset and one or two position-codes of CH.
1036 If CH is invalid as a character code,
1037 return a list of symbol `unknown' and CH. */)
1038 (ch)
1039 Lisp_Object ch;
1040 {
1041 int c, charset, c1, c2;
1042
1043 CHECK_NUMBER (ch);
1044 c = XFASTINT (ch);
1045 if (!CHAR_VALID_P (c, 1))
1046 return Fcons (Qunknown, Fcons (ch, Qnil));
1047 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1048 return (c2 >= 0
1049 ? Fcons (CHARSET_SYMBOL (charset),
1050 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1051 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1052 }
1053
1054 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1055 doc: /* Return charset of CH. */)
1056 (ch)
1057 Lisp_Object ch;
1058 {
1059 CHECK_NUMBER (ch);
1060
1061 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1062 }
1063
1064 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1065 doc: /* Return charset of a character in the current buffer at position POS.
1066 If POS is nil, it defauls to the current point.
1067 If POS is out of range, the value is nil. */)
1068 (pos)
1069 Lisp_Object pos;
1070 {
1071 Lisp_Object ch;
1072 int charset;
1073
1074 ch = Fchar_after (pos);
1075 if (! INTEGERP (ch))
1076 return ch;
1077 charset = CHAR_CHARSET (XINT (ch));
1078 return CHARSET_SYMBOL (charset);
1079 }
1080
1081 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1082 doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1083
1084 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1085 by their DIMENSION, CHARS, and FINAL-CHAR,
1086 where as Emacs distinguishes them by charset symbol.
1087 See the documentation of the function `charset-info' for the meanings of
1088 DIMENSION, CHARS, and FINAL-CHAR. */)
1089 (dimension, chars, final_char)
1090 Lisp_Object dimension, chars, final_char;
1091 {
1092 int charset;
1093
1094 CHECK_NUMBER (dimension);
1095 CHECK_NUMBER (chars);
1096 CHECK_NUMBER (final_char);
1097
1098 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1099 return Qnil;
1100 return CHARSET_SYMBOL (charset);
1101 }
1102
1103 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1104 generic character. If GENERICP is zero, return nonzero iff C is a
1105 valid normal character. Do not call this function directly,
1106 instead use macro CHAR_VALID_P. */
1107 int
1108 char_valid_p (c, genericp)
1109 int c, genericp;
1110 {
1111 int charset, c1, c2;
1112
1113 if (c < 0 || c >= MAX_CHAR)
1114 return 0;
1115 if (SINGLE_BYTE_CHAR_P (c))
1116 return 1;
1117 SPLIT_CHAR (c, charset, c1, c2);
1118 if (genericp)
1119 {
1120 if (c1)
1121 {
1122 if (c2 <= 0) c2 = 0x20;
1123 }
1124 else
1125 {
1126 if (c2 <= 0) c1 = c2 = 0x20;
1127 }
1128 }
1129 return (CHARSET_DEFINED_P (charset)
1130 && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1131 }
1132
1133 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1134 doc: /* Return t if OBJECT is a valid normal character.
1135 If optional arg GENERICP is non-nil, also return t if OBJECT is
1136 a valid generic character. */)
1137 (object, genericp)
1138 Lisp_Object object, genericp;
1139 {
1140 if (! NATNUMP (object))
1141 return Qnil;
1142 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1143 }
1144
1145 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1146 Sunibyte_char_to_multibyte, 1, 1, 0,
1147 doc: /* Convert the unibyte character CH to multibyte character.
1148 The conversion is done based on `nonascii-translation-table' (which see)
1149 or `nonascii-insert-offset' (which see). */)
1150 (ch)
1151 Lisp_Object ch;
1152 {
1153 int c;
1154
1155 CHECK_NUMBER (ch);
1156 c = XINT (ch);
1157 if (c < 0 || c >= 0400)
1158 error ("Invalid unibyte character: %d", c);
1159 c = unibyte_char_to_multibyte (c);
1160 if (c < 0)
1161 error ("Can't convert to multibyte character: %d", XINT (ch));
1162 return make_number (c);
1163 }
1164
1165 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1166 Smultibyte_char_to_unibyte, 1, 1, 0,
1167 doc: /* Convert the multibyte character CH to unibyte character.
1168 The conversion is done based on `nonascii-translation-table' (which see)
1169 or `nonascii-insert-offset' (which see). */)
1170 (ch)
1171 Lisp_Object ch;
1172 {
1173 int c;
1174
1175 CHECK_NUMBER (ch);
1176 c = XINT (ch);
1177 if (! CHAR_VALID_P (c, 0))
1178 error ("Invalid multibyte character: %d", c);
1179 c = multibyte_char_to_unibyte (c, Qnil);
1180 if (c < 0)
1181 error ("Can't convert to unibyte character: %d", XINT (ch));
1182 return make_number (c);
1183 }
1184
1185 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1186 doc: /* Return 1 regardless of the argument CH. */)
1187 (ch)
1188 Lisp_Object ch;
1189 {
1190 CHECK_NUMBER (ch);
1191 return make_number (1);
1192 }
1193
1194 /* Return how many bytes C will occupy in a multibyte buffer.
1195 Don't call this function directly, instead use macro CHAR_BYTES. */
1196 int
1197 char_bytes (c)
1198 int c;
1199 {
1200 int charset;
1201
1202 if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1203 return 1;
1204 if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1205 return 1;
1206
1207 charset = CHAR_CHARSET (c);
1208 return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1209 }
1210
/* Return the width of the character whose multibyte form starts with
   the byte C.  The width is the number of columns it occupies on the
   screen when displayed in the current buffer:

     TAB                      -> the buffer's tab-width
     newline                  -> 0
     other control and 0x7F   -> 2 if ctl-arrow is non-nil, else 4
     0x20..0x7E               -> 1
     0x80 and above           -> WIDTH_BY_CHAR_HEAD (C) if the buffer is
                                 multibyte and C is a base leading code,
                                 else 4

   C is evaluated more than once, so it must not have side effects.  */

#define ONE_BYTE_CHAR_WIDTH(c) \
  ((c) < 0x20 \
   ? ((c) == '\t' \
      ? XFASTINT (current_buffer->tab_width) \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f \
      ? 1 \
      : ((c) == 0x7F \
	 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
	 : ((! NILP (current_buffer->enable_multibyte_characters) \
	     && BASE_LEADING_CODE_P (c)) \
	    ? WIDTH_BY_CHAR_HEAD (c) \
	    : 4))))
1228
1229 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1230 doc: /* Return width of CH when displayed in the current buffer.
1231 The width is measured by how many columns it occupies on the screen.
1232 Tab is taken to occupy `tab-width' columns. */)
1233 (ch)
1234 Lisp_Object ch;
1235 {
1236 Lisp_Object val, disp;
1237 int c;
1238 struct Lisp_Char_Table *dp = buffer_display_table ();
1239
1240 CHECK_NUMBER (ch);
1241
1242 c = XINT (ch);
1243
1244 /* Get the way the display table would display it. */
1245 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1246
1247 if (VECTORP (disp))
1248 XSETINT (val, XVECTOR (disp)->size);
1249 else if (SINGLE_BYTE_CHAR_P (c))
1250 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1251 else
1252 {
1253 int charset = CHAR_CHARSET (c);
1254
1255 XSETFASTINT (val, CHARSET_WIDTH (charset));
1256 }
1257 return val;
1258 }
1259
1260 /* Return width of string STR of length LEN when displayed in the
1261 current buffer. The width is measured by how many columns it
1262 occupies on the screen. */
1263
1264 int
1265 strwidth (str, len)
1266 unsigned char *str;
1267 int len;
1268 {
1269 return c_string_width (str, len, -1, NULL, NULL);
1270 }
1271
1272 /* Return width of string STR of length LEN when displayed in the
1273 current buffer. The width is measured by how many columns it
1274 occupies on the screen. If PRECISION > 0, return the width of
1275 longest substring that doesn't exceed PRECISION, and set number of
1276 characters and bytes of the substring in *NCHARS and *NBYTES
1277 respectively. */
1278
1279 int
1280 c_string_width (str, len, precision, nchars, nbytes)
1281 const unsigned char *str;
1282 int len, precision, *nchars, *nbytes;
1283 {
1284 int i = 0, i_byte = 0;
1285 int width = 0;
1286 int chars;
1287 struct Lisp_Char_Table *dp = buffer_display_table ();
1288
1289 while (i_byte < len)
1290 {
1291 int bytes, thiswidth;
1292 Lisp_Object val;
1293
1294 if (dp)
1295 {
1296 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1297
1298 chars = 1;
1299 val = DISP_CHAR_VECTOR (dp, c);
1300 if (VECTORP (val))
1301 thiswidth = XVECTOR (val)->size;
1302 else
1303 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1304 }
1305 else
1306 {
1307 chars = 1;
1308 PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1309 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1310 }
1311
1312 if (precision > 0
1313 && (width + thiswidth > precision))
1314 {
1315 *nchars = i;
1316 *nbytes = i_byte;
1317 return width;
1318 }
1319 i++;
1320 i_byte += bytes;
1321 width += thiswidth;
1322 }
1323
1324 if (precision > 0)
1325 {
1326 *nchars = i;
1327 *nbytes = i_byte;
1328 }
1329
1330 return width;
1331 }
1332
1333 /* Return width of Lisp string STRING when displayed in the current
1334 buffer. The width is measured by how many columns it occupies on
1335 the screen while paying attention to compositions. If PRECISION >
1336 0, return the width of longest substring that doesn't exceed
1337 PRECISION, and set number of characters and bytes of the substring
1338 in *NCHARS and *NBYTES respectively. */
1339
1340 int
1341 lisp_string_width (string, precision, nchars, nbytes)
1342 Lisp_Object string;
1343 int precision, *nchars, *nbytes;
1344 {
1345 int len = SCHARS (string);
1346 int len_byte = SBYTES (string);
1347 const unsigned char *str = SDATA (string);
1348 int i = 0, i_byte = 0;
1349 int width = 0;
1350 struct Lisp_Char_Table *dp = buffer_display_table ();
1351
1352 while (i < len)
1353 {
1354 int chars, bytes, thiswidth;
1355 Lisp_Object val;
1356 int cmp_id;
1357 int ignore, end;
1358
1359 if (find_composition (i, -1, &ignore, &end, &val, string)
1360 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1361 >= 0))
1362 {
1363 thiswidth = composition_table[cmp_id]->width;
1364 chars = end - i;
1365 bytes = string_char_to_byte (string, end) - i_byte;
1366 }
1367 else if (dp)
1368 {
1369 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1370
1371 chars = 1;
1372 val = DISP_CHAR_VECTOR (dp, c);
1373 if (VECTORP (val))
1374 thiswidth = XVECTOR (val)->size;
1375 else
1376 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1377 }
1378 else
1379 {
1380 chars = 1;
1381 PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1382 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1383 }
1384
1385 if (precision > 0
1386 && (width + thiswidth > precision))
1387 {
1388 *nchars = i;
1389 *nbytes = i_byte;
1390 return width;
1391 }
1392 i += chars;
1393 i_byte += bytes;
1394 width += thiswidth;
1395 }
1396
1397 if (precision > 0)
1398 {
1399 *nchars = i;
1400 *nbytes = i_byte;
1401 }
1402
1403 return width;
1404 }
1405
1406 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1407 doc: /* Return width of STRING when displayed in the current buffer.
1408 Width is measured by how many columns it occupies on the screen.
1409 When calculating width of a multibyte character in STRING,
1410 only the base leading-code is considered; the validity of
1411 the following bytes is not checked. Tabs in STRING are always
1412 taken to occupy `tab-width' columns. */)
1413 (string)
1414 Lisp_Object string;
1415 {
1416 Lisp_Object val;
1417
1418 CHECK_STRING (string);
1419 XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
1420 return val;
1421 }
1422
1423 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1424 doc: /* Return the direction of CH.
1425 The returned value is 0 for left-to-right and 1 for right-to-left. */)
1426 (ch)
1427 Lisp_Object ch;
1428 {
1429 int charset;
1430
1431 CHECK_NUMBER (ch);
1432 charset = CHAR_CHARSET (XFASTINT (ch));
1433 if (!CHARSET_DEFINED_P (charset))
1434 invalid_character (XINT (ch));
1435 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1436 }
1437
1438 /* Return the number of characters in the NBYTES bytes at PTR.
1439 This works by looking at the contents and checking for multibyte sequences.
1440 However, if the current buffer has enable-multibyte-characters = nil,
1441 we treat each byte as a character. */
1442
1443 int
1444 chars_in_text (ptr, nbytes)
1445 const unsigned char *ptr;
1446 int nbytes;
1447 {
1448 /* current_buffer is null at early stages of Emacs initialization. */
1449 if (current_buffer == 0
1450 || NILP (current_buffer->enable_multibyte_characters))
1451 return nbytes;
1452
1453 return multibyte_chars_in_text (ptr, nbytes);
1454 }
1455
/* Count the characters in the NBYTES bytes at PTR by scanning them
   for multibyte sequences.  Unlike chars_in_text, this pays no
   attention to enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     const unsigned char *ptr;
     int nbytes;
{
  const unsigned char *limit = ptr + nbytes;
  int count = 0;
  int seq_len;

  /* Each parsed sequence is one character, however many bytes long.  */
  for (; ptr < limit; count++)
    {
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seq_len);
      ptr += seq_len;
    }

  return count;
}
1480
/* Scan the LEN bytes of unibyte text at STR as if they were multibyte
   text.  Store the number of characters in *NCHARS and the number of
   bytes in *NBYTES.  A byte that does not start a multibyte sequence
   here is counted as 2 bytes, because 8-bit characters in the range
   0x80..0x9F take 2 bytes in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     const unsigned char *str;
     int len, *nchars, *nbytes;
{
  const unsigned char *limit = str + len;
  int char_count = 0, byte_count = 0;
  int seq_len;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, seq_len))
	{
	  /* A multibyte sequence is taken over unchanged.  */
	  str += seq_len;
	  byte_count += seq_len;
	}
      else
	{
	  /* A lone byte will need the two-byte form.  */
	  str++;
	  byte_count += 2;
	}
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1505
1506 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1507 It actually converts only 8-bit characters in the range 0x80..0x9F
1508 that don't contruct multibyte characters to multibyte forms. If
1509 NCHARS is nonzero, set *NCHARS to the number of characters in the
1510 text. It is assured that we can use LEN bytes at STR as a work
1511 area and that is enough. Return the number of bytes of the
1512 resulting text. */
1513
1514 int
1515 str_as_multibyte (str, len, nbytes, nchars)
1516 unsigned char *str;
1517 int len, nbytes, *nchars;
1518 {
1519 unsigned char *p = str, *endp = str + nbytes;
1520 unsigned char *to;
1521 int chars = 0;
1522 int n;
1523
1524 while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1525 p += n, chars++;
1526 if (nchars)
1527 *nchars = chars;
1528 if (p == endp)
1529 return nbytes;
1530
1531 to = p;
1532 nbytes = endp - p;
1533 endp = str + len;
1534 safe_bcopy (p, endp - nbytes, nbytes);
1535 p = endp - nbytes;
1536 while (p < endp)
1537 {
1538 if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1539 {
1540 while (n--)
1541 *to++ = *p++;
1542 }
1543 else
1544 {
1545 *to++ = LEADING_CODE_8_BIT_CONTROL;
1546 *to++ = *p++ + 0x20;
1547 }
1548 chars++;
1549 }
1550 if (nchars)
1551 *nchars = chars;
1552 return (to - str);
1553 }
1554
/* Scan the LEN bytes of the unibyte string at STR and return the
   number of bytes it may occupy once converted to a multibyte string
   by `str_to_multibyte'.  Bytes in the range 0x80..0x9F count as two
   bytes, every other byte as one.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      if (str[i] >= 0x80 && str[i] < 0xA0)
	total += 2;
      else
	total += 1;
    }

  return total;
}
1571
1572 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1573 that contains the same single-byte characters. It actually
1574 converts all 8-bit characters to multibyte forms. It is assured
1575 that we can use LEN bytes at STR as a work area and that is
1576 enough. */
1577
1578 int
1579 str_to_multibyte (str, len, bytes)
1580 unsigned char *str;
1581 int len, bytes;
1582 {
1583 unsigned char *p = str, *endp = str + bytes;
1584 unsigned char *to;
1585
1586 while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1587 if (p == endp)
1588 return bytes;
1589 to = p;
1590 bytes = endp - p;
1591 endp = str + len;
1592 safe_bcopy (p, endp - bytes, bytes);
1593 p = endp - bytes;
1594 while (p < endp)
1595 {
1596 if (*p < 0x80 || *p >= 0xA0)
1597 *to++ = *p++;
1598 else
1599 *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1600 }
1601 return (to - str);
1602 }
1603
1604 /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
1605 actually converts only 8-bit characters in the range 0x80..0x9F to
1606 unibyte forms. */
1607
1608 int
1609 str_as_unibyte (str, bytes)
1610 unsigned char *str;
1611 int bytes;
1612 {
1613 unsigned char *p = str, *endp = str + bytes;
1614 unsigned char *to = str;
1615
1616 while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1617 to = p;
1618 while (p < endp)
1619 {
1620 if (*p == LEADING_CODE_8_BIT_CONTROL)
1621 *to++ = *(p + 1) - 0x20, p += 2;
1622 else
1623 *to++ = *p++;
1624 }
1625 return (to - str);
1626 }
1627
1628 \f
1629 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1630 doc: /* Concatenate all the argument characters and make the result a string.
1631 usage: (string &rest CHARACTERS) */)
1632 (n, args)
1633 int n;
1634 Lisp_Object *args;
1635 {
1636 int i, bufsize;
1637 unsigned char *buf, *p;
1638 int c;
1639 int multibyte = 0;
1640 Lisp_Object ret;
1641 USE_SAFE_ALLOCA;
1642
1643 bufsize = MAX_MULTIBYTE_LENGTH * n;
1644 SAFE_ALLOCA (buf, unsigned char *, bufsize);
1645 p = buf;
1646
1647 for (i = 0; i < n; i++)
1648 {
1649 CHECK_NUMBER (args[i]);
1650 if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1651 multibyte = 1;
1652 }
1653
1654 for (i = 0; i < n; i++)
1655 {
1656 c = XINT (args[i]);
1657 if (multibyte)
1658 p += CHAR_STRING (c, p);
1659 else
1660 *p++ = c;
1661 }
1662
1663 ret = make_string_from_bytes (buf, n, p - buf);
1664 SAFE_FREE ();
1665
1666 return ret;
1667 }
1668
1669 #endif /* emacs */
1670 \f
1671 int
1672 charset_id_internal (charset_name)
1673 char *charset_name;
1674 {
1675 Lisp_Object val;
1676
1677 val= Fget (intern (charset_name), Qcharset);
1678 if (!VECTORP (val))
1679 error ("Charset %s is not defined", charset_name);
1680
1681 return (XINT (XVECTOR (val)->contents[0]));
1682 }
1683
1684 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1685 Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only. */)
1686 ()
1687 {
1688 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1689 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1690 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1691 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1692 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1693 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1694 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1695 charset_mule_unicode_0100_24ff
1696 = charset_id_internal ("mule-unicode-0100-24ff");
1697 charset_mule_unicode_2500_33ff
1698 = charset_id_internal ("mule-unicode-2500-33ff");
1699 charset_mule_unicode_e000_ffff
1700 = charset_id_internal ("mule-unicode-e000-ffff");
1701 return Qnil;
1702 }
1703
1704 void
1705 init_charset_once ()
1706 {
1707 int i, j, k;
1708
1709 staticpro (&Vcharset_table);
1710 staticpro (&Vcharset_symbol_table);
1711 staticpro (&Vgeneric_character_list);
1712
1713 /* This has to be done here, before we call Fmake_char_table. */
1714 Qcharset_table = intern ("charset-table");
1715 staticpro (&Qcharset_table);
1716
1717 /* Intern this now in case it isn't already done.
1718 Setting this variable twice is harmless.
1719 But don't staticpro it here--that is done in alloc.c. */
1720 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1721
1722 /* Now we are ready to set up this property, so we can
1723 create the charset table. */
1724 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1725 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1726
1727 Qunknown = intern ("unknown");
1728 staticpro (&Qunknown);
1729 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1730 Qunknown);
1731
1732 /* Setup tables. */
1733 for (i = 0; i < 2; i++)
1734 for (j = 0; j < 2; j++)
1735 for (k = 0; k < 128; k++)
1736 iso_charset_table [i][j][k] = -1;
1737
1738 for (i = 0; i < 256; i++)
1739 bytes_by_char_head[i] = 1;
1740 bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1741 bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1742 bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1743 bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1744
1745 for (i = 0; i < 128; i++)
1746 width_by_char_head[i] = 1;
1747 for (; i < 256; i++)
1748 width_by_char_head[i] = 4;
1749 width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1750 width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1751 width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1752 width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1753
1754 {
1755 Lisp_Object val;
1756
1757 val = Qnil;
1758 for (i = 0x81; i < 0x90; i++)
1759 val = Fcons (make_number ((i - 0x70) << 7), val);
1760 for (; i < 0x9A; i++)
1761 val = Fcons (make_number ((i - 0x8F) << 14), val);
1762 for (i = 0xA0; i < 0xF0; i++)
1763 val = Fcons (make_number ((i - 0x70) << 7), val);
1764 for (; i < 0xFF; i++)
1765 val = Fcons (make_number ((i - 0xE0) << 14), val);
1766 Vgeneric_character_list = Fnreverse (val);
1767 }
1768
1769 nonascii_insert_offset = 0;
1770 Vnonascii_translation_table = Qnil;
1771 }
1772
1773 #ifdef emacs
1774
1775 void
1776 syms_of_charset ()
1777 {
1778 Qcharset = intern ("charset");
1779 staticpro (&Qcharset);
1780
1781 Qascii = intern ("ascii");
1782 staticpro (&Qascii);
1783
1784 Qeight_bit_control = intern ("eight-bit-control");
1785 staticpro (&Qeight_bit_control);
1786
1787 Qeight_bit_graphic = intern ("eight-bit-graphic");
1788 staticpro (&Qeight_bit_graphic);
1789
1790 /* Define special charsets ascii, eight-bit-control, and
1791 eight-bit-graphic. */
1792 update_charset_table (make_number (CHARSET_ASCII),
1793 make_number (1), make_number (94),
1794 make_number (1),
1795 make_number (0),
1796 make_number ('B'),
1797 make_number (0),
1798 build_string ("ASCII"),
1799 Qnil, /* same as above */
1800 build_string ("ASCII (ISO646 IRV)"));
1801 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1802 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1803
1804 update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1805 make_number (1), make_number (96),
1806 make_number (4),
1807 make_number (0),
1808 make_number (-1),
1809 make_number (-1),
1810 build_string ("8-bit control code (0x80..0x9F)"),
1811 Qnil, /* same as above */
1812 Qnil); /* same as above */
1813 CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1814 Fput (Qeight_bit_control, Qcharset,
1815 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1816
1817 update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1818 make_number (1), make_number (96),
1819 make_number (4),
1820 make_number (0),
1821 make_number (-1),
1822 make_number (-1),
1823 build_string ("8-bit graphic char (0xA0..0xFF)"),
1824 Qnil, /* same as above */
1825 Qnil); /* same as above */
1826 CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1827 Fput (Qeight_bit_graphic, Qcharset,
1828 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1829
1830 Qauto_fill_chars = intern ("auto-fill-chars");
1831 staticpro (&Qauto_fill_chars);
1832 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1833
1834 defsubr (&Sdefine_charset);
1835 defsubr (&Sgeneric_character_list);
1836 defsubr (&Sget_unused_iso_final_char);
1837 defsubr (&Sdeclare_equiv_charset);
1838 defsubr (&Sfind_charset_region);
1839 defsubr (&Sfind_charset_string);
1840 defsubr (&Smake_char_internal);
1841 defsubr (&Ssplit_char);
1842 defsubr (&Schar_charset);
1843 defsubr (&Scharset_after);
1844 defsubr (&Siso_charset);
1845 defsubr (&Schar_valid_p);
1846 defsubr (&Sunibyte_char_to_multibyte);
1847 defsubr (&Smultibyte_char_to_unibyte);
1848 defsubr (&Schar_bytes);
1849 defsubr (&Schar_width);
1850 defsubr (&Sstring_width);
1851 defsubr (&Schar_direction);
1852 defsubr (&Sstring);
1853 defsubr (&Ssetup_special_charsets);
1854
1855 DEFVAR_LISP ("charset-list", &Vcharset_list,
1856 doc: /* List of charsets ever defined. */);
1857 Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1858 Fcons (Qeight_bit_graphic, Qnil)));
1859
1860 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
1861 doc: /* Vector of cons cell of a symbol and translation table ever defined.
1862 An ID of a translation table is an index of this vector. */);
1863 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1864
1865 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1866 doc: /* Leading-code of private TYPE9N charset of column-width 1. */);
1867 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1868
1869 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1870 doc: /* Leading-code of private TYPE9N charset of column-width 2. */);
1871 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1872
1873 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1874 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1. */);
1875 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1876
1877 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1878 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2. */);
1879 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1880
1881 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1882 doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1883 This is used for converting unibyte text to multibyte,
1884 and for inserting character codes specified by number.
1885
1886 This serves to convert a Latin-1 or similar 8-bit character code
1887 to the corresponding Emacs multibyte character code.
1888 Typically the value should be (- (make-char CHARSET 0) 128),
1889 for your choice of character set.
1890 If `nonascii-translation-table' is non-nil, it overrides this variable. */);
1891 nonascii_insert_offset = 0;
1892
1893 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1894 doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1895 This is used for converting unibyte text to multibyte,
1896 and for inserting character codes specified by number.
1897
1898 Conversion is performed only when multibyte characters are enabled,
1899 and it serves to convert a Latin-1 or similar 8-bit character code
1900 to the corresponding Emacs character code.
1901
1902 If this is nil, `nonascii-insert-offset' is used instead.
1903 See also the docstring of `make-translation-table'. */);
1904 Vnonascii_translation_table = Qnil;
1905
1906 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1907 doc: /* A char-table for characters which invoke auto-filling.
1908 Such characters have value t in this table. */);
1909 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1910 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1911 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1912 }
1913
1914 #endif /* emacs */
1915
1916 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1917 (do not change this comment) */