(define-charset): New args :min-code and :max-code.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2
KH
1/* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
12bcae05 4 Copyright (C) 2001 Free Software Foundation, Inc.
3263d5a2
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869 25
68c45bf0
PE
26#ifdef emacs
27#include <config.h>
28#endif
29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869
KH
33
34#ifdef emacs
35
36#include <sys/types.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869
KH
43
44#else /* not emacs */
45
46#include "mulelib.h"
47
48#endif /* emacs */
49
4ed46869 50
04c2f2c5 51/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 52
3263d5a2 53 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 54 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 55 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
56 code, a charset is represented by a symbol. In C code, a charset is
57 represented by its ID number or by a pointer to a struct charset.
4ed46869 58
3263d5a2
KH
59 The actual information about each charset is stored in two places.
60 Lispy information is stored in the hash table Vcharset_hash_table as
61 a vector (charset attributes). The other information is stored in
04c2f2c5 62 charset_table as a struct charset.
4ed46869 63
3263d5a2 64*/
4ed46869 65
3263d5a2
KH
66/* List of all charsets. This variable is used only from Emacs
67 Lisp. */
4ed46869
KH
68Lisp_Object Vcharset_list;
69
3263d5a2
KH
70/* Hash table that contains attributes of each charset. Keys are
71 charset symbols, and values are vectors of charset attributes. */
72Lisp_Object Vcharset_hash_table;
73
74/* Table of struct charset. */
75struct charset *charset_table;
76
77static int charset_table_size;
78int charset_table_used;
79
80Lisp_Object Qcharsetp;
81
82/* Special charset symbols. */
83Lisp_Object Qascii;
84Lisp_Object Qeight_bit_control;
85Lisp_Object Qeight_bit_graphic;
86Lisp_Object Qiso_8859_1;
87Lisp_Object Qunicode;
b0e3cf2b 88
3263d5a2
KH
89/* The corresponding charsets. */
90int charset_ascii;
91int charset_8_bit_control;
92int charset_8_bit_graphic;
93int charset_iso_8859_1;
94int charset_unicode;
c1a08b4c 95
3263d5a2
KH
96/* Value of charset attribute `charset-iso-plane'. */
97Lisp_Object Qgl, Qgr;
c1a08b4c 98
3263d5a2
KH
99/* The primary charset. It is a charset of unibyte characters. */
100int charset_primary;
101
102/* List of charsets ordered by the priority. */
103Lisp_Object Vcharset_ordered_list;
104
105/* List of iso-2022 charsets. */
106Lisp_Object Viso_2022_charset_list;
107
108/* List of emacs-mule charsets. */
109Lisp_Object Vemacs_mule_charset_list;
110
111struct charset *emacs_mule_charset[256];
4ed46869
KH
112
113/* Mapping table from ISO2022's charset (specified by DIMENSION,
114 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
115int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
116
117Lisp_Object Vcharset_map_directory;
118
119Lisp_Object Vchar_unified_charset_table;
120
69f8de5b
KH
121#define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
123 ? (code) - (charset)->min_code \
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
128 ? (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
820ee249
KH
134 + (((code) & 0xFF) - (charset)->code_space[0]) \
135 - ((charset)->char_index_offset)) \
3263d5a2
KH
136 : -1)
137
138
139/* Convert the character index IDX to code-point CODE for CHARSET.
140 It is assumed that IDX is in a valid range. */
141
820ee249
KH
142#define INDEX_TO_CODE_POINT(charset, idx) \
143 ((charset)->code_linear_p \
144 ? (idx) + (charset)->min_code \
145 : (idx += (charset)->char_index_offset, \
146 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
147 | (((charset)->code_space[4] \
148 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
149 << 8) \
150 | (((charset)->code_space[8] \
151 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
152 << 16) \
153 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
154 << 24))))
155
4ed46869 156
3263d5a2 157\f
4ed46869 158
e9ce014c
KH
159/* Set to 1 to warn that a charset map is loaded and thus a buffer
160 text and a string data may be relocated. */
3263d5a2 161int charset_map_loaded;
35e623fb 162
e9ce014c
KH
163struct charset_map_entries
164{
165 struct {
166 unsigned from, to;
167 int c;
168 } entry[0x10000];
169 struct charset_map_entries *next;
170};
171
172/* Load the mapping information for CHARSET from ENTRIES.
4cf9710d 173
3263d5a2 174 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
8a73a704 175
3263d5a2
KH
176 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
177 CHARSET->decoder, and CHARSET->encoder.
93bcb785 178
3263d5a2
KH
179 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
180 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
181 setup it too. */
4ed46869 182
3263d5a2 183static void
e9ce014c 184load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 185 struct charset *charset;
e9ce014c
KH
186 struct charset_map_entries *entries;
187 int n_entries;
3263d5a2 188 int control_flag;
4ed46869 189{
3263d5a2 190 Lisp_Object vec, table;
3263d5a2
KH
191 unsigned max_code = CHARSET_MAX_CODE (charset);
192 int ascii_compatible_p = charset->ascii_compatible_p;
193 int min_char, max_char, nonascii_min_char;
3263d5a2 194 int i;
3263d5a2 195 unsigned char *fast_map = charset->fast_map;
99529c2c 196
e9ce014c
KH
197 if (n_entries <= 0)
198 return;
199
200 if (control_flag > 0)
8ac5a9cc 201 {
3263d5a2
KH
202 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
203 unsigned invalid_code = CHARSET_INVALID_CODE (charset);
6662e69b 204
3263d5a2
KH
205 table = Fmake_char_table (Qnil, make_number (invalid_code));
206 if (control_flag == 1)
207 vec = Fmake_vector (make_number (n), make_number (-1));
208 else if (! CHAR_TABLE_P (Vchar_unify_table))
209 Vchar_unify_table = Fmake_char_table (Qnil, make_number (-1));
6662e69b 210
3263d5a2 211 charset_map_loaded = 1;
2e344af3 212 }
3263d5a2 213
e9ce014c 214 min_char = max_char = entries->entry[0].c;
3263d5a2 215 nonascii_min_char = MAX_CHAR;
e9ce014c 216 for (i = 0; i < n_entries; i++)
2e344af3 217 {
e9ce014c 218 unsigned from, to;
04c2f2c5 219 int c;
e9ce014c 220 int idx = i % 0x10000;
3263d5a2 221
e9ce014c
KH
222 if (i > 0 && idx == 0)
223 entries = entries->next;
224 from = entries->entry[idx].from;
225 to = entries->entry[idx].to;
226 c = entries->entry[idx].c;
3263d5a2 227
3263d5a2
KH
228 if (control_flag < 2)
229 {
e9ce014c 230 if (control_flag == 1)
3263d5a2 231 {
e9ce014c
KH
232 unsigned code = from;
233 int from_index, to_index;
234
235 from_index = CODE_POINT_TO_INDEX (charset, from);
236 if (from == to)
237 to_index = from_index;
238 else
239 to_index = CODE_POINT_TO_INDEX (charset, to);
240 if (from_index < 0 || to_index < 0)
241 continue;
242 if (CHARSET_COMPACT_CODES_P (charset))
243 while (1)
244 {
245 ASET (vec, from_index, make_number (c));
246 CHAR_TABLE_SET (table, c, make_number (code));
247 if (from_index == to_index)
248 break;
249 from_index++, c++;
250 code = INDEX_TO_CODE_POINT (charset, from_index);
251 }
252 else
253 for (; from_index <= to_index; from_index++, c++)
254 {
255 ASET (vec, from_index, make_number (c));
256 CHAR_TABLE_SET (table, c, make_number (from_index));
257 }
3263d5a2 258 }
e9ce014c
KH
259
260 if (c > max_char)
3263d5a2
KH
261 max_char = c;
262 else if (c < min_char)
263 min_char = c;
264 if (ascii_compatible_p && ! ASCII_BYTE_P (c)
265 && c < nonascii_min_char)
266 nonascii_min_char = c;
267
268 CHARSET_FAST_MAP_SET (c, fast_map);
269 }
e9ce014c 270 else
2e344af3 271 {
69f8de5b
KH
272 unsigned code = from;
273 int from_index, to_index;
e9ce014c 274
69f8de5b
KH
275 from_index = CODE_POINT_TO_INDEX (charset, from);
276 if (from == to)
277 to_index = from_index;
278 else
279 to_index = CODE_POINT_TO_INDEX (charset, to);
280 if (from_index < 0 || to_index < 0)
281 continue;
282 while (1)
283 {
284 int c1 = DECODE_CHAR (charset, code);
285
3263d5a2
KH
286 if (c1 >= 0)
287 {
288 CHAR_TABLE_SET (table, c, make_number (c1));
289 CHAR_TABLE_SET (Vchar_unify_table, c1, c);
290 if (CHAR_TABLE_P (Vchar_unified_charset_table))
291 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
292 CHARSET_NAME (charset));
293 }
69f8de5b
KH
294 if (from_index == to_index)
295 break;
296 from_index++, c++;
297 code = INDEX_TO_CODE_POINT (charset, from_index);
3263d5a2 298 }
2e344af3 299 }
8ac5a9cc 300 }
3263d5a2
KH
301
302 if (control_flag < 2)
4ed46869 303 {
3263d5a2
KH
304 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
305 ? nonascii_min_char : min_char);
306 CHARSET_MAX_CHAR (charset) = max_char;
e9ce014c 307 if (control_flag == 1)
4ed46869 308 {
3263d5a2
KH
309 CHARSET_DECODER (charset) = vec;
310 CHARSET_ENCODER (charset) = table;
4ed46869
KH
311 }
312 }
2e344af3 313 else
3263d5a2 314 CHARSET_DEUNIFIER (charset) = table;
4ed46869
KH
315}
316
12bcae05 317
3263d5a2
KH
318/* Read a hexadecimal number (preceded by "0x") from the file FP while
319 paying attention to comment charcter '#'. */
12bcae05 320
3263d5a2
KH
321static INLINE unsigned
322read_hex (fp, eof)
323 FILE *fp;
324 int *eof;
12bcae05 325{
3263d5a2
KH
326 int c;
327 unsigned n;
12bcae05 328
3263d5a2
KH
329 while ((c = getc (fp)) != EOF)
330 {
69f8de5b 331 if (c == '#')
3263d5a2
KH
332 {
333 while ((c = getc (fp)) != EOF && c != '\n');
334 }
335 else if (c == '0')
336 {
337 if ((c = getc (fp)) == EOF || c == 'x')
338 break;
339 }
340 }
341 if (c == EOF)
342 {
343 *eof = 1;
344 return 0;
345 }
346 *eof = 0;
347 n = 0;
348 if (c == 'x')
349 while ((c = getc (fp)) != EOF && isxdigit (c))
350 n = ((n << 4)
351 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
352 else
353 while ((c = getc (fp)) != EOF && isdigit (c))
354 n = (n * 10) + c - '0';
e9ce014c
KH
355 if (c != EOF)
356 ungetc (c, fp);
3263d5a2
KH
357 return n;
358}
12bcae05 359
537efd8d 360
3263d5a2 361/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
362 Each line of MAPFILE has this form
363 0xAAAA 0xCCCC
364 where 0xAAAA is a code-point and 0xCCCC is the corresponding
365 character code, or this form
366 0xAAAA-0xBBBB 0xCCCC
367 where 0xAAAA and 0xBBBB are code-points specifying a range, and
368 0xCCCC is the first character code of the range.
369
3263d5a2
KH
370 The returned vector has this form:
371 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
372 where CODE1 is a code-point or a cons of code-points specifying a
373 range. */
4ed46869 374
c449997d
KH
375extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
376
e9ce014c
KH
377static void
378load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
379 struct charset *charset;
380 Lisp_Object mapfile;
e9ce014c 381 int control_flag;
4ed46869 382{
e9ce014c
KH
383 unsigned min_code = CHARSET_MIN_CODE (charset);
384 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
385 int fd;
386 FILE *fp;
3263d5a2
KH
387 int eof;
388 Lisp_Object suffixes;
e9ce014c
KH
389 struct charset_map_entries *head, *entries;
390 int n_entries;
4ed46869 391
3263d5a2
KH
392 suffixes = Fcons (build_string (".map"),
393 Fcons (build_string (".TXT"), Qnil));
4ed46869 394
3263d5a2
KH
395 fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
396 NULL, 0);
397 if (fd < 0
398 || ! (fp = fdopen (fd, "r")))
399 {
400 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 401 return;
3263d5a2 402 }
4ed46869 403
e9ce014c
KH
404 head = entries = ((struct charset_map_entries *)
405 alloca (sizeof (struct charset_map_entries)));
406 n_entries = 0;
3263d5a2
KH
407 eof = 0;
408 while (1)
409 {
e9ce014c
KH
410 unsigned from, to;
411 int c;
412 int idx;
4ed46869 413
e9ce014c 414 from = read_hex (fp, &eof);
3263d5a2
KH
415 if (eof)
416 break;
e9ce014c
KH
417 if (getc (fp) == '-')
418 to = read_hex (fp, &eof);
419 else
420 to = from;
421 c = (int) read_hex (fp, &eof);
422
423 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
424 continue;
425
426 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 427 {
e9ce014c
KH
428 entries->next = ((struct charset_map_entries *)
429 alloca (sizeof (struct charset_map_entries)));
430 entries = entries->next;
3263d5a2 431 }
e9ce014c
KH
432 idx = n_entries % 0x10000;
433 entries->entry[idx].from = from;
434 entries->entry[idx].to = to;
435 entries->entry[idx].c = c;
436 n_entries++;
3263d5a2
KH
437 }
438 fclose (fp);
439 close (fd);
4ed46869 440
e9ce014c
KH
441 load_charset_map (charset, head, n_entries, control_flag);
442}
443
444static void
445load_charset_map_from_vector (charset, vec, control_flag)
446 struct charset *charset;
447 Lisp_Object vec;
448 int control_flag;
449{
450 unsigned min_code = CHARSET_MIN_CODE (charset);
451 unsigned max_code = CHARSET_MAX_CODE (charset);
452 struct charset_map_entries *head, *entries;
453 int n_entries;
454 int len = ASIZE (vec);
455 int i;
456
457 if (len % 2 == 1)
3263d5a2 458 {
e9ce014c
KH
459 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
460 return;
3263d5a2 461 }
4ed46869 462
e9ce014c
KH
463 head = entries = ((struct charset_map_entries *)
464 alloca (sizeof (struct charset_map_entries)));
465 n_entries = 0;
466 for (i = 0; i < len; i += 2)
467 {
468 Lisp_Object val, val2;
469 unsigned from, to;
470 int c;
471 int idx;
3263d5a2 472
e9ce014c
KH
473 val = AREF (vec, i);
474 if (CONSP (val))
475 {
476 val2 = XCDR (val);
477 val = XCAR (val);
478 CHECK_NATNUM (val);
479 CHECK_NATNUM (val2);
480 from = XFASTINT (val);
481 to = XFASTINT (val2);
482 }
483 else
484 {
485 CHECK_NATNUM (val);
486 from = to = XFASTINT (val);
487 }
488 val = AREF (vec, i + 1);
489 CHECK_NATNUM (val);
490 c = XFASTINT (val);
491
492 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
493 continue;
494
495 if ((n_entries % 0x10000) == 0)
496 {
497 entries->next = ((struct charset_map_entries *)
498 alloca (sizeof (struct charset_map_entries)));
499 entries = entries->next;
500 }
501 idx = n_entries % 0x10000;
502 entries->entry[idx].from = from;
503 entries->entry[idx].to = to;
504 entries->entry[idx].c = c;
505 n_entries++;
506 }
507
508 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
509}
510
3263d5a2
KH
511static void
512load_charset (charset)
513 struct charset *charset;
ac4137cc 514{
3263d5a2
KH
515 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
516 {
517 Lisp_Object map;
ac4137cc 518
3263d5a2
KH
519 map = CHARSET_MAP (charset);
520 if (STRINGP (map))
e9ce014c
KH
521 load_charset_map_from_file (charset, map, 1);
522 else
523 load_charset_map_from_vector (charset, map, 1);
3263d5a2
KH
524 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
525 }
4ed46869
KH
526}
527
3263d5a2
KH
528
529DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
530 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
531 (object)
532 Lisp_Object object;
23d2a7f1 533{
3263d5a2 534 return (CHARSETP (object) ? Qt : Qnil);
23d2a7f1
KH
535}
536
35e623fb 537
3263d5a2
KH
538void
539map_charset_chars (c_function, function, charset_symbol, arg)
1721b6af 540 void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
3263d5a2 541 Lisp_Object function, charset_symbol, arg;
35e623fb 542{
3263d5a2
KH
543 int id;
544 struct charset *charset;
545 Lisp_Object range;
d2665018 546
3263d5a2
KH
547 CHECK_CHARSET_GET_ID (charset_symbol, id);
548 charset = CHARSET_FROM_ID (id);
549
550 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
551 load_charset (charset);
552
553 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
554 {
555 range = Fcons (make_number (CHARSET_MIN_CHAR (charset)),
556 make_number (CHARSET_MAX_CHAR (charset)));
557 if (NILP (function))
558 (*c_function) (arg, range, Qnil);
559 else
560 call2 (function, range, arg);
561 }
562 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
563 {
564 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
565 return;
566 if (CHARSET_ASCII_COMPATIBLE_P (charset))
bbf12bb3 567 {
3263d5a2
KH
568 range = Fcons (make_number (0), make_number (127));
569 if (NILP (function))
570 (*c_function) (arg, range, Qnil);
571 else
572 call2 (function, range, arg);
bbf12bb3 573 }
3263d5a2
KH
574 map_char_table (c_function, function, CHARSET_ENCODER (charset), arg,
575 0, NULL);
576 }
577 else /* i.e. CHARSET_METHOD_PARENT */
578 {
579 int from, to, c;
580 unsigned code;
581 int i, j, k, l;
582 int *code_space = CHARSET_CODE_SPACE (charset);
583 Lisp_Object val;
584
585 range = Fcons (Qnil, Qnil);
586 from = to = -2;
587 for (i = code_space[12]; i <= code_space[13]; i++)
588 for (j = code_space[8]; j <= code_space[9]; j++)
589 for (k = code_space[4]; k <= code_space[5]; k++)
590 for (l = code_space[0]; l <= code_space[1]; l++)
591 {
592 code = (i << 24) | (j << 16) | (k << 8) | l;
593 c = DECODE_CHAR (charset, code);
594 if (c == to + 1)
595 {
596 to++;
597 continue;
598 }
599 if (from >= 0)
600 {
601 if (from < to)
602 {
603 XSETCAR (range, make_number (from));
604 XSETCDR (range, make_number (to));
605 val = range;
606 }
607 else
608 val = make_number (from);
609 if (NILP (function))
610 (*c_function) (arg, val, Qnil);
611 else
612 call2 (function, val, arg);
613 }
614 from = to = (c < 0 ? -2 : c);
615 }
616 if (from >= 0)
bbf12bb3 617 {
3263d5a2
KH
618 if (from < to)
619 {
620 XSETCAR (range, make_number (from));
621 XSETCDR (range, make_number (to));
622 val = range;
623 }
624 else
625 val = make_number (from);
626 if (NILP (function))
627 (*c_function) (arg, val, Qnil);
628 else
629 call2 (function, val, arg);
bbf12bb3 630 }
35e623fb 631 }
3263d5a2
KH
632}
633
634DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 3, 0,
04c2f2c5
DL
635 doc: /* Call FUNCTION for all characters in CHARSET.
636FUNCTION is called with an argument RANGE and optional 2nd
3263d5a2 637argument ARG.
6abd9323 638
04c2f2c5
DL
639RANGE is either a cons (FROM . TO), where FROM and TO indicate a range of
640characters contained in CHARSET or a single character in the case that
641FROM and TO would be equal. (The charset mapping may have gaps.)*/)
3263d5a2
KH
642 (function, charset, arg)
643 Lisp_Object function, charset, arg;
644{
645 map_charset_chars (NULL, function, charset, arg);
646 return Qnil;
35e623fb 647}
76d7b829
KH
648
649
3263d5a2
KH
650/* Define a charset according to the arguments. The Nth argument is
651 the Nth attribute of the charset (the last attribute `charset-id'
652 is not included). See the docstring of `define-charset' for the
653 detail. */
76d7b829 654
3263d5a2
KH
655DEFUN ("define-charset-internal", Fdefine_charset_internal,
656 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
657 doc: /* For internal use only.
658usage: (define-charset-internal ...) */)
3263d5a2
KH
659 (nargs, args)
660 int nargs;
661 Lisp_Object *args;
76d7b829 662{
3263d5a2
KH
663 /* Charset attr vector. */
664 Lisp_Object attrs;
665 Lisp_Object val;
666 unsigned hash_code;
667 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 668 int i, j;
3263d5a2
KH
669 struct charset charset;
670 int id;
671 int dimension;
672 int new_definition_p;
673 int nchars;
674
675 if (nargs != charset_arg_max)
676 return Fsignal (Qwrong_number_of_arguments,
677 Fcons (intern ("define-charset-internal"),
678 make_number (nargs)));
679
680 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
681
682 CHECK_SYMBOL (args[charset_arg_name]);
683 ASET (attrs, charset_name, args[charset_arg_name]);
684
685 val = args[charset_arg_code_space];
686 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 687 {
3263d5a2
KH
688 int min_byte, max_byte;
689
690 min_byte = XINT (Faref (val, make_number (i * 2)));
691 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
692 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
693 error ("Invalid :code-space value");
694 charset.code_space[i * 4] = min_byte;
695 charset.code_space[i * 4 + 1] = max_byte;
696 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
697 nchars *= charset.code_space[i * 4 + 2];
698 charset.code_space[i * 4 + 3] = nchars;
699 if (max_byte > 0)
700 dimension = i + 1;
701 }
76d7b829 702
3263d5a2
KH
703 val = args[charset_arg_dimension];
704 if (NILP (val))
705 charset.dimension = dimension;
706 else
707 {
708 CHECK_NATNUM (val);
709 charset.dimension = XINT (val);
710 if (charset.dimension < 1 || charset.dimension > 4)
711 args_out_of_range_3 (val, make_number (1), make_number (4));
712 }
713
714 charset.code_linear_p
715 = (charset.dimension == 1
716 || (charset.code_space[2] == 256
717 && (charset.dimension == 2
718 || (charset.code_space[6] == 256
719 && (charset.dimension == 3
720 || charset.code_space[10] == 256)))));
721
69f8de5b
KH
722 if (! charset.code_linear_p)
723 {
724 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 725 bzero (charset.code_space_mask, 256);
69f8de5b
KH
726 for (i = 0; i < 4; i++)
727 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
728 j++)
729 charset.code_space_mask[j] |= (1 << i);
730 }
731
3263d5a2
KH
732 charset.iso_chars_96 = charset.code_space[2] == 96;
733
734 charset.min_code = (charset.code_space[0]
735 | (charset.code_space[4] << 8)
736 | (charset.code_space[8] << 16)
737 | (charset.code_space[12] << 24));
738 charset.max_code = (charset.code_space[1]
739 | (charset.code_space[5] << 8)
740 | (charset.code_space[9] << 16)
741 | (charset.code_space[13] << 24));
820ee249
KH
742 charset.char_index_offset = 0;
743
744 val = args[charset_arg_min_code];
745 if (! NILP (val))
746 {
747 unsigned code;
748
749 if (INTEGERP (val))
750 code = XINT (val);
751 else
752 {
753 CHECK_CONS (val);
754 CHECK_NUMBER (XCAR (val));
755 CHECK_NUMBER (XCDR (val));
756 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
757 }
758 if (code < charset.min_code
759 || code > charset.max_code)
760 args_out_of_range_3 (make_number (charset.min_code),
761 make_number (charset.max_code), val);
762 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
763 charset.min_code = code;
764 }
765
766 val = args[charset_arg_max_code];
767 if (! NILP (val))
768 {
769 unsigned code;
770
771 if (INTEGERP (val))
772 code = XINT (val);
773 else
774 {
775 CHECK_CONS (val);
776 CHECK_NUMBER (XCAR (val));
777 CHECK_NUMBER (XCDR (val));
778 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
779 }
780 if (code < charset.min_code
781 || code > charset.max_code)
782 args_out_of_range_3 (make_number (charset.min_code),
783 make_number (charset.max_code), val);
784 charset.max_code = code;
785 }
3263d5a2 786
e9ce014c
KH
787 charset.compact_codes_p = charset.max_code < 0x1000000;
788
3263d5a2
KH
789 val = args[charset_arg_invalid_code];
790 if (NILP (val))
791 {
792 if (charset.min_code > 0)
793 charset.invalid_code = 0;
bbf12bb3
KH
794 else
795 {
3263d5a2
KH
796 XSETINT (val, charset.max_code + 1);
797 if (XINT (val) == charset.max_code + 1)
798 charset.invalid_code = charset.max_code + 1;
799 else
800 error ("Attribute :invalid-code must be specified");
76d7b829 801 }
76d7b829 802 }
3263d5a2
KH
803 else
804 {
805 CHECK_NATNUM (val);
806 charset.invalid_code = XFASTINT (val);
807 }
76d7b829 808
3263d5a2
KH
809 val = args[charset_arg_iso_final];
810 if (NILP (val))
811 charset.iso_final = -1;
812 else
813 {
814 CHECK_NUMBER (val);
815 if (XINT (val) < '0' || XINT (val) > 127)
816 error ("Invalid iso-final-char: %d", XINT (val));
817 charset.iso_final = XINT (val);
818 }
819
820 val = args[charset_arg_iso_revision];
821 if (NILP (val))
822 charset.iso_revision = -1;
823 else
4ed46869 824 {
3263d5a2
KH
825 CHECK_NUMBER (val);
826 if (XINT (val) > 63)
827 args_out_of_range (make_number (63), val);
828 charset.iso_revision = XINT (val);
4ed46869 829 }
3263d5a2
KH
830
831 val = args[charset_arg_emacs_mule_id];
832 if (NILP (val))
833 charset.emacs_mule_id = -1;
4ed46869
KH
834 else
835 {
3263d5a2
KH
836 CHECK_NATNUM (val);
837 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
838 error ("Invalid emacs-mule-id: %d", XINT (val));
839 charset.emacs_mule_id = XINT (val);
c83ef371 840 }
6ef23ebb 841
3263d5a2 842 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
4ed46869 843
3263d5a2
KH
844 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
845
846 charset.unified_p = 0;
847
848 bzero (charset.fast_map, sizeof (charset.fast_map));
849
850 if (! NILP (args[charset_arg_code_offset]))
851 {
852 val = args[charset_arg_code_offset];
853 CHECK_NUMBER (val);
854
855 charset.method = CHARSET_METHOD_OFFSET;
856 charset.code_offset = XINT (val);
857
858 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
859 charset.min_char = i + charset.code_offset;
860 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
861 charset.max_char = i + charset.code_offset;
862 if (charset.max_char > MAX_CHAR)
863 error ("Unsupported max char: %d", charset.max_char);
864
865 for (i = charset.min_char; i < 0x10000 && i <= charset.max_char;
866 i += 128)
867 CHARSET_FAST_MAP_SET (i, charset.fast_map);
868 for (; i <= charset.max_char; i += 0x1000)
869 CHARSET_FAST_MAP_SET (i, charset.fast_map);
870 }
871 else if (! NILP (args[charset_arg_map]))
872 {
873 val = args[charset_arg_map];
874 ASET (attrs, charset_map, val);
875 if (STRINGP (val))
e9ce014c
KH
876 load_charset_map_from_file (&charset, val, 0);
877 else
878 load_charset_map_from_vector (&charset, val, 0);
3263d5a2
KH
879 charset.method = CHARSET_METHOD_MAP_DEFERRED;
880 }
881 else if (! NILP (args[charset_arg_parents]))
882 {
883 val = args[charset_arg_parents];
884 CHECK_LIST (val);
885 charset.method = CHARSET_METHOD_INHERIT;
886 val = Fcopy_sequence (val);
887 ASET (attrs, charset_parents, val);
888
889 charset.min_char = MAX_CHAR;
890 charset.max_char = 0;
891 for (; ! NILP (val); val = Fcdr (val))
4ed46869 892 {
3263d5a2
KH
893 Lisp_Object elt, car_part, cdr_part;
894 int this_id, offset;
895 struct charset *this_charset;
896
897 elt = Fcar (val);
898 if (CONSP (elt))
899 {
900 car_part = XCAR (elt);
901 cdr_part = XCDR (elt);
902 CHECK_CHARSET_GET_ID (car_part, this_id);
903 CHECK_NUMBER (cdr_part);
904 offset = XINT (cdr_part);
905 }
906 else
4ed46869 907 {
3263d5a2
KH
908 CHECK_CHARSET_GET_ID (elt, this_id);
909 offset = 0;
4ed46869 910 }
3263d5a2
KH
911 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
912
913 this_charset = CHARSET_FROM_ID (this_id);
914 if (charset.min_char > this_charset->min_char)
915 charset.min_char = this_charset->min_char;
916 if (charset.max_char < this_charset->max_char)
917 charset.max_char = this_charset->max_char;
918 for (i = 0; i < 190; i++)
919 charset.fast_map[i] |= this_charset->fast_map[i];
4ed46869 920 }
4ed46869 921 }
3263d5a2
KH
922 else
923 error ("None of :code-offset, :map, :parents are specified");
4ed46869 924
3263d5a2
KH
925 val = args[charset_arg_unify_map];
926 if (! NILP (val) && !STRINGP (val))
927 CHECK_VECTOR (val);
928 ASET (attrs, charset_unify_map, val);
4ed46869 929
3263d5a2
KH
930 CHECK_LIST (args[charset_arg_plist]);
931 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 932
3263d5a2
KH
933 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
934 &hash_code);
935 if (charset.hash_index >= 0)
936 {
937 new_definition_p = 0;
4f65af01 938 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
939 HASH_VALUE (hash_table, charset.hash_index) = attrs;
940 }
1a45ff10 941 else
3263d5a2
KH
942 {
943 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
944 hash_code);
945 if (charset_table_used == charset_table_size)
946 {
947 charset_table_size += 256;
948 charset_table
949 = ((struct charset *)
950 xrealloc (charset_table,
951 sizeof (struct charset) * charset_table_size));
952 }
953 id = charset_table_used++;
3263d5a2
KH
954 new_definition_p = 1;
955 }
4ed46869 956
4f65af01 957 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
958 charset.id = id;
959 charset_table[id] = charset;
960
961 if (charset.iso_final >= 0)
4ed46869 962 {
3263d5a2
KH
963 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
964 charset.iso_final) = id;
965 if (new_definition_p)
966 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
967 Fcons (make_number (id), Qnil));
4ed46869 968 }
3263d5a2
KH
969
970 if (charset.emacs_mule_id >= 0)
4ed46869 971 {
3263d5a2 972 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
973 if (charset.emacs_mule_id < 0xA0)
974 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3263d5a2
KH
975 if (new_definition_p)
976 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
977 Fcons (make_number (id), Qnil));
4ed46869
KH
978 }
979
3263d5a2
KH
980 if (new_definition_p)
981 {
982 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
983 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
984 Fcons (make_number (id), Qnil));
985 }
4ed46869 986
3263d5a2 987 return Qnil;
4ed46869
KH
988}
989
3263d5a2
KH
990DEFUN ("define-charset-alias", Fdefine_charset_alias,
991 Sdefine_charset_alias, 2, 2, 0,
992 doc: /* Define ALIAS as an alias for charset CHARSET. */)
993 (alias, charset)
994 Lisp_Object alias, charset;
4ed46869 995{
3263d5a2
KH
996 Lisp_Object attr;
997
998 CHECK_CHARSET_GET_ATTR (charset, attr);
999 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1000 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1001 return Qnil;
1002}
4ed46869 1003
4ed46869 1004
3263d5a2
KH
1005DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0,
1006 doc: /* Return the primary charset. */)
1007 ()
1008{
1009 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary));
1010}
4ed46869 1011
4ed46869 1012
3263d5a2
KH
1013DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset,
1014 1, 1, 0,
1015 doc: /* Set the primary charset to CHARSET. */)
1016 (charset)
1017 Lisp_Object charset;
1018{
1019 int id;
1020
1021 CHECK_CHARSET_GET_ID (charset, id);
1022 charset_primary = id;
4ed46869
KH
1023 return Qnil;
1024}
1025
3263d5a2
KH
1026
1027DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1028 doc: /* Return a property list of CHARSET. */)
1029 (charset)
1030 Lisp_Object charset;
1031{
1032 Lisp_Object attrs;
1033
1034 CHECK_CHARSET_GET_ATTR (charset, attrs);
1035 return CHARSET_ATTR_PLIST (attrs);
1036}
1037
1038
1039DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1040 doc: /* Set CHARSET's property list to PLIST. */)
1041 (charset, plist)
1042 Lisp_Object charset, plist;
1043{
1044 Lisp_Object attrs;
1045
1046 CHECK_CHARSET_GET_ATTR (charset, attrs);
1047 CHARSET_ATTR_PLIST (attrs) = plist;
1048 return plist;
1049}
1050
1051
1052DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 2, 0,
1053 doc: /* Unify characters of CHARSET with Unicode. */)
1054 (charset, unify_map)
1055 Lisp_Object charset, unify_map;
8a73a704 1056{
3263d5a2
KH
1057 int id;
1058 struct charset *cs;
1059
1060 CHECK_CHARSET_GET_ID (charset, id);
1061 cs = CHARSET_FROM_ID (id);
1062 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1063 load_charset (cs);
1064 if (CHARSET_UNIFIED_P (cs)
1065 && CHAR_TABLE_P (CHARSET_DEUNIFIER (cs)))
1066 return Qnil;
1067 CHARSET_UNIFIED_P (cs) = 0;
1068 if (NILP (unify_map))
1069 unify_map = CHARSET_UNIFY_MAP (cs);
1070 if (STRINGP (unify_map))
e9ce014c
KH
1071 load_charset_map_from_file (cs, unify_map, 2);
1072 else
1073 load_charset_map_from_vector (cs, unify_map, 2);
3263d5a2
KH
1074 CHARSET_UNIFIED_P (cs) = 1;
1075 return Qnil;
8a73a704
KH
1076}
1077
3fac5a51
KH
1078DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1079 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1080 doc: /*
1721b6af 1081Return an unsed ISO final char for a charset of DIMENISION and CHARS.
fdb82f93
PJ
1082DIMENSION is the number of bytes to represent a character: 1 or 2.
1083CHARS is the number of characters in a dimension: 94 or 96.
1084
1085This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1086If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1087return nil. */)
1088 (dimension, chars)
3fac5a51
KH
1089 Lisp_Object dimension, chars;
1090{
1091 int final_char;
1092
b7826503
PJ
1093 CHECK_NUMBER (dimension);
1094 CHECK_NUMBER (chars);
3263d5a2
KH
1095 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1096 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1097 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1098 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1099 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1100 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1101 break;
3fac5a51
KH
1102 return (final_char <= '?' ? make_number (final_char) : Qnil);
1103}
1104
3263d5a2
KH
1105static void
1106check_iso_charset_parameter (dimension, chars, final_char)
1107 Lisp_Object dimension, chars, final_char;
4ed46869 1108{
3263d5a2
KH
1109 CHECK_NATNUM (dimension);
1110 CHECK_NATNUM (chars);
1111 CHECK_NATNUM (final_char);
4ed46869 1112
3263d5a2
KH
1113 if (XINT (dimension) > 3)
1114 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1115 if (XINT (chars) != 94 && XINT (chars) != 96)
1116 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1117 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1118 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1119}
1120
1121
1122DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1123 4, 4, 0,
1124 doc: /*
1125Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
1126CHARSET should be defined by `defined-charset' in advance. */)
1127 (dimension, chars, final_char, charset)
1128 Lisp_Object dimension, chars, final_char, charset;
1129{
1130 int id;
4ed46869 1131
3263d5a2
KH
1132 CHECK_CHARSET_GET_ID (charset, id);
1133 check_iso_charset_parameter (dimension, chars, final_char);
1134
1135 ISO_CHARSET_TABLE (dimension, chars, final_char) = id;
4ed46869
KH
1136 return Qnil;
1137}
1138
3263d5a2 1139
2e344af3
KH
1140/* Return information about charsets in the text at PTR of NBYTES
1141 bytes, which are NCHARS characters. The value is:
f6302ac9 1142
cfe34140 1143 0: Each character is represented by one byte. This is always
3263d5a2
KH
1144 true for a unibyte string. For a multibyte string, true if
1145 it contains only ASCII characters.
1146
1147 1: No charsets other than ascii, eight-bit-control, and
1148 latin-1 are found.
1d67c29b 1149
3263d5a2
KH
1150 2: Otherwise.
1151*/
4ed46869
KH
1152
1153int
3263d5a2
KH
1154string_xstring_p (string)
1155 Lisp_Object string;
4ed46869 1156{
3263d5a2
KH
1157 unsigned char *p = XSTRING (string)->data;
1158 unsigned char *endp = p + STRING_BYTES (XSTRING (string));
1159 struct charset *charset;
1160
1161 if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
1162 return 0;
1163
1164 charset = CHARSET_FROM_ID (charset_iso_8859_1);
1165 while (p < endp)
0282eb69 1166 {
3263d5a2 1167 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1168
3263d5a2
KH
1169 if (ENCODE_CHAR (charset, c) < 0)
1170 return 2;
0282eb69 1171 }
3263d5a2
KH
1172 return 1;
1173}
05505664 1174
05505664 1175
3263d5a2 1176/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1177
3263d5a2
KH
1178 CHARSETS is a vector. Each element is a cons of CHARSET and
1179 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1180 FOUND-FLAG t (or nil) means that the corresponding charset is
1181 already found (or not yet found).
2e344af3 1182
3263d5a2 1183 It may lookup a translation table TABLE if supplied. */
2e344af3 1184
3263d5a2
KH
1185static void
1186find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
1187 unsigned char *ptr;
1188 int nchars, nbytes;
1189 Lisp_Object charsets, table;
1190{
1191 unsigned char *pend = ptr + nbytes;
1192 int ncharsets = ASIZE (charsets);
1193
1194 if (nchars == nbytes)
1195 return;
1196
1197 while (ptr < pend)
1198 {
1199 int c = STRING_CHAR_ADVANCE (ptr);
1200 int i;
1201 int all_found = 1;
1202 Lisp_Object elt;
1203
1204 if (!NILP (table))
1205 c = translate_char (table, c);
1206 for (i = 0; i < ncharsets; i++)
1207 {
1208 elt = AREF (charsets, i);
1209 if (NILP (XCDR (elt)))
1210 {
1211 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1212
1213 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
1214 XCDR (elt) = Qt;
1215 else
1216 all_found = 0;
1217 }
4ed46869 1218 }
3263d5a2
KH
1219 if (all_found)
1220 break;
4ed46869 1221 }
4ed46869
KH
1222}
1223
3263d5a2 1224
4ed46869 1225DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1226 2, 3, 0,
fdb82f93
PJ
1227 doc: /* Return a list of charsets in the region between BEG and END.
1228BEG and END are buffer positions.
1229Optional arg TABLE if non-nil is a translation table to look up.
1230
1231If the region contains invalid multibyte characters,
1232`unknown' is included in the returned list.
1233
1234If the current buffer is unibyte, the returned list may contain
1235only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1236 (beg, end, table)
23d2a7f1 1237 Lisp_Object beg, end, table;
4ed46869 1238{
3263d5a2 1239 Lisp_Object charsets;
6ae1f27e 1240 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
1241 Lisp_Object val;
1242
1243 validate_region (&beg, &end);
1244 from = XFASTINT (beg);
1245 stop = to = XFASTINT (end);
6ae1f27e 1246
4ed46869 1247 if (from < GPT && GPT < to)
6ae1f27e
RS
1248 {
1249 stop = GPT;
1250 stop_byte = GPT_BYTE;
1251 }
1252 else
1253 stop_byte = CHAR_TO_BYTE (stop);
1254
1255 from_byte = CHAR_TO_BYTE (from);
1256
3263d5a2
KH
1257 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1258 for (i = 0; i < charset_table_used; i++)
1259 ASET (charsets, i, Fcons (make_number (i), Qnil));
1260
4ed46869
KH
1261 while (1)
1262 {
3263d5a2
KH
1263 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1264 stop_byte - from_byte, charsets, table);
4ed46869 1265 if (stop < to)
6ae1f27e
RS
1266 {
1267 from = stop, from_byte = stop_byte;
1268 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1269 }
4ed46869
KH
1270 else
1271 break;
1272 }
6ae1f27e 1273
4ed46869 1274 val = Qnil;
3263d5a2
KH
1275 for (i = charset_table_used - 1; i >= 0; i--)
1276 if (!NILP (XCDR (AREF (charsets, i))))
1277 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1278 return val;
1279}
1280
1281DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1282 1, 2, 0,
fdb82f93
PJ
1283 doc: /* Return a list of charsets in STR.
1284Optional arg TABLE if non-nil is a translation table to look up.
1285
1286If the string contains invalid multibyte characters,
1287`unknown' is included in the returned list.
1288
1289If STR is unibyte, the returned list may contain
3263d5a2 1290only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1291 (str, table)
23d2a7f1 1292 Lisp_Object str, table;
4ed46869 1293{
3263d5a2 1294 Lisp_Object charsets;
4ed46869
KH
1295 int i;
1296 Lisp_Object val;
1297
b7826503 1298 CHECK_STRING (str);
87b089ad 1299
3263d5a2
KH
1300 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1301 find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
1302 STRING_BYTES (XSTRING (str)), charsets, table);
2e344af3 1303
4ed46869 1304 val = Qnil;
3263d5a2
KH
1305 for (i = charset_table_used - 1; i >= 0; i--)
1306 if (!NILP (XCDR (AREF (charsets, i))))
1307 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1308 return val;
1309}
2e344af3 1310
4ed46869 1311\f
3263d5a2
KH
1312
1313/* Return a character correponding to the code-point CODE of
1314 CHARSET. */
1315
1316int
1317decode_char (charset, code)
1318 struct charset *charset;
1319 unsigned code;
4ed46869 1320{
3263d5a2
KH
1321 int c, char_index;
1322 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1323
3263d5a2
KH
1324 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1325 return -1;
4ed46869 1326
3263d5a2 1327 if (method == CHARSET_METHOD_MAP_DEFERRED)
ac4137cc 1328 {
3263d5a2
KH
1329 load_charset (charset);
1330 method = CHARSET_METHOD (charset);
ac4137cc 1331 }
4ed46869 1332
3263d5a2 1333 if (method == CHARSET_METHOD_INHERIT)
2e344af3 1334 {
3263d5a2 1335 Lisp_Object parents;
4ed46869 1336
3263d5a2
KH
1337 parents = CHARSET_PARENTS (charset);
1338 c = -1;
1339 for (; CONSP (parents); parents = XCDR (parents))
1340 {
1341 int id = XINT (XCAR (XCAR (parents)));
1342 int code_offset = XINT (XCDR (XCAR (parents)));
1343 unsigned this_code = code + code_offset;
9d3d8cba 1344
3263d5a2
KH
1345 charset = CHARSET_FROM_ID (id);
1346 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1347 break;
1348 }
1349 }
1350 else
ac4137cc 1351 {
3263d5a2 1352 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1353 if (char_index < 0)
1354 return -1;
3263d5a2
KH
1355
1356 if (method == CHARSET_METHOD_MAP)
ac4137cc 1357 {
3263d5a2
KH
1358 Lisp_Object decoder;
1359
1360 decoder = CHARSET_DECODER (charset);
1361 if (! VECTORP (decoder))
1362 return -1;
1363 c = XINT (AREF (decoder, char_index));
ac4137cc
KH
1364 }
1365 else
1366 {
3263d5a2 1367 c = char_index + CHARSET_CODE_OFFSET (charset);
ac4137cc
KH
1368 }
1369 }
9d3d8cba 1370
3263d5a2
KH
1371 if (CHARSET_UNIFIED_P (charset)
1372 && c >= 0)
c449997d
KH
1373 {
1374 MAYBE_UNIFY_CHAR (c);
1375 }
d2665018 1376
3263d5a2 1377 return c;
d2665018
KH
1378}
1379
1bcc1567 1380
3263d5a2
KH
1381/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1382 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
1bcc1567 1383
3263d5a2
KH
1384unsigned
1385encode_char (charset, c)
1386 struct charset *charset;
9b6a601f
KH
1387 int c;
1388{
3263d5a2
KH
1389 unsigned code;
1390 enum charset_method method = CHARSET_METHOD (charset);
8ac5a9cc 1391
3263d5a2 1392 if (CHARSET_UNIFIED_P (charset))
4ed46869 1393 {
3263d5a2
KH
1394 Lisp_Object deunifier;
1395 int deunified;
4ed46869 1396
3263d5a2
KH
1397 deunifier = CHARSET_DEUNIFIER (charset);
1398 if (! CHAR_TABLE_P (deunifier))
1399 {
1400 Funify_charset (CHARSET_NAME (charset), Qnil);
1401 deunifier = CHARSET_DEUNIFIER (charset);
1402 }
1403 deunified = XINT (CHAR_TABLE_REF (deunifier, c));
1404 if (deunified > 0)
1405 c = deunified;
4ed46869 1406 }
beeedaad 1407
3263d5a2
KH
1408 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1409 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1410 return CHARSET_INVALID_CODE (charset);
beeedaad 1411
3263d5a2 1412 if (method == CHARSET_METHOD_INHERIT)
859f2b3c 1413 {
3263d5a2 1414 Lisp_Object parents;
859f2b3c 1415
3263d5a2
KH
1416 parents = CHARSET_PARENTS (charset);
1417 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1418 {
3263d5a2
KH
1419 int id = XINT (XCAR (XCAR (parents)));
1420 int code_offset = XINT (XCDR (XCAR (parents)));
1421 struct charset *this_charset = CHARSET_FROM_ID (id);
beeedaad 1422
3263d5a2
KH
1423 code = ENCODE_CHAR (this_charset, c);
1424 if (code != CHARSET_INVALID_CODE (this_charset)
1425 && (code_offset < 0 || code >= code_offset))
1426 {
1427 code -= code_offset;
69f8de5b
KH
1428 if (code >= charset->min_code && code <= charset->max_code
1429 && CODE_POINT_TO_INDEX (charset, code) >= 0)
3263d5a2
KH
1430 return code;
1431 }
beeedaad 1432 }
3263d5a2
KH
1433 return CHARSET_INVALID_CODE (charset);
1434 }
99529c2c 1435
3263d5a2 1436 if (method == CHARSET_METHOD_MAP_DEFERRED)
beeedaad 1437 {
3263d5a2
KH
1438 load_charset (charset);
1439 method = CHARSET_METHOD (charset);
859f2b3c 1440 }
beeedaad 1441
3263d5a2 1442 if (method == CHARSET_METHOD_MAP)
3f62427c 1443 {
3263d5a2 1444 Lisp_Object encoder;
beeedaad 1445 Lisp_Object val;
beeedaad 1446
3263d5a2
KH
1447 encoder = CHARSET_ENCODER (charset);
1448 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1449 return CHARSET_INVALID_CODE (charset);
1450 val = CHAR_TABLE_REF (encoder, c);
e9ce014c
KH
1451 code = XINT (val);
1452 if (! CHARSET_COMPACT_CODES_P (charset))
1453 code = INDEX_TO_CODE_POINT (charset, code);
3263d5a2 1454 }
820ee249 1455 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1456 {
3263d5a2
KH
1457 code = c - CHARSET_CODE_OFFSET (charset);
1458 code = INDEX_TO_CODE_POINT (charset, code);
3f62427c 1459 }
beeedaad 1460
3263d5a2 1461 return code;
3f62427c
KH
1462}
1463
4ed46869 1464
3263d5a2
KH
1465DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1466 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1467Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1468
3263d5a2
KH
1469CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1470
1471Optional argument RESTRICTION specifies a way to map the pair of CCS
1472and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1473 (charset, code_point, restriction)
1474 Lisp_Object charset, code_point, restriction;
4ed46869 1475{
3263d5a2
KH
1476 int c, id;
1477 unsigned code;
1478 struct charset *charsetp;
4ed46869 1479
3263d5a2
KH
1480 CHECK_CHARSET_GET_ID (charset, id);
1481 if (CONSP (code_point))
1482 {
1483 CHECK_NATNUM (XCAR (code_point));
1484 CHECK_NATNUM (XCDR (code_point));
69f8de5b 1485 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2
KH
1486 }
1487 else
1488 {
1489 CHECK_NATNUM (code_point);
1490 code = XINT (code_point);
1491 }
1492 charsetp = CHARSET_FROM_ID (id);
1493 c = DECODE_CHAR (charsetp, code);
1494 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1495}
1496
046b1f03 1497
3263d5a2
KH
1498DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1499 doc: /* Encode the character CH into a code-point of CHARSET.
1500Return nil if CHARSET doesn't include CH.
17e7ef1b 1501
3263d5a2
KH
1502Optional argument RESTRICTION specifies a way to map CHAR to a
1503code-point in CCS. Currently not supported and just ignored. */)
1504 (ch, charset, restriction)
1505 Lisp_Object ch, charset, restriction;
1506{
1507 int c, id;
1508 unsigned code;
1509 struct charset *charsetp;
046b1f03 1510
3263d5a2
KH
1511 CHECK_CHARSET_GET_ID (charset, id);
1512 CHECK_NATNUM (ch);
1513 c = XINT (ch);
1514 charsetp = CHARSET_FROM_ID (id);
1515 code = ENCODE_CHAR (charsetp, ch);
1516 if (code == CHARSET_INVALID_CODE (charsetp))
1517 return Qnil;
1518 if (code > 0x7FFFFFF)
1519 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1520 return make_number (code);
6ae1f27e 1521}
9036eb45 1522
87b089ad 1523
b121a744
KH
1524DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1525 doc:
1526 /* Return a character of CHARSET whose position codes are CODEn.
1527
1528CODE1 through CODE4 are optional, but if you don't supply sufficient
1529position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1530is specified. */)
b121a744
KH
1531 (charset, code1, code2, code3, code4)
1532 Lisp_Object charset, code1, code2, code3, code4;
87b089ad 1533{
3263d5a2
KH
1534 int id, dimension;
1535 struct charset *charsetp;
b121a744
KH
1536 unsigned code;
1537 int c;
87b089ad 1538
3263d5a2
KH
1539 CHECK_CHARSET_GET_ID (charset, id);
1540 charsetp = CHARSET_FROM_ID (id);
87b089ad 1541
b121a744
KH
1542 dimension = CHARSET_DIMENSION (charsetp);
1543 if (NILP (code1))
d47073ca
KH
1544 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1545 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1546 else
87b089ad 1547 {
b121a744
KH
1548 CHECK_NATNUM (code1);
1549 if (XFASTINT (code1) >= 0x100)
1550 args_out_of_range (make_number (0xFF), code1);
1551 code = XFASTINT (code1);
2e344af3 1552
b0a1e45e 1553 if (dimension > 1)
b121a744
KH
1554 {
1555 code <<= 8;
b0a1e45e
KH
1556 if (NILP (code2))
1557 code |= charsetp->code_space[(dimension - 2) * 4];
b121a744
KH
1558 else
1559 {
b0a1e45e
KH
1560 CHECK_NATNUM (code2);
1561 if (XFASTINT (code2) >= 0x100)
1562 args_out_of_range (make_number (0xFF), code2);
1563 code |= XFASTINT (code2);
b121a744
KH
1564 }
1565
b0a1e45e 1566 if (dimension > 2)
b121a744
KH
1567 {
1568 code <<= 8;
b0a1e45e
KH
1569 if (NILP (code3))
1570 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1571 else
1572 {
b0a1e45e
KH
1573 CHECK_NATNUM (code3);
1574 if (XFASTINT (code3) >= 0x100)
1575 args_out_of_range (make_number (0xFF), code3);
1576 code |= XFASTINT (code3);
1577 }
1578
1579 if (dimension > 3)
1580 {
1581 code <<= 8;
1582 if (NILP (code4))
1583 code |= charsetp->code_space[0];
1584 else
1585 {
1586 CHECK_NATNUM (code4);
1587 if (XFASTINT (code4) >= 0x100)
1588 args_out_of_range (make_number (0xFF), code4);
1589 code |= XFASTINT (code4);
1590 }
b121a744
KH
1591 }
1592 }
1593 }
1594 }
3263d5a2 1595
b121a744
KH
1596 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1597 code &= 0x7F7F7F7F;
1598 c = DECODE_CHAR (charsetp, code);
1599 if (c < 0)
1600 error ("Invalid code(s)");
3263d5a2 1601 return make_number (c);
2e344af3
KH
1602}
1603
3263d5a2
KH
1604
1605/* Return the first charset in CHARSET_LIST that contains C.
1606 CHARSET_LIST is a list of charset IDs. If it is nil, use
1607 Vcharset_ordered_list. */
1608
1609struct charset *
1610char_charset (c, charset_list, code_return)
1611 int c;
1612 Lisp_Object charset_list;
1613 unsigned *code_return;
2e344af3 1614{
3263d5a2
KH
1615 if (NILP (charset_list))
1616 charset_list = Vcharset_ordered_list;
2e344af3 1617
3263d5a2 1618 while (CONSP (charset_list))
2e344af3 1619 {
3263d5a2
KH
1620 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1621 unsigned code = ENCODE_CHAR (charset, c);
1622
1623 if (code != CHARSET_INVALID_CODE (charset))
1624 {
1625 if (code_return)
1626 *code_return = code;
1627 return charset;
1628 }
1629 charset_list = XCDR (charset_list);
2e344af3 1630 }
3263d5a2 1631 return NULL;
2e344af3
KH
1632}
1633
2e344af3 1634
3263d5a2 1635DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
04c2f2c5 1636 doc: /*Return list of charset and one to three position-codes of CHAR.
3263d5a2
KH
1637If CHAR is invalid as a character code,
1638return a list of symbol `unknown' and CHAR. */)
1639 (ch)
1640 Lisp_Object ch;
2e344af3 1641{
3263d5a2
KH
1642 struct charset *charset;
1643 int c, dimension;
1644 unsigned code;
1645 Lisp_Object val;
1646
1647 CHECK_CHARACTER (ch);
1648 c = XFASTINT (ch);
1649 charset = CHAR_CHARSET (c);
1650 if (! charset)
1651 return Fcons (intern ("unknown"), Fcons (ch, Qnil));
1652
1653 code = ENCODE_CHAR (charset, c);
1654 if (code == CHARSET_INVALID_CODE (charset))
1655 abort ();
1656 dimension = CHARSET_DIMENSION (charset);
1657 val = (dimension == 1 ? Fcons (make_number (code), Qnil)
1658 : dimension == 2 ? Fcons (make_number (code >> 8),
1659 Fcons (make_number (code & 0xFF), Qnil))
1660 : Fcons (make_number (code >> 16),
1661 Fcons (make_number ((code >> 8) & 0xFF),
1662 Fcons (make_number (code & 0xFF), Qnil))));
1663 return Fcons (CHARSET_NAME (charset), val);
2e344af3 1664}
87b089ad 1665
740f080d 1666
3263d5a2
KH
1667DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1668 doc: /* Return the charset of highest priority that contains CHAR. */)
1669 (ch)
1670 Lisp_Object ch;
740f080d 1671{
3263d5a2 1672 struct charset *charset;
740f080d 1673
3263d5a2
KH
1674 CHECK_CHARACTER (ch);
1675 charset = CHAR_CHARSET (XINT (ch));
1676 return (CHARSET_NAME (charset));
740f080d
KH
1677}
1678
2e344af3 1679
3263d5a2
KH
1680DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1681 doc: /*
1682Return charset of a character in the current buffer at position POS.
1683If POS is nil, it defauls to the current point.
1684If POS is out of range, the value is nil. */)
1685 (pos)
1686 Lisp_Object pos;
2e344af3 1687{
3263d5a2
KH
1688 Lisp_Object ch;
1689 struct charset *charset;
1690
1691 ch = Fchar_after (pos);
1692 if (! INTEGERP (ch))
1693 return ch;
1694 charset = CHAR_CHARSET (XINT (ch));
1695 return (CHARSET_NAME (charset));
87b089ad
RS
1696}
1697
2e344af3 1698
3263d5a2
KH
1699DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1700 doc: /*
1701Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1702
1703ISO 2022's designation sequence (escape sequence) distinguishes charsets
1704by their DIMENSION, CHARS, and FINAL-CHAR,
1705where as Emacs distinguishes them by charset symbol.
1706See the documentation of the function `charset-info' for the meanings of
1707DIMENSION, CHARS, and FINAL-CHAR. */)
1708 (dimension, chars, final_char)
1709 Lisp_Object dimension, chars, final_char;
2e344af3 1710{
3263d5a2 1711 int id;
2e344af3 1712
3263d5a2
KH
1713 check_iso_charset_parameter (dimension, chars, final_char);
1714 id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
1715 XFASTINT (final_char));
1716 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2e344af3
KH
1717}
1718
3263d5a2
KH
1719
1720DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1721 0, 0, 0,
1722 doc: /*
1723Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1724 ()
4ed46869 1725{
53316e55 1726 int i;
3263d5a2
KH
1727 struct charset *charset;
1728 Lisp_Object attrs;
4ed46869 1729
3263d5a2 1730 for (i = 0; i < charset_table_used; i++)
4ed46869 1731 {
3263d5a2
KH
1732 charset = CHARSET_FROM_ID (i);
1733 attrs = CHARSET_ATTRIBUTES (charset);
1734
1735 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1736 {
1737 CHARSET_ATTR_DECODER (attrs) = Qnil;
1738 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1739 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1740 }
1741
1742 if (CHARSET_UNIFIED_P (charset))
1743 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
5729c92f
KH
1744 }
1745
3263d5a2 1746 if (CHAR_TABLE_P (Vchar_unified_charset_table))
5729c92f 1747 {
3263d5a2
KH
1748 Foptimize_char_table (Vchar_unified_charset_table);
1749 Vchar_unify_table = Vchar_unified_charset_table;
1750 Vchar_unified_charset_table = Qnil;
4ed46869
KH
1751 }
1752
3263d5a2 1753 return Qnil;
4ed46869
KH
1754}
1755
8ddf5e57
DL
1756DEFUN ("charset-priority-list", Fcharset_priority_list,
1757 Scharset_priority_list, 0, 1, 0,
1758 doc: /* Return the list of charsets ordered by priority.
1759HIGHESTP non-nil means just return the highest priority one. */)
1760 (highestp)
1761 Lisp_Object highestp;
1762{
1763 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1764
1765 if (!NILP (highestp))
1766 return CHARSET_NAME (CHARSET_FROM_ID (Fcar (list)));
1767
1768 while (!NILP (list))
1769 {
1770 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XCAR (list))), val);
1771 list = XCDR (list);
1772 }
1773 return Fnreverse (val);
1774}
1775
1776DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1777 1, MANY, 0,
1778 doc: /* Assign higher priority to the charsets given as arguments.
1779usage: (set-charset-priority &rest charsets) */)
1780 (nargs, args)
1781 int nargs;
1782 Lisp_Object *args;
1783{
1784 Lisp_Object new_head = Qnil, old_list, id, arglist[2];
1785 int i;
1786
1787 old_list = Fcopy_sequence (Vcharset_ordered_list);
1788 for (i = 0; i < nargs; i++)
1789 {
1790 CHECK_CHARSET_GET_ID (args[i], id);
1791 old_list = Fdelq (id, old_list);
1792 new_head = Fcons (id, new_head);
1793 }
1794 arglist[0] = Fnreverse (new_head);
1795 arglist[1] = old_list;
1796 Vcharset_ordered_list = Fnconc (2, arglist);
1797 return Qnil;
1798}
4ed46869 1799\f
3263d5a2
KH
1800void
1801init_charset ()
4ed46869 1802{
4ed46869 1803
4ed46869
KH
1804}
1805
4ed46869 1806
dfcf069d 1807void
4ed46869
KH
1808init_charset_once ()
1809{
1810 int i, j, k;
1811
3263d5a2
KH
1812 for (i = 0; i < ISO_MAX_DIMENSION; i++)
1813 for (j = 0; j < ISO_MAX_CHARS; j++)
1814 for (k = 0; k < ISO_MAX_FINAL; k++)
1815 iso_charset_table[i][j][k] = -1;
1816
1817 for (i = 0; i < 255; i++)
1818 emacs_mule_charset[i] = NULL;
4ed46869 1819
3263d5a2
KH
1820#if 0
1821 Vchar_charset_set = Fmake_char_table (Qnil, Qnil);
1822 CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil);
1823
1824 DEFSYM (Qcharset_encode_table, "charset-encode-table");
4ed46869
KH
1825
1826 /* Intern this now in case it isn't already done.
1827 Setting this variable twice is harmless.
1828 But don't staticpro it here--that is done in alloc.c. */
1829 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1830
3263d5a2
KH
1831 /* Now we are ready to set up this property, so we can create syntax
1832 tables. */
1833 Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0));
1834#endif
4ed46869
KH
1835}
1836
1837#ifdef emacs
1838
dfcf069d 1839void
4ed46869
KH
1840syms_of_charset ()
1841{
3263d5a2
KH
1842 char *p;
1843
1844 DEFSYM (Qcharsetp, "charsetp");
1845
1846 DEFSYM (Qascii, "ascii");
1847 DEFSYM (Qunicode, "unicode");
1848 DEFSYM (Qeight_bit_control, "eight-bit-control");
1849 DEFSYM (Qeight_bit_graphic, "eight-bit-graphic");
1850 DEFSYM (Qiso_8859_1, "iso-8859-1");
1851
1852 DEFSYM (Qgl, "gl");
1853 DEFSYM (Qgr, "gr");
1854
1855 p = (char *) xmalloc (30000);
1856
1857 staticpro (&Vcharset_ordered_list);
1858 Vcharset_ordered_list = Qnil;
1859
1860 staticpro (&Viso_2022_charset_list);
1861 Viso_2022_charset_list = Qnil;
1862
1863 staticpro (&Vemacs_mule_charset_list);
1864 Vemacs_mule_charset_list = Qnil;
1865
1866 staticpro (&Vcharset_hash_table);
1867 Vcharset_hash_table = Fmakehash (Qeq);
1868
1869 charset_table_size = 128;
1870 charset_table = ((struct charset *)
1871 xmalloc (sizeof (struct charset) * charset_table_size));
1872 charset_table_used = 0;
1873
1874 staticpro (&Vchar_unified_charset_table);
1875 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
1876
1877 defsubr (&Scharsetp);
1878 defsubr (&Smap_charset_chars);
1879 defsubr (&Sdefine_charset_internal);
1880 defsubr (&Sdefine_charset_alias);
1881 defsubr (&Sprimary_charset);
1882 defsubr (&Sset_primary_charset);
1883 defsubr (&Scharset_plist);
1884 defsubr (&Sset_charset_plist);
1885 defsubr (&Sunify_charset);
3fac5a51 1886 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1887 defsubr (&Sdeclare_equiv_charset);
1888 defsubr (&Sfind_charset_region);
1889 defsubr (&Sfind_charset_string);
3263d5a2
KH
1890 defsubr (&Sdecode_char);
1891 defsubr (&Sencode_char);
4ed46869 1892 defsubr (&Ssplit_char);
3263d5a2 1893 defsubr (&Smake_char);
4ed46869 1894 defsubr (&Schar_charset);
90d7b74e 1895 defsubr (&Scharset_after);
4ed46869 1896 defsubr (&Siso_charset);
3263d5a2 1897 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
1898 defsubr (&Scharset_priority_list);
1899 defsubr (&Sset_charset_priority);
3263d5a2
KH
1900
1901 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
1902 doc: /* Directory of charset map files that come with GNU Emacs.
04c2f2c5 1903The default value is sub-directory "charsets" of `data-directory'. */);
3263d5a2
KH
1904 Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
1905 Vdata_directory);
4ed46869
KH
1906
1907 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 1908 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
1909 Vcharset_list = Qnil;
1910
1911 /* Make the prerequisite charset `ascii' and `unicode'. */
1912 {
1913 Lisp_Object args[charset_arg_max];
1914 Lisp_Object plist[14];
1915 Lisp_Object val;
1916
1917 plist[0] = intern (":name");
3263d5a2 1918 plist[2] = intern (":dimension");
3263d5a2 1919 plist[4] = intern (":code-space");
3263d5a2 1920 plist[6] = intern (":iso-final-char");
3263d5a2 1921 plist[8] = intern (":emacs-mule-id");
3263d5a2 1922 plist[10] = intern (":ascii-compatible-p");
820ee249
KH
1923 plist[12] = intern (":code-offset");
1924
1925 args[charset_arg_name] = Qascii;
1926 args[charset_arg_dimension] = make_number (1);
1927 val = Fmake_vector (make_number (8), make_number (0));
1928 ASET (val, 1, make_number (127));
1929 args[charset_arg_code_space] = val;
1930 args[charset_arg_min_code] = Qnil;
1931 args[charset_arg_max_code] = Qnil;
1932 args[charset_arg_iso_final] = make_number ('B');
1933 args[charset_arg_iso_revision] = Qnil;
1934 args[charset_arg_emacs_mule_id] = make_number (0);
1935 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
1936 args[charset_arg_supplementary_p] = Qnil;
1937 args[charset_arg_invalid_code] = Qnil;
820ee249 1938 args[charset_arg_code_offset] = make_number (0);
3263d5a2
KH
1939 args[charset_arg_map] = Qnil;
1940 args[charset_arg_parents] = Qnil;
1941 args[charset_arg_unify_map] = Qnil;
1942 /* The actual plist is set by mule-conf.el. */
820ee249
KH
1943 plist[1] = args[charset_arg_name];
1944 plist[3] = args[charset_arg_dimension];
1945 plist[5] = args[charset_arg_code_space];
1946 plist[7] = args[charset_arg_iso_final];
1947 plist[9] = args[charset_arg_emacs_mule_id];
1948 plist[11] = args[charset_arg_ascii_compatible_p];
1949 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
1950 args[charset_arg_plist] = Flist (14, plist);
1951 Fdefine_charset_internal (charset_arg_max, args);
1952 charset_ascii = CHARSET_SYMBOL_ID (Qascii);
1953
820ee249
KH
1954 args[charset_arg_name] = Qunicode;
1955 args[charset_arg_dimension] = make_number (3);
3263d5a2
KH
1956 val = Fmake_vector (make_number (8), make_number (0));
1957 ASET (val, 1, make_number (255));
1958 ASET (val, 3, make_number (255));
1959 ASET (val, 5, make_number (16));
820ee249
KH
1960 args[charset_arg_code_space] = val;
1961 args[charset_arg_min_code] = Qnil;
1962 args[charset_arg_max_code] = Qnil;
1963 args[charset_arg_iso_final] = Qnil;
3263d5a2 1964 args[charset_arg_iso_revision] = Qnil;
820ee249
KH
1965 args[charset_arg_emacs_mule_id] = Qnil;
1966 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
1967 args[charset_arg_supplementary_p] = Qnil;
1968 args[charset_arg_invalid_code] = Qnil;
820ee249 1969 args[charset_arg_code_offset] = make_number (0);
3263d5a2
KH
1970 args[charset_arg_map] = Qnil;
1971 args[charset_arg_parents] = Qnil;
1972 args[charset_arg_unify_map] = Qnil;
1973 /* The actual plist is set by mule-conf.el. */
820ee249
KH
1974 plist[1] = args[charset_arg_name];
1975 plist[3] = args[charset_arg_dimension];
1976 plist[5] = args[charset_arg_code_space];
1977 plist[7] = args[charset_arg_iso_final];
1978 plist[9] = args[charset_arg_emacs_mule_id];
1979 plist[11] = args[charset_arg_ascii_compatible_p];
1980 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
1981 args[charset_arg_plist] = Flist (14, plist);
1982 Fdefine_charset_internal (charset_arg_max, args);
1983 charset_unicode = CHARSET_SYMBOL_ID (Qunicode);
1984 }
4ed46869
KH
1985}
1986
1987#endif /* emacs */