(Funibyte_char_to_multibyte): If C can't be decoded
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2
KH
1/* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
12bcae05 4 Copyright (C) 2001 Free Software Foundation, Inc.
3263d5a2
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869 25
68c45bf0 26#include <config.h>
68c45bf0 27
4ed46869 28#include <stdio.h>
3263d5a2
KH
29#include <unistd.h>
30#include <ctype.h>
4ed46869 31#include <sys/types.h>
4ed46869 32#include "lisp.h"
3263d5a2 33#include "character.h"
4ed46869
KH
34#include "charset.h"
35#include "coding.h"
fc6b09bf 36#include "disptab.h"
3263d5a2 37#include "buffer.h"
4ed46869 38
04c2f2c5 39/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 40
3263d5a2 41 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 42 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 43 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
44 code, a charset is represented by a symbol. In C code, a charset is
45 represented by its ID number or by a pointer to a struct charset.
4ed46869 46
3263d5a2
KH
47 The actual information about each charset is stored in two places.
48 Lispy information is stored in the hash table Vcharset_hash_table as
49 a vector (charset attributes). The other information is stored in
04c2f2c5 50 charset_table as a struct charset.
4ed46869 51
3263d5a2 52*/
4ed46869 53
3263d5a2
KH
54/* List of all charsets. This variable is used only from Emacs
55 Lisp. */
4ed46869
KH
56Lisp_Object Vcharset_list;
57
3263d5a2
KH
58/* Hash table that contains attributes of each charset. Keys are
59 charset symbols, and values are vectors of charset attributes. */
60Lisp_Object Vcharset_hash_table;
61
62/* Table of struct charset. */
63struct charset *charset_table;
64
65static int charset_table_size;
66int charset_table_used;
67
68Lisp_Object Qcharsetp;
69
70/* Special charset symbols. */
71Lisp_Object Qascii;
72Lisp_Object Qeight_bit_control;
73Lisp_Object Qeight_bit_graphic;
74Lisp_Object Qiso_8859_1;
75Lisp_Object Qunicode;
b0e3cf2b 76
3263d5a2
KH
77/* The corresponding charsets. */
78int charset_ascii;
79int charset_8_bit_control;
80int charset_8_bit_graphic;
81int charset_iso_8859_1;
82int charset_unicode;
c1a08b4c 83
7c7dceee
KH
84/* The other special charsets. */
85int charset_jisx0201_roman;
86int charset_jisx0208_1978;
87int charset_jisx0208;
88
3263d5a2
KH
89/* Value of charset attribute `charset-iso-plane'. */
90Lisp_Object Qgl, Qgr;
c1a08b4c 91
3263d5a2
KH
92/* The primary charset. It is a charset of unibyte characters. */
93int charset_primary;
94
95/* List of charsets ordered by the priority. */
96Lisp_Object Vcharset_ordered_list;
97
dbbb237d 98/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 99 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
100 -1. */
101unsigned short charset_ordered_list_tick;
102
3263d5a2
KH
103/* List of iso-2022 charsets. */
104Lisp_Object Viso_2022_charset_list;
105
106/* List of emacs-mule charsets. */
107Lisp_Object Vemacs_mule_charset_list;
108
109struct charset *emacs_mule_charset[256];
4ed46869
KH
110
111/* Mapping table from ISO2022's charset (specified by DIMENSION,
112 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
113int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
114
115Lisp_Object Vcharset_map_directory;
116
117Lisp_Object Vchar_unified_charset_table;
118
64165ae2
DL
119/* Defined in chartab.c */
120extern void
121map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
122 Lisp_Object function, Lisp_Object table,
123 Lisp_Object arg, struct charset *charset,
124 unsigned from, unsigned to));
125
69f8de5b
KH
126#define CODE_POINT_TO_INDEX(charset, code) \
127 ((charset)->code_linear_p \
128 ? (code) - (charset)->min_code \
129 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
130 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
131 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
132 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
133 ? (((((code) >> 24) - (charset)->code_space[12]) \
134 * (charset)->code_space[11]) \
135 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
136 * (charset)->code_space[7]) \
137 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
138 * (charset)->code_space[3]) \
820ee249
KH
139 + (((code) & 0xFF) - (charset)->code_space[0]) \
140 - ((charset)->char_index_offset)) \
3263d5a2
KH
141 : -1)
142
143
144/* Convert the character index IDX to code-point CODE for CHARSET.
145 It is assumed that IDX is in a valid range. */
146
820ee249
KH
147#define INDEX_TO_CODE_POINT(charset, idx) \
148 ((charset)->code_linear_p \
149 ? (idx) + (charset)->min_code \
150 : (idx += (charset)->char_index_offset, \
151 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
152 | (((charset)->code_space[4] \
153 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
154 << 8) \
155 | (((charset)->code_space[8] \
156 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
157 << 16) \
158 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
159 << 24))))
160
4ed46869 161
3263d5a2 162\f
4ed46869 163
e9ce014c
KH
164/* Set to 1 to warn that a charset map is loaded and thus a buffer
165 text and a string data may be relocated. */
3263d5a2 166int charset_map_loaded;
35e623fb 167
e9ce014c
KH
168struct charset_map_entries
169{
170 struct {
171 unsigned from, to;
172 int c;
173 } entry[0x10000];
174 struct charset_map_entries *next;
175};
176
177/* Load the mapping information for CHARSET from ENTRIES.
4cf9710d 178
3263d5a2 179 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
8a73a704 180
3263d5a2
KH
181 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
182 CHARSET->decoder, and CHARSET->encoder.
93bcb785 183
3263d5a2
KH
184 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
185 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
186 setup it too. */
4ed46869 187
3263d5a2 188static void
e9ce014c 189load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 190 struct charset *charset;
e9ce014c
KH
191 struct charset_map_entries *entries;
192 int n_entries;
3263d5a2 193 int control_flag;
4ed46869 194{
3263d5a2 195 Lisp_Object vec, table;
3263d5a2
KH
196 unsigned max_code = CHARSET_MAX_CODE (charset);
197 int ascii_compatible_p = charset->ascii_compatible_p;
198 int min_char, max_char, nonascii_min_char;
3263d5a2 199 int i;
3263d5a2 200 unsigned char *fast_map = charset->fast_map;
99529c2c 201
e9ce014c
KH
202 if (n_entries <= 0)
203 return;
204
205 if (control_flag > 0)
8ac5a9cc 206 {
3263d5a2 207 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 208
374c5cfd 209 table = Fmake_char_table (Qnil, Qnil);
3263d5a2
KH
210 if (control_flag == 1)
211 vec = Fmake_vector (make_number (n), make_number (-1));
212 else if (! CHAR_TABLE_P (Vchar_unify_table))
374c5cfd 213 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
6662e69b 214
3263d5a2 215 charset_map_loaded = 1;
2e344af3 216 }
3263d5a2 217
e9ce014c 218 min_char = max_char = entries->entry[0].c;
3263d5a2 219 nonascii_min_char = MAX_CHAR;
e9ce014c 220 for (i = 0; i < n_entries; i++)
2e344af3 221 {
e9ce014c 222 unsigned from, to;
3b4f4446
KH
223 int from_index, to_index;
224 int from_c, to_c;
e9ce014c 225 int idx = i % 0x10000;
3263d5a2 226
e9ce014c
KH
227 if (i > 0 && idx == 0)
228 entries = entries->next;
229 from = entries->entry[idx].from;
230 to = entries->entry[idx].to;
3b4f4446
KH
231 from_c = entries->entry[idx].c;
232 from_index = CODE_POINT_TO_INDEX (charset, from);
233 if (from == to)
234 {
235 to_index = from_index;
236 to_c = from_c;
237 }
238 else
239 {
240 to_index = CODE_POINT_TO_INDEX (charset, to);
241 to_c = from_c + (to_index - from_index);
242 }
243 if (from_index < 0 || to_index < 0)
244 continue;
3263d5a2 245
3263d5a2
KH
246 if (control_flag < 2)
247 {
3b4f4446
KH
248 int c;
249
250 if (to_c > max_char)
251 max_char = to_c;
252 else if (from_c < min_char)
253 min_char = from_c;
254 if (ascii_compatible_p)
255 {
256 if (! ASCII_BYTE_P (from_c))
257 {
258 if (from_c < nonascii_min_char)
259 nonascii_min_char = from_c;
260 }
261 else if (! ASCII_BYTE_P (to_c))
262 {
263 nonascii_min_char = 0x80;
264 }
265 }
266
267 for (c = from_c; c <= to_c; c++)
268 CHARSET_FAST_MAP_SET (c, fast_map);
269
e9ce014c 270 if (control_flag == 1)
3263d5a2 271 {
e9ce014c 272 unsigned code = from;
e9ce014c 273
e9ce014c
KH
274 if (CHARSET_COMPACT_CODES_P (charset))
275 while (1)
276 {
3b4f4446
KH
277 ASET (vec, from_index, make_number (from_c));
278 CHAR_TABLE_SET (table, from_c, make_number (code));
e9ce014c
KH
279 if (from_index == to_index)
280 break;
3b4f4446 281 from_index++, from_c++;
e9ce014c
KH
282 code = INDEX_TO_CODE_POINT (charset, from_index);
283 }
284 else
3b4f4446 285 for (; from_index <= to_index; from_index++, from_c++)
e9ce014c 286 {
3b4f4446
KH
287 ASET (vec, from_index, make_number (from_c));
288 CHAR_TABLE_SET (table, from_c, make_number (from_index));
e9ce014c 289 }
3263d5a2 290 }
3263d5a2 291 }
e9ce014c 292 else
2e344af3 293 {
69f8de5b 294 unsigned code = from;
3b4f4446 295
69f8de5b
KH
296 while (1)
297 {
298 int c1 = DECODE_CHAR (charset, code);
299
3263d5a2
KH
300 if (c1 >= 0)
301 {
3b4f4446 302 CHAR_TABLE_SET (table, from_c, make_number (c1));
16fed1fc 303 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
3263d5a2
KH
304 if (CHAR_TABLE_P (Vchar_unified_charset_table))
305 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
306 CHARSET_NAME (charset));
307 }
69f8de5b
KH
308 if (from_index == to_index)
309 break;
3b4f4446 310 from_index++, from_c++;
69f8de5b 311 code = INDEX_TO_CODE_POINT (charset, from_index);
3263d5a2 312 }
2e344af3 313 }
8ac5a9cc 314 }
3263d5a2
KH
315
316 if (control_flag < 2)
4ed46869 317 {
3263d5a2
KH
318 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
319 ? nonascii_min_char : min_char);
320 CHARSET_MAX_CHAR (charset) = max_char;
e9ce014c 321 if (control_flag == 1)
4ed46869 322 {
3263d5a2
KH
323 CHARSET_DECODER (charset) = vec;
324 CHARSET_ENCODER (charset) = table;
4ed46869
KH
325 }
326 }
2e344af3 327 else
3263d5a2 328 CHARSET_DEUNIFIER (charset) = table;
4ed46869
KH
329}
330
12bcae05 331
3263d5a2
KH
332/* Read a hexadecimal number (preceded by "0x") from the file FP while
333 paying attention to comment charcter '#'. */
12bcae05 334
3263d5a2
KH
335static INLINE unsigned
336read_hex (fp, eof)
337 FILE *fp;
338 int *eof;
12bcae05 339{
3263d5a2
KH
340 int c;
341 unsigned n;
12bcae05 342
3263d5a2
KH
343 while ((c = getc (fp)) != EOF)
344 {
69f8de5b 345 if (c == '#')
3263d5a2
KH
346 {
347 while ((c = getc (fp)) != EOF && c != '\n');
348 }
349 else if (c == '0')
350 {
351 if ((c = getc (fp)) == EOF || c == 'x')
352 break;
353 }
354 }
355 if (c == EOF)
356 {
357 *eof = 1;
358 return 0;
359 }
360 *eof = 0;
361 n = 0;
362 if (c == 'x')
363 while ((c = getc (fp)) != EOF && isxdigit (c))
364 n = ((n << 4)
365 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
366 else
367 while ((c = getc (fp)) != EOF && isdigit (c))
368 n = (n * 10) + c - '0';
e9ce014c
KH
369 if (c != EOF)
370 ungetc (c, fp);
3263d5a2
KH
371 return n;
372}
12bcae05 373
537efd8d 374
3263d5a2 375/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
376 Each line of MAPFILE has this form
377 0xAAAA 0xCCCC
378 where 0xAAAA is a code-point and 0xCCCC is the corresponding
379 character code, or this form
380 0xAAAA-0xBBBB 0xCCCC
381 where 0xAAAA and 0xBBBB are code-points specifying a range, and
382 0xCCCC is the first character code of the range.
383
3263d5a2
KH
384 The returned vector has this form:
385 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
386 where CODE1 is a code-point or a cons of code-points specifying a
387 range. */
4ed46869 388
c449997d
KH
389extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
390
e9ce014c
KH
391static void
392load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
393 struct charset *charset;
394 Lisp_Object mapfile;
e9ce014c 395 int control_flag;
4ed46869 396{
e9ce014c
KH
397 unsigned min_code = CHARSET_MIN_CODE (charset);
398 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
399 int fd;
400 FILE *fp;
3263d5a2
KH
401 int eof;
402 Lisp_Object suffixes;
e9ce014c
KH
403 struct charset_map_entries *head, *entries;
404 int n_entries;
4ed46869 405
3263d5a2
KH
406 suffixes = Fcons (build_string (".map"),
407 Fcons (build_string (".TXT"), Qnil));
4ed46869 408
3263d5a2
KH
409 fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
410 NULL, 0);
411 if (fd < 0
412 || ! (fp = fdopen (fd, "r")))
413 {
414 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 415 return;
3263d5a2 416 }
4ed46869 417
e9ce014c
KH
418 head = entries = ((struct charset_map_entries *)
419 alloca (sizeof (struct charset_map_entries)));
420 n_entries = 0;
3263d5a2
KH
421 eof = 0;
422 while (1)
423 {
e9ce014c
KH
424 unsigned from, to;
425 int c;
426 int idx;
4ed46869 427
e9ce014c 428 from = read_hex (fp, &eof);
3263d5a2
KH
429 if (eof)
430 break;
e9ce014c
KH
431 if (getc (fp) == '-')
432 to = read_hex (fp, &eof);
433 else
434 to = from;
435 c = (int) read_hex (fp, &eof);
436
437 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
438 continue;
439
440 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 441 {
e9ce014c
KH
442 entries->next = ((struct charset_map_entries *)
443 alloca (sizeof (struct charset_map_entries)));
444 entries = entries->next;
3263d5a2 445 }
e9ce014c
KH
446 idx = n_entries % 0x10000;
447 entries->entry[idx].from = from;
448 entries->entry[idx].to = to;
449 entries->entry[idx].c = c;
450 n_entries++;
3263d5a2
KH
451 }
452 fclose (fp);
453 close (fd);
4ed46869 454
e9ce014c
KH
455 load_charset_map (charset, head, n_entries, control_flag);
456}
457
458static void
459load_charset_map_from_vector (charset, vec, control_flag)
460 struct charset *charset;
461 Lisp_Object vec;
462 int control_flag;
463{
464 unsigned min_code = CHARSET_MIN_CODE (charset);
465 unsigned max_code = CHARSET_MAX_CODE (charset);
466 struct charset_map_entries *head, *entries;
467 int n_entries;
468 int len = ASIZE (vec);
469 int i;
470
471 if (len % 2 == 1)
3263d5a2 472 {
e9ce014c
KH
473 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
474 return;
3263d5a2 475 }
4ed46869 476
e9ce014c
KH
477 head = entries = ((struct charset_map_entries *)
478 alloca (sizeof (struct charset_map_entries)));
479 n_entries = 0;
480 for (i = 0; i < len; i += 2)
481 {
482 Lisp_Object val, val2;
483 unsigned from, to;
484 int c;
485 int idx;
3263d5a2 486
e9ce014c
KH
487 val = AREF (vec, i);
488 if (CONSP (val))
489 {
490 val2 = XCDR (val);
491 val = XCAR (val);
492 CHECK_NATNUM (val);
493 CHECK_NATNUM (val2);
494 from = XFASTINT (val);
495 to = XFASTINT (val2);
496 }
497 else
498 {
499 CHECK_NATNUM (val);
500 from = to = XFASTINT (val);
501 }
502 val = AREF (vec, i + 1);
503 CHECK_NATNUM (val);
504 c = XFASTINT (val);
505
506 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
507 continue;
508
509 if ((n_entries % 0x10000) == 0)
510 {
511 entries->next = ((struct charset_map_entries *)
512 alloca (sizeof (struct charset_map_entries)));
513 entries = entries->next;
514 }
515 idx = n_entries % 0x10000;
516 entries->entry[idx].from = from;
517 entries->entry[idx].to = to;
518 entries->entry[idx].c = c;
519 n_entries++;
520 }
521
522 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
523}
524
3263d5a2
KH
525static void
526load_charset (charset)
527 struct charset *charset;
ac4137cc 528{
3263d5a2
KH
529 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
530 {
531 Lisp_Object map;
ac4137cc 532
3263d5a2
KH
533 map = CHARSET_MAP (charset);
534 if (STRINGP (map))
e9ce014c
KH
535 load_charset_map_from_file (charset, map, 1);
536 else
537 load_charset_map_from_vector (charset, map, 1);
3263d5a2
KH
538 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
539 }
4ed46869
KH
540}
541
3263d5a2
KH
542
543DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
544 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
545 (object)
546 Lisp_Object object;
23d2a7f1 547{
3263d5a2 548 return (CHARSETP (object) ? Qt : Qnil);
23d2a7f1
KH
549}
550
35e623fb 551
3263d5a2 552void
374c5cfd
KH
553map_charset_chars (c_function, function, arg,
554 charset, from, to)
555 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
556 Lisp_Object function, arg;
557 struct charset *charset;
558 unsigned from, to;
559
35e623fb 560{
3263d5a2 561 Lisp_Object range;
374c5cfd 562 int partial;
3263d5a2
KH
563
564 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
565 load_charset (charset);
566
374c5cfd
KH
567 partial = (from > CHARSET_MIN_CODE (charset)
568 || to < CHARSET_MAX_CODE (charset));
569
570 if (CHARSET_UNIFIED_P (charset)
571 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
572 {
573 map_char_table_for_charset (c_function, function,
574 CHARSET_DEUNIFIER (charset), arg,
575 partial ? charset : NULL, from, to);
576 }
577
3263d5a2
KH
578 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
579 {
374c5cfd
KH
580 int from_idx = CODE_POINT_TO_INDEX (charset, from);
581 int to_idx = CODE_POINT_TO_INDEX (charset, to);
582 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
583 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
584
585 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 586 if (NILP (function))
374c5cfd 587 (*c_function) (range, arg);
3263d5a2
KH
588 else
589 call2 (function, range, arg);
590 }
591 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
592 {
593 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
594 return;
374c5cfd 595 if (CHARSET_ASCII_COMPATIBLE_P (charset) && from <= 127)
bbf12bb3 596 {
374c5cfd
KH
597 range = Fcons (make_number (from), make_number (to));
598 if (to >= 128)
599 XSETCAR (range, make_number (127));
600
3263d5a2 601 if (NILP (function))
374c5cfd 602 (*c_function) (range, arg);
3263d5a2
KH
603 else
604 call2 (function, range, arg);
bbf12bb3 605 }
374c5cfd
KH
606 map_char_table_for_charset (c_function, function,
607 CHARSET_ENCODER (charset), arg,
608 partial ? charset : NULL, from, to);
3263d5a2 609 }
374c5cfd 610 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
3263d5a2 611 {
374c5cfd
KH
612 Lisp_Object subset_info;
613 int offset;
614
615 subset_info = CHARSET_SUBSET (charset);
616 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
617 offset = XINT (AREF (subset_info, 3));
618 from -= offset;
619 if (from < XFASTINT (AREF (subset_info, 1)))
620 from = XFASTINT (AREF (subset_info, 1));
621 to -= offset;
622 if (to > XFASTINT (AREF (subset_info, 2)))
623 to = XFASTINT (AREF (subset_info, 2));
624 map_charset_chars (c_function, function, arg, charset, from, to);
625 }
626 else /* i.e. CHARSET_METHOD_SUPERSET */
627 {
628 Lisp_Object parents;
3263d5a2 629
374c5cfd
KH
630 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
631 parents = XCDR (parents))
bbf12bb3 632 {
374c5cfd
KH
633 int offset;
634 unsigned this_from, this_to;
635
636 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
637 offset = XINT (XCDR (XCAR (parents)));
638 this_from = from - offset;
639 this_to = to - offset;
640 if (this_from < CHARSET_MIN_CODE (charset))
641 this_from = CHARSET_MIN_CODE (charset);
642 if (this_to > CHARSET_MAX_CODE (charset))
643 this_to = CHARSET_MAX_CODE (charset);
644 map_charset_chars (c_function, function, arg, charset, from, to);
bbf12bb3 645 }
35e623fb 646 }
3263d5a2
KH
647}
648
374c5cfd
KH
649
650DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 651 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 652FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 653argument ARG.
6abd9323 654
374c5cfd
KH
655RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
656characters contained in CHARSET.
657
658The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
659range of code points of targer characters. */)
660 (function, charset, arg, from_code, to_code)
661 Lisp_Object function, charset, arg, from_code, to_code;
3263d5a2 662{
374c5cfd 663 struct charset *cs;
16fed1fc 664 unsigned from, to;
374c5cfd
KH
665
666 CHECK_CHARSET_GET_CHARSET (charset, cs);
667 if (NILP (from_code))
16fed1fc 668 from = CHARSET_MIN_CODE (cs);
970b7474
KH
669 else
670 {
671 CHECK_NATNUM (from_code);
672 from = XINT (from_code);
673 if (from < CHARSET_MIN_CODE (cs))
674 from = CHARSET_MIN_CODE (cs);
675 }
374c5cfd 676 if (NILP (to_code))
970b7474
KH
677 to = CHARSET_MAX_CODE (cs);
678 else
679 {
680 CHECK_NATNUM (to_code);
681 to = XINT (to_code);
682 if (to > CHARSET_MAX_CODE (cs))
683 to = CHARSET_MAX_CODE (cs);
684 }
16fed1fc 685 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 686 return Qnil;
35e623fb 687}
76d7b829
KH
688
689
3263d5a2
KH
690/* Define a charset according to the arguments. The Nth argument is
691 the Nth attribute of the charset (the last attribute `charset-id'
692 is not included). See the docstring of `define-charset' for the
693 detail. */
76d7b829 694
3263d5a2
KH
695DEFUN ("define-charset-internal", Fdefine_charset_internal,
696 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
697 doc: /* For internal use only.
698usage: (define-charset-internal ...) */)
3263d5a2
KH
699 (nargs, args)
700 int nargs;
701 Lisp_Object *args;
76d7b829 702{
3263d5a2
KH
703 /* Charset attr vector. */
704 Lisp_Object attrs;
705 Lisp_Object val;
706 unsigned hash_code;
707 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 708 int i, j;
3263d5a2
KH
709 struct charset charset;
710 int id;
711 int dimension;
712 int new_definition_p;
713 int nchars;
714
715 if (nargs != charset_arg_max)
716 return Fsignal (Qwrong_number_of_arguments,
717 Fcons (intern ("define-charset-internal"),
718 make_number (nargs)));
719
720 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
721
722 CHECK_SYMBOL (args[charset_arg_name]);
723 ASET (attrs, charset_name, args[charset_arg_name]);
724
725 val = args[charset_arg_code_space];
726 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 727 {
3263d5a2
KH
728 int min_byte, max_byte;
729
730 min_byte = XINT (Faref (val, make_number (i * 2)));
731 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
732 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
733 error ("Invalid :code-space value");
734 charset.code_space[i * 4] = min_byte;
735 charset.code_space[i * 4 + 1] = max_byte;
736 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
737 nchars *= charset.code_space[i * 4 + 2];
738 charset.code_space[i * 4 + 3] = nchars;
739 if (max_byte > 0)
740 dimension = i + 1;
741 }
76d7b829 742
3263d5a2
KH
743 val = args[charset_arg_dimension];
744 if (NILP (val))
745 charset.dimension = dimension;
746 else
747 {
748 CHECK_NATNUM (val);
749 charset.dimension = XINT (val);
750 if (charset.dimension < 1 || charset.dimension > 4)
751 args_out_of_range_3 (val, make_number (1), make_number (4));
752 }
753
754 charset.code_linear_p
755 = (charset.dimension == 1
756 || (charset.code_space[2] == 256
757 && (charset.dimension == 2
758 || (charset.code_space[6] == 256
759 && (charset.dimension == 3
760 || charset.code_space[10] == 256)))));
761
69f8de5b
KH
762 if (! charset.code_linear_p)
763 {
764 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 765 bzero (charset.code_space_mask, 256);
69f8de5b
KH
766 for (i = 0; i < 4; i++)
767 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
768 j++)
769 charset.code_space_mask[j] |= (1 << i);
770 }
771
3263d5a2
KH
772 charset.iso_chars_96 = charset.code_space[2] == 96;
773
774 charset.min_code = (charset.code_space[0]
775 | (charset.code_space[4] << 8)
776 | (charset.code_space[8] << 16)
777 | (charset.code_space[12] << 24));
778 charset.max_code = (charset.code_space[1]
779 | (charset.code_space[5] << 8)
780 | (charset.code_space[9] << 16)
781 | (charset.code_space[13] << 24));
820ee249
KH
782 charset.char_index_offset = 0;
783
784 val = args[charset_arg_min_code];
785 if (! NILP (val))
786 {
787 unsigned code;
788
789 if (INTEGERP (val))
790 code = XINT (val);
791 else
792 {
793 CHECK_CONS (val);
794 CHECK_NUMBER (XCAR (val));
795 CHECK_NUMBER (XCDR (val));
796 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
797 }
798 if (code < charset.min_code
799 || code > charset.max_code)
800 args_out_of_range_3 (make_number (charset.min_code),
801 make_number (charset.max_code), val);
802 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
803 charset.min_code = code;
804 }
805
806 val = args[charset_arg_max_code];
807 if (! NILP (val))
808 {
809 unsigned code;
810
811 if (INTEGERP (val))
812 code = XINT (val);
813 else
814 {
815 CHECK_CONS (val);
816 CHECK_NUMBER (XCAR (val));
817 CHECK_NUMBER (XCDR (val));
818 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
819 }
820 if (code < charset.min_code
821 || code > charset.max_code)
822 args_out_of_range_3 (make_number (charset.min_code),
823 make_number (charset.max_code), val);
824 charset.max_code = code;
825 }
3263d5a2 826
e9ce014c
KH
827 charset.compact_codes_p = charset.max_code < 0x1000000;
828
3263d5a2
KH
829 val = args[charset_arg_invalid_code];
830 if (NILP (val))
831 {
832 if (charset.min_code > 0)
833 charset.invalid_code = 0;
bbf12bb3
KH
834 else
835 {
3263d5a2
KH
836 XSETINT (val, charset.max_code + 1);
837 if (XINT (val) == charset.max_code + 1)
838 charset.invalid_code = charset.max_code + 1;
839 else
840 error ("Attribute :invalid-code must be specified");
76d7b829 841 }
76d7b829 842 }
3263d5a2
KH
843 else
844 {
845 CHECK_NATNUM (val);
846 charset.invalid_code = XFASTINT (val);
847 }
76d7b829 848
3263d5a2
KH
849 val = args[charset_arg_iso_final];
850 if (NILP (val))
851 charset.iso_final = -1;
852 else
853 {
854 CHECK_NUMBER (val);
855 if (XINT (val) < '0' || XINT (val) > 127)
856 error ("Invalid iso-final-char: %d", XINT (val));
857 charset.iso_final = XINT (val);
858 }
859
860 val = args[charset_arg_iso_revision];
861 if (NILP (val))
862 charset.iso_revision = -1;
863 else
4ed46869 864 {
3263d5a2
KH
865 CHECK_NUMBER (val);
866 if (XINT (val) > 63)
867 args_out_of_range (make_number (63), val);
868 charset.iso_revision = XINT (val);
4ed46869 869 }
3263d5a2
KH
870
871 val = args[charset_arg_emacs_mule_id];
872 if (NILP (val))
873 charset.emacs_mule_id = -1;
4ed46869
KH
874 else
875 {
3263d5a2
KH
876 CHECK_NATNUM (val);
877 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
878 error ("Invalid emacs-mule-id: %d", XINT (val));
879 charset.emacs_mule_id = XINT (val);
c83ef371 880 }
6ef23ebb 881
3263d5a2 882 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
4ed46869 883
3263d5a2
KH
884 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
885
886 charset.unified_p = 0;
887
888 bzero (charset.fast_map, sizeof (charset.fast_map));
889
890 if (! NILP (args[charset_arg_code_offset]))
891 {
892 val = args[charset_arg_code_offset];
893 CHECK_NUMBER (val);
894
895 charset.method = CHARSET_METHOD_OFFSET;
896 charset.code_offset = XINT (val);
897
898 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
899 charset.min_char = i + charset.code_offset;
900 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
901 charset.max_char = i + charset.code_offset;
902 if (charset.max_char > MAX_CHAR)
903 error ("Unsupported max char: %d", charset.max_char);
904
905 for (i = charset.min_char; i < 0x10000 && i <= charset.max_char;
906 i += 128)
907 CHARSET_FAST_MAP_SET (i, charset.fast_map);
908 for (; i <= charset.max_char; i += 0x1000)
909 CHARSET_FAST_MAP_SET (i, charset.fast_map);
910 }
911 else if (! NILP (args[charset_arg_map]))
912 {
913 val = args[charset_arg_map];
914 ASET (attrs, charset_map, val);
915 if (STRINGP (val))
e9ce014c
KH
916 load_charset_map_from_file (&charset, val, 0);
917 else
918 load_charset_map_from_vector (&charset, val, 0);
3263d5a2
KH
919 charset.method = CHARSET_METHOD_MAP_DEFERRED;
920 }
374c5cfd 921 else if (! NILP (args[charset_arg_subset]))
3263d5a2 922 {
374c5cfd
KH
923 Lisp_Object parent;
924 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
925 struct charset *parent_charset;
926
927 val = args[charset_arg_subset];
928 parent = Fcar (val);
929 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
930 parent_min_code = Fnth (make_number (1), val);
931 CHECK_NATNUM (parent_min_code);
932 parent_max_code = Fnth (make_number (2), val);
933 CHECK_NATNUM (parent_max_code);
934 parent_code_offset = Fnth (make_number (3), val);
935 CHECK_NUMBER (parent_code_offset);
936 val = Fmake_vector (make_number (4), Qnil);
937 ASET (val, 0, make_number (parent_charset->id));
938 ASET (val, 1, parent_min_code);
939 ASET (val, 2, parent_max_code);
940 ASET (val, 3, parent_code_offset);
941 ASET (attrs, charset_subset, val);
942
943 charset.method = CHARSET_METHOD_SUBSET;
944 /* Here, we just copy the parent's fast_map. It's not accurate,
945 but at least it works for quickly detecting which character
946 DOESN'T belong to this charset. */
947 for (i = 0; i < 190; i++)
948 charset.fast_map[i] = parent_charset->fast_map[i];
949
950 /* We also copy these for parents. */
951 charset.min_char = parent_charset->min_char;
952 charset.max_char = parent_charset->max_char;
953 }
954 else if (! NILP (args[charset_arg_superset]))
955 {
956 val = args[charset_arg_superset];
957 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 958 val = Fcopy_sequence (val);
374c5cfd 959 ASET (attrs, charset_superset, val);
3263d5a2
KH
960
961 charset.min_char = MAX_CHAR;
962 charset.max_char = 0;
963 for (; ! NILP (val); val = Fcdr (val))
4ed46869 964 {
3263d5a2
KH
965 Lisp_Object elt, car_part, cdr_part;
966 int this_id, offset;
967 struct charset *this_charset;
968
969 elt = Fcar (val);
970 if (CONSP (elt))
971 {
972 car_part = XCAR (elt);
973 cdr_part = XCDR (elt);
974 CHECK_CHARSET_GET_ID (car_part, this_id);
975 CHECK_NUMBER (cdr_part);
976 offset = XINT (cdr_part);
977 }
978 else
4ed46869 979 {
3263d5a2
KH
980 CHECK_CHARSET_GET_ID (elt, this_id);
981 offset = 0;
4ed46869 982 }
3263d5a2
KH
983 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
984
985 this_charset = CHARSET_FROM_ID (this_id);
986 if (charset.min_char > this_charset->min_char)
987 charset.min_char = this_charset->min_char;
988 if (charset.max_char < this_charset->max_char)
989 charset.max_char = this_charset->max_char;
990 for (i = 0; i < 190; i++)
991 charset.fast_map[i] |= this_charset->fast_map[i];
4ed46869 992 }
4ed46869 993 }
3263d5a2
KH
994 else
995 error ("None of :code-offset, :map, :parents are specified");
4ed46869 996
3263d5a2
KH
997 val = args[charset_arg_unify_map];
998 if (! NILP (val) && !STRINGP (val))
999 CHECK_VECTOR (val);
1000 ASET (attrs, charset_unify_map, val);
4ed46869 1001
3263d5a2
KH
1002 CHECK_LIST (args[charset_arg_plist]);
1003 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1004
3263d5a2
KH
1005 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1006 &hash_code);
1007 if (charset.hash_index >= 0)
1008 {
1009 new_definition_p = 0;
4f65af01 1010 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1011 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1012 }
1a45ff10 1013 else
3263d5a2
KH
1014 {
1015 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1016 hash_code);
1017 if (charset_table_used == charset_table_size)
1018 {
1019 charset_table_size += 256;
1020 charset_table
1021 = ((struct charset *)
1022 xrealloc (charset_table,
1023 sizeof (struct charset) * charset_table_size));
1024 }
1025 id = charset_table_used++;
3263d5a2
KH
1026 new_definition_p = 1;
1027 }
4ed46869 1028
4f65af01 1029 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1030 charset.id = id;
1031 charset_table[id] = charset;
1032
1033 if (charset.iso_final >= 0)
4ed46869 1034 {
3263d5a2
KH
1035 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1036 charset.iso_final) = id;
1037 if (new_definition_p)
1038 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1039 Fcons (make_number (id), Qnil));
7c7dceee
KH
1040 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1041 charset_jisx0201_roman = id;
1042 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1043 charset_jisx0208_1978 = id;
1044 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1045 charset_jisx0208 = id;
4ed46869 1046 }
3263d5a2
KH
1047
1048 if (charset.emacs_mule_id >= 0)
4ed46869 1049 {
3263d5a2 1050 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1051 if (charset.emacs_mule_id < 0xA0)
1052 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3263d5a2
KH
1053 if (new_definition_p)
1054 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1055 Fcons (make_number (id), Qnil));
4ed46869
KH
1056 }
1057
3263d5a2
KH
1058 if (new_definition_p)
1059 {
1060 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1061 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1062 Fcons (make_number (id), Qnil));
dbbb237d 1063 charset_ordered_list_tick++;
3263d5a2 1064 }
4ed46869 1065
3263d5a2 1066 return Qnil;
4ed46869
KH
1067}
1068
3263d5a2
KH
1069DEFUN ("define-charset-alias", Fdefine_charset_alias,
1070 Sdefine_charset_alias, 2, 2, 0,
1071 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1072 (alias, charset)
1073 Lisp_Object alias, charset;
4ed46869 1074{
3263d5a2
KH
1075 Lisp_Object attr;
1076
1077 CHECK_CHARSET_GET_ATTR (charset, attr);
1078 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1079 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1080 return Qnil;
1081}
4ed46869 1082
4ed46869 1083
3263d5a2 1084DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0,
56a46d1d 1085 doc: /* Return the primary charset (set by `set-primary-charset'). */)
3263d5a2
KH
1086 ()
1087{
1088 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary));
1089}
4ed46869 1090
4ed46869 1091
3263d5a2
KH
1092DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset,
1093 1, 1, 0,
56a46d1d
DL
1094 doc: /* Set the primary charset to CHARSET.
1095This determines how unibyte/multibyte conversion is done. See also
1096function `primary-charset'. */)
3263d5a2
KH
1097 (charset)
1098 Lisp_Object charset;
1099{
1100 int id;
1101
1102 CHECK_CHARSET_GET_ID (charset, id);
1103 charset_primary = id;
4ed46869
KH
1104 return Qnil;
1105}
1106
3263d5a2
KH
1107
1108DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1109 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1110 (charset)
1111 Lisp_Object charset;
1112{
1113 Lisp_Object attrs;
1114
1115 CHECK_CHARSET_GET_ATTR (charset, attrs);
1116 return CHARSET_ATTR_PLIST (attrs);
1117}
1118
1119
1120DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1121 doc: /* Set CHARSET's property list to PLIST. */)
1122 (charset, plist)
1123 Lisp_Object charset, plist;
1124{
1125 Lisp_Object attrs;
1126
1127 CHECK_CHARSET_GET_ATTR (charset, attrs);
1128 CHARSET_ATTR_PLIST (attrs) = plist;
1129 return plist;
1130}
1131
1132
dbbb237d 1133DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1134 doc: /* Unify characters of CHARSET with Unicode.
1135This means reading the relevant file and installing the table defined
dbbb237d
KH
1136by CHARSET's `:unify-map' property.
1137
64165ae2
DL
1138Optional second arg UNIFY-MAP is a file name string or a vector. It has
1139the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1140`define-charset' (which see).
1141
1142Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1143 (charset, unify_map, deunify)
1144 Lisp_Object charset, unify_map, deunify;
8a73a704 1145{
3263d5a2
KH
1146 int id;
1147 struct charset *cs;
1148
1149 CHECK_CHARSET_GET_ID (charset, id);
1150 cs = CHARSET_FROM_ID (id);
1151 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1152 load_charset (cs);
dbbb237d
KH
1153 if (NILP (deunify)
1154 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1155 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1156 return Qnil;
dbbb237d 1157
3263d5a2 1158 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1159 if (NILP (deunify))
1160 {
1161 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
1162 error ("Can't unify charset: %s", XSYMBOL (charset)->name->data);
1163 if (NILP (unify_map))
1164 unify_map = CHARSET_UNIFY_MAP (cs);
1165 if (STRINGP (unify_map))
1166 load_charset_map_from_file (cs, unify_map, 2);
1167 else if (VECTORP (unify_map))
1168 load_charset_map_from_vector (cs, unify_map, 2);
1169 else if (NILP (unify_map))
1170 error ("No unify-map for charset");
1171 else
1172 error ("Bad unify-map arg");
1173 CHARSET_UNIFIED_P (cs) = 1;
1174 }
1175 else if (CHAR_TABLE_P (Vchar_unify_table))
1176 {
1177 int min_code = CHARSET_MIN_CODE (cs);
1178 int max_code = CHARSET_MAX_CODE (cs);
1179 int min_char = DECODE_CHAR (cs, min_code);
1180 int max_char = DECODE_CHAR (cs, max_code);
1181
1182 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1183 }
1184
3263d5a2 1185 return Qnil;
8a73a704
KH
1186}
1187
3fac5a51
KH
1188DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1189 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1190 doc: /*
1721b6af 1191Return an unsed ISO final char for a charset of DIMENISION and CHARS.
fdb82f93
PJ
1192DIMENSION is the number of bytes to represent a character: 1 or 2.
1193CHARS is the number of characters in a dimension: 94 or 96.
1194
1195This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1196If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1197return nil. */)
1198 (dimension, chars)
3fac5a51
KH
1199 Lisp_Object dimension, chars;
1200{
1201 int final_char;
1202
b7826503
PJ
1203 CHECK_NUMBER (dimension);
1204 CHECK_NUMBER (chars);
3263d5a2
KH
1205 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1206 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1207 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1208 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1209 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1210 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1211 break;
3fac5a51
KH
1212 return (final_char <= '?' ? make_number (final_char) : Qnil);
1213}
1214
3263d5a2
KH
1215static void
1216check_iso_charset_parameter (dimension, chars, final_char)
1217 Lisp_Object dimension, chars, final_char;
4ed46869 1218{
3263d5a2
KH
1219 CHECK_NATNUM (dimension);
1220 CHECK_NATNUM (chars);
1221 CHECK_NATNUM (final_char);
4ed46869 1222
3263d5a2
KH
1223 if (XINT (dimension) > 3)
1224 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1225 if (XINT (chars) != 94 && XINT (chars) != 96)
1226 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1227 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1228 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1229}
1230
1231
1232DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1233 4, 4, 0,
1234 doc: /*
1235Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
56a46d1d 1236CHARSET should be defined by `define-charset' in advance. */)
3263d5a2
KH
1237 (dimension, chars, final_char, charset)
1238 Lisp_Object dimension, chars, final_char, charset;
1239{
1240 int id;
4ed46869 1241
3263d5a2
KH
1242 CHECK_CHARSET_GET_ID (charset, id);
1243 check_iso_charset_parameter (dimension, chars, final_char);
1244
16fed1fc 1245 ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), XINT (final_char)) = id;
4ed46869
KH
1246 return Qnil;
1247}
1248
3263d5a2 1249
2e344af3
KH
1250/* Return information about charsets in the text at PTR of NBYTES
1251 bytes, which are NCHARS characters. The value is:
f6302ac9 1252
cfe34140 1253 0: Each character is represented by one byte. This is always
3263d5a2
KH
1254 true for a unibyte string. For a multibyte string, true if
1255 it contains only ASCII characters.
1256
28c026cd
DL
1257 1: No charsets other than ascii, control-1, and latin-1 are
1258 found.
1d67c29b 1259
3263d5a2
KH
1260 2: Otherwise.
1261*/
4ed46869
KH
1262
1263int
3263d5a2
KH
1264string_xstring_p (string)
1265 Lisp_Object string;
4ed46869 1266{
dbbb237d
KH
1267 const unsigned char *p = XSTRING (string)->data;
1268 const unsigned char *endp = p + STRING_BYTES (XSTRING (string));
3263d5a2
KH
1269 struct charset *charset;
1270
1271 if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
1272 return 0;
1273
1274 charset = CHARSET_FROM_ID (charset_iso_8859_1);
1275 while (p < endp)
0282eb69 1276 {
3263d5a2 1277 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1278
64165ae2 1279 /* Fixme: comparison of unsigned expression < 0 is always false */
3263d5a2
KH
1280 if (ENCODE_CHAR (charset, c) < 0)
1281 return 2;
0282eb69 1282 }
3263d5a2
KH
1283 return 1;
1284}
05505664 1285
05505664 1286
3263d5a2 1287/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1288
3263d5a2
KH
1289 CHARSETS is a vector. Each element is a cons of CHARSET and
1290 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1291 FOUND-FLAG t (or nil) means that the corresponding charset is
1292 already found (or not yet found).
2e344af3 1293
3263d5a2 1294 It may lookup a translation table TABLE if supplied. */
2e344af3 1295
3263d5a2
KH
1296static void
1297find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
dbbb237d 1298 const unsigned char *ptr;
3263d5a2
KH
1299 int nchars, nbytes;
1300 Lisp_Object charsets, table;
1301{
dbbb237d 1302 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1303 int ncharsets = ASIZE (charsets);
1304
1305 if (nchars == nbytes)
1306 return;
1307
1308 while (ptr < pend)
1309 {
1310 int c = STRING_CHAR_ADVANCE (ptr);
1311 int i;
1312 int all_found = 1;
1313 Lisp_Object elt;
1314
1315 if (!NILP (table))
1316 c = translate_char (table, c);
1317 for (i = 0; i < ncharsets; i++)
1318 {
1319 elt = AREF (charsets, i);
1320 if (NILP (XCDR (elt)))
1321 {
1322 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1323
1324 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
1325 XCDR (elt) = Qt;
1326 else
1327 all_found = 0;
1328 }
4ed46869 1329 }
3263d5a2
KH
1330 if (all_found)
1331 break;
4ed46869 1332 }
4ed46869
KH
1333}
1334
28c026cd 1335/* Fixme: returns nil for unibyte. */
4ed46869 1336DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1337 2, 3, 0,
fdb82f93
PJ
1338 doc: /* Return a list of charsets in the region between BEG and END.
1339BEG and END are buffer positions.
1340Optional arg TABLE if non-nil is a translation table to look up.
1341
fdb82f93
PJ
1342If the current buffer is unibyte, the returned list may contain
1343only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1344 (beg, end, table)
23d2a7f1 1345 Lisp_Object beg, end, table;
4ed46869 1346{
3263d5a2 1347 Lisp_Object charsets;
6ae1f27e 1348 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
1349 Lisp_Object val;
1350
1351 validate_region (&beg, &end);
1352 from = XFASTINT (beg);
1353 stop = to = XFASTINT (end);
6ae1f27e 1354
4ed46869 1355 if (from < GPT && GPT < to)
6ae1f27e
RS
1356 {
1357 stop = GPT;
1358 stop_byte = GPT_BYTE;
1359 }
1360 else
1361 stop_byte = CHAR_TO_BYTE (stop);
1362
1363 from_byte = CHAR_TO_BYTE (from);
1364
3263d5a2
KH
1365 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1366 for (i = 0; i < charset_table_used; i++)
1367 ASET (charsets, i, Fcons (make_number (i), Qnil));
1368
4ed46869
KH
1369 while (1)
1370 {
3263d5a2
KH
1371 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1372 stop_byte - from_byte, charsets, table);
4ed46869 1373 if (stop < to)
6ae1f27e
RS
1374 {
1375 from = stop, from_byte = stop_byte;
1376 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1377 }
4ed46869
KH
1378 else
1379 break;
1380 }
6ae1f27e 1381
4ed46869 1382 val = Qnil;
3263d5a2
KH
1383 for (i = charset_table_used - 1; i >= 0; i--)
1384 if (!NILP (XCDR (AREF (charsets, i))))
1385 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1386 return val;
1387}
1388
28c026cd 1389/* Fixme: returns nil for unibyte. */
4ed46869 1390DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1391 1, 2, 0,
fdb82f93
PJ
1392 doc: /* Return a list of charsets in STR.
1393Optional arg TABLE if non-nil is a translation table to look up.
1394
fdb82f93 1395If STR is unibyte, the returned list may contain
3263d5a2 1396only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1397 (str, table)
23d2a7f1 1398 Lisp_Object str, table;
4ed46869 1399{
3263d5a2 1400 Lisp_Object charsets;
4ed46869
KH
1401 int i;
1402 Lisp_Object val;
1403
b7826503 1404 CHECK_STRING (str);
87b089ad 1405
3263d5a2 1406 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
dcd50550
KH
1407 for (i = 0; i < charset_table_used; i++)
1408 ASET (charsets, i, Fcons (make_number (i), Qnil));
3263d5a2
KH
1409 find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
1410 STRING_BYTES (XSTRING (str)), charsets, table);
2e344af3 1411
4ed46869 1412 val = Qnil;
3263d5a2
KH
1413 for (i = charset_table_used - 1; i >= 0; i--)
1414 if (!NILP (XCDR (AREF (charsets, i))))
1415 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1416 return val;
1417}
2e344af3 1418
4ed46869 1419\f
3263d5a2
KH
1420
1421/* Return a character correponding to the code-point CODE of
1422 CHARSET. */
1423
1424int
1425decode_char (charset, code)
1426 struct charset *charset;
1427 unsigned code;
4ed46869 1428{
3263d5a2
KH
1429 int c, char_index;
1430 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1431
3263d5a2
KH
1432 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1433 return -1;
4ed46869 1434
3263d5a2 1435 if (method == CHARSET_METHOD_MAP_DEFERRED)
ac4137cc 1436 {
3263d5a2
KH
1437 load_charset (charset);
1438 method = CHARSET_METHOD (charset);
ac4137cc 1439 }
4ed46869 1440
374c5cfd
KH
1441 if (method == CHARSET_METHOD_SUBSET)
1442 {
1443 Lisp_Object subset_info;
1444
1445 subset_info = CHARSET_SUBSET (charset);
1446 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1447 code -= XINT (AREF (subset_info, 3));
1448 if (code < XFASTINT (AREF (subset_info, 1))
1449 || code > XFASTINT (AREF (subset_info, 2)))
1450 c = -1;
1451 else
1452 c = DECODE_CHAR (charset, code);
1453 }
1454 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1455 {
3263d5a2 1456 Lisp_Object parents;
4ed46869 1457
374c5cfd 1458 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1459 c = -1;
1460 for (; CONSP (parents); parents = XCDR (parents))
1461 {
1462 int id = XINT (XCAR (XCAR (parents)));
1463 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1464 unsigned this_code = code - code_offset;
9d3d8cba 1465
3263d5a2
KH
1466 charset = CHARSET_FROM_ID (id);
1467 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1468 break;
1469 }
1470 }
1471 else
ac4137cc 1472 {
3263d5a2 1473 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1474 if (char_index < 0)
1475 return -1;
3263d5a2
KH
1476
1477 if (method == CHARSET_METHOD_MAP)
ac4137cc 1478 {
3263d5a2
KH
1479 Lisp_Object decoder;
1480
1481 decoder = CHARSET_DECODER (charset);
1482 if (! VECTORP (decoder))
1483 return -1;
1484 c = XINT (AREF (decoder, char_index));
ac4137cc
KH
1485 }
1486 else
1487 {
3263d5a2 1488 c = char_index + CHARSET_CODE_OFFSET (charset);
ac4137cc
KH
1489 }
1490 }
9d3d8cba 1491
3263d5a2
KH
1492 if (CHARSET_UNIFIED_P (charset)
1493 && c >= 0)
c449997d
KH
1494 {
1495 MAYBE_UNIFY_CHAR (c);
1496 }
d2665018 1497
3263d5a2 1498 return c;
d2665018
KH
1499}
1500
374c5cfd
KH
1501/* Variable used temporarily by the macro ENCODE_CHAR. */
1502Lisp_Object charset_work;
1bcc1567 1503
3263d5a2 1504/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1505 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1506 use CHARSET's strict_max_char instead of max_char. */
1bcc1567 1507
3263d5a2
KH
1508unsigned
1509encode_char (charset, c)
1510 struct charset *charset;
9b6a601f
KH
1511 int c;
1512{
3263d5a2
KH
1513 unsigned code;
1514 enum charset_method method = CHARSET_METHOD (charset);
8ac5a9cc 1515
3263d5a2 1516 if (CHARSET_UNIFIED_P (charset))
4ed46869 1517 {
374c5cfd 1518 Lisp_Object deunifier, deunified;
4ed46869 1519
3263d5a2
KH
1520 deunifier = CHARSET_DEUNIFIER (charset);
1521 if (! CHAR_TABLE_P (deunifier))
1522 {
dbbb237d 1523 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
3263d5a2
KH
1524 deunifier = CHARSET_DEUNIFIER (charset);
1525 }
374c5cfd
KH
1526 deunified = CHAR_TABLE_REF (deunifier, c);
1527 if (! NILP (deunified))
1528 c = XINT (deunified);
4ed46869 1529 }
beeedaad 1530
3263d5a2
KH
1531 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1532 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1533 return CHARSET_INVALID_CODE (charset);
beeedaad 1534
374c5cfd
KH
1535 if (method == CHARSET_METHOD_SUBSET)
1536 {
1537 Lisp_Object subset_info;
1538 struct charset *this_charset;
1539
1540 subset_info = CHARSET_SUBSET (charset);
1541 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1542 code = ENCODE_CHAR (this_charset, c);
1543 if (code == CHARSET_INVALID_CODE (this_charset)
1544 || code < XFASTINT (AREF (subset_info, 1))
1545 || code > XFASTINT (AREF (subset_info, 2)))
1546 return CHARSET_INVALID_CODE (charset);
1547 code += XINT (AREF (subset_info, 3));
1548 return code;
1549 }
1550
1551 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1552 {
3263d5a2 1553 Lisp_Object parents;
859f2b3c 1554
374c5cfd 1555 parents = CHARSET_SUPERSET (charset);
3263d5a2 1556 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1557 {
3263d5a2
KH
1558 int id = XINT (XCAR (XCAR (parents)));
1559 int code_offset = XINT (XCDR (XCAR (parents)));
1560 struct charset *this_charset = CHARSET_FROM_ID (id);
beeedaad 1561
3263d5a2 1562 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1563 if (code != CHARSET_INVALID_CODE (this_charset))
1564 return code + code_offset;
beeedaad 1565 }
3263d5a2
KH
1566 return CHARSET_INVALID_CODE (charset);
1567 }
99529c2c 1568
3263d5a2 1569 if (method == CHARSET_METHOD_MAP_DEFERRED)
beeedaad 1570 {
3263d5a2
KH
1571 load_charset (charset);
1572 method = CHARSET_METHOD (charset);
859f2b3c 1573 }
beeedaad 1574
3263d5a2 1575 if (method == CHARSET_METHOD_MAP)
3f62427c 1576 {
3263d5a2 1577 Lisp_Object encoder;
beeedaad 1578 Lisp_Object val;
beeedaad 1579
3263d5a2
KH
1580 encoder = CHARSET_ENCODER (charset);
1581 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1582 return CHARSET_INVALID_CODE (charset);
1583 val = CHAR_TABLE_REF (encoder, c);
374c5cfd
KH
1584 if (NILP (val))
1585 return CHARSET_INVALID_CODE (charset);
e9ce014c
KH
1586 code = XINT (val);
1587 if (! CHARSET_COMPACT_CODES_P (charset))
1588 code = INDEX_TO_CODE_POINT (charset, code);
3263d5a2 1589 }
820ee249 1590 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1591 {
3263d5a2
KH
1592 code = c - CHARSET_CODE_OFFSET (charset);
1593 code = INDEX_TO_CODE_POINT (charset, code);
3f62427c 1594 }
beeedaad 1595
3263d5a2 1596 return code;
3f62427c
KH
1597}
1598
4ed46869 1599
3263d5a2
KH
1600DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1601 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1602Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1603
3263d5a2
KH
1604CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1605
1606Optional argument RESTRICTION specifies a way to map the pair of CCS
1607and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1608 (charset, code_point, restriction)
1609 Lisp_Object charset, code_point, restriction;
4ed46869 1610{
3263d5a2
KH
1611 int c, id;
1612 unsigned code;
1613 struct charset *charsetp;
4ed46869 1614
3263d5a2
KH
1615 CHECK_CHARSET_GET_ID (charset, id);
1616 if (CONSP (code_point))
1617 {
1618 CHECK_NATNUM (XCAR (code_point));
1619 CHECK_NATNUM (XCDR (code_point));
69f8de5b 1620 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2
KH
1621 }
1622 else
1623 {
1624 CHECK_NATNUM (code_point);
1625 code = XINT (code_point);
1626 }
1627 charsetp = CHARSET_FROM_ID (id);
1628 c = DECODE_CHAR (charsetp, code);
1629 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1630}
1631
046b1f03 1632
3263d5a2
KH
1633DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1634 doc: /* Encode the character CH into a code-point of CHARSET.
1635Return nil if CHARSET doesn't include CH.
17e7ef1b 1636
3263d5a2
KH
1637Optional argument RESTRICTION specifies a way to map CHAR to a
1638code-point in CCS. Currently not supported and just ignored. */)
1639 (ch, charset, restriction)
1640 Lisp_Object ch, charset, restriction;
1641{
16fed1fc 1642 int id;
3263d5a2
KH
1643 unsigned code;
1644 struct charset *charsetp;
046b1f03 1645
3263d5a2
KH
1646 CHECK_CHARSET_GET_ID (charset, id);
1647 CHECK_NATNUM (ch);
3263d5a2 1648 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1649 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1650 if (code == CHARSET_INVALID_CODE (charsetp))
1651 return Qnil;
1652 if (code > 0x7FFFFFF)
1653 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1654 return make_number (code);
6ae1f27e 1655}
9036eb45 1656
87b089ad 1657
b121a744
KH
1658DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1659 doc:
1660 /* Return a character of CHARSET whose position codes are CODEn.
1661
1662CODE1 through CODE4 are optional, but if you don't supply sufficient
1663position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1664is specified. */)
b121a744
KH
1665 (charset, code1, code2, code3, code4)
1666 Lisp_Object charset, code1, code2, code3, code4;
87b089ad 1667{
3263d5a2
KH
1668 int id, dimension;
1669 struct charset *charsetp;
b121a744
KH
1670 unsigned code;
1671 int c;
87b089ad 1672
3263d5a2
KH
1673 CHECK_CHARSET_GET_ID (charset, id);
1674 charsetp = CHARSET_FROM_ID (id);
87b089ad 1675
b121a744
KH
1676 dimension = CHARSET_DIMENSION (charsetp);
1677 if (NILP (code1))
d47073ca
KH
1678 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1679 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1680 else
87b089ad 1681 {
b121a744
KH
1682 CHECK_NATNUM (code1);
1683 if (XFASTINT (code1) >= 0x100)
1684 args_out_of_range (make_number (0xFF), code1);
1685 code = XFASTINT (code1);
2e344af3 1686
b0a1e45e 1687 if (dimension > 1)
b121a744
KH
1688 {
1689 code <<= 8;
b0a1e45e
KH
1690 if (NILP (code2))
1691 code |= charsetp->code_space[(dimension - 2) * 4];
b121a744
KH
1692 else
1693 {
b0a1e45e
KH
1694 CHECK_NATNUM (code2);
1695 if (XFASTINT (code2) >= 0x100)
1696 args_out_of_range (make_number (0xFF), code2);
1697 code |= XFASTINT (code2);
b121a744
KH
1698 }
1699
b0a1e45e 1700 if (dimension > 2)
b121a744
KH
1701 {
1702 code <<= 8;
b0a1e45e
KH
1703 if (NILP (code3))
1704 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1705 else
1706 {
b0a1e45e
KH
1707 CHECK_NATNUM (code3);
1708 if (XFASTINT (code3) >= 0x100)
1709 args_out_of_range (make_number (0xFF), code3);
1710 code |= XFASTINT (code3);
1711 }
1712
1713 if (dimension > 3)
1714 {
1715 code <<= 8;
1716 if (NILP (code4))
1717 code |= charsetp->code_space[0];
1718 else
1719 {
1720 CHECK_NATNUM (code4);
1721 if (XFASTINT (code4) >= 0x100)
1722 args_out_of_range (make_number (0xFF), code4);
1723 code |= XFASTINT (code4);
1724 }
b121a744
KH
1725 }
1726 }
1727 }
1728 }
3263d5a2 1729
b121a744
KH
1730 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1731 code &= 0x7F7F7F7F;
1732 c = DECODE_CHAR (charsetp, code);
1733 if (c < 0)
1734 error ("Invalid code(s)");
3263d5a2 1735 return make_number (c);
2e344af3
KH
1736}
1737
3263d5a2
KH
1738
1739/* Return the first charset in CHARSET_LIST that contains C.
1740 CHARSET_LIST is a list of charset IDs. If it is nil, use
1741 Vcharset_ordered_list. */
1742
1743struct charset *
1744char_charset (c, charset_list, code_return)
1745 int c;
1746 Lisp_Object charset_list;
1747 unsigned *code_return;
2e344af3 1748{
3263d5a2
KH
1749 if (NILP (charset_list))
1750 charset_list = Vcharset_ordered_list;
2e344af3 1751
3263d5a2 1752 while (CONSP (charset_list))
2e344af3 1753 {
3263d5a2
KH
1754 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1755 unsigned code = ENCODE_CHAR (charset, c);
1756
1757 if (code != CHARSET_INVALID_CODE (charset))
1758 {
1759 if (code_return)
1760 *code_return = code;
1761 return charset;
1762 }
1763 charset_list = XCDR (charset_list);
2e344af3 1764 }
3263d5a2 1765 return NULL;
2e344af3
KH
1766}
1767
2e344af3 1768
56a46d1d 1769/* Fixme: `unknown' can't happen now? */
3263d5a2 1770DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
04c2f2c5 1771 doc: /*Return list of charset and one to three position-codes of CHAR.
16fed1fc 1772If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */)
3263d5a2
KH
1773 (ch)
1774 Lisp_Object ch;
2e344af3 1775{
3263d5a2
KH
1776 struct charset *charset;
1777 int c, dimension;
1778 unsigned code;
1779 Lisp_Object val;
1780
1781 CHECK_CHARACTER (ch);
1782 c = XFASTINT (ch);
1783 charset = CHAR_CHARSET (c);
1784 if (! charset)
1785 return Fcons (intern ("unknown"), Fcons (ch, Qnil));
1786
1787 code = ENCODE_CHAR (charset, c);
1788 if (code == CHARSET_INVALID_CODE (charset))
1789 abort ();
1790 dimension = CHARSET_DIMENSION (charset);
1791 val = (dimension == 1 ? Fcons (make_number (code), Qnil)
1792 : dimension == 2 ? Fcons (make_number (code >> 8),
1793 Fcons (make_number (code & 0xFF), Qnil))
1794 : Fcons (make_number (code >> 16),
1795 Fcons (make_number ((code >> 8) & 0xFF),
1796 Fcons (make_number (code & 0xFF), Qnil))));
1797 return Fcons (CHARSET_NAME (charset), val);
2e344af3 1798}
87b089ad 1799
740f080d 1800
3263d5a2
KH
1801DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1802 doc: /* Return the charset of highest priority that contains CHAR. */)
1803 (ch)
1804 Lisp_Object ch;
740f080d 1805{
3263d5a2 1806 struct charset *charset;
740f080d 1807
3263d5a2
KH
1808 CHECK_CHARACTER (ch);
1809 charset = CHAR_CHARSET (XINT (ch));
1810 return (CHARSET_NAME (charset));
740f080d
KH
1811}
1812
2e344af3 1813
3263d5a2
KH
1814DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1815 doc: /*
1816Return charset of a character in the current buffer at position POS.
1817If POS is nil, it defauls to the current point.
1818If POS is out of range, the value is nil. */)
1819 (pos)
1820 Lisp_Object pos;
2e344af3 1821{
3263d5a2
KH
1822 Lisp_Object ch;
1823 struct charset *charset;
1824
1825 ch = Fchar_after (pos);
1826 if (! INTEGERP (ch))
1827 return ch;
1828 charset = CHAR_CHARSET (XINT (ch));
1829 return (CHARSET_NAME (charset));
87b089ad
RS
1830}
1831
2e344af3 1832
3263d5a2
KH
1833DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1834 doc: /*
1835Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1836
1837ISO 2022's designation sequence (escape sequence) distinguishes charsets
1838by their DIMENSION, CHARS, and FINAL-CHAR,
1839where as Emacs distinguishes them by charset symbol.
1840See the documentation of the function `charset-info' for the meanings of
1841DIMENSION, CHARS, and FINAL-CHAR. */)
1842 (dimension, chars, final_char)
1843 Lisp_Object dimension, chars, final_char;
2e344af3 1844{
3263d5a2 1845 int id;
2e344af3 1846
3263d5a2
KH
1847 check_iso_charset_parameter (dimension, chars, final_char);
1848 id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
1849 XFASTINT (final_char));
1850 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2e344af3
KH
1851}
1852
3263d5a2
KH
1853
1854DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1855 0, 0, 0,
1856 doc: /*
1857Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1858 ()
4ed46869 1859{
53316e55 1860 int i;
3263d5a2
KH
1861 struct charset *charset;
1862 Lisp_Object attrs;
4ed46869 1863
3263d5a2 1864 for (i = 0; i < charset_table_used; i++)
4ed46869 1865 {
3263d5a2
KH
1866 charset = CHARSET_FROM_ID (i);
1867 attrs = CHARSET_ATTRIBUTES (charset);
1868
1869 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1870 {
1871 CHARSET_ATTR_DECODER (attrs) = Qnil;
1872 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1873 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1874 }
1875
1876 if (CHARSET_UNIFIED_P (charset))
1877 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
5729c92f
KH
1878 }
1879
3263d5a2 1880 if (CHAR_TABLE_P (Vchar_unified_charset_table))
5729c92f 1881 {
3263d5a2
KH
1882 Foptimize_char_table (Vchar_unified_charset_table);
1883 Vchar_unify_table = Vchar_unified_charset_table;
1884 Vchar_unified_charset_table = Qnil;
4ed46869
KH
1885 }
1886
3263d5a2 1887 return Qnil;
4ed46869
KH
1888}
1889
8ddf5e57
DL
1890DEFUN ("charset-priority-list", Fcharset_priority_list,
1891 Scharset_priority_list, 0, 1, 0,
1892 doc: /* Return the list of charsets ordered by priority.
1893HIGHESTP non-nil means just return the highest priority one. */)
1894 (highestp)
1895 Lisp_Object highestp;
1896{
1897 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1898
1899 if (!NILP (highestp))
16fed1fc 1900 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
8ddf5e57
DL
1901
1902 while (!NILP (list))
1903 {
16fed1fc 1904 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57
DL
1905 list = XCDR (list);
1906 }
1907 return Fnreverse (val);
1908}
1909
1910DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1911 1, MANY, 0,
1912 doc: /* Assign higher priority to the charsets given as arguments.
1913usage: (set-charset-priority &rest charsets) */)
1914 (nargs, args)
1915 int nargs;
1916 Lisp_Object *args;
1917{
16fed1fc
DL
1918 Lisp_Object new_head = Qnil, old_list, arglist[2];
1919 int i, id;
8ddf5e57
DL
1920
1921 old_list = Fcopy_sequence (Vcharset_ordered_list);
1922 for (i = 0; i < nargs; i++)
1923 {
1924 CHECK_CHARSET_GET_ID (args[i], id);
16fed1fc
DL
1925 old_list = Fdelq (make_number (id), old_list);
1926 new_head = Fcons (make_number (id), new_head);
8ddf5e57
DL
1927 }
1928 arglist[0] = Fnreverse (new_head);
1929 arglist[1] = old_list;
1930 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 1931 charset_ordered_list_tick++;
8ddf5e57
DL
1932 return Qnil;
1933}
4ed46869 1934\f
3263d5a2
KH
1935void
1936init_charset ()
4ed46869 1937{
4ed46869 1938
4ed46869
KH
1939}
1940
4ed46869 1941
dfcf069d 1942void
4ed46869
KH
1943init_charset_once ()
1944{
1945 int i, j, k;
1946
3263d5a2
KH
1947 for (i = 0; i < ISO_MAX_DIMENSION; i++)
1948 for (j = 0; j < ISO_MAX_CHARS; j++)
1949 for (k = 0; k < ISO_MAX_FINAL; k++)
1950 iso_charset_table[i][j][k] = -1;
1951
1952 for (i = 0; i < 255; i++)
1953 emacs_mule_charset[i] = NULL;
4ed46869 1954
7c7dceee
KH
1955 charset_jisx0201_roman = -1;
1956 charset_jisx0208_1978 = -1;
1957 charset_jisx0208 = -1;
1958
3263d5a2
KH
1959#if 0
1960 Vchar_charset_set = Fmake_char_table (Qnil, Qnil);
1961 CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil);
1962
1963 DEFSYM (Qcharset_encode_table, "charset-encode-table");
4ed46869
KH
1964
1965 /* Intern this now in case it isn't already done.
1966 Setting this variable twice is harmless.
1967 But don't staticpro it here--that is done in alloc.c. */
1968 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1969
3263d5a2
KH
1970 /* Now we are ready to set up this property, so we can create syntax
1971 tables. */
1972 Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0));
1973#endif
4ed46869
KH
1974}
1975
1976#ifdef emacs
1977
dfcf069d 1978void
4ed46869
KH
1979syms_of_charset ()
1980{
3263d5a2
KH
1981 char *p;
1982
1983 DEFSYM (Qcharsetp, "charsetp");
1984
1985 DEFSYM (Qascii, "ascii");
1986 DEFSYM (Qunicode, "unicode");
1987 DEFSYM (Qeight_bit_control, "eight-bit-control");
1988 DEFSYM (Qeight_bit_graphic, "eight-bit-graphic");
1989 DEFSYM (Qiso_8859_1, "iso-8859-1");
1990
1991 DEFSYM (Qgl, "gl");
1992 DEFSYM (Qgr, "gr");
1993
1994 p = (char *) xmalloc (30000);
1995
1996 staticpro (&Vcharset_ordered_list);
1997 Vcharset_ordered_list = Qnil;
1998
1999 staticpro (&Viso_2022_charset_list);
2000 Viso_2022_charset_list = Qnil;
2001
2002 staticpro (&Vemacs_mule_charset_list);
2003 Vemacs_mule_charset_list = Qnil;
2004
2005 staticpro (&Vcharset_hash_table);
2006 Vcharset_hash_table = Fmakehash (Qeq);
2007
2008 charset_table_size = 128;
2009 charset_table = ((struct charset *)
2010 xmalloc (sizeof (struct charset) * charset_table_size));
2011 charset_table_used = 0;
2012
2013 staticpro (&Vchar_unified_charset_table);
2014 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2015
2016 defsubr (&Scharsetp);
2017 defsubr (&Smap_charset_chars);
2018 defsubr (&Sdefine_charset_internal);
2019 defsubr (&Sdefine_charset_alias);
2020 defsubr (&Sprimary_charset);
2021 defsubr (&Sset_primary_charset);
2022 defsubr (&Scharset_plist);
2023 defsubr (&Sset_charset_plist);
2024 defsubr (&Sunify_charset);
3fac5a51 2025 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2026 defsubr (&Sdeclare_equiv_charset);
2027 defsubr (&Sfind_charset_region);
2028 defsubr (&Sfind_charset_string);
3263d5a2
KH
2029 defsubr (&Sdecode_char);
2030 defsubr (&Sencode_char);
4ed46869 2031 defsubr (&Ssplit_char);
3263d5a2 2032 defsubr (&Smake_char);
4ed46869 2033 defsubr (&Schar_charset);
90d7b74e 2034 defsubr (&Scharset_after);
4ed46869 2035 defsubr (&Siso_charset);
3263d5a2 2036 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2037 defsubr (&Scharset_priority_list);
2038 defsubr (&Sset_charset_priority);
3263d5a2
KH
2039
2040 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
2041 doc: /* Directory of charset map files that come with GNU Emacs.
04c2f2c5 2042The default value is sub-directory "charsets" of `data-directory'. */);
3263d5a2
KH
2043 Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
2044 Vdata_directory);
4ed46869
KH
2045
2046 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2047 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2048 Vcharset_list = Qnil;
2049
2050 /* Make the prerequisite charset `ascii' and `unicode'. */
2051 {
2052 Lisp_Object args[charset_arg_max];
2053 Lisp_Object plist[14];
2054 Lisp_Object val;
2055
2056 plist[0] = intern (":name");
3263d5a2 2057 plist[2] = intern (":dimension");
3263d5a2 2058 plist[4] = intern (":code-space");
3263d5a2 2059 plist[6] = intern (":iso-final-char");
3263d5a2 2060 plist[8] = intern (":emacs-mule-id");
3263d5a2 2061 plist[10] = intern (":ascii-compatible-p");
820ee249
KH
2062 plist[12] = intern (":code-offset");
2063
2064 args[charset_arg_name] = Qascii;
2065 args[charset_arg_dimension] = make_number (1);
2066 val = Fmake_vector (make_number (8), make_number (0));
2067 ASET (val, 1, make_number (127));
2068 args[charset_arg_code_space] = val;
2069 args[charset_arg_min_code] = Qnil;
2070 args[charset_arg_max_code] = Qnil;
2071 args[charset_arg_iso_final] = make_number ('B');
2072 args[charset_arg_iso_revision] = Qnil;
2073 args[charset_arg_emacs_mule_id] = make_number (0);
2074 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
2075 args[charset_arg_supplementary_p] = Qnil;
2076 args[charset_arg_invalid_code] = Qnil;
820ee249 2077 args[charset_arg_code_offset] = make_number (0);
3263d5a2 2078 args[charset_arg_map] = Qnil;
374c5cfd
KH
2079 args[charset_arg_subset] = Qnil;
2080 args[charset_arg_superset] = Qnil;
3263d5a2
KH
2081 args[charset_arg_unify_map] = Qnil;
2082 /* The actual plist is set by mule-conf.el. */
820ee249
KH
2083 plist[1] = args[charset_arg_name];
2084 plist[3] = args[charset_arg_dimension];
2085 plist[5] = args[charset_arg_code_space];
2086 plist[7] = args[charset_arg_iso_final];
2087 plist[9] = args[charset_arg_emacs_mule_id];
2088 plist[11] = args[charset_arg_ascii_compatible_p];
2089 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
2090 args[charset_arg_plist] = Flist (14, plist);
2091 Fdefine_charset_internal (charset_arg_max, args);
16fed1fc 2092 charset_ascii = XINT (CHARSET_SYMBOL_ID (Qascii));
3263d5a2 2093
820ee249
KH
2094 args[charset_arg_name] = Qunicode;
2095 args[charset_arg_dimension] = make_number (3);
3263d5a2
KH
2096 val = Fmake_vector (make_number (8), make_number (0));
2097 ASET (val, 1, make_number (255));
2098 ASET (val, 3, make_number (255));
2099 ASET (val, 5, make_number (16));
820ee249
KH
2100 args[charset_arg_code_space] = val;
2101 args[charset_arg_min_code] = Qnil;
2102 args[charset_arg_max_code] = Qnil;
2103 args[charset_arg_iso_final] = Qnil;
3263d5a2 2104 args[charset_arg_iso_revision] = Qnil;
820ee249
KH
2105 args[charset_arg_emacs_mule_id] = Qnil;
2106 args[charset_arg_ascii_compatible_p] = Qt;
3263d5a2
KH
2107 args[charset_arg_supplementary_p] = Qnil;
2108 args[charset_arg_invalid_code] = Qnil;
820ee249 2109 args[charset_arg_code_offset] = make_number (0);
3263d5a2 2110 args[charset_arg_map] = Qnil;
374c5cfd
KH
2111 args[charset_arg_subset] = Qnil;
2112 args[charset_arg_superset] = Qnil;
3263d5a2
KH
2113 args[charset_arg_unify_map] = Qnil;
2114 /* The actual plist is set by mule-conf.el. */
820ee249
KH
2115 plist[1] = args[charset_arg_name];
2116 plist[3] = args[charset_arg_dimension];
2117 plist[5] = args[charset_arg_code_space];
2118 plist[7] = args[charset_arg_iso_final];
2119 plist[9] = args[charset_arg_emacs_mule_id];
2120 plist[11] = args[charset_arg_ascii_compatible_p];
2121 plist[13] = args[charset_arg_code_offset];
3263d5a2
KH
2122 args[charset_arg_plist] = Flist (14, plist);
2123 Fdefine_charset_internal (charset_arg_max, args);
16fed1fc 2124 charset_unicode = XINT (CHARSET_SYMBOL_ID (Qunicode));
3263d5a2 2125 }
4ed46869
KH
2126}
2127
2128#endif /* emacs */