(Fcall_process): Sync with HEAD.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
aaef169d 2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
38087490 3 2006 Free Software Foundation, Inc.
ce03bf76
KH
4 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
327719ee 7 Copyright (C) 2003, 2004
3263d5a2
KH
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
4ed46869 10
369314dc
KH
11This file is part of GNU Emacs.
12
13GNU Emacs is free software; you can redistribute it and/or modify
14it under the terms of the GNU General Public License as published by
15the Free Software Foundation; either version 2, or (at your option)
16any later version.
4ed46869 17
369314dc
KH
18GNU Emacs is distributed in the hope that it will be useful,
19but WITHOUT ANY WARRANTY; without even the implied warranty of
20MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21GNU General Public License for more details.
4ed46869 22
369314dc
KH
23You should have received a copy of the GNU General Public License
24along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
25the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26Boston, MA 02110-1301, USA. */
4ed46869 27
68c45bf0 28#include <config.h>
68c45bf0 29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869 33#include <sys/types.h>
4ed46869 34#include "lisp.h"
3263d5a2 35#include "character.h"
4ed46869
KH
36#include "charset.h"
37#include "coding.h"
fc6b09bf 38#include "disptab.h"
3263d5a2 39#include "buffer.h"
4ed46869 40
04c2f2c5 41/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 42
3263d5a2 43 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 44 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
4ed46869 48
3263d5a2
KH
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
04c2f2c5 52 charset_table as a struct charset.
4ed46869 53
3263d5a2 54*/
4ed46869 55
3263d5a2
KH
56/* List of all charsets. This variable is used only from Emacs
57 Lisp. */
4ed46869 58Lisp_Object Vcharset_list;
4ed46869 59
3263d5a2
KH
60/* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62Lisp_Object Vcharset_hash_table;
4ed46869 63
3263d5a2
KH
64/* Table of struct charset. */
65struct charset *charset_table;
4ed46869 66
3263d5a2 67static int charset_table_size;
5af5dd92 68static int charset_table_used;
4ed46869 69
3263d5a2 70Lisp_Object Qcharsetp;
4ed46869 71
3263d5a2
KH
72/* Special charset symbols. */
73Lisp_Object Qascii;
2fe1edd1 74Lisp_Object Qeight_bit;
3263d5a2
KH
75Lisp_Object Qiso_8859_1;
76Lisp_Object Qunicode;
4ed46869 77
3263d5a2
KH
78/* The corresponding charsets. */
79int charset_ascii;
2fe1edd1 80int charset_eight_bit;
3263d5a2
KH
81int charset_iso_8859_1;
82int charset_unicode;
b0e3cf2b 83
7c7dceee
KH
84/* The other special charsets. */
85int charset_jisx0201_roman;
86int charset_jisx0208_1978;
87int charset_jisx0208;
c1a08b4c 88
3263d5a2
KH
89/* Value of charset attribute `charset-iso-plane'. */
90Lisp_Object Qgl, Qgr;
c1a08b4c 91
d1a04588
KH
92/* Charset of unibyte characters. */
93int charset_unibyte;
4ed46869 94
3263d5a2
KH
95/* List of charsets ordered by the priority. */
96Lisp_Object Vcharset_ordered_list;
4ed46869 97
dbbb237d 98/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 99 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
100 -1. */
101unsigned short charset_ordered_list_tick;
4ed46869 102
3263d5a2
KH
103/* List of iso-2022 charsets. */
104Lisp_Object Viso_2022_charset_list;
35e623fb 105
3263d5a2
KH
106/* List of emacs-mule charsets. */
107Lisp_Object Vemacs_mule_charset_list;
108
109struct charset *emacs_mule_charset[256];
4ed46869
KH
110
111/* Mapping table from ISO2022's charset (specified by DIMENSION,
112 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
113int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
114
4beef065 115Lisp_Object Vcharset_map_path;
3263d5a2
KH
116
117Lisp_Object Vchar_unified_charset_table;
118
64165ae2
DL
119/* Defined in chartab.c */
120extern void
121map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
122 Lisp_Object function, Lisp_Object table,
123 Lisp_Object arg, struct charset *charset,
124 unsigned from, unsigned to));
125
69f8de5b
KH
126#define CODE_POINT_TO_INDEX(charset, code) \
127 ((charset)->code_linear_p \
128 ? (code) - (charset)->min_code \
129 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
130 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
131 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
132 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
133 ? (((((code) >> 24) - (charset)->code_space[12]) \
134 * (charset)->code_space[11]) \
135 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
136 * (charset)->code_space[7]) \
137 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
138 * (charset)->code_space[3]) \
820ee249
KH
139 + (((code) & 0xFF) - (charset)->code_space[0]) \
140 - ((charset)->char_index_offset)) \
3263d5a2
KH
141 : -1)
142
143
144/* Convert the character index IDX to code-point CODE for CHARSET.
145 It is assumed that IDX is in a valid range. */
146
820ee249
KH
147#define INDEX_TO_CODE_POINT(charset, idx) \
148 ((charset)->code_linear_p \
149 ? (idx) + (charset)->min_code \
150 : (idx += (charset)->char_index_offset, \
151 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
152 | (((charset)->code_space[4] \
153 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
154 << 8) \
155 | (((charset)->code_space[8] \
156 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
157 << 16) \
158 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
159 << 24))))
4cf9710d 160
8a73a704 161
046b1f03 162\f
93bcb785 163
e9ce014c
KH
164/* Set to 1 to warn that a charset map is loaded and thus a buffer
165 text and a string data may be relocated. */
3263d5a2 166int charset_map_loaded;
4ed46869 167
e9ce014c 168struct charset_map_entries
4ed46869 169{
e9ce014c
KH
170 struct {
171 unsigned from, to;
172 int c;
173 } entry[0x10000];
174 struct charset_map_entries *next;
175};
176
177/* Load the mapping information for CHARSET from ENTRIES.
4cf9710d 178
3263d5a2 179 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
8a73a704 180
3263d5a2
KH
181 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
182 CHARSET->decoder, and CHARSET->encoder.
93bcb785 183
3263d5a2
KH
184 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
185 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
186 setup it too. */
4ed46869 187
3263d5a2 188static void
e9ce014c 189load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 190 struct charset *charset;
e9ce014c
KH
191 struct charset_map_entries *entries;
192 int n_entries;
3263d5a2 193 int control_flag;
4ed46869 194{
3263d5a2 195 Lisp_Object vec, table;
3263d5a2
KH
196 unsigned max_code = CHARSET_MAX_CODE (charset);
197 int ascii_compatible_p = charset->ascii_compatible_p;
198 int min_char, max_char, nonascii_min_char;
3263d5a2 199 int i;
3263d5a2 200 unsigned char *fast_map = charset->fast_map;
99529c2c 201
e9ce014c
KH
202 if (n_entries <= 0)
203 return;
204
205 if (control_flag > 0)
8ac5a9cc 206 {
3263d5a2 207 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 208
374c5cfd 209 table = Fmake_char_table (Qnil, Qnil);
3263d5a2
KH
210 if (control_flag == 1)
211 vec = Fmake_vector (make_number (n), make_number (-1));
212 else if (! CHAR_TABLE_P (Vchar_unify_table))
374c5cfd 213 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
6662e69b 214
3263d5a2 215 charset_map_loaded = 1;
2e344af3 216 }
6662e69b 217
e9ce014c 218 min_char = max_char = entries->entry[0].c;
3263d5a2 219 nonascii_min_char = MAX_CHAR;
e9ce014c 220 for (i = 0; i < n_entries; i++)
2e344af3 221 {
e9ce014c 222 unsigned from, to;
3b4f4446
KH
223 int from_index, to_index;
224 int from_c, to_c;
e9ce014c 225 int idx = i % 0x10000;
3263d5a2 226
e9ce014c
KH
227 if (i > 0 && idx == 0)
228 entries = entries->next;
229 from = entries->entry[idx].from;
230 to = entries->entry[idx].to;
3b4f4446
KH
231 from_c = entries->entry[idx].c;
232 from_index = CODE_POINT_TO_INDEX (charset, from);
233 if (from == to)
6662e69b 234 {
3b4f4446
KH
235 to_index = from_index;
236 to_c = from_c;
6662e69b 237 }
3b4f4446 238 else
6662e69b 239 {
3b4f4446
KH
240 to_index = CODE_POINT_TO_INDEX (charset, to);
241 to_c = from_c + (to_index - from_index);
6662e69b 242 }
3b4f4446
KH
243 if (from_index < 0 || to_index < 0)
244 continue;
3263d5a2 245
3263d5a2 246 if (control_flag < 2)
6662e69b 247 {
3b4f4446 248 int c;
6662e69b 249
3b4f4446
KH
250 if (to_c > max_char)
251 max_char = to_c;
252 else if (from_c < min_char)
253 min_char = from_c;
254 if (ascii_compatible_p)
255 {
256 if (! ASCII_BYTE_P (from_c))
257 {
258 if (from_c < nonascii_min_char)
259 nonascii_min_char = from_c;
260 }
261 else if (! ASCII_BYTE_P (to_c))
262 {
263 nonascii_min_char = 0x80;
264 }
265 }
177c0ea7 266
3b4f4446
KH
267 for (c = from_c; c <= to_c; c++)
268 CHARSET_FAST_MAP_SET (c, fast_map);
269
e9ce014c 270 if (control_flag == 1)
3263d5a2 271 {
e9ce014c 272 unsigned code = from;
e9ce014c 273
e9ce014c
KH
274 if (CHARSET_COMPACT_CODES_P (charset))
275 while (1)
276 {
3b4f4446 277 ASET (vec, from_index, make_number (from_c));
8f924df7
KH
278 if (NILP (CHAR_TABLE_REF (table, from_c)))
279 CHAR_TABLE_SET (table, from_c, make_number (code));
e9ce014c
KH
280 if (from_index == to_index)
281 break;
3b4f4446 282 from_index++, from_c++;
e9ce014c
KH
283 code = INDEX_TO_CODE_POINT (charset, from_index);
284 }
285 else
3b4f4446 286 for (; from_index <= to_index; from_index++, from_c++)
e9ce014c 287 {
3b4f4446 288 ASET (vec, from_index, make_number (from_c));
8f924df7
KH
289 if (NILP (CHAR_TABLE_REF (table, from_c)))
290 CHAR_TABLE_SET (table, from_c, make_number (from_index));
e9ce014c 291 }
3263d5a2 292 }
3263d5a2 293 }
2e344af3
KH
294 else
295 {
69f8de5b 296 unsigned code = from;
3b4f4446 297
69f8de5b
KH
298 while (1)
299 {
300 int c1 = DECODE_CHAR (charset, code);
8f924df7 301
3263d5a2
KH
302 if (c1 >= 0)
303 {
3b4f4446 304 CHAR_TABLE_SET (table, from_c, make_number (c1));
16fed1fc 305 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
3263d5a2
KH
306 if (CHAR_TABLE_P (Vchar_unified_charset_table))
307 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
308 CHARSET_NAME (charset));
309 }
69f8de5b
KH
310 if (from_index == to_index)
311 break;
3b4f4446 312 from_index++, from_c++;
69f8de5b 313 code = INDEX_TO_CODE_POINT (charset, from_index);
3263d5a2 314 }
2e344af3 315 }
8ac5a9cc 316 }
3263d5a2
KH
317
318 if (control_flag < 2)
4ed46869 319 {
3263d5a2
KH
320 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
321 ? nonascii_min_char : min_char);
322 CHARSET_MAX_CHAR (charset) = max_char;
e9ce014c 323 if (control_flag == 1)
4ed46869 324 {
3263d5a2
KH
325 CHARSET_DECODER (charset) = vec;
326 CHARSET_ENCODER (charset) = table;
4ed46869
KH
327 }
328 }
2e344af3 329 else
8f924df7 330 CHARSET_DEUNIFIER (charset) = table;
4ed46869
KH
331}
332
12bcae05 333
3263d5a2
KH
334/* Read a hexadecimal number (preceded by "0x") from the file FP while
335 paying attention to comment charcter '#'. */
12bcae05 336
3263d5a2
KH
337static INLINE unsigned
338read_hex (fp, eof)
339 FILE *fp;
340 int *eof;
12bcae05 341{
3263d5a2
KH
342 int c;
343 unsigned n;
12bcae05 344
3263d5a2
KH
345 while ((c = getc (fp)) != EOF)
346 {
69f8de5b 347 if (c == '#')
3263d5a2
KH
348 {
349 while ((c = getc (fp)) != EOF && c != '\n');
350 }
351 else if (c == '0')
352 {
353 if ((c = getc (fp)) == EOF || c == 'x')
354 break;
355 }
8f924df7 356 }
3263d5a2
KH
357 if (c == EOF)
358 {
359 *eof = 1;
360 return 0;
361 }
362 *eof = 0;
363 n = 0;
364 if (c == 'x')
365 while ((c = getc (fp)) != EOF && isxdigit (c))
366 n = ((n << 4)
367 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
368 else
369 while ((c = getc (fp)) != EOF && isdigit (c))
370 n = (n * 10) + c - '0';
e9ce014c
KH
371 if (c != EOF)
372 ungetc (c, fp);
3263d5a2
KH
373 return n;
374}
12bcae05 375
537efd8d 376
3263d5a2 377/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
378 Each line of MAPFILE has this form
379 0xAAAA 0xCCCC
380 where 0xAAAA is a code-point and 0xCCCC is the corresponding
381 character code, or this form
382 0xAAAA-0xBBBB 0xCCCC
383 where 0xAAAA and 0xBBBB are code-points specifying a range, and
384 0xCCCC is the first character code of the range.
4ed46869 385
3263d5a2
KH
386 The returned vector has this form:
387 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
388 where CODE1 is a code-point or a cons of code-points specifying a
389 range. */
4ed46869 390
c449997d 391extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
4ed46869 392
e9ce014c
KH
393static void
394load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
395 struct charset *charset;
396 Lisp_Object mapfile;
e9ce014c 397 int control_flag;
4ed46869 398{
e9ce014c
KH
399 unsigned min_code = CHARSET_MIN_CODE (charset);
400 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
401 int fd;
402 FILE *fp;
3263d5a2
KH
403 int eof;
404 Lisp_Object suffixes;
e9ce014c
KH
405 struct charset_map_entries *head, *entries;
406 int n_entries;
4ed46869 407
3263d5a2
KH
408 suffixes = Fcons (build_string (".map"),
409 Fcons (build_string (".TXT"), Qnil));
4ed46869 410
4beef065 411 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
3263d5a2
KH
412 if (fd < 0
413 || ! (fp = fdopen (fd, "r")))
414 {
415 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 416 return;
3263d5a2 417 }
4ed46869 418
e9ce014c
KH
419 head = entries = ((struct charset_map_entries *)
420 alloca (sizeof (struct charset_map_entries)));
421 n_entries = 0;
3263d5a2
KH
422 eof = 0;
423 while (1)
424 {
e9ce014c
KH
425 unsigned from, to;
426 int c;
427 int idx;
4ed46869 428
e9ce014c 429 from = read_hex (fp, &eof);
3263d5a2
KH
430 if (eof)
431 break;
e9ce014c
KH
432 if (getc (fp) == '-')
433 to = read_hex (fp, &eof);
434 else
435 to = from;
436 c = (int) read_hex (fp, &eof);
ac4137cc 437
e9ce014c
KH
438 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
439 continue;
ac4137cc 440
e9ce014c 441 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 442 {
e9ce014c
KH
443 entries->next = ((struct charset_map_entries *)
444 alloca (sizeof (struct charset_map_entries)));
445 entries = entries->next;
3263d5a2 446 }
e9ce014c
KH
447 idx = n_entries % 0x10000;
448 entries->entry[idx].from = from;
449 entries->entry[idx].to = to;
450 entries->entry[idx].c = c;
451 n_entries++;
3263d5a2
KH
452 }
453 fclose (fp);
454 close (fd);
177c0ea7 455
e9ce014c 456 load_charset_map (charset, head, n_entries, control_flag);
4ed46869
KH
457}
458
e9ce014c
KH
459static void
460load_charset_map_from_vector (charset, vec, control_flag)
461 struct charset *charset;
462 Lisp_Object vec;
463 int control_flag;
23d2a7f1 464{
e9ce014c
KH
465 unsigned min_code = CHARSET_MIN_CODE (charset);
466 unsigned max_code = CHARSET_MAX_CODE (charset);
467 struct charset_map_entries *head, *entries;
468 int n_entries;
469 int len = ASIZE (vec);
470 int i;
23d2a7f1 471
e9ce014c 472 if (len % 2 == 1)
3263d5a2 473 {
e9ce014c
KH
474 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
475 return;
3263d5a2 476 }
35e623fb 477
e9ce014c
KH
478 head = entries = ((struct charset_map_entries *)
479 alloca (sizeof (struct charset_map_entries)));
480 n_entries = 0;
481 for (i = 0; i < len; i += 2)
35e623fb 482 {
e9ce014c
KH
483 Lisp_Object val, val2;
484 unsigned from, to;
485 int c;
486 int idx;
d2665018 487
e9ce014c
KH
488 val = AREF (vec, i);
489 if (CONSP (val))
bbf12bb3 490 {
e9ce014c
KH
491 val2 = XCDR (val);
492 val = XCAR (val);
493 CHECK_NATNUM (val);
494 CHECK_NATNUM (val2);
495 from = XFASTINT (val);
496 to = XFASTINT (val2);
bbf12bb3 497 }
e9ce014c 498 else
bbf12bb3 499 {
e9ce014c
KH
500 CHECK_NATNUM (val);
501 from = to = XFASTINT (val);
bbf12bb3 502 }
e9ce014c
KH
503 val = AREF (vec, i + 1);
504 CHECK_NATNUM (val);
505 c = XFASTINT (val);
76d7b829 506
e9ce014c
KH
507 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
508 continue;
76d7b829 509
dcc694d7 510 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c
KH
511 {
512 entries->next = ((struct charset_map_entries *)
513 alloca (sizeof (struct charset_map_entries)));
514 entries = entries->next;
515 }
516 idx = n_entries % 0x10000;
517 entries->entry[idx].from = from;
518 entries->entry[idx].to = to;
519 entries->entry[idx].c = c;
520 n_entries++;
521 }
76d7b829 522
e9ce014c 523 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
524}
525
3263d5a2
KH
526static void
527load_charset (charset)
528 struct charset *charset;
76d7b829 529{
3263d5a2 530 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
76d7b829 531 {
3263d5a2 532 Lisp_Object map;
76d7b829 533
3263d5a2
KH
534 map = CHARSET_MAP (charset);
535 if (STRINGP (map))
e9ce014c 536 load_charset_map_from_file (charset, map, 1);
bbf12bb3 537 else
e9ce014c 538 load_charset_map_from_vector (charset, map, 1);
3263d5a2 539 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
76d7b829 540 }
4ed46869 541}
76d7b829 542
3263d5a2
KH
543
544DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
545 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
546 (object)
547 Lisp_Object object;
23d2a7f1 548{
3263d5a2 549 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
550}
551
4ed46869
KH
552
553void
374c5cfd
KH
554map_charset_chars (c_function, function, arg,
555 charset, from, to)
556 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
557 Lisp_Object function, arg;
558 struct charset *charset;
559 unsigned from, to;
4ed46869 560{
3263d5a2 561 Lisp_Object range;
374c5cfd 562 int partial;
3263d5a2 563
8f924df7 564 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
3263d5a2
KH
565 load_charset (charset);
566
374c5cfd
KH
567 partial = (from > CHARSET_MIN_CODE (charset)
568 || to < CHARSET_MAX_CODE (charset));
569
570 if (CHARSET_UNIFIED_P (charset)
571 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
4ed46869 572 {
374c5cfd
KH
573 map_char_table_for_charset (c_function, function,
574 CHARSET_DEUNIFIER (charset), arg,
575 partial ? charset : NULL, from, to);
4ed46869 576 }
374c5cfd 577
3263d5a2 578 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 579 {
374c5cfd
KH
580 int from_idx = CODE_POINT_TO_INDEX (charset, from);
581 int to_idx = CODE_POINT_TO_INDEX (charset, to);
582 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
583 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
584
585 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 586 if (NILP (function))
5af5dd92 587 (*c_function) (arg, range);
3263d5a2
KH
588 else
589 call2 (function, range, arg);
c83ef371 590 }
3263d5a2
KH
591 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
592 {
593 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
594 return;
374c5cfd
KH
595 map_char_table_for_charset (c_function, function,
596 CHARSET_ENCODER (charset), arg,
597 partial ? charset : NULL, from, to);
3263d5a2 598 }
374c5cfd 599 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 600 {
374c5cfd
KH
601 Lisp_Object subset_info;
602 int offset;
603
604 subset_info = CHARSET_SUBSET (charset);
605 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
606 offset = XINT (AREF (subset_info, 3));
607 from -= offset;
608 if (from < XFASTINT (AREF (subset_info, 1)))
609 from = XFASTINT (AREF (subset_info, 1));
610 to -= offset;
611 if (to > XFASTINT (AREF (subset_info, 2)))
612 to = XFASTINT (AREF (subset_info, 2));
613 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 614 }
374c5cfd
KH
615 else /* i.e. CHARSET_METHOD_SUPERSET */
616 {
617 Lisp_Object parents;
4ed46869 618
374c5cfd
KH
619 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
620 parents = XCDR (parents))
bbf12bb3 621 {
374c5cfd
KH
622 int offset;
623 unsigned this_from, this_to;
624
625 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
626 offset = XINT (XCDR (XCAR (parents)));
627 this_from = from - offset;
628 this_to = to - offset;
629 if (this_from < CHARSET_MIN_CODE (charset))
630 this_from = CHARSET_MIN_CODE (charset);
631 if (this_to > CHARSET_MAX_CODE (charset))
632 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
633 map_charset_chars (c_function, function, arg, charset,
634 this_from, this_to);
bbf12bb3 635 }
35e623fb 636 }
4ed46869
KH
637}
638
374c5cfd 639DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 640 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 641FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 642argument ARG.
4ed46869 643
374c5cfd
KH
644RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
645characters contained in CHARSET.
4ed46869 646
374c5cfd 647The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
8f924df7 648range of code points of target characters. */)
374c5cfd
KH
649 (function, charset, arg, from_code, to_code)
650 Lisp_Object function, charset, arg, from_code, to_code;
4ed46869 651{
374c5cfd 652 struct charset *cs;
16fed1fc 653 unsigned from, to;
4ed46869 654
374c5cfd
KH
655 CHECK_CHARSET_GET_CHARSET (charset, cs);
656 if (NILP (from_code))
16fed1fc 657 from = CHARSET_MIN_CODE (cs);
970b7474 658 else
4ed46869 659 {
970b7474
KH
660 CHECK_NATNUM (from_code);
661 from = XINT (from_code);
662 if (from < CHARSET_MIN_CODE (cs))
663 from = CHARSET_MIN_CODE (cs);
4ed46869 664 }
374c5cfd 665 if (NILP (to_code))
970b7474 666 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
667 else
668 {
970b7474
KH
669 CHECK_NATNUM (to_code);
670 to = XINT (to_code);
671 if (to > CHARSET_MAX_CODE (cs))
672 to = CHARSET_MAX_CODE (cs);
4ed46869 673 }
16fed1fc 674 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 675 return Qnil;
35e623fb 676}
4ed46869 677
4ed46869 678
3263d5a2
KH
679/* Define a charset according to the arguments. The Nth argument is
680 the Nth attribute of the charset (the last attribute `charset-id'
681 is not included). See the docstring of `define-charset' for the
682 detail. */
4ed46869 683
3263d5a2
KH
684DEFUN ("define-charset-internal", Fdefine_charset_internal,
685 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
686 doc: /* For internal use only.
687usage: (define-charset-internal ...) */)
3263d5a2
KH
688 (nargs, args)
689 int nargs;
690 Lisp_Object *args;
4ed46869 691{
3263d5a2
KH
692 /* Charset attr vector. */
693 Lisp_Object attrs;
694 Lisp_Object val;
695 unsigned hash_code;
696 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 697 int i, j;
3263d5a2
KH
698 struct charset charset;
699 int id;
700 int dimension;
701 int new_definition_p;
702 int nchars;
703
704 if (nargs != charset_arg_max)
705 return Fsignal (Qwrong_number_of_arguments,
706 Fcons (intern ("define-charset-internal"),
707 make_number (nargs)));
708
709 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
710
711 CHECK_SYMBOL (args[charset_arg_name]);
712 ASET (attrs, charset_name, args[charset_arg_name]);
713
714 val = args[charset_arg_code_space];
715 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 716 {
3263d5a2
KH
717 int min_byte, max_byte;
718
719 min_byte = XINT (Faref (val, make_number (i * 2)));
720 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
721 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
722 error ("Invalid :code-space value");
723 charset.code_space[i * 4] = min_byte;
724 charset.code_space[i * 4 + 1] = max_byte;
725 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
726 nchars *= charset.code_space[i * 4 + 2];
727 charset.code_space[i * 4 + 3] = nchars;
728 if (max_byte > 0)
729 dimension = i + 1;
730 }
4ed46869 731
3263d5a2
KH
732 val = args[charset_arg_dimension];
733 if (NILP (val))
734 charset.dimension = dimension;
735 else
4ed46869 736 {
3263d5a2
KH
737 CHECK_NATNUM (val);
738 charset.dimension = XINT (val);
739 if (charset.dimension < 1 || charset.dimension > 4)
740 args_out_of_range_3 (val, make_number (1), make_number (4));
4ed46869
KH
741 }
742
3263d5a2
KH
743 charset.code_linear_p
744 = (charset.dimension == 1
745 || (charset.code_space[2] == 256
746 && (charset.dimension == 2
747 || (charset.code_space[6] == 256
748 && (charset.dimension == 3
749 || charset.code_space[10] == 256)))));
750
69f8de5b 751 if (! charset.code_linear_p)
4ed46869 752 {
69f8de5b 753 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 754 bzero (charset.code_space_mask, 256);
69f8de5b
KH
755 for (i = 0; i < 4; i++)
756 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
757 j++)
758 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
759 }
760
3263d5a2 761 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 762
3263d5a2
KH
763 charset.min_code = (charset.code_space[0]
764 | (charset.code_space[4] << 8)
765 | (charset.code_space[8] << 16)
766 | (charset.code_space[12] << 24));
767 charset.max_code = (charset.code_space[1]
768 | (charset.code_space[5] << 8)
769 | (charset.code_space[9] << 16)
770 | (charset.code_space[13] << 24));
820ee249 771 charset.char_index_offset = 0;
8a73a704 772
820ee249
KH
773 val = args[charset_arg_min_code];
774 if (! NILP (val))
775 {
776 unsigned code;
fdb82f93 777
820ee249
KH
778 if (INTEGERP (val))
779 code = XINT (val);
780 else
781 {
782 CHECK_CONS (val);
8f924df7
KH
783 CHECK_NUMBER_CAR (val);
784 CHECK_NUMBER_CDR (val);
820ee249
KH
785 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
786 }
787 if (code < charset.min_code
788 || code > charset.max_code)
789 args_out_of_range_3 (make_number (charset.min_code),
790 make_number (charset.max_code), val);
791 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
792 charset.min_code = code;
793 }
3fac5a51 794
820ee249
KH
795 val = args[charset_arg_max_code];
796 if (! NILP (val))
3fac5a51 797 {
820ee249
KH
798 unsigned code;
799
800 if (INTEGERP (val))
801 code = XINT (val);
802 else
803 {
804 CHECK_CONS (val);
8f924df7
KH
805 CHECK_NUMBER_CAR (val);
806 CHECK_NUMBER_CDR (val);
820ee249
KH
807 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
808 }
809 if (code < charset.min_code
810 || code > charset.max_code)
811 args_out_of_range_3 (make_number (charset.min_code),
812 make_number (charset.max_code), val);
813 charset.max_code = code;
3fac5a51 814 }
3fac5a51 815
e9ce014c 816 charset.compact_codes_p = charset.max_code < 0x1000000;
4ed46869 817
3263d5a2
KH
818 val = args[charset_arg_invalid_code];
819 if (NILP (val))
820 {
821 if (charset.min_code > 0)
822 charset.invalid_code = 0;
bbf12bb3
KH
823 else
824 {
3263d5a2
KH
825 XSETINT (val, charset.max_code + 1);
826 if (XINT (val) == charset.max_code + 1)
827 charset.invalid_code = charset.max_code + 1;
828 else
829 error ("Attribute :invalid-code must be specified");
76d7b829 830 }
76d7b829 831 }
3263d5a2
KH
832 else
833 {
834 CHECK_NATNUM (val);
835 charset.invalid_code = XFASTINT (val);
836 }
4ed46869 837
3263d5a2
KH
838 val = args[charset_arg_iso_final];
839 if (NILP (val))
840 charset.iso_final = -1;
841 else
842 {
843 CHECK_NUMBER (val);
844 if (XINT (val) < '0' || XINT (val) > 127)
845 error ("Invalid iso-final-char: %d", XINT (val));
846 charset.iso_final = XINT (val);
847 }
4ed46869 848
3263d5a2
KH
849 val = args[charset_arg_iso_revision];
850 if (NILP (val))
851 charset.iso_revision = -1;
852 else
4ed46869 853 {
3263d5a2
KH
854 CHECK_NUMBER (val);
855 if (XINT (val) > 63)
856 args_out_of_range (make_number (63), val);
857 charset.iso_revision = XINT (val);
4ed46869 858 }
4ed46869 859
3263d5a2
KH
860 val = args[charset_arg_emacs_mule_id];
861 if (NILP (val))
862 charset.emacs_mule_id = -1;
4ed46869
KH
863 else
864 {
3263d5a2
KH
865 CHECK_NATNUM (val);
866 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
867 error ("Invalid emacs-mule-id: %d", XINT (val));
868 charset.emacs_mule_id = XINT (val);
c83ef371 869 }
f6302ac9 870
3263d5a2 871 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 872
3263d5a2 873 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 874
3263d5a2
KH
875 charset.unified_p = 0;
876
877 bzero (charset.fast_map, sizeof (charset.fast_map));
878
879 if (! NILP (args[charset_arg_code_offset]))
880 {
881 val = args[charset_arg_code_offset];
882 CHECK_NUMBER (val);
883
884 charset.method = CHARSET_METHOD_OFFSET;
885 charset.code_offset = XINT (val);
886
887 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
888 charset.min_char = i + charset.code_offset;
889 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
890 charset.max_char = i + charset.code_offset;
891 if (charset.max_char > MAX_CHAR)
892 error ("Unsupported max char: %d", charset.max_char);
893
f148205f
KH
894 i = (charset.min_char >> 7) << 7;
895 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 896 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 897 i = (i >> 12) << 12;
3263d5a2
KH
898 for (; i <= charset.max_char; i += 0x1000)
899 CHARSET_FAST_MAP_SET (i, charset.fast_map);
900 }
901 else if (! NILP (args[charset_arg_map]))
902 {
903 val = args[charset_arg_map];
904 ASET (attrs, charset_map, val);
905 if (STRINGP (val))
e9ce014c
KH
906 load_charset_map_from_file (&charset, val, 0);
907 else
908 load_charset_map_from_vector (&charset, val, 0);
3263d5a2
KH
909 charset.method = CHARSET_METHOD_MAP_DEFERRED;
910 }
374c5cfd 911 else if (! NILP (args[charset_arg_subset]))
3263d5a2 912 {
374c5cfd
KH
913 Lisp_Object parent;
914 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
915 struct charset *parent_charset;
916
917 val = args[charset_arg_subset];
918 parent = Fcar (val);
919 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
920 parent_min_code = Fnth (make_number (1), val);
921 CHECK_NATNUM (parent_min_code);
922 parent_max_code = Fnth (make_number (2), val);
923 CHECK_NATNUM (parent_max_code);
924 parent_code_offset = Fnth (make_number (3), val);
925 CHECK_NUMBER (parent_code_offset);
926 val = Fmake_vector (make_number (4), Qnil);
927 ASET (val, 0, make_number (parent_charset->id));
928 ASET (val, 1, parent_min_code);
929 ASET (val, 2, parent_max_code);
930 ASET (val, 3, parent_code_offset);
931 ASET (attrs, charset_subset, val);
932
933 charset.method = CHARSET_METHOD_SUBSET;
934 /* Here, we just copy the parent's fast_map. It's not accurate,
935 but at least it works for quickly detecting which character
936 DOESN'T belong to this charset. */
937 for (i = 0; i < 190; i++)
938 charset.fast_map[i] = parent_charset->fast_map[i];
939
940 /* We also copy these for parents. */
941 charset.min_char = parent_charset->min_char;
942 charset.max_char = parent_charset->max_char;
943 }
944 else if (! NILP (args[charset_arg_superset]))
0282eb69 945 {
374c5cfd
KH
946 val = args[charset_arg_superset];
947 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 948 val = Fcopy_sequence (val);
374c5cfd 949 ASET (attrs, charset_superset, val);
3263d5a2
KH
950
951 charset.min_char = MAX_CHAR;
952 charset.max_char = 0;
953 for (; ! NILP (val); val = Fcdr (val))
0282eb69 954 {
3263d5a2
KH
955 Lisp_Object elt, car_part, cdr_part;
956 int this_id, offset;
957 struct charset *this_charset;
2e344af3 958
3263d5a2
KH
959 elt = Fcar (val);
960 if (CONSP (elt))
2e344af3 961 {
3263d5a2
KH
962 car_part = XCAR (elt);
963 cdr_part = XCDR (elt);
964 CHECK_CHARSET_GET_ID (car_part, this_id);
965 CHECK_NUMBER (cdr_part);
966 offset = XINT (cdr_part);
177c0ea7 967 }
3263d5a2 968 else
4ed46869 969 {
3263d5a2
KH
970 CHECK_CHARSET_GET_ID (elt, this_id);
971 offset = 0;
4ed46869 972 }
3263d5a2
KH
973 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
974
975 this_charset = CHARSET_FROM_ID (this_id);
976 if (charset.min_char > this_charset->min_char)
977 charset.min_char = this_charset->min_char;
978 if (charset.max_char < this_charset->max_char)
979 charset.max_char = this_charset->max_char;
980 for (i = 0; i < 190; i++)
981 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 982 }
0282eb69 983 }
2e344af3 984 else
3263d5a2 985 error ("None of :code-offset, :map, :parents are specified");
05505664 986
3263d5a2
KH
987 val = args[charset_arg_unify_map];
988 if (! NILP (val) && !STRINGP (val))
989 CHECK_VECTOR (val);
990 ASET (attrs, charset_unify_map, val);
05505664 991
3263d5a2
KH
992 CHECK_LIST (args[charset_arg_plist]);
993 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 994
3263d5a2
KH
995 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
996 &hash_code);
997 if (charset.hash_index >= 0)
998 {
999 new_definition_p = 0;
4f65af01 1000 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1001 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1002 }
1a45ff10 1003 else
3263d5a2
KH
1004 {
1005 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1006 hash_code);
1007 if (charset_table_used == charset_table_size)
1008 {
2fe1edd1
KH
1009 struct charset *new_table
1010 = (struct charset *) xmalloc (sizeof (struct charset)
1011 * (charset_table_size + 16));
1012 bcopy (charset_table, new_table,
1013 sizeof (struct charset) * charset_table_size);
1014 charset_table_size += 16;
1015 charset_table = new_table;
3263d5a2
KH
1016 }
1017 id = charset_table_used++;
3263d5a2
KH
1018 new_definition_p = 1;
1019 }
2e344af3 1020
4f65af01 1021 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1022 charset.id = id;
1023 charset_table[id] = charset;
2e344af3 1024
3263d5a2 1025 if (charset.iso_final >= 0)
4ed46869 1026 {
3263d5a2
KH
1027 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1028 charset.iso_final) = id;
1029 if (new_definition_p)
1030 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1031 Fcons (make_number (id), Qnil));
7c7dceee
KH
1032 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1033 charset_jisx0201_roman = id;
1034 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1035 charset_jisx0208_1978 = id;
1036 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1037 charset_jisx0208 = id;
4ed46869 1038 }
3263d5a2
KH
1039
1040 if (charset.emacs_mule_id >= 0)
4ed46869 1041 {
3263d5a2 1042 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1043 if (charset.emacs_mule_id < 0xA0)
1044 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1045 else
1046 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1047 if (new_definition_p)
1048 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1049 Fcons (make_number (id), Qnil));
4ed46869
KH
1050 }
1051
3263d5a2
KH
1052 if (new_definition_p)
1053 {
1054 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8f924df7 1055 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
3263d5a2 1056 Fcons (make_number (id), Qnil));
dbbb237d 1057 charset_ordered_list_tick++;
3263d5a2 1058 }
4ed46869 1059
3263d5a2 1060 return Qnil;
4ed46869
KH
1061}
1062
2fe1edd1
KH
1063
1064/* Same as Fdefine_charset_internal but arguments are more convenient
1065 to call from C (typically in syms_of_charset). This can define a
1066 charset of `offset' method only. Return the ID of the new
1067 charset. */
1068
1069static int
1070define_charset_internal (name, dimension, code_space, min_code, max_code,
1071 iso_final, iso_revision, emacs_mule_id,
7acf89e6 1072 ascii_compatible, supplementary,
2fe1edd1
KH
1073 code_offset)
1074 Lisp_Object name;
1075 int dimension;
1076 unsigned char *code_space;
1077 unsigned min_code, max_code;
1078 int iso_final, iso_revision, emacs_mule_id;
7acf89e6 1079 int ascii_compatible, supplementary;
2fe1edd1
KH
1080 int code_offset;
1081{
1082 Lisp_Object args[charset_arg_max];
1083 Lisp_Object plist[14];
1084 Lisp_Object val;
1085 int i;
1086
1087 args[charset_arg_name] = name;
1088 args[charset_arg_dimension] = make_number (dimension);
1089 val = Fmake_vector (make_number (8), make_number (0));
1090 for (i = 0; i < 8; i++)
1091 ASET (val, i, make_number (code_space[i]));
1092 args[charset_arg_code_space] = val;
1093 args[charset_arg_min_code] = make_number (min_code);
1094 args[charset_arg_max_code] = make_number (max_code);
1095 args[charset_arg_iso_final]
1096 = (iso_final < 0 ? Qnil : make_number (iso_final));
1097 args[charset_arg_iso_revision] = make_number (iso_revision);
1098 args[charset_arg_emacs_mule_id]
1099 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1100 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1101 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1102 args[charset_arg_invalid_code] = Qnil;
1103 args[charset_arg_code_offset] = make_number (code_offset);
1104 args[charset_arg_map] = Qnil;
1105 args[charset_arg_subset] = Qnil;
1106 args[charset_arg_superset] = Qnil;
1107 args[charset_arg_unify_map] = Qnil;
1108
1109 plist[0] = intern (":name");
1110 plist[1] = args[charset_arg_name];
1111 plist[2] = intern (":dimension");
1112 plist[3] = args[charset_arg_dimension];
1113 plist[4] = intern (":code-space");
1114 plist[5] = args[charset_arg_code_space];
1115 plist[6] = intern (":iso-final-char");
1116 plist[7] = args[charset_arg_iso_final];
1117 plist[8] = intern (":emacs-mule-id");
1118 plist[9] = args[charset_arg_emacs_mule_id];
1119 plist[10] = intern (":ascii-compatible-p");
1120 plist[11] = args[charset_arg_ascii_compatible_p];
1121 plist[12] = intern (":code-offset");
1122 plist[13] = args[charset_arg_code_offset];
1123
1124 args[charset_arg_plist] = Flist (14, plist);
1125 Fdefine_charset_internal (charset_arg_max, args);
1126
1127 return XINT (CHARSET_SYMBOL_ID (name));
1128}
1129
1130
3263d5a2
KH
1131DEFUN ("define-charset-alias", Fdefine_charset_alias,
1132 Sdefine_charset_alias, 2, 2, 0,
1133 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1134 (alias, charset)
1135 Lisp_Object alias, charset;
4ed46869 1136{
3263d5a2
KH
1137 Lisp_Object attr;
1138
1139 CHECK_CHARSET_GET_ATTR (charset, attr);
1140 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1141 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1142 return Qnil;
1143}
4ed46869 1144
4ed46869 1145
d1a04588
KH
1146DEFUN ("unibyte-charset", Funibyte_charset, Sunibyte_charset, 0, 0, 0,
1147 doc: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
3263d5a2
KH
1148 ()
1149{
d1a04588 1150 return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte));
3263d5a2 1151}
4ed46869 1152
4ed46869 1153
d1a04588 1154DEFUN ("set-unibyte-charset", Fset_unibyte_charset, Sset_unibyte_charset,
3263d5a2 1155 1, 1, 0,
d1a04588 1156 doc: /* Set the unibyte charset to CHARSET.
56a46d1d 1157This determines how unibyte/multibyte conversion is done. See also
d1a04588 1158function `unibyte-charset'. */)
3263d5a2
KH
1159 (charset)
1160 Lisp_Object charset;
1161{
14e3d523
KH
1162 struct charset *cs;
1163 int i, c;
1164
1165 CHECK_CHARSET_GET_CHARSET (charset, cs);
1166 if (! cs->ascii_compatible_p
1167 || cs->dimension != 1)
8f924df7 1168 error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset)));
14e3d523 1169 charset_unibyte = cs->id;
943a83ad 1170 memset (unibyte_has_multibyte_table, 1, 128);
14e3d523
KH
1171 for (i = 128; i < 256; i++)
1172 {
1173 c = DECODE_CHAR (cs, i);
170e4589 1174 unibyte_to_multibyte_table[i] = (c < 0 ? BYTE8_TO_CHAR (i) : c);
943a83ad 1175 unibyte_has_multibyte_table[i] = c >= 0;
14e3d523 1176 }
3263d5a2 1177
4ed46869
KH
1178 return Qnil;
1179}
1180
3263d5a2
KH
1181
1182DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1183 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1184 (charset)
1185 Lisp_Object charset;
1186{
1187 Lisp_Object attrs;
1188
1189 CHECK_CHARSET_GET_ATTR (charset, attrs);
1190 return CHARSET_ATTR_PLIST (attrs);
1191}
1192
1193
1194DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1195 doc: /* Set CHARSET's property list to PLIST. */)
1196 (charset, plist)
1197 Lisp_Object charset, plist;
1198{
1199 Lisp_Object attrs;
1200
1201 CHECK_CHARSET_GET_ATTR (charset, attrs);
1202 CHARSET_ATTR_PLIST (attrs) = plist;
1203 return plist;
1204}
1205
1206
dbbb237d 1207DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1208 doc: /* Unify characters of CHARSET with Unicode.
1209This means reading the relevant file and installing the table defined
dbbb237d
KH
1210by CHARSET's `:unify-map' property.
1211
64165ae2
DL
1212Optional second arg UNIFY-MAP is a file name string or a vector. It has
1213the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1214`define-charset' (which see).
1215
1216Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1217 (charset, unify_map, deunify)
1218 Lisp_Object charset, unify_map, deunify;
8a73a704 1219{
3263d5a2
KH
1220 int id;
1221 struct charset *cs;
8f924df7 1222
3263d5a2
KH
1223 CHECK_CHARSET_GET_ID (charset, id);
1224 cs = CHARSET_FROM_ID (id);
1225 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1226 load_charset (cs);
dbbb237d
KH
1227 if (NILP (deunify)
1228 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1229 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1230 return Qnil;
dbbb237d 1231
3263d5a2 1232 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1233 if (NILP (deunify))
1234 {
1235 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
8f924df7 1236 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1237 if (NILP (unify_map))
1238 unify_map = CHARSET_UNIFY_MAP (cs);
1239 if (STRINGP (unify_map))
1240 load_charset_map_from_file (cs, unify_map, 2);
1241 else if (VECTORP (unify_map))
1242 load_charset_map_from_vector (cs, unify_map, 2);
1243 else if (NILP (unify_map))
1244 error ("No unify-map for charset");
1245 else
1246 error ("Bad unify-map arg");
1247 CHARSET_UNIFIED_P (cs) = 1;
1248 }
1249 else if (CHAR_TABLE_P (Vchar_unify_table))
1250 {
1251 int min_code = CHARSET_MIN_CODE (cs);
1252 int max_code = CHARSET_MAX_CODE (cs);
1253 int min_char = DECODE_CHAR (cs, min_code);
1254 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1255
dbbb237d
KH
1256 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1257 }
8f924df7 1258
3263d5a2 1259 return Qnil;
8a73a704
KH
1260}
1261
3fac5a51
KH
1262DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1263 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1264 doc: /*
327719ee 1265Return an unused ISO final char for a charset of DIMENISION and CHARS.
fdb82f93
PJ
1266DIMENSION is the number of bytes to represent a character: 1 or 2.
1267CHARS is the number of characters in a dimension: 94 or 96.
1268
1269This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1270If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1271return nil. */)
1272 (dimension, chars)
3fac5a51
KH
1273 Lisp_Object dimension, chars;
1274{
1275 int final_char;
1276
b7826503
PJ
1277 CHECK_NUMBER (dimension);
1278 CHECK_NUMBER (chars);
3263d5a2
KH
1279 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1280 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1281 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1282 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1283 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1284 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1285 break;
3fac5a51
KH
1286 return (final_char <= '?' ? make_number (final_char) : Qnil);
1287}
1288
3263d5a2
KH
1289static void
1290check_iso_charset_parameter (dimension, chars, final_char)
1291 Lisp_Object dimension, chars, final_char;
4ed46869 1292{
3263d5a2
KH
1293 CHECK_NATNUM (dimension);
1294 CHECK_NATNUM (chars);
1295 CHECK_NATNUM (final_char);
4ed46869 1296
3263d5a2
KH
1297 if (XINT (dimension) > 3)
1298 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1299 if (XINT (chars) != 94 && XINT (chars) != 96)
1300 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1301 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1302 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1303}
1304
1305
1306DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1307 4, 4, 0,
cefd8c4f
KH
1308 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1309
1310On decoding by an ISO-2022 base coding system, when a charset
1311specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1312if CHARSET is designated instead. */)
3263d5a2
KH
1313 (dimension, chars, final_char, charset)
1314 Lisp_Object dimension, chars, final_char, charset;
1315{
1316 int id;
82215ce9 1317 int chars_flag;
4ed46869 1318
3263d5a2
KH
1319 CHECK_CHARSET_GET_ID (charset, id);
1320 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1321 chars_flag = XINT (chars) == 96;
1322 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1323 return Qnil;
1324}
1325
3263d5a2 1326
2e344af3
KH
1327/* Return information about charsets in the text at PTR of NBYTES
1328 bytes, which are NCHARS characters. The value is:
f6302ac9 1329
cfe34140 1330 0: Each character is represented by one byte. This is always
3263d5a2
KH
1331 true for a unibyte string. For a multibyte string, true if
1332 it contains only ASCII characters.
1333
28c026cd
DL
1334 1: No charsets other than ascii, control-1, and latin-1 are
1335 found.
1d67c29b 1336
3263d5a2
KH
1337 2: Otherwise.
1338*/
4ed46869
KH
1339
1340int
3263d5a2
KH
1341string_xstring_p (string)
1342 Lisp_Object string;
4ed46869 1343{
8f924df7
KH
1344 const unsigned char *p = SDATA (string);
1345 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1346
8f924df7 1347 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1348 return 0;
1349
3263d5a2 1350 while (p < endp)
0282eb69 1351 {
3263d5a2 1352 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1353
3cc67a4d 1354 if (c >= 0x100)
3263d5a2 1355 return 2;
0282eb69 1356 }
3263d5a2
KH
1357 return 1;
1358}
05505664 1359
05505664 1360
3263d5a2 1361/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1362
3cc67a4d
KH
1363 CHARSETS is a vector. If Nth element is non-nil, it means the
1364 charset whose id is N is already found.
2e344af3 1365
3263d5a2 1366 It may lookup a translation table TABLE if supplied. */
2e344af3 1367
3263d5a2 1368static void
3cc67a4d 1369find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
dbbb237d 1370 const unsigned char *ptr;
42ca828e 1371 EMACS_INT nchars, nbytes;
3263d5a2 1372 Lisp_Object charsets, table;
3cc67a4d 1373 int multibyte;
3263d5a2 1374{
dbbb237d 1375 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1376
1377 if (nchars == nbytes)
3263d5a2 1378 {
3cc67a4d
KH
1379 if (multibyte)
1380 ASET (charsets, charset_ascii, Qt);
1381 else
1382 while (ptr < pend)
1383 {
1384 int c = *ptr++;
1385
1386 if (!NILP (table))
1387 c = translate_char (table, c);
1388 if (ASCII_BYTE_P (c))
1389 ASET (charsets, charset_ascii, Qt);
1390 else
1391 ASET (charsets, charset_eight_bit, Qt);
1392 }
1393 }
1394 else
1395 {
1396 while (ptr < pend)
3263d5a2 1397 {
3cc67a4d
KH
1398 int c = STRING_CHAR_ADVANCE (ptr);
1399 struct charset *charset;
3263d5a2 1400
3cc67a4d
KH
1401 if (!NILP (table))
1402 c = translate_char (table, c);
1403 charset = CHAR_CHARSET (c);
1404 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1405 }
4ed46869 1406 }
4ed46869
KH
1407}
1408
1409DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1410 2, 3, 0,
fdb82f93
PJ
1411 doc: /* Return a list of charsets in the region between BEG and END.
1412BEG and END are buffer positions.
1413Optional arg TABLE if non-nil is a translation table to look up.
1414
fdb82f93
PJ
1415If the current buffer is unibyte, the returned list may contain
1416only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1417 (beg, end, table)
23d2a7f1 1418 Lisp_Object beg, end, table;
4ed46869 1419{
3263d5a2 1420 Lisp_Object charsets;
42ca828e
DL
1421 EMACS_INT from, from_byte, to, stop, stop_byte;
1422 int i;
4ed46869 1423 Lisp_Object val;
3cc67a4d 1424 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
1425
1426 validate_region (&beg, &end);
1427 from = XFASTINT (beg);
1428 stop = to = XFASTINT (end);
6ae1f27e 1429
4ed46869 1430 if (from < GPT && GPT < to)
6ae1f27e
RS
1431 {
1432 stop = GPT;
1433 stop_byte = GPT_BYTE;
1434 }
1435 else
1436 stop_byte = CHAR_TO_BYTE (stop);
1437
1438 from_byte = CHAR_TO_BYTE (from);
1439
3263d5a2 1440 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1441 while (1)
1442 {
3263d5a2 1443 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1444 stop_byte - from_byte, charsets, table,
1445 multibyte);
4ed46869 1446 if (stop < to)
6ae1f27e
RS
1447 {
1448 from = stop, from_byte = stop_byte;
1449 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1450 }
4ed46869
KH
1451 else
1452 break;
1453 }
6ae1f27e 1454
4ed46869 1455 val = Qnil;
3263d5a2 1456 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1457 if (!NILP (AREF (charsets, i)))
3263d5a2 1458 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1459 return val;
1460}
1461
1462DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1463 1, 2, 0,
fdb82f93
PJ
1464 doc: /* Return a list of charsets in STR.
1465Optional arg TABLE if non-nil is a translation table to look up.
1466
fdb82f93 1467If STR is unibyte, the returned list may contain
3263d5a2 1468only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1469 (str, table)
23d2a7f1 1470 Lisp_Object str, table;
4ed46869 1471{
3263d5a2 1472 Lisp_Object charsets;
4ed46869
KH
1473 int i;
1474 Lisp_Object val;
1475
b7826503 1476 CHECK_STRING (str);
87b089ad 1477
3263d5a2 1478 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1479 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1480 charsets, table,
1481 STRING_MULTIBYTE (str));
4ed46869 1482 val = Qnil;
3263d5a2 1483 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1484 if (!NILP (AREF (charsets, i)))
3263d5a2 1485 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1486 return val;
1487}
2e344af3 1488
4ed46869 1489\f
3263d5a2
KH
1490
1491/* Return a character correponding to the code-point CODE of
1492 CHARSET. */
1493
1494int
1495decode_char (charset, code)
1496 struct charset *charset;
1497 unsigned code;
4ed46869 1498{
3263d5a2
KH
1499 int c, char_index;
1500 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1501
3263d5a2
KH
1502 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1503 return -1;
4ed46869 1504
3263d5a2 1505 if (method == CHARSET_METHOD_MAP_DEFERRED)
ac4137cc 1506 {
3263d5a2
KH
1507 load_charset (charset);
1508 method = CHARSET_METHOD (charset);
ac4137cc 1509 }
4ed46869 1510
374c5cfd 1511 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1512 {
374c5cfd
KH
1513 Lisp_Object subset_info;
1514
1515 subset_info = CHARSET_SUBSET (charset);
1516 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1517 code -= XINT (AREF (subset_info, 3));
1518 if (code < XFASTINT (AREF (subset_info, 1))
1519 || code > XFASTINT (AREF (subset_info, 2)))
1520 c = -1;
1521 else
1522 c = DECODE_CHAR (charset, code);
2e344af3 1523 }
374c5cfd 1524 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1525 {
3263d5a2 1526 Lisp_Object parents;
4ed46869 1527
374c5cfd 1528 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1529 c = -1;
1530 for (; CONSP (parents); parents = XCDR (parents))
1531 {
1532 int id = XINT (XCAR (XCAR (parents)));
1533 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1534 unsigned this_code = code - code_offset;
4ed46869 1535
3263d5a2
KH
1536 charset = CHARSET_FROM_ID (id);
1537 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1538 break;
1539 }
1540 }
1541 else
ac4137cc 1542 {
3263d5a2 1543 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1544 if (char_index < 0)
1545 return -1;
4ed46869 1546
3263d5a2 1547 if (method == CHARSET_METHOD_MAP)
ac4137cc 1548 {
3263d5a2 1549 Lisp_Object decoder;
4ed46869 1550
3263d5a2
KH
1551 decoder = CHARSET_DECODER (charset);
1552 if (! VECTORP (decoder))
1553 return -1;
1554 c = XINT (AREF (decoder, char_index));
ac4137cc
KH
1555 }
1556 else
1557 {
3263d5a2 1558 c = char_index + CHARSET_CODE_OFFSET (charset);
ac4137cc
KH
1559 }
1560 }
4ed46869 1561
3263d5a2
KH
1562 if (CHARSET_UNIFIED_P (charset)
1563 && c >= 0)
c449997d
KH
1564 {
1565 MAYBE_UNIFY_CHAR (c);
1566 }
ac4137cc 1567
3263d5a2 1568 return c;
90d7b74e
KH
1569}
1570
374c5cfd
KH
1571/* Variable used temporarily by the macro ENCODE_CHAR. */
1572Lisp_Object charset_work;
4ed46869 1573
3263d5a2 1574/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1575 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1576 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1577
3263d5a2
KH
1578unsigned
1579encode_char (charset, c)
1580 struct charset *charset;
9b6a601f 1581 int c;
9d3d8cba 1582{
3263d5a2
KH
1583 unsigned code;
1584 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1585
3263d5a2 1586 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1587 {
374c5cfd 1588 Lisp_Object deunifier, deunified;
4ed46869 1589
3263d5a2
KH
1590 deunifier = CHARSET_DEUNIFIER (charset);
1591 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1592 {
dbbb237d 1593 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
3263d5a2 1594 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1595 }
374c5cfd
KH
1596 deunified = CHAR_TABLE_REF (deunifier, c);
1597 if (! NILP (deunified))
1598 c = XINT (deunified);
ac4137cc 1599 }
9d3d8cba 1600
374c5cfd
KH
1601 if (method == CHARSET_METHOD_SUBSET)
1602 {
1603 Lisp_Object subset_info;
1604 struct charset *this_charset;
1605
1606 subset_info = CHARSET_SUBSET (charset);
1607 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1608 code = ENCODE_CHAR (this_charset, c);
1609 if (code == CHARSET_INVALID_CODE (this_charset)
1610 || code < XFASTINT (AREF (subset_info, 1))
1611 || code > XFASTINT (AREF (subset_info, 2)))
1612 return CHARSET_INVALID_CODE (charset);
1613 code += XINT (AREF (subset_info, 3));
1614 return code;
1615 }
9d3d8cba 1616
374c5cfd 1617 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1618 {
3263d5a2 1619 Lisp_Object parents;
d2665018 1620
374c5cfd 1621 parents = CHARSET_SUPERSET (charset);
3263d5a2 1622 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1623 {
3263d5a2
KH
1624 int id = XINT (XCAR (XCAR (parents)));
1625 int code_offset = XINT (XCDR (XCAR (parents)));
1626 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1627
3263d5a2 1628 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1629 if (code != CHARSET_INVALID_CODE (this_charset))
1630 return code + code_offset;
beeedaad 1631 }
3263d5a2
KH
1632 return CHARSET_INVALID_CODE (charset);
1633 }
1bcc1567 1634
15c85a88
KH
1635 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1636 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1637 return CHARSET_INVALID_CODE (charset);
1bcc1567 1638
3263d5a2 1639 if (method == CHARSET_METHOD_MAP_DEFERRED)
beeedaad 1640 {
3263d5a2
KH
1641 load_charset (charset);
1642 method = CHARSET_METHOD (charset);
859f2b3c 1643 }
9b6a601f 1644
3263d5a2 1645 if (method == CHARSET_METHOD_MAP)
3f62427c 1646 {
3263d5a2 1647 Lisp_Object encoder;
beeedaad 1648 Lisp_Object val;
9b6a601f 1649
3263d5a2
KH
1650 encoder = CHARSET_ENCODER (charset);
1651 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1652 return CHARSET_INVALID_CODE (charset);
1653 val = CHAR_TABLE_REF (encoder, c);
374c5cfd
KH
1654 if (NILP (val))
1655 return CHARSET_INVALID_CODE (charset);
e9ce014c
KH
1656 code = XINT (val);
1657 if (! CHARSET_COMPACT_CODES_P (charset))
1658 code = INDEX_TO_CODE_POINT (charset, code);
3263d5a2 1659 }
820ee249 1660 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1661 {
3263d5a2
KH
1662 code = c - CHARSET_CODE_OFFSET (charset);
1663 code = INDEX_TO_CODE_POINT (charset, code);
3f62427c 1664 }
8ac5a9cc 1665
3263d5a2 1666 return code;
4ed46869
KH
1667}
1668
4ed46869 1669
3263d5a2
KH
1670DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1671 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1672Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1673
3263d5a2 1674CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1675
3263d5a2
KH
1676Optional argument RESTRICTION specifies a way to map the pair of CCS
1677and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1678 (charset, code_point, restriction)
1679 Lisp_Object charset, code_point, restriction;
4ed46869 1680{
3263d5a2
KH
1681 int c, id;
1682 unsigned code;
1683 struct charset *charsetp;
859f2b3c 1684
3263d5a2
KH
1685 CHECK_CHARSET_GET_ID (charset, id);
1686 if (CONSP (code_point))
1687 {
8f924df7
KH
1688 CHECK_NATNUM_CAR (code_point);
1689 CHECK_NATNUM_CDR (code_point);
69f8de5b 1690 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2 1691 }
4ed46869
KH
1692 else
1693 {
3263d5a2
KH
1694 CHECK_NATNUM (code_point);
1695 code = XINT (code_point);
4ed46869 1696 }
3263d5a2
KH
1697 charsetp = CHARSET_FROM_ID (id);
1698 c = DECODE_CHAR (charsetp, code);
1699 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1700}
1701
859f2b3c 1702
3263d5a2
KH
1703DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1704 doc: /* Encode the character CH into a code-point of CHARSET.
1705Return nil if CHARSET doesn't include CH.
17e7ef1b 1706
3263d5a2
KH
1707Optional argument RESTRICTION specifies a way to map CHAR to a
1708code-point in CCS. Currently not supported and just ignored. */)
1709 (ch, charset, restriction)
1710 Lisp_Object ch, charset, restriction;
4ed46869 1711{
16fed1fc 1712 int id;
3263d5a2
KH
1713 unsigned code;
1714 struct charset *charsetp;
046b1f03 1715
3263d5a2
KH
1716 CHECK_CHARSET_GET_ID (charset, id);
1717 CHECK_NATNUM (ch);
3263d5a2 1718 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1719 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1720 if (code == CHARSET_INVALID_CODE (charsetp))
1721 return Qnil;
1722 if (code > 0x7FFFFFF)
1723 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1724 return make_number (code);
beeedaad
KH
1725}
1726
beeedaad 1727
b121a744
KH
1728DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1729 doc:
1730 /* Return a character of CHARSET whose position codes are CODEn.
1731
1732CODE1 through CODE4 are optional, but if you don't supply sufficient
1733position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1734is specified. */)
b121a744
KH
1735 (charset, code1, code2, code3, code4)
1736 Lisp_Object charset, code1, code2, code3, code4;
beeedaad 1737{
3263d5a2
KH
1738 int id, dimension;
1739 struct charset *charsetp;
b121a744
KH
1740 unsigned code;
1741 int c;
87b089ad 1742
3263d5a2
KH
1743 CHECK_CHARSET_GET_ID (charset, id);
1744 charsetp = CHARSET_FROM_ID (id);
4ed46869 1745
b121a744
KH
1746 dimension = CHARSET_DIMENSION (charsetp);
1747 if (NILP (code1))
d47073ca
KH
1748 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1749 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1750 else
859f2b3c 1751 {
b121a744
KH
1752 CHECK_NATNUM (code1);
1753 if (XFASTINT (code1) >= 0x100)
1754 args_out_of_range (make_number (0xFF), code1);
1755 code = XFASTINT (code1);
859f2b3c 1756
b0a1e45e 1757 if (dimension > 1)
beeedaad 1758 {
b121a744 1759 code <<= 8;
b0a1e45e
KH
1760 if (NILP (code2))
1761 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1762 else
b121a744 1763 {
b0a1e45e
KH
1764 CHECK_NATNUM (code2);
1765 if (XFASTINT (code2) >= 0x100)
1766 args_out_of_range (make_number (0xFF), code2);
1767 code |= XFASTINT (code2);
b121a744 1768 }
99529c2c 1769
b0a1e45e 1770 if (dimension > 2)
b121a744
KH
1771 {
1772 code <<= 8;
b0a1e45e
KH
1773 if (NILP (code3))
1774 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1775 else
1776 {
b0a1e45e
KH
1777 CHECK_NATNUM (code3);
1778 if (XFASTINT (code3) >= 0x100)
1779 args_out_of_range (make_number (0xFF), code3);
1780 code |= XFASTINT (code3);
1781 }
1782
1783 if (dimension > 3)
1784 {
1785 code <<= 8;
1786 if (NILP (code4))
1787 code |= charsetp->code_space[0];
1788 else
1789 {
1790 CHECK_NATNUM (code4);
1791 if (XFASTINT (code4) >= 0x100)
1792 args_out_of_range (make_number (0xFF), code4);
1793 code |= XFASTINT (code4);
1794 }
b121a744
KH
1795 }
1796 }
beeedaad 1797 }
859f2b3c 1798 }
beeedaad 1799
b121a744
KH
1800 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1801 code &= 0x7F7F7F7F;
1802 c = DECODE_CHAR (charsetp, code);
1803 if (c < 0)
1804 error ("Invalid code(s)");
3263d5a2 1805 return make_number (c);
4ed46869
KH
1806}
1807
beeedaad 1808
3263d5a2
KH
1809/* Return the first charset in CHARSET_LIST that contains C.
1810 CHARSET_LIST is a list of charset IDs. If it is nil, use
1811 Vcharset_ordered_list. */
beeedaad 1812
3263d5a2
KH
1813struct charset *
1814char_charset (c, charset_list, code_return)
1815 int c;
1816 Lisp_Object charset_list;
1817 unsigned *code_return;
2e344af3 1818{
3263d5a2
KH
1819 if (NILP (charset_list))
1820 charset_list = Vcharset_ordered_list;
beeedaad 1821
3263d5a2 1822 while (CONSP (charset_list))
2e344af3 1823 {
3263d5a2
KH
1824 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1825 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 1826
3263d5a2 1827 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 1828 {
3263d5a2
KH
1829 if (code_return)
1830 *code_return = code;
1831 return charset;
3f62427c 1832 }
3263d5a2 1833 charset_list = XCDR (charset_list);
3f62427c 1834 }
3263d5a2 1835 return NULL;
3f62427c
KH
1836}
1837
2e344af3 1838
3263d5a2 1839DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d
KH
1840 doc:
1841 /*Return list of charset and one to four position-codes of CHAR.
1842The charset is decided by the current priority order of charsets.
1843A position-code is a byte value of each dimension of the code-point of
1844CHAR in the charset. */)
3263d5a2
KH
1845 (ch)
1846 Lisp_Object ch;
4ed46869 1847{
3263d5a2
KH
1848 struct charset *charset;
1849 int c, dimension;
1850 unsigned code;
4ed46869
KH
1851 Lisp_Object val;
1852
3263d5a2
KH
1853 CHECK_CHARACTER (ch);
1854 c = XFASTINT (ch);
1855 charset = CHAR_CHARSET (c);
1856 if (! charset)
3cc67a4d 1857 abort ();
3263d5a2
KH
1858 code = ENCODE_CHAR (charset, c);
1859 if (code == CHARSET_INVALID_CODE (charset))
1860 abort ();
1861 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
1862 for (val = Qnil; dimension > 0; dimension--)
1863 {
1864 val = Fcons (make_number (code & 0xFF), val);
1865 code >>= 8;
1866 }
3263d5a2 1867 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
1868}
1869
740f080d 1870
3263d5a2 1871DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
327719ee 1872 doc: /* Return the charset of highest priority that contains CH. */)
fdb82f93 1873 (ch)
4ed46869
KH
1874 Lisp_Object ch;
1875{
3263d5a2 1876 struct charset *charset;
4ed46869 1877
3263d5a2
KH
1878 CHECK_CHARACTER (ch);
1879 charset = CHAR_CHARSET (XINT (ch));
1880 return (CHARSET_NAME (charset));
4ed46869
KH
1881}
1882
17e7ef1b 1883
3263d5a2
KH
1884DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1885 doc: /*
1886Return charset of a character in the current buffer at position POS.
1887If POS is nil, it defauls to the current point.
1888If POS is out of range, the value is nil. */)
1889 (pos)
1890 Lisp_Object pos;
2e344af3 1891{
3263d5a2
KH
1892 Lisp_Object ch;
1893 struct charset *charset;
046b1f03 1894
3263d5a2
KH
1895 ch = Fchar_after (pos);
1896 if (! INTEGERP (ch))
1897 return ch;
1898 charset = CHAR_CHARSET (XINT (ch));
1899 return (CHARSET_NAME (charset));
6ae1f27e 1900}
9036eb45 1901
87b089ad 1902
3263d5a2
KH
1903DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1904 doc: /*
1905Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1906
1907ISO 2022's designation sequence (escape sequence) distinguishes charsets
1908by their DIMENSION, CHARS, and FINAL-CHAR,
1909where as Emacs distinguishes them by charset symbol.
1910See the documentation of the function `charset-info' for the meanings of
1911DIMENSION, CHARS, and FINAL-CHAR. */)
1912 (dimension, chars, final_char)
1913 Lisp_Object dimension, chars, final_char;
6ae1f27e 1914{
3263d5a2 1915 int id;
82215ce9 1916 int chars_flag;
a8a35e61 1917
3263d5a2 1918 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1919 chars_flag = XFASTINT (chars) == 96;
1920 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
1921 XFASTINT (final_char));
1922 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
1923}
1924
87b089ad 1925
3263d5a2
KH
1926DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1927 0, 0, 0,
1928 doc: /*
1929Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1930 ()
87b089ad 1931{
53316e55 1932 int i;
3263d5a2
KH
1933 struct charset *charset;
1934 Lisp_Object attrs;
87b089ad 1935
3263d5a2 1936 for (i = 0; i < charset_table_used; i++)
87b089ad 1937 {
3263d5a2
KH
1938 charset = CHARSET_FROM_ID (i);
1939 attrs = CHARSET_ATTRIBUTES (charset);
2e344af3 1940
3263d5a2
KH
1941 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1942 {
1943 CHARSET_ATTR_DECODER (attrs) = Qnil;
1944 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1945 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1946 }
2e344af3 1947
3263d5a2
KH
1948 if (CHARSET_UNIFIED_P (charset))
1949 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
2e344af3 1950 }
2e344af3 1951
3263d5a2 1952 if (CHAR_TABLE_P (Vchar_unified_charset_table))
2e344af3 1953 {
3263d5a2
KH
1954 Foptimize_char_table (Vchar_unified_charset_table);
1955 Vchar_unify_table = Vchar_unified_charset_table;
1956 Vchar_unified_charset_table = Qnil;
87b089ad 1957 }
740f080d 1958
3263d5a2 1959 return Qnil;
740f080d
KH
1960}
1961
8ddf5e57
DL
1962DEFUN ("charset-priority-list", Fcharset_priority_list,
1963 Scharset_priority_list, 0, 1, 0,
1964 doc: /* Return the list of charsets ordered by priority.
1965HIGHESTP non-nil means just return the highest priority one. */)
1966 (highestp)
1967 Lisp_Object highestp;
2e344af3 1968{
8ddf5e57 1969 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 1970
8ddf5e57 1971 if (!NILP (highestp))
16fed1fc 1972 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 1973
8ddf5e57 1974 while (!NILP (list))
2e344af3 1975 {
16fed1fc 1976 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 1977 list = XCDR (list);
2e344af3 1978 }
8ddf5e57 1979 return Fnreverse (val);
2e344af3
KH
1980}
1981
8ddf5e57
DL
1982DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1983 1, MANY, 0,
1984 doc: /* Assign higher priority to the charsets given as arguments.
1985usage: (set-charset-priority &rest charsets) */)
1986 (nargs, args)
1987 int nargs;
4ed46869
KH
1988 Lisp_Object *args;
1989{
af7c60ca 1990 Lisp_Object new_head, old_list, arglist[2];
321c819c 1991 Lisp_Object list_2022, list_emacs_mule;
16fed1fc 1992 int i, id;
4ed46869 1993
8ddf5e57 1994 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 1995 new_head = Qnil;
8ddf5e57 1996 for (i = 0; i < nargs; i++)
4ed46869 1997 {
8ddf5e57 1998 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
1999 if (! NILP (Fmemq (make_number (id), old_list)))
2000 {
2001 old_list = Fdelq (make_number (id), old_list);
2002 new_head = Fcons (make_number (id), new_head);
2003 }
5729c92f 2004 }
8ddf5e57
DL
2005 arglist[0] = Fnreverse (new_head);
2006 arglist[1] = old_list;
2007 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2008 charset_ordered_list_tick++;
5729c92f 2009
321c819c 2010 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2011 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2012 {
e77415b0 2013 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2014 list_2022 = Fcons (XCAR (old_list), list_2022);
2015 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2016 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
4ed46869 2017 }
321c819c
KH
2018 Viso_2022_charset_list = Fnreverse (list_2022);
2019 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
4ed46869 2020
8ddf5e57 2021 return Qnil;
4ed46869
KH
2022}
2023
d5b33309
KH
2024DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2025 0, 1, 0,
2026 doc: /* Internal use only.
2027Return charset identification number of CHARSET. */)
2028 (charset)
2029 Lisp_Object charset;
4ed46869 2030{
d5b33309 2031 int id;
4ed46869 2032
d5b33309
KH
2033 CHECK_CHARSET_GET_ID (charset, id);
2034 return make_number (id);
4ed46869
KH
2035}
2036
4ed46869 2037\f
3263d5a2
KH
2038void
2039init_charset ()
4ed46869 2040{
4beef065
KH
2041 Vcharset_map_path
2042 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory),
2043 Qnil);
4ed46869
KH
2044}
2045
4ed46869 2046
dfcf069d 2047void
4ed46869
KH
2048init_charset_once ()
2049{
2050 int i, j, k;
2051
3263d5a2
KH
2052 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2053 for (j = 0; j < ISO_MAX_CHARS; j++)
2054 for (k = 0; k < ISO_MAX_FINAL; k++)
2055 iso_charset_table[i][j][k] = -1;
4ed46869 2056
60383934 2057 for (i = 0; i < 256; i++)
3263d5a2 2058 emacs_mule_charset[i] = NULL;
4ed46869 2059
7c7dceee
KH
2060 charset_jisx0201_roman = -1;
2061 charset_jisx0208_1978 = -1;
2062 charset_jisx0208 = -1;
4ed46869
KH
2063
2064 for (i = 0; i < 128; i++)
14e3d523 2065 unibyte_to_multibyte_table[i] = i;
4ed46869 2066 for (; i < 256; i++)
170e4589 2067 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
4ed46869
KH
2068}
2069
2070#ifdef emacs
2071
dfcf069d 2072void
4ed46869
KH
2073syms_of_charset ()
2074{
3263d5a2
KH
2075 DEFSYM (Qcharsetp, "charsetp");
2076
2077 DEFSYM (Qascii, "ascii");
2078 DEFSYM (Qunicode, "unicode");
2fe1edd1 2079 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2080 DEFSYM (Qiso_8859_1, "iso-8859-1");
2081
2082 DEFSYM (Qgl, "gl");
2083 DEFSYM (Qgr, "gr");
2084
3263d5a2
KH
2085 staticpro (&Vcharset_ordered_list);
2086 Vcharset_ordered_list = Qnil;
2087
2088 staticpro (&Viso_2022_charset_list);
2089 Viso_2022_charset_list = Qnil;
2090
2091 staticpro (&Vemacs_mule_charset_list);
2092 Vemacs_mule_charset_list = Qnil;
2093
2094 staticpro (&Vcharset_hash_table);
8f924df7
KH
2095 {
2096 Lisp_Object args[2];
2097 args[0] = QCtest;
2098 args[1] = Qeq;
2099 Vcharset_hash_table = Fmake_hash_table (2, args);
2100 }
3263d5a2
KH
2101
2102 charset_table_size = 128;
2103 charset_table = ((struct charset *)
2104 xmalloc (sizeof (struct charset) * charset_table_size));
2105 charset_table_used = 0;
2106
2107 staticpro (&Vchar_unified_charset_table);
2108 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2109
2110 defsubr (&Scharsetp);
2111 defsubr (&Smap_charset_chars);
2112 defsubr (&Sdefine_charset_internal);
2113 defsubr (&Sdefine_charset_alias);
d1a04588
KH
2114 defsubr (&Sunibyte_charset);
2115 defsubr (&Sset_unibyte_charset);
3263d5a2
KH
2116 defsubr (&Scharset_plist);
2117 defsubr (&Sset_charset_plist);
2118 defsubr (&Sunify_charset);
3fac5a51 2119 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2120 defsubr (&Sdeclare_equiv_charset);
2121 defsubr (&Sfind_charset_region);
2122 defsubr (&Sfind_charset_string);
3263d5a2
KH
2123 defsubr (&Sdecode_char);
2124 defsubr (&Sencode_char);
4ed46869 2125 defsubr (&Ssplit_char);
3263d5a2 2126 defsubr (&Smake_char);
4ed46869 2127 defsubr (&Schar_charset);
90d7b74e 2128 defsubr (&Scharset_after);
4ed46869 2129 defsubr (&Siso_charset);
3263d5a2 2130 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2131 defsubr (&Scharset_priority_list);
2132 defsubr (&Sset_charset_priority);
d5b33309 2133 defsubr (&Scharset_id_internal);
3263d5a2 2134
4beef065
KH
2135 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2136 doc: /* *Lisp of directories to search for charset map files. */);
2137 Vcharset_map_path = Qnil;
4ed46869
KH
2138
2139 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2140 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2141 Vcharset_list = Qnil;
2142
2fe1edd1
KH
2143 charset_ascii
2144 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2145 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2146 charset_iso_8859_1
2147 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2148 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2149 charset_unicode
73fbf2d9 2150 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1
KH
2151 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2152 charset_eight_bit
2153 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2154 128, 255, -1, 0, -1, 0, 0,
2155 MAX_5_BYTE_CHAR + 1);
4ed46869
KH
2156}
2157
2158#endif /* emacs */
cefd8c4f
KH
2159
2160/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2161 (do not change this comment) */