New file that implements char table.
[bpt/emacs.git] / src / category.c
CommitLineData
4ed46869 1/* GNU Emacs routines to deal with category tables.
75c8c592
RS
2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4ed46869
KH
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
369314dc
KH
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
4ed46869
KH
21
22
23/* Here we handle three objects: category, category set, and category
24 table. Read comments in the file category.h to understand them. */
25
26#include <config.h>
27#include <ctype.h>
28#include "lisp.h"
29#include "buffer.h"
30#include "charset.h"
31#include "category.h"
e35f6ff7 32#include "keymap.h"
4ed46869
KH
33
34/* The version number of the latest category table. Each category
35 table has a unique version number. It is assigned a new number
36 also when it is modified. When a regular expression is compiled
37 into the struct re_pattern_buffer, the version number of the
38 category table (of the current buffer) at that moment is also
39 embedded in the structure.
40
41 For the moment, we are not using this feature. */
42static int category_table_version;
43
44Lisp_Object Qcategory_table, Qcategoryp, Qcategorysetp, Qcategory_table_p;
45
46/* Variables to determine word boundary. */
47Lisp_Object Vword_combining_categories, Vword_separating_categories;
48
49/* Temporary internal variable used in macro CHAR_HAS_CATEGORY. */
50Lisp_Object _temp_category_set;
51
52\f
53/* Category set staff. */
54
55DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
fdb82f93
PJ
56 doc: /* Return a newly created category-set which contains CATEGORIES.
57CATEGORIES is a string of category mnemonics.
58The value is a bool-vector which has t at the indices corresponding to
59those categories. */)
60 (categories)
4ed46869
KH
61 Lisp_Object categories;
62{
63 Lisp_Object val;
64 int len;
65
b7826503 66 CHECK_STRING (categories);
4ed46869
KH
67 val = MAKE_CATEGORY_SET;
68
115afec3
RS
69 if (STRING_MULTIBYTE (categories))
70 error ("Multibyte string in make-category-set");
71
4ed46869
KH
72 len = XSTRING (categories)->size;
73 while (--len >= 0)
74 {
15c60737 75 Lisp_Object category;
4ed46869 76
15c60737 77 XSETFASTINT (category, XSTRING (categories)->data[len]);
b7826503 78 CHECK_CATEGORY (category);
4ed46869
KH
79 SET_CATEGORY_SET (val, category, Qt);
80 }
81 return val;
82}
83
84\f
85/* Category staff. */
86
87Lisp_Object check_category_table ();
88
89DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
fdb82f93
PJ
90 doc: /* Define CHAR as a category which is described by DOCSTRING.
91CHAR should be an ASCII printing character in the range ` ' to `~'.
92DOCSTRING is a documentation string of the category.
93The category is defined only in category table TABLE, which defaults to
94 the current buffer's category table. */)
95 (category, docstring, table)
4ed46869
KH
96 Lisp_Object category, docstring, table;
97{
b7826503
PJ
98 CHECK_CATEGORY (category);
99 CHECK_STRING (docstring);
4ed46869
KH
100 table = check_category_table (table);
101
102 if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
103 error ("Category `%c' is already defined", XFASTINT (category));
104 CATEGORY_DOCSTRING (table, XFASTINT (category)) = docstring;
105
106 return Qnil;
107}
108
109DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
fdb82f93
PJ
110 doc: /* Return the documentation string of CATEGORY, as defined in CATEGORY-TABLE. */)
111 (category, table)
4ed46869
KH
112 Lisp_Object category, table;
113{
b7826503 114 CHECK_CATEGORY (category);
4ed46869
KH
115 table = check_category_table (table);
116
117 return CATEGORY_DOCSTRING (table, XFASTINT (category));
118}
119
120DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
121 0, 1, 0,
0b6694a5
RS
122 doc: /* Return a category which is not yet defined in CATEGORY-TABLE.
123If no category remains available, return nil.
124The optional argument CATEGORY-TABLE
125specifies which category table to modify;
126it defaults to the current buffer's category table. */)
fdb82f93 127 (table)
4ed46869
KH
128 Lisp_Object table;
129{
130 int i;
4ed46869
KH
131
132 table = check_category_table (table);
133
134 for (i = ' '; i <= '~'; i++)
135 if (NILP (CATEGORY_DOCSTRING (table, i)))
136 return make_number (i);
137
138 return Qnil;
139}
140
141\f
142/* Category-table staff. */
143
144DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
fdb82f93
PJ
145 doc: /* Return t if ARG is a category table. */)
146 (arg)
4ed46869
KH
147 Lisp_Object arg;
148{
149 if (CHAR_TABLE_P (arg)
ed8ec86d 150 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
4ed46869
KH
151 return Qt;
152 return Qnil;
153}
154
155/* If TABLE is nil, return the current category table. If TABLE is
156 not nil, check the validity of TABLE as a category table. If
157 valid, return TABLE itself, but if not valid, signal an error of
158 wrong-type-argument. */
159
160Lisp_Object
161check_category_table (table)
162 Lisp_Object table;
163{
164 register Lisp_Object tem;
165 if (NILP (table))
166 return current_buffer->category_table;
167 while (tem = Fcategory_table_p (table), NILP (tem))
168 table = wrong_type_argument (Qcategory_table_p, table);
169 return table;
170}
171
172DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
fdb82f93
PJ
173 doc: /* Return the current category table.
174This is the one specified by the current buffer. */)
175 ()
4ed46869
KH
176{
177 return current_buffer->category_table;
178}
179
180DEFUN ("standard-category-table", Fstandard_category_table,
181 Sstandard_category_table, 0, 0, 0,
fdb82f93
PJ
182 doc: /* Return the standard category table.
183This is the one used for new buffers. */)
184 ()
4ed46869
KH
185{
186 return Vstandard_category_table;
187}
188
189/* Return a copy of category table TABLE. We can't simply use the
190 function copy-sequence because no contents should be shared between
ed8ec86d 191 the original and the copy. This function is called recursively by
9da95d53 192 binding TABLE to a sub char table. */
4ed46869
KH
193
194Lisp_Object
ed8ec86d 195copy_category_table (table)
4ed46869
KH
196 Lisp_Object table;
197{
ed8ec86d
KH
198 Lisp_Object tmp;
199 int i, to;
4ed46869 200
ed8ec86d
KH
201 if (!NILP (XCHAR_TABLE (table)->top))
202 {
203 /* TABLE is a top level char table.
204 At first, make a copy of tree structure of the table. */
205 table = Fcopy_sequence (table);
206
207 /* Then, copy elements for single byte characters one by one. */
208 for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
209 if (!NILP (tmp = XCHAR_TABLE (table)->contents[i]))
210 XCHAR_TABLE (table)->contents[i] = Fcopy_sequence (tmp);
211 to = CHAR_TABLE_ORDINARY_SLOTS;
9da95d53
KH
212
213 /* Also copy the first (and sole) extra slot. It is a vector
214 containing docstring of each category. */
215 Fset_char_table_extra_slot
216 (table, make_number (0),
217 Fcopy_sequence (Fchar_table_extra_slot (table, make_number (0))));
ed8ec86d
KH
218 }
219 else
4ed46869 220 {
ed8ec86d
KH
221 i = 32;
222 to = SUB_CHAR_TABLE_ORDINARY_SLOTS;
4ed46869
KH
223 }
224
ed8ec86d
KH
225 /* If the table has non-nil default value, copy it. */
226 if (!NILP (tmp = XCHAR_TABLE (table)->defalt))
227 XCHAR_TABLE (table)->defalt = Fcopy_sequence (tmp);
228
229 /* At last, copy the remaining elements while paying attention to a
230 sub char table. */
231 for (; i < to; i++)
232 if (!NILP (tmp = XCHAR_TABLE (table)->contents[i]))
233 XCHAR_TABLE (table)->contents[i]
234 = (SUB_CHAR_TABLE_P (tmp)
235 ? copy_category_table (tmp) : Fcopy_sequence (tmp));
236
4ed46869
KH
237 return table;
238}
239
240DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
241 0, 1, 0,
fdb82f93
PJ
242 doc: /* Construct a new category table and return it.
243It is a copy of the TABLE, which defaults to the standard category table. */)
244 (table)
4ed46869
KH
245 Lisp_Object table;
246{
247 if (!NILP (table))
248 check_category_table (table);
249 else
250 table = Vstandard_category_table;
251
9da95d53 252 return copy_category_table (table);
4ed46869
KH
253}
254
70414a3d
KH
255DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
256 0, 0, 0,
fdb82f93
PJ
257 doc: /* Construct a new and empty category table and return it. */)
258 ()
70414a3d
KH
259{
260 Lisp_Object val;
261
262 val = Fmake_char_table (Qcategory_table, Qnil);
263 XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
264 Fset_char_table_extra_slot (val, make_number (0),
265 Fmake_vector (make_number (95), Qnil));
266 return val;
267}
268
4ed46869 269DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
fdb82f93
PJ
270 doc: /* Specify TABLE as the category table for the current buffer. */)
271 (table)
4ed46869
KH
272 Lisp_Object table;
273{
6f598a6d 274 int idx;
4ed46869
KH
275 table = check_category_table (table);
276 current_buffer->category_table = table;
277 /* Indicate that this buffer now has a specified category table. */
f6cd0527
GM
278 idx = PER_BUFFER_VAR_IDX (category_table);
279 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
4ed46869
KH
280 return table;
281}
282
283\f
284DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
fdb82f93
PJ
285 doc: /* Return the category set of CHAR. */)
286 (ch)
4ed46869
KH
287 Lisp_Object ch;
288{
b7826503 289 CHECK_NUMBER (ch);
4ed46869
KH
290 return CATEGORY_SET (XFASTINT (ch));
291}
292
293DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
294 Scategory_set_mnemonics, 1, 1, 0,
fdb82f93
PJ
295 doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
296CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
297that are indexes where t occurs the bool-vector.
298The return value is a string containing those same categories. */)
299 (category_set)
4ed46869
KH
300 Lisp_Object category_set;
301{
302 int i, j;
303 char str[96];
304
b7826503 305 CHECK_CATEGORY_SET (category_set);
4ed46869
KH
306
307 j = 0;
308 for (i = 32; i < 127; i++)
309 if (CATEGORY_MEMBER (i, category_set))
310 str[j++] = i;
311 str[j] = '\0';
312
313 return build_string (str);
314}
315
ed8ec86d 316/* Modify all category sets stored under sub char-table TABLE so that
4ed46869
KH
317 they contain (SET_VALUE is t) or don't contain (SET_VALUE is nil)
318 CATEGORY. */
319
320void
321modify_lower_category_set (table, category, set_value)
322 Lisp_Object table, category, set_value;
323{
324 Lisp_Object val;
325 int i;
326
54e67cf7
KH
327 val = XCHAR_TABLE (table)->defalt;
328 if (!CATEGORY_SET_P (val))
329 val = MAKE_CATEGORY_SET;
330 SET_CATEGORY_SET (val, category, set_value);
331 XCHAR_TABLE (table)->defalt = val;
4ed46869 332
ed8ec86d 333 for (i = 32; i < SUB_CHAR_TABLE_ORDINARY_SLOTS; i++)
4ed46869
KH
334 {
335 val = XCHAR_TABLE (table)->contents[i];
336
337 if (CATEGORY_SET_P (val))
338 SET_CATEGORY_SET (val, category, set_value);
ed8ec86d 339 else if (SUB_CHAR_TABLE_P (val))
4ed46869
KH
340 modify_lower_category_set (val, category, set_value);
341 }
342}
343
344void
345set_category_set (category_set, category, val)
346 Lisp_Object category_set, category, val;
347{
348 do {
349 int idx = XINT (category) / 8;
350 unsigned char bits = 1 << (XINT (category) % 8);
351
352 if (NILP (val))
353 XCATEGORY_SET (category_set)->data[idx] &= ~bits;
354 else
355 XCATEGORY_SET (category_set)->data[idx] |= bits;
356 } while (0);
357}
358
359DEFUN ("modify-category-entry", Fmodify_category_entry,
360 Smodify_category_entry, 2, 4, 0,
fdb82f93
PJ
361 doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
362The category is changed only for table TABLE, which defaults to
363 the current buffer's category table.
364If optional fourth argument RESET is non-nil,
365then delete CATEGORY from the category set instead of adding it. */)
366 (character, category, table, reset)
ea4943bf 367 Lisp_Object character, category, table, reset;
4ed46869
KH
368{
369 int c, charset, c1, c2;
370 Lisp_Object set_value; /* Actual value to be set in category sets. */
371 Lisp_Object val, category_set;
372
b7826503 373 CHECK_NUMBER (character);
ea4943bf 374 c = XINT (character);
b7826503 375 CHECK_CATEGORY (category);
4ed46869
KH
376 table = check_category_table (table);
377
378 if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
379 error ("Undefined category: %c", XFASTINT (category));
380
381 set_value = NILP (reset) ? Qt : Qnil;
382
ed8ec86d 383 if (c < CHAR_TABLE_SINGLE_BYTE_SLOTS)
4ed46869
KH
384 {
385 val = XCHAR_TABLE (table)->contents[c];
386 if (!CATEGORY_SET_P (val))
387 XCHAR_TABLE (table)->contents[c] = (val = MAKE_CATEGORY_SET);
388 SET_CATEGORY_SET (val, category, set_value);
389 return Qnil;
390 }
391
75ec644a 392 SPLIT_CHAR (c, charset, c1, c2);
4ed46869
KH
393
394 /* The top level table. */
cecda314 395 val = XCHAR_TABLE (table)->contents[charset + 128];
ed8ec86d
KH
396 if (CATEGORY_SET_P (val))
397 category_set = val;
398 else if (!SUB_CHAR_TABLE_P (val))
4ed46869 399 {
ed8ec86d 400 category_set = val = MAKE_CATEGORY_SET;
cecda314 401 XCHAR_TABLE (table)->contents[charset + 128] = category_set;
4ed46869 402 }
4ed46869 403
ed8ec86d 404 if (c1 <= 0)
4ed46869
KH
405 {
406 /* Only a charset is specified. */
ed8ec86d
KH
407 if (SUB_CHAR_TABLE_P (val))
408 /* All characters in CHARSET should be the same as for having
409 CATEGORY or not. */
4ed46869
KH
410 modify_lower_category_set (val, category, set_value);
411 else
412 SET_CATEGORY_SET (category_set, category, set_value);
413 return Qnil;
414 }
415
416 /* The second level table. */
ed8ec86d 417 if (!SUB_CHAR_TABLE_P (val))
4ed46869 418 {
ed8ec86d 419 val = make_sub_char_table (Qnil);
cecda314 420 XCHAR_TABLE (table)->contents[charset + 128] = val;
4ed46869
KH
421 /* We must set default category set of CHARSET in `defalt' slot. */
422 XCHAR_TABLE (val)->defalt = category_set;
423 }
424 table = val;
425
426 val = XCHAR_TABLE (table)->contents[c1];
ed8ec86d
KH
427 if (CATEGORY_SET_P (val))
428 category_set = val;
429 else if (!SUB_CHAR_TABLE_P (val))
4ed46869 430 {
ed8ec86d 431 category_set = val = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
4ed46869
KH
432 XCHAR_TABLE (table)->contents[c1] = category_set;
433 }
4ed46869 434
ed8ec86d 435 if (c2 <= 0)
4ed46869 436 {
ed8ec86d 437 if (SUB_CHAR_TABLE_P (val))
4ed46869
KH
438 /* All characters in C1 group of CHARSET should be the same as
439 for CATEGORY. */
440 modify_lower_category_set (val, category, set_value);
441 else
442 SET_CATEGORY_SET (category_set, category, set_value);
443 return Qnil;
444 }
445
446 /* The third (bottom) level table. */
ed8ec86d 447 if (!SUB_CHAR_TABLE_P (val))
4ed46869 448 {
53740deb 449 val = make_sub_char_table (Qnil);
4ed46869
KH
450 XCHAR_TABLE (table)->contents[c1] = val;
451 /* We must set default category set of CHARSET and C1 in
452 `defalt' slot. */
453 XCHAR_TABLE (val)->defalt = category_set;
454 }
455 table = val;
456
457 val = XCHAR_TABLE (table)->contents[c2];
ed8ec86d
KH
458 if (CATEGORY_SET_P (val))
459 category_set = val;
460 else if (!SUB_CHAR_TABLE_P (val))
4ed46869
KH
461 {
462 category_set = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
463 XCHAR_TABLE (table)->contents[c2] = category_set;
464 }
4ed46869
KH
465 else
466 /* This should never happen. */
467 error ("Invalid category table");
468
469 SET_CATEGORY_SET (category_set, category, set_value);
470
471 return Qnil;
472}
473\f
4ed46869
KH
474/* Return 1 if there is a word boundary between two word-constituent
475 characters C1 and C2 if they appear in this order, else return 0.
476 Use the macro WORD_BOUNDARY_P instead of calling this function
477 directly. */
478
479int
480word_boundary_p (c1, c2)
481 int c1, c2;
482{
483 Lisp_Object category_set1, category_set2;
484 Lisp_Object tail;
485 int default_result;
486
487 if (CHAR_CHARSET (c1) == CHAR_CHARSET (c2))
488 {
489 tail = Vword_separating_categories;
490 default_result = 0;
491 }
492 else
493 {
494 tail = Vword_combining_categories;
495 default_result = 1;
496 }
497
498 category_set1 = CATEGORY_SET (c1);
499 if (NILP (category_set1))
500 return default_result;
501 category_set2 = CATEGORY_SET (c2);
502 if (NILP (category_set2))
503 return default_result;
504
03699b14 505 for (; CONSP (tail); tail = XCDR (tail))
4ed46869 506 {
03699b14 507 Lisp_Object elt = XCAR (tail);
4ed46869
KH
508
509 if (CONSP (elt)
03699b14
KR
510 && CATEGORYP (XCAR (elt))
511 && CATEGORYP (XCDR (elt))
512 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)
513 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))
4ed46869
KH
514 return !default_result;
515 }
516 return default_result;
517}
518
519\f
dfcf069d 520void
4ed46869
KH
521init_category_once ()
522{
523 /* This has to be done here, before we call Fmake_char_table. */
524 Qcategory_table = intern ("category-table");
525 staticpro (&Qcategory_table);
526
527 /* Intern this now in case it isn't already done.
528 Setting this variable twice is harmless.
529 But don't staticpro it here--that is done in alloc.c. */
530 Qchar_table_extra_slots = intern ("char-table-extra-slots");
531
532 /* Now we are ready to set up this property, so we can
533 create category tables. */
534 Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
535
536 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
537 /* Set a category set which contains nothing to the default. */
538 XCHAR_TABLE (Vstandard_category_table)->defalt = MAKE_CATEGORY_SET;
9da95d53 539 Fset_char_table_extra_slot (Vstandard_category_table, make_number (0),
4ed46869
KH
540 Fmake_vector (make_number (95), Qnil));
541}
542
dfcf069d 543void
4ed46869
KH
544syms_of_category ()
545{
546 Qcategoryp = intern ("categoryp");
547 staticpro (&Qcategoryp);
548 Qcategorysetp = intern ("categorysetp");
549 staticpro (&Qcategorysetp);
550 Qcategory_table_p = intern ("category-table-p");
551 staticpro (&Qcategory_table_p);
552
553 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories,
fdb82f93
PJ
554 doc: /* List of pair (cons) of categories to determine word boundary.
555
556Emacs treats a sequence of word constituent characters as a single
557word (i.e. finds no word boundary between them) iff they belongs to
558the same charset. But, exceptions are allowed in the following cases.
559
560\(1) The case that characters are in different charsets is controlled
561by the variable `word-combining-categories'.
562
563Emacs finds no word boundary between characters of different charsets
564if they have categories matching some element of this list.
565
566More precisely, if an element of this list is a cons of category CAT1
567and CAT2, and a multibyte character C1 which has CAT1 is followed by
568C2 which has CAT2, there's no word boundary between C1 and C2.
569
570For instance, to tell that ASCII characters and Latin-1 characters can
571form a single word, the element `(?l . ?l)' should be in this list
572because both characters have the category `l' (Latin characters).
573
574\(2) The case that character are in the same charset is controlled by
575the variable `word-separating-categories'.
576
577Emacs find a word boundary between characters of the same charset
578if they have categories matching some element of this list.
579
580More precisely, if an element of this list is a cons of category CAT1
581and CAT2, and a multibyte character C1 which has CAT1 is followed by
582C2 which has CAT2, there's a word boundary between C1 and C2.
583
584For instance, to tell that there's a word boundary between Japanese
585Hiragana and Japanese Kanji (both are in the same charset), the
586element `(?H . ?C) should be in this list. */);
4ed46869
KH
587
588 Vword_combining_categories = Qnil;
589
590 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories,
fdb82f93
PJ
591 doc: /* List of pair (cons) of categories to determine word boundary.
592See the documentation of the variable `word-combining-categories'. */);
4ed46869
KH
593
594 Vword_separating_categories = Qnil;
595
596 defsubr (&Smake_category_set);
597 defsubr (&Sdefine_category);
598 defsubr (&Scategory_docstring);
599 defsubr (&Sget_unused_category);
600 defsubr (&Scategory_table_p);
601 defsubr (&Scategory_table);
602 defsubr (&Sstandard_category_table);
603 defsubr (&Scopy_category_table);
70414a3d 604 defsubr (&Smake_category_table);
4ed46869
KH
605 defsubr (&Sset_category_table);
606 defsubr (&Schar_category_set);
607 defsubr (&Scategory_set_mnemonics);
608 defsubr (&Smodify_category_entry);
4ed46869
KH
609
610 category_table_version = 0;
611}