Update md5 checksums
[bpt/emacs.git] / src / category.c
CommitLineData
4ed46869 1/* GNU Emacs routines to deal with category tables.
5df4f04c 2 Copyright (C) 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
429ab54e 3 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 5 2005, 2006, 2007, 2008, 2009, 2010, 2011
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8f924df7 8 Copyright (C) 2003
ea012abd
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869
KH
11
12This file is part of GNU Emacs.
13
9ec0b715 14GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 15it under the terms of the GNU General Public License as published by
9ec0b715
GM
16the Free Software Foundation, either version 3 of the License, or
17(at your option) any later version.
4ed46869
KH
18
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
23
24You should have received a copy of the GNU General Public License
9ec0b715 25along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869
KH
26
27
28/* Here we handle three objects: category, category set, and category
29 table. Read comments in the file category.h to understand them. */
30
31#include <config.h>
32#include <ctype.h>
d7306fe6 33#include <setjmp.h>
4ed46869
KH
34#include "lisp.h"
35#include "buffer.h"
ea012abd 36#include "character.h"
4ed46869
KH
37#include "charset.h"
38#include "category.h"
e35f6ff7 39#include "keymap.h"
4ed46869
KH
40
41/* The version number of the latest category table. Each category
42 table has a unique version number. It is assigned a new number
43 also when it is modified. When a regular expression is compiled
44 into the struct re_pattern_buffer, the version number of the
45 category table (of the current buffer) at that moment is also
46 embedded in the structure.
47
48 For the moment, we are not using this feature. */
49static int category_table_version;
50
51Lisp_Object Qcategory_table, Qcategoryp, Qcategorysetp, Qcategory_table_p;
52
53/* Variables to determine word boundary. */
54Lisp_Object Vword_combining_categories, Vword_separating_categories;
55
56/* Temporary internal variable used in macro CHAR_HAS_CATEGORY. */
57Lisp_Object _temp_category_set;
58
59\f
60/* Category set staff. */
61
f57e2426 62static Lisp_Object hash_get_category_set (Lisp_Object, Lisp_Object);
cd486875
KH
63
64static Lisp_Object
971de7fb 65hash_get_category_set (Lisp_Object table, Lisp_Object category_set)
cd486875
KH
66{
67 Lisp_Object val;
68 struct Lisp_Hash_Table *h;
69 int i;
70 unsigned hash;
71
72 if (NILP (XCHAR_TABLE (table)->extras[1]))
73 XCHAR_TABLE (table)->extras[1]
74 = make_hash_table (Qequal, make_number (DEFAULT_HASH_SIZE),
75 make_float (DEFAULT_REHASH_SIZE),
76 make_float (DEFAULT_REHASH_THRESHOLD),
77 Qnil, Qnil, Qnil);
78 h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
79 i = hash_lookup (h, category_set, &hash);
80 if (i >= 0)
81 return HASH_KEY (h, i);
82 hash_put (h, category_set, Qnil, hash);
83 return category_set;
84}
85
86
4ed46869 87DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
fdb82f93
PJ
88 doc: /* Return a newly created category-set which contains CATEGORIES.
89CATEGORIES is a string of category mnemonics.
90The value is a bool-vector which has t at the indices corresponding to
91those categories. */)
5842a27b 92 (Lisp_Object categories)
4ed46869
KH
93{
94 Lisp_Object val;
95 int len;
96
b7826503 97 CHECK_STRING (categories);
4ed46869
KH
98 val = MAKE_CATEGORY_SET;
99
115afec3 100 if (STRING_MULTIBYTE (categories))
95030461 101 error ("Multibyte string in `make-category-set'");
115afec3 102
d5db4077 103 len = SCHARS (categories);
4ed46869
KH
104 while (--len >= 0)
105 {
15c60737 106 Lisp_Object category;
4ed46869 107
d5db4077 108 XSETFASTINT (category, SREF (categories, len));
b7826503 109 CHECK_CATEGORY (category);
4ed46869
KH
110 SET_CATEGORY_SET (val, category, Qt);
111 }
112 return val;
113}
114
115\f
116/* Category staff. */
117
971de7fb 118Lisp_Object check_category_table (Lisp_Object table);
4ed46869
KH
119
120DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
839966f3
KH
121 doc: /* Define CATEGORY as a category which is described by DOCSTRING.
122CATEGORY should be an ASCII printing character in the range ` ' to `~'.
f6d2f045
KH
123DOCSTRING is the documentation string of the category. The first line
124should be a terse text (preferably less than 16 characters),
125and the rest lines should be the full description.
fdb82f93 126The category is defined only in category table TABLE, which defaults to
839966f3 127the current buffer's category table. */)
5842a27b 128 (Lisp_Object category, Lisp_Object docstring, Lisp_Object table)
4ed46869 129{
b7826503
PJ
130 CHECK_CATEGORY (category);
131 CHECK_STRING (docstring);
4ed46869
KH
132 table = check_category_table (table);
133
134 if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
135 error ("Category `%c' is already defined", XFASTINT (category));
905a9ed3
DN
136 if (!NILP (Vpurify_flag))
137 docstring = Fpurecopy (docstring);
4ed46869
KH
138 CATEGORY_DOCSTRING (table, XFASTINT (category)) = docstring;
139
140 return Qnil;
141}
142
143DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
839966f3
KH
144 doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
145TABLE should be a category table and defaults to the current buffer's
146category table. */)
5842a27b 147 (Lisp_Object category, Lisp_Object table)
4ed46869 148{
b7826503 149 CHECK_CATEGORY (category);
4ed46869
KH
150 table = check_category_table (table);
151
152 return CATEGORY_DOCSTRING (table, XFASTINT (category));
153}
154
155DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
156 0, 1, 0,
839966f3 157 doc: /* Return a category which is not yet defined in TABLE.
0b6694a5 158If no category remains available, return nil.
839966f3 159The optional argument TABLE specifies which category table to modify;
0b6694a5 160it defaults to the current buffer's category table. */)
5842a27b 161 (Lisp_Object table)
4ed46869
KH
162{
163 int i;
4ed46869
KH
164
165 table = check_category_table (table);
166
167 for (i = ' '; i <= '~'; i++)
168 if (NILP (CATEGORY_DOCSTRING (table, i)))
169 return make_number (i);
170
171 return Qnil;
172}
173
174\f
175/* Category-table staff. */
176
177DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
fdb82f93 178 doc: /* Return t if ARG is a category table. */)
5842a27b 179 (Lisp_Object arg)
4ed46869
KH
180{
181 if (CHAR_TABLE_P (arg)
ed8ec86d 182 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
4ed46869
KH
183 return Qt;
184 return Qnil;
185}
186
187/* If TABLE is nil, return the current category table. If TABLE is
188 not nil, check the validity of TABLE as a category table. If
189 valid, return TABLE itself, but if not valid, signal an error of
190 wrong-type-argument. */
191
192Lisp_Object
971de7fb 193check_category_table (Lisp_Object table)
4ed46869 194{
4ed46869
KH
195 if (NILP (table))
196 return current_buffer->category_table;
88674269 197 CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
4ed46869 198 return table;
177c0ea7 199}
4ed46869
KH
200
201DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
fdb82f93
PJ
202 doc: /* Return the current category table.
203This is the one specified by the current buffer. */)
5842a27b 204 (void)
4ed46869
KH
205{
206 return current_buffer->category_table;
207}
208
209DEFUN ("standard-category-table", Fstandard_category_table,
210 Sstandard_category_table, 0, 0, 0,
fdb82f93
PJ
211 doc: /* Return the standard category table.
212This is the one used for new buffers. */)
5842a27b 213 (void)
4ed46869
KH
214{
215 return Vstandard_category_table;
216}
217
ea012abd
KH
218
219static void
971de7fb 220copy_category_entry (Lisp_Object table, Lisp_Object c, Lisp_Object val)
ea012abd 221{
f4b670ef 222 val = Fcopy_sequence (val);
8f924df7
KH
223 if (CONSP (c))
224 char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val);
f4b670ef 225 else
8f924df7 226 char_table_set (table, XINT (c), val);
ea012abd
KH
227}
228
4ed46869
KH
229/* Return a copy of category table TABLE. We can't simply use the
230 function copy-sequence because no contents should be shared between
ed8ec86d 231 the original and the copy. This function is called recursively by
9da95d53 232 binding TABLE to a sub char table. */
4ed46869
KH
233
234Lisp_Object
971de7fb 235copy_category_table (Lisp_Object table)
4ed46869 236{
ea012abd 237 table = copy_char_table (table);
4ed46869 238
ea012abd
KH
239 if (! NILP (XCHAR_TABLE (table)->defalt))
240 XCHAR_TABLE (table)->defalt
241 = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
242 XCHAR_TABLE (table)->extras[0]
243 = Fcopy_sequence (XCHAR_TABLE (table)->extras[0]);
8f924df7 244 map_char_table (copy_category_entry, Qnil, table, table);
ed8ec86d 245
4ed46869
KH
246 return table;
247}
248
249DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
250 0, 1, 0,
fdb82f93
PJ
251 doc: /* Construct a new category table and return it.
252It is a copy of the TABLE, which defaults to the standard category table. */)
5842a27b 253 (Lisp_Object table)
4ed46869
KH
254{
255 if (!NILP (table))
256 check_category_table (table);
257 else
258 table = Vstandard_category_table;
259
9da95d53 260 return copy_category_table (table);
4ed46869
KH
261}
262
70414a3d
KH
263DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
264 0, 0, 0,
fdb82f93 265 doc: /* Construct a new and empty category table and return it. */)
5842a27b 266 (void)
70414a3d
KH
267{
268 Lisp_Object val;
ea012abd 269 int i;
70414a3d
KH
270
271 val = Fmake_char_table (Qcategory_table, Qnil);
272 XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
8f924df7 273 for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
ea012abd 274 XCHAR_TABLE (val)->contents[i] = MAKE_CATEGORY_SET;
70414a3d
KH
275 Fset_char_table_extra_slot (val, make_number (0),
276 Fmake_vector (make_number (95), Qnil));
277 return val;
278}
279
4ed46869 280DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
839966f3
KH
281 doc: /* Specify TABLE as the category table for the current buffer.
282Return TABLE. */)
5842a27b 283 (Lisp_Object table)
4ed46869 284{
6f598a6d 285 int idx;
4ed46869
KH
286 table = check_category_table (table);
287 current_buffer->category_table = table;
288 /* Indicate that this buffer now has a specified category table. */
f6cd0527
GM
289 idx = PER_BUFFER_VAR_IDX (category_table);
290 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
4ed46869
KH
291 return table;
292}
293
294\f
ea012abd 295Lisp_Object
971de7fb 296char_category_set (int c)
ea012abd
KH
297{
298 return CHAR_TABLE_REF (current_buffer->category_table, c);
299}
300
4ed46869 301DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
a2c2f196
JB
302 doc: /* Return the category set of CHAR.
303usage: (char-category-set CHAR) */)
5842a27b 304 (Lisp_Object ch)
4ed46869 305{
b7826503 306 CHECK_NUMBER (ch);
4ed46869
KH
307 return CATEGORY_SET (XFASTINT (ch));
308}
309
310DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
311 Scategory_set_mnemonics, 1, 1, 0,
fdb82f93
PJ
312 doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
313CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
839966f3 314that are indexes where t occurs in the bool-vector.
fdb82f93 315The return value is a string containing those same categories. */)
5842a27b 316 (Lisp_Object category_set)
4ed46869
KH
317{
318 int i, j;
319 char str[96];
320
b7826503 321 CHECK_CATEGORY_SET (category_set);
4ed46869
KH
322
323 j = 0;
324 for (i = 32; i < 127; i++)
325 if (CATEGORY_MEMBER (i, category_set))
326 str[j++] = i;
327 str[j] = '\0';
328
329 return build_string (str);
330}
331
4ed46869 332void
971de7fb 333set_category_set (Lisp_Object category_set, Lisp_Object category, Lisp_Object val)
4ed46869
KH
334{
335 do {
336 int idx = XINT (category) / 8;
337 unsigned char bits = 1 << (XINT (category) % 8);
338
339 if (NILP (val))
340 XCATEGORY_SET (category_set)->data[idx] &= ~bits;
341 else
342 XCATEGORY_SET (category_set)->data[idx] |= bits;
343 } while (0);
344}
345
346DEFUN ("modify-category-entry", Fmodify_category_entry,
347 Smodify_category_entry, 2, 4, 0,
fdb82f93
PJ
348 doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
349The category is changed only for table TABLE, which defaults to
8f7e5042
DL
350the current buffer's category table.
351CHARACTER can be either a single character or a cons representing the
352lower and upper ends of an inclusive character range to modify.
fdb82f93
PJ
353If optional fourth argument RESET is non-nil,
354then delete CATEGORY from the category set instead of adding it. */)
5842a27b 355 (Lisp_Object character, Lisp_Object category, Lisp_Object table, Lisp_Object reset)
4ed46869 356{
4ed46869 357 Lisp_Object set_value; /* Actual value to be set in category sets. */
8f7e5042 358 Lisp_Object category_set;
ea012abd
KH
359 int start, end;
360 int from, to;
4ed46869 361
ea012abd 362 if (INTEGERP (character))
4ed46869 363 {
ea012abd
KH
364 CHECK_CHARACTER (character);
365 start = end = XFASTINT (character);
4ed46869 366 }
ea012abd 367 else
4ed46869 368 {
ea012abd 369 CHECK_CONS (character);
8f924df7
KH
370 CHECK_CHARACTER_CAR (character);
371 CHECK_CHARACTER_CDR (character);
ea012abd
KH
372 start = XFASTINT (XCAR (character));
373 end = XFASTINT (XCDR (character));
4ed46869 374 }
4ed46869 375
b7826503 376 CHECK_CATEGORY (category);
4ed46869 377 table = check_category_table (table);
4ed46869 378
4ed46869
KH
379 if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
380 error ("Undefined category: %c", XFASTINT (category));
4ed46869 381
4ed46869 382 set_value = NILP (reset) ? Qt : Qnil;
4ed46869 383
ea012abd 384 while (start <= end)
4ed46869 385 {
cd486875 386 from = start, to = end;
ea012abd 387 category_set = char_table_ref_and_range (table, start, &from, &to);
0b261f59 388 if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
ea012abd 389 {
d8887a31
KH
390 category_set = Fcopy_sequence (category_set);
391 SET_CATEGORY_SET (category_set, category, set_value);
cd486875
KH
392 category_set = hash_get_category_set (table, category_set);
393 char_table_set_range (table, start, to, category_set);
ea012abd 394 }
ea012abd 395 start = to + 1;
4ed46869 396 }
4ed46869
KH
397
398 return Qnil;
399}
400\f
4ed46869
KH
401/* Return 1 if there is a word boundary between two word-constituent
402 characters C1 and C2 if they appear in this order, else return 0.
403 Use the macro WORD_BOUNDARY_P instead of calling this function
404 directly. */
405
406int
971de7fb 407word_boundary_p (int c1, int c2)
4ed46869
KH
408{
409 Lisp_Object category_set1, category_set2;
410 Lisp_Object tail;
411 int default_result;
412
714b2198
KH
413 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
414 CHAR_TABLE_REF (Vchar_script_table, c2)))
4ed46869
KH
415 {
416 tail = Vword_separating_categories;
417 default_result = 0;
418 }
419 else
420 {
421 tail = Vword_combining_categories;
422 default_result = 1;
423 }
424
425 category_set1 = CATEGORY_SET (c1);
426 if (NILP (category_set1))
427 return default_result;
428 category_set2 = CATEGORY_SET (c2);
429 if (NILP (category_set2))
430 return default_result;
431
03699b14 432 for (; CONSP (tail); tail = XCDR (tail))
4ed46869 433 {
03699b14 434 Lisp_Object elt = XCAR (tail);
4ed46869
KH
435
436 if (CONSP (elt)
714b2198
KH
437 && (NILP (XCAR (elt))
438 || (CATEGORYP (XCAR (elt))
8a605fe8
KH
439 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)
440 && ! CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set2)))
714b2198
KH
441 && (NILP (XCDR (elt))
442 || (CATEGORYP (XCDR (elt))
8a605fe8 443 && ! CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set1)
714b2198 444 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))))
4ed46869
KH
445 return !default_result;
446 }
447 return default_result;
448}
449
450\f
dfcf069d 451void
971de7fb 452init_category_once (void)
4ed46869
KH
453{
454 /* This has to be done here, before we call Fmake_char_table. */
d67b4f80 455 Qcategory_table = intern_c_string ("category-table");
4ed46869
KH
456 staticpro (&Qcategory_table);
457
458 /* Intern this now in case it isn't already done.
459 Setting this variable twice is harmless.
460 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 461 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
4ed46869
KH
462
463 /* Now we are ready to set up this property, so we can
464 create category tables. */
465 Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
466
467 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
177c0ea7 468 /* Set a category set which contains nothing to the default. */
4ed46869 469 XCHAR_TABLE (Vstandard_category_table)->defalt = MAKE_CATEGORY_SET;
9da95d53 470 Fset_char_table_extra_slot (Vstandard_category_table, make_number (0),
4ed46869
KH
471 Fmake_vector (make_number (95), Qnil));
472}
473
dfcf069d 474void
971de7fb 475syms_of_category (void)
4ed46869 476{
d67b4f80 477 Qcategoryp = intern_c_string ("categoryp");
4ed46869 478 staticpro (&Qcategoryp);
d67b4f80 479 Qcategorysetp = intern_c_string ("categorysetp");
4ed46869 480 staticpro (&Qcategorysetp);
d67b4f80 481 Qcategory_table_p = intern_c_string ("category-table-p");
4ed46869
KH
482 staticpro (&Qcategory_table_p);
483
484 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories,
fdb82f93
PJ
485 doc: /* List of pair (cons) of categories to determine word boundary.
486
487Emacs treats a sequence of word constituent characters as a single
e0f24100 488word (i.e. finds no word boundary between them) only if they belong to
714b2198 489the same script. But, exceptions are allowed in the following cases.
fdb82f93 490
714b2198 491\(1) The case that characters are in different scripts is controlled
fdb82f93
PJ
492by the variable `word-combining-categories'.
493
714b2198 494Emacs finds no word boundary between characters of different scripts
fdb82f93
PJ
495if they have categories matching some element of this list.
496
497More precisely, if an element of this list is a cons of category CAT1
498and CAT2, and a multibyte character C1 which has CAT1 is followed by
499C2 which has CAT2, there's no word boundary between C1 and C2.
500
714b2198
KH
501For instance, to tell that Han characters followed by Hiragana
502characters can form a single word, the element `(?C . ?H)' should be
503in this list.
fdb82f93 504
714b2198 505\(2) The case that character are in the same script is controlled by
fdb82f93
PJ
506the variable `word-separating-categories'.
507
ecdcaa09 508Emacs finds a word boundary between characters of the same script
fdb82f93
PJ
509if they have categories matching some element of this list.
510
511More precisely, if an element of this list is a cons of category CAT1
8a605fe8
KH
512and CAT2, and a multibyte character C1 which has CAT1 but not CAT2 is
513followed by C2 which has CAT2 but not CAT1, there's a word boundary
514between C1 and C2.
fdb82f93 515
714b2198
KH
516For instance, to tell that there's a word boundary between Hiragana
517and Katakana (both are in the same script `kana'),
518the element `(?H . ?K) should be in this list. */);
4ed46869
KH
519
520 Vword_combining_categories = Qnil;
521
522 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories,
fdb82f93
PJ
523 doc: /* List of pair (cons) of categories to determine word boundary.
524See the documentation of the variable `word-combining-categories'. */);
4ed46869
KH
525
526 Vword_separating_categories = Qnil;
527
528 defsubr (&Smake_category_set);
529 defsubr (&Sdefine_category);
530 defsubr (&Scategory_docstring);
531 defsubr (&Sget_unused_category);
532 defsubr (&Scategory_table_p);
533 defsubr (&Scategory_table);
534 defsubr (&Sstandard_category_table);
535 defsubr (&Scopy_category_table);
70414a3d 536 defsubr (&Smake_category_table);
4ed46869
KH
537 defsubr (&Sset_category_table);
538 defsubr (&Schar_category_set);
539 defsubr (&Scategory_set_mnemonics);
540 defsubr (&Smodify_category_entry);
4ed46869
KH
541
542 category_table_version = 0;
543}
839966f3 544