* buffer.h (FETCH_MULTIBYTE_CHAR): Define as inline.
[bpt/emacs.git] / src / category.c
CommitLineData
4ed46869 1/* GNU Emacs routines to deal with category tables.
95df8112 2
acaf905b 3Copyright (C) 1998, 2001-2012 Free Software Foundation, Inc.
95df8112
GM
4Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8Copyright (C) 2003
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869
KH
11
12This file is part of GNU Emacs.
13
9ec0b715 14GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 15it under the terms of the GNU General Public License as published by
9ec0b715
GM
16the Free Software Foundation, either version 3 of the License, or
17(at your option) any later version.
4ed46869
KH
18
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
23
24You should have received a copy of the GNU General Public License
9ec0b715 25along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869
KH
26
27
28/* Here we handle three objects: category, category set, and category
29 table. Read comments in the file category.h to understand them. */
30
31#include <config.h>
32#include <ctype.h>
d7306fe6 33#include <setjmp.h>
4ed46869 34#include "lisp.h"
ea012abd 35#include "character.h"
e5560ff7 36#include "buffer.h"
4ed46869
KH
37#include "charset.h"
38#include "category.h"
e35f6ff7 39#include "keymap.h"
4ed46869
KH
40
41/* The version number of the latest category table. Each category
42 table has a unique version number. It is assigned a new number
43 also when it is modified. When a regular expression is compiled
44 into the struct re_pattern_buffer, the version number of the
45 category table (of the current buffer) at that moment is also
46 embedded in the structure.
47
48 For the moment, we are not using this feature. */
49static int category_table_version;
50
955cbe7b 51static Lisp_Object Qcategory_table, Qcategoryp, Qcategorysetp, Qcategory_table_p;
4ed46869 52
d85b608f
PE
53/* Make CATEGORY_SET includes (if VAL is t) or excludes (if VAL is
54 nil) CATEGORY. */
55#define SET_CATEGORY_SET(category_set, category, val) \
56 set_category_set (category_set, category, val)
57static void set_category_set (Lisp_Object, Lisp_Object, Lisp_Object);
4ed46869
KH
58\f
59/* Category set staff. */
60
f57e2426 61static Lisp_Object hash_get_category_set (Lisp_Object, Lisp_Object);
cd486875
KH
62
63static Lisp_Object
971de7fb 64hash_get_category_set (Lisp_Object table, Lisp_Object category_set)
cd486875 65{
cd486875 66 struct Lisp_Hash_Table *h;
d3411f89 67 ptrdiff_t i;
0de4bb68 68 EMACS_UINT hash;
cd486875
KH
69
70 if (NILP (XCHAR_TABLE (table)->extras[1]))
71 XCHAR_TABLE (table)->extras[1]
72 = make_hash_table (Qequal, make_number (DEFAULT_HASH_SIZE),
73 make_float (DEFAULT_REHASH_SIZE),
74 make_float (DEFAULT_REHASH_THRESHOLD),
75 Qnil, Qnil, Qnil);
76 h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
77 i = hash_lookup (h, category_set, &hash);
78 if (i >= 0)
79 return HASH_KEY (h, i);
80 hash_put (h, category_set, Qnil, hash);
81 return category_set;
82}
83
84
4ed46869 85DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
fdb82f93
PJ
86 doc: /* Return a newly created category-set which contains CATEGORIES.
87CATEGORIES is a string of category mnemonics.
88The value is a bool-vector which has t at the indices corresponding to
89those categories. */)
5842a27b 90 (Lisp_Object categories)
4ed46869
KH
91{
92 Lisp_Object val;
93 int len;
94
b7826503 95 CHECK_STRING (categories);
4ed46869
KH
96 val = MAKE_CATEGORY_SET;
97
115afec3 98 if (STRING_MULTIBYTE (categories))
95030461 99 error ("Multibyte string in `make-category-set'");
115afec3 100
d5db4077 101 len = SCHARS (categories);
4ed46869
KH
102 while (--len >= 0)
103 {
15c60737 104 Lisp_Object category;
4ed46869 105
d5db4077 106 XSETFASTINT (category, SREF (categories, len));
b7826503 107 CHECK_CATEGORY (category);
4ed46869
KH
108 SET_CATEGORY_SET (val, category, Qt);
109 }
110 return val;
111}
112
113\f
114/* Category staff. */
115
d85b608f 116static Lisp_Object check_category_table (Lisp_Object table);
4ed46869
KH
117
118DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
839966f3
KH
119 doc: /* Define CATEGORY as a category which is described by DOCSTRING.
120CATEGORY should be an ASCII printing character in the range ` ' to `~'.
f6d2f045
KH
121DOCSTRING is the documentation string of the category. The first line
122should be a terse text (preferably less than 16 characters),
123and the rest lines should be the full description.
fdb82f93 124The category is defined only in category table TABLE, which defaults to
839966f3 125the current buffer's category table. */)
5842a27b 126 (Lisp_Object category, Lisp_Object docstring, Lisp_Object table)
4ed46869 127{
b7826503
PJ
128 CHECK_CATEGORY (category);
129 CHECK_STRING (docstring);
4ed46869
KH
130 table = check_category_table (table);
131
132 if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
5fdb398c 133 error ("Category `%c' is already defined", (int) XFASTINT (category));
905a9ed3
DN
134 if (!NILP (Vpurify_flag))
135 docstring = Fpurecopy (docstring);
4ed46869
KH
136 CATEGORY_DOCSTRING (table, XFASTINT (category)) = docstring;
137
138 return Qnil;
139}
140
141DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
839966f3
KH
142 doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
143TABLE should be a category table and defaults to the current buffer's
144category table. */)
5842a27b 145 (Lisp_Object category, Lisp_Object table)
4ed46869 146{
b7826503 147 CHECK_CATEGORY (category);
4ed46869
KH
148 table = check_category_table (table);
149
150 return CATEGORY_DOCSTRING (table, XFASTINT (category));
151}
152
153DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
154 0, 1, 0,
839966f3 155 doc: /* Return a category which is not yet defined in TABLE.
0b6694a5 156If no category remains available, return nil.
839966f3 157The optional argument TABLE specifies which category table to modify;
0b6694a5 158it defaults to the current buffer's category table. */)
5842a27b 159 (Lisp_Object table)
4ed46869
KH
160{
161 int i;
4ed46869
KH
162
163 table = check_category_table (table);
164
165 for (i = ' '; i <= '~'; i++)
166 if (NILP (CATEGORY_DOCSTRING (table, i)))
167 return make_number (i);
168
169 return Qnil;
170}
171
172\f
173/* Category-table staff. */
174
175DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
fdb82f93 176 doc: /* Return t if ARG is a category table. */)
5842a27b 177 (Lisp_Object arg)
4ed46869
KH
178{
179 if (CHAR_TABLE_P (arg)
ed8ec86d 180 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
4ed46869
KH
181 return Qt;
182 return Qnil;
183}
184
185/* If TABLE is nil, return the current category table. If TABLE is
186 not nil, check the validity of TABLE as a category table. If
187 valid, return TABLE itself, but if not valid, signal an error of
188 wrong-type-argument. */
189
d85b608f 190static Lisp_Object
971de7fb 191check_category_table (Lisp_Object table)
4ed46869 192{
4ed46869 193 if (NILP (table))
4b4deea2 194 return BVAR (current_buffer, category_table);
88674269 195 CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
4ed46869 196 return table;
177c0ea7 197}
4ed46869
KH
198
199DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
fdb82f93
PJ
200 doc: /* Return the current category table.
201This is the one specified by the current buffer. */)
5842a27b 202 (void)
4ed46869 203{
4b4deea2 204 return BVAR (current_buffer, category_table);
4ed46869
KH
205}
206
207DEFUN ("standard-category-table", Fstandard_category_table,
208 Sstandard_category_table, 0, 0, 0,
fdb82f93
PJ
209 doc: /* Return the standard category table.
210This is the one used for new buffers. */)
5842a27b 211 (void)
4ed46869
KH
212{
213 return Vstandard_category_table;
214}
215
ea012abd
KH
216
217static void
971de7fb 218copy_category_entry (Lisp_Object table, Lisp_Object c, Lisp_Object val)
ea012abd 219{
f4b670ef 220 val = Fcopy_sequence (val);
8f924df7
KH
221 if (CONSP (c))
222 char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val);
f4b670ef 223 else
8f924df7 224 char_table_set (table, XINT (c), val);
ea012abd
KH
225}
226
4ed46869
KH
227/* Return a copy of category table TABLE. We can't simply use the
228 function copy-sequence because no contents should be shared between
ed8ec86d 229 the original and the copy. This function is called recursively by
9da95d53 230 binding TABLE to a sub char table. */
4ed46869 231
9f3b5e69 232static Lisp_Object
971de7fb 233copy_category_table (Lisp_Object table)
4ed46869 234{
ea012abd 235 table = copy_char_table (table);
4ed46869 236
ea012abd
KH
237 if (! NILP (XCHAR_TABLE (table)->defalt))
238 XCHAR_TABLE (table)->defalt
239 = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
240 XCHAR_TABLE (table)->extras[0]
241 = Fcopy_sequence (XCHAR_TABLE (table)->extras[0]);
8f924df7 242 map_char_table (copy_category_entry, Qnil, table, table);
ed8ec86d 243
4ed46869
KH
244 return table;
245}
246
247DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
248 0, 1, 0,
fdb82f93
PJ
249 doc: /* Construct a new category table and return it.
250It is a copy of the TABLE, which defaults to the standard category table. */)
5842a27b 251 (Lisp_Object table)
4ed46869
KH
252{
253 if (!NILP (table))
254 check_category_table (table);
255 else
256 table = Vstandard_category_table;
257
9da95d53 258 return copy_category_table (table);
4ed46869
KH
259}
260
70414a3d
KH
261DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
262 0, 0, 0,
fdb82f93 263 doc: /* Construct a new and empty category table and return it. */)
5842a27b 264 (void)
70414a3d
KH
265{
266 Lisp_Object val;
ea012abd 267 int i;
70414a3d
KH
268
269 val = Fmake_char_table (Qcategory_table, Qnil);
270 XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
8f924df7 271 for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
ea012abd 272 XCHAR_TABLE (val)->contents[i] = MAKE_CATEGORY_SET;
70414a3d
KH
273 Fset_char_table_extra_slot (val, make_number (0),
274 Fmake_vector (make_number (95), Qnil));
275 return val;
276}
277
4ed46869 278DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
839966f3
KH
279 doc: /* Specify TABLE as the category table for the current buffer.
280Return TABLE. */)
5842a27b 281 (Lisp_Object table)
4ed46869 282{
6f598a6d 283 int idx;
4ed46869 284 table = check_category_table (table);
4b4deea2 285 BVAR (current_buffer, category_table) = table;
4ed46869 286 /* Indicate that this buffer now has a specified category table. */
f6cd0527
GM
287 idx = PER_BUFFER_VAR_IDX (category_table);
288 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
4ed46869
KH
289 return table;
290}
291
292\f
ea012abd 293Lisp_Object
971de7fb 294char_category_set (int c)
ea012abd 295{
4b4deea2 296 return CHAR_TABLE_REF (BVAR (current_buffer, category_table), c);
ea012abd
KH
297}
298
4ed46869 299DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
a2c2f196
JB
300 doc: /* Return the category set of CHAR.
301usage: (char-category-set CHAR) */)
5842a27b 302 (Lisp_Object ch)
4ed46869 303{
d311d28c 304 CHECK_CHARACTER (ch);
4ed46869
KH
305 return CATEGORY_SET (XFASTINT (ch));
306}
307
308DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
309 Scategory_set_mnemonics, 1, 1, 0,
fdb82f93
PJ
310 doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
311CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
839966f3 312that are indexes where t occurs in the bool-vector.
fdb82f93 313The return value is a string containing those same categories. */)
5842a27b 314 (Lisp_Object category_set)
4ed46869
KH
315{
316 int i, j;
317 char str[96];
318
b7826503 319 CHECK_CATEGORY_SET (category_set);
4ed46869
KH
320
321 j = 0;
322 for (i = 32; i < 127; i++)
323 if (CATEGORY_MEMBER (i, category_set))
324 str[j++] = i;
325 str[j] = '\0';
326
327 return build_string (str);
328}
329
d85b608f 330static void
971de7fb 331set_category_set (Lisp_Object category_set, Lisp_Object category, Lisp_Object val)
4ed46869
KH
332{
333 do {
334 int idx = XINT (category) / 8;
335 unsigned char bits = 1 << (XINT (category) % 8);
336
337 if (NILP (val))
338 XCATEGORY_SET (category_set)->data[idx] &= ~bits;
339 else
340 XCATEGORY_SET (category_set)->data[idx] |= bits;
341 } while (0);
342}
343
344DEFUN ("modify-category-entry", Fmodify_category_entry,
345 Smodify_category_entry, 2, 4, 0,
fdb82f93
PJ
346 doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
347The category is changed only for table TABLE, which defaults to
8f7e5042
DL
348the current buffer's category table.
349CHARACTER can be either a single character or a cons representing the
350lower and upper ends of an inclusive character range to modify.
fdb82f93
PJ
351If optional fourth argument RESET is non-nil,
352then delete CATEGORY from the category set instead of adding it. */)
5842a27b 353 (Lisp_Object character, Lisp_Object category, Lisp_Object table, Lisp_Object reset)
4ed46869 354{
4ed46869 355 Lisp_Object set_value; /* Actual value to be set in category sets. */
8f7e5042 356 Lisp_Object category_set;
ea012abd
KH
357 int start, end;
358 int from, to;
4ed46869 359
ea012abd 360 if (INTEGERP (character))
4ed46869 361 {
ea012abd
KH
362 CHECK_CHARACTER (character);
363 start = end = XFASTINT (character);
4ed46869 364 }
ea012abd 365 else
4ed46869 366 {
ea012abd 367 CHECK_CONS (character);
8f924df7
KH
368 CHECK_CHARACTER_CAR (character);
369 CHECK_CHARACTER_CDR (character);
ea012abd
KH
370 start = XFASTINT (XCAR (character));
371 end = XFASTINT (XCDR (character));
4ed46869 372 }
4ed46869 373
b7826503 374 CHECK_CATEGORY (category);
4ed46869 375 table = check_category_table (table);
4ed46869 376
4ed46869 377 if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
5fdb398c 378 error ("Undefined category: %c", (int) XFASTINT (category));
4ed46869 379
4ed46869 380 set_value = NILP (reset) ? Qt : Qnil;
4ed46869 381
ea012abd 382 while (start <= end)
4ed46869 383 {
cd486875 384 from = start, to = end;
ea012abd 385 category_set = char_table_ref_and_range (table, start, &from, &to);
0b261f59 386 if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
ea012abd 387 {
d8887a31
KH
388 category_set = Fcopy_sequence (category_set);
389 SET_CATEGORY_SET (category_set, category, set_value);
cd486875
KH
390 category_set = hash_get_category_set (table, category_set);
391 char_table_set_range (table, start, to, category_set);
ea012abd 392 }
ea012abd 393 start = to + 1;
4ed46869 394 }
4ed46869
KH
395
396 return Qnil;
397}
398\f
4ed46869
KH
399/* Return 1 if there is a word boundary between two word-constituent
400 characters C1 and C2 if they appear in this order, else return 0.
401 Use the macro WORD_BOUNDARY_P instead of calling this function
402 directly. */
403
404int
971de7fb 405word_boundary_p (int c1, int c2)
4ed46869
KH
406{
407 Lisp_Object category_set1, category_set2;
408 Lisp_Object tail;
409 int default_result;
410
714b2198
KH
411 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
412 CHAR_TABLE_REF (Vchar_script_table, c2)))
4ed46869
KH
413 {
414 tail = Vword_separating_categories;
415 default_result = 0;
416 }
417 else
418 {
419 tail = Vword_combining_categories;
420 default_result = 1;
421 }
422
423 category_set1 = CATEGORY_SET (c1);
424 if (NILP (category_set1))
425 return default_result;
426 category_set2 = CATEGORY_SET (c2);
427 if (NILP (category_set2))
428 return default_result;
429
03699b14 430 for (; CONSP (tail); tail = XCDR (tail))
4ed46869 431 {
03699b14 432 Lisp_Object elt = XCAR (tail);
4ed46869
KH
433
434 if (CONSP (elt)
714b2198
KH
435 && (NILP (XCAR (elt))
436 || (CATEGORYP (XCAR (elt))
8a605fe8
KH
437 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)
438 && ! CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set2)))
714b2198
KH
439 && (NILP (XCDR (elt))
440 || (CATEGORYP (XCDR (elt))
8a605fe8 441 && ! CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set1)
714b2198 442 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))))
4ed46869
KH
443 return !default_result;
444 }
445 return default_result;
446}
447
448\f
dfcf069d 449void
971de7fb 450init_category_once (void)
4ed46869
KH
451{
452 /* This has to be done here, before we call Fmake_char_table. */
cd3520a4 453 DEFSYM (Qcategory_table, "category-table");
4ed46869
KH
454
455 /* Intern this now in case it isn't already done.
456 Setting this variable twice is harmless.
457 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 458 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
4ed46869
KH
459
460 /* Now we are ready to set up this property, so we can
461 create category tables. */
462 Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
463
464 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
177c0ea7 465 /* Set a category set which contains nothing to the default. */
4ed46869 466 XCHAR_TABLE (Vstandard_category_table)->defalt = MAKE_CATEGORY_SET;
9da95d53 467 Fset_char_table_extra_slot (Vstandard_category_table, make_number (0),
4ed46869
KH
468 Fmake_vector (make_number (95), Qnil));
469}
470
dfcf069d 471void
971de7fb 472syms_of_category (void)
4ed46869 473{
cd3520a4
JB
474 DEFSYM (Qcategoryp, "categoryp");
475 DEFSYM (Qcategorysetp, "categorysetp");
476 DEFSYM (Qcategory_table_p, "category-table-p");
4ed46869 477
29208e82 478 DEFVAR_LISP ("word-combining-categories", Vword_combining_categories,
fdb82f93
PJ
479 doc: /* List of pair (cons) of categories to determine word boundary.
480
481Emacs treats a sequence of word constituent characters as a single
e0f24100 482word (i.e. finds no word boundary between them) only if they belong to
714b2198 483the same script. But, exceptions are allowed in the following cases.
fdb82f93 484
714b2198 485\(1) The case that characters are in different scripts is controlled
fdb82f93
PJ
486by the variable `word-combining-categories'.
487
714b2198 488Emacs finds no word boundary between characters of different scripts
fdb82f93
PJ
489if they have categories matching some element of this list.
490
491More precisely, if an element of this list is a cons of category CAT1
492and CAT2, and a multibyte character C1 which has CAT1 is followed by
493C2 which has CAT2, there's no word boundary between C1 and C2.
494
714b2198
KH
495For instance, to tell that Han characters followed by Hiragana
496characters can form a single word, the element `(?C . ?H)' should be
497in this list.
fdb82f93 498
714b2198 499\(2) The case that character are in the same script is controlled by
fdb82f93
PJ
500the variable `word-separating-categories'.
501
ecdcaa09 502Emacs finds a word boundary between characters of the same script
fdb82f93
PJ
503if they have categories matching some element of this list.
504
505More precisely, if an element of this list is a cons of category CAT1
8a605fe8
KH
506and CAT2, and a multibyte character C1 which has CAT1 but not CAT2 is
507followed by C2 which has CAT2 but not CAT1, there's a word boundary
508between C1 and C2.
fdb82f93 509
714b2198
KH
510For instance, to tell that there's a word boundary between Hiragana
511and Katakana (both are in the same script `kana'),
512the element `(?H . ?K) should be in this list. */);
4ed46869
KH
513
514 Vword_combining_categories = Qnil;
515
29208e82 516 DEFVAR_LISP ("word-separating-categories", Vword_separating_categories,
fdb82f93
PJ
517 doc: /* List of pair (cons) of categories to determine word boundary.
518See the documentation of the variable `word-combining-categories'. */);
4ed46869
KH
519
520 Vword_separating_categories = Qnil;
521
522 defsubr (&Smake_category_set);
523 defsubr (&Sdefine_category);
524 defsubr (&Scategory_docstring);
525 defsubr (&Sget_unused_category);
526 defsubr (&Scategory_table_p);
527 defsubr (&Scategory_table);
528 defsubr (&Sstandard_category_table);
529 defsubr (&Scopy_category_table);
70414a3d 530 defsubr (&Smake_category_table);
4ed46869
KH
531 defsubr (&Sset_category_table);
532 defsubr (&Schar_category_set);
533 defsubr (&Scategory_set_mnemonics);
534 defsubr (&Smodify_category_entry);
4ed46869
KH
535
536 category_table_version = 0;
537}