* net/browse-url.el (browse-url): Identify alist with "consp and
[bpt/emacs.git] / src / category.c
CommitLineData
4ed46869 1/* GNU Emacs routines to deal with category tables.
76b6f707 2 Copyright (C) 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
429ab54e 3 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
76b6f707 5 2005, 2006, 2007, 2008, 2009
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8f924df7 8 Copyright (C) 2003
ea012abd
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869
KH
11
12This file is part of GNU Emacs.
13
9ec0b715 14GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 15it under the terms of the GNU General Public License as published by
9ec0b715
GM
16the Free Software Foundation, either version 3 of the License, or
17(at your option) any later version.
4ed46869
KH
18
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
23
24You should have received a copy of the GNU General Public License
9ec0b715 25along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869
KH
26
27
28/* Here we handle three objects: category, category set, and category
29 table. Read comments in the file category.h to understand them. */
30
31#include <config.h>
32#include <ctype.h>
33#include "lisp.h"
34#include "buffer.h"
ea012abd 35#include "character.h"
4ed46869
KH
36#include "charset.h"
37#include "category.h"
e35f6ff7 38#include "keymap.h"
4ed46869
KH
39
40/* The version number of the latest category table. Each category
41 table has a unique version number. It is assigned a new number
42 also when it is modified. When a regular expression is compiled
43 into the struct re_pattern_buffer, the version number of the
44 category table (of the current buffer) at that moment is also
45 embedded in the structure.
46
47 For the moment, we are not using this feature. */
48static int category_table_version;
49
50Lisp_Object Qcategory_table, Qcategoryp, Qcategorysetp, Qcategory_table_p;
51
52/* Variables to determine word boundary. */
53Lisp_Object Vword_combining_categories, Vword_separating_categories;
54
55/* Temporary internal variable used in macro CHAR_HAS_CATEGORY. */
56Lisp_Object _temp_category_set;
57
58\f
59/* Category set staff. */
60
cd486875
KH
61static Lisp_Object hash_get_category_set P_ ((Lisp_Object, Lisp_Object));
62
63static Lisp_Object
64hash_get_category_set (table, category_set)
65 Lisp_Object table, category_set;
66{
67 Lisp_Object val;
68 struct Lisp_Hash_Table *h;
69 int i;
70 unsigned hash;
71
72 if (NILP (XCHAR_TABLE (table)->extras[1]))
73 XCHAR_TABLE (table)->extras[1]
74 = make_hash_table (Qequal, make_number (DEFAULT_HASH_SIZE),
75 make_float (DEFAULT_REHASH_SIZE),
76 make_float (DEFAULT_REHASH_THRESHOLD),
77 Qnil, Qnil, Qnil);
78 h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
79 i = hash_lookup (h, category_set, &hash);
80 if (i >= 0)
81 return HASH_KEY (h, i);
82 hash_put (h, category_set, Qnil, hash);
83 return category_set;
84}
85
86
4ed46869 87DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
fdb82f93
PJ
88 doc: /* Return a newly created category-set which contains CATEGORIES.
89CATEGORIES is a string of category mnemonics.
90The value is a bool-vector which has t at the indices corresponding to
91those categories. */)
92 (categories)
4ed46869
KH
93 Lisp_Object categories;
94{
95 Lisp_Object val;
96 int len;
97
b7826503 98 CHECK_STRING (categories);
4ed46869
KH
99 val = MAKE_CATEGORY_SET;
100
115afec3 101 if (STRING_MULTIBYTE (categories))
95030461 102 error ("Multibyte string in `make-category-set'");
115afec3 103
d5db4077 104 len = SCHARS (categories);
4ed46869
KH
105 while (--len >= 0)
106 {
15c60737 107 Lisp_Object category;
4ed46869 108
d5db4077 109 XSETFASTINT (category, SREF (categories, len));
b7826503 110 CHECK_CATEGORY (category);
4ed46869
KH
111 SET_CATEGORY_SET (val, category, Qt);
112 }
113 return val;
114}
115
116\f
117/* Category staff. */
118
119Lisp_Object check_category_table ();
120
121DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
839966f3
KH
122 doc: /* Define CATEGORY as a category which is described by DOCSTRING.
123CATEGORY should be an ASCII printing character in the range ` ' to `~'.
f6d2f045
KH
124DOCSTRING is the documentation string of the category. The first line
125should be a terse text (preferably less than 16 characters),
126and the rest lines should be the full description.
fdb82f93 127The category is defined only in category table TABLE, which defaults to
839966f3 128the current buffer's category table. */)
fdb82f93 129 (category, docstring, table)
4ed46869
KH
130 Lisp_Object category, docstring, table;
131{
b7826503
PJ
132 CHECK_CATEGORY (category);
133 CHECK_STRING (docstring);
4ed46869
KH
134 table = check_category_table (table);
135
136 if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
137 error ("Category `%c' is already defined", XFASTINT (category));
138 CATEGORY_DOCSTRING (table, XFASTINT (category)) = docstring;
139
140 return Qnil;
141}
142
143DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
839966f3
KH
144 doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
145TABLE should be a category table and defaults to the current buffer's
146category table. */)
fdb82f93 147 (category, table)
4ed46869
KH
148 Lisp_Object category, table;
149{
b7826503 150 CHECK_CATEGORY (category);
4ed46869
KH
151 table = check_category_table (table);
152
153 return CATEGORY_DOCSTRING (table, XFASTINT (category));
154}
155
156DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
157 0, 1, 0,
839966f3 158 doc: /* Return a category which is not yet defined in TABLE.
0b6694a5 159If no category remains available, return nil.
839966f3 160The optional argument TABLE specifies which category table to modify;
0b6694a5 161it defaults to the current buffer's category table. */)
fdb82f93 162 (table)
4ed46869
KH
163 Lisp_Object table;
164{
165 int i;
4ed46869
KH
166
167 table = check_category_table (table);
168
169 for (i = ' '; i <= '~'; i++)
170 if (NILP (CATEGORY_DOCSTRING (table, i)))
171 return make_number (i);
172
173 return Qnil;
174}
175
176\f
177/* Category-table staff. */
178
179DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
fdb82f93
PJ
180 doc: /* Return t if ARG is a category table. */)
181 (arg)
4ed46869
KH
182 Lisp_Object arg;
183{
184 if (CHAR_TABLE_P (arg)
ed8ec86d 185 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
4ed46869
KH
186 return Qt;
187 return Qnil;
188}
189
190/* If TABLE is nil, return the current category table. If TABLE is
191 not nil, check the validity of TABLE as a category table. If
192 valid, return TABLE itself, but if not valid, signal an error of
193 wrong-type-argument. */
194
195Lisp_Object
196check_category_table (table)
197 Lisp_Object table;
198{
4ed46869
KH
199 if (NILP (table))
200 return current_buffer->category_table;
88674269 201 CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
4ed46869 202 return table;
177c0ea7 203}
4ed46869
KH
204
205DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
fdb82f93
PJ
206 doc: /* Return the current category table.
207This is the one specified by the current buffer. */)
208 ()
4ed46869
KH
209{
210 return current_buffer->category_table;
211}
212
213DEFUN ("standard-category-table", Fstandard_category_table,
214 Sstandard_category_table, 0, 0, 0,
fdb82f93
PJ
215 doc: /* Return the standard category table.
216This is the one used for new buffers. */)
217 ()
4ed46869
KH
218{
219 return Vstandard_category_table;
220}
221
ea012abd
KH
222
223static void
8f924df7
KH
224copy_category_entry (table, c, val)
225 Lisp_Object table, c, val;
ea012abd 226{
f4b670ef 227 val = Fcopy_sequence (val);
8f924df7
KH
228 if (CONSP (c))
229 char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val);
f4b670ef 230 else
8f924df7 231 char_table_set (table, XINT (c), val);
ea012abd
KH
232}
233
4ed46869
KH
234/* Return a copy of category table TABLE. We can't simply use the
235 function copy-sequence because no contents should be shared between
ed8ec86d 236 the original and the copy. This function is called recursively by
9da95d53 237 binding TABLE to a sub char table. */
4ed46869
KH
238
239Lisp_Object
ed8ec86d 240copy_category_table (table)
4ed46869
KH
241 Lisp_Object table;
242{
ea012abd 243 table = copy_char_table (table);
4ed46869 244
ea012abd
KH
245 if (! NILP (XCHAR_TABLE (table)->defalt))
246 XCHAR_TABLE (table)->defalt
247 = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
248 XCHAR_TABLE (table)->extras[0]
249 = Fcopy_sequence (XCHAR_TABLE (table)->extras[0]);
8f924df7 250 map_char_table (copy_category_entry, Qnil, table, table);
ed8ec86d 251
4ed46869
KH
252 return table;
253}
254
255DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
256 0, 1, 0,
fdb82f93
PJ
257 doc: /* Construct a new category table and return it.
258It is a copy of the TABLE, which defaults to the standard category table. */)
259 (table)
4ed46869
KH
260 Lisp_Object table;
261{
262 if (!NILP (table))
263 check_category_table (table);
264 else
265 table = Vstandard_category_table;
266
9da95d53 267 return copy_category_table (table);
4ed46869
KH
268}
269
70414a3d
KH
270DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
271 0, 0, 0,
fdb82f93
PJ
272 doc: /* Construct a new and empty category table and return it. */)
273 ()
70414a3d
KH
274{
275 Lisp_Object val;
ea012abd 276 int i;
70414a3d
KH
277
278 val = Fmake_char_table (Qcategory_table, Qnil);
279 XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
8f924df7 280 for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
ea012abd 281 XCHAR_TABLE (val)->contents[i] = MAKE_CATEGORY_SET;
70414a3d
KH
282 Fset_char_table_extra_slot (val, make_number (0),
283 Fmake_vector (make_number (95), Qnil));
284 return val;
285}
286
4ed46869 287DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
839966f3
KH
288 doc: /* Specify TABLE as the category table for the current buffer.
289Return TABLE. */)
fdb82f93 290 (table)
4ed46869
KH
291 Lisp_Object table;
292{
6f598a6d 293 int idx;
4ed46869
KH
294 table = check_category_table (table);
295 current_buffer->category_table = table;
296 /* Indicate that this buffer now has a specified category table. */
f6cd0527
GM
297 idx = PER_BUFFER_VAR_IDX (category_table);
298 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
4ed46869
KH
299 return table;
300}
301
302\f
ea012abd
KH
303Lisp_Object
304char_category_set (c)
305 int c;
306{
307 return CHAR_TABLE_REF (current_buffer->category_table, c);
308}
309
4ed46869 310DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
a2c2f196
JB
311 doc: /* Return the category set of CHAR.
312usage: (char-category-set CHAR) */)
fdb82f93 313 (ch)
4ed46869
KH
314 Lisp_Object ch;
315{
b7826503 316 CHECK_NUMBER (ch);
4ed46869
KH
317 return CATEGORY_SET (XFASTINT (ch));
318}
319
320DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
321 Scategory_set_mnemonics, 1, 1, 0,
fdb82f93
PJ
322 doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
323CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
839966f3 324that are indexes where t occurs in the bool-vector.
fdb82f93
PJ
325The return value is a string containing those same categories. */)
326 (category_set)
4ed46869
KH
327 Lisp_Object category_set;
328{
329 int i, j;
330 char str[96];
331
b7826503 332 CHECK_CATEGORY_SET (category_set);
4ed46869
KH
333
334 j = 0;
335 for (i = 32; i < 127; i++)
336 if (CATEGORY_MEMBER (i, category_set))
337 str[j++] = i;
338 str[j] = '\0';
339
340 return build_string (str);
341}
342
4ed46869
KH
343void
344set_category_set (category_set, category, val)
345 Lisp_Object category_set, category, val;
346{
347 do {
348 int idx = XINT (category) / 8;
349 unsigned char bits = 1 << (XINT (category) % 8);
350
351 if (NILP (val))
352 XCATEGORY_SET (category_set)->data[idx] &= ~bits;
353 else
354 XCATEGORY_SET (category_set)->data[idx] |= bits;
355 } while (0);
356}
357
358DEFUN ("modify-category-entry", Fmodify_category_entry,
359 Smodify_category_entry, 2, 4, 0,
fdb82f93
PJ
360 doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
361The category is changed only for table TABLE, which defaults to
8f7e5042
DL
362the current buffer's category table.
363CHARACTER can be either a single character or a cons representing the
364lower and upper ends of an inclusive character range to modify.
fdb82f93
PJ
365If optional fourth argument RESET is non-nil,
366then delete CATEGORY from the category set instead of adding it. */)
367 (character, category, table, reset)
ea4943bf 368 Lisp_Object character, category, table, reset;
4ed46869 369{
4ed46869 370 Lisp_Object set_value; /* Actual value to be set in category sets. */
8f7e5042 371 Lisp_Object category_set;
ea012abd
KH
372 int start, end;
373 int from, to;
4ed46869 374
ea012abd 375 if (INTEGERP (character))
4ed46869 376 {
ea012abd
KH
377 CHECK_CHARACTER (character);
378 start = end = XFASTINT (character);
4ed46869 379 }
ea012abd 380 else
4ed46869 381 {
ea012abd 382 CHECK_CONS (character);
8f924df7
KH
383 CHECK_CHARACTER_CAR (character);
384 CHECK_CHARACTER_CDR (character);
ea012abd
KH
385 start = XFASTINT (XCAR (character));
386 end = XFASTINT (XCDR (character));
4ed46869 387 }
4ed46869 388
b7826503 389 CHECK_CATEGORY (category);
4ed46869 390 table = check_category_table (table);
4ed46869 391
4ed46869
KH
392 if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
393 error ("Undefined category: %c", XFASTINT (category));
4ed46869 394
4ed46869 395 set_value = NILP (reset) ? Qt : Qnil;
4ed46869 396
ea012abd 397 while (start <= end)
4ed46869 398 {
cd486875 399 from = start, to = end;
ea012abd 400 category_set = char_table_ref_and_range (table, start, &from, &to);
0b261f59 401 if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
ea012abd 402 {
d8887a31
KH
403 category_set = Fcopy_sequence (category_set);
404 SET_CATEGORY_SET (category_set, category, set_value);
cd486875
KH
405 category_set = hash_get_category_set (table, category_set);
406 char_table_set_range (table, start, to, category_set);
ea012abd 407 }
ea012abd 408 start = to + 1;
4ed46869 409 }
4ed46869
KH
410
411 return Qnil;
412}
413\f
4ed46869
KH
414/* Return 1 if there is a word boundary between two word-constituent
415 characters C1 and C2 if they appear in this order, else return 0.
416 Use the macro WORD_BOUNDARY_P instead of calling this function
417 directly. */
418
419int
420word_boundary_p (c1, c2)
421 int c1, c2;
422{
423 Lisp_Object category_set1, category_set2;
424 Lisp_Object tail;
425 int default_result;
426
714b2198
KH
427 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
428 CHAR_TABLE_REF (Vchar_script_table, c2)))
4ed46869
KH
429 {
430 tail = Vword_separating_categories;
431 default_result = 0;
432 }
433 else
434 {
435 tail = Vword_combining_categories;
436 default_result = 1;
437 }
438
439 category_set1 = CATEGORY_SET (c1);
440 if (NILP (category_set1))
441 return default_result;
442 category_set2 = CATEGORY_SET (c2);
443 if (NILP (category_set2))
444 return default_result;
445
03699b14 446 for (; CONSP (tail); tail = XCDR (tail))
4ed46869 447 {
03699b14 448 Lisp_Object elt = XCAR (tail);
4ed46869
KH
449
450 if (CONSP (elt)
714b2198
KH
451 && (NILP (XCAR (elt))
452 || (CATEGORYP (XCAR (elt))
453 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)))
454 && (NILP (XCDR (elt))
455 || (CATEGORYP (XCDR (elt))
456 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))))
4ed46869
KH
457 return !default_result;
458 }
459 return default_result;
460}
461
462\f
dfcf069d 463void
4ed46869
KH
464init_category_once ()
465{
466 /* This has to be done here, before we call Fmake_char_table. */
467 Qcategory_table = intern ("category-table");
468 staticpro (&Qcategory_table);
469
470 /* Intern this now in case it isn't already done.
471 Setting this variable twice is harmless.
472 But don't staticpro it here--that is done in alloc.c. */
473 Qchar_table_extra_slots = intern ("char-table-extra-slots");
474
475 /* Now we are ready to set up this property, so we can
476 create category tables. */
477 Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
478
479 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
177c0ea7 480 /* Set a category set which contains nothing to the default. */
4ed46869 481 XCHAR_TABLE (Vstandard_category_table)->defalt = MAKE_CATEGORY_SET;
9da95d53 482 Fset_char_table_extra_slot (Vstandard_category_table, make_number (0),
4ed46869
KH
483 Fmake_vector (make_number (95), Qnil));
484}
485
dfcf069d 486void
4ed46869
KH
487syms_of_category ()
488{
489 Qcategoryp = intern ("categoryp");
490 staticpro (&Qcategoryp);
491 Qcategorysetp = intern ("categorysetp");
492 staticpro (&Qcategorysetp);
493 Qcategory_table_p = intern ("category-table-p");
494 staticpro (&Qcategory_table_p);
495
496 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories,
fdb82f93
PJ
497 doc: /* List of pair (cons) of categories to determine word boundary.
498
499Emacs treats a sequence of word constituent characters as a single
e0f24100 500word (i.e. finds no word boundary between them) only if they belong to
714b2198 501the same script. But, exceptions are allowed in the following cases.
fdb82f93 502
714b2198 503\(1) The case that characters are in different scripts is controlled
fdb82f93
PJ
504by the variable `word-combining-categories'.
505
714b2198 506Emacs finds no word boundary between characters of different scripts
fdb82f93
PJ
507if they have categories matching some element of this list.
508
509More precisely, if an element of this list is a cons of category CAT1
510and CAT2, and a multibyte character C1 which has CAT1 is followed by
511C2 which has CAT2, there's no word boundary between C1 and C2.
512
714b2198
KH
513For instance, to tell that Han characters followed by Hiragana
514characters can form a single word, the element `(?C . ?H)' should be
515in this list.
fdb82f93 516
714b2198 517\(2) The case that character are in the same script is controlled by
fdb82f93
PJ
518the variable `word-separating-categories'.
519
ecdcaa09 520Emacs finds a word boundary between characters of the same script
fdb82f93
PJ
521if they have categories matching some element of this list.
522
523More precisely, if an element of this list is a cons of category CAT1
524and CAT2, and a multibyte character C1 which has CAT1 is followed by
525C2 which has CAT2, there's a word boundary between C1 and C2.
526
714b2198
KH
527For instance, to tell that there's a word boundary between Hiragana
528and Katakana (both are in the same script `kana'),
529the element `(?H . ?K) should be in this list. */);
4ed46869
KH
530
531 Vword_combining_categories = Qnil;
532
533 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories,
fdb82f93
PJ
534 doc: /* List of pair (cons) of categories to determine word boundary.
535See the documentation of the variable `word-combining-categories'. */);
4ed46869
KH
536
537 Vword_separating_categories = Qnil;
538
539 defsubr (&Smake_category_set);
540 defsubr (&Sdefine_category);
541 defsubr (&Scategory_docstring);
542 defsubr (&Sget_unused_category);
543 defsubr (&Scategory_table_p);
544 defsubr (&Scategory_table);
545 defsubr (&Sstandard_category_table);
546 defsubr (&Scopy_category_table);
70414a3d 547 defsubr (&Smake_category_table);
4ed46869
KH
548 defsubr (&Sset_category_table);
549 defsubr (&Schar_category_set);
550 defsubr (&Scategory_set_mnemonics);
551 defsubr (&Smodify_category_entry);
4ed46869
KH
552
553 category_table_version = 0;
554}
839966f3
KH
555
556/* arch-tag: 74ebf524-121b-4d9c-bd68-07f8d708b211
557 (do not change this comment) */