Add 2012 to FSF copyright years for Emacs files
[bpt/emacs.git] / src / casefiddle.c
CommitLineData
dcfdbac7 1/* GNU Emacs case conversion functions.
95df8112 2
acaf905b 3Copyright (C) 1985, 1994, 1997-1999, 2001-2012 Free Software Foundation, Inc.
dcfdbac7
JB
4
5This file is part of GNU Emacs.
6
9ec0b715 7GNU Emacs is free software: you can redistribute it and/or modify
dcfdbac7 8it under the terms of the GNU General Public License as published by
9ec0b715
GM
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
dcfdbac7
JB
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
9ec0b715 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
dcfdbac7
JB
19
20
18160b98 21#include <config.h>
d7306fe6 22#include <setjmp.h>
dcfdbac7
JB
23#include "lisp.h"
24#include "buffer.h"
83be827a 25#include "character.h"
dcfdbac7
JB
26#include "commands.h"
27#include "syntax.h"
66da2880 28#include "composite.h"
e35f6ff7 29#include "keymap.h"
dcfdbac7
JB
30
31enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
a04538da
KH
32
33Lisp_Object Qidentity;
dcfdbac7 34\f
181aa2be 35static Lisp_Object
971de7fb 36casify_object (enum case_action flag, Lisp_Object obj)
dcfdbac7 37{
2422e50a 38 register int c, c1;
dcfdbac7
JB
39 register int inword = flag == CASE_DOWN;
40
bd47bd35 41 /* If the case table is flagged as modified, rescan it. */
4b4deea2
TT
42 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
43 Fset_case_table (BVAR (current_buffer, downcase_table));
bd47bd35 44
0d64f689 45 if (INTEGERP (obj))
dcfdbac7 46 {
0d64f689
KH
47 int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
48 | CHAR_SHIFT | CHAR_CTL | CHAR_META);
49 int flags = XINT (obj) & flagbits;
4b4deea2 50 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
0d64f689
KH
51
52 /* If the character has higher bits set
53 above the flags, return it unchanged.
54 It is not a real character. */
ea204efb 55 if (UNSIGNED_CMP (XFASTINT (obj), >, flagbits))
0d64f689
KH
56 return obj;
57
58 c1 = XFASTINT (obj) & ~flagbits;
1fb5aad7
SM
59 /* FIXME: Even if enable-multibyte-characters is nil, we may
60 manipulate multibyte chars. This means we have a bug for latin-1
61 chars since when we receive an int 128-255 we can't tell whether
62 it's an eight-bit byte or a latin-1 char. */
63 if (c1 >= 256)
64 multibyte = 1;
0d64f689
KH
65 if (! multibyte)
66 MAKE_CHAR_MULTIBYTE (c1);
5da9919f 67 c = downcase (c1);
0d64f689
KH
68 if (inword)
69 XSETFASTINT (obj, c | flags);
70 else if (c == (XFASTINT (obj) & ~flagbits))
dcfdbac7 71 {
0d64f689 72 if (! inword)
5da9919f 73 c = upcase1 (c1);
2422e50a 74 if (! multibyte)
0d64f689
KH
75 MAKE_CHAR_UNIBYTE (c);
76 XSETFASTINT (obj, c | flags);
dcfdbac7 77 }
0d64f689
KH
78 return obj;
79 }
5245463a 80
438eba3c
SM
81 if (!STRINGP (obj))
82 wrong_type_argument (Qchar_or_string_p, obj);
83 else if (!STRING_MULTIBYTE (obj))
0d64f689 84 {
438eba3c
SM
85 EMACS_INT i;
86 EMACS_INT size = SCHARS (obj);
a0615d90 87
0d64f689 88 obj = Fcopy_sequence (obj);
438eba3c 89 for (i = 0; i < size; i++)
0d64f689 90 {
438eba3c 91 c = SREF (obj, i);
4c0354d7 92 MAKE_CHAR_MULTIBYTE (c);
0d64f689
KH
93 c1 = c;
94 if (inword && flag != CASE_CAPITALIZE_UP)
5da9919f
PE
95 c = downcase (c);
96 else if (!uppercasep (c)
0d64f689 97 && (!inword || flag != CASE_CAPITALIZE_UP))
5da9919f 98 c = upcase1 (c1);
0d64f689
KH
99 if ((int) flag >= (int) CASE_CAPITALIZE)
100 inword = (SYNTAX (c) == Sword);
101 if (c != c1)
102 {
0d64f689 103 MAKE_CHAR_UNIBYTE (c);
438eba3c
SM
104 /* If the char can't be converted to a valid byte, just don't
105 change it. */
106 if (c >= 0 && c < 256)
107 SSET (obj, i, c);
108 }
109 }
110 return obj;
994b75e0
SM
111 }
112 else
113 {
114 EMACS_INT i, i_byte, size = SCHARS (obj);
115 int len;
438eba3c
SM
116 USE_SAFE_ALLOCA;
117 unsigned char *dst, *o;
118 /* Over-allocate by 12%: this is a minor overhead, but should be
119 sufficient in 99.999% of the cases to avoid a reallocation. */
120 EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
121 SAFE_ALLOCA (dst, void *, o_size);
122 o = dst;
123
124 for (i = i_byte = 0; i < size; i++, i_byte += len)
125 {
126 if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
127 { /* Not enough space for the next char: grow the destination. */
128 unsigned char *old_dst = dst;
129 o_size += o_size; /* Probably overkill, but extremely rare. */
130 SAFE_ALLOCA (dst, void *, o_size);
72af86bd 131 memcpy (dst, old_dst, o - old_dst);
438eba3c 132 o = dst + (o - old_dst);
a0615d90 133 }
62a6e103 134 c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
438eba3c 135 if (inword && flag != CASE_CAPITALIZE_UP)
5da9919f
PE
136 c = downcase (c);
137 else if (!uppercasep (c)
438eba3c 138 && (!inword || flag != CASE_CAPITALIZE_UP))
5da9919f 139 c = upcase1 (c);
438eba3c
SM
140 if ((int) flag >= (int) CASE_CAPITALIZE)
141 inword = (SYNTAX (c) == Sword);
142 o += CHAR_STRING (c, o);
dcfdbac7 143 }
438eba3c 144 eassert (o - dst <= o_size);
47ce90e4 145 obj = make_multibyte_string ((char *) dst, size, o - dst);
438eba3c 146 SAFE_FREE ();
0d64f689 147 return obj;
dcfdbac7
JB
148 }
149}
150
a7ca3326 151DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
fdb82f93
PJ
152 doc: /* Convert argument to upper case and return that.
153The argument may be a character or string. The result has the same type.
154The argument object is not altered--the value is a copy.
155See also `capitalize', `downcase' and `upcase-initials'. */)
5842a27b 156 (Lisp_Object obj)
dcfdbac7
JB
157{
158 return casify_object (CASE_UP, obj);
159}
160
a7ca3326 161DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
fdb82f93
PJ
162 doc: /* Convert argument to lower case and return that.
163The argument may be a character or string. The result has the same type.
164The argument object is not altered--the value is a copy. */)
5842a27b 165 (Lisp_Object obj)
dcfdbac7
JB
166{
167 return casify_object (CASE_DOWN, obj);
168}
169
170DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
fdb82f93
PJ
171 doc: /* Convert argument to capitalized form and return that.
172This means that each word's first character is upper case
173and the rest is lower case.
174The argument may be a character or string. The result has the same type.
175The argument object is not altered--the value is a copy. */)
5842a27b 176 (Lisp_Object obj)
dcfdbac7
JB
177{
178 return casify_object (CASE_CAPITALIZE, obj);
179}
96927ba4 180
2371fad4
KH
181/* Like Fcapitalize but change only the initials. */
182
a7ca3326 183DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
fdb82f93
PJ
184 doc: /* Convert the initial of each word in the argument to upper case.
185Do not change the other letters of each word.
186The argument may be a character or string. The result has the same type.
187The argument object is not altered--the value is a copy. */)
5842a27b 188 (Lisp_Object obj)
8cef1f78
RS
189{
190 return casify_object (CASE_CAPITALIZE_UP, obj);
191}
dcfdbac7
JB
192\f
193/* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
194 b and e specify range of buffer to operate on. */
195
181aa2be 196static void
971de7fb 197casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
dcfdbac7 198{
dcfdbac7
JB
199 register int c;
200 register int inword = flag == CASE_DOWN;
4b4deea2 201 register int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
7927d8e3 202 EMACS_INT start, end;
79b73827 203 EMACS_INT start_byte;
e45a141a
PE
204
205 /* Position of first and last changes. */
206 EMACS_INT first = -1, last IF_LINT (= 0);
207
7927d8e3
SM
208 EMACS_INT opoint = PT;
209 EMACS_INT opoint_byte = PT_BYTE;
dcfdbac7
JB
210
211 if (EQ (b, e))
212 /* Not modifying because nothing marked */
213 return;
214
bd47bd35 215 /* If the case table is flagged as modified, rescan it. */
4b4deea2
TT
216 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
217 Fset_case_table (BVAR (current_buffer, downcase_table));
bd47bd35 218
dcfdbac7 219 validate_region (&b, &e);
2371fad4
KH
220 start = XFASTINT (b);
221 end = XFASTINT (e);
3e145152 222 modify_region (current_buffer, start, end, 0);
2371fad4 223 record_change (start, end - start);
4c7b7eab 224 start_byte = CHAR_TO_BYTE (start);
dcfdbac7 225
5e617bc2 226 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */
c785836d 227
2422e50a 228 while (start < end)
a04538da 229 {
2422e50a
KH
230 int c2, len;
231
232 if (multibyte)
233 {
234 c = FETCH_MULTIBYTE_CHAR (start_byte);
235 len = CHAR_BYTES (c);
236 }
237 else
238 {
239 c = FETCH_BYTE (start_byte);
240 MAKE_CHAR_MULTIBYTE (c);
241 len = 1;
242 }
243 c2 = c;
a0615d90 244 if (inword && flag != CASE_CAPITALIZE_UP)
5da9919f
PE
245 c = downcase (c);
246 else if (!uppercasep (c)
a0615d90 247 && (!inword || flag != CASE_CAPITALIZE_UP))
5da9919f 248 c = upcase1 (c);
a0615d90 249 if ((int) flag >= (int) CASE_CAPITALIZE)
c5683ceb
SM
250 inword = ((SYNTAX (c) == Sword)
251 && (inword || !syntax_prefix_flag_p (c)));
2422e50a 252 if (c != c2)
a04538da 253 {
7927d8e3
SM
254 last = start;
255 if (first < 0)
256 first = start;
257
2422e50a
KH
258 if (! multibyte)
259 {
260 MAKE_CHAR_UNIBYTE (c);
261 FETCH_BYTE (start_byte) = c;
262 }
263 else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
264 FETCH_BYTE (start_byte) = c;
08588bfa 265 else
a04538da 266 {
08588bfa 267 int tolen = CHAR_BYTES (c);
2422e50a 268 int j;
66da2880 269 unsigned char str[MAX_MULTIBYTE_LENGTH];
a04538da 270
2422e50a 271 CHAR_STRING (c, str);
08588bfa
KH
272 if (len == tolen)
273 {
274 /* Length is unchanged. */
275 for (j = 0; j < len; ++j)
276 FETCH_BYTE (start_byte + j) = str[j];
277 }
278 else
279 {
280 /* Replace one character with the other,
281 keeping text properties the same. */
282 replace_range_2 (start, start_byte,
283 start + 1, start_byte + len,
47ce90e4 284 (char *) str, 1, tolen,
08588bfa
KH
285 0);
286 len = tolen;
287 }
a04538da 288 }
a04538da 289 }
2422e50a
KH
290 start++;
291 start_byte += len;
dcfdbac7
JB
292 }
293
8f924df7
KH
294 if (PT != opoint)
295 TEMP_SET_PT_BOTH (opoint, opoint_byte);
296
7927d8e3 297 if (first >= 0)
66da2880 298 {
7927d8e3
SM
299 signal_after_change (first, last + 1 - first, last + 1 - first);
300 update_compositions (first, last + 1, CHECK_ALL);
66da2880 301 }
dcfdbac7
JB
302}
303
a7ca3326 304DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 2, "r",
fdb82f93
PJ
305 doc: /* Convert the region to upper case. In programs, wants two arguments.
306These arguments specify the starting and ending character numbers of
307the region to operate on. When used as a command, the text between
308point and the mark is operated on.
309See also `capitalize-region'. */)
5842a27b 310 (Lisp_Object beg, Lisp_Object end)
dcfdbac7 311{
8c22d56c 312 casify_region (CASE_UP, beg, end);
dcfdbac7
JB
313 return Qnil;
314}
315
316DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 2, "r",
fdb82f93
PJ
317 doc: /* Convert the region to lower case. In programs, wants two arguments.
318These arguments specify the starting and ending character numbers of
319the region to operate on. When used as a command, the text between
320point and the mark is operated on. */)
5842a27b 321 (Lisp_Object beg, Lisp_Object end)
dcfdbac7 322{
8c22d56c 323 casify_region (CASE_DOWN, beg, end);
dcfdbac7
JB
324 return Qnil;
325}
326
327DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
fdb82f93
PJ
328 doc: /* Convert the region to capitalized form.
329Capitalized form means each word's first character is upper case
330and the rest of it is lower case.
331In programs, give two arguments, the starting and ending
332character positions to operate on. */)
5842a27b 333 (Lisp_Object beg, Lisp_Object end)
dcfdbac7 334{
8c22d56c 335 casify_region (CASE_CAPITALIZE, beg, end);
dcfdbac7
JB
336 return Qnil;
337}
338
2371fad4
KH
339/* Like Fcapitalize_region but change only the initials. */
340
a7ca3326 341DEFUN ("upcase-initials-region", Fupcase_initials_region,
8cef1f78 342 Supcase_initials_region, 2, 2, "r",
fdb82f93
PJ
343 doc: /* Upcase the initial of each word in the region.
344Subsequent letters of each word are not changed.
345In programs, give two arguments, the starting and ending
346character positions to operate on. */)
5842a27b 347 (Lisp_Object beg, Lisp_Object end)
8cef1f78 348{
8c22d56c 349 casify_region (CASE_CAPITALIZE_UP, beg, end);
8cef1f78
RS
350 return Qnil;
351}
dcfdbac7 352\f
438eba3c 353static Lisp_Object
971de7fb 354operate_on_word (Lisp_Object arg, EMACS_INT *newpoint)
dcfdbac7 355{
39fb55ff 356 Lisp_Object val;
4f3a2f8d
EZ
357 EMACS_INT farend;
358 EMACS_INT iarg;
dcfdbac7 359
b7826503 360 CHECK_NUMBER (arg);
2371fad4 361 iarg = XINT (arg);
6ec8bbd2 362 farend = scan_words (PT, iarg);
dcfdbac7 363 if (!farend)
2371fad4 364 farend = iarg > 0 ? ZV : BEGV;
dcfdbac7 365
6ec8bbd2 366 *newpoint = PT > farend ? PT : farend;
18e23fd0 367 XSETFASTINT (val, farend);
dcfdbac7
JB
368
369 return val;
370}
371
372DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
fdb82f93
PJ
373 doc: /* Convert following word (or ARG words) to upper case, moving over.
374With negative argument, convert previous words but do not move.
375See also `capitalize-word'. */)
5842a27b 376 (Lisp_Object arg)
dcfdbac7 377{
34628a90 378 Lisp_Object beg, end;
438eba3c 379 EMACS_INT newpoint;
6ec8bbd2 380 XSETFASTINT (beg, PT);
34628a90
RS
381 end = operate_on_word (arg, &newpoint);
382 casify_region (CASE_UP, beg, end);
383 SET_PT (newpoint);
dcfdbac7
JB
384 return Qnil;
385}
386
387DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
fdb82f93
PJ
388 doc: /* Convert following word (or ARG words) to lower case, moving over.
389With negative argument, convert previous words but do not move. */)
5842a27b 390 (Lisp_Object arg)
dcfdbac7 391{
34628a90 392 Lisp_Object beg, end;
438eba3c 393 EMACS_INT newpoint;
6ec8bbd2 394 XSETFASTINT (beg, PT);
34628a90
RS
395 end = operate_on_word (arg, &newpoint);
396 casify_region (CASE_DOWN, beg, end);
397 SET_PT (newpoint);
dcfdbac7
JB
398 return Qnil;
399}
400
401DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
fdb82f93
PJ
402 doc: /* Capitalize the following word (or ARG words), moving over.
403This gives the word(s) a first character in upper case
404and the rest lower case.
405With negative argument, capitalize previous words but do not move. */)
5842a27b 406 (Lisp_Object arg)
dcfdbac7 407{
34628a90 408 Lisp_Object beg, end;
438eba3c 409 EMACS_INT newpoint;
6ec8bbd2 410 XSETFASTINT (beg, PT);
34628a90
RS
411 end = operate_on_word (arg, &newpoint);
412 casify_region (CASE_CAPITALIZE, beg, end);
413 SET_PT (newpoint);
dcfdbac7
JB
414 return Qnil;
415}
416\f
dfcf069d 417void
971de7fb 418syms_of_casefiddle (void)
dcfdbac7 419{
cd3520a4 420 DEFSYM (Qidentity, "identity");
dcfdbac7
JB
421 defsubr (&Supcase);
422 defsubr (&Sdowncase);
423 defsubr (&Scapitalize);
8cef1f78 424 defsubr (&Supcase_initials);
dcfdbac7
JB
425 defsubr (&Supcase_region);
426 defsubr (&Sdowncase_region);
427 defsubr (&Scapitalize_region);
8cef1f78 428 defsubr (&Supcase_initials_region);
dcfdbac7
JB
429 defsubr (&Supcase_word);
430 defsubr (&Sdowncase_word);
431 defsubr (&Scapitalize_word);
432}
433
dfcf069d 434void
971de7fb 435keys_of_casefiddle (void)
dcfdbac7 436{
5e617bc2 437 initial_define_key (control_x_map, Ctl ('U'), "upcase-region");
d427b66a 438 Fput (intern ("upcase-region"), Qdisabled, Qt);
5e617bc2 439 initial_define_key (control_x_map, Ctl ('L'), "downcase-region");
d427b66a
JB
440 Fput (intern ("downcase-region"), Qdisabled, Qt);
441
dcfdbac7
JB
442 initial_define_key (meta_map, 'u', "upcase-word");
443 initial_define_key (meta_map, 'l', "downcase-word");
444 initial_define_key (meta_map, 'c', "capitalize-word");
445}