Use functions, not macros, for up- and down-casing.
[bpt/emacs.git] / src / casefiddle.c
1 /* GNU Emacs case conversion functions.
2
3 Copyright (C) 1985, 1994, 1997-1999, 2001-2011 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20
21 #include <config.h>
22 #include <setjmp.h>
23 #include "lisp.h"
24 #include "buffer.h"
25 #include "character.h"
26 #include "commands.h"
27 #include "syntax.h"
28 #include "composite.h"
29 #include "keymap.h"
30
31 enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
32
33 Lisp_Object Qidentity;
34 \f
35 static Lisp_Object
36 casify_object (enum case_action flag, Lisp_Object obj)
37 {
38 register int c, c1;
39 register int inword = flag == CASE_DOWN;
40
41 /* If the case table is flagged as modified, rescan it. */
42 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
43 Fset_case_table (BVAR (current_buffer, downcase_table));
44
45 if (INTEGERP (obj))
46 {
47 int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
48 | CHAR_SHIFT | CHAR_CTL | CHAR_META);
49 int flags = XINT (obj) & flagbits;
50 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
51
52 /* If the character has higher bits set
53 above the flags, return it unchanged.
54 It is not a real character. */
55 if ((unsigned) XFASTINT (obj) > (unsigned) flagbits)
56 return obj;
57
58 c1 = XFASTINT (obj) & ~flagbits;
59 /* FIXME: Even if enable-multibyte-characters is nil, we may
60 manipulate multibyte chars. This means we have a bug for latin-1
61 chars since when we receive an int 128-255 we can't tell whether
62 it's an eight-bit byte or a latin-1 char. */
63 if (c1 >= 256)
64 multibyte = 1;
65 if (! multibyte)
66 MAKE_CHAR_MULTIBYTE (c1);
67 c = downcase (c1);
68 if (inword)
69 XSETFASTINT (obj, c | flags);
70 else if (c == (XFASTINT (obj) & ~flagbits))
71 {
72 if (! inword)
73 c = upcase1 (c1);
74 if (! multibyte)
75 MAKE_CHAR_UNIBYTE (c);
76 XSETFASTINT (obj, c | flags);
77 }
78 return obj;
79 }
80
81 if (!STRINGP (obj))
82 wrong_type_argument (Qchar_or_string_p, obj);
83 else if (!STRING_MULTIBYTE (obj))
84 {
85 EMACS_INT i;
86 EMACS_INT size = SCHARS (obj);
87
88 obj = Fcopy_sequence (obj);
89 for (i = 0; i < size; i++)
90 {
91 c = SREF (obj, i);
92 MAKE_CHAR_MULTIBYTE (c);
93 c1 = c;
94 if (inword && flag != CASE_CAPITALIZE_UP)
95 c = downcase (c);
96 else if (!uppercasep (c)
97 && (!inword || flag != CASE_CAPITALIZE_UP))
98 c = upcase1 (c1);
99 if ((int) flag >= (int) CASE_CAPITALIZE)
100 inword = (SYNTAX (c) == Sword);
101 if (c != c1)
102 {
103 MAKE_CHAR_UNIBYTE (c);
104 /* If the char can't be converted to a valid byte, just don't
105 change it. */
106 if (c >= 0 && c < 256)
107 SSET (obj, i, c);
108 }
109 }
110 return obj;
111 }
112 else
113 {
114 EMACS_INT i, i_byte, size = SCHARS (obj);
115 int len;
116 USE_SAFE_ALLOCA;
117 unsigned char *dst, *o;
118 /* Over-allocate by 12%: this is a minor overhead, but should be
119 sufficient in 99.999% of the cases to avoid a reallocation. */
120 EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
121 SAFE_ALLOCA (dst, void *, o_size);
122 o = dst;
123
124 for (i = i_byte = 0; i < size; i++, i_byte += len)
125 {
126 if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
127 { /* Not enough space for the next char: grow the destination. */
128 unsigned char *old_dst = dst;
129 o_size += o_size; /* Probably overkill, but extremely rare. */
130 SAFE_ALLOCA (dst, void *, o_size);
131 memcpy (dst, old_dst, o - old_dst);
132 o = dst + (o - old_dst);
133 }
134 c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
135 if (inword && flag != CASE_CAPITALIZE_UP)
136 c = downcase (c);
137 else if (!uppercasep (c)
138 && (!inword || flag != CASE_CAPITALIZE_UP))
139 c = upcase1 (c);
140 if ((int) flag >= (int) CASE_CAPITALIZE)
141 inword = (SYNTAX (c) == Sword);
142 o += CHAR_STRING (c, o);
143 }
144 eassert (o - dst <= o_size);
145 obj = make_multibyte_string ((char *) dst, size, o - dst);
146 SAFE_FREE ();
147 return obj;
148 }
149 }
150
151 DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
152 doc: /* Convert argument to upper case and return that.
153 The argument may be a character or string. The result has the same type.
154 The argument object is not altered--the value is a copy.
155 See also `capitalize', `downcase' and `upcase-initials'. */)
156 (Lisp_Object obj)
157 {
158 return casify_object (CASE_UP, obj);
159 }
160
161 DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
162 doc: /* Convert argument to lower case and return that.
163 The argument may be a character or string. The result has the same type.
164 The argument object is not altered--the value is a copy. */)
165 (Lisp_Object obj)
166 {
167 return casify_object (CASE_DOWN, obj);
168 }
169
170 DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
171 doc: /* Convert argument to capitalized form and return that.
172 This means that each word's first character is upper case
173 and the rest is lower case.
174 The argument may be a character or string. The result has the same type.
175 The argument object is not altered--the value is a copy. */)
176 (Lisp_Object obj)
177 {
178 return casify_object (CASE_CAPITALIZE, obj);
179 }
180
181 /* Like Fcapitalize but change only the initials. */
182
183 DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
184 doc: /* Convert the initial of each word in the argument to upper case.
185 Do not change the other letters of each word.
186 The argument may be a character or string. The result has the same type.
187 The argument object is not altered--the value is a copy. */)
188 (Lisp_Object obj)
189 {
190 return casify_object (CASE_CAPITALIZE_UP, obj);
191 }
192 \f
193 /* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
194 b and e specify range of buffer to operate on. */
195
196 static void
197 casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
198 {
199 register int c;
200 register int inword = flag == CASE_DOWN;
201 register int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
202 EMACS_INT start, end;
203 EMACS_INT start_byte, end_byte;
204
205 /* Position of first and last changes. */
206 EMACS_INT first = -1, last IF_LINT (= 0);
207
208 EMACS_INT opoint = PT;
209 EMACS_INT opoint_byte = PT_BYTE;
210
211 if (EQ (b, e))
212 /* Not modifying because nothing marked */
213 return;
214
215 /* If the case table is flagged as modified, rescan it. */
216 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
217 Fset_case_table (BVAR (current_buffer, downcase_table));
218
219 validate_region (&b, &e);
220 start = XFASTINT (b);
221 end = XFASTINT (e);
222 modify_region (current_buffer, start, end, 0);
223 record_change (start, end - start);
224 start_byte = CHAR_TO_BYTE (start);
225 end_byte = CHAR_TO_BYTE (end);
226
227 SETUP_BUFFER_SYNTAX_TABLE(); /* For syntax_prefix_flag_p. */
228
229 while (start < end)
230 {
231 int c2, len;
232
233 if (multibyte)
234 {
235 c = FETCH_MULTIBYTE_CHAR (start_byte);
236 len = CHAR_BYTES (c);
237 }
238 else
239 {
240 c = FETCH_BYTE (start_byte);
241 MAKE_CHAR_MULTIBYTE (c);
242 len = 1;
243 }
244 c2 = c;
245 if (inword && flag != CASE_CAPITALIZE_UP)
246 c = downcase (c);
247 else if (!uppercasep (c)
248 && (!inword || flag != CASE_CAPITALIZE_UP))
249 c = upcase1 (c);
250 if ((int) flag >= (int) CASE_CAPITALIZE)
251 inword = ((SYNTAX (c) == Sword)
252 && (inword || !syntax_prefix_flag_p (c)));
253 if (c != c2)
254 {
255 last = start;
256 if (first < 0)
257 first = start;
258
259 if (! multibyte)
260 {
261 MAKE_CHAR_UNIBYTE (c);
262 FETCH_BYTE (start_byte) = c;
263 }
264 else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
265 FETCH_BYTE (start_byte) = c;
266 else
267 {
268 int tolen = CHAR_BYTES (c);
269 int j;
270 unsigned char str[MAX_MULTIBYTE_LENGTH];
271
272 CHAR_STRING (c, str);
273 if (len == tolen)
274 {
275 /* Length is unchanged. */
276 for (j = 0; j < len; ++j)
277 FETCH_BYTE (start_byte + j) = str[j];
278 }
279 else
280 {
281 /* Replace one character with the other,
282 keeping text properties the same. */
283 replace_range_2 (start, start_byte,
284 start + 1, start_byte + len,
285 (char *) str, 1, tolen,
286 0);
287 len = tolen;
288 }
289 }
290 }
291 start++;
292 start_byte += len;
293 }
294
295 if (PT != opoint)
296 TEMP_SET_PT_BOTH (opoint, opoint_byte);
297
298 if (first >= 0)
299 {
300 signal_after_change (first, last + 1 - first, last + 1 - first);
301 update_compositions (first, last + 1, CHECK_ALL);
302 }
303 }
304
305 DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 2, "r",
306 doc: /* Convert the region to upper case. In programs, wants two arguments.
307 These arguments specify the starting and ending character numbers of
308 the region to operate on. When used as a command, the text between
309 point and the mark is operated on.
310 See also `capitalize-region'. */)
311 (Lisp_Object beg, Lisp_Object end)
312 {
313 casify_region (CASE_UP, beg, end);
314 return Qnil;
315 }
316
317 DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 2, "r",
318 doc: /* Convert the region to lower case. In programs, wants two arguments.
319 These arguments specify the starting and ending character numbers of
320 the region to operate on. When used as a command, the text between
321 point and the mark is operated on. */)
322 (Lisp_Object beg, Lisp_Object end)
323 {
324 casify_region (CASE_DOWN, beg, end);
325 return Qnil;
326 }
327
328 DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
329 doc: /* Convert the region to capitalized form.
330 Capitalized form means each word's first character is upper case
331 and the rest of it is lower case.
332 In programs, give two arguments, the starting and ending
333 character positions to operate on. */)
334 (Lisp_Object beg, Lisp_Object end)
335 {
336 casify_region (CASE_CAPITALIZE, beg, end);
337 return Qnil;
338 }
339
340 /* Like Fcapitalize_region but change only the initials. */
341
342 DEFUN ("upcase-initials-region", Fupcase_initials_region,
343 Supcase_initials_region, 2, 2, "r",
344 doc: /* Upcase the initial of each word in the region.
345 Subsequent letters of each word are not changed.
346 In programs, give two arguments, the starting and ending
347 character positions to operate on. */)
348 (Lisp_Object beg, Lisp_Object end)
349 {
350 casify_region (CASE_CAPITALIZE_UP, beg, end);
351 return Qnil;
352 }
353 \f
354 static Lisp_Object
355 operate_on_word (Lisp_Object arg, EMACS_INT *newpoint)
356 {
357 Lisp_Object val;
358 EMACS_INT farend;
359 EMACS_INT iarg;
360
361 CHECK_NUMBER (arg);
362 iarg = XINT (arg);
363 farend = scan_words (PT, iarg);
364 if (!farend)
365 farend = iarg > 0 ? ZV : BEGV;
366
367 *newpoint = PT > farend ? PT : farend;
368 XSETFASTINT (val, farend);
369
370 return val;
371 }
372
373 DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
374 doc: /* Convert following word (or ARG words) to upper case, moving over.
375 With negative argument, convert previous words but do not move.
376 See also `capitalize-word'. */)
377 (Lisp_Object arg)
378 {
379 Lisp_Object beg, end;
380 EMACS_INT newpoint;
381 XSETFASTINT (beg, PT);
382 end = operate_on_word (arg, &newpoint);
383 casify_region (CASE_UP, beg, end);
384 SET_PT (newpoint);
385 return Qnil;
386 }
387
388 DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
389 doc: /* Convert following word (or ARG words) to lower case, moving over.
390 With negative argument, convert previous words but do not move. */)
391 (Lisp_Object arg)
392 {
393 Lisp_Object beg, end;
394 EMACS_INT newpoint;
395 XSETFASTINT (beg, PT);
396 end = operate_on_word (arg, &newpoint);
397 casify_region (CASE_DOWN, beg, end);
398 SET_PT (newpoint);
399 return Qnil;
400 }
401
402 DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
403 doc: /* Capitalize the following word (or ARG words), moving over.
404 This gives the word(s) a first character in upper case
405 and the rest lower case.
406 With negative argument, capitalize previous words but do not move. */)
407 (Lisp_Object arg)
408 {
409 Lisp_Object beg, end;
410 EMACS_INT newpoint;
411 XSETFASTINT (beg, PT);
412 end = operate_on_word (arg, &newpoint);
413 casify_region (CASE_CAPITALIZE, beg, end);
414 SET_PT (newpoint);
415 return Qnil;
416 }
417 \f
418 void
419 syms_of_casefiddle (void)
420 {
421 Qidentity = intern_c_string ("identity");
422 staticpro (&Qidentity);
423 defsubr (&Supcase);
424 defsubr (&Sdowncase);
425 defsubr (&Scapitalize);
426 defsubr (&Supcase_initials);
427 defsubr (&Supcase_region);
428 defsubr (&Sdowncase_region);
429 defsubr (&Scapitalize_region);
430 defsubr (&Supcase_initials_region);
431 defsubr (&Supcase_word);
432 defsubr (&Sdowncase_word);
433 defsubr (&Scapitalize_word);
434 }
435
436 void
437 keys_of_casefiddle (void)
438 {
439 initial_define_key (control_x_map, Ctl('U'), "upcase-region");
440 Fput (intern ("upcase-region"), Qdisabled, Qt);
441 initial_define_key (control_x_map, Ctl('L'), "downcase-region");
442 Fput (intern ("downcase-region"), Qdisabled, Qt);
443
444 initial_define_key (meta_map, 'u', "upcase-word");
445 initial_define_key (meta_map, 'l', "downcase-word");
446 initial_define_key (meta_map, 'c', "capitalize-word");
447 }