* buffer.h (FETCH_MULTIBYTE_CHAR): Define as inline.
[bpt/emacs.git] / src / casefiddle.c
1 /* GNU Emacs case conversion functions.
2
3 Copyright (C) 1985, 1994, 1997-1999, 2001-2012 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20
21 #include <config.h>
22 #include <setjmp.h>
23 #include "lisp.h"
24 #include "character.h"
25 #include "buffer.h"
26 #include "commands.h"
27 #include "syntax.h"
28 #include "composite.h"
29 #include "keymap.h"
30
/* Kinds of case conversion.  The numeric order matters: the conversion
   loops test `flag >= CASE_CAPITALIZE' to recognize the two
   word-sensitive actions.  */
enum case_action
{
  CASE_UP,              /* Upcase.  */
  CASE_DOWN,            /* Downcase.  */
  CASE_CAPITALIZE,      /* Upcase word initials, downcase the rest.  */
  CASE_CAPITALIZE_UP    /* Upcase word initials, leave the rest alone.  */
};
32
33 Lisp_Object Qidentity;
34 \f
35 static Lisp_Object
36 casify_object (enum case_action flag, Lisp_Object obj)
37 {
38 register int c, c1;
39 register int inword = flag == CASE_DOWN;
40
41 /* If the case table is flagged as modified, rescan it. */
42 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
43 Fset_case_table (BVAR (current_buffer, downcase_table));
44
45 if (INTEGERP (obj))
46 {
47 int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
48 | CHAR_SHIFT | CHAR_CTL | CHAR_META);
49 int flags = XINT (obj) & flagbits;
50 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
51
52 /* If the character has higher bits set
53 above the flags, return it unchanged.
54 It is not a real character. */
55 if (UNSIGNED_CMP (XFASTINT (obj), >, flagbits))
56 return obj;
57
58 c1 = XFASTINT (obj) & ~flagbits;
59 /* FIXME: Even if enable-multibyte-characters is nil, we may
60 manipulate multibyte chars. This means we have a bug for latin-1
61 chars since when we receive an int 128-255 we can't tell whether
62 it's an eight-bit byte or a latin-1 char. */
63 if (c1 >= 256)
64 multibyte = 1;
65 if (! multibyte)
66 MAKE_CHAR_MULTIBYTE (c1);
67 c = downcase (c1);
68 if (inword)
69 XSETFASTINT (obj, c | flags);
70 else if (c == (XFASTINT (obj) & ~flagbits))
71 {
72 if (! inword)
73 c = upcase1 (c1);
74 if (! multibyte)
75 MAKE_CHAR_UNIBYTE (c);
76 XSETFASTINT (obj, c | flags);
77 }
78 return obj;
79 }
80
81 if (!STRINGP (obj))
82 wrong_type_argument (Qchar_or_string_p, obj);
83 else if (!STRING_MULTIBYTE (obj))
84 {
85 ptrdiff_t i;
86 ptrdiff_t size = SCHARS (obj);
87
88 obj = Fcopy_sequence (obj);
89 for (i = 0; i < size; i++)
90 {
91 c = SREF (obj, i);
92 MAKE_CHAR_MULTIBYTE (c);
93 c1 = c;
94 if (inword && flag != CASE_CAPITALIZE_UP)
95 c = downcase (c);
96 else if (!uppercasep (c)
97 && (!inword || flag != CASE_CAPITALIZE_UP))
98 c = upcase1 (c1);
99 if ((int) flag >= (int) CASE_CAPITALIZE)
100 inword = (SYNTAX (c) == Sword);
101 if (c != c1)
102 {
103 MAKE_CHAR_UNIBYTE (c);
104 /* If the char can't be converted to a valid byte, just don't
105 change it. */
106 if (c >= 0 && c < 256)
107 SSET (obj, i, c);
108 }
109 }
110 return obj;
111 }
112 else
113 {
114 ptrdiff_t i, i_byte, size = SCHARS (obj);
115 int len;
116 USE_SAFE_ALLOCA;
117 unsigned char *dst, *o;
118 ptrdiff_t o_size = (size < STRING_BYTES_BOUND / MAX_MULTIBYTE_LENGTH
119 ? size * MAX_MULTIBYTE_LENGTH
120 : STRING_BYTES_BOUND);
121 SAFE_ALLOCA (dst, void *, o_size);
122 o = dst;
123
124 for (i = i_byte = 0; i < size; i++, i_byte += len)
125 {
126 if (o_size - (o - dst) < MAX_MULTIBYTE_LENGTH)
127 string_overflow ();
128 c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
129 if (inword && flag != CASE_CAPITALIZE_UP)
130 c = downcase (c);
131 else if (!uppercasep (c)
132 && (!inword || flag != CASE_CAPITALIZE_UP))
133 c = upcase1 (c);
134 if ((int) flag >= (int) CASE_CAPITALIZE)
135 inword = (SYNTAX (c) == Sword);
136 o += CHAR_STRING (c, o);
137 }
138 eassert (o - dst <= o_size);
139 obj = make_multibyte_string ((char *) dst, size, o - dst);
140 SAFE_FREE ();
141 return obj;
142 }
143 }
144
145 DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
146 doc: /* Convert argument to upper case and return that.
147 The argument may be a character or string. The result has the same type.
148 The argument object is not altered--the value is a copy.
149 See also `capitalize', `downcase' and `upcase-initials'. */)
150 (Lisp_Object obj)
151 {
152 return casify_object (CASE_UP, obj);
153 }
154
155 DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
156 doc: /* Convert argument to lower case and return that.
157 The argument may be a character or string. The result has the same type.
158 The argument object is not altered--the value is a copy. */)
159 (Lisp_Object obj)
160 {
161 return casify_object (CASE_DOWN, obj);
162 }
163
164 DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
165 doc: /* Convert argument to capitalized form and return that.
166 This means that each word's first character is upper case
167 and the rest is lower case.
168 The argument may be a character or string. The result has the same type.
169 The argument object is not altered--the value is a copy. */)
170 (Lisp_Object obj)
171 {
172 return casify_object (CASE_CAPITALIZE, obj);
173 }
174
175 /* Like Fcapitalize but change only the initials. */
176
177 DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
178 doc: /* Convert the initial of each word in the argument to upper case.
179 Do not change the other letters of each word.
180 The argument may be a character or string. The result has the same type.
181 The argument object is not altered--the value is a copy. */)
182 (Lisp_Object obj)
183 {
184 return casify_object (CASE_CAPITALIZE_UP, obj);
185 }
186 \f
187 /* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
188 b and e specify range of buffer to operate on. */
189
190 static void
191 casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
192 {
193 register int c;
194 register int inword = flag == CASE_DOWN;
195 register int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
196 ptrdiff_t start, end;
197 ptrdiff_t start_byte;
198
199 /* Position of first and last changes. */
200 ptrdiff_t first = -1, last IF_LINT (= 0);
201
202 ptrdiff_t opoint = PT;
203 ptrdiff_t opoint_byte = PT_BYTE;
204
205 if (EQ (b, e))
206 /* Not modifying because nothing marked */
207 return;
208
209 /* If the case table is flagged as modified, rescan it. */
210 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
211 Fset_case_table (BVAR (current_buffer, downcase_table));
212
213 validate_region (&b, &e);
214 start = XFASTINT (b);
215 end = XFASTINT (e);
216 modify_region (current_buffer, start, end, 0);
217 record_change (start, end - start);
218 start_byte = CHAR_TO_BYTE (start);
219
220 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */
221
222 while (start < end)
223 {
224 int c2, len;
225
226 if (multibyte)
227 {
228 c = FETCH_MULTIBYTE_CHAR (start_byte);
229 len = CHAR_BYTES (c);
230 }
231 else
232 {
233 c = FETCH_BYTE (start_byte);
234 MAKE_CHAR_MULTIBYTE (c);
235 len = 1;
236 }
237 c2 = c;
238 if (inword && flag != CASE_CAPITALIZE_UP)
239 c = downcase (c);
240 else if (!uppercasep (c)
241 && (!inword || flag != CASE_CAPITALIZE_UP))
242 c = upcase1 (c);
243 if ((int) flag >= (int) CASE_CAPITALIZE)
244 inword = ((SYNTAX (c) == Sword)
245 && (inword || !syntax_prefix_flag_p (c)));
246 if (c != c2)
247 {
248 last = start;
249 if (first < 0)
250 first = start;
251
252 if (! multibyte)
253 {
254 MAKE_CHAR_UNIBYTE (c);
255 FETCH_BYTE (start_byte) = c;
256 }
257 else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
258 FETCH_BYTE (start_byte) = c;
259 else
260 {
261 int tolen = CHAR_BYTES (c);
262 int j;
263 unsigned char str[MAX_MULTIBYTE_LENGTH];
264
265 CHAR_STRING (c, str);
266 if (len == tolen)
267 {
268 /* Length is unchanged. */
269 for (j = 0; j < len; ++j)
270 FETCH_BYTE (start_byte + j) = str[j];
271 }
272 else
273 {
274 /* Replace one character with the other,
275 keeping text properties the same. */
276 replace_range_2 (start, start_byte,
277 start + 1, start_byte + len,
278 (char *) str, 1, tolen,
279 0);
280 len = tolen;
281 }
282 }
283 }
284 start++;
285 start_byte += len;
286 }
287
288 if (PT != opoint)
289 TEMP_SET_PT_BOTH (opoint, opoint_byte);
290
291 if (first >= 0)
292 {
293 signal_after_change (first, last + 1 - first, last + 1 - first);
294 update_compositions (first, last + 1, CHECK_ALL);
295 }
296 }
297
298 DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 2, "r",
299 doc: /* Convert the region to upper case. In programs, wants two arguments.
300 These arguments specify the starting and ending character numbers of
301 the region to operate on. When used as a command, the text between
302 point and the mark is operated on.
303 See also `capitalize-region'. */)
304 (Lisp_Object beg, Lisp_Object end)
305 {
306 casify_region (CASE_UP, beg, end);
307 return Qnil;
308 }
309
310 DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 2, "r",
311 doc: /* Convert the region to lower case. In programs, wants two arguments.
312 These arguments specify the starting and ending character numbers of
313 the region to operate on. When used as a command, the text between
314 point and the mark is operated on. */)
315 (Lisp_Object beg, Lisp_Object end)
316 {
317 casify_region (CASE_DOWN, beg, end);
318 return Qnil;
319 }
320
321 DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
322 doc: /* Convert the region to capitalized form.
323 Capitalized form means each word's first character is upper case
324 and the rest of it is lower case.
325 In programs, give two arguments, the starting and ending
326 character positions to operate on. */)
327 (Lisp_Object beg, Lisp_Object end)
328 {
329 casify_region (CASE_CAPITALIZE, beg, end);
330 return Qnil;
331 }
332
333 /* Like Fcapitalize_region but change only the initials. */
334
335 DEFUN ("upcase-initials-region", Fupcase_initials_region,
336 Supcase_initials_region, 2, 2, "r",
337 doc: /* Upcase the initial of each word in the region.
338 Subsequent letters of each word are not changed.
339 In programs, give two arguments, the starting and ending
340 character positions to operate on. */)
341 (Lisp_Object beg, Lisp_Object end)
342 {
343 casify_region (CASE_CAPITALIZE_UP, beg, end);
344 return Qnil;
345 }
346 \f
347 static Lisp_Object
348 operate_on_word (Lisp_Object arg, ptrdiff_t *newpoint)
349 {
350 Lisp_Object val;
351 ptrdiff_t farend;
352 EMACS_INT iarg;
353
354 CHECK_NUMBER (arg);
355 iarg = XINT (arg);
356 farend = scan_words (PT, iarg);
357 if (!farend)
358 farend = iarg > 0 ? ZV : BEGV;
359
360 *newpoint = PT > farend ? PT : farend;
361 XSETFASTINT (val, farend);
362
363 return val;
364 }
365
366 DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
367 doc: /* Convert following word (or ARG words) to upper case, moving over.
368 With negative argument, convert previous words but do not move.
369 See also `capitalize-word'. */)
370 (Lisp_Object arg)
371 {
372 Lisp_Object beg, end;
373 ptrdiff_t newpoint;
374 XSETFASTINT (beg, PT);
375 end = operate_on_word (arg, &newpoint);
376 casify_region (CASE_UP, beg, end);
377 SET_PT (newpoint);
378 return Qnil;
379 }
380
381 DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
382 doc: /* Convert following word (or ARG words) to lower case, moving over.
383 With negative argument, convert previous words but do not move. */)
384 (Lisp_Object arg)
385 {
386 Lisp_Object beg, end;
387 ptrdiff_t newpoint;
388 XSETFASTINT (beg, PT);
389 end = operate_on_word (arg, &newpoint);
390 casify_region (CASE_DOWN, beg, end);
391 SET_PT (newpoint);
392 return Qnil;
393 }
394
395 DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
396 doc: /* Capitalize the following word (or ARG words), moving over.
397 This gives the word(s) a first character in upper case
398 and the rest lower case.
399 With negative argument, capitalize previous words but do not move. */)
400 (Lisp_Object arg)
401 {
402 Lisp_Object beg, end;
403 ptrdiff_t newpoint;
404 XSETFASTINT (beg, PT);
405 end = operate_on_word (arg, &newpoint);
406 casify_region (CASE_CAPITALIZE, beg, end);
407 SET_PT (newpoint);
408 return Qnil;
409 }
410 \f
411 void
412 syms_of_casefiddle (void)
413 {
414 DEFSYM (Qidentity, "identity");
415 defsubr (&Supcase);
416 defsubr (&Sdowncase);
417 defsubr (&Scapitalize);
418 defsubr (&Supcase_initials);
419 defsubr (&Supcase_region);
420 defsubr (&Sdowncase_region);
421 defsubr (&Scapitalize_region);
422 defsubr (&Supcase_initials_region);
423 defsubr (&Supcase_word);
424 defsubr (&Sdowncase_word);
425 defsubr (&Scapitalize_word);
426 }
427
428 void
429 keys_of_casefiddle (void)
430 {
431 initial_define_key (control_x_map, Ctl ('U'), "upcase-region");
432 Fput (intern ("upcase-region"), Qdisabled, Qt);
433 initial_define_key (control_x_map, Ctl ('L'), "downcase-region");
434 Fput (intern ("downcase-region"), Qdisabled, Qt);
435
436 initial_define_key (meta_map, 'u', "upcase-word");
437 initial_define_key (meta_map, 'l', "downcase-word");
438 initial_define_key (meta_map, 'c', "capitalize-word");
439 }