(casify_region): Fix multibyte case.
[bpt/emacs.git] / src / character.c
CommitLineData
0168c3d8
KH
1/* Basic character support.
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
15
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
25
26/* At first, see the document in `character.h' to understand the code
27 in this file. */
28
29#ifdef emacs
30#include <config.h>
31#endif
32
33#include <stdio.h>
34
35#ifdef emacs
36
37#include <sys/types.h>
38#include "lisp.h"
39#include "character.h"
40#include "buffer.h"
41#include "charset.h"
42#include "composite.h"
43#include "disptab.h"
44
45#else /* not emacs */
46
47#include "mulelib.h"
48
49#endif /* emacs */
50
51Lisp_Object Qcharacterp;
52
53/* Vector of translation table ever defined.
54 ID of a translation table is used to index this vector. */
55Lisp_Object Vtranslation_table_vector;
56
57/* A char-table for characters which may invoke auto-filling. */
58Lisp_Object Vauto_fill_chars;
59
60Lisp_Object Qauto_fill_chars;
61
62Lisp_Object Vchar_unify_table;
63
64/* A char-table. An element is non-nil iff the corresponding
65 character has a printable glyph. */
66Lisp_Object Vprintable_chars;
67
68/* A char-table. An elemnent is a column-width of the corresponding
69 character. */
70Lisp_Object Vchar_width_table;
71
72/* A char-table. An element is a symbol indicating the direction
73 property of corresponding character. */
74Lisp_Object Vchar_direction_table;
75
76/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
77unsigned char *_fetch_multibyte_char_p;
78int _fetch_multibyte_char_len;
79
80\f
81
82int
83char_string_with_unification (c, p, advanced)
84 int c;
85 unsigned char *p, **advanced;
86{
87 int bytes;
88
89 MAYBE_UNIFY_CHAR (c);
90
91 if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
92 {
93 bytes = CHAR_STRING (c, p);
94 }
95 else if (c <= MAX_4_BYTE_CHAR)
96 {
97 p[0] = (0xF0 | (c >> 18));
98 p[1] = (0x80 | ((c >> 12) & 0x3F));
99 p[2] = (0x80 | ((c >> 6) & 0x3F));
100 p[3] = (0x80 | (c & 0x3F));
101 bytes = 4;
102 }
103 else
104 {
105 p[0] = 0xF8;
106 p[1] = (0x80 | ((c >> 18) & 0x0F));
107 p[2] = (0x80 | ((c >> 12) & 0x3F));
108 p[3] = (0x80 | ((c >> 6) & 0x3F));
109 p[4] = (0x80 | (c & 0x3F));
110 bytes = 5;
111 }
112 if (advanced)
113 *advanced = p + bytes;
114 return bytes;
115}
116
117
118int
119string_char_with_unification (p, advanced, len)
120 unsigned char *p, **advanced;
121 int *len;
122{
123 int c, unified;
124 unsigned char *saved_p = p;
125
126 if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
127 {
128 c = STRING_CHAR_ADVANCE (p);
129 }
130 else if (! (*p & 0x08))
131 {
132 c = ((((p)[0] & 0xF) << 18)
133 | (((p)[1] & 0x3F) << 12)
134 | (((p)[2] & 0x3F) << 6)
135 | ((p)[3] & 0x3F));
136 p += 4;
137 }
138 else
139 {
140 c = ((((p)[1] & 0x3F) << 18)
141 | (((p)[2] & 0x3F) << 12)
142 | (((p)[3] & 0x3F) << 6)
143 | ((p)[4] & 0x3F));
144 p += 5;
145 }
146
147 MAYBE_UNIFY_CHAR (c);
148
149 if (len)
150 *len = p - saved_p;
151 if (advanced)
152 *advanced = p;
153 return c;
154}
155
156
157/* Translate character C by translation table TABLE. If C is
158 negative, translate a character specified by CHARSET and CODE. If
159 no translation is found in TABLE, return the untranslated
160 character. */
161
162int
163translate_char (table, c)
164 Lisp_Object table;
165 int c;
166{
167 Lisp_Object ch;
168
169 if (! CHAR_TABLE_P (table))
170 return c;
171 ch = CHAR_TABLE_REF (table, c);
172 if (! CHARACTERP (ch))
173 return c;
174 return XINT (ch);
175}
176
177/* Convert the unibyte character C to the corresponding multibyte
178 character based on the current value of charset_primary. If C
179 can't be converted, return C. */
180
181int
182unibyte_char_to_multibyte (c)
183 int c;
184{
185 struct charset *charset = CHARSET_FROM_ID (charset_primary);
186 int c1 = DECODE_CHAR (charset, c);
187
188 return ((c1 >= 0) ? c1 : c);
189}
190
191
192/* Convert the multibyte character C to unibyte 8-bit character based
193 on the current value of charset_primary. If dimension of
194 charset_primary is more than one, return (C & 0xFF).
195
196 The argument REV_TBL is now ignored. It will be removed in the
197 future. */
198
199int
200multibyte_char_to_unibyte (c, rev_tbl)
201 int c;
202 Lisp_Object rev_tbl;
203{
204 struct charset *charset = CHARSET_FROM_ID (charset_primary);
205 unsigned c1 = ENCODE_CHAR (charset, c);
206
207 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
208}
209
210
211DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
212 doc: /* Return non-nil if OBJECT is a character. */)
213 (object, ignore)
214 Lisp_Object object, ignore;
215{
216 return (CHARACTERP (object) ? Qt : Qnil);
217}
218
219DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
220 doc: /* Return the character of the maximum code. */)
221 ()
222{
223 return make_number (MAX_CHAR);
224}
225
226DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
227 Sunibyte_char_to_multibyte, 1, 1, 0,
228 doc: /* Convert the unibyte character CH to multibyte character.
229The multibyte character is a result of decoding CH by
230the current primary charset (value of `charset-primary'). */)
231 (ch)
232 Lisp_Object ch;
233{
234 int c;
235 struct charset *charset;
236
237 CHECK_CHARACTER (ch);
238 c = XFASTINT (ch);
239 if (c >= 0400)
240 error ("Invalid unibyte character: %d", c);
241 charset = CHARSET_FROM_ID (charset_primary);
242 c = DECODE_CHAR (charset, c);
243 if (c < 0)
244 error ("Can't convert to multibyte character: %d", XINT (ch));
245 return make_number (c);
246}
247
248DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
249 Smultibyte_char_to_unibyte, 1, 1, 0,
250 doc: /* Convert the multibyte character CH to unibyte character.\n\
251The unibyte character is a result of encoding CH by
252the current primary charset (value of `charset-primary'). */)
253 (ch)
254 Lisp_Object ch;
255{
256 int c;
257 unsigned code;
258 struct charset *charset;
259
260 CHECK_CHARACTER (ch);
261 c = XFASTINT (ch);
262 charset = CHARSET_FROM_ID (charset_primary);
263 code = ENCODE_CHAR (charset, c);
264 if (code < CHARSET_MIN_CODE (charset)
265 || code > CHARSET_MAX_CODE (charset))
266 error ("Can't convert to unibyte character: %d", XINT (ch));
267 return make_number (code);
268}
269
270DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
271 doc: /* Return 1 regardless of the argument CHAR.
272This is now an obsolete function. We keep it just for backward compatibility. */)
273 (ch)
274 Lisp_Object ch;
275{
276 CHECK_CHARACTER (ch);
277 return make_number (1);
278}
279
280DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
281 doc: /* Return width of CHAR when displayed in the current buffer.
282The width is measured by how many columns it occupies on the screen.
283Tab is taken to occupy `tab-width' columns. */)
284 (ch)
285 Lisp_Object ch;
286{
287 Lisp_Object disp;
288 int c, width;
289 struct Lisp_Char_Table *dp = buffer_display_table ();
290
291 CHECK_CHARACTER (ch);
292 c = XINT (ch);
293
294 /* Get the way the display table would display it. */
295 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
296
297 if (VECTORP (disp))
298 width = ASIZE (disp);
299 else
300 width = CHAR_WIDTH (c);
301
302 return make_number (width);
303}
304
305/* Return width of string STR of length LEN when displayed in the
306 current buffer. The width is measured by how many columns it
307 occupies on the screen. */
308
309int
310strwidth (str, len)
311 unsigned char *str;
312 int len;
313{
314 return c_string_width (str, len, -1, NULL, NULL);
315}
316
317/* Return width of string STR of length LEN when displayed in the
318 current buffer. The width is measured by how many columns it
319 occupies on the screen. If PRECISION > 0, return the width of
320 longest substring that doesn't exceed PRECISION, and set number of
321 characters and bytes of the substring in *NCHARS and *NBYTES
322 respectively. */
323
324c_string_width (str, len, precision, nchars, nbytes)
325 unsigned char *str;
326 int precision, *nchars, *nbytes;
327{
328 int i = 0, i_byte = 0;
329 int width = 0;
330 struct Lisp_Char_Table *dp = buffer_display_table ();
331
332 while (i_byte < len)
333 {
334 int bytes, thiswidth;
335 Lisp_Object val;
336 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
337
338 if (dp)
339 {
340 val = DISP_CHAR_VECTOR (dp, c);
341 if (VECTORP (val))
342 thiswidth = XVECTOR (val)->size;
343 else
344 thiswidth = CHAR_WIDTH (c);
345 }
346 else
347 {
348 thiswidth = CHAR_WIDTH (c);
349 }
350
351 if (precision > 0
352 && (width + thiswidth > precision))
353 {
354 *nchars = i;
355 *nbytes = i_byte;
356 return width;
357 }
358 i++;
359 i_byte += bytes;
360 width += thiswidth;
361 }
362
363 if (precision > 0)
364 {
365 *nchars = i;
366 *nbytes = i_byte;
367 }
368
369 return width;
370}
371
372/* Return width of Lisp string STRING when displayed in the current
373 buffer. The width is measured by how many columns it occupies on
374 the screen while paying attention to compositions. If PRECISION >
375 0, return the width of longest substring that doesn't exceed
376 PRECISION, and set number of characters and bytes of the substring
377 in *NCHARS and *NBYTES respectively. */
378
379int
380lisp_string_width (string, precision, nchars, nbytes)
381 Lisp_Object string;
382 int precision, *nchars, *nbytes;
383{
384 int len = XSTRING (string)->size;
385 int len_byte = STRING_BYTES (XSTRING (string));
386 unsigned char *str = XSTRING (string)->data;
387 int i = 0, i_byte = 0;
388 int width = 0;
389 struct Lisp_Char_Table *dp = buffer_display_table ();
390
391 while (i < len)
392 {
393 int chars, bytes, thiswidth;
394 Lisp_Object val;
395 int cmp_id;
396 int ignore, end;
397
398 if (find_composition (i, -1, &ignore, &end, &val, string)
399 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
400 >= 0))
401 {
402 thiswidth = composition_table[cmp_id]->width;
403 chars = end - i;
404 bytes = string_char_to_byte (string, end) - i_byte;
405 }
406 else if (dp)
407 {
408 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
409
410 chars = 1;
411 val = DISP_CHAR_VECTOR (dp, c);
412 if (VECTORP (val))
413 thiswidth = XVECTOR (val)->size;
414 else
415 thiswidth = CHAR_WIDTH (c);
416 }
417 else
418 {
419 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
420
421 chars = 1;
422 thiswidth = CHAR_WIDTH (c);
423 }
424
425 if (precision > 0
426 && (width + thiswidth > precision))
427 {
428 *nchars = i;
429 *nbytes = i_byte;
430 return width;
431 }
432 i += chars;
433 i_byte += bytes;
434 width += thiswidth;
435 }
436
437 if (precision > 0)
438 {
439 *nchars = i;
440 *nbytes = i_byte;
441 }
442
443 return width;
444}
445
446DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
447 doc: /* Return width of STRING when displayed in the current buffer.
448Width is measured by how many columns it occupies on the screen.
449When calculating width of a multibyte character in STRING,
450only the base leading-code is considered; the validity of
451the following bytes is not checked. Tabs in STRING are always
452taken to occupy `tab-width' columns. */)
453 (str)
454 Lisp_Object str;
455{
456 Lisp_Object val;
457
458 CHECK_STRING (str);
459 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
460 return val;
461}
462
463DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
464 doc: /* Return the direction of CHAR.
465The returned value is 0 for left-to-right and 1 for right-to-left. */)
466 (ch)
467 Lisp_Object ch;
468{
469 int c;
470
471 CHECK_CHARACTER (ch);
472 c = XINT (ch);
473 return CHAR_TABLE_REF (Vchar_direction_table, c);
474}
475
476DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
477 doc: /* Return number of characters between BEG and END.
478This is now an obsolete function. We keep it just for backward compatibility. */)
479 (beg, end)
480 Lisp_Object beg, end;
481{
482 int from, to;
483
484 CHECK_NUMBER_COERCE_MARKER (beg);
485 CHECK_NUMBER_COERCE_MARKER (end);
486
487 from = min (XFASTINT (beg), XFASTINT (end));
488 to = max (XFASTINT (beg), XFASTINT (end));
489
490 return make_number (to - from);
491}
492
493/* Return the number of characters in the NBYTES bytes at PTR.
494 This works by looking at the contents and checking for multibyte
495 sequences while assuming that there's no invalid sequence.
496 However, if the current buffer has enable-multibyte-characters =
497 nil, we treat each byte as a character. */
498
499int
500chars_in_text (ptr, nbytes)
501 unsigned char *ptr;
502 int nbytes;
503{
504 /* current_buffer is null at early stages of Emacs initialization. */
505 if (current_buffer == 0
506 || NILP (current_buffer->enable_multibyte_characters))
507 return nbytes;
508
509 return multibyte_chars_in_text (ptr, nbytes);
510}
511
512/* Return the number of characters in the NBYTES bytes at PTR.
513 This works by looking at the contents and checking for multibyte
514 sequences while assuming that there's no invalid sequence. It
515 ignores enable-multibyte-characters. */
516
517int
518multibyte_chars_in_text (ptr, nbytes)
519 unsigned char *ptr;
520 int nbytes;
521{
522 unsigned char *endp = ptr + nbytes;
523 int chars = 0;
524
525 while (ptr < endp)
526 {
527 int len = MULTIBYTE_LENGTH (ptr, endp);
528
529 if (len == 0)
530 abort ();
531 ptr += len;
532 chars++;
533 }
534
535 return chars;
536}
537
538/* Parse unibyte text at STR of LEN bytes as a multibyte text, count
539 characters and bytes in it, and store them in *NCHARS and *NBYTES
540 respectively. On counting bytes, pay attention to that 8-bit
541 characters not constructing a valid multibyte sequence are
542 represented by 2-byte in a multibyte text. */
543
544void
545parse_str_as_multibyte (str, len, nchars, nbytes)
546 unsigned char *str;
547 int len, *nchars, *nbytes;
548{
549 unsigned char *endp = str + len;
550 int n, chars = 0, bytes = 0;
551
552 if (len >= MAX_MULTIBYTE_LENGTH)
553 {
554 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
555 while (str < adjusted_endp)
556 {
557 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
558 str += n, bytes += n;
559 else
560 str++, bytes += 2;
561 chars++;
562 }
563 }
564 while (str < endp)
565 {
566 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
567 str += n, bytes += n;
568 else
569 str++, bytes += 2;
570 chars++;
571 }
572
573 *nchars = chars;
574 *nbytes = bytes;
575 return;
576}
577
578/* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
579 It actually converts only such 8-bit characters that don't contruct
580 a multibyte sequence to multibyte forms of Latin-1 characters. If
581 NCHARS is nonzero, set *NCHARS to the number of characters in the
582 text. It is assured that we can use LEN bytes at STR as a work
583 area and that is enough. Return the number of bytes of the
584 resulting text. */
585
586int
587str_as_multibyte (str, len, nbytes, nchars)
588 unsigned char *str;
589 int len, nbytes, *nchars;
590{
591 unsigned char *p = str, *endp = str + nbytes;
592 unsigned char *to;
593 int chars = 0;
594 int n;
595
596 if (nbytes >= MAX_MULTIBYTE_LENGTH)
597 {
598 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
599 while (p < adjusted_endp
600 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
601 p += n, chars++;
602 }
603 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
604 p += n, chars++;
605 if (nchars)
606 *nchars = chars;
607 if (p == endp)
608 return nbytes;
609
610 to = p;
611 nbytes = endp - p;
612 endp = str + len;
613 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
614 p = endp - nbytes;
615
616 if (nbytes >= MAX_MULTIBYTE_LENGTH)
617 {
618 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
619 while (p < adjusted_endp)
620 {
621 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
622 {
623 while (n--)
624 *to++ = *p++;
625 }
626 else
627 {
628 int c = *p++;
629 c = BYTE8_TO_CHAR (c);
630 to += CHAR_STRING (c, to);
631 }
632 }
633 chars++;
634 }
635 while (p < endp)
636 {
637 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
638 {
639 while (n--)
640 *to++ = *p++;
641 }
642 else
643 {
644 int c = *p++;
645 c = BYTE8_TO_CHAR (c);
646 to += CHAR_STRING (c, to);
647 }
648 chars++;
649 }
650 if (nchars)
651 *nchars = chars;
652 return (to - str);
653}
654
655/* Parse unibyte string at STR of LEN bytes, and return the number of
656 bytes it may ocupy when converted to multibyte string by
657 `str_to_multibyte'. */
658
659int
660parse_str_to_multibyte (str, len)
661 unsigned char *str;
662 int len;
663{
664 unsigned char *endp = str + len;
665 int bytes;
666
667 for (bytes = 0; str < endp; str++)
668 bytes += (*str < 0x80) ? 1 : 2;
669 return bytes;
670}
671
672
673/* Convert unibyte text at STR of NBYTES bytes to a multibyte text
674 that contains the same single-byte characters. It actually
675 converts all 8-bit characters to multibyte forms. It is assured
676 that we can use LEN bytes at STR as a work area and that is
677 enough. */
678
679int
680str_to_multibyte (str, len, bytes)
681 unsigned char *str;
682 int len, bytes;
683{
684 unsigned char *p = str, *endp = str + bytes;
685 unsigned char *to;
686
687 while (p < endp && *p < 0x80) p++;
688 if (p == endp)
689 return bytes;
690 to = p;
691 bytes = endp - p;
692 endp = str + len;
693 safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
694 p = endp - bytes;
695 while (p < endp)
696 {
697 int c = *p++;
698
699 if (c >= 0x80)
700 c = BYTE8_TO_CHAR (c);
701 to += CHAR_STRING (c, to);
702 }
703 return (to - str);
704}
705
706/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
707 actually converts characters in the range 0x80..0xFF to
708 unibyte. */
709
710int
711str_as_unibyte (str, bytes)
712 unsigned char *str;
713 int bytes;
714{
715 unsigned char *p = str, *endp = str + bytes;
716 unsigned char *to = str;
717 int c, len;
718
719 while (p < endp)
720 {
721 c = *p;
722 len = BYTES_BY_CHAR_HEAD (c);
723 if (CHAR_BYTE8_HEAD_P (c))
724 break;
725 p += len;
726 }
727 to = p;
728 while (p < endp)
729 {
730 c = *p;
731 len = BYTES_BY_CHAR_HEAD (c);
732 if (CHAR_BYTE8_HEAD_P (c))
733 {
734 c = STRING_CHAR_ADVANCE (p);
735 *to++ = CHAR_TO_BYTE8 (c);
736 }
737 else
738 {
739 while (len--) *to++ = *p++;
740 }
741 }
742 return (to - str);
743}
744
745int
746string_count_byte8 (string)
747 Lisp_Object string;
748{
749 int multibyte = STRING_MULTIBYTE (string);
750 int nchars = XSTRING (string)->size;
751 int nbytes = STRING_BYTES (XSTRING (string));
752 unsigned char *p = XSTRING (string)->data;
753 unsigned char *pend = p + nbytes;
754 int count = 0;
755 int c, len;
756
757 if (multibyte)
758 while (p < pend)
759 {
760 c = *p;
761 len = BYTES_BY_CHAR_HEAD (c);
762
763 if (CHAR_BYTE8_HEAD_P (c))
764 count++;
765 p += len;
766 }
767 else
768 while (p < pend)
769 {
770 if (*p++ >= 0x80)
771 count++;
772 }
773 return count;
774}
775
776
777Lisp_Object
778string_escape_byte8 (string)
779 Lisp_Object string;
780{
781 int nchars = XSTRING (string)->size;
782 int nbytes = STRING_BYTES (XSTRING (string));
783 int multibyte = STRING_MULTIBYTE (string);
784 int byte8_count;
785 unsigned char *src, *src_end, *dst;
786 Lisp_Object val;
787 int c, len;
788
789 if (multibyte && nchars == nbytes)
790 return string;
791
792 byte8_count = string_count_byte8 (string);
793
794 if (byte8_count == 0)
795 return string;
796
797 if (multibyte)
798 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
799 val = make_uninit_multibyte_string (nchars + byte8_count * 2,
800 nbytes + byte8_count * 2);
801 else
802 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
803 val = make_uninit_string (nbytes + byte8_count * 3);
804
805 src = XSTRING (string)->data;
806 src_end = src + nbytes;
807 dst = XSTRING (val)->data;
808 if (multibyte)
809 while (src < src_end)
810 {
811 c = *src;
812 len = BYTES_BY_CHAR_HEAD (c);
813
814 if (CHAR_BYTE8_HEAD_P (c))
815 {
816 c = STRING_CHAR_ADVANCE (src);
817 c = CHAR_TO_BYTE8 (c);
818 sprintf (dst, "\\%03o", c);
819 dst += 4;
820 }
821 else
822 while (len--) *dst++ = *src++;
823 }
824 else
825 while (src < src_end)
826 {
827 c = *src++;
828 if (c >= 0x80)
829 {
830 sprintf (dst, "\\%03o", c);
831 dst += 4;
832 }
833 else
834 *dst++ = c;
835 }
836 return val;
837}
838
839\f
840DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
841 doc: /*
842Concatenate all the argument characters and make the result a string. */)
843 (n, args)
844 int n;
845 Lisp_Object *args;
846{
847 int i;
848 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
849 unsigned char *p = buf;
850 int c;
851
852 for (i = 0; i < n; i++)
853 {
854 CHECK_CHARACTER (args[i]);
855 c = XINT (args[i]);
856 p += CHAR_STRING (c, p);
857 }
858
859 return make_string_from_bytes ((char *) buf, n, p - buf);
860}
861
862void
863init_character_once ()
864{
865}
866
867#ifdef emacs
868
869void
870syms_of_character ()
871{
872 DEFSYM (Qcharacterp, "characterp");
873 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
874
875 staticpro (&Vchar_unify_table);
876 Vchar_unify_table = Qnil;
877
878 defsubr (&Smax_char);
879 defsubr (&Scharacterp);
880 defsubr (&Sunibyte_char_to_multibyte);
881 defsubr (&Smultibyte_char_to_unibyte);
882 defsubr (&Schar_bytes);
883 defsubr (&Schar_width);
884 defsubr (&Sstring_width);
885 defsubr (&Schar_direction);
886 defsubr (&Schars_in_region);
887 defsubr (&Sstring);
888
889 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
890 doc: /*
891Vector of cons cell of a symbol and translation table ever defined.
892An ID of a translation table is an index of this vector. */);
893 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
894
895 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
896 doc: /*
897A char-table for characters which invoke auto-filling.
898Such characters have value t in this table. */);
899 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
900 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
901 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
902
903 DEFVAR_LISP ("char-width-table", &Vchar_width_table,
904 doc: /*
905A char-table for width (columns) of each character. */);
906 Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
907
908 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
909 doc: /* A char-table for direction of each character. */);
910 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
911
912 DEFVAR_LISP ("printable-chars", &Vprintable_chars,
913 doc: /* A char-table for each printable character. */);
914 Vprintable_chars = Fmake_char_table (Qnil, Qt);
915}
916
917#endif /* emacs */