(LEADING_CODE_PRIVATE_11, LEADING_CODE_PRIVATE_12,
[bpt/emacs.git] / src / character.c
CommitLineData
0168c3d8
KH
1/* Basic character support.
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
15
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
25
26/* At first, see the document in `character.h' to understand the code
27 in this file. */
28
29#ifdef emacs
30#include <config.h>
31#endif
32
33#include <stdio.h>
34
35#ifdef emacs
36
37#include <sys/types.h>
38#include "lisp.h"
39#include "character.h"
40#include "buffer.h"
41#include "charset.h"
42#include "composite.h"
43#include "disptab.h"
44
45#else /* not emacs */
46
47#include "mulelib.h"
48
49#endif /* emacs */
50
51Lisp_Object Qcharacterp;
52
53/* Vector of translation table ever defined.
54 ID of a translation table is used to index this vector. */
55Lisp_Object Vtranslation_table_vector;
56
57/* A char-table for characters which may invoke auto-filling. */
58Lisp_Object Vauto_fill_chars;
59
60Lisp_Object Qauto_fill_chars;
61
62Lisp_Object Vchar_unify_table;
63
64/* A char-table. An element is non-nil iff the corresponding
65 character has a printable glyph. */
66Lisp_Object Vprintable_chars;
67
68/* A char-table. An elemnent is a column-width of the corresponding
69 character. */
70Lisp_Object Vchar_width_table;
71
72/* A char-table. An element is a symbol indicating the direction
73 property of corresponding character. */
74Lisp_Object Vchar_direction_table;
75
76/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
77unsigned char *_fetch_multibyte_char_p;
78int _fetch_multibyte_char_len;
79
80\f
81
82int
1889b238 83char_string_with_unification (c, p)
0168c3d8 84 int c;
1889b238 85 unsigned char *p;
0168c3d8
KH
86{
87 int bytes;
88
89 MAYBE_UNIFY_CHAR (c);
90
91 if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
92 {
93 bytes = CHAR_STRING (c, p);
94 }
95 else if (c <= MAX_4_BYTE_CHAR)
96 {
97 p[0] = (0xF0 | (c >> 18));
98 p[1] = (0x80 | ((c >> 12) & 0x3F));
99 p[2] = (0x80 | ((c >> 6) & 0x3F));
100 p[3] = (0x80 | (c & 0x3F));
101 bytes = 4;
102 }
103 else
104 {
105 p[0] = 0xF8;
106 p[1] = (0x80 | ((c >> 18) & 0x0F));
107 p[2] = (0x80 | ((c >> 12) & 0x3F));
108 p[3] = (0x80 | ((c >> 6) & 0x3F));
109 p[4] = (0x80 | (c & 0x3F));
110 bytes = 5;
111 }
1889b238 112
0168c3d8
KH
113 return bytes;
114}
115
116
/* Decode one character from the multibyte sequence at P, run it
   through MAYBE_UNIFY_CHAR, and return it.  If LEN is non-null, store
   the number of bytes consumed in *LEN.  If ADVANCED is non-null,
   store the address just past the sequence in *ADVANCED.  */
int
string_char_with_unification (p, advanced, len)
     unsigned char *p, **advanced;
     int *len;
{
  unsigned char *start = p;
  int c;

  if ((*p & 0x80) == 0 || (*p & 0x30) != 0x30)
    {
      /* ASCII, or a lead byte lacking bit 0x20 or bit 0x10: the
         generic macro decodes it and advances P.  */
      c = STRING_CHAR_ADVANCE (p);
    }
  else if ((*p & 0x08) == 0)
    {
      /* Four-byte sequence.  */
      c = (p[0] & 0xF) << 18;
      c |= (p[1] & 0x3F) << 12;
      c |= (p[2] & 0x3F) << 6;
      c |= p[3] & 0x3F;
      p += 4;
    }
  else
    {
      /* Five-byte sequence; the lead byte carries no payload bits.  */
      c = (p[1] & 0x3F) << 18;
      c |= (p[2] & 0x3F) << 12;
      c |= (p[3] & 0x3F) << 6;
      c |= p[4] & 0x3F;
      p += 5;
    }

  MAYBE_UNIFY_CHAR (c);

  if (len)
    *len = p - start;
  if (advanced)
    *advanced = p;
  return c;
}
154
155
156/* Translate character C by translation table TABLE. If C is
157 negative, translate a character specified by CHARSET and CODE. If
158 no translation is found in TABLE, return the untranslated
159 character. */
160
161int
162translate_char (table, c)
163 Lisp_Object table;
164 int c;
165{
166 Lisp_Object ch;
167
168 if (! CHAR_TABLE_P (table))
169 return c;
170 ch = CHAR_TABLE_REF (table, c);
171 if (! CHARACTERP (ch))
172 return c;
173 return XINT (ch);
174}
175
176/* Convert the unibyte character C to the corresponding multibyte
177 character based on the current value of charset_primary. If C
178 can't be converted, return C. */
179
180int
181unibyte_char_to_multibyte (c)
182 int c;
183{
184 struct charset *charset = CHARSET_FROM_ID (charset_primary);
185 int c1 = DECODE_CHAR (charset, c);
186
187 return ((c1 >= 0) ? c1 : c);
188}
189
190
191/* Convert the multibyte character C to unibyte 8-bit character based
192 on the current value of charset_primary. If dimension of
193 charset_primary is more than one, return (C & 0xFF).
194
195 The argument REV_TBL is now ignored. It will be removed in the
196 future. */
197
198int
199multibyte_char_to_unibyte (c, rev_tbl)
200 int c;
201 Lisp_Object rev_tbl;
202{
203 struct charset *charset = CHARSET_FROM_ID (charset_primary);
204 unsigned c1 = ENCODE_CHAR (charset, c);
205
206 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
207}
208
209
210DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
211 doc: /* Return non-nil if OBJECT is a character. */)
212 (object, ignore)
213 Lisp_Object object, ignore;
214{
215 return (CHARACTERP (object) ? Qt : Qnil);
216}
217
218DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
219 doc: /* Return the character of the maximum code. */)
220 ()
221{
222 return make_number (MAX_CHAR);
223}
224
225DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
226 Sunibyte_char_to_multibyte, 1, 1, 0,
227 doc: /* Convert the unibyte character CH to multibyte character.
228The multibyte character is a result of decoding CH by
229the current primary charset (value of `charset-primary'). */)
230 (ch)
231 Lisp_Object ch;
232{
233 int c;
234 struct charset *charset;
235
236 CHECK_CHARACTER (ch);
237 c = XFASTINT (ch);
238 if (c >= 0400)
239 error ("Invalid unibyte character: %d", c);
240 charset = CHARSET_FROM_ID (charset_primary);
241 c = DECODE_CHAR (charset, c);
242 if (c < 0)
243 error ("Can't convert to multibyte character: %d", XINT (ch));
244 return make_number (c);
245}
246
247DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
248 Smultibyte_char_to_unibyte, 1, 1, 0,
249 doc: /* Convert the multibyte character CH to unibyte character.\n\
250The unibyte character is a result of encoding CH by
251the current primary charset (value of `charset-primary'). */)
252 (ch)
253 Lisp_Object ch;
254{
255 int c;
256 unsigned code;
257 struct charset *charset;
258
259 CHECK_CHARACTER (ch);
260 c = XFASTINT (ch);
261 charset = CHARSET_FROM_ID (charset_primary);
262 code = ENCODE_CHAR (charset, c);
263 if (code < CHARSET_MIN_CODE (charset)
264 || code > CHARSET_MAX_CODE (charset))
265 error ("Can't convert to unibyte character: %d", XINT (ch));
266 return make_number (code);
267}
268
269DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
270 doc: /* Return 1 regardless of the argument CHAR.
271This is now an obsolete function. We keep it just for backward compatibility. */)
272 (ch)
273 Lisp_Object ch;
274{
275 CHECK_CHARACTER (ch);
276 return make_number (1);
277}
278
279DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
280 doc: /* Return width of CHAR when displayed in the current buffer.
281The width is measured by how many columns it occupies on the screen.
282Tab is taken to occupy `tab-width' columns. */)
283 (ch)
284 Lisp_Object ch;
285{
286 Lisp_Object disp;
287 int c, width;
288 struct Lisp_Char_Table *dp = buffer_display_table ();
289
290 CHECK_CHARACTER (ch);
291 c = XINT (ch);
292
293 /* Get the way the display table would display it. */
294 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
295
296 if (VECTORP (disp))
297 width = ASIZE (disp);
298 else
299 width = CHAR_WIDTH (c);
300
301 return make_number (width);
302}
303
0168c3d8
KH
304/* Return width of string STR of length LEN when displayed in the
305 current buffer. The width is measured by how many columns it
306 occupies on the screen. If PRECISION > 0, return the width of
307 longest substring that doesn't exceed PRECISION, and set number of
308 characters and bytes of the substring in *NCHARS and *NBYTES
309 respectively. */
310
1889b238 311int
0168c3d8
KH
312c_string_width (str, len, precision, nchars, nbytes)
313 unsigned char *str;
314 int precision, *nchars, *nbytes;
315{
316 int i = 0, i_byte = 0;
317 int width = 0;
318 struct Lisp_Char_Table *dp = buffer_display_table ();
319
320 while (i_byte < len)
321 {
322 int bytes, thiswidth;
323 Lisp_Object val;
324 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
325
326 if (dp)
327 {
328 val = DISP_CHAR_VECTOR (dp, c);
329 if (VECTORP (val))
330 thiswidth = XVECTOR (val)->size;
331 else
332 thiswidth = CHAR_WIDTH (c);
333 }
334 else
335 {
336 thiswidth = CHAR_WIDTH (c);
337 }
338
339 if (precision > 0
340 && (width + thiswidth > precision))
341 {
342 *nchars = i;
343 *nbytes = i_byte;
344 return width;
345 }
346 i++;
347 i_byte += bytes;
348 width += thiswidth;
349 }
350
351 if (precision > 0)
352 {
353 *nchars = i;
354 *nbytes = i_byte;
355 }
356
357 return width;
358}
359
1889b238
KH
/* Return width of string STR of length LEN when displayed in the
   current buffer, measured in screen columns.  This is
   c_string_width called with a negative precision, i.e. without any
   width limit and without reporting character or byte counts.  */

int
strwidth (str, len)
     unsigned char *str;
     int len;
{
  int columns = c_string_width (str, len, -1, NULL, NULL);

  return columns;
}
371
0168c3d8
KH
372/* Return width of Lisp string STRING when displayed in the current
373 buffer. The width is measured by how many columns it occupies on
374 the screen while paying attention to compositions. If PRECISION >
375 0, return the width of longest substring that doesn't exceed
376 PRECISION, and set number of characters and bytes of the substring
377 in *NCHARS and *NBYTES respectively. */
378
379int
380lisp_string_width (string, precision, nchars, nbytes)
381 Lisp_Object string;
382 int precision, *nchars, *nbytes;
383{
384 int len = XSTRING (string)->size;
0168c3d8
KH
385 unsigned char *str = XSTRING (string)->data;
386 int i = 0, i_byte = 0;
387 int width = 0;
388 struct Lisp_Char_Table *dp = buffer_display_table ();
389
390 while (i < len)
391 {
392 int chars, bytes, thiswidth;
393 Lisp_Object val;
394 int cmp_id;
395 int ignore, end;
396
397 if (find_composition (i, -1, &ignore, &end, &val, string)
398 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
399 >= 0))
400 {
401 thiswidth = composition_table[cmp_id]->width;
402 chars = end - i;
403 bytes = string_char_to_byte (string, end) - i_byte;
404 }
405 else if (dp)
406 {
407 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
408
409 chars = 1;
410 val = DISP_CHAR_VECTOR (dp, c);
411 if (VECTORP (val))
412 thiswidth = XVECTOR (val)->size;
413 else
414 thiswidth = CHAR_WIDTH (c);
415 }
416 else
417 {
418 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
419
420 chars = 1;
421 thiswidth = CHAR_WIDTH (c);
422 }
423
424 if (precision > 0
425 && (width + thiswidth > precision))
426 {
427 *nchars = i;
428 *nbytes = i_byte;
429 return width;
430 }
431 i += chars;
432 i_byte += bytes;
433 width += thiswidth;
434 }
435
436 if (precision > 0)
437 {
438 *nchars = i;
439 *nbytes = i_byte;
440 }
441
442 return width;
443}
444
445DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
446 doc: /* Return width of STRING when displayed in the current buffer.
447Width is measured by how many columns it occupies on the screen.
448When calculating width of a multibyte character in STRING,
449only the base leading-code is considered; the validity of
450the following bytes is not checked. Tabs in STRING are always
451taken to occupy `tab-width' columns. */)
452 (str)
453 Lisp_Object str;
454{
455 Lisp_Object val;
456
457 CHECK_STRING (str);
458 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
459 return val;
460}
461
462DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
463 doc: /* Return the direction of CHAR.
464The returned value is 0 for left-to-right and 1 for right-to-left. */)
465 (ch)
466 Lisp_Object ch;
467{
468 int c;
469
470 CHECK_CHARACTER (ch);
471 c = XINT (ch);
472 return CHAR_TABLE_REF (Vchar_direction_table, c);
473}
474
475DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
476 doc: /* Return number of characters between BEG and END.
477This is now an obsolete function. We keep it just for backward compatibility. */)
478 (beg, end)
479 Lisp_Object beg, end;
480{
481 int from, to;
482
483 CHECK_NUMBER_COERCE_MARKER (beg);
484 CHECK_NUMBER_COERCE_MARKER (end);
485
486 from = min (XFASTINT (beg), XFASTINT (end));
487 to = max (XFASTINT (beg), XFASTINT (end));
488
489 return make_number (to - from);
490}
491
492/* Return the number of characters in the NBYTES bytes at PTR.
493 This works by looking at the contents and checking for multibyte
494 sequences while assuming that there's no invalid sequence.
495 However, if the current buffer has enable-multibyte-characters =
496 nil, we treat each byte as a character. */
497
498int
499chars_in_text (ptr, nbytes)
500 unsigned char *ptr;
501 int nbytes;
502{
503 /* current_buffer is null at early stages of Emacs initialization. */
504 if (current_buffer == 0
505 || NILP (current_buffer->enable_multibyte_characters))
506 return nbytes;
507
508 return multibyte_chars_in_text (ptr, nbytes);
509}
510
/* Return the number of characters in the NBYTES bytes at PTR,
   stepping through the text one multibyte sequence at a time.  The
   text is assumed to contain no invalid sequence: if
   MULTIBYTE_LENGTH reports 0 for any position, abort.
   enable-multibyte-characters is not consulted.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count;

  for (count = 0; ptr < limit; count++)
    {
      int seq_len = MULTIBYTE_LENGTH (ptr, limit);

      if (seq_len == 0)
        abort ();
      ptr += seq_len;
    }

  return count;
}
536
537/* Parse unibyte text at STR of LEN bytes as a multibyte text, count
538 characters and bytes in it, and store them in *NCHARS and *NBYTES
539 respectively. On counting bytes, pay attention to that 8-bit
540 characters not constructing a valid multibyte sequence are
541 represented by 2-byte in a multibyte text. */
542
543void
544parse_str_as_multibyte (str, len, nchars, nbytes)
545 unsigned char *str;
546 int len, *nchars, *nbytes;
547{
548 unsigned char *endp = str + len;
549 int n, chars = 0, bytes = 0;
550
551 if (len >= MAX_MULTIBYTE_LENGTH)
552 {
553 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
554 while (str < adjusted_endp)
555 {
556 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
557 str += n, bytes += n;
558 else
559 str++, bytes += 2;
560 chars++;
561 }
562 }
563 while (str < endp)
564 {
565 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
566 str += n, bytes += n;
567 else
568 str++, bytes += 2;
569 chars++;
570 }
571
572 *nchars = chars;
573 *nbytes = bytes;
574 return;
575}
576
577/* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
578 It actually converts only such 8-bit characters that don't contruct
579 a multibyte sequence to multibyte forms of Latin-1 characters. If
580 NCHARS is nonzero, set *NCHARS to the number of characters in the
581 text. It is assured that we can use LEN bytes at STR as a work
582 area and that is enough. Return the number of bytes of the
583 resulting text. */
584
585int
586str_as_multibyte (str, len, nbytes, nchars)
587 unsigned char *str;
588 int len, nbytes, *nchars;
589{
590 unsigned char *p = str, *endp = str + nbytes;
591 unsigned char *to;
592 int chars = 0;
593 int n;
594
595 if (nbytes >= MAX_MULTIBYTE_LENGTH)
596 {
597 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
598 while (p < adjusted_endp
599 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
600 p += n, chars++;
601 }
602 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
603 p += n, chars++;
604 if (nchars)
605 *nchars = chars;
606 if (p == endp)
607 return nbytes;
608
609 to = p;
610 nbytes = endp - p;
611 endp = str + len;
612 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
613 p = endp - nbytes;
614
615 if (nbytes >= MAX_MULTIBYTE_LENGTH)
616 {
617 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
618 while (p < adjusted_endp)
619 {
620 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
621 {
622 while (n--)
623 *to++ = *p++;
624 }
625 else
626 {
627 int c = *p++;
628 c = BYTE8_TO_CHAR (c);
629 to += CHAR_STRING (c, to);
630 }
631 }
632 chars++;
633 }
634 while (p < endp)
635 {
636 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
637 {
638 while (n--)
639 *to++ = *p++;
640 }
641 else
642 {
643 int c = *p++;
644 c = BYTE8_TO_CHAR (c);
645 to += CHAR_STRING (c, to);
646 }
647 chars++;
648 }
649 if (nchars)
650 *nchars = chars;
651 return (to - str);
652}
653
/* Parse unibyte string at STR of LEN bytes, and return the number of
   bytes it may occupy when converted to multibyte string by
   `str_to_multibyte': one byte for each byte below 0x80, two bytes
   for every other byte.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      if (str[i] & 0x80)
        total += 2;
      else
        total += 1;
    }
  return total;
}
670
671
/* Convert unibyte text at STR of BYTES bytes to a multibyte text
   that contains the same single-byte characters.  Every byte of 0x80
   or above is replaced by the multibyte form of BYTE8_TO_CHAR applied
   to it.  It is assured that we can use LEN bytes at STR as a work
   area and that is enough.  Return the number of bytes of the
   resulting text.  */

int
str_to_multibyte (str, len, bytes)
     unsigned char *str;
     int len, bytes;
{
  unsigned char *src = str;
  unsigned char *limit = str + bytes;
  unsigned char *dst;
  int tail;

  /* A leading run of ASCII needs no conversion.  */
  for (; src < limit; src++)
    if (*src >= 0x80)
      break;
  if (src == limit)
    return bytes;

  /* Move the rest to the end of the work area, then expand it from
     there back towards DST.  */
  dst = src;
  tail = limit - src;
  limit = str + len;
  safe_bcopy ((char *) src, (char *) (limit - tail), tail);
  src = limit - tail;

  while (src < limit)
    {
      int c = *src++;

      if (c >= 0x80)
        c = BYTE8_TO_CHAR (c);
      dst += CHAR_STRING (c, dst);
    }
  return (dst - str);
}
704
/* Arrange multibyte text at STR of BYTES bytes as a unibyte text, in
   place.  Each sequence whose lead byte satisfies CHAR_BYTE8_HEAD_P
   is collapsed to the single byte given by CHAR_TO_BYTE8; all other
   sequences are copied through unchanged.  Return the number of
   bytes of the resulting text.  */

int
str_as_unibyte (str, bytes)
     unsigned char *str;
     int bytes;
{
  unsigned char *p = str;
  unsigned char *limit = str + bytes;
  unsigned char *to;

  /* Everything before the first byte8 sequence stays where it is.  */
  while (p < limit && ! CHAR_BYTE8_HEAD_P (*p))
    p += BYTES_BY_CHAR_HEAD (*p);

  to = p;
  while (p < limit)
    {
      int head = *p;

      if (CHAR_BYTE8_HEAD_P (head))
        {
          int c = STRING_CHAR_ADVANCE (p);

          *to++ = CHAR_TO_BYTE8 (c);
        }
      else
        {
          int n = BYTES_BY_CHAR_HEAD (head);

          while (n--)
            *to++ = *p++;
        }
    }
  return (to - str);
}
743
744int
745string_count_byte8 (string)
746 Lisp_Object string;
747{
748 int multibyte = STRING_MULTIBYTE (string);
0168c3d8
KH
749 int nbytes = STRING_BYTES (XSTRING (string));
750 unsigned char *p = XSTRING (string)->data;
751 unsigned char *pend = p + nbytes;
752 int count = 0;
753 int c, len;
754
755 if (multibyte)
756 while (p < pend)
757 {
758 c = *p;
759 len = BYTES_BY_CHAR_HEAD (c);
760
761 if (CHAR_BYTE8_HEAD_P (c))
762 count++;
763 p += len;
764 }
765 else
766 while (p < pend)
767 {
768 if (*p++ >= 0x80)
769 count++;
770 }
771 return count;
772}
773
774
775Lisp_Object
776string_escape_byte8 (string)
777 Lisp_Object string;
778{
779 int nchars = XSTRING (string)->size;
780 int nbytes = STRING_BYTES (XSTRING (string));
781 int multibyte = STRING_MULTIBYTE (string);
782 int byte8_count;
783 unsigned char *src, *src_end, *dst;
784 Lisp_Object val;
785 int c, len;
786
787 if (multibyte && nchars == nbytes)
788 return string;
789
790 byte8_count = string_count_byte8 (string);
791
792 if (byte8_count == 0)
793 return string;
794
795 if (multibyte)
796 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
7b40ebaf 797 val = make_uninit_multibyte_string (nchars + byte8_count * 3,
0168c3d8
KH
798 nbytes + byte8_count * 2);
799 else
800 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
801 val = make_uninit_string (nbytes + byte8_count * 3);
802
803 src = XSTRING (string)->data;
804 src_end = src + nbytes;
805 dst = XSTRING (val)->data;
806 if (multibyte)
807 while (src < src_end)
808 {
809 c = *src;
810 len = BYTES_BY_CHAR_HEAD (c);
811
812 if (CHAR_BYTE8_HEAD_P (c))
813 {
814 c = STRING_CHAR_ADVANCE (src);
815 c = CHAR_TO_BYTE8 (c);
1889b238 816 sprintf ((char *) dst, "\\%03o", c);
0168c3d8
KH
817 dst += 4;
818 }
819 else
820 while (len--) *dst++ = *src++;
821 }
822 else
823 while (src < src_end)
824 {
825 c = *src++;
826 if (c >= 0x80)
827 {
1889b238 828 sprintf ((char *) dst, "\\%03o", c);
0168c3d8
KH
829 dst += 4;
830 }
831 else
832 *dst++ = c;
833 }
834 return val;
835}
836
837\f
838DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
839 doc: /*
d2e83296
DL
840Concatenate all the argument characters and make the result a string.
841usage: (string &rest CHARACTERS) */)
0168c3d8
KH
842 (n, args)
843 int n;
844 Lisp_Object *args;
845{
846 int i;
847 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
848 unsigned char *p = buf;
849 int c;
850
851 for (i = 0; i < n; i++)
852 {
853 CHECK_CHARACTER (args[i]);
854 c = XINT (args[i]);
855 p += CHAR_STRING (c, p);
856 }
857
858 return make_string_from_bytes ((char *) buf, n, p - buf);
859}
860
/* One-time initialization hook for this file.  There is currently
   nothing to set up here.  */
void
init_character_once ()
{
  /* Intentionally empty.  */
}
865
866#ifdef emacs
867
868void
869syms_of_character ()
870{
871 DEFSYM (Qcharacterp, "characterp");
872 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
873
874 staticpro (&Vchar_unify_table);
875 Vchar_unify_table = Qnil;
876
877 defsubr (&Smax_char);
878 defsubr (&Scharacterp);
879 defsubr (&Sunibyte_char_to_multibyte);
880 defsubr (&Smultibyte_char_to_unibyte);
881 defsubr (&Schar_bytes);
882 defsubr (&Schar_width);
883 defsubr (&Sstring_width);
884 defsubr (&Schar_direction);
885 defsubr (&Schars_in_region);
886 defsubr (&Sstring);
887
888 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
889 doc: /*
890Vector of cons cell of a symbol and translation table ever defined.
891An ID of a translation table is an index of this vector. */);
892 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
893
894 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
895 doc: /*
896A char-table for characters which invoke auto-filling.
897Such characters have value t in this table. */);
898 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
6cc0e1ca
DL
899 CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
900 CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
0168c3d8
KH
901
902 DEFVAR_LISP ("char-width-table", &Vchar_width_table,
903 doc: /*
904A char-table for width (columns) of each character. */);
905 Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
be8b50bc
KH
906 char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4));
907 char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
908 make_number (4));
0168c3d8
KH
909
910 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
911 doc: /* A char-table for direction of each character. */);
912 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
913
914 DEFVAR_LISP ("printable-chars", &Vprintable_chars,
915 doc: /* A char-table for each printable character. */);
db6d4189 916 Vprintable_chars = Fmake_char_table (Qnil, Qnil);
0168c3d8
KH
917}
918
919#endif /* emacs */