(LEADING_CODE_LATIN_1_MIN)
[bpt/emacs.git] / src / character.c
CommitLineData
0168c3d8
KH
1/* Basic character support.
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
15
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
25
26/* At first, see the document in `character.h' to understand the code
27 in this file. */
28
29#ifdef emacs
30#include <config.h>
31#endif
32
33#include <stdio.h>
34
35#ifdef emacs
36
37#include <sys/types.h>
38#include "lisp.h"
39#include "character.h"
40#include "buffer.h"
41#include "charset.h"
42#include "composite.h"
43#include "disptab.h"
44
45#else /* not emacs */
46
47#include "mulelib.h"
48
49#endif /* emacs */
50
51Lisp_Object Qcharacterp;
52
53/* Vector of translation table ever defined.
54 ID of a translation table is used to index this vector. */
55Lisp_Object Vtranslation_table_vector;
56
57/* A char-table for characters which may invoke auto-filling. */
58Lisp_Object Vauto_fill_chars;
59
60Lisp_Object Qauto_fill_chars;
61
62Lisp_Object Vchar_unify_table;
63
64/* A char-table. An element is non-nil iff the corresponding
65 character has a printable glyph. */
66Lisp_Object Vprintable_chars;
67
68/* A char-table. An elemnent is a column-width of the corresponding
69 character. */
70Lisp_Object Vchar_width_table;
71
72/* A char-table. An element is a symbol indicating the direction
73 property of corresponding character. */
74Lisp_Object Vchar_direction_table;
75
8973478b 76/* Variable used locally in the macro FETCH_MULTIBYTE_CHAR. */
0168c3d8 77unsigned char *_fetch_multibyte_char_p;
0168c3d8 78
c57f3328
KH
79/* Char table of scripts. */
80Lisp_Object Vchar_script_table;
81
82static Lisp_Object Qchar_script_table;
83
15843e6f 84
0168c3d8
KH
85\f
86
87int
1889b238 88char_string_with_unification (c, p)
0168c3d8 89 int c;
1889b238 90 unsigned char *p;
0168c3d8
KH
91{
92 int bytes;
93
94 MAYBE_UNIFY_CHAR (c);
95
96 if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
97 {
98 bytes = CHAR_STRING (c, p);
99 }
100 else if (c <= MAX_4_BYTE_CHAR)
101 {
102 p[0] = (0xF0 | (c >> 18));
103 p[1] = (0x80 | ((c >> 12) & 0x3F));
104 p[2] = (0x80 | ((c >> 6) & 0x3F));
105 p[3] = (0x80 | (c & 0x3F));
106 bytes = 4;
107 }
108 else
109 {
110 p[0] = 0xF8;
111 p[1] = (0x80 | ((c >> 18) & 0x0F));
112 p[2] = (0x80 | ((c >> 12) & 0x3F));
113 p[3] = (0x80 | ((c >> 6) & 0x3F));
114 p[4] = (0x80 | (c & 0x3F));
115 bytes = 5;
116 }
1889b238 117
0168c3d8
KH
118 return bytes;
119}
120
121
/* Decode the multibyte sequence starting at P and return the
   character, after passing it through MAYBE_UNIFY_CHAR.  If LEN is
   non-null, store the number of bytes consumed in *LEN.  If ADVANCED
   is non-null, store in *ADVANCED a pointer just past the
   sequence.  */

int
string_char_with_unification (p, advanced, len)
     const unsigned char *p;
     const unsigned char **advanced;
     int *len;
{
  int c;
  const unsigned char *saved_p = p;

  if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
    {
      /* ASCII, or a lead byte lacking bit 0x20 or bit 0x10: leave
	 the decoding to STRING_CHAR_ADVANCE, which also moves P.  */
      c = STRING_CHAR_ADVANCE (p);
    }
  else if (! (*p & 0x08))
    {
      /* Lead byte with bit 0x08 clear: 4-byte sequence.  */
      c = ((((p)[0] & 0xF) << 18)
	   | (((p)[1] & 0x3F) << 12)
	   | (((p)[2] & 0x3F) << 6)
	   | ((p)[3] & 0x3F));
      p += 4;
    }
  else
    {
      /* Otherwise a 5-byte sequence; the lead byte carries no
	 payload bits.  */
      c = ((((p)[1] & 0x3F) << 18)
	   | (((p)[2] & 0x3F) << 12)
	   | (((p)[3] & 0x3F) << 6)
	   | ((p)[4] & 0x3F));
      p += 5;
    }

  MAYBE_UNIFY_CHAR (c);

  if (len)
    *len = p - saved_p;
  if (advanced)
    *advanced = p;
  return c;
}
160
161
162/* Translate character C by translation table TABLE. If C is
163 negative, translate a character specified by CHARSET and CODE. If
164 no translation is found in TABLE, return the untranslated
165 character. */
166
167int
168translate_char (table, c)
169 Lisp_Object table;
170 int c;
171{
172 Lisp_Object ch;
173
174 if (! CHAR_TABLE_P (table))
175 return c;
176 ch = CHAR_TABLE_REF (table, c);
177 if (! CHARACTERP (ch))
178 return c;
179 return XINT (ch);
180}
181
182/* Convert the unibyte character C to the corresponding multibyte
ac86488b 183 character based on the current value of charset_unibyte. If C
0168c3d8
KH
184 can't be converted, return C. */
185
186int
187unibyte_char_to_multibyte (c)
188 int c;
189{
ac86488b 190 struct charset *charset = CHARSET_FROM_ID (charset_unibyte);
0168c3d8
KH
191 int c1 = DECODE_CHAR (charset, c);
192
193 return ((c1 >= 0) ? c1 : c);
194}
195
196
197/* Convert the multibyte character C to unibyte 8-bit character based
ac86488b
KH
198 on the current value of charset_unibyte. If dimension of
199 charset_unibyte is more than one, return (C & 0xFF).
0168c3d8
KH
200
201 The argument REV_TBL is now ignored. It will be removed in the
202 future. */
203
204int
205multibyte_char_to_unibyte (c, rev_tbl)
206 int c;
207 Lisp_Object rev_tbl;
208{
ac86488b 209 struct charset *charset = CHARSET_FROM_ID (charset_unibyte);
0168c3d8
KH
210 unsigned c1 = ENCODE_CHAR (charset, c);
211
212 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
213}
214
215
216DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
217 doc: /* Return non-nil if OBJECT is a character. */)
218 (object, ignore)
219 Lisp_Object object, ignore;
220{
221 return (CHARACTERP (object) ? Qt : Qnil);
222}
223
224DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
225 doc: /* Return the character of the maximum code. */)
226 ()
227{
228 return make_number (MAX_CHAR);
229}
230
231DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
232 Sunibyte_char_to_multibyte, 1, 1, 0,
233 doc: /* Convert the unibyte character CH to multibyte character.
234The multibyte character is a result of decoding CH by
ed1d5bc0 235the current unibyte charset (see `unibyte-charset'). */)
0168c3d8
KH
236 (ch)
237 Lisp_Object ch;
238{
239 int c;
240 struct charset *charset;
241
242 CHECK_CHARACTER (ch);
243 c = XFASTINT (ch);
244 if (c >= 0400)
245 error ("Invalid unibyte character: %d", c);
ac86488b 246 charset = CHARSET_FROM_ID (charset_unibyte);
0168c3d8
KH
247 c = DECODE_CHAR (charset, c);
248 if (c < 0)
3c5a53bd 249 c = BYTE8_TO_CHAR (XFASTINT (ch));
0168c3d8
KH
250 return make_number (c);
251}
252
253DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
254 Smultibyte_char_to_unibyte, 1, 1, 0,
255 doc: /* Convert the multibyte character CH to unibyte character.\n\
256The unibyte character is a result of encoding CH by
257the current primary charset (value of `charset-primary'). */)
258 (ch)
259 Lisp_Object ch;
260{
261 int c;
0168c3d8
KH
262
263 CHECK_CHARACTER (ch);
264 c = XFASTINT (ch);
3c5a53bd
KH
265 c = CHAR_TO_BYTE8 (c);
266 return make_number (c);
0168c3d8
KH
267}
268
269DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
270 doc: /* Return 1 regardless of the argument CHAR.
271This is now an obsolete function. We keep it just for backward compatibility. */)
272 (ch)
273 Lisp_Object ch;
274{
275 CHECK_CHARACTER (ch);
276 return make_number (1);
277}
278
279DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
280 doc: /* Return width of CHAR when displayed in the current buffer.
281The width is measured by how many columns it occupies on the screen.
282Tab is taken to occupy `tab-width' columns. */)
283 (ch)
284 Lisp_Object ch;
285{
286 Lisp_Object disp;
287 int c, width;
288 struct Lisp_Char_Table *dp = buffer_display_table ();
289
290 CHECK_CHARACTER (ch);
291 c = XINT (ch);
292
293 /* Get the way the display table would display it. */
294 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
295
296 if (VECTORP (disp))
297 width = ASIZE (disp);
298 else
299 width = CHAR_WIDTH (c);
300
301 return make_number (width);
302}
303
0168c3d8
KH
304/* Return width of string STR of length LEN when displayed in the
305 current buffer. The width is measured by how many columns it
306 occupies on the screen. If PRECISION > 0, return the width of
307 longest substring that doesn't exceed PRECISION, and set number of
308 characters and bytes of the substring in *NCHARS and *NBYTES
309 respectively. */
310
1889b238 311int
0168c3d8
KH
312c_string_width (str, len, precision, nchars, nbytes)
313 unsigned char *str;
314 int precision, *nchars, *nbytes;
315{
316 int i = 0, i_byte = 0;
317 int width = 0;
318 struct Lisp_Char_Table *dp = buffer_display_table ();
319
320 while (i_byte < len)
321 {
322 int bytes, thiswidth;
323 Lisp_Object val;
324 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
325
326 if (dp)
327 {
328 val = DISP_CHAR_VECTOR (dp, c);
329 if (VECTORP (val))
330 thiswidth = XVECTOR (val)->size;
331 else
332 thiswidth = CHAR_WIDTH (c);
333 }
334 else
335 {
336 thiswidth = CHAR_WIDTH (c);
337 }
338
339 if (precision > 0
340 && (width + thiswidth > precision))
341 {
342 *nchars = i;
343 *nbytes = i_byte;
344 return width;
345 }
346 i++;
347 i_byte += bytes;
348 width += thiswidth;
349 }
350
351 if (precision > 0)
352 {
353 *nchars = i;
354 *nbytes = i_byte;
355 }
356
357 return width;
358}
359
1889b238
KH
/* Return width of string STR of length LEN when displayed in the
   current buffer.  The width is measured by how many columns it
   occupies on the screen.  This is c_string_width with no precision
   limit (PRECISION is -1, so the NULL output pointers are never
   written through).  */

int
strwidth (str, len)
     unsigned char *str;
     int len;
{
  return c_string_width (str, len, -1, NULL, NULL);
}
371
0168c3d8
KH
372/* Return width of Lisp string STRING when displayed in the current
373 buffer. The width is measured by how many columns it occupies on
374 the screen while paying attention to compositions. If PRECISION >
375 0, return the width of longest substring that doesn't exceed
376 PRECISION, and set number of characters and bytes of the substring
377 in *NCHARS and *NBYTES respectively. */
378
379int
380lisp_string_width (string, precision, nchars, nbytes)
381 Lisp_Object string;
382 int precision, *nchars, *nbytes;
383{
384 int len = XSTRING (string)->size;
0168c3d8
KH
385 unsigned char *str = XSTRING (string)->data;
386 int i = 0, i_byte = 0;
387 int width = 0;
388 struct Lisp_Char_Table *dp = buffer_display_table ();
389
390 while (i < len)
391 {
392 int chars, bytes, thiswidth;
393 Lisp_Object val;
394 int cmp_id;
395 int ignore, end;
396
397 if (find_composition (i, -1, &ignore, &end, &val, string)
398 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
399 >= 0))
400 {
401 thiswidth = composition_table[cmp_id]->width;
402 chars = end - i;
403 bytes = string_char_to_byte (string, end) - i_byte;
404 }
405 else if (dp)
406 {
407 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
408
409 chars = 1;
410 val = DISP_CHAR_VECTOR (dp, c);
411 if (VECTORP (val))
412 thiswidth = XVECTOR (val)->size;
413 else
414 thiswidth = CHAR_WIDTH (c);
415 }
416 else
417 {
418 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
419
420 chars = 1;
421 thiswidth = CHAR_WIDTH (c);
422 }
423
424 if (precision > 0
425 && (width + thiswidth > precision))
426 {
427 *nchars = i;
428 *nbytes = i_byte;
429 return width;
430 }
431 i += chars;
432 i_byte += bytes;
433 width += thiswidth;
434 }
435
436 if (precision > 0)
437 {
438 *nchars = i;
439 *nbytes = i_byte;
440 }
441
442 return width;
443}
444
445DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
446 doc: /* Return width of STRING when displayed in the current buffer.
447Width is measured by how many columns it occupies on the screen.
448When calculating width of a multibyte character in STRING,
449only the base leading-code is considered; the validity of
450the following bytes is not checked. Tabs in STRING are always
451taken to occupy `tab-width' columns. */)
452 (str)
453 Lisp_Object str;
454{
455 Lisp_Object val;
456
457 CHECK_STRING (str);
458 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
459 return val;
460}
461
462DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
463 doc: /* Return the direction of CHAR.
464The returned value is 0 for left-to-right and 1 for right-to-left. */)
465 (ch)
466 Lisp_Object ch;
467{
468 int c;
469
470 CHECK_CHARACTER (ch);
471 c = XINT (ch);
472 return CHAR_TABLE_REF (Vchar_direction_table, c);
473}
474
475DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
476 doc: /* Return number of characters between BEG and END.
477This is now an obsolete function. We keep it just for backward compatibility. */)
478 (beg, end)
479 Lisp_Object beg, end;
480{
481 int from, to;
482
483 CHECK_NUMBER_COERCE_MARKER (beg);
484 CHECK_NUMBER_COERCE_MARKER (end);
485
486 from = min (XFASTINT (beg), XFASTINT (end));
487 to = max (XFASTINT (beg), XFASTINT (end));
488
489 return make_number (to - from);
490}
491
492/* Return the number of characters in the NBYTES bytes at PTR.
493 This works by looking at the contents and checking for multibyte
494 sequences while assuming that there's no invalid sequence.
495 However, if the current buffer has enable-multibyte-characters =
496 nil, we treat each byte as a character. */
497
498int
499chars_in_text (ptr, nbytes)
500 unsigned char *ptr;
501 int nbytes;
502{
503 /* current_buffer is null at early stages of Emacs initialization. */
504 if (current_buffer == 0
505 || NILP (current_buffer->enable_multibyte_characters))
506 return nbytes;
507
508 return multibyte_chars_in_text (ptr, nbytes);
509}
510
/* Return the number of characters in the NBYTES bytes at PTR.
   This works by looking at the contents and checking for multibyte
   sequences while assuming that there's no invalid sequence.  It
   ignores enable-multibyte-characters.  If MULTIBYTE_LENGTH reports
   a length of 0 for some position, abort.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *endp = ptr + nbytes;
  int chars = 0;

  while (ptr < endp)
    {
      int len = MULTIBYTE_LENGTH (ptr, endp);

      if (len == 0)
	abort ();
      ptr += len;
      chars++;
    }

  return chars;
}
536
537/* Parse unibyte text at STR of LEN bytes as a multibyte text, count
538 characters and bytes in it, and store them in *NCHARS and *NBYTES
539 respectively. On counting bytes, pay attention to that 8-bit
540 characters not constructing a valid multibyte sequence are
541 represented by 2-byte in a multibyte text. */
542
543void
544parse_str_as_multibyte (str, len, nchars, nbytes)
545 unsigned char *str;
546 int len, *nchars, *nbytes;
547{
548 unsigned char *endp = str + len;
549 int n, chars = 0, bytes = 0;
550
551 if (len >= MAX_MULTIBYTE_LENGTH)
552 {
553 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
554 while (str < adjusted_endp)
555 {
556 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
557 str += n, bytes += n;
558 else
559 str++, bytes += 2;
560 chars++;
561 }
562 }
563 while (str < endp)
564 {
565 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
566 str += n, bytes += n;
567 else
568 str++, bytes += 2;
569 chars++;
570 }
571
572 *nchars = chars;
573 *nbytes = bytes;
574 return;
575}
576
577/* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
578 It actually converts only such 8-bit characters that don't contruct
579 a multibyte sequence to multibyte forms of Latin-1 characters. If
580 NCHARS is nonzero, set *NCHARS to the number of characters in the
581 text. It is assured that we can use LEN bytes at STR as a work
582 area and that is enough. Return the number of bytes of the
583 resulting text. */
584
585int
586str_as_multibyte (str, len, nbytes, nchars)
587 unsigned char *str;
588 int len, nbytes, *nchars;
589{
590 unsigned char *p = str, *endp = str + nbytes;
591 unsigned char *to;
592 int chars = 0;
593 int n;
594
595 if (nbytes >= MAX_MULTIBYTE_LENGTH)
596 {
597 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
598 while (p < adjusted_endp
599 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
600 p += n, chars++;
601 }
602 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
603 p += n, chars++;
604 if (nchars)
605 *nchars = chars;
606 if (p == endp)
607 return nbytes;
608
609 to = p;
610 nbytes = endp - p;
611 endp = str + len;
612 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
613 p = endp - nbytes;
614
615 if (nbytes >= MAX_MULTIBYTE_LENGTH)
616 {
617 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
618 while (p < adjusted_endp)
619 {
620 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
621 {
622 while (n--)
623 *to++ = *p++;
624 }
625 else
626 {
627 int c = *p++;
628 c = BYTE8_TO_CHAR (c);
629 to += CHAR_STRING (c, to);
630 }
631 }
632 chars++;
633 }
634 while (p < endp)
635 {
636 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
637 {
638 while (n--)
639 *to++ = *p++;
640 }
641 else
642 {
643 int c = *p++;
644 c = BYTE8_TO_CHAR (c);
645 to += CHAR_STRING (c, to);
646 }
647 chars++;
648 }
649 if (nchars)
650 *nchars = chars;
651 return (to - str);
652}
653
/* Parse unibyte string at STR of LEN bytes, and return the number of
   bytes it may occupy when converted to multibyte string by
   `str_to_multibyte'.  Each byte below 0x80 counts as one byte and
   every other byte counts as two.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  unsigned char *endp = str + len;
  int bytes;

  for (bytes = 0; str < endp; str++)
    bytes += (*str < 0x80) ? 1 : 2;
  return bytes;
}
670
671
/* Convert unibyte text at STR of BYTES bytes to a multibyte text
   that contains the same single-byte characters.  It actually
   converts all 8-bit characters to multibyte forms.  It is assured
   that we can use LEN bytes at STR as a work area and that is
   enough.  Return the number of bytes of the resulting text.  */

int
str_to_multibyte (str, len, bytes)
     unsigned char *str;
     int len, bytes;
{
  unsigned char *p = str, *endp = str + bytes;
  unsigned char *to;

  /* Skip the leading bytes below 0x80; they need no conversion.  */
  while (p < endp && *p < 0x80) p++;
  if (p == endp)
    return bytes;
  /* Move the rest to the end of the work area so that the converted
     text can be written at TO while the source is still being
     read.  */
  to = p;
  bytes = endp - p;
  endp = str + len;
  safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
  p = endp - bytes;
  while (p < endp)
    {
      int c = *p++;

      if (c >= 0x80)
	c = BYTE8_TO_CHAR (c);
      to += CHAR_STRING (c, to);
    }
  return (to - str);
}
704
/* Arrange multibyte text at STR of BYTES bytes as a unibyte text.  It
   actually converts characters in the range 0x80..0xFF to
   unibyte.  The conversion is done in place; return the number of
   bytes of the resulting text.  */

int
str_as_unibyte (str, bytes)
     unsigned char *str;
     int bytes;
{
  const unsigned char *p = str, *endp = str + bytes;
  unsigned char *to;
  int c, len;

  /* Skip the leading characters up to the first one whose lead byte
     satisfies CHAR_BYTE8_HEAD_P; nothing before it changes.  */
  while (p < endp)
    {
      c = *p;
      len = BYTES_BY_CHAR_HEAD (c);
      if (CHAR_BYTE8_HEAD_P (c))
	break;
      p += len;
    }
  /* Same position as P, but through the writable pointer STR.  */
  to = str + (p - str);
  while (p < endp)
    {
      c = *p;
      len = BYTES_BY_CHAR_HEAD (c);
      if (CHAR_BYTE8_HEAD_P (c))
	{
	  c = STRING_CHAR_ADVANCE (p);
	  *to++ = CHAR_TO_BYTE8 (c);
	}
      else
	{
	  while (len--) *to++ = *p++;
	}
    }
  return (to - str);
}
743
744int
745string_count_byte8 (string)
746 Lisp_Object string;
747{
748 int multibyte = STRING_MULTIBYTE (string);
0168c3d8
KH
749 int nbytes = STRING_BYTES (XSTRING (string));
750 unsigned char *p = XSTRING (string)->data;
751 unsigned char *pend = p + nbytes;
752 int count = 0;
753 int c, len;
754
755 if (multibyte)
756 while (p < pend)
757 {
758 c = *p;
759 len = BYTES_BY_CHAR_HEAD (c);
760
761 if (CHAR_BYTE8_HEAD_P (c))
762 count++;
763 p += len;
764 }
765 else
766 while (p < pend)
767 {
768 if (*p++ >= 0x80)
769 count++;
770 }
771 return count;
772}
773
774
775Lisp_Object
776string_escape_byte8 (string)
777 Lisp_Object string;
778{
779 int nchars = XSTRING (string)->size;
780 int nbytes = STRING_BYTES (XSTRING (string));
781 int multibyte = STRING_MULTIBYTE (string);
782 int byte8_count;
15843e6f
KH
783 const unsigned char *src, *src_end;
784 unsigned char *dst;
0168c3d8
KH
785 Lisp_Object val;
786 int c, len;
787
788 if (multibyte && nchars == nbytes)
789 return string;
790
791 byte8_count = string_count_byte8 (string);
792
793 if (byte8_count == 0)
794 return string;
795
796 if (multibyte)
797 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
7b40ebaf 798 val = make_uninit_multibyte_string (nchars + byte8_count * 3,
0168c3d8
KH
799 nbytes + byte8_count * 2);
800 else
801 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
802 val = make_uninit_string (nbytes + byte8_count * 3);
803
804 src = XSTRING (string)->data;
805 src_end = src + nbytes;
806 dst = XSTRING (val)->data;
807 if (multibyte)
808 while (src < src_end)
809 {
810 c = *src;
811 len = BYTES_BY_CHAR_HEAD (c);
812
813 if (CHAR_BYTE8_HEAD_P (c))
814 {
815 c = STRING_CHAR_ADVANCE (src);
816 c = CHAR_TO_BYTE8 (c);
1889b238 817 sprintf ((char *) dst, "\\%03o", c);
0168c3d8
KH
818 dst += 4;
819 }
820 else
821 while (len--) *dst++ = *src++;
822 }
823 else
824 while (src < src_end)
825 {
826 c = *src++;
827 if (c >= 0x80)
828 {
1889b238 829 sprintf ((char *) dst, "\\%03o", c);
0168c3d8
KH
830 dst += 4;
831 }
832 else
833 *dst++ = c;
834 }
835 return val;
836}
837
838\f
839DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
840 doc: /*
d2e83296
DL
841Concatenate all the argument characters and make the result a string.
842usage: (string &rest CHARACTERS) */)
0168c3d8
KH
843 (n, args)
844 int n;
845 Lisp_Object *args;
846{
847 int i;
848 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
849 unsigned char *p = buf;
850 int c;
851
852 for (i = 0; i < n; i++)
853 {
854 CHECK_CHARACTER (args[i]);
855 c = XINT (args[i]);
856 p += CHAR_STRING (c, p);
857 }
858
859 return make_string_from_bytes ((char *) buf, n, p - buf);
860}
861
/* One-time initialization hook for this file.  It currently has
   nothing to do.  */

void
init_character_once ()
{
}
866
867#ifdef emacs
868
869void
870syms_of_character ()
871{
872 DEFSYM (Qcharacterp, "characterp");
873 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
874
875 staticpro (&Vchar_unify_table);
876 Vchar_unify_table = Qnil;
877
878 defsubr (&Smax_char);
879 defsubr (&Scharacterp);
880 defsubr (&Sunibyte_char_to_multibyte);
881 defsubr (&Smultibyte_char_to_unibyte);
882 defsubr (&Schar_bytes);
883 defsubr (&Schar_width);
884 defsubr (&Sstring_width);
885 defsubr (&Schar_direction);
886 defsubr (&Schars_in_region);
887 defsubr (&Sstring);
888
889 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
890 doc: /*
68978cf0
DL
891Vector recording all translation tables ever defined.
892Each element is a pair (SYMBOL . TABLE) relating the table to the
893symbol naming it. The ID of a translation table is an index into this vector. */);
0168c3d8
KH
894 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
895
896 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
897 doc: /*
898A char-table for characters which invoke auto-filling.
899Such characters have value t in this table. */);
900 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
6cc0e1ca
DL
901 CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
902 CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
0168c3d8
KH
903
904 DEFVAR_LISP ("char-width-table", &Vchar_width_table,
905 doc: /*
906A char-table for width (columns) of each character. */);
907 Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
be8b50bc
KH
908 char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4));
909 char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
910 make_number (4));
0168c3d8
KH
911
912 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
913 doc: /* A char-table for direction of each character. */);
914 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
915
916 DEFVAR_LISP ("printable-chars", &Vprintable_chars,
917 doc: /* A char-table for each printable character. */);
db6d4189 918 Vprintable_chars = Fmake_char_table (Qnil, Qnil);
67dde660
KH
919 Fset_char_table_range (Vprintable_chars,
920 Fcons (make_number (32), make_number (126)), Qt);
921 Fset_char_table_range (Vprintable_chars,
922 Fcons (make_number (160),
923 make_number (MAX_5_BYTE_CHAR)), Qt);
15843e6f 924
c57f3328
KH
925 DEFVAR_LISP ("char-script-table", &Vchar_script_table,
926 doc: /* Char table of script symbols.
927It has one extra slot whose value is a list of script symbols. */);
928
929 /* Intern this now in case it isn't already done.
930 Setting this variable twice is harmless.
931 But don't staticpro it here--that is done in alloc.c. */
932 Qchar_table_extra_slots = intern ("char-table-extra-slots");
933 DEFSYM (Qchar_script_table, "char-script-table");
934 Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
935 Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
0168c3d8
KH
936}
937
938#endif /* emacs */