Remove `emacs' conditional. Include hash table stuff
[bpt/emacs.git] / src / character.c
CommitLineData
0168c3d8
KH
1/* Basic character support.
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
15
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
25
26/* At first, see the document in `character.h' to understand the code
27 in this file. */
28
29#ifdef emacs
30#include <config.h>
31#endif
32
33#include <stdio.h>
34
35#ifdef emacs
36
37#include <sys/types.h>
38#include "lisp.h"
39#include "character.h"
40#include "buffer.h"
41#include "charset.h"
42#include "composite.h"
43#include "disptab.h"
44
45#else /* not emacs */
46
47#include "mulelib.h"
48
49#endif /* emacs */
50
51Lisp_Object Qcharacterp;
52
53/* Vector of translation table ever defined.
54 ID of a translation table is used to index this vector. */
55Lisp_Object Vtranslation_table_vector;
56
57/* A char-table for characters which may invoke auto-filling. */
58Lisp_Object Vauto_fill_chars;
59
60Lisp_Object Qauto_fill_chars;
61
62Lisp_Object Vchar_unify_table;
63
64/* A char-table. An element is non-nil iff the corresponding
65 character has a printable glyph. */
66Lisp_Object Vprintable_chars;
67
68/* A char-table. An elemnent is a column-width of the corresponding
69 character. */
70Lisp_Object Vchar_width_table;
71
72/* A char-table. An element is a symbol indicating the direction
73 property of corresponding character. */
74Lisp_Object Vchar_direction_table;
75
76/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
77unsigned char *_fetch_multibyte_char_p;
78int _fetch_multibyte_char_len;
79
c57f3328
KH
80/* Char table of scripts. */
81Lisp_Object Vchar_script_table;
82
83static Lisp_Object Qchar_script_table;
84
15843e6f 85
0168c3d8
KH
86\f
87
88int
1889b238 89char_string_with_unification (c, p)
0168c3d8 90 int c;
1889b238 91 unsigned char *p;
0168c3d8
KH
92{
93 int bytes;
94
95 MAYBE_UNIFY_CHAR (c);
96
97 if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
98 {
99 bytes = CHAR_STRING (c, p);
100 }
101 else if (c <= MAX_4_BYTE_CHAR)
102 {
103 p[0] = (0xF0 | (c >> 18));
104 p[1] = (0x80 | ((c >> 12) & 0x3F));
105 p[2] = (0x80 | ((c >> 6) & 0x3F));
106 p[3] = (0x80 | (c & 0x3F));
107 bytes = 4;
108 }
109 else
110 {
111 p[0] = 0xF8;
112 p[1] = (0x80 | ((c >> 18) & 0x0F));
113 p[2] = (0x80 | ((c >> 12) & 0x3F));
114 p[3] = (0x80 | ((c >> 6) & 0x3F));
115 p[4] = (0x80 | (c & 0x3F));
116 bytes = 5;
117 }
1889b238 118
0168c3d8
KH
119 return bytes;
120}
121
122
/* Read one character from the multibyte sequence at P, apply
   MAYBE_UNIFY_CHAR to it, and return the result.  If LEN is non-null,
   store the number of bytes consumed in *LEN.  If ADVANCED is
   non-null, store the address just past the sequence in *ADVANCED.  */

int
string_char_with_unification (p, advanced, len)
     const unsigned char *p;
     const unsigned char **advanced;
     int *len;
{
  const unsigned char *start = p;
  int c;

  if (*p < 0x80 || (*p & 0x30) != 0x30)
    {
      /* Lead byte lacks bit 0x20 or bit 0x10: leave the decoding to
         STRING_CHAR_ADVANCE, which also moves P forward.  */
      c = STRING_CHAR_ADVANCE (p);
    }
  else if (*p & 0x08)
    {
      /* 5-byte sequence; the lead byte carries no code bits.  */
      c = ((((p)[1] & 0x3F) << 18)
           | (((p)[2] & 0x3F) << 12)
           | (((p)[3] & 0x3F) << 6)
           | ((p)[4] & 0x3F));
      p += 5;
    }
  else
    {
      /* 4-byte sequence.  */
      c = ((((p)[0] & 0xF) << 18)
           | (((p)[1] & 0x3F) << 12)
           | (((p)[2] & 0x3F) << 6)
           | ((p)[3] & 0x3F));
      p += 4;
    }

  MAYBE_UNIFY_CHAR (c);

  if (len)
    *len = p - start;
  if (advanced)
    *advanced = p;
  return c;
}
161
162
163/* Translate character C by translation table TABLE. If C is
164 negative, translate a character specified by CHARSET and CODE. If
165 no translation is found in TABLE, return the untranslated
166 character. */
167
168int
169translate_char (table, c)
170 Lisp_Object table;
171 int c;
172{
173 Lisp_Object ch;
174
175 if (! CHAR_TABLE_P (table))
176 return c;
177 ch = CHAR_TABLE_REF (table, c);
178 if (! CHARACTERP (ch))
179 return c;
180 return XINT (ch);
181}
182
183/* Convert the unibyte character C to the corresponding multibyte
184 character based on the current value of charset_primary. If C
185 can't be converted, return C. */
186
187int
188unibyte_char_to_multibyte (c)
189 int c;
190{
191 struct charset *charset = CHARSET_FROM_ID (charset_primary);
192 int c1 = DECODE_CHAR (charset, c);
193
194 return ((c1 >= 0) ? c1 : c);
195}
196
197
198/* Convert the multibyte character C to unibyte 8-bit character based
199 on the current value of charset_primary. If dimension of
200 charset_primary is more than one, return (C & 0xFF).
201
202 The argument REV_TBL is now ignored. It will be removed in the
203 future. */
204
205int
206multibyte_char_to_unibyte (c, rev_tbl)
207 int c;
208 Lisp_Object rev_tbl;
209{
210 struct charset *charset = CHARSET_FROM_ID (charset_primary);
211 unsigned c1 = ENCODE_CHAR (charset, c);
212
213 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
214}
215
216
217DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
218 doc: /* Return non-nil if OBJECT is a character. */)
219 (object, ignore)
220 Lisp_Object object, ignore;
221{
222 return (CHARACTERP (object) ? Qt : Qnil);
223}
224
225DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
226 doc: /* Return the character of the maximum code. */)
227 ()
228{
229 return make_number (MAX_CHAR);
230}
231
232DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
233 Sunibyte_char_to_multibyte, 1, 1, 0,
234 doc: /* Convert the unibyte character CH to multibyte character.
235The multibyte character is a result of decoding CH by
236the current primary charset (value of `charset-primary'). */)
237 (ch)
238 Lisp_Object ch;
239{
240 int c;
241 struct charset *charset;
242
243 CHECK_CHARACTER (ch);
244 c = XFASTINT (ch);
245 if (c >= 0400)
246 error ("Invalid unibyte character: %d", c);
247 charset = CHARSET_FROM_ID (charset_primary);
248 c = DECODE_CHAR (charset, c);
249 if (c < 0)
250 error ("Can't convert to multibyte character: %d", XINT (ch));
251 return make_number (c);
252}
253
254DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
255 Smultibyte_char_to_unibyte, 1, 1, 0,
256 doc: /* Convert the multibyte character CH to unibyte character.\n\
257The unibyte character is a result of encoding CH by
258the current primary charset (value of `charset-primary'). */)
259 (ch)
260 Lisp_Object ch;
261{
262 int c;
263 unsigned code;
264 struct charset *charset;
265
266 CHECK_CHARACTER (ch);
267 c = XFASTINT (ch);
268 charset = CHARSET_FROM_ID (charset_primary);
269 code = ENCODE_CHAR (charset, c);
270 if (code < CHARSET_MIN_CODE (charset)
271 || code > CHARSET_MAX_CODE (charset))
272 error ("Can't convert to unibyte character: %d", XINT (ch));
273 return make_number (code);
274}
275
276DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
277 doc: /* Return 1 regardless of the argument CHAR.
278This is now an obsolete function. We keep it just for backward compatibility. */)
279 (ch)
280 Lisp_Object ch;
281{
282 CHECK_CHARACTER (ch);
283 return make_number (1);
284}
285
286DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
287 doc: /* Return width of CHAR when displayed in the current buffer.
288The width is measured by how many columns it occupies on the screen.
289Tab is taken to occupy `tab-width' columns. */)
290 (ch)
291 Lisp_Object ch;
292{
293 Lisp_Object disp;
294 int c, width;
295 struct Lisp_Char_Table *dp = buffer_display_table ();
296
297 CHECK_CHARACTER (ch);
298 c = XINT (ch);
299
300 /* Get the way the display table would display it. */
301 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
302
303 if (VECTORP (disp))
304 width = ASIZE (disp);
305 else
306 width = CHAR_WIDTH (c);
307
308 return make_number (width);
309}
310
0168c3d8
KH
311/* Return width of string STR of length LEN when displayed in the
312 current buffer. The width is measured by how many columns it
313 occupies on the screen. If PRECISION > 0, return the width of
314 longest substring that doesn't exceed PRECISION, and set number of
315 characters and bytes of the substring in *NCHARS and *NBYTES
316 respectively. */
317
1889b238 318int
0168c3d8
KH
319c_string_width (str, len, precision, nchars, nbytes)
320 unsigned char *str;
321 int precision, *nchars, *nbytes;
322{
323 int i = 0, i_byte = 0;
324 int width = 0;
325 struct Lisp_Char_Table *dp = buffer_display_table ();
326
327 while (i_byte < len)
328 {
329 int bytes, thiswidth;
330 Lisp_Object val;
331 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
332
333 if (dp)
334 {
335 val = DISP_CHAR_VECTOR (dp, c);
336 if (VECTORP (val))
337 thiswidth = XVECTOR (val)->size;
338 else
339 thiswidth = CHAR_WIDTH (c);
340 }
341 else
342 {
343 thiswidth = CHAR_WIDTH (c);
344 }
345
346 if (precision > 0
347 && (width + thiswidth > precision))
348 {
349 *nchars = i;
350 *nbytes = i_byte;
351 return width;
352 }
353 i++;
354 i_byte += bytes;
355 width += thiswidth;
356 }
357
358 if (precision > 0)
359 {
360 *nchars = i;
361 *nbytes = i_byte;
362 }
363
364 return width;
365}
366
1889b238
KH
367/* Return width of string STR of length LEN when displayed in the
368 current buffer. The width is measured by how many columns it
369 occupies on the screen. */
370
371int
372strwidth (str, len)
373 unsigned char *str;
374 int len;
375{
376 return c_string_width (str, len, -1, NULL, NULL);
377}
378
0168c3d8
KH
379/* Return width of Lisp string STRING when displayed in the current
380 buffer. The width is measured by how many columns it occupies on
381 the screen while paying attention to compositions. If PRECISION >
382 0, return the width of longest substring that doesn't exceed
383 PRECISION, and set number of characters and bytes of the substring
384 in *NCHARS and *NBYTES respectively. */
385
386int
387lisp_string_width (string, precision, nchars, nbytes)
388 Lisp_Object string;
389 int precision, *nchars, *nbytes;
390{
391 int len = XSTRING (string)->size;
0168c3d8
KH
392 unsigned char *str = XSTRING (string)->data;
393 int i = 0, i_byte = 0;
394 int width = 0;
395 struct Lisp_Char_Table *dp = buffer_display_table ();
396
397 while (i < len)
398 {
399 int chars, bytes, thiswidth;
400 Lisp_Object val;
401 int cmp_id;
402 int ignore, end;
403
404 if (find_composition (i, -1, &ignore, &end, &val, string)
405 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
406 >= 0))
407 {
408 thiswidth = composition_table[cmp_id]->width;
409 chars = end - i;
410 bytes = string_char_to_byte (string, end) - i_byte;
411 }
412 else if (dp)
413 {
414 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
415
416 chars = 1;
417 val = DISP_CHAR_VECTOR (dp, c);
418 if (VECTORP (val))
419 thiswidth = XVECTOR (val)->size;
420 else
421 thiswidth = CHAR_WIDTH (c);
422 }
423 else
424 {
425 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
426
427 chars = 1;
428 thiswidth = CHAR_WIDTH (c);
429 }
430
431 if (precision > 0
432 && (width + thiswidth > precision))
433 {
434 *nchars = i;
435 *nbytes = i_byte;
436 return width;
437 }
438 i += chars;
439 i_byte += bytes;
440 width += thiswidth;
441 }
442
443 if (precision > 0)
444 {
445 *nchars = i;
446 *nbytes = i_byte;
447 }
448
449 return width;
450}
451
452DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
453 doc: /* Return width of STRING when displayed in the current buffer.
454Width is measured by how many columns it occupies on the screen.
455When calculating width of a multibyte character in STRING,
456only the base leading-code is considered; the validity of
457the following bytes is not checked. Tabs in STRING are always
458taken to occupy `tab-width' columns. */)
459 (str)
460 Lisp_Object str;
461{
462 Lisp_Object val;
463
464 CHECK_STRING (str);
465 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
466 return val;
467}
468
469DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
470 doc: /* Return the direction of CHAR.
471The returned value is 0 for left-to-right and 1 for right-to-left. */)
472 (ch)
473 Lisp_Object ch;
474{
475 int c;
476
477 CHECK_CHARACTER (ch);
478 c = XINT (ch);
479 return CHAR_TABLE_REF (Vchar_direction_table, c);
480}
481
482DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
483 doc: /* Return number of characters between BEG and END.
484This is now an obsolete function. We keep it just for backward compatibility. */)
485 (beg, end)
486 Lisp_Object beg, end;
487{
488 int from, to;
489
490 CHECK_NUMBER_COERCE_MARKER (beg);
491 CHECK_NUMBER_COERCE_MARKER (end);
492
493 from = min (XFASTINT (beg), XFASTINT (end));
494 to = max (XFASTINT (beg), XFASTINT (end));
495
496 return make_number (to - from);
497}
498
499/* Return the number of characters in the NBYTES bytes at PTR.
500 This works by looking at the contents and checking for multibyte
501 sequences while assuming that there's no invalid sequence.
502 However, if the current buffer has enable-multibyte-characters =
503 nil, we treat each byte as a character. */
504
505int
506chars_in_text (ptr, nbytes)
507 unsigned char *ptr;
508 int nbytes;
509{
510 /* current_buffer is null at early stages of Emacs initialization. */
511 if (current_buffer == 0
512 || NILP (current_buffer->enable_multibyte_characters))
513 return nbytes;
514
515 return multibyte_chars_in_text (ptr, nbytes);
516}
517
/* Return the number of characters in the NBYTES bytes at PTR,
   stepping through the text one multibyte sequence at a time.  The
   text is assumed to contain no invalid sequence; Emacs aborts if
   MULTIBYTE_LENGTH reports a zero length.  The value of
   enable-multibyte-characters is ignored.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count;

  for (count = 0; ptr < limit; count++)
    {
      int step = MULTIBYTE_LENGTH (ptr, limit);

      if (step == 0)
        abort ();
      ptr += step;
    }

  return count;
}
543
544/* Parse unibyte text at STR of LEN bytes as a multibyte text, count
545 characters and bytes in it, and store them in *NCHARS and *NBYTES
546 respectively. On counting bytes, pay attention to that 8-bit
547 characters not constructing a valid multibyte sequence are
548 represented by 2-byte in a multibyte text. */
549
550void
551parse_str_as_multibyte (str, len, nchars, nbytes)
552 unsigned char *str;
553 int len, *nchars, *nbytes;
554{
555 unsigned char *endp = str + len;
556 int n, chars = 0, bytes = 0;
557
558 if (len >= MAX_MULTIBYTE_LENGTH)
559 {
560 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
561 while (str < adjusted_endp)
562 {
563 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
564 str += n, bytes += n;
565 else
566 str++, bytes += 2;
567 chars++;
568 }
569 }
570 while (str < endp)
571 {
572 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
573 str += n, bytes += n;
574 else
575 str++, bytes += 2;
576 chars++;
577 }
578
579 *nchars = chars;
580 *nbytes = bytes;
581 return;
582}
583
584/* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
585 It actually converts only such 8-bit characters that don't contruct
586 a multibyte sequence to multibyte forms of Latin-1 characters. If
587 NCHARS is nonzero, set *NCHARS to the number of characters in the
588 text. It is assured that we can use LEN bytes at STR as a work
589 area and that is enough. Return the number of bytes of the
590 resulting text. */
591
592int
593str_as_multibyte (str, len, nbytes, nchars)
594 unsigned char *str;
595 int len, nbytes, *nchars;
596{
597 unsigned char *p = str, *endp = str + nbytes;
598 unsigned char *to;
599 int chars = 0;
600 int n;
601
602 if (nbytes >= MAX_MULTIBYTE_LENGTH)
603 {
604 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
605 while (p < adjusted_endp
606 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
607 p += n, chars++;
608 }
609 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
610 p += n, chars++;
611 if (nchars)
612 *nchars = chars;
613 if (p == endp)
614 return nbytes;
615
616 to = p;
617 nbytes = endp - p;
618 endp = str + len;
619 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
620 p = endp - nbytes;
621
622 if (nbytes >= MAX_MULTIBYTE_LENGTH)
623 {
624 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
625 while (p < adjusted_endp)
626 {
627 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
628 {
629 while (n--)
630 *to++ = *p++;
631 }
632 else
633 {
634 int c = *p++;
635 c = BYTE8_TO_CHAR (c);
636 to += CHAR_STRING (c, to);
637 }
638 }
639 chars++;
640 }
641 while (p < endp)
642 {
643 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
644 {
645 while (n--)
646 *to++ = *p++;
647 }
648 else
649 {
650 int c = *p++;
651 c = BYTE8_TO_CHAR (c);
652 to += CHAR_STRING (c, to);
653 }
654 chars++;
655 }
656 if (nchars)
657 *nchars = chars;
658 return (to - str);
659}
660
/* Parse the unibyte string at STR of LEN bytes, and return the number
   of bytes it may occupy when converted to a multibyte string by
   `str_to_multibyte'.  Each byte below 0x80 counts as one byte and
   every other byte as two.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  unsigned char *limit = str + len;
  int total = 0;

  while (str < limit)
    {
      if (*str < 0x80)
        total += 1;
      else
        total += 2;
      str++;
    }
  return total;
}
677
678
/* Convert the unibyte text at STR of BYTES bytes to a multibyte text
   that contains the same single-byte characters, by converting every
   byte of 0x80 and above with BYTE8_TO_CHAR.  It is assured that we
   can use LEN bytes at STR as a work area and that is enough.
   Return the number of bytes of the resulting text.  */

int
str_to_multibyte (str, len, bytes)
     unsigned char *str;
     int len, bytes;
{
  unsigned char *src = str, *limit = str + bytes;
  unsigned char *dst;
  int rest;

  /* The leading run of bytes below 0x80 stays where it is.  */
  while (src < limit && *src < 0x80)
    src++;
  if (src == limit)
    return bytes;

  /* Move the remaining bytes to the end of the work area so that the
     expanding output written at DST does not overtake the input.  */
  dst = src;
  rest = limit - src;
  limit = str + len;
  safe_bcopy ((char *) src, (char *) (limit - rest), rest);
  src = limit - rest;

  while (src < limit)
    {
      int c = *src++;

      if (c >= 0x80)
        c = BYTE8_TO_CHAR (c);
      dst += CHAR_STRING (c, dst);
    }
  return (dst - str);
}
711
/* Arrange the multibyte text at STR of BYTES bytes as a unibyte text,
   in place.  Each sequence whose lead byte satisfies
   CHAR_BYTE8_HEAD_P is replaced by the single byte given by
   CHAR_TO_BYTE8; all other sequences are copied unchanged.  Return
   the number of bytes of the resulting text.  */

int
str_as_unibyte (str, bytes)
     unsigned char *str;
     int bytes;
{
  const unsigned char *p = str, *endp = str + bytes;
  unsigned char *to;
  int head, len;

  /* Skip the prefix that contains nothing to convert.  */
  for (; p < endp; p += len)
    {
      head = *p;
      len = BYTES_BY_CHAR_HEAD (head);
      if (CHAR_BYTE8_HEAD_P (head))
        break;
    }

  /* From here on the output can only be shorter than the input, so
     writing at TO never overtakes reading at P.  */
  to = str + (p - str);
  while (p < endp)
    {
      head = *p;
      len = BYTES_BY_CHAR_HEAD (head);
      if (! CHAR_BYTE8_HEAD_P (head))
        {
          for (; len > 0; len--)
            *to++ = *p++;
        }
      else
        {
          int c = STRING_CHAR_ADVANCE (p);

          *to++ = CHAR_TO_BYTE8 (c);
        }
    }
  return (to - str);
}
750
751int
752string_count_byte8 (string)
753 Lisp_Object string;
754{
755 int multibyte = STRING_MULTIBYTE (string);
0168c3d8
KH
756 int nbytes = STRING_BYTES (XSTRING (string));
757 unsigned char *p = XSTRING (string)->data;
758 unsigned char *pend = p + nbytes;
759 int count = 0;
760 int c, len;
761
762 if (multibyte)
763 while (p < pend)
764 {
765 c = *p;
766 len = BYTES_BY_CHAR_HEAD (c);
767
768 if (CHAR_BYTE8_HEAD_P (c))
769 count++;
770 p += len;
771 }
772 else
773 while (p < pend)
774 {
775 if (*p++ >= 0x80)
776 count++;
777 }
778 return count;
779}
780
781
782Lisp_Object
783string_escape_byte8 (string)
784 Lisp_Object string;
785{
786 int nchars = XSTRING (string)->size;
787 int nbytes = STRING_BYTES (XSTRING (string));
788 int multibyte = STRING_MULTIBYTE (string);
789 int byte8_count;
15843e6f
KH
790 const unsigned char *src, *src_end;
791 unsigned char *dst;
0168c3d8
KH
792 Lisp_Object val;
793 int c, len;
794
795 if (multibyte && nchars == nbytes)
796 return string;
797
798 byte8_count = string_count_byte8 (string);
799
800 if (byte8_count == 0)
801 return string;
802
803 if (multibyte)
804 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
7b40ebaf 805 val = make_uninit_multibyte_string (nchars + byte8_count * 3,
0168c3d8
KH
806 nbytes + byte8_count * 2);
807 else
808 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
809 val = make_uninit_string (nbytes + byte8_count * 3);
810
811 src = XSTRING (string)->data;
812 src_end = src + nbytes;
813 dst = XSTRING (val)->data;
814 if (multibyte)
815 while (src < src_end)
816 {
817 c = *src;
818 len = BYTES_BY_CHAR_HEAD (c);
819
820 if (CHAR_BYTE8_HEAD_P (c))
821 {
822 c = STRING_CHAR_ADVANCE (src);
823 c = CHAR_TO_BYTE8 (c);
1889b238 824 sprintf ((char *) dst, "\\%03o", c);
0168c3d8
KH
825 dst += 4;
826 }
827 else
828 while (len--) *dst++ = *src++;
829 }
830 else
831 while (src < src_end)
832 {
833 c = *src++;
834 if (c >= 0x80)
835 {
1889b238 836 sprintf ((char *) dst, "\\%03o", c);
0168c3d8
KH
837 dst += 4;
838 }
839 else
840 *dst++ = c;
841 }
842 return val;
843}
844
845\f
846DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
847 doc: /*
d2e83296
DL
848Concatenate all the argument characters and make the result a string.
849usage: (string &rest CHARACTERS) */)
0168c3d8
KH
850 (n, args)
851 int n;
852 Lisp_Object *args;
853{
854 int i;
855 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
856 unsigned char *p = buf;
857 int c;
858
859 for (i = 0; i < n; i++)
860 {
861 CHECK_CHARACTER (args[i]);
862 c = XINT (args[i]);
863 p += CHAR_STRING (c, p);
864 }
865
866 return make_string_from_bytes ((char *) buf, n, p - buf);
867}
868
/* One-time initialization hook for this file.  There is currently
   nothing to initialize, so the body is empty.  */

void
init_character_once ()
{
}
873
874#ifdef emacs
875
876void
877syms_of_character ()
878{
879 DEFSYM (Qcharacterp, "characterp");
880 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
881
882 staticpro (&Vchar_unify_table);
883 Vchar_unify_table = Qnil;
884
885 defsubr (&Smax_char);
886 defsubr (&Scharacterp);
887 defsubr (&Sunibyte_char_to_multibyte);
888 defsubr (&Smultibyte_char_to_unibyte);
889 defsubr (&Schar_bytes);
890 defsubr (&Schar_width);
891 defsubr (&Sstring_width);
892 defsubr (&Schar_direction);
893 defsubr (&Schars_in_region);
894 defsubr (&Sstring);
895
896 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
897 doc: /*
898Vector of cons cell of a symbol and translation table ever defined.
899An ID of a translation table is an index of this vector. */);
900 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
901
902 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
903 doc: /*
904A char-table for characters which invoke auto-filling.
905Such characters have value t in this table. */);
906 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
6cc0e1ca
DL
907 CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
908 CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
0168c3d8
KH
909
910 DEFVAR_LISP ("char-width-table", &Vchar_width_table,
911 doc: /*
912A char-table for width (columns) of each character. */);
913 Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
be8b50bc
KH
914 char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4));
915 char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
916 make_number (4));
0168c3d8
KH
917
918 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
919 doc: /* A char-table for direction of each character. */);
920 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
921
922 DEFVAR_LISP ("printable-chars", &Vprintable_chars,
923 doc: /* A char-table for each printable character. */);
db6d4189 924 Vprintable_chars = Fmake_char_table (Qnil, Qnil);
15843e6f 925
c57f3328
KH
926 DEFVAR_LISP ("char-script-table", &Vchar_script_table,
927 doc: /* Char table of script symbols.
928It has one extra slot whose value is a list of script symbols. */);
929
930 /* Intern this now in case it isn't already done.
931 Setting this variable twice is harmless.
932 But don't staticpro it here--that is done in alloc.c. */
933 Qchar_table_extra_slots = intern ("char-table-extra-slots");
934 DEFSYM (Qchar_script_table, "char-script-table");
935 Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
936 Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
0168c3d8
KH
937}
938
939#endif /* emacs */