/* Basic character support.
Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
- Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
Free Software Foundation, Inc.
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H13PRO009
This file is part of GNU Emacs.
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING. If not, write to the
-Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
/* At first, see the document in `character.h' to understand the code
in this file. */
Lisp_Object Qauto_fill_chars;
/* Char-table of information about which character to unify to which
- Unicode character. */
+ Unicode character. Mainly used by the macro MAYBE_UNIFY_CHAR. */
Lisp_Object Vchar_unify_table;
/* A char-table. An element is non-nil iff the corresponding
static Lisp_Object Qchar_script_table;
+Lisp_Object Vunicode_category_table;
+
/* Mapping table from unibyte chars to multibyte chars. */
int unibyte_to_multibyte_table[256];
DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
Sunibyte_char_to_multibyte, 1, 1, 0,
- doc: /* Convert the unibyte character CH to multibyte character.
-The multibyte character is a result of decoding CH by
-the current unibyte charset (see `unibyte-charset'). */)
+ doc: /* Convert the byte CH to multibyte character. */)
(ch)
Lisp_Object ch;
{
DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
Smultibyte_char_to_unibyte, 1, 1, 0,
- doc: /* Convert the multibyte character CH to unibyte character.\n\
-The unibyte character is a result of encoding CH by
-the current primary charset (value of `charset-primary'). */)
+ doc: /* Convert the multibyte character CH to a byte.
+If the multibyte character does not represent a byte, return -1. */)
(ch)
Lisp_Object ch;
{
- int c;
+ int cm;
CHECK_CHARACTER (ch);
- c = XFASTINT (ch);
- c = CHAR_TO_BYTE8 (c);
- return make_number (c);
+ cm = XFASTINT (ch);
+ if (cm < 256)
+ /* Can't distinguish a byte read from a unibyte buffer from
+ a latin1 char, so let's let it slide. */
+ return ch;
+ else
+ {
+ int cu = CHAR_TO_BYTE_SAFE (cm);
+ return make_number (cu);
+ }
}
DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
doc: /* Return 1 regardless of the argument CHAR.
-This is now an obsolete function. We keep it just for backward compatibility. */)
+This is now an obsolete function. We keep it just for backward compatibility.
+usage: (char-bytes CHAR) */)
(ch)
Lisp_Object ch;
{
DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
doc: /* Return width of CHAR when displayed in the current buffer.
The width is measured by how many columns it occupies on the screen.
-Tab is taken to occupy `tab-width' columns. */)
+Tab is taken to occupy `tab-width' columns.
+usage: (char-width CHAR) */)
(ch)
Lisp_Object ch;
{
respectively. */
int
-c_string_width (str, len, precision, nchars, nbytes)
- const unsigned char *str;
- int precision, *nchars, *nbytes;
+c_string_width (const unsigned char *str, int len, int precision, int *nchars, int *nbytes)
{
int i = 0, i_byte = 0;
int width = 0;
When calculating width of a multibyte character in STRING,
only the base leading-code is considered; the validity of
the following bytes is not checked. Tabs in STRING are always
-taken to occupy `tab-width' columns. */)
+taken to occupy `tab-width' columns.
+usage: (string-width STRING) */)
(str)
Lisp_Object str;
{
DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
doc: /* Return the direction of CHAR.
-The returned value is 0 for left-to-right and 1 for right-to-left. */)
+The returned value is 0 for left-to-right and 1 for right-to-left.
+usage: (char-direction CHAR) */)
(ch)
Lisp_Object ch;
{
return (to - str);
}
+/* Convert eight-bit chars in SRC (in multibyte form) to the
+ corresponding byte and store in DST. CHARS is the number of
+ characters in SRC. The value is the number of bytes stored in DST.
+ Usually, the value is the same as CHARS, but is less than it if SRC
+ contains a non-ASCII, non-eight-bit characater. If ACCEPT_LATIN_1
+ is nonzero, a Latin-1 character is accepted and converted to a byte
+ of that character code.
+ Note: Currently the arg ACCEPT_LATIN_1 is not used. */
+
+EMACS_INT
+str_to_unibyte (src, dst, chars, accept_latin_1)
+ const unsigned char *src;
+ unsigned char *dst;
+ EMACS_INT chars;
+ int accept_latin_1;
+{
+ EMACS_INT i;
+
+ for (i = 0; i < chars; i++)
+ {
+ int c = STRING_CHAR_ADVANCE (src);
+
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ else if (! ASCII_CHAR_P (c)
+ && (! accept_latin_1 || c >= 0x100))
+ return i;
+ *dst++ = c;
+ }
+ return i;
+}
+
+
int
string_count_byte8 (string)
Lisp_Object string;
return make_string_from_bytes ((char *) buf, n, p - buf);
}
-DEFUN ("char-resolve-modifers", Fchar_resolve_modifiers,
+DEFUN ("char-resolve-modifiers", Fchar_resolve_modifiers,
Schar_resolve_modifiers, 1, 1, 0,
doc: /* Resolve modifiers in the character CHAR.
The value is a character with modifiers resolved into the character
code. Unresolved modifiers are kept in the value.
-usage: (char-resolve-modifers CHAR) */)
+usage: (char-resolve-modifiers CHAR) */)
(character)
Lisp_Object character;
{
return make_number (char_resolve_modifier_mask (c));
}
+DEFUN ("get-byte", Fget_byte, Sget_byte, 0, 2, 0,
+ doc: /* Return a byte value of a character at point.
+Optional 1st arg POSITION, if non-nil, is a position of a character to get
+a byte value.
+Optional 2nd arg STRING, if non-nil, is a string of which first
+character is a target to get a byte value. In this case, POSITION, if
+non-nil, is an index of a target character in the string.
+
+If the current buffer (or STRING) is multibyte, and the target
+character is not ASCII nor 8-bit character, an error is signalled. */)
+ (position, string)
+ Lisp_Object position, string;
+{
+ int c;
+ EMACS_INT pos;
+ unsigned char *p;
+
+ if (NILP (string))
+ {
+ if (NILP (position))
+ {
+ p = PT_ADDR;
+ }
+ else
+ {
+ CHECK_NUMBER_COERCE_MARKER (position);
+ if (XINT (position) < BEGV || XINT (position) >= ZV)
+ args_out_of_range_3 (position, make_number (BEGV), make_number (ZV));
+ pos = XFASTINT (position);
+ p = CHAR_POS_ADDR (pos);
+ }
+ if (NILP (current_buffer->enable_multibyte_characters))
+ return make_number (*p);
+ }
+ else
+ {
+ CHECK_STRING (string);
+ if (NILP (position))
+ {
+ p = SDATA (string);
+ }
+ else
+ {
+ CHECK_NATNUM (position);
+ if (XINT (position) >= SCHARS (string))
+ args_out_of_range (string, position);
+ pos = XFASTINT (position);
+ p = SDATA (string) + string_char_to_byte (string, pos);
+ }
+ if (! STRING_MULTIBYTE (string))
+ return make_number (*p);
+ }
+ c = STRING_CHAR (p, 0);
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ else if (! ASCII_CHAR_P (c))
+ error ("Not an ASCII nor an 8-bit character: %d", c);
+ return make_number (c);
+}
+
+
void
init_character_once ()
{
defsubr (&Sstring);
defsubr (&Sunibyte_string);
defsubr (&Schar_resolve_modifiers);
+ defsubr (&Sget_byte);
DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
doc: /*
Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars,
- doc: /* Alist of scripts vs the representative characters. */);
+ doc: /* Alist of scripts vs the representative characters.
+Each element is a cons (SCRIPT . CHARS).
+SCRIPT is a symbol representing a script or a subgroup of a script.
+CHARS is a list or a vector of characters.
+If it is a list, all characters in the list are necessary for supporting SCRIPT.
+If it is a vector, one of the characters in the vector is necessary.
+This variable is used to find a font for a specific script. */);
Vscript_representative_chars = Qnil;
+
+ DEFVAR_LISP ("unicode-category-table", &Vunicode_category_table,
+ doc: /* Char table of Unicode's "General Category".
+All Unicode characters have one of the following values (symbol):
+ Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po,
+ Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn
+See The Unicode Standard for the meaning of those values. */);
+ /* The correct char-table is setup in characters.el. */
+ Vunicode_category_table = Qnil;
}
#endif /* emacs */