From: Kenichi Handa
Date: Tue, 20 Aug 2013 14:56:03 +0000 (+0300)
Subject: src/character.c (string_char): Improve commentary.
X-Git-Url: https://git.hcoop.net/bpt/emacs.git/commitdiff_plain/3f246b657225c786c460b22d774ab0b2e7488b55

src/character.c (string_char): Improve commentary.
---

diff --git a/src/ChangeLog b/src/ChangeLog
index e21d82bdc0..997459f01c 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,7 @@
+2013-08-20 Kenichi Handa
+
+        * character.c (string_char): Improve commentary.
+
 2013-08-20 Paul Eggert
 
         * image.c (SIGNATURE_DIGESTSIZE): Remove.
diff --git a/src/character.c b/src/character.c
index b2caaa290a..1bde2364e3 100644
--- a/src/character.c
+++ b/src/character.c
@@ -174,11 +174,14 @@ string_char (const unsigned char *p, const unsigned char **advanced, int *len)
 
   if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
     {
+      /* 1-, 2-, and 3-byte sequences can be handled by the macro. */
       c = STRING_CHAR_ADVANCE (p);
     }
   else if (! (*p & 0x08))
     {
-      c = ((((p)[0] & 0xF) << 18)
+      /* A 4-byte sequence of this form:
+         11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+      c = ((((p)[0] & 0x7) << 18)
            | (((p)[1] & 0x3F) << 12)
            | (((p)[2] & 0x3F) << 6)
            | ((p)[3] & 0x3F));
@@ -186,7 +189,14 @@ string_char (const unsigned char *p, const unsigned char **advanced, int *len)
     }
   else
     {
-      c = ((((p)[1] & 0x3F) << 18)
+      /* A 5-byte sequence of this form:
+
+         111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+         Note that the top 4 `x's are always 0, so shifting p[1] can
+         never exceed the maximum valid character codepoint. */
+      c = (/* (((p)[0] & 0x3) << 24) ... always 0, so no need to shift. */
+           (((p)[1] & 0x3F) << 18)
            | (((p)[2] & 0x3F) << 12)
            | (((p)[3] & 0x3F) << 6)
            | ((p)[4] & 0x3F));