From 24d56127bb0f07bcb477e2c73ccc3cac0c51ee73 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ludovic=20Court=C3=A8s?= Date: Wed, 27 May 2009 16:50:40 +0200 Subject: [PATCH] Use GNU libunistring and Gnulib modules needed by R6RS bytevectors and ports. * m4/gnulib-cache.m4 (gl_MODULES): Add `byteswap', `iconv_open-utf', `libunistring', `striconveh', and `string'. --- build-aux/config.rpath | 24 +- lib/Makefile.am | 239 +++++- lib/byteswap.in.h | 44 + lib/c-ctype.c | 396 +++++++++ lib/c-ctype.h | 295 +++++++ lib/c-strcase.h | 55 ++ lib/c-strcasecmp.c | 57 ++ lib/c-strcaseeq.h | 184 +++++ lib/c-strncasecmp.c | 57 ++ lib/iconv.c | 450 +++++++++++ lib/iconv.in.h | 71 ++ lib/iconv_close.c | 47 ++ lib/iconv_open-aix.gperf | 44 + lib/iconv_open-hpux.gperf | 56 ++ lib/iconv_open-irix.gperf | 31 + lib/iconv_open-osf.gperf | 50 ++ lib/iconv_open.c | 172 ++++ lib/iconveh.h | 41 + lib/striconveh.c | 1251 +++++++++++++++++++++++++++++ lib/striconveh.h | 120 +++ lib/string.in.h | 605 ++++++++++++++ lib/unistr.h | 681 ++++++++++++++++ lib/unistr/u8-mbtouc-aux.c | 158 ++++ lib/unistr/u8-mbtouc-unsafe-aux.c | 168 ++++ lib/unistr/u8-mbtouc-unsafe.c | 179 +++++ lib/unistr/u8-mbtouc.c | 168 ++++ lib/unistr/u8-mbtoucr.c | 285 +++++++ lib/unistr/u8-prev.c | 93 +++ lib/unistr/u8-uctomb-aux.c | 69 ++ lib/unistr/u8-uctomb.c | 88 ++ lib/unitypes.h | 26 + m4/byteswap.m4 | 18 + m4/gnulib-cache.m4 | 7 +- m4/gnulib-comp.m4 | 55 ++ m4/iconv.m4 | 180 +++++ m4/iconv_h.m4 | 34 + m4/iconv_open.m4 | 237 ++++++ m4/lib-ld.m4 | 110 +++ m4/lib-link.m4 | 761 ++++++++++++++++++ m4/lib-prefix.m4 | 224 ++++++ m4/libunistring.m4 | 37 + m4/string_h.m4 | 92 +++ 42 files changed, 7947 insertions(+), 12 deletions(-) create mode 100644 lib/byteswap.in.h create mode 100644 lib/c-ctype.c create mode 100644 lib/c-ctype.h create mode 100644 lib/c-strcase.h create mode 100644 lib/c-strcasecmp.c create mode 100644 lib/c-strcaseeq.h create mode 100644 lib/c-strncasecmp.c create mode 100644 lib/iconv.c create mode 100644 lib/iconv.in.h create mode 100644 lib/iconv_close.c create mode 100644 lib/iconv_open-aix.gperf create mode 100644 lib/iconv_open-hpux.gperf create mode 100644 lib/iconv_open-irix.gperf create mode 100644 lib/iconv_open-osf.gperf create mode 100644 lib/iconv_open.c create mode 100644 lib/iconveh.h create mode 100644 lib/striconveh.c create mode 100644 lib/striconveh.h create mode 100644 lib/string.in.h create mode 100644 lib/unistr.h create mode 100644 lib/unistr/u8-mbtouc-aux.c create mode 100644 lib/unistr/u8-mbtouc-unsafe-aux.c create mode 100644 lib/unistr/u8-mbtouc-unsafe.c create mode 100644 lib/unistr/u8-mbtouc.c create mode 100644 lib/unistr/u8-mbtoucr.c create mode 100644 lib/unistr/u8-prev.c create mode 100644 lib/unistr/u8-uctomb-aux.c create mode 100644 lib/unistr/u8-uctomb.c create mode 100644 lib/unitypes.h create mode 100644 m4/byteswap.m4 create mode 100644 m4/iconv.m4 create mode 100644 m4/iconv_h.m4 create mode 100644 m4/iconv_open.m4 create mode 100644 m4/lib-ld.m4 create mode 100644 m4/lib-link.m4 create mode 100644 m4/lib-prefix.m4 create mode 100644 m4/libunistring.m4 create mode 100644 m4/string_h.m4 diff --git a/build-aux/config.rpath b/build-aux/config.rpath index 35f959b87..85c2f209b 100755 --- a/build-aux/config.rpath +++ b/build-aux/config.rpath @@ -47,7 +47,7 @@ for cc_temp in $CC""; do done cc_basename=`echo "$cc_temp" | sed -e 's%^.*/%%'` -# Code taken from libtool.m4's AC_LIBTOOL_PROG_COMPILER_PIC. +# Code taken from libtool.m4's _LT_COMPILER_PIC. wl= if test "$GCC" = yes; then @@ -64,7 +64,7 @@ else ;; esac ;; - mingw* | cygwin* | pw32* | os2*) + mingw* | cygwin* | pw32* | os2* | cegcc*) ;; hpux9* | hpux10* | hpux11*) wl='-Wl,' @@ -76,7 +76,13 @@ else ;; linux* | k*bsd*-gnu) case $cc_basename in - icc* | ecc*) + ecc*) + wl='-Wl,' + ;; + icc* | ifort*) + wl='-Wl,' + ;; + lf95*) wl='-Wl,' ;; pgcc | pgf77 | pgf90) @@ -124,7 +130,7 @@ else esac fi -# Code taken from libtool.m4's AC_LIBTOOL_PROG_LD_SHLIBS. +# Code taken from libtool.m4's _LT_LINKER_SHLIBS. hardcode_libdir_flag_spec= hardcode_libdir_separator= @@ -132,7 +138,7 @@ hardcode_direct=no hardcode_minus_L=no case "$host_os" in - cygwin* | mingw* | pw32*) + cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++. @@ -182,7 +188,7 @@ if test "$with_gnu_ld" = yes; then ld_shlibs=no fi ;; - cygwin* | mingw* | pw32*) + cygwin* | mingw* | pw32* | cegcc*) # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. hardcode_libdir_flag_spec='-L$libdir' @@ -326,7 +332,7 @@ else ;; bsdi[45]*) ;; - cygwin* | mingw* | pw32*) + cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++. # hardcode_libdir_flag_spec is actually meaningless, as there is @@ -494,7 +500,7 @@ else fi # Check dynamic linker characteristics -# Code taken from libtool.m4's AC_LIBTOOL_SYS_DYNAMIC_LINKER. +# Code taken from libtool.m4's _LT_SYS_DYNAMIC_LINKER. # Unlike libtool.m4, here we don't care about _all_ names of the library, but # only about the one the linker finds when passed -lNAME. This is the last # element of library_names_spec in libtool.m4, or possibly two of them if the @@ -517,7 +523,7 @@ case "$host_os" in bsdi[45]*) library_names_spec='$libname$shrext' ;; - cygwin* | mingw* | pw32*) + cygwin* | mingw* | pw32* | cegcc*) shrext=.dll library_names_spec='$libname.dll.a $libname.lib' ;; diff --git a/lib/Makefile.am b/lib/Makefile.am index f321b0b16..6f2f5c5fa 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -9,9 +9,9 @@ # the same distribution terms as the rest of that program. # # Generated by gnulib-tool. -# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl --libtool --macro-prefix=gl --no-vc-files alloca-opt autobuild count-one-bits environ extensions flock fpieee full-read full-write lib-symbol-visibility putenv stdlib strcase strftime +# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl --libtool --macro-prefix=gl --no-vc-files alloca-opt autobuild byteswap count-one-bits environ extensions flock fpieee full-read full-write iconv_open-utf lib-symbol-visibility libunistring putenv stdlib strcase strftime striconveh string -AUTOMAKE_OPTIONS = 1.5 gnits +AUTOMAKE_OPTIONS = 1.5 gnits subdir-objects SUBDIRS = noinst_HEADERS = @@ -54,6 +54,42 @@ EXTRA_DIST += alloca.in.h ## end gnulib module alloca-opt +## begin gnulib module byteswap + +BUILT_SOURCES += $(BYTESWAP_H) + +# We need the following in order to create when the system +# doesn't have one. +byteswap.h: byteswap.in.h + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ + cat $(srcdir)/byteswap.in.h; \ + } > $@-t + mv -f $@-t $@ +MOSTLYCLEANFILES += byteswap.h byteswap.h-t + +EXTRA_DIST += byteswap.in.h + +## end gnulib module byteswap + +## begin gnulib module c-ctype + +libgnu_la_SOURCES += c-ctype.h c-ctype.c + +## end gnulib module c-ctype + +## begin gnulib module c-strcase + +libgnu_la_SOURCES += c-strcase.h c-strcasecmp.c c-strncasecmp.c + +## end gnulib module c-strcase + +## begin gnulib module c-strcaseeq + + +EXTRA_DIST += c-strcaseeq.h + +## end gnulib module c-strcaseeq + ## begin gnulib module configmake # Retrieve values of the variables through 'configure' followed by @@ -143,6 +179,72 @@ libgnu_la_SOURCES += full-write.h full-write.c ## end gnulib module full-write +## begin gnulib module gperf + +GPERF = gperf + +## end gnulib module gperf + +## begin gnulib module havelib + + +EXTRA_DIST += $(top_srcdir)/build-aux/config.rpath + +## end gnulib module havelib + +## begin gnulib module iconv_open + +BUILT_SOURCES += $(ICONV_H) + +# We need the following in order to create when the system +# doesn't have one that works with the given compiler. +iconv.h: iconv.in.h + rm -f $@-t $@ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \ + sed -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ + -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ + -e 's|@''NEXT_ICONV_H''@|$(NEXT_ICONV_H)|g' \ + -e 's|@''ICONV_CONST''@|$(ICONV_CONST)|g' \ + -e 's|@''REPLACE_ICONV''@|$(REPLACE_ICONV)|g' \ + -e 's|@''REPLACE_ICONV_OPEN''@|$(REPLACE_ICONV_OPEN)|g' \ + -e 's|@''REPLACE_ICONV_UTF''@|$(REPLACE_ICONV_UTF)|g' \ + < $(srcdir)/iconv.in.h; \ + } > $@-t + mv $@-t $@ +MOSTLYCLEANFILES += iconv.h iconv.h-t + +iconv_open-aix.h: iconv_open-aix.gperf + $(GPERF) -m 10 $(srcdir)/iconv_open-aix.gperf > $(srcdir)/iconv_open-aix.h-t + mv $(srcdir)/iconv_open-aix.h-t $(srcdir)/iconv_open-aix.h +iconv_open-hpux.h: iconv_open-hpux.gperf + $(GPERF) -m 10 $(srcdir)/iconv_open-hpux.gperf > $(srcdir)/iconv_open-hpux.h-t + mv $(srcdir)/iconv_open-hpux.h-t $(srcdir)/iconv_open-hpux.h +iconv_open-irix.h: iconv_open-irix.gperf + $(GPERF) -m 10 $(srcdir)/iconv_open-irix.gperf > $(srcdir)/iconv_open-irix.h-t + mv $(srcdir)/iconv_open-irix.h-t $(srcdir)/iconv_open-irix.h +iconv_open-osf.h: iconv_open-osf.gperf + $(GPERF) -m 10 $(srcdir)/iconv_open-osf.gperf > $(srcdir)/iconv_open-osf.h-t + mv $(srcdir)/iconv_open-osf.h-t $(srcdir)/iconv_open-osf.h +BUILT_SOURCES += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h +MOSTLYCLEANFILES += iconv_open-aix.h-t iconv_open-hpux.h-t iconv_open-irix.h-t iconv_open-osf.h-t +MAINTAINERCLEANFILES += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h +EXTRA_DIST += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h + +EXTRA_DIST += iconv.in.h iconv_open-aix.gperf iconv_open-hpux.gperf iconv_open-irix.gperf iconv_open-osf.gperf iconv_open.c + +EXTRA_libgnu_la_SOURCES += iconv_open.c + +## end gnulib module iconv_open + +## begin gnulib module iconv_open-utf + + +EXTRA_DIST += iconv.c iconv_close.c + +EXTRA_libgnu_la_SOURCES += iconv.c iconv_close.c + +## end gnulib module iconv_open-utf + ## begin gnulib module lib-symbol-visibility # The value of $(CFLAG_VISIBILITY) needs to be added to the CFLAGS for the @@ -442,6 +544,95 @@ EXTRA_libgnu_la_SOURCES += strftime.c ## end gnulib module strftime +## begin gnulib module striconveh + +libgnu_la_SOURCES += striconveh.h striconveh.c +if GL_COND_LIBTOOL +libgnu_la_LDFLAGS += $(LTLIBICONV) +endif + +EXTRA_DIST += iconveh.h + +## end gnulib module striconveh + +## begin gnulib module string + +BUILT_SOURCES += string.h + +# We need the following in order to create when the system +# doesn't have one that works with the given compiler. +string.h: string.in.h + rm -f $@-t $@ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \ + sed -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ + -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ + -e 's|@''NEXT_STRING_H''@|$(NEXT_STRING_H)|g' \ + -e 's|@''GNULIB_MBSLEN''@|$(GNULIB_MBSLEN)|g' \ + -e 's|@''GNULIB_MBSNLEN''@|$(GNULIB_MBSNLEN)|g' \ + -e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \ + -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \ + -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \ + -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \ + -e 's|@''GNULIB_MBSNCASECMP''@|$(GNULIB_MBSNCASECMP)|g' \ + -e 's|@''GNULIB_MBSPCASECMP''@|$(GNULIB_MBSPCASECMP)|g' \ + -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \ + -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \ + -e 's|@''GNULIB_MBSPBRK''@|$(GNULIB_MBSPBRK)|g' \ + -e 's|@''GNULIB_MBSSPN''@|$(GNULIB_MBSSPN)|g' \ + -e 's|@''GNULIB_MBSSEP''@|$(GNULIB_MBSSEP)|g' \ + -e 's|@''GNULIB_MBSTOK_R''@|$(GNULIB_MBSTOK_R)|g' \ + -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ + -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ + -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \ + -e 's|@''GNULIB_RAWMEMCHR''@|$(GNULIB_RAWMEMCHR)|g' \ + -e 's|@''GNULIB_STPCPY''@|$(GNULIB_STPCPY)|g' \ + -e 's|@''GNULIB_STPNCPY''@|$(GNULIB_STPNCPY)|g' \ + -e 's|@''GNULIB_STRCHRNUL''@|$(GNULIB_STRCHRNUL)|g' \ + -e 's|@''GNULIB_STRDUP''@|$(GNULIB_STRDUP)|g' \ + -e 's|@''GNULIB_STRNDUP''@|$(GNULIB_STRNDUP)|g' \ + -e 's|@''GNULIB_STRNLEN''@|$(GNULIB_STRNLEN)|g' \ + -e 's|@''GNULIB_STRPBRK''@|$(GNULIB_STRPBRK)|g' \ + -e 's|@''GNULIB_STRSEP''@|$(GNULIB_STRSEP)|g' \ + -e 's|@''GNULIB_STRSTR''@|$(GNULIB_STRSTR)|g' \ + -e 's|@''GNULIB_STRCASESTR''@|$(GNULIB_STRCASESTR)|g' \ + -e 's|@''GNULIB_STRTOK_R''@|$(GNULIB_STRTOK_R)|g' \ + -e 's|@''GNULIB_STRERROR''@|$(GNULIB_STRERROR)|g' \ + -e 's|@''GNULIB_STRSIGNAL''@|$(GNULIB_STRSIGNAL)|g' \ + -e 's|@''GNULIB_STRVERSCMP''@|$(GNULIB_STRVERSCMP)|g' \ + -e 's|@''HAVE_DECL_MEMMEM''@|$(HAVE_DECL_MEMMEM)|g' \ + -e 's|@''HAVE_MEMPCPY''@|$(HAVE_MEMPCPY)|g' \ + -e 's|@''HAVE_DECL_MEMRCHR''@|$(HAVE_DECL_MEMRCHR)|g' \ + -e 's|@''HAVE_RAWMEMCHR''@|$(HAVE_RAWMEMCHR)|g' \ + -e 's|@''HAVE_STPCPY''@|$(HAVE_STPCPY)|g' \ + -e 's|@''HAVE_STPNCPY''@|$(HAVE_STPNCPY)|g' \ + -e 's|@''HAVE_STRCHRNUL''@|$(HAVE_STRCHRNUL)|g' \ + -e 's|@''HAVE_DECL_STRDUP''@|$(HAVE_DECL_STRDUP)|g' \ + -e 's|@''HAVE_STRNDUP''@|$(HAVE_STRNDUP)|g' \ + -e 's|@''HAVE_DECL_STRNDUP''@|$(HAVE_DECL_STRNDUP)|g' \ + -e 's|@''HAVE_DECL_STRNLEN''@|$(HAVE_DECL_STRNLEN)|g' \ + -e 's|@''HAVE_STRPBRK''@|$(HAVE_STRPBRK)|g' \ + -e 's|@''HAVE_STRSEP''@|$(HAVE_STRSEP)|g' \ + -e 's|@''HAVE_STRCASESTR''@|$(HAVE_STRCASESTR)|g' \ + -e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \ + -e 's|@''HAVE_DECL_STRERROR''@|$(HAVE_DECL_STRERROR)|g' \ + -e 's|@''HAVE_DECL_STRSIGNAL''@|$(HAVE_DECL_STRSIGNAL)|g' \ + -e 's|@''HAVE_STRVERSCMP''@|$(HAVE_STRVERSCMP)|g' \ + -e 's|@''REPLACE_MEMMEM''@|$(REPLACE_MEMMEM)|g' \ + -e 's|@''REPLACE_STRCASESTR''@|$(REPLACE_STRCASESTR)|g' \ + -e 's|@''REPLACE_STRDUP''@|$(REPLACE_STRDUP)|g' \ + -e 's|@''REPLACE_STRSTR''@|$(REPLACE_STRSTR)|g' \ + -e 's|@''REPLACE_STRERROR''@|$(REPLACE_STRERROR)|g' \ + -e 's|@''REPLACE_STRSIGNAL''@|$(REPLACE_STRSIGNAL)|g' \ + -e '/definition of GL_LINK_WARNING/r $(LINK_WARNING_H)' \ + < $(srcdir)/string.in.h; \ + } > $@-t + mv $@-t $@ +MOSTLYCLEANFILES += string.h string.h-t + +EXTRA_DIST += string.in.h + +## end gnulib module string + ## begin gnulib module strings BUILT_SOURCES += strings.h @@ -598,6 +789,50 @@ EXTRA_DIST += unistd.in.h ## end gnulib module unistd +## begin gnulib module unistr/base + + +EXTRA_DIST += unistr.h + +## end gnulib module unistr/base + +## begin gnulib module unistr/u8-mbtouc + +libgnu_la_SOURCES += unistr/u8-mbtouc.c unistr/u8-mbtouc-aux.c + +## end gnulib module unistr/u8-mbtouc + +## begin gnulib module unistr/u8-mbtouc-unsafe + +libgnu_la_SOURCES += unistr/u8-mbtouc-unsafe.c unistr/u8-mbtouc-unsafe-aux.c + +## end gnulib module unistr/u8-mbtouc-unsafe + +## begin gnulib module unistr/u8-mbtoucr + +libgnu_la_SOURCES += unistr/u8-mbtoucr.c + +## end gnulib module unistr/u8-mbtoucr + +## begin gnulib module unistr/u8-prev + +libgnu_la_SOURCES += unistr/u8-prev.c + +## end gnulib module unistr/u8-prev + +## begin gnulib module unistr/u8-uctomb + +libgnu_la_SOURCES += unistr/u8-uctomb.c unistr/u8-uctomb-aux.c + +## end gnulib module unistr/u8-uctomb + +## begin gnulib module unitypes + + +EXTRA_DIST += unitypes.h + +## end gnulib module unitypes + ## begin gnulib module verify libgnu_la_SOURCES += verify.h diff --git a/lib/byteswap.in.h b/lib/byteswap.in.h new file mode 100644 index 000000000..f03463db6 --- /dev/null +++ b/lib/byteswap.in.h @@ -0,0 +1,44 @@ +/* byteswap.h - Byte swapping + Copyright (C) 2005, 2007 Free Software Foundation, Inc. + Written by Oskar Liljeblad , 2005. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#ifndef _GL_BYTESWAP_H +#define _GL_BYTESWAP_H + +/* Given an unsigned 16-bit argument X, return the value corresponding to + X with reversed byte order. */ +#define bswap_16(x) ((((x) & 0x00FF) << 8) | \ + (((x) & 0xFF00) >> 8)) + +/* Given an unsigned 32-bit argument X, return the value corresponding to + X with reversed byte order. */ +#define bswap_32(x) ((((x) & 0x000000FF) << 24) | \ + (((x) & 0x0000FF00) << 8) | \ + (((x) & 0x00FF0000) >> 8) | \ + (((x) & 0xFF000000) >> 24)) + +/* Given an unsigned 64-bit argument X, return the value corresponding to + X with reversed byte order. */ +#define bswap_64(x) ((((x) & 0x00000000000000FFULL) << 56) | \ + (((x) & 0x000000000000FF00ULL) << 40) | \ + (((x) & 0x0000000000FF0000ULL) << 24) | \ + (((x) & 0x00000000FF000000ULL) << 8) | \ + (((x) & 0x000000FF00000000ULL) >> 8) | \ + (((x) & 0x0000FF0000000000ULL) >> 24) | \ + (((x) & 0x00FF000000000000ULL) >> 40) | \ + (((x) & 0xFF00000000000000ULL) >> 56)) + +#endif /* _GL_BYTESWAP_H */ diff --git a/lib/c-ctype.c b/lib/c-ctype.c new file mode 100644 index 000000000..e36a51340 --- /dev/null +++ b/lib/c-ctype.c @@ -0,0 +1,396 @@ +/* Character handling in C locale. + + Copyright 2000-2003, 2006 Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include + +/* Specification. */ +#define NO_C_CTYPE_MACROS +#include "c-ctype.h" + +/* The function isascii is not locale dependent. Its use in EBCDIC is + questionable. */ +bool +c_isascii (int c) +{ + return (c >= 0x00 && c <= 0x7f); +} + +bool +c_isalnum (int c) +{ +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII + return ((c >= '0' && c <= '9') + || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z')); +#else + return ((c >= '0' && c <= '9') + || (c >= 'A' && c <= 'Z') + || (c >= 'a' && c <= 'z')); +#endif +#else + switch (c) + { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isalpha (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII + return ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'); +#else + return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); +#endif +#else + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isblank (int c) +{ + return (c == ' ' || c == '\t'); +} + +bool +c_iscntrl (int c) +{ +#if C_CTYPE_ASCII + return ((c & ~0x1f) == 0 || c == 0x7f); +#else + switch (c) + { + case ' ': case '!': case '"': case '#': case '$': case '%': + case '&': case '\'': case '(': case ')': case '*': case '+': + case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '[': case '\\': case ']': case '^': case '_': case '`': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '{': case '|': case '}': case '~': + return 0; + default: + return 1; + } +#endif +} + +bool +c_isdigit (int c) +{ +#if C_CTYPE_CONSECUTIVE_DIGITS + return (c >= '0' && c <= '9'); +#else + switch (c) + { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + return 1; + default: + return 0; + } +#endif +} + +bool +c_islower (int c) +{ +#if C_CTYPE_CONSECUTIVE_LOWERCASE + return (c >= 'a' && c <= 'z'); +#else + switch (c) + { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isgraph (int c) +{ +#if C_CTYPE_ASCII + return (c >= '!' && c <= '~'); +#else + switch (c) + { + case '!': case '"': case '#': case '$': case '%': case '&': + case '\'': case '(': case ')': case '*': case '+': case ',': + case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '[': case '\\': case ']': case '^': case '_': case '`': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '{': case '|': case '}': case '~': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isprint (int c) +{ +#if C_CTYPE_ASCII + return (c >= ' ' && c <= '~'); +#else + switch (c) + { + case ' ': case '!': case '"': case '#': case '$': case '%': + case '&': case '\'': case '(': case ')': case '*': case '+': + case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '[': case '\\': case ']': case '^': case '_': case '`': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '{': case '|': case '}': case '~': + return 1; + default: + return 0; + } +#endif +} + +bool +c_ispunct (int c) +{ +#if C_CTYPE_ASCII + return ((c >= '!' && c <= '~') + && !((c >= '0' && c <= '9') + || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'))); +#else + switch (c) + { + case '!': case '"': case '#': case '$': case '%': case '&': + case '\'': case '(': case ')': case '*': case '+': case ',': + case '-': case '.': case '/': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case '[': case '\\': case ']': case '^': case '_': case '`': + case '{': case '|': case '}': case '~': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isspace (int c) +{ + return (c == ' ' || c == '\t' + || c == '\n' || c == '\v' || c == '\f' || c == '\r'); +} + +bool +c_isupper (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE + return (c >= 'A' && c <= 'Z'); +#else + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isxdigit (int c) +{ +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII + return ((c >= '0' && c <= '9') + || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'F')); +#else + return ((c >= '0' && c <= '9') + || (c >= 'A' && c <= 'F') + || (c >= 'a' && c <= 'f')); +#endif +#else + switch (c) + { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + return 1; + default: + return 0; + } +#endif +} + +int +c_tolower (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE + return (c >= 'A' && c <= 'Z' ? c - 'A' + 'a' : c); +#else + switch (c) + { + case 'A': return 'a'; + case 'B': return 'b'; + case 'C': return 'c'; + case 'D': return 'd'; + case 'E': return 'e'; + case 'F': return 'f'; + case 'G': return 'g'; + case 'H': return 'h'; + case 'I': return 'i'; + case 'J': return 'j'; + case 'K': return 'k'; + case 'L': return 'l'; + case 'M': return 'm'; + case 'N': return 'n'; + case 'O': return 'o'; + case 'P': return 'p'; + case 'Q': return 'q'; + case 'R': return 'r'; + case 'S': return 's'; + case 'T': return 't'; + case 'U': return 'u'; + case 'V': return 'v'; + case 'W': return 'w'; + case 'X': return 'x'; + case 'Y': return 'y'; + case 'Z': return 'z'; + default: return c; + } +#endif +} + +int +c_toupper (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE + return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c); +#else + switch (c) + { + case 'a': return 'A'; + case 'b': return 'B'; + case 'c': return 'C'; + case 'd': return 'D'; + case 'e': return 'E'; + case 'f': return 'F'; + case 'g': return 'G'; + case 'h': return 'H'; + case 'i': return 'I'; + case 'j': return 'J'; + case 'k': return 'K'; + case 'l': return 'L'; + case 'm': return 'M'; + case 'n': return 'N'; + case 'o': return 'O'; + case 'p': return 'P'; + case 'q': return 'Q'; + case 'r': return 'R'; + case 's': return 'S'; + case 't': return 'T'; + case 'u': return 'U'; + case 'v': return 'V'; + case 'w': return 'W'; + case 'x': return 'X'; + case 'y': return 'Y'; + case 'z': return 'Z'; + default: return c; + } +#endif +} diff --git a/lib/c-ctype.h b/lib/c-ctype.h new file mode 100644 index 000000000..d7b067e83 --- /dev/null +++ b/lib/c-ctype.h @@ -0,0 +1,295 @@ +/* Character handling in C locale. + + These functions work like the corresponding functions in , + except that they have the C (POSIX) locale hardwired, whereas the + functions' behaviour depends on the current locale set via + setlocale. + + Copyright (C) 2000-2003, 2006, 2008 Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef C_CTYPE_H +#define C_CTYPE_H + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* The functions defined in this file assume the "C" locale and a character + set without diacritics (ASCII-US or EBCDIC-US or something like that). + Even if the "C" locale on a particular system is an extension of the ASCII + character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it + is ISO-8859-1), the functions in this file recognize only the ASCII + characters. */ + + +/* Check whether the ASCII optimizations apply. */ + +/* ANSI C89 (and ISO C99 5.2.1.3 too) already guarantees that + '0', '1', ..., '9' have consecutive integer values. */ +#define C_CTYPE_CONSECUTIVE_DIGITS 1 + +#if ('A' <= 'Z') \ + && ('A' + 1 == 'B') && ('B' + 1 == 'C') && ('C' + 1 == 'D') \ + && ('D' + 1 == 'E') && ('E' + 1 == 'F') && ('F' + 1 == 'G') \ + && ('G' + 1 == 'H') && ('H' + 1 == 'I') && ('I' + 1 == 'J') \ + && ('J' + 1 == 'K') && ('K' + 1 == 'L') && ('L' + 1 == 'M') \ + && ('M' + 1 == 'N') && ('N' + 1 == 'O') && ('O' + 1 == 'P') \ + && ('P' + 1 == 'Q') && ('Q' + 1 == 'R') && ('R' + 1 == 'S') \ + && ('S' + 1 == 'T') && ('T' + 1 == 'U') && ('U' + 1 == 'V') \ + && ('V' + 1 == 'W') && ('W' + 1 == 'X') && ('X' + 1 == 'Y') \ + && ('Y' + 1 == 'Z') +#define C_CTYPE_CONSECUTIVE_UPPERCASE 1 +#endif + +#if ('a' <= 'z') \ + && ('a' + 1 == 'b') && ('b' + 1 == 'c') && ('c' + 1 == 'd') \ + && ('d' + 1 == 'e') && ('e' + 1 == 'f') && ('f' + 1 == 'g') \ + && ('g' + 1 == 'h') && ('h' + 1 == 'i') && ('i' + 1 == 'j') \ + && ('j' + 1 == 'k') && ('k' + 1 == 'l') && ('l' + 1 == 'm') \ + && ('m' + 1 == 'n') && ('n' + 1 == 'o') && ('o' + 1 == 'p') \ + && ('p' + 1 == 'q') && ('q' + 1 == 'r') && ('r' + 1 == 's') \ + && ('s' + 1 == 't') && ('t' + 1 == 'u') && ('u' + 1 == 'v') \ + && ('v' + 1 == 'w') && ('w' + 1 == 'x') && ('x' + 1 == 'y') \ + && ('y' + 1 == 'z') +#define C_CTYPE_CONSECUTIVE_LOWERCASE 1 +#endif + +#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126) +/* The character set is ASCII or one of its variants or extensions, not EBCDIC. + Testing the value of '\n' and '\r' is not relevant. */ +#define C_CTYPE_ASCII 1 +#endif + + +/* Function declarations. */ + +/* Unlike the functions in , which require an argument in the range + of the 'unsigned char' type, the functions here operate on values that are + in the 'unsigned char' range or in the 'char' range. In other words, + when you have a 'char' value, you need to cast it before using it as + argument to a function: + + const char *s = ...; + if (isalpha ((unsigned char) *s)) ... + + but you don't need to cast it for the functions defined in this file: + + const char *s = ...; + if (c_isalpha (*s)) ... + */ + +extern bool c_isascii (int c); /* not locale dependent */ + +extern bool c_isalnum (int c); +extern bool c_isalpha (int c); +extern bool c_isblank (int c); +extern bool c_iscntrl (int c); +extern bool c_isdigit (int c); +extern bool c_islower (int c); +extern bool c_isgraph (int c); +extern bool c_isprint (int c); +extern bool c_ispunct (int c); +extern bool c_isspace (int c); +extern bool c_isupper (int c); +extern bool c_isxdigit (int c); + +extern int c_tolower (int c); +extern int c_toupper (int c); + + +#if defined __GNUC__ && defined __OPTIMIZE__ && !defined __OPTIMIZE_SIZE__ && !defined NO_C_CTYPE_MACROS + +/* ASCII optimizations. */ + +#undef c_isascii +#define c_isascii(c) \ + ({ int __c = (c); \ + (__c >= 0x00 && __c <= 0x7f); \ + }) + +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII +#undef c_isalnum +#define c_isalnum(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z')); \ + }) +#else +#undef c_isalnum +#define c_isalnum(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || (__c >= 'A' && __c <= 'Z') \ + || (__c >= 'a' && __c <= 'z')); \ + }) +#endif +#endif + +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII +#undef c_isalpha +#define c_isalpha(c) \ + ({ int __c = (c); \ + ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z'); \ + }) +#else +#undef c_isalpha +#define c_isalpha(c) \ + ({ int __c = (c); \ + ((__c >= 'A' && __c <= 'Z') || (__c >= 'a' && __c <= 'z')); \ + }) +#endif +#endif + +#undef c_isblank +#define c_isblank(c) \ + ({ int __c = (c); \ + (__c == ' ' || __c == '\t'); \ + }) + +#if C_CTYPE_ASCII +#undef c_iscntrl +#define c_iscntrl(c) \ + ({ int __c = (c); \ + ((__c & ~0x1f) == 0 || __c == 0x7f); \ + }) +#endif + +#if C_CTYPE_CONSECUTIVE_DIGITS +#undef c_isdigit +#define c_isdigit(c) \ + ({ int __c = (c); \ + (__c >= '0' && __c <= '9'); \ + }) +#endif + +#if C_CTYPE_CONSECUTIVE_LOWERCASE +#undef c_islower +#define c_islower(c) \ + ({ int __c = (c); \ + (__c >= 'a' && __c <= 'z'); \ + }) +#endif + +#if C_CTYPE_ASCII +#undef c_isgraph +#define c_isgraph(c) \ + ({ int __c = (c); \ + (__c >= '!' && __c <= '~'); \ + }) +#endif + +#if C_CTYPE_ASCII +#undef c_isprint +#define c_isprint(c) \ + ({ int __c = (c); \ + (__c >= ' ' && __c <= '~'); \ + }) +#endif + +#if C_CTYPE_ASCII +#undef c_ispunct +#define c_ispunct(c) \ + ({ int _c = (c); \ + (c_isgraph (_c) && ! c_isalnum (_c)); \ + }) +#endif + +#undef c_isspace +#define c_isspace(c) \ + ({ int __c = (c); \ + (__c == ' ' || __c == '\t' \ + || __c == '\n' || __c == '\v' || __c == '\f' || __c == '\r'); \ + }) + +#if C_CTYPE_CONSECUTIVE_UPPERCASE +#undef c_isupper +#define c_isupper(c) \ + ({ int __c = (c); \ + (__c >= 'A' && __c <= 'Z'); \ + }) +#endif + +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII +#undef c_isxdigit +#define c_isxdigit(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'F')); \ + }) +#else +#undef c_isxdigit +#define c_isxdigit(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || (__c >= 'A' && __c <= 'F') \ + || (__c >= 'a' && __c <= 'f')); \ + }) +#endif +#endif + +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#undef c_tolower +#define c_tolower(c) \ + ({ int __c = (c); \ + (__c >= 'A' && __c <= 'Z' ? __c - 'A' + 'a' : __c); \ + }) +#undef c_toupper +#define c_toupper(c) \ + ({ int __c = (c); \ + (__c >= 'a' && __c <= 'z' ? __c - 'a' + 'A' : __c); \ + }) +#endif + +#endif /* optimizing for speed */ + + +#ifdef __cplusplus +} +#endif + +#endif /* C_CTYPE_H */ diff --git a/lib/c-strcase.h b/lib/c-strcase.h new file mode 100644 index 000000000..714a3c623 --- /dev/null +++ b/lib/c-strcase.h @@ -0,0 +1,55 @@ +/* Case-insensitive string comparison functions in C locale. + Copyright (C) 1995-1996, 2001, 2003, 2005 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef C_STRCASE_H +#define C_STRCASE_H + +#include + + +/* The functions defined in this file assume the "C" locale and a character + set without diacritics (ASCII-US or EBCDIC-US or something like that). + Even if the "C" locale on a particular system is an extension of the ASCII + character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it + is ISO-8859-1), the functions in this file recognize only the ASCII + characters. More precisely, one of the string arguments must be an ASCII + string; the other one can also contain non-ASCII characters (but then + the comparison result will be nonzero). */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Compare strings S1 and S2, ignoring case, returning less than, equal to or + greater than zero if S1 is lexicographically less than, equal to or greater + than S2. */ +extern int c_strcasecmp (const char *s1, const char *s2); + +/* Compare no more than N characters of strings S1 and S2, ignoring case, + returning less than, equal to or greater than zero if S1 is + lexicographically less than, equal to or greater than S2. */ +extern int c_strncasecmp (const char *s1, const char *s2, size_t n); + + +#ifdef __cplusplus +} +#endif + + +#endif /* C_STRCASE_H */ diff --git a/lib/c-strcasecmp.c b/lib/c-strcasecmp.c new file mode 100644 index 000000000..a52389883 --- /dev/null +++ b/lib/c-strcasecmp.c @@ -0,0 +1,57 @@ +/* c-strcasecmp.c -- case insensitive string comparator in C locale + Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include + +/* Specification. */ +#include "c-strcase.h" + +#include + +#include "c-ctype.h" + +int +c_strcasecmp (const char *s1, const char *s2) +{ + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2) + return 0; + + do + { + c1 = c_tolower (*p1); + c2 = c_tolower (*p2); + + if (c1 == '\0') + break; + + ++p1; + ++p2; + } + while (c1 == c2); + + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); +} diff --git a/lib/c-strcaseeq.h b/lib/c-strcaseeq.h new file mode 100644 index 000000000..cd29b66c7 --- /dev/null +++ b/lib/c-strcaseeq.h @@ -0,0 +1,184 @@ +/* Optimized case-insensitive string comparison in C locale. + Copyright (C) 2001-2002, 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible . */ + +#include "c-strcase.h" +#include "c-ctype.h" + +/* STRCASEEQ allows to optimize string comparison with a small literal string. + STRCASEEQ (s, "UTF-8", 'U','T','F','-','8',0,0,0,0) + is semantically equivalent to + c_strcasecmp (s, "UTF-8") == 0 + just faster. */ + +/* Help GCC to generate good code for string comparisons with + immediate strings. */ +#if defined (__GNUC__) && defined (__OPTIMIZE__) + +/* Case insensitive comparison of ASCII characters. */ +# if C_CTYPE_ASCII +# define CASEEQ(other,upper) \ + (c_isupper (upper) ? ((other) & ~0x20) == (upper) : (other) == (upper)) +# elif C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +# define CASEEQ(other,upper) \ + (c_isupper (upper) ? (other) == (upper) || (other) == (upper) - 'A' + 'a' : (other) == (upper)) +# else +# define CASEEQ(other,upper) \ + (c_toupper (other) == (upper)) +# endif + +static inline int +strcaseeq9 (const char *s1, const char *s2) +{ + return c_strcasecmp (s1 + 9, s2 + 9) == 0; +} + +static inline int +strcaseeq8 (const char *s1, const char *s2, char s28) +{ + if (CASEEQ (s1[8], s28)) + { + if (s28 == 0) + return 1; + else + return strcaseeq9 (s1, s2); + } + else + return 0; +} + +static inline int +strcaseeq7 (const char *s1, const char *s2, char s27, char s28) +{ + if (CASEEQ (s1[7], s27)) + { + if (s27 == 0) + return 1; + else + return strcaseeq8 (s1, s2, s28); + } + else + return 0; +} + +static inline int +strcaseeq6 (const char *s1, const char *s2, char s26, char s27, char s28) +{ + if (CASEEQ (s1[6], s26)) + { + if (s26 == 0) + return 1; + else + return strcaseeq7 (s1, s2, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq5 (const char *s1, const char *s2, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[5], s25)) + { + if (s25 == 0) + return 1; + else + return strcaseeq6 (s1, s2, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq4 (const char *s1, const char *s2, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[4], s24)) + { + if (s24 == 0) + return 1; + else + return strcaseeq5 (s1, s2, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq3 (const char *s1, const char *s2, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[3], s23)) + { + if (s23 == 0) + return 1; + else + return strcaseeq4 (s1, s2, s24, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq2 (const char *s1, const char *s2, char s22, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[2], s22)) + { + if (s22 == 0) + return 1; + else + return strcaseeq3 (s1, s2, s23, s24, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq1 (const char *s1, const char *s2, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[1], s21)) + { + if (s21 == 0) + return 1; + else + return strcaseeq2 (s1, s2, s22, s23, s24, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq0 (const char *s1, const char *s2, char s20, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[0], s20)) + { + if (s20 == 0) + return 1; + else + return strcaseeq1 (s1, s2, s21, s22, s23, s24, s25, s26, s27, s28); + } + else + return 0; +} + +#define STRCASEEQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \ + strcaseeq0 (s1, s2, s20, s21, s22, s23, s24, s25, s26, s27, s28) + +#else + +#define STRCASEEQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \ + (c_strcasecmp (s1, s2) == 0) + +#endif diff --git a/lib/c-strncasecmp.c b/lib/c-strncasecmp.c new file mode 100644 index 000000000..c1496ca41 --- /dev/null +++ b/lib/c-strncasecmp.c @@ -0,0 +1,57 @@ +/* c-strncasecmp.c -- case insensitive string comparator in C locale + Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include + +/* Specification. */ +#include "c-strcase.h" + +#include + +#include "c-ctype.h" + +int +c_strncasecmp (const char *s1, const char *s2, size_t n) +{ + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2 || n == 0) + return 0; + + do + { + c1 = c_tolower (*p1); + c2 = c_tolower (*p2); + + if (--n == 0 || c1 == '\0') + break; + + ++p1; + ++p2; + } + while (c1 == c2); + + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); +} diff --git a/lib/iconv.c b/lib/iconv.c new file mode 100644 index 000000000..56a84c456 --- /dev/null +++ b/lib/iconv.c @@ -0,0 +1,450 @@ +/* Character set conversion. + Copyright (C) 1999-2001, 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include + +/* Specification. */ +#include + +#include + +#if REPLACE_ICONV_UTF +# include +# include +# include +# include "unistr.h" +# ifndef uintptr_t +# define uintptr_t unsigned long +# endif +#endif + +#if REPLACE_ICONV_UTF + +/* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11. */ + +/* Return code if invalid. (xxx_mbtowc) */ +# define RET_ILSEQ -1 +/* Return code if no bytes were read. (xxx_mbtowc) */ +# define RET_TOOFEW -2 + +/* Return code if invalid. (xxx_wctomb) */ +# define RET_ILUNI -1 +/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ +# define RET_TOOSMALL -2 + +/* + * UTF-16BE + */ + +/* Specification: RFC 2781 */ + +static int +utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n) +{ + if (n >= 2) + { + ucs4_t wc = (s[0] << 8) + s[1]; + if (wc >= 0xd800 && wc < 0xdc00) + { + if (n >= 4) + { + ucs4_t wc2 = (s[2] << 8) + s[3]; + if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) + return RET_ILSEQ; + *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00); + return 4; + } + } + else if (wc >= 0xdc00 && wc < 0xe000) + { + return RET_ILSEQ; + } + else + { + *pwc = wc; + return 2; + } + } + return RET_TOOFEW; +} + +static int +utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n) +{ + if (!(wc >= 0xd800 && wc < 0xe000)) + { + if (wc < 0x10000) + { + if (n >= 2) + { + r[0] = (unsigned char) (wc >> 8); + r[1] = (unsigned char) wc; + return 2; + } + else + return RET_TOOSMALL; + } + else if (wc < 0x110000) + { + if (n >= 4) + { + ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10); + ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff); + r[0] = (unsigned char) (wc1 >> 8); + r[1] = (unsigned char) wc1; + r[2] = (unsigned char) (wc2 >> 8); + r[3] = (unsigned char) wc2; + return 4; + } + else + return RET_TOOSMALL; + } + } + return RET_ILUNI; +} + +/* + * UTF-16LE + */ + +/* Specification: RFC 2781 */ + +static int +utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n) +{ + if (n >= 2) + { + ucs4_t wc = s[0] + (s[1] << 8); + if (wc >= 0xd800 && wc < 0xdc00) + { + if (n >= 4) + { + ucs4_t wc2 = s[2] + (s[3] << 8); + if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) + return RET_ILSEQ; + *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00); + return 4; + } + } + else if (wc >= 0xdc00 && wc < 0xe000) + { + return RET_ILSEQ; + } + else + { + *pwc = wc; + return 2; + } + } + return RET_TOOFEW; +} + +static int +utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n) +{ + if (!(wc >= 0xd800 && wc < 0xe000)) + { + if (wc < 0x10000) + { + if (n >= 2) + { + r[0] = (unsigned char) wc; + r[1] = (unsigned char) (wc >> 8); + return 2; + } + else + return RET_TOOSMALL; + } + else if (wc < 0x110000) + { + if (n >= 4) + { + ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10); + ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff); + r[0] = (unsigned char) wc1; + r[1] = (unsigned char) (wc1 >> 8); + r[2] = (unsigned char) wc2; + r[3] = (unsigned char) (wc2 >> 8); + return 4; + } + else + return RET_TOOSMALL; + } + } + return RET_ILUNI; +} + +/* + * UTF-32BE + */ + +/* Specification: Unicode 3.1 Standard Annex #19 */ + +static int +utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n) +{ + if (n >= 4) + { + ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3]; + if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) + { + *pwc = wc; + return 4; + } + else + return RET_ILSEQ; + } + return RET_TOOFEW; +} + +static int +utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n) +{ + if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) + { + if (n >= 4) + { + r[0] = 0; + r[1] = (unsigned char) (wc >> 16); + r[2] = (unsigned char) (wc >> 8); + r[3] = (unsigned char) wc; + return 4; + } + else + return RET_TOOSMALL; + } + return RET_ILUNI; +} + +/* + * UTF-32LE + */ + +/* Specification: Unicode 3.1 Standard Annex #19 */ + +static int +utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n) +{ + if (n >= 4) + { + ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24); + if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) + { + *pwc = wc; + return 4; + } + else + return RET_ILSEQ; + } + return RET_TOOFEW; +} + +static int +utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n) +{ + if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) + { + if (n >= 4) + { + r[0] = (unsigned char) wc; + r[1] = (unsigned char) (wc >> 8); + r[2] = (unsigned char) (wc >> 16); + r[3] = 0; + return 4; + } + else + return RET_TOOSMALL; + } + return RET_ILUNI; +} + +#endif + +size_t +rpl_iconv (iconv_t cd, + ICONV_CONST char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +#undef iconv +{ +#if REPLACE_ICONV_UTF + switch ((uintptr_t) cd) + { + { + int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t); + + case (uintptr_t) _ICONV_UTF8_UTF16BE: + xxx_wctomb = utf16be_wctomb; + goto loop_from_utf8; + case (uintptr_t) _ICONV_UTF8_UTF16LE: + xxx_wctomb = utf16le_wctomb; + goto loop_from_utf8; + case (uintptr_t) _ICONV_UTF8_UTF32BE: + xxx_wctomb = utf32be_wctomb; + goto loop_from_utf8; + case (uintptr_t) _ICONV_UTF8_UTF32LE: + xxx_wctomb = utf32le_wctomb; + goto loop_from_utf8; + + loop_from_utf8: + if (inbuf == NULL || *inbuf == NULL) + return 0; + { + ICONV_CONST char *inptr = *inbuf; + size_t inleft = *inbytesleft; + char *outptr = *outbuf; + size_t outleft = *outbytesleft; + size_t res = 0; + while (inleft > 0) + { + ucs4_t uc; + int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft); + if (m <= 0) + { + if (m == -1) + { + errno = EILSEQ; + res = (size_t)(-1); + break; + } + if (m == -2) + { + errno = EINVAL; + res = (size_t)(-1); + break; + } + abort (); + } + else + { + int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft); + if (n < 0) + { + if (n == RET_ILUNI) + { + errno = EILSEQ; + res = (size_t)(-1); + break; + } + if (n == RET_TOOSMALL) + { + errno = E2BIG; + res = (size_t)(-1); + break; + } + abort (); + } + else + { + inptr += m; + inleft -= m; + outptr += n; + outleft -= n; + } + } + } + *inbuf = inptr; + *inbytesleft = inleft; + *outbuf = outptr; + *outbytesleft = outleft; + return res; + } + } + + { + int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t); + + case (uintptr_t) _ICONV_UTF16BE_UTF8: + xxx_mbtowc = utf16be_mbtowc; + goto loop_to_utf8; + case (uintptr_t) _ICONV_UTF16LE_UTF8: + xxx_mbtowc = utf16le_mbtowc; + goto loop_to_utf8; + case (uintptr_t) _ICONV_UTF32BE_UTF8: + xxx_mbtowc = utf32be_mbtowc; + goto loop_to_utf8; + case (uintptr_t) _ICONV_UTF32LE_UTF8: + xxx_mbtowc = utf32le_mbtowc; + goto loop_to_utf8; + + loop_to_utf8: + if (inbuf == NULL || *inbuf == NULL) + return 0; + { + ICONV_CONST char *inptr = *inbuf; + size_t inleft = *inbytesleft; + char *outptr = *outbuf; + size_t outleft = *outbytesleft; + size_t res = 0; + while (inleft > 0) + { + ucs4_t uc; + int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft); + if (m <= 0) + { + if (m == RET_ILSEQ) + { + errno = EILSEQ; + res = (size_t)(-1); + break; + } + if (m == RET_TOOFEW) + { + errno = EINVAL; + res = (size_t)(-1); + break; + } + abort (); + } + else + { + int n = u8_uctomb ((uint8_t *) outptr, uc, outleft); + if (n < 0) + { + if (n == -1) + { + errno = EILSEQ; + res = (size_t)(-1); + break; + } + if (n == -2) + { + errno = E2BIG; + res = (size_t)(-1); + break; + } + abort (); + } + else + { + inptr += m; + inleft -= m; + outptr += n; + outleft -= n; + } + } + } + *inbuf = inptr; + *inbytesleft = inleft; + *outbuf = outptr; + *outbytesleft = outleft; + return res; + } + } + } +#endif + return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft); +} diff --git a/lib/iconv.in.h b/lib/iconv.in.h new file mode 100644 index 000000000..915dce2e7 --- /dev/null +++ b/lib/iconv.in.h @@ -0,0 +1,71 @@ +/* A GNU-like . + + Copyright (C) 2007-2008 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _GL_ICONV_H + +#if __GNUC__ >= 3 +@PRAGMA_SYSTEM_HEADER@ +#endif + +/* The include_next requires a split double-inclusion guard. */ +#@INCLUDE_NEXT@ @NEXT_ICONV_H@ + +#ifndef _GL_ICONV_H +#define _GL_ICONV_H + +#ifdef __cplusplus +extern "C" { +#endif + + +#if @REPLACE_ICONV_OPEN@ +/* An iconv_open wrapper that supports the IANA standardized encoding names + ("ISO-8859-1" etc.) as far as possible. */ +# define iconv_open rpl_iconv_open +extern iconv_t iconv_open (const char *tocode, const char *fromcode); +#endif + +#if @REPLACE_ICONV_UTF@ +/* Special constants for supporting UTF-{16,32}{BE,LE} encodings. + Not public. */ +# define _ICONV_UTF8_UTF16BE (iconv_t)(-161) +# define _ICONV_UTF8_UTF16LE (iconv_t)(-162) +# define _ICONV_UTF8_UTF32BE (iconv_t)(-163) +# define _ICONV_UTF8_UTF32LE (iconv_t)(-164) +# define _ICONV_UTF16BE_UTF8 (iconv_t)(-165) +# define _ICONV_UTF16LE_UTF8 (iconv_t)(-166) +# define _ICONV_UTF32BE_UTF8 (iconv_t)(-167) +# define _ICONV_UTF32LE_UTF8 (iconv_t)(-168) +#endif + +#if @REPLACE_ICONV@ +# define iconv rpl_iconv +extern size_t iconv (iconv_t cd, + @ICONV_CONST@ char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft); +# define iconv_close rpl_iconv_close +extern int iconv_close (iconv_t cd); +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* _GL_ICONV_H */ +#endif /* _GL_ICONV_H */ diff --git a/lib/iconv_close.c b/lib/iconv_close.c new file mode 100644 index 000000000..3680412a0 --- /dev/null +++ b/lib/iconv_close.c @@ -0,0 +1,47 @@ +/* Character set conversion. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include + +/* Specification. */ +#include + +#include +#ifndef uintptr_t +# define uintptr_t unsigned long +#endif + +int +rpl_iconv_close (iconv_t cd) +#undef iconv_close +{ +#if REPLACE_ICONV_UTF + switch ((uintptr_t) cd) + { + case (uintptr_t) _ICONV_UTF8_UTF16BE: + case (uintptr_t) _ICONV_UTF8_UTF16LE: + case (uintptr_t) _ICONV_UTF8_UTF32BE: + case (uintptr_t) _ICONV_UTF8_UTF32LE: + case (uintptr_t) _ICONV_UTF16BE_UTF8: + case (uintptr_t) _ICONV_UTF16LE_UTF8: + case (uintptr_t) _ICONV_UTF32BE_UTF8: + case (uintptr_t) _ICONV_UTF32LE_UTF8: + return 0; + } +#endif + return iconv_close (cd); +} diff --git a/lib/iconv_open-aix.gperf b/lib/iconv_open-aix.gperf new file mode 100644 index 000000000..6782b9956 --- /dev/null +++ b/lib/iconv_open-aix.gperf @@ -0,0 +1,44 @@ +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; +%struct-type +%language=ANSI-C +%define slot-name standard_name +%define hash-function-name mapping_hash +%define lookup-function-name mapping_lookup +%readonly-tables +%global-table +%define word-array-name mappings +%pic +%% +# On AIX 5.1, look in /usr/lib/nls/loc/uconvTable. +ISO-8859-1, "ISO8859-1" +ISO-8859-2, "ISO8859-2" +ISO-8859-3, "ISO8859-3" +ISO-8859-4, "ISO8859-4" +ISO-8859-5, "ISO8859-5" +ISO-8859-6, "ISO8859-6" +ISO-8859-7, "ISO8859-7" +ISO-8859-8, "ISO8859-8" +ISO-8859-9, "ISO8859-9" +ISO-8859-15, "ISO8859-15" +CP437, "IBM-437" +CP850, "IBM-850" +CP852, "IBM-852" +CP856, "IBM-856" +CP857, "IBM-857" +CP861, "IBM-861" +CP865, "IBM-865" +CP869, "IBM-869" +ISO-8859-13, "IBM-921" +CP922, "IBM-922" +CP932, "IBM-932" +CP943, "IBM-943" +CP1046, "IBM-1046" +CP1124, "IBM-1124" +CP1125, "IBM-1125" +CP1129, "IBM-1129" +CP1252, "IBM-1252" +GB2312, "IBM-eucCN" +EUC-JP, "IBM-eucJP" +EUC-KR, "IBM-eucKR" +EUC-TW, "IBM-eucTW" +BIG5, "big5" diff --git a/lib/iconv_open-hpux.gperf b/lib/iconv_open-hpux.gperf new file mode 100644 index 000000000..5a35c83e1 --- /dev/null +++ b/lib/iconv_open-hpux.gperf @@ -0,0 +1,56 @@ +struct mapping { int standard_name; const char vendor_name[9 + 1]; }; +%struct-type +%language=ANSI-C +%define slot-name standard_name +%define hash-function-name mapping_hash +%define lookup-function-name mapping_lookup +%readonly-tables +%global-table +%define word-array-name mappings +%pic +%% +# On HP-UX 11.11, look in /usr/lib/nls/iconv. +ISO-8859-1, "iso88591" +ISO-8859-2, "iso88592" +ISO-8859-5, "iso88595" +ISO-8859-6, "iso88596" +ISO-8859-7, "iso88597" +ISO-8859-8, "iso88598" +ISO-8859-9, "iso88599" +ISO-8859-15, "iso885915" +CP437, "cp437" +CP775, "cp775" +CP850, "cp850" +CP852, "cp852" +CP855, "cp855" +CP857, "cp857" +CP861, "cp861" +CP862, "cp862" +CP864, "cp864" +CP865, "cp865" +CP866, "cp866" +CP869, "cp869" +CP874, "cp874" +CP1250, "cp1250" +CP1251, "cp1251" +CP1252, "cp1252" +CP1253, "cp1253" +CP1254, "cp1254" +CP1255, "cp1255" +CP1256, "cp1256" +CP1257, "cp1257" +CP1258, "cp1258" +HP-ROMAN8, "roman8" +HP-ARABIC8, "arabic8" +HP-GREEK8, "greek8" +HP-HEBREW8, "hebrew8" +HP-TURKISH8, "turkish8" +HP-KANA8, "kana8" +TIS-620, "tis620" +GB2312, "hp15CN" +EUC-JP, "eucJP" +EUC-KR, "eucKR" +EUC-TW, "eucTW" +BIG5, "big5" +SHIFT_JIS, "sjis" +UTF-8, "utf8" diff --git a/lib/iconv_open-irix.gperf b/lib/iconv_open-irix.gperf new file mode 100644 index 000000000..3672a8013 --- /dev/null +++ b/lib/iconv_open-irix.gperf @@ -0,0 +1,31 @@ +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; +%struct-type +%language=ANSI-C +%define slot-name standard_name +%define hash-function-name mapping_hash +%define lookup-function-name mapping_lookup +%readonly-tables +%global-table +%define word-array-name mappings +%pic +%% +# On IRIX 6.5, look in /usr/lib/iconv and /usr/lib/international/encodings. +ISO-8859-1, "ISO8859-1" +ISO-8859-2, "ISO8859-2" +ISO-8859-3, "ISO8859-3" +ISO-8859-4, "ISO8859-4" +ISO-8859-5, "ISO8859-5" +ISO-8859-6, "ISO8859-6" +ISO-8859-7, "ISO8859-7" +ISO-8859-8, "ISO8859-8" +ISO-8859-9, "ISO8859-9" +ISO-8859-15, "ISO8859-15" +KOI8-R, "KOI8" +CP855, "DOS855" +CP1251, "WIN1251" +GB2312, "eucCN" +EUC-JP, "eucJP" +EUC-KR, "eucKR" +EUC-TW, "eucTW" +SHIFT_JIS, "sjis" +TIS-620, "TIS620" diff --git a/lib/iconv_open-osf.gperf b/lib/iconv_open-osf.gperf new file mode 100644 index 000000000..f468ff609 --- /dev/null +++ b/lib/iconv_open-osf.gperf @@ -0,0 +1,50 @@ +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; +%struct-type +%language=ANSI-C +%define slot-name standard_name +%define hash-function-name mapping_hash +%define lookup-function-name mapping_lookup +%readonly-tables +%global-table +%define word-array-name mappings +%pic +%% +# On OSF/1 5.1, look in /usr/lib/nls/loc/iconv. +ISO-8859-1, "ISO8859-1" +ISO-8859-2, "ISO8859-2" +ISO-8859-3, "ISO8859-3" +ISO-8859-4, "ISO8859-4" +ISO-8859-5, "ISO8859-5" +ISO-8859-6, "ISO8859-6" +ISO-8859-7, "ISO8859-7" +ISO-8859-8, "ISO8859-8" +ISO-8859-9, "ISO8859-9" +ISO-8859-15, "ISO8859-15" +CP437, "cp437" +CP775, "cp775" +CP850, "cp850" +CP852, "cp852" +CP855, "cp855" +CP857, "cp857" +CP861, "cp861" +CP862, "cp862" +CP865, "cp865" +CP866, "cp866" +CP869, "cp869" +CP874, "cp874" +CP949, "KSC5601" +CP1250, "cp1250" +CP1251, "cp1251" +CP1252, "cp1252" +CP1253, "cp1253" +CP1254, "cp1254" +CP1255, "cp1255" +CP1256, "cp1256" +CP1257, "cp1257" +CP1258, "cp1258" +EUC-JP, "eucJP" +EUC-KR, "eucKR" +EUC-TW, "eucTW" +BIG5, "big5" +SHIFT_JIS, "SJIS" +TIS-620, "TACTIS" diff --git a/lib/iconv_open.c b/lib/iconv_open.c new file mode 100644 index 000000000..3d873acd6 --- /dev/null +++ b/lib/iconv_open.c @@ -0,0 +1,172 @@ +/* Character set conversion. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include + +/* Specification. */ +#include + +#include +#include +#include "c-ctype.h" +#include "c-strcase.h" + +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + +/* Namespace cleanliness. */ +#define mapping_lookup rpl_iconv_open_mapping_lookup + +/* The macro ICONV_FLAVOR is defined to one of these or undefined. */ + +#define ICONV_FLAVOR_AIX "iconv_open-aix.h" +#define ICONV_FLAVOR_HPUX "iconv_open-hpux.h" +#define ICONV_FLAVOR_IRIX "iconv_open-irix.h" +#define ICONV_FLAVOR_OSF "iconv_open-osf.h" + +#ifdef ICONV_FLAVOR +# include ICONV_FLAVOR +#endif + +iconv_t +rpl_iconv_open (const char *tocode, const char *fromcode) +#undef iconv_open +{ + char fromcode_upper[32]; + char tocode_upper[32]; + char *fromcode_upper_end; + char *tocode_upper_end; + +#if REPLACE_ICONV_UTF + /* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}. + Do this here, before calling the real iconv_open(), because OSF/1 5.1 + iconv() to these encoding inserts a BOM, which is wrong. + We do not need to handle conversion between arbitrary encodings and + UTF-{16,32}{BE,LE}, because the 'striconveh' module implements two-step + conversion throough UTF-8. + The _ICONV_* constants are chosen to be disjoint from any iconv_t + returned by the system's iconv_open() functions. Recall that iconv_t + is a scalar type. */ + if (c_toupper (fromcode[0]) == 'U' + && c_toupper (fromcode[1]) == 'T' + && c_toupper (fromcode[2]) == 'F' + && fromcode[3] == '-') + { + if (c_toupper (tocode[0]) == 'U' + && c_toupper (tocode[1]) == 'T' + && c_toupper (tocode[2]) == 'F' + && tocode[3] == '-') + { + if (strcmp (fromcode + 4, "8") == 0) + { + if (c_strcasecmp (tocode + 4, "16BE") == 0) + return _ICONV_UTF8_UTF16BE; + if (c_strcasecmp (tocode + 4, "16LE") == 0) + return _ICONV_UTF8_UTF16LE; + if (c_strcasecmp (tocode + 4, "32BE") == 0) + return _ICONV_UTF8_UTF32BE; + if (c_strcasecmp (tocode + 4, "32LE") == 0) + return _ICONV_UTF8_UTF32LE; + } + else if (strcmp (tocode + 4, "8") == 0) + { + if (c_strcasecmp (fromcode + 4, "16BE") == 0) + return _ICONV_UTF16BE_UTF8; + if (c_strcasecmp (fromcode + 4, "16LE") == 0) + return _ICONV_UTF16LE_UTF8; + if (c_strcasecmp (fromcode + 4, "32BE") == 0) + return _ICONV_UTF32BE_UTF8; + if (c_strcasecmp (fromcode + 4, "32LE") == 0) + return _ICONV_UTF32LE_UTF8; + } + } + } +#endif + + /* Do *not* add special support for 8-bit encodings like ASCII or ISO-8859-1 + here. This would lead to programs that work in some locales (such as the + "C" or "en_US" locales) but do not work in East Asian locales. It is + better if programmers make their programs depend on GNU libiconv (except + on glibc systems), e.g. by using the AM_ICONV macro and documenting the + dependency in an INSTALL or DEPENDENCIES file. */ + + /* Try with the original names first. + This covers the case when fromcode or tocode is a lowercase encoding name + that is understood by the system's iconv_open but not listed in our + mappings table. */ + { + iconv_t cd = iconv_open (tocode, fromcode); + if (cd != (iconv_t)(-1)) + return cd; + } + + /* Convert the encodings to upper case, because + 1. in the arguments of iconv_open() on AIX, HP-UX, and OSF/1 the case + matters, + 2. it makes searching in the table faster. */ + { + const char *p = fromcode; + char *q = fromcode_upper; + while ((*q = c_toupper (*p)) != '\0') + { + p++; + q++; + if (q == &fromcode_upper[SIZEOF (fromcode_upper)]) + { + errno = EINVAL; + return (iconv_t)(-1); + } + } + fromcode_upper_end = q; + } + + { + const char *p = tocode; + char *q = tocode_upper; + while ((*q = c_toupper (*p)) != '\0') + { + p++; + q++; + if (q == &tocode_upper[SIZEOF (tocode_upper)]) + { + errno = EINVAL; + return (iconv_t)(-1); + } + } + tocode_upper_end = q; + } + +#ifdef ICONV_FLAVOR + /* Apply the mappings. */ + { + const struct mapping *m = + mapping_lookup (fromcode_upper, fromcode_upper_end - fromcode_upper); + + fromcode = (m != NULL ? m->vendor_name : fromcode_upper); + } + { + const struct mapping *m = + mapping_lookup (tocode_upper, tocode_upper_end - tocode_upper); + + tocode = (m != NULL ? m->vendor_name : tocode_upper); + } +#else + fromcode = fromcode_upper; + tocode = tocode_upper; +#endif + + return iconv_open (tocode, fromcode); +} diff --git a/lib/iconveh.h b/lib/iconveh.h new file mode 100644 index 000000000..06cda52e8 --- /dev/null +++ b/lib/iconveh.h @@ -0,0 +1,41 @@ +/* Character set conversion handler type. + Copyright (C) 2001-2007, 2009 Free Software Foundation, Inc. + Written by Bruno Haible. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#ifndef _ICONVEH_H +#define _ICONVEH_H + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Handling of unconvertible characters. */ +enum iconv_ilseq_handler +{ + iconveh_error, /* return and set errno = EILSEQ */ + iconveh_question_mark, /* use one '?' per unconvertible character */ + iconveh_escape_sequence /* use escape sequence \uxxxx or \Uxxxxxxxx */ +}; + + +#ifdef __cplusplus +} +#endif + + +#endif /* _ICONVEH_H */ diff --git a/lib/striconveh.c b/lib/striconveh.c new file mode 100644 index 000000000..b39a01f19 --- /dev/null +++ b/lib/striconveh.c @@ -0,0 +1,1251 @@ +/* Character set conversion with error handling. + Copyright (C) 2001-2008 Free Software Foundation, Inc. + Written by Bruno Haible and Simon Josefsson. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include "striconveh.h" + +#include +#include +#include +#include + +#if HAVE_ICONV +# include +# include "unistr.h" +#endif + +#include "c-strcase.h" +#include "c-strcaseeq.h" + +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + + +#if HAVE_ICONV + +/* The caller must provide CD, CD1, CD2, not just CD, because when a conversion + error occurs, we may have to determine the Unicode representation of the + inconvertible character. */ + +/* iconv_carefully is like iconv, except that it stops as soon as it encounters + a conversion error, and it returns in *INCREMENTED a boolean telling whether + it has incremented the input pointers past the error location. */ +# if !defined _LIBICONV_VERSION && !defined __GLIBC__ +/* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot convert. + Only GNU libiconv and GNU libc are known to prefer to fail rather + than doing a lossy conversion. */ +static size_t +iconv_carefully (iconv_t cd, + const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, + bool *incremented) +{ + const char *inptr = *inbuf; + const char *inptr_end = inptr + *inbytesleft; + char *outptr = *outbuf; + size_t outsize = *outbytesleft; + const char *inptr_before; + size_t res; + + do + { + size_t insize; + + inptr_before = inptr; + res = (size_t)(-1); + + for (insize = 1; inptr + insize <= inptr_end; insize++) + { + res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + if (!(res == (size_t)(-1) && errno == EINVAL)) + break; + /* iconv can eat up a shift sequence but give EINVAL while attempting + to convert the first character. E.g. libiconv does this. */ + if (inptr > inptr_before) + { + res = 0; + break; + } + } + + if (res == 0) + { + *outbuf = outptr; + *outbytesleft = outsize; + } + } + while (res == 0 && inptr < inptr_end); + + *inbuf = inptr; + *inbytesleft = inptr_end - inptr; + if (res != (size_t)(-1) && res > 0) + { + /* iconv() has already incremented INPTR. We cannot go back to a + previous INPTR, otherwise the state inside CD would become invalid, + if FROM_CODESET is a stateful encoding. So, tell the caller that + *INBUF has already been incremented. */ + *incremented = (inptr > inptr_before); + errno = EILSEQ; + return (size_t)(-1); + } + else + { + *incremented = false; + return res; + } +} +# else +# define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \ + (*(incremented) = false, \ + iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft)) +# endif + +/* iconv_carefully_1 is like iconv_carefully, except that it stops after + converting one character or one shift sequence. */ +static size_t +iconv_carefully_1 (iconv_t cd, + const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, + bool *incremented) +{ + const char *inptr_before = *inbuf; + const char *inptr = inptr_before; + const char *inptr_end = inptr_before + *inbytesleft; + char *outptr = *outbuf; + size_t outsize = *outbytesleft; + size_t res = (size_t)(-1); + size_t insize; + + for (insize = 1; inptr_before + insize <= inptr_end; insize++) + { + inptr = inptr_before; + res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + if (!(res == (size_t)(-1) && errno == EINVAL)) + break; + /* iconv can eat up a shift sequence but give EINVAL while attempting + to convert the first character. E.g. libiconv does this. */ + if (inptr > inptr_before) + { + res = 0; + break; + } + } + + *inbuf = inptr; + *inbytesleft = inptr_end - inptr; +# if !defined _LIBICONV_VERSION && !defined __GLIBC__ + /* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot convert. + Only GNU libiconv and GNU libc are known to prefer to fail rather + than doing a lossy conversion. */ + if (res != (size_t)(-1) && res > 0) + { + /* iconv() has already incremented INPTR. We cannot go back to a + previous INPTR, otherwise the state inside CD would become invalid, + if FROM_CODESET is a stateful encoding. So, tell the caller that + *INBUF has already been incremented. */ + *incremented = (inptr > inptr_before); + errno = EILSEQ; + return (size_t)(-1); + } +# endif + + if (res != (size_t)(-1)) + { + *outbuf = outptr; + *outbytesleft = outsize; + } + *incremented = false; + return res; +} + +/* utf8conv_carefully is like iconv, except that + - it converts from UTF-8 to UTF-8, + - it stops as soon as it encounters a conversion error, and it returns + in *INCREMENTED a boolean telling whether it has incremented the input + pointers past the error location, + - if one_character_only is true, it stops after converting one + character. */ +static size_t +utf8conv_carefully (bool one_character_only, + const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, + bool *incremented) +{ + const char *inptr = *inbuf; + size_t insize = *inbytesleft; + char *outptr = *outbuf; + size_t outsize = *outbytesleft; + size_t res; + + res = 0; + do + { + ucs4_t uc; + int n; + int m; + + n = u8_mbtoucr (&uc, (const uint8_t *) inptr, insize); + if (n < 0) + { + errno = (n == -2 ? EINVAL : EILSEQ); + n = u8_mbtouc (&uc, (const uint8_t *) inptr, insize); + inptr += n; + insize -= n; + res = (size_t)(-1); + *incremented = true; + break; + } + if (outsize == 0) + { + errno = E2BIG; + res = (size_t)(-1); + *incremented = false; + break; + } + m = u8_uctomb ((uint8_t *) outptr, uc, outsize); + if (m == -2) + { + errno = E2BIG; + res = (size_t)(-1); + *incremented = false; + break; + } + inptr += n; + insize -= n; + if (m == -1) + { + errno = EILSEQ; + res = (size_t)(-1); + *incremented = true; + break; + } + outptr += m; + outsize -= m; + } + while (!one_character_only && insize > 0); + + *inbuf = inptr; + *inbytesleft = insize; + *outbuf = outptr; + *outbytesleft = outsize; + return res; +} + +static int +mem_cd_iconveh_internal (const char *src, size_t srclen, + iconv_t cd, iconv_t cd1, iconv_t cd2, + enum iconv_ilseq_handler handler, + size_t extra_alloc, + size_t *offsets, + char **resultp, size_t *lengthp) +{ + /* When a conversion error occurs, we cannot start using CD1 and CD2 at + this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR. + Instead, we have to start afresh from the beginning of SRC. */ + /* Use a temporary buffer, so that for small strings, a single malloc() + call will be sufficient. */ +# define tmpbufsize 4096 + /* The alignment is needed when converting e.g. to glibc's WCHAR_T or + libiconv's UCS-4-INTERNAL encoding. */ + union { unsigned int align; char buf[tmpbufsize]; } tmp; +# define tmpbuf tmp.buf + + char *initial_result; + char *result; + size_t allocated; + size_t length; + size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */ + + if (*resultp != NULL && *lengthp >= sizeof (tmpbuf)) + { + initial_result = *resultp; + allocated = *lengthp; + } + else + { + initial_result = tmpbuf; + allocated = sizeof (tmpbuf); + } + result = initial_result; + + /* Test whether a direct conversion is possible at all. */ + if (cd == (iconv_t)(-1)) + goto indirectly; + + if (offsets != NULL) + { + size_t i; + + for (i = 0; i < srclen; i++) + offsets[i] = (size_t)(-1); + + last_length = (size_t)(-1); + } + length = 0; + + /* First, try a direct conversion, and see whether a conversion error + occurs at all. */ + { + const char *inptr = src; + size_t insize = srclen; + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Set to the initial state. */ + iconv (cd, NULL, NULL, NULL, NULL); +# endif + + while (insize > 0) + { + char *outptr = result + length; + size_t outsize = allocated - extra_alloc - length; + bool incremented; + size_t res; + bool grow; + + if (offsets != NULL) + { + if (length != last_length) /* ensure that offset[] be increasing */ + { + offsets[inptr - src] = length; + last_length = length; + } + res = iconv_carefully_1 (cd, + &inptr, &insize, + &outptr, &outsize, + &incremented); + } + else + /* Use iconv_carefully instead of iconv here, because: + - If TO_CODESET is UTF-8, we can do the error handling in this + loop, no need for a second loop, + - With iconv() implementations other than GNU libiconv and GNU + libc, if we use iconv() in a big swoop, checking for an E2BIG + return, we lose the number of irreversible conversions. */ + res = iconv_carefully (cd, + &inptr, &insize, + &outptr, &outsize, + &incremented); + + length = outptr - result; + grow = (length + extra_alloc > allocated / 2); + if (res == (size_t)(-1)) + { + if (errno == E2BIG) + grow = true; + else if (errno == EINVAL) + break; + else if (errno == EILSEQ && handler != iconveh_error) + { + if (cd2 == (iconv_t)(-1)) + { + /* TO_CODESET is UTF-8. */ + /* Error handling can produce up to 1 byte of output. */ + if (length + 1 + extra_alloc > allocated) + { + char *memory; + + allocated = 2 * allocated; + if (length + 1 + extra_alloc > allocated) + abort (); + if (result == initial_result) + memory = (char *) malloc (allocated); + else + memory = (char *) realloc (result, allocated); + if (memory == NULL) + { + if (result != initial_result) + free (result); + errno = ENOMEM; + return -1; + } + if (result == initial_result) + memcpy (memory, initial_result, length); + result = memory; + grow = false; + } + /* The input is invalid in FROM_CODESET. Eat up one byte + and emit a question mark. */ + if (!incremented) + { + if (insize == 0) + abort (); + inptr++; + insize--; + } + result[length] = '?'; + length++; + } + else + goto indirectly; + } + else + { + if (result != initial_result) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return -1; + } + } + if (insize == 0) + break; + if (grow) + { + char *memory; + + allocated = 2 * allocated; + if (result == initial_result) + memory = (char *) malloc (allocated); + else + memory = (char *) realloc (result, allocated); + if (memory == NULL) + { + if (result != initial_result) + free (result); + errno = ENOMEM; + return -1; + } + if (result == initial_result) + memcpy (memory, initial_result, length); + result = memory; + } + } + } + + /* Now get the conversion state back to the initial state. + But avoid glibc-2.1 bug and Solaris 2.7 bug. */ +#if defined _LIBICONV_VERSION \ + || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun) + for (;;) + { + char *outptr = result + length; + size_t outsize = allocated - extra_alloc - length; + size_t res; + + res = iconv (cd, NULL, NULL, &outptr, &outsize); + length = outptr - result; + if (res == (size_t)(-1)) + { + if (errno == E2BIG) + { + char *memory; + + allocated = 2 * allocated; + if (result == initial_result) + memory = (char *) malloc (allocated); + else + memory = (char *) realloc (result, allocated); + if (memory == NULL) + { + if (result != initial_result) + free (result); + errno = ENOMEM; + return -1; + } + if (result == initial_result) + memcpy (memory, initial_result, length); + result = memory; + } + else + { + if (result != initial_result) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return -1; + } + } + else + break; + } +#endif + + /* The direct conversion succeeded. */ + goto done; + + indirectly: + /* The direct conversion failed. + Use a conversion through UTF-8. */ + if (offsets != NULL) + { + size_t i; + + for (i = 0; i < srclen; i++) + offsets[i] = (size_t)(-1); + + last_length = (size_t)(-1); + } + length = 0; + { + const bool slowly = (offsets != NULL || handler == iconveh_error); +# define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */ + char utf8buf[utf8bufsize + 1]; + size_t utf8len = 0; + const char *in1ptr = src; + size_t in1size = srclen; + bool do_final_flush1 = true; + bool do_final_flush2 = true; + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Set to the initial state. */ + if (cd1 != (iconv_t)(-1)) + iconv (cd1, NULL, NULL, NULL, NULL); + if (cd2 != (iconv_t)(-1)) + iconv (cd2, NULL, NULL, NULL, NULL); +# endif + + while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2) + { + char *out1ptr = utf8buf + utf8len; + size_t out1size = utf8bufsize - utf8len; + bool incremented1; + size_t res1; + int errno1; + + /* Conversion step 1: from FROM_CODESET to UTF-8. */ + if (in1size > 0) + { + if (offsets != NULL + && length != last_length) /* ensure that offset[] be increasing */ + { + offsets[in1ptr - src] = length; + last_length = length; + } + if (cd1 != (iconv_t)(-1)) + { + if (slowly) + res1 = iconv_carefully_1 (cd1, + &in1ptr, &in1size, + &out1ptr, &out1size, + &incremented1); + else + res1 = iconv_carefully (cd1, + &in1ptr, &in1size, + &out1ptr, &out1size, + &incremented1); + } + else + { + /* FROM_CODESET is UTF-8. */ + res1 = utf8conv_carefully (slowly, + &in1ptr, &in1size, + &out1ptr, &out1size, + &incremented1); + } + } + else if (do_final_flush1) + { + /* Now get the conversion state of CD1 back to the initial state. + But avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun) + if (cd1 != (iconv_t)(-1)) + res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size); + else +# endif + res1 = 0; + do_final_flush1 = false; + incremented1 = true; + } + else + { + res1 = 0; + incremented1 = true; + } + if (res1 == (size_t)(-1) + && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ)) + { + if (result != initial_result) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return -1; + } + if (res1 == (size_t)(-1) + && errno == EILSEQ && handler != iconveh_error) + { + /* The input is invalid in FROM_CODESET. Eat up one byte and + emit a question mark. Room for the question mark was allocated + at the end of utf8buf. */ + if (!incremented1) + { + if (in1size == 0) + abort (); + in1ptr++; + in1size--; + } + utf8buf[utf8len++] = '?'; + } + errno1 = errno; + utf8len = out1ptr - utf8buf; + + if (offsets != NULL + || in1size == 0 + || utf8len > utf8bufsize / 2 + || (res1 == (size_t)(-1) && errno1 == E2BIG)) + { + /* Conversion step 2: from UTF-8 to TO_CODESET. */ + const char *in2ptr = utf8buf; + size_t in2size = utf8len; + + while (in2size > 0 + || (in1size == 0 && !do_final_flush1 && do_final_flush2)) + { + char *out2ptr = result + length; + size_t out2size = allocated - extra_alloc - length; + bool incremented2; + size_t res2; + bool grow; + + if (in2size > 0) + { + if (cd2 != (iconv_t)(-1)) + res2 = iconv_carefully (cd2, + &in2ptr, &in2size, + &out2ptr, &out2size, + &incremented2); + else + /* TO_CODESET is UTF-8. */ + res2 = utf8conv_carefully (false, + &in2ptr, &in2size, + &out2ptr, &out2size, + &incremented2); + } + else /* in1size == 0 && !do_final_flush1 + && in2size == 0 && do_final_flush2 */ + { + /* Now get the conversion state of CD1 back to the initial + state. But avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun) + if (cd2 != (iconv_t)(-1)) + res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size); + else +# endif + res2 = 0; + do_final_flush2 = false; + incremented2 = true; + } + + length = out2ptr - result; + grow = (length + extra_alloc > allocated / 2); + if (res2 == (size_t)(-1)) + { + if (errno == E2BIG) + grow = true; + else if (errno == EINVAL) + break; + else if (errno == EILSEQ && handler != iconveh_error) + { + /* Error handling can produce up to 10 bytes of ASCII + output. But TO_CODESET may be UCS-2, UTF-16 or + UCS-4, so use CD2 here as well. */ + char scratchbuf[10]; + size_t scratchlen; + ucs4_t uc; + const char *inptr; + size_t insize; + size_t res; + + if (incremented2) + { + if (u8_prev (&uc, (const uint8_t *) in2ptr, + (const uint8_t *) utf8buf) + == NULL) + abort (); + } + else + { + int n; + if (in2size == 0) + abort (); + n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr, + in2size); + in2ptr += n; + in2size -= n; + } + + if (handler == iconveh_escape_sequence) + { + static char hex[16] = "0123456789ABCDEF"; + scratchlen = 0; + scratchbuf[scratchlen++] = '\\'; + if (uc < 0x10000) + scratchbuf[scratchlen++] = 'u'; + else + { + scratchbuf[scratchlen++] = 'U'; + scratchbuf[scratchlen++] = hex[(uc>>28) & 15]; + scratchbuf[scratchlen++] = hex[(uc>>24) & 15]; + scratchbuf[scratchlen++] = hex[(uc>>20) & 15]; + scratchbuf[scratchlen++] = hex[(uc>>16) & 15]; + } + scratchbuf[scratchlen++] = hex[(uc>>12) & 15]; + scratchbuf[scratchlen++] = hex[(uc>>8) & 15]; + scratchbuf[scratchlen++] = hex[(uc>>4) & 15]; + scratchbuf[scratchlen++] = hex[uc & 15]; + } + else + { + scratchbuf[0] = '?'; + scratchlen = 1; + } + + inptr = scratchbuf; + insize = scratchlen; + if (cd2 != (iconv_t)(-1)) + res = iconv (cd2, + (ICONV_CONST char **) &inptr, &insize, + &out2ptr, &out2size); + else + { + /* TO_CODESET is UTF-8. */ + if (out2size >= insize) + { + memcpy (out2ptr, inptr, insize); + out2ptr += insize; + out2size -= insize; + inptr += insize; + insize = 0; + res = 0; + } + else + { + errno = E2BIG; + res = (size_t)(-1); + } + } + length = out2ptr - result; + if (res == (size_t)(-1) && errno == E2BIG) + { + char *memory; + + allocated = 2 * allocated; + if (length + 1 + extra_alloc > allocated) + abort (); + if (result == initial_result) + memory = (char *) malloc (allocated); + else + memory = (char *) realloc (result, allocated); + if (memory == NULL) + { + if (result != initial_result) + free (result); + errno = ENOMEM; + return -1; + } + if (result == initial_result) + memcpy (memory, initial_result, length); + result = memory; + grow = false; + + out2ptr = result + length; + out2size = allocated - extra_alloc - length; + if (cd2 != (iconv_t)(-1)) + res = iconv (cd2, + (ICONV_CONST char **) &inptr, + &insize, + &out2ptr, &out2size); + else + { + /* TO_CODESET is UTF-8. */ + if (!(out2size >= insize)) + abort (); + memcpy (out2ptr, inptr, insize); + out2ptr += insize; + out2size -= insize; + inptr += insize; + insize = 0; + res = 0; + } + length = out2ptr - result; + } +# if !defined _LIBICONV_VERSION && !defined __GLIBC__ + /* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot + convert. + Only GNU libiconv and GNU libc are known to prefer + to fail rather than doing a lossy conversion. */ + if (res != (size_t)(-1) && res > 0) + { + errno = EILSEQ; + res = (size_t)(-1); + } +# endif + if (res == (size_t)(-1)) + { + /* Failure converting the ASCII replacement. */ + if (result != initial_result) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return -1; + } + } + else + { + if (result != initial_result) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return -1; + } + } + if (!(in2size > 0 + || (in1size == 0 && !do_final_flush1 && do_final_flush2))) + break; + if (grow) + { + char *memory; + + allocated = 2 * allocated; + if (result == initial_result) + memory = (char *) malloc (allocated); + else + memory = (char *) realloc (result, allocated); + if (memory == NULL) + { + if (result != initial_result) + free (result); + errno = ENOMEM; + return -1; + } + if (result == initial_result) + memcpy (memory, initial_result, length); + result = memory; + } + } + + /* Move the remaining bytes to the beginning of utf8buf. */ + if (in2size > 0) + memmove (utf8buf, in2ptr, in2size); + utf8len = in2size; + } + + if (res1 == (size_t)(-1)) + { + if (errno1 == EINVAL) + in1size = 0; + else if (errno1 == EILSEQ) + { + if (result != initial_result) + free (result); + errno = errno1; + return -1; + } + } + } +# undef utf8bufsize + } + + done: + /* Now the final memory allocation. */ + if (result == tmpbuf) + { + size_t memsize = length + extra_alloc; + char *memory; + + memory = (char *) malloc (memsize > 0 ? memsize : 1); + if (memory != NULL) + { + memcpy (memory, tmpbuf, length); + result = memory; + } + else + { + errno = ENOMEM; + return -1; + } + } + else if (result != *resultp && length + extra_alloc < allocated) + { + /* Shrink the allocated memory if possible. */ + size_t memsize = length + extra_alloc; + char *memory; + + memory = (char *) realloc (result, memsize > 0 ? memsize : 1); + if (memory != NULL) + result = memory; + } + *resultp = result; + *lengthp = length; + return 0; +# undef tmpbuf +# undef tmpbufsize +} + +int +mem_cd_iconveh (const char *src, size_t srclen, + iconv_t cd, iconv_t cd1, iconv_t cd2, + enum iconv_ilseq_handler handler, + size_t *offsets, + char **resultp, size_t *lengthp) +{ + return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0, + offsets, resultp, lengthp); +} + +char * +str_cd_iconveh (const char *src, + iconv_t cd, iconv_t cd1, iconv_t cd2, + enum iconv_ilseq_handler handler) +{ + /* For most encodings, a trailing NUL byte in the input will be converted + to a trailing NUL byte in the output. But not for UTF-7. So that this + function is usable for UTF-7, we have to exclude the NUL byte from the + conversion and add it by hand afterwards. */ + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh_internal (src, strlen (src), + cd, cd1, cd2, handler, 1, NULL, + &result, &length); + + if (retval < 0) + { + if (result != NULL) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return NULL; + } + + /* Add the terminating NUL byte. */ + result[length] = '\0'; + + return result; +} + +#endif + +int +mem_iconveh (const char *src, size_t srclen, + const char *from_codeset, const char *to_codeset, + enum iconv_ilseq_handler handler, + size_t *offsets, + char **resultp, size_t *lengthp) +{ + if (srclen == 0) + { + /* Nothing to convert. */ + *lengthp = 0; + return 0; + } + else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0) + { + char *result; + + if (*resultp != NULL && *lengthp >= srclen) + result = *resultp; + else + { + result = (char *) malloc (srclen); + if (result == NULL) + { + errno = ENOMEM; + return -1; + } + } + memcpy (result, src, srclen); + *resultp = result; + *lengthp = srclen; + return 0; + } + else + { +#if HAVE_ICONV + iconv_t cd; + iconv_t cd1; + iconv_t cd2; + char *result; + size_t length; + int retval; + + /* Avoid glibc-2.1 bug with EUC-KR. */ +# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION + if (c_strcasecmp (from_codeset, "EUC-KR") == 0 + || c_strcasecmp (to_codeset, "EUC-KR") == 0) + { + errno = EINVAL; + return -1; + } +# endif + + cd = iconv_open (to_codeset, from_codeset); + + if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)) + cd1 = (iconv_t)(-1); + else + { + cd1 = iconv_open ("UTF-8", from_codeset); + if (cd1 == (iconv_t)(-1)) + { + int saved_errno = errno; + if (cd != (iconv_t)(-1)) + iconv_close (cd); + errno = saved_errno; + return -1; + } + } + + if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0) +# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105 + || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0 +# endif + ) + cd2 = (iconv_t)(-1); + else + { + cd2 = iconv_open (to_codeset, "UTF-8"); + if (cd2 == (iconv_t)(-1)) + { + int saved_errno = errno; + if (cd1 != (iconv_t)(-1)) + iconv_close (cd1); + if (cd != (iconv_t)(-1)) + iconv_close (cd); + errno = saved_errno; + return -1; + } + } + + result = *resultp; + length = *lengthp; + retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets, + &result, &length); + + if (retval < 0) + { + /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv. */ + int saved_errno = errno; + if (cd2 != (iconv_t)(-1)) + iconv_close (cd2); + if (cd1 != (iconv_t)(-1)) + iconv_close (cd1); + if (cd != (iconv_t)(-1)) + iconv_close (cd); + errno = saved_errno; + } + else + { + if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0) + { + /* Return -1, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + if (cd1 != (iconv_t)(-1)) + iconv_close (cd1); + if (cd != (iconv_t)(-1)) + iconv_close (cd); + if (result != *resultp && result != NULL) + free (result); + errno = saved_errno; + return -1; + } + if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0) + { + /* Return -1, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + if (cd != (iconv_t)(-1)) + iconv_close (cd); + if (result != *resultp && result != NULL) + free (result); + errno = saved_errno; + return -1; + } + if (cd != (iconv_t)(-1) && iconv_close (cd) < 0) + { + /* Return -1, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + if (result != *resultp && result != NULL) + free (result); + errno = saved_errno; + return -1; + } + *resultp = result; + *lengthp = length; + } + return retval; +#else + /* This is a different error code than if iconv_open existed but didn't + support from_codeset and to_codeset, so that the caller can emit + an error message such as + "iconv() is not supported. Installing GNU libiconv and + then reinstalling this package would fix this." */ + errno = ENOSYS; + return -1; +#endif + } +} + +char * +str_iconveh (const char *src, + const char *from_codeset, const char *to_codeset, + enum iconv_ilseq_handler handler) +{ + if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0) + { + char *result = strdup (src); + + if (result == NULL) + errno = ENOMEM; + return result; + } + else + { +#if HAVE_ICONV + iconv_t cd; + iconv_t cd1; + iconv_t cd2; + char *result; + + /* Avoid glibc-2.1 bug with EUC-KR. */ +# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION + if (c_strcasecmp (from_codeset, "EUC-KR") == 0 + || c_strcasecmp (to_codeset, "EUC-KR") == 0) + { + errno = EINVAL; + return NULL; + } +# endif + + cd = iconv_open (to_codeset, from_codeset); + + if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)) + cd1 = (iconv_t)(-1); + else + { + cd1 = iconv_open ("UTF-8", from_codeset); + if (cd1 == (iconv_t)(-1)) + { + int saved_errno = errno; + if (cd != (iconv_t)(-1)) + iconv_close (cd); + errno = saved_errno; + return NULL; + } + } + + if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0) +# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105 + || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0 +# endif + ) + cd2 = (iconv_t)(-1); + else + { + cd2 = iconv_open (to_codeset, "UTF-8"); + if (cd2 == (iconv_t)(-1)) + { + int saved_errno = errno; + if (cd1 != (iconv_t)(-1)) + iconv_close (cd1); + if (cd != (iconv_t)(-1)) + iconv_close (cd); + errno = saved_errno; + return NULL; + } + } + + result = str_cd_iconveh (src, cd, cd1, cd2, handler); + + if (result == NULL) + { + /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv. */ + int saved_errno = errno; + if (cd2 != (iconv_t)(-1)) + iconv_close (cd2); + if (cd1 != (iconv_t)(-1)) + iconv_close (cd1); + if (cd != (iconv_t)(-1)) + iconv_close (cd); + errno = saved_errno; + } + else + { + if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0) + { + /* Return NULL, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + if (cd1 != (iconv_t)(-1)) + iconv_close (cd1); + if (cd != (iconv_t)(-1)) + iconv_close (cd); + free (result); + errno = saved_errno; + return NULL; + } + if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0) + { + /* Return NULL, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + if (cd != (iconv_t)(-1)) + iconv_close (cd); + free (result); + errno = saved_errno; + return NULL; + } + if (cd != (iconv_t)(-1) && iconv_close (cd) < 0) + { + /* Return NULL, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + free (result); + errno = saved_errno; + return NULL; + } + } + return result; +#else + /* This is a different error code than if iconv_open existed but didn't + support from_codeset and to_codeset, so that the caller can emit + an error message such as + "iconv() is not supported. Installing GNU libiconv and + then reinstalling this package would fix this." */ + errno = ENOSYS; + return NULL; +#endif + } +} diff --git a/lib/striconveh.h b/lib/striconveh.h new file mode 100644 index 000000000..98b4d0c5e --- /dev/null +++ b/lib/striconveh.h @@ -0,0 +1,120 @@ +/* Character set conversion with error handling. + Copyright (C) 2001-2007, 2009 Free Software Foundation, Inc. + Written by Bruno Haible and Simon Josefsson. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#ifndef _STRICONVEH_H +#define _STRICONVEH_H + +#include +#if HAVE_ICONV +#include +#endif + +#include "iconveh.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#if HAVE_ICONV + +/* Convert an entire string from one encoding to another, using iconv. + The original string is at [SRC,...,SRC+SRCLEN-1]. + CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if + the system does not support a direct conversion from FROMCODE to TOCODE. + CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or + (iconv_t)(-1) if FROM_CODESET is UTF-8). + CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1) + if TO_CODESET is UTF-8). + If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this + array is filled with offsets into the result, i.e. the character starting + at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]], + and other offsets are set to (size_t)(-1). + *RESULTP and *LENGTH should initially be a scratch buffer and its size, + or *RESULTP can initially be NULL. + May erase the contents of the memory at *RESULTP. + Return value: 0 if successful, otherwise -1 and errno set. + If successful: The resulting string is stored in *RESULTP and its length + in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is + unchanged if no dynamic memory allocation was necessary. */ +extern int + mem_cd_iconveh (const char *src, size_t srclen, + iconv_t cd, iconv_t cd1, iconv_t cd2, + enum iconv_ilseq_handler handler, + size_t *offsets, + char **resultp, size_t *lengthp); + +/* Convert an entire string from one encoding to another, using iconv. + The original string is the NUL-terminated string starting at SRC. + CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if + the system does not support a direct conversion from FROMCODE to TOCODE. + Both the "from" and the "to" encoding must use a single NUL byte at the end + of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32). + CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or + (iconv_t)(-1) if FROM_CODESET is UTF-8). + CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1) + if TO_CODESET is UTF-8). + Allocate a malloced memory block for the result. + Return value: the freshly allocated resulting NUL-terminated string if + successful, otherwise NULL and errno set. */ +extern char * + str_cd_iconveh (const char *src, + iconv_t cd, iconv_t cd1, iconv_t cd2, + enum iconv_ilseq_handler handler); + +#endif + +/* Convert an entire string from one encoding to another, using iconv. + The original string is at [SRC,...,SRC+SRCLEN-1]. + If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this + array is filled with offsets into the result, i.e. the character starting + at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]], + and other offsets are set to (size_t)(-1). + *RESULTP and *LENGTH should initially be a scratch buffer and its size, + or *RESULTP can initially be NULL. + May erase the contents of the memory at *RESULTP. + Return value: 0 if successful, otherwise -1 and errno set. + If successful: The resulting string is stored in *RESULTP and its length + in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is + unchanged if no dynamic memory allocation was necessary. */ +extern int + mem_iconveh (const char *src, size_t srclen, + const char *from_codeset, const char *to_codeset, + enum iconv_ilseq_handler handler, + size_t *offsets, + char **resultp, size_t *lengthp); + +/* Convert an entire string from one encoding to another, using iconv. + The original string is the NUL-terminated string starting at SRC. + Both the "from" and the "to" encoding must use a single NUL byte at the + end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32). + Allocate a malloced memory block for the result. + Return value: the freshly allocated resulting NUL-terminated string if + successful, otherwise NULL and errno set. */ +extern char * + str_iconveh (const char *src, + const char *from_codeset, const char *to_codeset, + enum iconv_ilseq_handler handler); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _STRICONVEH_H */ diff --git a/lib/string.in.h b/lib/string.in.h new file mode 100644 index 000000000..ca029d7c0 --- /dev/null +++ b/lib/string.in.h @@ -0,0 +1,605 @@ +/* A GNU-like . + + Copyright (C) 1995-1996, 2001-2008 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _GL_STRING_H + +#if __GNUC__ >= 3 +@PRAGMA_SYSTEM_HEADER@ +#endif + +/* The include_next requires a split double-inclusion guard. */ +#@INCLUDE_NEXT@ @NEXT_STRING_H@ + +#ifndef _GL_STRING_H +#define _GL_STRING_H + + +#ifndef __attribute__ +/* This feature is available in gcc versions 2.5 and later. */ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) +# define __attribute__(Spec) /* empty */ +# endif +/* The attribute __pure__ was added in gcc 2.96. */ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) +# define __pure__ /* empty */ +# endif +#endif + + +/* The definition of GL_LINK_WARNING is copied here. */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Return the first occurrence of NEEDLE in HAYSTACK. */ +#if @GNULIB_MEMMEM@ +# if @REPLACE_MEMMEM@ +# define memmem rpl_memmem +# endif +# if ! @HAVE_DECL_MEMMEM@ || @REPLACE_MEMMEM@ +extern void *memmem (void const *__haystack, size_t __haystack_len, + void const *__needle, size_t __needle_len) + __attribute__ ((__pure__)); +# endif +#elif defined GNULIB_POSIXCHECK +# undef memmem +# define memmem(a,al,b,bl) \ + (GL_LINK_WARNING ("memmem is unportable and often quadratic - " \ + "use gnulib module memmem-simple for portability, " \ + "and module memmem for speed" ), \ + memmem (a, al, b, bl)) +#endif + +/* Copy N bytes of SRC to DEST, return pointer to bytes after the + last written byte. */ +#if @GNULIB_MEMPCPY@ +# if ! @HAVE_MEMPCPY@ +extern void *mempcpy (void *restrict __dest, void const *restrict __src, + size_t __n); +# endif +#elif defined GNULIB_POSIXCHECK +# undef mempcpy +# define mempcpy(a,b,n) \ + (GL_LINK_WARNING ("mempcpy is unportable - " \ + "use gnulib module mempcpy for portability"), \ + mempcpy (a, b, n)) +#endif + +/* Search backwards through a block for a byte (specified as an int). */ +#if @GNULIB_MEMRCHR@ +# if ! @HAVE_DECL_MEMRCHR@ +extern void *memrchr (void const *, int, size_t) + __attribute__ ((__pure__)); +# endif +#elif defined GNULIB_POSIXCHECK +# undef memrchr +# define memrchr(a,b,c) \ + (GL_LINK_WARNING ("memrchr is unportable - " \ + "use gnulib module memrchr for portability"), \ + memrchr (a, b, c)) +#endif + +/* Find the first occurrence of C in S. More efficient than + memchr(S,C,N), at the expense of undefined behavior if C does not + occur within N bytes. */ +#if @GNULIB_RAWMEMCHR@ +# if ! @HAVE_RAWMEMCHR@ +extern void *rawmemchr (void const *__s, int __c_in) + __attribute__ ((__pure__)); +# endif +#elif defined GNULIB_POSIXCHECK +# undef rawmemchr +# define rawmemchr(a,b) \ + (GL_LINK_WARNING ("rawmemchr is unportable - " \ + "use gnulib module rawmemchr for portability"), \ + rawmemchr (a, b)) +#endif + +/* Copy SRC to DST, returning the address of the terminating '\0' in DST. */ +#if @GNULIB_STPCPY@ +# if ! @HAVE_STPCPY@ +extern char *stpcpy (char *restrict __dst, char const *restrict __src); +# endif +#elif defined GNULIB_POSIXCHECK +# undef stpcpy +# define stpcpy(a,b) \ + (GL_LINK_WARNING ("stpcpy is unportable - " \ + "use gnulib module stpcpy for portability"), \ + stpcpy (a, b)) +#endif + +/* Copy no more than N bytes of SRC to DST, returning a pointer past the + last non-NUL byte written into DST. */ +#if @GNULIB_STPNCPY@ +# if ! @HAVE_STPNCPY@ +# define stpncpy gnu_stpncpy +extern char *stpncpy (char *restrict __dst, char const *restrict __src, + size_t __n); +# endif +#elif defined GNULIB_POSIXCHECK +# undef stpncpy +# define stpncpy(a,b,n) \ + (GL_LINK_WARNING ("stpncpy is unportable - " \ + "use gnulib module stpncpy for portability"), \ + stpncpy (a, b, n)) +#endif + +#if defined GNULIB_POSIXCHECK +/* strchr() does not work with multibyte strings if the locale encoding is + GB18030 and the character to be searched is a digit. */ +# undef strchr +# define strchr(s,c) \ + (GL_LINK_WARNING ("strchr cannot work correctly on character strings " \ + "in some multibyte locales - " \ + "use mbschr if you care about internationalization"), \ + strchr (s, c)) +#endif + +/* Find the first occurrence of C in S or the final NUL byte. */ +#if @GNULIB_STRCHRNUL@ +# if ! @HAVE_STRCHRNUL@ +extern char *strchrnul (char const *__s, int __c_in) + __attribute__ ((__pure__)); +# endif +#elif defined GNULIB_POSIXCHECK +# undef strchrnul +# define strchrnul(a,b) \ + (GL_LINK_WARNING ("strchrnul is unportable - " \ + "use gnulib module strchrnul for portability"), \ + strchrnul (a, b)) +#endif + +/* Duplicate S, returning an identical malloc'd string. */ +#if @GNULIB_STRDUP@ +# if @REPLACE_STRDUP@ +# undef strdup +# define strdup rpl_strdup +# endif +# if !(@HAVE_DECL_STRDUP@ || defined strdup) || @REPLACE_STRDUP@ +extern char *strdup (char const *__s); +# endif +#elif defined GNULIB_POSIXCHECK +# undef strdup +# define strdup(a) \ + (GL_LINK_WARNING ("strdup is unportable - " \ + "use gnulib module strdup for portability"), \ + strdup (a)) +#endif + +/* Return a newly allocated copy of at most N bytes of STRING. */ +#if @GNULIB_STRNDUP@ +# if ! @HAVE_STRNDUP@ +# undef strndup +# define strndup rpl_strndup +# endif +# if ! @HAVE_STRNDUP@ || ! @HAVE_DECL_STRNDUP@ +extern char *strndup (char const *__string, size_t __n); +# endif +#elif defined GNULIB_POSIXCHECK +# undef strndup +# define strndup(a,n) \ + (GL_LINK_WARNING ("strndup is unportable - " \ + "use gnulib module strndup for portability"), \ + strndup (a, n)) +#endif + +/* Find the length (number of bytes) of STRING, but scan at most + MAXLEN bytes. If no '\0' terminator is found in that many bytes, + return MAXLEN. */ +#if @GNULIB_STRNLEN@ +# if ! @HAVE_DECL_STRNLEN@ +extern size_t strnlen (char const *__string, size_t __maxlen) + __attribute__ ((__pure__)); +# endif +#elif defined GNULIB_POSIXCHECK +# undef strnlen +# define strnlen(a,n) \ + (GL_LINK_WARNING ("strnlen is unportable - " \ + "use gnulib module strnlen for portability"), \ + strnlen (a, n)) +#endif + +#if defined GNULIB_POSIXCHECK +/* strcspn() assumes the second argument is a list of single-byte characters. + Even in this simple case, it does not work with multibyte strings if the + locale encoding is GB18030 and one of the characters to be searched is a + digit. */ +# undef strcspn +# define strcspn(s,a) \ + (GL_LINK_WARNING ("strcspn cannot work correctly on character strings " \ + "in multibyte locales - " \ + "use mbscspn if you care about internationalization"), \ + strcspn (s, a)) +#endif + +/* Find the first occurrence in S of any character in ACCEPT. */ +#if @GNULIB_STRPBRK@ +# if ! @HAVE_STRPBRK@ +extern char *strpbrk (char const *__s, char const *__accept) + __attribute__ ((__pure__)); +# endif +# if defined GNULIB_POSIXCHECK +/* strpbrk() assumes the second argument is a list of single-byte characters. + Even in this simple case, it does not work with multibyte strings if the + locale encoding is GB18030 and one of the characters to be searched is a + digit. */ +# undef strpbrk +# define strpbrk(s,a) \ + (GL_LINK_WARNING ("strpbrk cannot work correctly on character strings " \ + "in multibyte locales - " \ + "use mbspbrk if you care about internationalization"), \ + strpbrk (s, a)) +# endif +#elif defined GNULIB_POSIXCHECK +# undef strpbrk +# define strpbrk(s,a) \ + (GL_LINK_WARNING ("strpbrk is unportable - " \ + "use gnulib module strpbrk for portability"), \ + strpbrk (s, a)) +#endif + +#if defined GNULIB_POSIXCHECK +/* strspn() assumes the second argument is a list of single-byte characters. + Even in this simple case, it cannot work with multibyte strings. */ +# undef strspn +# define strspn(s,a) \ + (GL_LINK_WARNING ("strspn cannot work correctly on character strings " \ + "in multibyte locales - " \ + "use mbsspn if you care about internationalization"), \ + strspn (s, a)) +#endif + +#if defined GNULIB_POSIXCHECK +/* strrchr() does not work with multibyte strings if the locale encoding is + GB18030 and the character to be searched is a digit. */ +# undef strrchr +# define strrchr(s,c) \ + (GL_LINK_WARNING ("strrchr cannot work correctly on character strings " \ + "in some multibyte locales - " \ + "use mbsrchr if you care about internationalization"), \ + strrchr (s, c)) +#endif + +/* Search the next delimiter (char listed in DELIM) starting at *STRINGP. + If one is found, overwrite it with a NUL, and advance *STRINGP + to point to the next char after it. Otherwise, set *STRINGP to NULL. + If *STRINGP was already NULL, nothing happens. + Return the old value of *STRINGP. + + This is a variant of strtok() that is multithread-safe and supports + empty fields. + + Caveat: It modifies the original string. + Caveat: These functions cannot be used on constant strings. + Caveat: The identity of the delimiting character is lost. + Caveat: It doesn't work with multibyte strings unless all of the delimiter + characters are ASCII characters < 0x30. + + See also strtok_r(). */ +#if @GNULIB_STRSEP@ +# if ! @HAVE_STRSEP@ +extern char *strsep (char **restrict __stringp, char const *restrict __delim); +# endif +# if defined GNULIB_POSIXCHECK +# undef strsep +# define strsep(s,d) \ + (GL_LINK_WARNING ("strsep cannot work correctly on character strings " \ + "in multibyte locales - " \ + "use mbssep if you care about internationalization"), \ + strsep (s, d)) +# endif +#elif defined GNULIB_POSIXCHECK +# undef strsep +# define strsep(s,d) \ + (GL_LINK_WARNING ("strsep is unportable - " \ + "use gnulib module strsep for portability"), \ + strsep (s, d)) +#endif + +#if @GNULIB_STRSTR@ +# if @REPLACE_STRSTR@ +# define strstr rpl_strstr +char *strstr (const char *haystack, const char *needle) + __attribute__ ((__pure__)); +# endif +#elif defined GNULIB_POSIXCHECK +/* strstr() does not work with multibyte strings if the locale encoding is + different from UTF-8: + POSIX says that it operates on "strings", and "string" in POSIX is defined + as a sequence of bytes, not of characters. */ +# undef strstr +# define strstr(a,b) \ + (GL_LINK_WARNING ("strstr is quadratic on many systems, and cannot " \ + "work correctly on character strings in most " \ + "multibyte locales - " \ + "use mbsstr if you care about internationalization, " \ + "or use strstr if you care about speed"), \ + strstr (a, b)) +#endif + +/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive + comparison. */ +#if @GNULIB_STRCASESTR@ +# if @REPLACE_STRCASESTR@ +# define strcasestr rpl_strcasestr +# endif +# if ! @HAVE_STRCASESTR@ || @REPLACE_STRCASESTR@ +extern char *strcasestr (const char *haystack, const char *needle) + __attribute__ ((__pure__)); +# endif +#elif defined GNULIB_POSIXCHECK +/* strcasestr() does not work with multibyte strings: + It is a glibc extension, and glibc implements it only for unibyte + locales. */ +# undef strcasestr +# define strcasestr(a,b) \ + (GL_LINK_WARNING ("strcasestr does work correctly on character strings " \ + "in multibyte locales - " \ + "use mbscasestr if you care about " \ + "internationalization, or use c-strcasestr if you want " \ + "a locale independent function"), \ + strcasestr (a, b)) +#endif + +/* Parse S into tokens separated by characters in DELIM. + If S is NULL, the saved pointer in SAVE_PTR is used as + the next starting point. For example: + char s[] = "-abc-=-def"; + char *sp; + x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def" + x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL + x = strtok_r(NULL, "=", &sp); // x = NULL + // s = "abc\0-def\0" + + This is a variant of strtok() that is multithread-safe. + + For the POSIX documentation for this function, see: + http://www.opengroup.org/susv3xsh/strtok.html + + Caveat: It modifies the original string. + Caveat: These functions cannot be used on constant strings. + Caveat: The identity of the delimiting character is lost. + Caveat: It doesn't work with multibyte strings unless all of the delimiter + characters are ASCII characters < 0x30. + + See also strsep(). */ +#if @GNULIB_STRTOK_R@ +# if ! @HAVE_DECL_STRTOK_R@ +extern char *strtok_r (char *restrict s, char const *restrict delim, + char **restrict save_ptr); +# endif +# if defined GNULIB_POSIXCHECK +# undef strtok_r +# define strtok_r(s,d,p) \ + (GL_LINK_WARNING ("strtok_r cannot work correctly on character strings " \ + "in multibyte locales - " \ + "use mbstok_r if you care about internationalization"), \ + strtok_r (s, d, p)) +# endif +#elif defined GNULIB_POSIXCHECK +# undef strtok_r +# define strtok_r(s,d,p) \ + (GL_LINK_WARNING ("strtok_r is unportable - " \ + "use gnulib module strtok_r for portability"), \ + strtok_r (s, d, p)) +#endif + + +/* The following functions are not specified by POSIX. They are gnulib + extensions. */ + +#if @GNULIB_MBSLEN@ +/* Return the number of multibyte characters in the character string STRING. + This considers multibyte characters, unlike strlen, which counts bytes. */ +extern size_t mbslen (const char *string); +#endif + +#if @GNULIB_MBSNLEN@ +/* Return the number of multibyte characters in the character string starting + at STRING and ending at STRING + LEN. */ +extern size_t mbsnlen (const char *string, size_t len); +#endif + +#if @GNULIB_MBSCHR@ +/* Locate the first single-byte character C in the character string STRING, + and return a pointer to it. Return NULL if C is not found in STRING. + Unlike strchr(), this function works correctly in multibyte locales with + encodings such as GB18030. */ +# define mbschr rpl_mbschr /* avoid collision with HP-UX function */ +extern char * mbschr (const char *string, int c); +#endif + +#if @GNULIB_MBSRCHR@ +/* Locate the last single-byte character C in the character string STRING, + and return a pointer to it. Return NULL if C is not found in STRING. + Unlike strrchr(), this function works correctly in multibyte locales with + encodings such as GB18030. */ +# define mbsrchr rpl_mbsrchr /* avoid collision with HP-UX function */ +extern char * mbsrchr (const char *string, int c); +#endif + +#if @GNULIB_MBSSTR@ +/* Find the first occurrence of the character string NEEDLE in the character + string HAYSTACK. Return NULL if NEEDLE is not found in HAYSTACK. + Unlike strstr(), this function works correctly in multibyte locales with + encodings different from UTF-8. */ +extern char * mbsstr (const char *haystack, const char *needle); +#endif + +#if @GNULIB_MBSCASECMP@ +/* Compare the character strings S1 and S2, ignoring case, returning less than, + equal to or greater than zero if S1 is lexicographically less than, equal to + or greater than S2. + Note: This function may, in multibyte locales, return 0 for strings of + different lengths! + Unlike strcasecmp(), this function works correctly in multibyte locales. */ +extern int mbscasecmp (const char *s1, const char *s2); +#endif + +#if @GNULIB_MBSNCASECMP@ +/* Compare the initial segment of the character string S1 consisting of at most + N characters with the initial segment of the character string S2 consisting + of at most N characters, ignoring case, returning less than, equal to or + greater than zero if the initial segment of S1 is lexicographically less + than, equal to or greater than the initial segment of S2. + Note: This function may, in multibyte locales, return 0 for initial segments + of different lengths! + Unlike strncasecmp(), this function works correctly in multibyte locales. + But beware that N is not a byte count but a character count! */ +extern int mbsncasecmp (const char *s1, const char *s2, size_t n); +#endif + +#if @GNULIB_MBSPCASECMP@ +/* Compare the initial segment of the character string STRING consisting of + at most mbslen (PREFIX) characters with the character string PREFIX, + ignoring case, returning less than, equal to or greater than zero if this + initial segment is lexicographically less than, equal to or greater than + PREFIX. + Note: This function may, in multibyte locales, return 0 if STRING is of + smaller length than PREFIX! + Unlike strncasecmp(), this function works correctly in multibyte + locales. */ +extern char * mbspcasecmp (const char *string, const char *prefix); +#endif + +#if @GNULIB_MBSCASESTR@ +/* Find the first occurrence of the character string NEEDLE in the character + string HAYSTACK, using case-insensitive comparison. + Note: This function may, in multibyte locales, return success even if + strlen (haystack) < strlen (needle) ! + Unlike strcasestr(), this function works correctly in multibyte locales. */ +extern char * mbscasestr (const char *haystack, const char *needle); +#endif + +#if @GNULIB_MBSCSPN@ +/* Find the first occurrence in the character string STRING of any character + in the character string ACCEPT. Return the number of bytes from the + beginning of the string to this occurrence, or to the end of the string + if none exists. + Unlike strcspn(), this function works correctly in multibyte locales. */ +extern size_t mbscspn (const char *string, const char *accept); +#endif + +#if @GNULIB_MBSPBRK@ +/* Find the first occurrence in the character string STRING of any character + in the character string ACCEPT. Return the pointer to it, or NULL if none + exists. + Unlike strpbrk(), this function works correctly in multibyte locales. */ +# define mbspbrk rpl_mbspbrk /* avoid collision with HP-UX function */ +extern char * mbspbrk (const char *string, const char *accept); +#endif + +#if @GNULIB_MBSSPN@ +/* Find the first occurrence in the character string STRING of any character + not in the character string REJECT. Return the number of bytes from the + beginning of the string to this occurrence, or to the end of the string + if none exists. + Unlike strspn(), this function works correctly in multibyte locales. */ +extern size_t mbsspn (const char *string, const char *reject); +#endif + +#if @GNULIB_MBSSEP@ +/* Search the next delimiter (multibyte character listed in the character + string DELIM) starting at the character string *STRINGP. + If one is found, overwrite it with a NUL, and advance *STRINGP to point + to the next multibyte character after it. Otherwise, set *STRINGP to NULL. + If *STRINGP was already NULL, nothing happens. + Return the old value of *STRINGP. + + This is a variant of mbstok_r() that supports empty fields. + + Caveat: It modifies the original string. + Caveat: These functions cannot be used on constant strings. + Caveat: The identity of the delimiting character is lost. + + See also mbstok_r(). */ +extern char * mbssep (char **stringp, const char *delim); +#endif + +#if @GNULIB_MBSTOK_R@ +/* Parse the character string STRING into tokens separated by characters in + the character string DELIM. + If STRING is NULL, the saved pointer in SAVE_PTR is used as + the next starting point. For example: + char s[] = "-abc-=-def"; + char *sp; + x = mbstok_r(s, "-", &sp); // x = "abc", sp = "=-def" + x = mbstok_r(NULL, "-=", &sp); // x = "def", sp = NULL + x = mbstok_r(NULL, "=", &sp); // x = NULL + // s = "abc\0-def\0" + + Caveat: It modifies the original string. + Caveat: These functions cannot be used on constant strings. + Caveat: The identity of the delimiting character is lost. + + See also mbssep(). */ +extern char * mbstok_r (char *string, const char *delim, char **save_ptr); +#endif + +/* Map any int, typically from errno, into an error message. */ +#if @GNULIB_STRERROR@ +# if @REPLACE_STRERROR@ +# undef strerror +# define strerror rpl_strerror +extern char *strerror (int); +# endif +#elif defined GNULIB_POSIXCHECK +# undef strerror +# define strerror(e) \ + (GL_LINK_WARNING ("strerror is unportable - " \ + "use gnulib module strerror to guarantee non-NULL result"), \ + strerror (e)) +#endif + +#if @GNULIB_STRSIGNAL@ +# if @REPLACE_STRSIGNAL@ +# define strsignal rpl_strsignal +# endif +# if ! @HAVE_DECL_STRSIGNAL@ || @REPLACE_STRSIGNAL@ +extern char *strsignal (int __sig); +# endif +#elif defined GNULIB_POSIXCHECK +# undef strsignal +# define strsignal(a) \ + (GL_LINK_WARNING ("strsignal is unportable - " \ + "use gnulib module strsignal for portability"), \ + strsignal (a)) +#endif + +#if @GNULIB_STRVERSCMP@ +# if !@HAVE_STRVERSCMP@ +extern int strverscmp (const char *, const char *); +# endif +#elif defined GNULIB_POSIXCHECK +# undef strverscmp +# define strverscmp(a, b) \ + (GL_LINK_WARNING ("strverscmp is unportable - " \ + "use gnulib module strverscmp for portability"), \ + strverscmp (a, b)) +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* _GL_STRING_H */ +#endif /* _GL_STRING_H */ diff --git a/lib/unistr.h b/lib/unistr.h new file mode 100644 index 000000000..83ff13411 --- /dev/null +++ b/lib/unistr.h @@ -0,0 +1,681 @@ +/* Elementary Unicode string functions. + Copyright (C) 2001-2002, 2005-2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#ifndef _UNISTR_H +#define _UNISTR_H + +#include "unitypes.h" + +/* Get bool. */ +#include + +/* Get size_t. */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Conventions: + + All functions prefixed with u8_ operate on UTF-8 encoded strings. + Their unit is an uint8_t (1 byte). + + All functions prefixed with u16_ operate on UTF-16 encoded strings. + Their unit is an uint16_t (a 2-byte word). + + All functions prefixed with u32_ operate on UCS-4 encoded strings. + Their unit is an uint32_t (a 4-byte word). + + All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly + n units. + + All arguments starting with "str" and the arguments of functions starting + with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string + which terminates at the first NUL unit. This termination unit is + considered part of the string for all memory allocation purposes, but + is not considered part of the string for all other logical purposes. + + Functions returning a string result take a (resultbuf, lengthp) argument + pair. If resultbuf is not NULL and the result fits into *lengthp units, + it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly + allocated string is returned. In both cases, *lengthp is set to the + length (number of units) of the returned string. In case of error, + NULL is returned and errno is set. */ + + +/* Elementary string checks. */ + +/* Check whether an UTF-8 string is well-formed. + Return NULL if valid, or a pointer to the first invalid unit otherwise. */ +extern const uint8_t * + u8_check (const uint8_t *s, size_t n); + +/* Check whether an UTF-16 string is well-formed. + Return NULL if valid, or a pointer to the first invalid unit otherwise. */ +extern const uint16_t * + u16_check (const uint16_t *s, size_t n); + +/* Check whether an UCS-4 string is well-formed. + Return NULL if valid, or a pointer to the first invalid unit otherwise. */ +extern const uint32_t * + u32_check (const uint32_t *s, size_t n); + + +/* Elementary string conversions. */ + +/* Convert an UTF-8 string to an UTF-16 string. */ +extern uint16_t * + u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf, + size_t *lengthp); + +/* Convert an UTF-8 string to an UCS-4 string. */ +extern uint32_t * + u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf, + size_t *lengthp); + +/* Convert an UTF-16 string to an UTF-8 string. */ +extern uint8_t * + u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf, + size_t *lengthp); + +/* Convert an UTF-16 string to an UCS-4 string. */ +extern uint32_t * + u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf, + size_t *lengthp); + +/* Convert an UCS-4 string to an UTF-8 string. */ +extern uint8_t * + u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf, + size_t *lengthp); + +/* Convert an UCS-4 string to an UTF-16 string. */ +extern uint16_t * + u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf, + size_t *lengthp); + + +/* Elementary string functions. */ + +/* Return the length (number of units) of the first character in S, which is + no longer than N. Return 0 if it is the NUL character. Return -1 upon + failure. */ +/* Similar to mblen(), except that s must not be NULL. */ +extern int + u8_mblen (const uint8_t *s, size_t n); +extern int + u16_mblen (const uint16_t *s, size_t n); +extern int + u32_mblen (const uint32_t *s, size_t n); + +/* Return the length (number of units) of the first character in S, putting + its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, + and an appropriate number of units is returned. + The number of available units, N, must be > 0. */ +/* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0, + and the NUL character is not treated specially. */ +/* The variants with _safe suffix are safe, even if the library is compiled + without --enable-safety. */ + +#ifdef GNULIB_UNISTR_U8_MBTOUC_UNSAFE +# if !HAVE_INLINE +extern int + u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n); +# else +extern int + u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n); +static inline int +u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else + return u8_mbtouc_unsafe_aux (puc, s, n); +} +# endif +#endif + +#ifdef GNULIB_UNISTR_U16_MBTOUC_UNSAFE +# if !HAVE_INLINE +extern int + u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n); +# else +extern int + u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n); +static inline int +u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n) +{ + uint16_t c = *s; + + if (c < 0xd800 || c >= 0xe000) + { + *puc = c; + return 1; + } + else + return u16_mbtouc_unsafe_aux (puc, s, n); +} +# endif +#endif + +#ifdef GNULIB_UNISTR_U32_MBTOUC_UNSAFE +# if !HAVE_INLINE +extern int + u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n); +# else +static inline int +u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_) +{ + uint32_t c = *s; + +# if CONFIG_UNICODE_SAFETY + if (c < 0xd800 || (c >= 0xe000 && c < 0x110000)) +# endif + *puc = c; +# if CONFIG_UNICODE_SAFETY + else + /* invalid multibyte character */ + *puc = 0xfffd; +# endif + return 1; +} +# endif +#endif + +#ifdef GNULIB_UNISTR_U8_MBTOUC +# if !HAVE_INLINE +extern int + u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n); +# else +extern int + u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n); +static inline int +u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else + return u8_mbtouc_aux (puc, s, n); +} +# endif +#endif + +#ifdef GNULIB_UNISTR_U16_MBTOUC +# if !HAVE_INLINE +extern int + u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n); +# else +extern int + u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n); +static inline int +u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n) +{ + uint16_t c = *s; + + if (c < 0xd800 || c >= 0xe000) + { + *puc = c; + return 1; + } + else + return u16_mbtouc_aux (puc, s, n); +} +# endif +#endif + +#ifdef GNULIB_UNISTR_U32_MBTOUC +# if !HAVE_INLINE +extern int + u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n); +# else +static inline int +u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_) +{ + uint32_t c = *s; + + if (c < 0xd800 || (c >= 0xe000 && c < 0x110000)) + *puc = c; + else + /* invalid multibyte character */ + *puc = 0xfffd; + return 1; +} +# endif +#endif + +/* Return the length (number of units) of the first character in S, putting + its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, + and -1 is returned for an invalid sequence of units, -2 is returned for an + incomplete sequence of units. + The number of available units, N, must be > 0. */ +/* Similar to u*_mbtouc(), except that the return value gives more details + about the failure, similar to mbrtowc(). */ + +#ifdef GNULIB_UNISTR_U8_MBTOUCR +extern int + u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n); +#endif + +#ifdef GNULIB_UNISTR_U16_MBTOUCR +extern int + u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n); +#endif + +#ifdef GNULIB_UNISTR_U32_MBTOUCR +extern int + u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n); +#endif + +/* Put the multibyte character represented by UC in S, returning its + length. Return -1 upon failure, -2 if the number of available units, N, + is too small. The latter case cannot occur if N >= 6/2/1, respectively. */ +/* Similar to wctomb(), except that s must not be NULL, and the argument n + must be specified. */ + +#ifdef GNULIB_UNISTR_U8_UCTOMB +/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */ +extern int + u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n); +# if !HAVE_INLINE +extern int + u8_uctomb (uint8_t *s, ucs4_t uc, int n); +# else +static inline int +u8_uctomb (uint8_t *s, ucs4_t uc, int n) +{ + if (uc < 0x80 && n > 0) + { + s[0] = uc; + return 1; + } + else + return u8_uctomb_aux (s, uc, n); +} +# endif +#endif + +#ifdef GNULIB_UNISTR_U16_UCTOMB +/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */ +extern int + u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n); +# if !HAVE_INLINE +extern int + u16_uctomb (uint16_t *s, ucs4_t uc, int n); +# else +static inline int +u16_uctomb (uint16_t *s, ucs4_t uc, int n) +{ + if (uc < 0xd800 && n > 0) + { + s[0] = uc; + return 1; + } + else + return u16_uctomb_aux (s, uc, n); +} +# endif +#endif + +#ifdef GNULIB_UNISTR_U32_UCTOMB +# if !HAVE_INLINE +extern int + u32_uctomb (uint32_t *s, ucs4_t uc, int n); +# else +static inline int +u32_uctomb (uint32_t *s, ucs4_t uc, int n) +{ + if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000)) + { + if (n > 0) + { + *s = uc; + return 1; + } + else + return -2; + } + else + return -1; +} +# endif +#endif + +/* Copy N units from SRC to DEST. */ +/* Similar to memcpy(). */ +extern uint8_t * + u8_cpy (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_cpy (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_cpy (uint32_t *dest, const uint32_t *src, size_t n); + +/* Copy N units from SRC to DEST, guaranteeing correct behavior for + overlapping memory areas. */ +/* Similar to memmove(). */ +extern uint8_t * + u8_move (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_move (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_move (uint32_t *dest, const uint32_t *src, size_t n); + +/* Set the first N characters of S to UC. UC should be a character that + occupies only 1 unit. */ +/* Similar to memset(). */ +extern uint8_t * + u8_set (uint8_t *s, ucs4_t uc, size_t n); +extern uint16_t * + u16_set (uint16_t *s, ucs4_t uc, size_t n); +extern uint32_t * + u32_set (uint32_t *s, ucs4_t uc, size_t n); + +/* Compare S1 and S2, each of length N. */ +/* Similar to memcmp(). */ +extern int + u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n); +extern int + u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n); +extern int + u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n); + +/* Compare S1 and S2. */ +/* Similar to the gnulib function memcmp2(). */ +extern int + u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2); +extern int + u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2); +extern int + u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2); + +/* Search the string at S for UC. */ +/* Similar to memchr(). */ +extern uint8_t * + u8_chr (const uint8_t *s, size_t n, ucs4_t uc); +extern uint16_t * + u16_chr (const uint16_t *s, size_t n, ucs4_t uc); +extern uint32_t * + u32_chr (const uint32_t *s, size_t n, ucs4_t uc); + +/* Count the number of Unicode characters in the N units from S. */ +/* Similar to mbsnlen(). */ +extern size_t + u8_mbsnlen (const uint8_t *s, size_t n); +extern size_t + u16_mbsnlen (const uint16_t *s, size_t n); +extern size_t + u32_mbsnlen (const uint32_t *s, size_t n); + +/* Elementary string functions with memory allocation. */ + +/* Make a freshly allocated copy of S, of length N. */ +extern uint8_t * + u8_cpy_alloc (const uint8_t *s, size_t n); +extern uint16_t * + u16_cpy_alloc (const uint16_t *s, size_t n); +extern uint32_t * + u32_cpy_alloc (const uint32_t *s, size_t n); + +/* Elementary string functions on NUL terminated strings. */ + +/* Return the length (number of units) of the first character in S. + Return 0 if it is the NUL character. Return -1 upon failure. */ +extern int + u8_strmblen (const uint8_t *s); +extern int + u16_strmblen (const uint16_t *s); +extern int + u32_strmblen (const uint32_t *s); + +/* Return the length (number of units) of the first character in S, putting + its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL + character. Return -1 upon failure. */ +extern int + u8_strmbtouc (ucs4_t *puc, const uint8_t *s); +extern int + u16_strmbtouc (ucs4_t *puc, const uint16_t *s); +extern int + u32_strmbtouc (ucs4_t *puc, const uint32_t *s); + +/* Forward iteration step. Advances the pointer past the next character, + or returns NULL if the end of the string has been reached. Puts the + character's 'ucs4_t' representation in *PUC. */ +extern const uint8_t * + u8_next (ucs4_t *puc, const uint8_t *s); +extern const uint16_t * + u16_next (ucs4_t *puc, const uint16_t *s); +extern const uint32_t * + u32_next (ucs4_t *puc, const uint32_t *s); + +/* Backward iteration step. Advances the pointer to point to the previous + character, or returns NULL if the beginning of the string had been reached. + Puts the character's 'ucs4_t' representation in *PUC. */ +extern const uint8_t * + u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start); +extern const uint16_t * + u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start); +extern const uint32_t * + u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start); + +/* Return the number of units in S. */ +/* Similar to strlen(), wcslen(). */ +extern size_t + u8_strlen (const uint8_t *s); +extern size_t + u16_strlen (const uint16_t *s); +extern size_t + u32_strlen (const uint32_t *s); + +/* Return the number of units in S, but at most MAXLEN. */ +/* Similar to strnlen(), wcsnlen(). */ +extern size_t + u8_strnlen (const uint8_t *s, size_t maxlen); +extern size_t + u16_strnlen (const uint16_t *s, size_t maxlen); +extern size_t + u32_strnlen (const uint32_t *s, size_t maxlen); + +/* Copy SRC to DEST. */ +/* Similar to strcpy(), wcscpy(). */ +extern uint8_t * + u8_strcpy (uint8_t *dest, const uint8_t *src); +extern uint16_t * + u16_strcpy (uint16_t *dest, const uint16_t *src); +extern uint32_t * + u32_strcpy (uint32_t *dest, const uint32_t *src); + +/* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */ +/* Similar to stpcpy(). */ +extern uint8_t * + u8_stpcpy (uint8_t *dest, const uint8_t *src); +extern uint16_t * + u16_stpcpy (uint16_t *dest, const uint16_t *src); +extern uint32_t * + u32_stpcpy (uint32_t *dest, const uint32_t *src); + +/* Copy no more than N units of SRC to DEST. */ +/* Similar to strncpy(), wcsncpy(). */ +extern uint8_t * + u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n); + +/* Copy no more than N units of SRC to DEST, returning the address of + the last unit written into DEST. */ +/* Similar to stpncpy(). */ +extern uint8_t * + u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n); + +/* Append SRC onto DEST. */ +/* Similar to strcat(), wcscat(). */ +extern uint8_t * + u8_strcat (uint8_t *dest, const uint8_t *src); +extern uint16_t * + u16_strcat (uint16_t *dest, const uint16_t *src); +extern uint32_t * + u32_strcat (uint32_t *dest, const uint32_t *src); + +/* Append no more than N units of SRC onto DEST. */ +/* Similar to strncat(), wcsncat(). */ +extern uint8_t * + u8_strncat (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_strncat (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_strncat (uint32_t *dest, const uint32_t *src, size_t n); + +/* Compare S1 and S2. */ +/* Similar to strcmp(), wcscmp(). */ +extern int + u8_strcmp (const uint8_t *s1, const uint8_t *s2); +extern int + u16_strcmp (const uint16_t *s1, const uint16_t *s2); +extern int + u32_strcmp (const uint32_t *s1, const uint32_t *s2); + +/* Compare S1 and S2 using the collation rules of the current locale. + Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. + Upon failure, set errno and return any value. */ +/* Similar to strcoll(), wcscoll(). */ +extern int + u8_strcoll (const uint8_t *s1, const uint8_t *s2); +extern int + u16_strcoll (const uint16_t *s1, const uint16_t *s2); +extern int + u32_strcoll (const uint32_t *s1, const uint32_t *s2); + +/* Compare no more than N units of S1 and S2. */ +/* Similar to strncmp(), wcsncmp(). */ +extern int + u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n); +extern int + u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n); +extern int + u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n); + +/* Duplicate S, returning an identical malloc'd string. */ +/* Similar to strdup(), wcsdup(). */ +extern uint8_t * + u8_strdup (const uint8_t *s); +extern uint16_t * + u16_strdup (const uint16_t *s); +extern uint32_t * + u32_strdup (const uint32_t *s); + +/* Find the first occurrence of UC in STR. */ +/* Similar to strchr(), wcschr(). */ +extern uint8_t * + u8_strchr (const uint8_t *str, ucs4_t uc); +extern uint16_t * + u16_strchr (const uint16_t *str, ucs4_t uc); +extern uint32_t * + u32_strchr (const uint32_t *str, ucs4_t uc); + +/* Find the last occurrence of UC in STR. */ +/* Similar to strrchr(), wcsrchr(). */ +extern uint8_t * + u8_strrchr (const uint8_t *str, ucs4_t uc); +extern uint16_t * + u16_strrchr (const uint16_t *str, ucs4_t uc); +extern uint32_t * + u32_strrchr (const uint32_t *str, ucs4_t uc); + +/* Return the length of the initial segment of STR which consists entirely + of Unicode characters not in REJECT. */ +/* Similar to strcspn(), wcscspn(). */ +extern size_t + u8_strcspn (const uint8_t *str, const uint8_t *reject); +extern size_t + u16_strcspn (const uint16_t *str, const uint16_t *reject); +extern size_t + u32_strcspn (const uint32_t *str, const uint32_t *reject); + +/* Return the length of the initial segment of STR which consists entirely + of Unicode characters in ACCEPT. */ +/* Similar to strspn(), wcsspn(). */ +extern size_t + u8_strspn (const uint8_t *str, const uint8_t *accept); +extern size_t + u16_strspn (const uint16_t *str, const uint16_t *accept); +extern size_t + u32_strspn (const uint32_t *str, const uint32_t *accept); + +/* Find the first occurrence in STR of any character in ACCEPT. */ +/* Similar to strpbrk(), wcspbrk(). */ +extern uint8_t * + u8_strpbrk (const uint8_t *str, const uint8_t *accept); +extern uint16_t * + u16_strpbrk (const uint16_t *str, const uint16_t *accept); +extern uint32_t * + u32_strpbrk (const uint32_t *str, const uint32_t *accept); + +/* Find the first occurrence of NEEDLE in HAYSTACK. */ +/* Similar to strstr(), wcsstr(). */ +extern uint8_t * + u8_strstr (const uint8_t *haystack, const uint8_t *needle); +extern uint16_t * + u16_strstr (const uint16_t *haystack, const uint16_t *needle); +extern uint32_t * + u32_strstr (const uint32_t *haystack, const uint32_t *needle); + +/* Test whether STR starts with PREFIX. */ +extern bool + u8_startswith (const uint8_t *str, const uint8_t *prefix); +extern bool + u16_startswith (const uint16_t *str, const uint16_t *prefix); +extern bool + u32_startswith (const uint32_t *str, const uint32_t *prefix); + +/* Test whether STR ends with SUFFIX. */ +extern bool + u8_endswith (const uint8_t *str, const uint8_t *suffix); +extern bool + u16_endswith (const uint16_t *str, const uint16_t *suffix); +extern bool + u32_endswith (const uint32_t *str, const uint32_t *suffix); + +/* Divide STR into tokens separated by characters in DELIM. + This interface is actually more similar to wcstok than to strtok. */ +/* Similar to strtok_r(), wcstok(). */ +extern uint8_t * + u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr); +extern uint16_t * + u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr); +extern uint32_t * + u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr); + + +#ifdef __cplusplus +} +#endif + +#endif /* _UNISTR_H */ diff --git a/lib/unistr/u8-mbtouc-aux.c b/lib/unistr/u8-mbtouc-aux.c new file mode 100644 index 000000000..53d02bf0d --- /dev/null +++ b/lib/unistr/u8-mbtouc-aux.c @@ -0,0 +1,158 @@ +/* Conversion UTF-8 to UCS-4. + Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include "unistr.h" + +#if defined IN_LIBUNISTRING || HAVE_INLINE + +int +u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c >= 0xc2) + { + if (c < 0xe0) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x1f) << 6) + | (unsigned int) (s[1] ^ 0x80); + return 2; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf0) + { + if (n >= 3) + { + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf8) + { + if (n >= 4) + { + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 + && (c >= 0xf1 || s[1] >= 0x90) +#if 1 + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) +#endif + ) + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#if 0 + else if (c < 0xfc) + { + if (n >= 5) + { + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (c >= 0xf9 || s[1] >= 0x88)) + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xfe) + { + if (n >= 6) + { + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (s[5] ^ 0x80) < 0x40 + && (c >= 0xfd || s[1] >= 0x84)) + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#endif + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 1; +} + +#endif diff --git a/lib/unistr/u8-mbtouc-unsafe-aux.c b/lib/unistr/u8-mbtouc-unsafe-aux.c new file mode 100644 index 000000000..43e4a360f --- /dev/null +++ b/lib/unistr/u8-mbtouc-unsafe-aux.c @@ -0,0 +1,168 @@ +/* Conversion UTF-8 to UCS-4. + Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include "unistr.h" + +#if defined IN_LIBUNISTRING || HAVE_INLINE + +int +u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c >= 0xc2) + { + if (c < 0xe0) + { + if (n >= 2) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40) +#endif + { + *puc = ((unsigned int) (c & 0x1f) << 6) + | (unsigned int) (s[1] ^ 0x80); + return 2; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf0) + { + if (n >= 3) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) +#endif + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf8) + { + if (n >= 4) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 + && (c >= 0xf1 || s[1] >= 0x90) +#if 1 + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) +#endif + ) +#endif + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#if 0 + else if (c < 0xfc) + { + if (n >= 5) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (c >= 0xf9 || s[1] >= 0x88)) +#endif + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xfe) + { + if (n >= 6) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (s[5] ^ 0x80) < 0x40 + && (c >= 0xfd || s[1] >= 0x84)) +#endif + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#endif + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 1; +} + +#endif diff --git a/lib/unistr/u8-mbtouc-unsafe.c b/lib/unistr/u8-mbtouc-unsafe.c new file mode 100644 index 000000000..466156967 --- /dev/null +++ b/lib/unistr/u8-mbtouc-unsafe.c @@ -0,0 +1,179 @@ +/* Look at first character in UTF-8 string. + Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +#if defined IN_LIBUNISTRING +/* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not + 'static inline'. */ +# include "unistring-notinline.h" +#endif + +/* Specification. */ +#include "unistr.h" + +#if !HAVE_INLINE + +int +u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else if (c >= 0xc2) + { + if (c < 0xe0) + { + if (n >= 2) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40) +#endif + { + *puc = ((unsigned int) (c & 0x1f) << 6) + | (unsigned int) (s[1] ^ 0x80); + return 2; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf0) + { + if (n >= 3) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) +#endif + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf8) + { + if (n >= 4) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 + && (c >= 0xf1 || s[1] >= 0x90) +#if 1 + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) +#endif + ) +#endif + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#if 0 + else if (c < 0xfc) + { + if (n >= 5) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (c >= 0xf9 || s[1] >= 0x88)) +#endif + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xfe) + { + if (n >= 6) + { +#if CONFIG_UNICODE_SAFETY + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (s[5] ^ 0x80) < 0x40 + && (c >= 0xfd || s[1] >= 0x84)) +#endif + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#endif + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 1; +} + +#endif diff --git a/lib/unistr/u8-mbtouc.c b/lib/unistr/u8-mbtouc.c new file mode 100644 index 000000000..ff624f17d --- /dev/null +++ b/lib/unistr/u8-mbtouc.c @@ -0,0 +1,168 @@ +/* Look at first character in UTF-8 string. + Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +#if defined IN_LIBUNISTRING +/* Tell unistr.h to declare u8_mbtouc as 'extern', not 'static inline'. */ +# include "unistring-notinline.h" +#endif + +/* Specification. */ +#include "unistr.h" + +#if !HAVE_INLINE + +int +u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else if (c >= 0xc2) + { + if (c < 0xe0) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x1f) << 6) + | (unsigned int) (s[1] ^ 0x80); + return 2; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf0) + { + if (n >= 3) + { + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xf8) + { + if (n >= 4) + { + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 + && (c >= 0xf1 || s[1] >= 0x90) +#if 1 + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) +#endif + ) + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#if 0 + else if (c < 0xfc) + { + if (n >= 5) + { + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (c >= 0xf9 || s[1] >= 0x88)) + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } + else if (c < 0xfe) + { + if (n >= 6) + { + if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 + && (s[5] ^ 0x80) < 0x40 + && (c >= 0xfd || s[1] >= 0x84)) + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return n; + } + } +#endif + } + /* invalid multibyte character */ + *puc = 0xfffd; + return 1; +} + +#endif diff --git a/lib/unistr/u8-mbtoucr.c b/lib/unistr/u8-mbtoucr.c new file mode 100644 index 000000000..dd8335247 --- /dev/null +++ b/lib/unistr/u8-mbtoucr.c @@ -0,0 +1,285 @@ +/* Look at first character in UTF-8 string, returning an error code. + Copyright (C) 1999-2002, 2006-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include "unistr.h" + +int +u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else if (c >= 0xc2) + { + if (c < 0xe0) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x1f) << 6) + | (unsigned int) (s[1] ^ 0x80); + return 2; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + else if (c < 0xf0) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40 + && (c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) + { + if (n >= 3) + { + if ((s[2] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + else if (c < 0xf8) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40 + && (c >= 0xf1 || s[1] >= 0x90) +#if 1 + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) +#endif + ) + { + if (n >= 3) + { + if ((s[2] ^ 0x80) < 0x40) + { + if (n >= 4) + { + if ((s[3] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } +#if 0 + else if (c < 0xfc) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40 + && (c >= 0xf9 || s[1] >= 0x88)) + { + if (n >= 3) + { + if ((s[2] ^ 0x80) < 0x40) + { + if (n >= 4) + { + if ((s[3] ^ 0x80) < 0x40) + { + if (n >= 5) + { + if ((s[4] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + else if (c < 0xfe) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40 + && (c >= 0xfd || s[1] >= 0x84)) + { + if (n >= 3) + { + if ((s[2] ^ 0x80) < 0x40) + { + if (n >= 4) + { + if ((s[3] ^ 0x80) < 0x40) + { + if (n >= 5) + { + if ((s[4] ^ 0x80) < 0x40) + { + if (n >= 6) + { + if ((s[5] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } +#endif + } + /* invalid multibyte character */ + *puc = 0xfffd; + return -1; +} diff --git a/lib/unistr/u8-prev.c b/lib/unistr/u8-prev.c new file mode 100644 index 000000000..245d22ff0 --- /dev/null +++ b/lib/unistr/u8-prev.c @@ -0,0 +1,93 @@ +/* Iterate over previous character in UTF-8 string. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include "unistr.h" + +const uint8_t * +u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start) +{ + /* Keep in sync with unistr.h and utf8-ucs4.c. */ + if (s != start) + { + uint8_t c_1 = s[-1]; + + if (c_1 < 0x80) + { + *puc = c_1; + return s - 1; + } +#if CONFIG_UNICODE_SAFETY + if ((c_1 ^ 0x80) < 0x40) +#endif + if (s - 1 != start) + { + uint8_t c_2 = s[-2]; + + if (c_2 >= 0xc2 && c_2 < 0xe0) + { + *puc = ((unsigned int) (c_2 & 0x1f) << 6) + | (unsigned int) (c_1 ^ 0x80); + return s - 2; + } +#if CONFIG_UNICODE_SAFETY + if ((c_2 ^ 0x80) < 0x40) +#endif + if (s - 2 != start) + { + uint8_t c_3 = s[-3]; + + if (c_3 >= 0xe0 && c_3 < 0xf0 +#if CONFIG_UNICODE_SAFETY + && (c_3 >= 0xe1 || c_2 >= 0xa0) + && (c_3 != 0xed || c_2 < 0xa0) +#endif + ) + { + *puc = ((unsigned int) (c_3 & 0x0f) << 12) + | ((unsigned int) (c_2 ^ 0x80) << 6) + | (unsigned int) (c_1 ^ 0x80); + return s - 3; + } +#if CONFIG_UNICODE_SAFETY + if ((c_3 ^ 0x80) < 0x40) +#endif + if (s - 3 != start) + { + uint8_t c_4 = s[-4]; + + if (c_4 >= 0xf0 && c_4 < 0xf8 +#if CONFIG_UNICODE_SAFETY + && (c_4 >= 0xf1 || c_3 >= 0x90) + && (c_4 < 0xf4 || (c_4 == 0xf4 && c_3 < 0x90)) +#endif + ) + { + *puc = ((unsigned int) (c_4 & 0x07) << 18) + | ((unsigned int) (c_3 ^ 0x80) << 12) + | ((unsigned int) (c_2 ^ 0x80) << 6) + | (unsigned int) (c_1 ^ 0x80); + return s - 4; + } + } + } + } + } + return NULL; +} diff --git a/lib/unistr/u8-uctomb-aux.c b/lib/unistr/u8-uctomb-aux.c new file mode 100644 index 000000000..c42fa5015 --- /dev/null +++ b/lib/unistr/u8-uctomb-aux.c @@ -0,0 +1,69 @@ +/* Conversion UCS-4 to UTF-8. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include "unistr.h" + +int +u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n) +{ + int count; + + if (uc < 0x80) + /* The case n >= 1 is already handled by the caller. */ + return -2; + else if (uc < 0x800) + count = 2; + else if (uc < 0x10000) + { + if (uc < 0xd800 || uc >= 0xe000) + count = 3; + else + return -1; + } +#if 0 + else if (uc < 0x200000) + count = 4; + else if (uc < 0x4000000) + count = 5; + else if (uc <= 0x7fffffff) + count = 6; +#else + else if (uc < 0x110000) + count = 4; +#endif + else + return -1; + + if (n < count) + return -2; + + switch (count) /* note: code falls through cases! */ + { +#if 0 + case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000; + case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000; +#endif + case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000; + case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800; + case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0; + /*case 1:*/ s[0] = uc; + } + return count; +} diff --git a/lib/unistr/u8-uctomb.c b/lib/unistr/u8-uctomb.c new file mode 100644 index 000000000..33921669e --- /dev/null +++ b/lib/unistr/u8-uctomb.c @@ -0,0 +1,88 @@ +/* Store a character in UTF-8 string. + Copyright (C) 2002, 2005-2006, 2009 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +#if defined IN_LIBUNISTRING +/* Tell unistr.h to declare u8_uctomb as 'extern', not 'static inline'. */ +# include "unistring-notinline.h" +#endif + +/* Specification. */ +#include "unistr.h" + +#if !HAVE_INLINE + +int +u8_uctomb (uint8_t *s, ucs4_t uc, int n) +{ + if (uc < 0x80) + { + if (n > 0) + { + s[0] = uc; + return 1; + } + /* else return -2, below. */ + } + else + { + int count; + + if (uc < 0x800) + count = 2; + else if (uc < 0x10000) + { + if (uc < 0xd800 || uc >= 0xe000) + count = 3; + else + return -1; + } +#if 0 + else if (uc < 0x200000) + count = 4; + else if (uc < 0x4000000) + count = 5; + else if (uc <= 0x7fffffff) + count = 6; +#else + else if (uc < 0x110000) + count = 4; +#endif + else + return -1; + + if (n >= count) + { + switch (count) /* note: code falls through cases! */ + { +#if 0 + case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000; + case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000; +#endif + case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000; + case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800; + case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0; + /*case 1:*/ s[0] = uc; + } + return count; + } + } + return -2; +} + +#endif diff --git a/lib/unitypes.h b/lib/unitypes.h new file mode 100644 index 000000000..fe8d87735 --- /dev/null +++ b/lib/unitypes.h @@ -0,0 +1,26 @@ +/* Elementary types for the GNU UniString library. + Copyright (C) 2002, 2005-2006 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#ifndef _UNITYPES_H +#define _UNITYPES_H + +/* Get uint8_t, uint16_t, uint32_t. */ +#include + +/* Type representing a Unicode character. */ +typedef uint32_t ucs4_t; + +#endif /* _UNITYPES_H */ diff --git a/m4/byteswap.m4 b/m4/byteswap.m4 new file mode 100644 index 000000000..ad13f2286 --- /dev/null +++ b/m4/byteswap.m4 @@ -0,0 +1,18 @@ +# byteswap.m4 serial 3 +dnl Copyright (C) 2005, 2007, 2009 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl Written by Oskar Liljeblad. + +AC_DEFUN([gl_BYTESWAP], +[ + dnl Prerequisites of lib/byteswap.in.h. + AC_CHECK_HEADERS([byteswap.h], [ + BYTESWAP_H='' + ], [ + BYTESWAP_H='byteswap.h' + ]) + AC_SUBST([BYTESWAP_H]) +]) diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4 index c7cfb83dd..0fbe11969 100644 --- a/m4/gnulib-cache.m4 +++ b/m4/gnulib-cache.m4 @@ -15,13 +15,14 @@ # Specification in the form of a command-line invocation: -# gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl --libtool --macro-prefix=gl --no-vc-files alloca-opt autobuild count-one-bits environ extensions flock fpieee full-read full-write lib-symbol-visibility putenv stdlib strcase strftime +# gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl --libtool --macro-prefix=gl --no-vc-files alloca-opt autobuild byteswap count-one-bits environ extensions flock fpieee full-read full-write iconv_open-utf lib-symbol-visibility libunistring putenv stdlib strcase strftime striconveh string # Specification in the form of a few gnulib-tool.m4 macro invocations: gl_LOCAL_DIR([]) gl_MODULES([ alloca-opt autobuild + byteswap count-one-bits environ extensions @@ -29,11 +30,15 @@ gl_MODULES([ fpieee full-read full-write + iconv_open-utf lib-symbol-visibility + libunistring putenv stdlib strcase strftime + striconveh + string ]) gl_AVOID([]) gl_SOURCE_BASE([lib]) diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4 index b6d10a862..8f775107e 100644 --- a/m4/gnulib-comp.m4 +++ b/m4/gnulib-comp.m4 @@ -25,6 +25,7 @@ AC_DEFUN([gl_EARLY], m4_pattern_allow([^gl_LIBOBJS$])dnl a variable m4_pattern_allow([^gl_LTLIBOBJS$])dnl a variable AC_REQUIRE([AC_PROG_RANLIB]) + AC_REQUIRE([AM_PROG_CC_C_O]) AB_INIT AC_REQUIRE([gl_USE_SYSTEM_EXTENSIONS]) AC_REQUIRE([gl_FP_IEEE]) @@ -44,13 +45,19 @@ AC_DEFUN([gl_INIT], gl_COMMON gl_source_base='lib' gl_FUNC_ALLOCA + gl_BYTESWAP gl_COUNT_ONE_BITS gl_ENVIRON gl_UNISTD_MODULE_INDICATOR([environ]) gl_FUNC_FLOCK gl_HEADER_SYS_FILE_MODULE_INDICATOR([flock]) + AM_ICONV + gl_ICONV_H + gl_FUNC_ICONV_OPEN + gl_FUNC_ICONV_OPEN_UTF gl_INLINE gl_VISIBILITY + gl_LIBUNISTRING gl_LOCALCHARSET LOCALCHARSET_TESTS_ENVIRONMENT="CHARSETALIASDIR=\"\$(top_builddir)/$gl_source_base\"" AC_SUBST([LOCALCHARSET_TESTS_ENVIRONMENT]) @@ -73,12 +80,21 @@ AC_DEFUN([gl_INIT], gl_STDLIB_H gl_STRCASE gl_FUNC_GNU_STRFTIME + if test $gl_cond_libtool = false; then + gl_ltlibdeps="$gl_ltlibdeps $LTLIBICONV" + gl_libdeps="$gl_libdeps $LIBICONV" + fi + gl_HEADER_STRING_H gl_HEADER_STRINGS_H gl_HEADER_SYS_FILE_H AC_PROG_MKDIR_P gl_HEADER_TIME_H gl_TIME_R gl_UNISTD_H + gl_MODULE_INDICATOR([unistr/u8-mbtouc]) + gl_MODULE_INDICATOR([unistr/u8-mbtouc-unsafe]) + gl_MODULE_INDICATOR([unistr/u8-mbtoucr]) + gl_MODULE_INDICATOR([unistr/u8-uctomb]) gl_WCHAR_H gl_FUNC_WRITE gl_UNISTD_MODULE_INDICATOR([write]) @@ -210,8 +226,16 @@ AC_DEFUN([gltests_LIBSOURCES], [ # This macro records the list of files which have been installed by # gnulib-tool and may be removed by future gnulib-tool invocations. AC_DEFUN([gl_FILE_LIST], [ + build-aux/config.rpath build-aux/link-warning.h lib/alloca.in.h + lib/byteswap.in.h + lib/c-ctype.c + lib/c-ctype.h + lib/c-strcase.h + lib/c-strcasecmp.c + lib/c-strcaseeq.h + lib/c-strncasecmp.c lib/config.charset lib/count-one-bits.h lib/flock.c @@ -219,6 +243,15 @@ AC_DEFUN([gl_FILE_LIST], [ lib/full-read.h lib/full-write.c lib/full-write.h + lib/iconv.c + lib/iconv.in.h + lib/iconv_close.c + lib/iconv_open-aix.gperf + lib/iconv_open-hpux.gperf + lib/iconv_open-irix.gperf + lib/iconv_open-osf.gperf + lib/iconv_open.c + lib/iconveh.h lib/localcharset.c lib/localcharset.h lib/malloc.c @@ -239,18 +272,32 @@ AC_DEFUN([gl_FILE_LIST], [ lib/streq.h lib/strftime.c lib/strftime.h + lib/striconveh.c + lib/striconveh.h + lib/string.in.h lib/strings.in.h lib/strncasecmp.c lib/sys_file.in.h lib/time.in.h lib/time_r.c lib/unistd.in.h + lib/unistr.h + lib/unistr/u8-mbtouc-aux.c + lib/unistr/u8-mbtouc-unsafe-aux.c + lib/unistr/u8-mbtouc-unsafe.c + lib/unistr/u8-mbtouc.c + lib/unistr/u8-mbtoucr.c + lib/unistr/u8-prev.c + lib/unistr/u8-uctomb-aux.c + lib/unistr/u8-uctomb.c + lib/unitypes.h lib/verify.h lib/wchar.in.h lib/write.c m4/00gnulib.m4 m4/alloca.m4 m4/autobuild.m4 + m4/byteswap.m4 m4/codeset.m4 m4/count-one-bits.m4 m4/environ.m4 @@ -259,8 +306,15 @@ AC_DEFUN([gl_FILE_LIST], [ m4/fpieee.m4 m4/glibc21.m4 m4/gnulib-common.m4 + m4/iconv.m4 + m4/iconv_h.m4 + m4/iconv_open.m4 m4/include_next.m4 m4/inline.m4 + m4/lib-ld.m4 + m4/lib-link.m4 + m4/lib-prefix.m4 + m4/libunistring.m4 m4/localcharset.m4 m4/locale-fr.m4 m4/locale-ja.m4 @@ -281,6 +335,7 @@ AC_DEFUN([gl_FILE_LIST], [ m4/stdlib_h.m4 m4/strcase.m4 m4/strftime.m4 + m4/string_h.m4 m4/strings_h.m4 m4/sys_file_h.m4 m4/time_h.m4 diff --git a/m4/iconv.m4 b/m4/iconv.m4 new file mode 100644 index 000000000..3cc626829 --- /dev/null +++ b/m4/iconv.m4 @@ -0,0 +1,180 @@ +# iconv.m4 serial AM7 (gettext-0.18) +dnl Copyright (C) 2000-2002, 2007-2009 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +AC_DEFUN([AM_ICONV_LINKFLAGS_BODY], +[ + dnl Prerequisites of AC_LIB_LINKFLAGS_BODY. + AC_REQUIRE([AC_LIB_PREPARE_PREFIX]) + AC_REQUIRE([AC_LIB_RPATH]) + + dnl Search for libiconv and define LIBICONV, LTLIBICONV and INCICONV + dnl accordingly. + AC_LIB_LINKFLAGS_BODY([iconv]) +]) + +AC_DEFUN([AM_ICONV_LINK], +[ + dnl Some systems have iconv in libc, some have it in libiconv (OSF/1 and + dnl those with the standalone portable GNU libiconv installed). + AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles + + dnl Search for libiconv and define LIBICONV, LTLIBICONV and INCICONV + dnl accordingly. + AC_REQUIRE([AM_ICONV_LINKFLAGS_BODY]) + + dnl Add $INCICONV to CPPFLAGS before performing the following checks, + dnl because if the user has installed libiconv and not disabled its use + dnl via --without-libiconv-prefix, he wants to use it. The first + dnl AC_TRY_LINK will then fail, the second AC_TRY_LINK will succeed. + am_save_CPPFLAGS="$CPPFLAGS" + AC_LIB_APPENDTOVAR([CPPFLAGS], [$INCICONV]) + + AC_CACHE_CHECK([for iconv], [am_cv_func_iconv], [ + am_cv_func_iconv="no, consider installing GNU libiconv" + am_cv_lib_iconv=no + AC_TRY_LINK([#include +#include ], + [iconv_t cd = iconv_open("",""); + iconv(cd,NULL,NULL,NULL,NULL); + iconv_close(cd);], + [am_cv_func_iconv=yes]) + if test "$am_cv_func_iconv" != yes; then + am_save_LIBS="$LIBS" + LIBS="$LIBS $LIBICONV" + AC_TRY_LINK([#include +#include ], + [iconv_t cd = iconv_open("",""); + iconv(cd,NULL,NULL,NULL,NULL); + iconv_close(cd);], + [am_cv_lib_iconv=yes] + [am_cv_func_iconv=yes]) + LIBS="$am_save_LIBS" + fi + ]) + if test "$am_cv_func_iconv" = yes; then + AC_CACHE_CHECK([for working iconv], [am_cv_func_iconv_works], [ + dnl This tests against bugs in AIX 5.1 and HP-UX 11.11. + am_save_LIBS="$LIBS" + if test $am_cv_lib_iconv = yes; then + LIBS="$LIBS $LIBICONV" + fi + AC_TRY_RUN([ +#include +#include +int main () +{ + /* Test against AIX 5.1 bug: Failures are not distinguishable from successful + returns. */ + { + iconv_t cd_utf8_to_88591 = iconv_open ("ISO8859-1", "UTF-8"); + if (cd_utf8_to_88591 != (iconv_t)(-1)) + { + static const char input[] = "\342\202\254"; /* EURO SIGN */ + char buf[10]; + const char *inptr = input; + size_t inbytesleft = strlen (input); + char *outptr = buf; + size_t outbytesleft = sizeof (buf); + size_t res = iconv (cd_utf8_to_88591, + (char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + if (res == 0) + return 1; + } + } +#if 0 /* This bug could be worked around by the caller. */ + /* Test against HP-UX 11.11 bug: Positive return value instead of 0. */ + { + iconv_t cd_88591_to_utf8 = iconv_open ("utf8", "iso88591"); + if (cd_88591_to_utf8 != (iconv_t)(-1)) + { + static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + char buf[50]; + const char *inptr = input; + size_t inbytesleft = strlen (input); + char *outptr = buf; + size_t outbytesleft = sizeof (buf); + size_t res = iconv (cd_88591_to_utf8, + (char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + if ((int)res > 0) + return 1; + } + } +#endif + /* Test against HP-UX 11.11 bug: No converter from EUC-JP to UTF-8 is + provided. */ + if (/* Try standardized names. */ + iconv_open ("UTF-8", "EUC-JP") == (iconv_t)(-1) + /* Try IRIX, OSF/1 names. */ + && iconv_open ("UTF-8", "eucJP") == (iconv_t)(-1) + /* Try AIX names. */ + && iconv_open ("UTF-8", "IBM-eucJP") == (iconv_t)(-1) + /* Try HP-UX names. */ + && iconv_open ("utf8", "eucJP") == (iconv_t)(-1)) + return 1; + return 0; +}], [am_cv_func_iconv_works=yes], [am_cv_func_iconv_works=no], + [case "$host_os" in + aix* | hpux*) am_cv_func_iconv_works="guessing no" ;; + *) am_cv_func_iconv_works="guessing yes" ;; + esac]) + LIBS="$am_save_LIBS" + ]) + case "$am_cv_func_iconv_works" in + *no) am_func_iconv=no am_cv_lib_iconv=no ;; + *) am_func_iconv=yes ;; + esac + else + am_func_iconv=no am_cv_lib_iconv=no + fi + if test "$am_func_iconv" = yes; then + AC_DEFINE([HAVE_ICONV], [1], + [Define if you have the iconv() function and it works.]) + fi + if test "$am_cv_lib_iconv" = yes; then + AC_MSG_CHECKING([how to link with libiconv]) + AC_MSG_RESULT([$LIBICONV]) + else + dnl If $LIBICONV didn't lead to a usable library, we don't need $INCICONV + dnl either. + CPPFLAGS="$am_save_CPPFLAGS" + LIBICONV= + LTLIBICONV= + fi + AC_SUBST([LIBICONV]) + AC_SUBST([LTLIBICONV]) +]) + +AC_DEFUN([AM_ICONV], +[ + AM_ICONV_LINK + if test "$am_cv_func_iconv" = yes; then + AC_MSG_CHECKING([for iconv declaration]) + AC_CACHE_VAL([am_cv_proto_iconv], [ + AC_TRY_COMPILE([ +#include +#include +extern +#ifdef __cplusplus +"C" +#endif +#if defined(__STDC__) || defined(__cplusplus) +size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft); +#else +size_t iconv(); +#endif +], [], [am_cv_proto_iconv_arg1=""], [am_cv_proto_iconv_arg1="const"]) + am_cv_proto_iconv="extern size_t iconv (iconv_t cd, $am_cv_proto_iconv_arg1 char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);"]) + am_cv_proto_iconv=`echo "[$]am_cv_proto_iconv" | tr -s ' ' | sed -e 's/( /(/'` + AC_MSG_RESULT([${ac_t:- + }$am_cv_proto_iconv]) + AC_DEFINE_UNQUOTED([ICONV_CONST], [$am_cv_proto_iconv_arg1], + [Define as const if the declaration of iconv() needs const.]) + fi +]) diff --git a/m4/iconv_h.m4 b/m4/iconv_h.m4 new file mode 100644 index 000000000..bc05b0551 --- /dev/null +++ b/m4/iconv_h.m4 @@ -0,0 +1,34 @@ +# iconv_h.m4 serial 4 +dnl Copyright (C) 2007-2008 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_ICONV_H], +[ + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + gl_CHECK_NEXT_HEADERS([iconv.h]) +]) + +dnl Unconditionally enables the replacement of . +AC_DEFUN([gl_REPLACE_ICONV_H], +[ + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + ICONV_H='iconv.h' +]) + +AC_DEFUN([gl_ICONV_MODULE_INDICATOR], +[ + dnl Use AC_REQUIRE here, so that the default settings are expanded once only. + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + GNULIB_[]m4_translit([$1],[abcdefghijklmnopqrstuvwxyz./-],[ABCDEFGHIJKLMNOPQRSTUVWXYZ___])=1 +]) + +AC_DEFUN([gl_ICONV_H_DEFAULTS], +[ + dnl Assume proper GNU behavior unless another module says otherwise. + REPLACE_ICONV=0; AC_SUBST([REPLACE_ICONV]) + REPLACE_ICONV_OPEN=0; AC_SUBST([REPLACE_ICONV_OPEN]) + REPLACE_ICONV_UTF=0; AC_SUBST([REPLACE_ICONV_UTF]) + ICONV_H=''; AC_SUBST([ICONV_H]) +]) diff --git a/m4/iconv_open.m4 b/m4/iconv_open.m4 new file mode 100644 index 000000000..c7b948e90 --- /dev/null +++ b/m4/iconv_open.m4 @@ -0,0 +1,237 @@ +# iconv_open.m4 serial 5 +dnl Copyright (C) 2007-2009 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_FUNC_ICONV_OPEN], +[ + AC_REQUIRE([AM_ICONV]) + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + if test "$am_cv_func_iconv" = yes; then + dnl Test whether iconv_open accepts standardized encoding names. + dnl We know that GNU libiconv and GNU libc do. + AC_EGREP_CPP([gnu_iconv], [ + #include + #if defined _LIBICONV_VERSION || defined __GLIBC__ + gnu_iconv + #endif + ], [gl_func_iconv_gnu=yes], [gl_func_iconv_gnu=no]) + if test $gl_func_iconv_gnu = no; then + iconv_flavor= + case "$host_os" in + aix*) iconv_flavor=ICONV_FLAVOR_AIX ;; + irix*) iconv_flavor=ICONV_FLAVOR_IRIX ;; + hpux*) iconv_flavor=ICONV_FLAVOR_HPUX ;; + osf*) iconv_flavor=ICONV_FLAVOR_OSF ;; + esac + if test -n "$iconv_flavor"; then + AC_DEFINE_UNQUOTED([ICONV_FLAVOR], [$iconv_flavor], + [Define to a symbolic name denoting the flavor of iconv_open() + implementation.]) + gl_REPLACE_ICONV_OPEN + fi + fi + fi +]) + +AC_DEFUN([gl_REPLACE_ICONV_OPEN], +[ + gl_REPLACE_ICONV_H + REPLACE_ICONV_OPEN=1 + AC_LIBOBJ([iconv_open]) +]) + +AC_DEFUN([gl_FUNC_ICONV_OPEN_UTF], +[ + AC_REQUIRE([gl_FUNC_ICONV_OPEN]) + AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + if test "$am_cv_func_iconv" = yes; then + if test -n "$am_cv_proto_iconv_arg1"; then + ICONV_CONST="const" + else + ICONV_CONST= + fi + AC_SUBST([ICONV_CONST]) + AC_CACHE_CHECK([whether iconv supports conversion between UTF-8 and UTF-{16,32}{BE,LE}], + [gl_cv_func_iconv_supports_utf], + [ + save_LIBS="$LIBS" + LIBS="$LIBS $LIBICONV" + AC_TRY_RUN([ +#include +#include +#include +#include +#include +#define ASSERT(expr) if (!(expr)) return 1; +int main () +{ + /* Test conversion from UTF-8 to UTF-16BE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "\000J\000a\000p\000a\000n\000e\000s\000e\000 \000(\145\345\147\054\212\236\000)\000 \000[\330\065\335\015\330\065\335\036\330\065\335\055\000]"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + cd = iconv_open ("UTF-16BE", "UTF-8"); + ASSERT (cd != (iconv_t)(-1)); + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + ASSERT (iconv_close (cd) == 0); + } + /* Test conversion from UTF-8 to UTF-16LE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "J\000a\000p\000a\000n\000e\000s\000e\000 \000(\000\345\145\054\147\236\212)\000 \000[\000\065\330\015\335\065\330\036\335\065\330\055\335]\000"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + cd = iconv_open ("UTF-16LE", "UTF-8"); + ASSERT (cd != (iconv_t)(-1)); + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + ASSERT (iconv_close (cd) == 0); + } + /* Test conversion from UTF-8 to UTF-32BE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "\000\000\000J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\145\345\000\000\147\054\000\000\212\236\000\000\000)\000\000\000 \000\000\000[\000\001\325\015\000\001\325\036\000\001\325\055\000\000\000]"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + cd = iconv_open ("UTF-32BE", "UTF-8"); + ASSERT (cd != (iconv_t)(-1)); + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + ASSERT (iconv_close (cd) == 0); + } + /* Test conversion from UTF-8 to UTF-32LE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\000\345\145\000\000\054\147\000\000\236\212\000\000)\000\000\000 \000\000\000[\000\000\000\015\325\001\000\036\325\001\000\055\325\001\000]\000\000\000"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + cd = iconv_open ("UTF-32LE", "UTF-8"); + ASSERT (cd != (iconv_t)(-1)); + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + ASSERT (iconv_close (cd) == 0); + } + /* Test conversion from UTF-16BE to UTF-8 with no errors. + This test fails on NetBSD 3.0. */ + { + static const char input[] = + "\000J\000a\000p\000a\000n\000e\000s\000e\000 \000(\145\345\147\054\212\236\000)\000 \000[\330\065\335\015\330\065\335\036\330\065\335\055\000]"; + static const char expected[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + cd = iconv_open ("UTF-8", "UTF-16BE"); + ASSERT (cd != (iconv_t)(-1)); + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + ASSERT (iconv_close (cd) == 0); + } + return 0; +}], [gl_cv_func_iconv_supports_utf=yes], [gl_cv_func_iconv_supports_utf=no], + [ + dnl We know that GNU libiconv, GNU libc, and Solaris >= 9 do. + dnl OSF/1 5.1 has these encodings, but inserts a BOM in the "to" + dnl direction. + gl_cv_func_iconv_supports_utf=no + if test $gl_func_iconv_gnu = yes; then + gl_cv_func_iconv_supports_utf=yes + else +changequote(,)dnl + case "$host_os" in + solaris2.9 | solaris2.1[0-9]) gl_cv_func_iconv_supports_utf=yes ;; + esac +changequote([,])dnl + fi + ]) + LIBS="$save_LIBS" + ]) + if test $gl_cv_func_iconv_supports_utf = no; then + REPLACE_ICONV_UTF=1 + AC_DEFINE([REPLACE_ICONV_UTF], [1], + [Define if the iconv() functions are enhanced to handle the UTF-{16,32}{BE,LE} encodings.]) + REPLACE_ICONV=1 + gl_REPLACE_ICONV_OPEN + AC_LIBOBJ([iconv]) + AC_LIBOBJ([iconv_close]) + fi + fi +]) diff --git a/m4/lib-ld.m4 b/m4/lib-ld.m4 new file mode 100644 index 000000000..e4863f2c9 --- /dev/null +++ b/m4/lib-ld.m4 @@ -0,0 +1,110 @@ +# lib-ld.m4 serial 4 (gettext-0.18) +dnl Copyright (C) 1996-2003, 2009 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl Subroutines of libtool.m4, +dnl with replacements s/AC_/AC_LIB/ and s/lt_cv/acl_cv/ to avoid collision +dnl with libtool.m4. + +dnl From libtool-1.4. Sets the variable with_gnu_ld to yes or no. +AC_DEFUN([AC_LIB_PROG_LD_GNU], +[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], [acl_cv_prog_gnu_ld], +[# I'd rather use --version here, but apparently some GNU ld's only accept -v. +case `$LD -v 2>&1 conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi +ac_prog=ld +if test "$GCC" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + AC_MSG_CHECKING([for ld used by GCC]) + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [[\\/]* | [A-Za-z]:[\\/]*)] + [re_direlt='/[^/][^/]*/\.\./'] + # Canonicalize the path of ld + ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'` + while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do + ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + AC_MSG_CHECKING([for GNU ld]) +else + AC_MSG_CHECKING([for non-GNU ld]) +fi +AC_CACHE_VAL([acl_cv_path_LD], +[if test -z "$LD"; then + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + acl_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some GNU ld's only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$acl_cv_path_LD" -v 2>&1 < /dev/null` in + *GNU* | *'with BFD'*) + test "$with_gnu_ld" != no && break ;; + *) + test "$with_gnu_ld" != yes && break ;; + esac + fi + done + IFS="$ac_save_ifs" +else + acl_cv_path_LD="$LD" # Let the user override the test with a path. +fi]) +LD="$acl_cv_path_LD" +if test -n "$LD"; then + AC_MSG_RESULT([$LD]) +else + AC_MSG_RESULT([no]) +fi +test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH]) +AC_LIB_PROG_LD_GNU +]) diff --git a/m4/lib-link.m4 b/m4/lib-link.m4 new file mode 100644 index 000000000..21442033c --- /dev/null +++ b/m4/lib-link.m4 @@ -0,0 +1,761 @@ +# lib-link.m4 serial 19 (gettext-0.18) +dnl Copyright (C) 2001-2009 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +AC_PREREQ([2.54]) + +dnl AC_LIB_LINKFLAGS(name [, dependencies]) searches for libname and +dnl the libraries corresponding to explicit and implicit dependencies. +dnl Sets and AC_SUBSTs the LIB${NAME} and LTLIB${NAME} variables and +dnl augments the CPPFLAGS variable. +dnl Sets and AC_SUBSTs the LIB${NAME}_PREFIX variable to nonempty if libname +dnl was found in ${LIB${NAME}_PREFIX}/$acl_libdirstem. +AC_DEFUN([AC_LIB_LINKFLAGS], +[ + AC_REQUIRE([AC_LIB_PREPARE_PREFIX]) + AC_REQUIRE([AC_LIB_RPATH]) + pushdef([Name],[translit([$1],[./-], [___])]) + pushdef([NAME],[translit([$1],[abcdefghijklmnopqrstuvwxyz./-], + [ABCDEFGHIJKLMNOPQRSTUVWXYZ___])]) + AC_CACHE_CHECK([how to link with lib[]$1], [ac_cv_lib[]Name[]_libs], [ + AC_LIB_LINKFLAGS_BODY([$1], [$2]) + ac_cv_lib[]Name[]_libs="$LIB[]NAME" + ac_cv_lib[]Name[]_ltlibs="$LTLIB[]NAME" + ac_cv_lib[]Name[]_cppflags="$INC[]NAME" + ac_cv_lib[]Name[]_prefix="$LIB[]NAME[]_PREFIX" + ]) + LIB[]NAME="$ac_cv_lib[]Name[]_libs" + LTLIB[]NAME="$ac_cv_lib[]Name[]_ltlibs" + INC[]NAME="$ac_cv_lib[]Name[]_cppflags" + LIB[]NAME[]_PREFIX="$ac_cv_lib[]Name[]_prefix" + AC_LIB_APPENDTOVAR([CPPFLAGS], [$INC]NAME) + AC_SUBST([LIB]NAME) + AC_SUBST([LTLIB]NAME) + AC_SUBST([LIB]NAME[_PREFIX]) + dnl Also set HAVE_LIB[]NAME so that AC_LIB_HAVE_LINKFLAGS can reuse the + dnl results of this search when this library appears as a dependency. + HAVE_LIB[]NAME=yes + popdef([NAME]) + popdef([Name]) +]) + +dnl AC_LIB_HAVE_LINKFLAGS(name, dependencies, includes, testcode, [missing-message]) +dnl searches for libname and the libraries corresponding to explicit and +dnl implicit dependencies, together with the specified include files and +dnl the ability to compile and link the specified testcode. The missing-message +dnl defaults to 'no' and may contain additional hints for the user. +dnl If found, it sets and AC_SUBSTs HAVE_LIB${NAME}=yes and the LIB${NAME} +dnl and LTLIB${NAME} variables and augments the CPPFLAGS variable, and +dnl #defines HAVE_LIB${NAME} to 1. Otherwise, it sets and AC_SUBSTs +dnl HAVE_LIB${NAME}=no and LIB${NAME} and LTLIB${NAME} to empty. +dnl Sets and AC_SUBSTs the LIB${NAME}_PREFIX variable to nonempty if libname +dnl was found in ${LIB${NAME}_PREFIX}/$acl_libdirstem. +AC_DEFUN([AC_LIB_HAVE_LINKFLAGS], +[ + AC_REQUIRE([AC_LIB_PREPARE_PREFIX]) + AC_REQUIRE([AC_LIB_RPATH]) + pushdef([Name],[translit([$1],[./-], [___])]) + pushdef([NAME],[translit([$1],[abcdefghijklmnopqrstuvwxyz./-], + [ABCDEFGHIJKLMNOPQRSTUVWXYZ___])]) + + dnl Search for lib[]Name and define LIB[]NAME, LTLIB[]NAME and INC[]NAME + dnl accordingly. + AC_LIB_LINKFLAGS_BODY([$1], [$2]) + + dnl Add $INC[]NAME to CPPFLAGS before performing the following checks, + dnl because if the user has installed lib[]Name and not disabled its use + dnl via --without-lib[]Name-prefix, he wants to use it. + ac_save_CPPFLAGS="$CPPFLAGS" + AC_LIB_APPENDTOVAR([CPPFLAGS], [$INC]NAME) + + AC_CACHE_CHECK([for lib[]$1], [ac_cv_lib[]Name], [ + ac_save_LIBS="$LIBS" + LIBS="$LIBS $LIB[]NAME" + AC_TRY_LINK([$3], [$4], + [ac_cv_lib[]Name=yes], + [ac_cv_lib[]Name='m4_if([$5], [], [no], [[$5]])']) + LIBS="$ac_save_LIBS" + ]) + if test "$ac_cv_lib[]Name" = yes; then + HAVE_LIB[]NAME=yes + AC_DEFINE([HAVE_LIB]NAME, 1, [Define if you have the lib[]$1 library.]) + AC_MSG_CHECKING([how to link with lib[]$1]) + AC_MSG_RESULT([$LIB[]NAME]) + else + HAVE_LIB[]NAME=no + dnl If $LIB[]NAME didn't lead to a usable library, we don't need + dnl $INC[]NAME either. + CPPFLAGS="$ac_save_CPPFLAGS" + LIB[]NAME= + LTLIB[]NAME= + LIB[]NAME[]_PREFIX= + fi + AC_SUBST([HAVE_LIB]NAME) + AC_SUBST([LIB]NAME) + AC_SUBST([LTLIB]NAME) + AC_SUBST([LIB]NAME[_PREFIX]) + popdef([NAME]) + popdef([Name]) +]) + +dnl Determine the platform dependent parameters needed to use rpath: +dnl acl_libext, +dnl acl_shlibext, +dnl acl_hardcode_libdir_flag_spec, +dnl acl_hardcode_libdir_separator, +dnl acl_hardcode_direct, +dnl acl_hardcode_minus_L. +AC_DEFUN([AC_LIB_RPATH], +[ + dnl Tell automake >= 1.10 to complain if config.rpath is missing. + m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([config.rpath])]) + AC_REQUIRE([AC_PROG_CC]) dnl we use $CC, $GCC, $LDFLAGS + AC_REQUIRE([AC_LIB_PROG_LD]) dnl we use $LD, $with_gnu_ld + AC_REQUIRE([AC_CANONICAL_HOST]) dnl we use $host + AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT]) dnl we use $ac_aux_dir + AC_CACHE_CHECK([for shared library run path origin], [acl_cv_rpath], [ + CC="$CC" GCC="$GCC" LDFLAGS="$LDFLAGS" LD="$LD" with_gnu_ld="$with_gnu_ld" \ + ${CONFIG_SHELL-/bin/sh} "$ac_aux_dir/config.rpath" "$host" > conftest.sh + . ./conftest.sh + rm -f ./conftest.sh + acl_cv_rpath=done + ]) + wl="$acl_cv_wl" + acl_libext="$acl_cv_libext" + acl_shlibext="$acl_cv_shlibext" + acl_libname_spec="$acl_cv_libname_spec" + acl_library_names_spec="$acl_cv_library_names_spec" + acl_hardcode_libdir_flag_spec="$acl_cv_hardcode_libdir_flag_spec" + acl_hardcode_libdir_separator="$acl_cv_hardcode_libdir_separator" + acl_hardcode_direct="$acl_cv_hardcode_direct" + acl_hardcode_minus_L="$acl_cv_hardcode_minus_L" + dnl Determine whether the user wants rpath handling at all. + AC_ARG_ENABLE([rpath], + [ --disable-rpath do not hardcode runtime library paths], + :, enable_rpath=yes) +]) + +dnl AC_LIB_FROMPACKAGE(name, package) +dnl declares that libname comes from the given package. The configure file +dnl will then not have a --with-libname-prefix option but a +dnl --with-package-prefix option. Several libraries can come from the same +dnl package. This declaration must occur before an AC_LIB_LINKFLAGS or similar +dnl macro call that searches for libname. +AC_DEFUN([AC_LIB_FROMPACKAGE], +[ + pushdef([NAME],[translit([$1],[abcdefghijklmnopqrstuvwxyz./-], + [ABCDEFGHIJKLMNOPQRSTUVWXYZ___])]) + define([acl_frompackage_]NAME, [$2]) + popdef([NAME]) + pushdef([PACK],[$2]) + pushdef([PACKUP],[translit(PACK,[abcdefghijklmnopqrstuvwxyz./-], + [ABCDEFGHIJKLMNOPQRSTUVWXYZ___])]) + define([acl_libsinpackage_]PACKUP, + m4_ifdef([acl_libsinpackage_]PACKUP, [acl_libsinpackage_]PACKUP[[, ]],)[lib$1]) + popdef([PACKUP]) + popdef([PACK]) +]) + +dnl AC_LIB_LINKFLAGS_BODY(name [, dependencies]) searches for libname and +dnl the libraries corresponding to explicit and implicit dependencies. +dnl Sets the LIB${NAME}, LTLIB${NAME} and INC${NAME} variables. +dnl Also, sets the LIB${NAME}_PREFIX variable to nonempty if libname was found +dnl in ${LIB${NAME}_PREFIX}/$acl_libdirstem. +AC_DEFUN([AC_LIB_LINKFLAGS_BODY], +[ + AC_REQUIRE([AC_LIB_PREPARE_MULTILIB]) + pushdef([NAME],[translit([$1],[abcdefghijklmnopqrstuvwxyz./-], + [ABCDEFGHIJKLMNOPQRSTUVWXYZ___])]) + pushdef([PACK],[m4_ifdef([acl_frompackage_]NAME, [acl_frompackage_]NAME, lib[$1])]) + pushdef([PACKUP],[translit(PACK,[abcdefghijklmnopqrstuvwxyz./-], + [ABCDEFGHIJKLMNOPQRSTUVWXYZ___])]) + pushdef([PACKLIBS],[m4_ifdef([acl_frompackage_]NAME, [acl_libsinpackage_]PACKUP, lib[$1])]) + dnl Autoconf >= 2.61 supports dots in --with options. + pushdef([P_A_C_K],[m4_if(m4_version_compare(m4_defn([m4_PACKAGE_VERSION]),[2.61]),[-1],[translit(PACK,[.],[_])],PACK)]) + dnl By default, look in $includedir and $libdir. + use_additional=yes + AC_LIB_WITH_FINAL_PREFIX([ + eval additional_includedir=\"$includedir\" + eval additional_libdir=\"$libdir\" + ]) + AC_ARG_WITH(P_A_C_K[-prefix], +[[ --with-]]P_A_C_K[[-prefix[=DIR] search for ]PACKLIBS[ in DIR/include and DIR/lib + --without-]]P_A_C_K[[-prefix don't search for ]PACKLIBS[ in includedir and libdir]], +[ + if test "X$withval" = "Xno"; then + use_additional=no + else + if test "X$withval" = "X"; then + AC_LIB_WITH_FINAL_PREFIX([ + eval additional_includedir=\"$includedir\" + eval additional_libdir=\"$libdir\" + ]) + else + additional_includedir="$withval/include" + additional_libdir="$withval/$acl_libdirstem" + if test "$acl_libdirstem2" != "$acl_libdirstem" \ + && ! test -d "$withval/$acl_libdirstem"; then + additional_libdir="$withval/$acl_libdirstem2" + fi + fi + fi +]) + dnl Search the library and its dependencies in $additional_libdir and + dnl $LDFLAGS. Using breadth-first-seach. + LIB[]NAME= + LTLIB[]NAME= + INC[]NAME= + LIB[]NAME[]_PREFIX= + rpathdirs= + ltrpathdirs= + names_already_handled= + names_next_round='$1 $2' + while test -n "$names_next_round"; do + names_this_round="$names_next_round" + names_next_round= + for name in $names_this_round; do + already_handled= + for n in $names_already_handled; do + if test "$n" = "$name"; then + already_handled=yes + break + fi + done + if test -z "$already_handled"; then + names_already_handled="$names_already_handled $name" + dnl See if it was already located by an earlier AC_LIB_LINKFLAGS + dnl or AC_LIB_HAVE_LINKFLAGS call. + uppername=`echo "$name" | sed -e 'y|abcdefghijklmnopqrstuvwxyz./-|ABCDEFGHIJKLMNOPQRSTUVWXYZ___|'` + eval value=\"\$HAVE_LIB$uppername\" + if test -n "$value"; then + if test "$value" = yes; then + eval value=\"\$LIB$uppername\" + test -z "$value" || LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }$value" + eval value=\"\$LTLIB$uppername\" + test -z "$value" || LTLIB[]NAME="${LTLIB[]NAME}${LTLIB[]NAME:+ }$value" + else + dnl An earlier call to AC_LIB_HAVE_LINKFLAGS has determined + dnl that this library doesn't exist. So just drop it. + : + fi + else + dnl Search the library lib$name in $additional_libdir and $LDFLAGS + dnl and the already constructed $LIBNAME/$LTLIBNAME. + found_dir= + found_la= + found_so= + found_a= + eval libname=\"$acl_libname_spec\" # typically: libname=lib$name + if test -n "$acl_shlibext"; then + shrext=".$acl_shlibext" # typically: shrext=.so + else + shrext= + fi + if test $use_additional = yes; then + dir="$additional_libdir" + dnl The same code as in the loop below: + dnl First look for a shared library. + if test -n "$acl_shlibext"; then + if test -f "$dir/$libname$shrext"; then + found_dir="$dir" + found_so="$dir/$libname$shrext" + else + if test "$acl_library_names_spec" = '$libname$shrext$versuffix'; then + ver=`(cd "$dir" && \ + for f in "$libname$shrext".*; do echo "$f"; done \ + | sed -e "s,^$libname$shrext\\\\.,," \ + | sort -t '.' -n -r -k1,1 -k2,2 -k3,3 -k4,4 -k5,5 \ + | sed 1q ) 2>/dev/null` + if test -n "$ver" && test -f "$dir/$libname$shrext.$ver"; then + found_dir="$dir" + found_so="$dir/$libname$shrext.$ver" + fi + else + eval library_names=\"$acl_library_names_spec\" + for f in $library_names; do + if test -f "$dir/$f"; then + found_dir="$dir" + found_so="$dir/$f" + break + fi + done + fi + fi + fi + dnl Then look for a static library. + if test "X$found_dir" = "X"; then + if test -f "$dir/$libname.$acl_libext"; then + found_dir="$dir" + found_a="$dir/$libname.$acl_libext" + fi + fi + if test "X$found_dir" != "X"; then + if test -f "$dir/$libname.la"; then + found_la="$dir/$libname.la" + fi + fi + fi + if test "X$found_dir" = "X"; then + for x in $LDFLAGS $LTLIB[]NAME; do + AC_LIB_WITH_FINAL_PREFIX([eval x=\"$x\"]) + case "$x" in + -L*) + dir=`echo "X$x" | sed -e 's/^X-L//'` + dnl First look for a shared library. + if test -n "$acl_shlibext"; then + if test -f "$dir/$libname$shrext"; then + found_dir="$dir" + found_so="$dir/$libname$shrext" + else + if test "$acl_library_names_spec" = '$libname$shrext$versuffix'; then + ver=`(cd "$dir" && \ + for f in "$libname$shrext".*; do echo "$f"; done \ + | sed -e "s,^$libname$shrext\\\\.,," \ + | sort -t '.' -n -r -k1,1 -k2,2 -k3,3 -k4,4 -k5,5 \ + | sed 1q ) 2>/dev/null` + if test -n "$ver" && test -f "$dir/$libname$shrext.$ver"; then + found_dir="$dir" + found_so="$dir/$libname$shrext.$ver" + fi + else + eval library_names=\"$acl_library_names_spec\" + for f in $library_names; do + if test -f "$dir/$f"; then + found_dir="$dir" + found_so="$dir/$f" + break + fi + done + fi + fi + fi + dnl Then look for a static library. + if test "X$found_dir" = "X"; then + if test -f "$dir/$libname.$acl_libext"; then + found_dir="$dir" + found_a="$dir/$libname.$acl_libext" + fi + fi + if test "X$found_dir" != "X"; then + if test -f "$dir/$libname.la"; then + found_la="$dir/$libname.la" + fi + fi + ;; + esac + if test "X$found_dir" != "X"; then + break + fi + done + fi + if test "X$found_dir" != "X"; then + dnl Found the library. + LTLIB[]NAME="${LTLIB[]NAME}${LTLIB[]NAME:+ }-L$found_dir -l$name" + if test "X$found_so" != "X"; then + dnl Linking with a shared library. We attempt to hardcode its + dnl directory into the executable's runpath, unless it's the + dnl standard /usr/lib. + if test "$enable_rpath" = no \ + || test "X$found_dir" = "X/usr/$acl_libdirstem" \ + || test "X$found_dir" = "X/usr/$acl_libdirstem2"; then + dnl No hardcoding is needed. + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }$found_so" + else + dnl Use an explicit option to hardcode DIR into the resulting + dnl binary. + dnl Potentially add DIR to ltrpathdirs. + dnl The ltrpathdirs will be appended to $LTLIBNAME at the end. + haveit= + for x in $ltrpathdirs; do + if test "X$x" = "X$found_dir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + ltrpathdirs="$ltrpathdirs $found_dir" + fi + dnl The hardcoding into $LIBNAME is system dependent. + if test "$acl_hardcode_direct" = yes; then + dnl Using DIR/libNAME.so during linking hardcodes DIR into the + dnl resulting binary. + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }$found_so" + else + if test -n "$acl_hardcode_libdir_flag_spec" && test "$acl_hardcode_minus_L" = no; then + dnl Use an explicit option to hardcode DIR into the resulting + dnl binary. + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }$found_so" + dnl Potentially add DIR to rpathdirs. + dnl The rpathdirs will be appended to $LIBNAME at the end. + haveit= + for x in $rpathdirs; do + if test "X$x" = "X$found_dir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + rpathdirs="$rpathdirs $found_dir" + fi + else + dnl Rely on "-L$found_dir". + dnl But don't add it if it's already contained in the LDFLAGS + dnl or the already constructed $LIBNAME + haveit= + for x in $LDFLAGS $LIB[]NAME; do + AC_LIB_WITH_FINAL_PREFIX([eval x=\"$x\"]) + if test "X$x" = "X-L$found_dir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }-L$found_dir" + fi + if test "$acl_hardcode_minus_L" != no; then + dnl FIXME: Not sure whether we should use + dnl "-L$found_dir -l$name" or "-L$found_dir $found_so" + dnl here. + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }$found_so" + else + dnl We cannot use $acl_hardcode_runpath_var and LD_RUN_PATH + dnl here, because this doesn't fit in flags passed to the + dnl compiler. So give up. No hardcoding. This affects only + dnl very old systems. + dnl FIXME: Not sure whether we should use + dnl "-L$found_dir -l$name" or "-L$found_dir $found_so" + dnl here. + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }-l$name" + fi + fi + fi + fi + else + if test "X$found_a" != "X"; then + dnl Linking with a static library. + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }$found_a" + else + dnl We shouldn't come here, but anyway it's good to have a + dnl fallback. + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }-L$found_dir -l$name" + fi + fi + dnl Assume the include files are nearby. + additional_includedir= + case "$found_dir" in + */$acl_libdirstem | */$acl_libdirstem/) + basedir=`echo "X$found_dir" | sed -e 's,^X,,' -e "s,/$acl_libdirstem/"'*$,,'` + if test "$name" = '$1'; then + LIB[]NAME[]_PREFIX="$basedir" + fi + additional_includedir="$basedir/include" + ;; + */$acl_libdirstem2 | */$acl_libdirstem2/) + basedir=`echo "X$found_dir" | sed -e 's,^X,,' -e "s,/$acl_libdirstem2/"'*$,,'` + if test "$name" = '$1'; then + LIB[]NAME[]_PREFIX="$basedir" + fi + additional_includedir="$basedir/include" + ;; + esac + if test "X$additional_includedir" != "X"; then + dnl Potentially add $additional_includedir to $INCNAME. + dnl But don't add it + dnl 1. if it's the standard /usr/include, + dnl 2. if it's /usr/local/include and we are using GCC on Linux, + dnl 3. if it's already present in $CPPFLAGS or the already + dnl constructed $INCNAME, + dnl 4. if it doesn't exist as a directory. + if test "X$additional_includedir" != "X/usr/include"; then + haveit= + if test "X$additional_includedir" = "X/usr/local/include"; then + if test -n "$GCC"; then + case $host_os in + linux* | gnu* | k*bsd*-gnu) haveit=yes;; + esac + fi + fi + if test -z "$haveit"; then + for x in $CPPFLAGS $INC[]NAME; do + AC_LIB_WITH_FINAL_PREFIX([eval x=\"$x\"]) + if test "X$x" = "X-I$additional_includedir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + if test -d "$additional_includedir"; then + dnl Really add $additional_includedir to $INCNAME. + INC[]NAME="${INC[]NAME}${INC[]NAME:+ }-I$additional_includedir" + fi + fi + fi + fi + fi + dnl Look for dependencies. + if test -n "$found_la"; then + dnl Read the .la file. It defines the variables + dnl dlname, library_names, old_library, dependency_libs, current, + dnl age, revision, installed, dlopen, dlpreopen, libdir. + save_libdir="$libdir" + case "$found_la" in + */* | *\\*) . "$found_la" ;; + *) . "./$found_la" ;; + esac + libdir="$save_libdir" + dnl We use only dependency_libs. + for dep in $dependency_libs; do + case "$dep" in + -L*) + additional_libdir=`echo "X$dep" | sed -e 's/^X-L//'` + dnl Potentially add $additional_libdir to $LIBNAME and $LTLIBNAME. + dnl But don't add it + dnl 1. if it's the standard /usr/lib, + dnl 2. if it's /usr/local/lib and we are using GCC on Linux, + dnl 3. if it's already present in $LDFLAGS or the already + dnl constructed $LIBNAME, + dnl 4. if it doesn't exist as a directory. + if test "X$additional_libdir" != "X/usr/$acl_libdirstem" \ + && test "X$additional_libdir" != "X/usr/$acl_libdirstem2"; then + haveit= + if test "X$additional_libdir" = "X/usr/local/$acl_libdirstem" \ + || test "X$additional_libdir" = "X/usr/local/$acl_libdirstem2"; then + if test -n "$GCC"; then + case $host_os in + linux* | gnu* | k*bsd*-gnu) haveit=yes;; + esac + fi + fi + if test -z "$haveit"; then + haveit= + for x in $LDFLAGS $LIB[]NAME; do + AC_LIB_WITH_FINAL_PREFIX([eval x=\"$x\"]) + if test "X$x" = "X-L$additional_libdir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + if test -d "$additional_libdir"; then + dnl Really add $additional_libdir to $LIBNAME. + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }-L$additional_libdir" + fi + fi + haveit= + for x in $LDFLAGS $LTLIB[]NAME; do + AC_LIB_WITH_FINAL_PREFIX([eval x=\"$x\"]) + if test "X$x" = "X-L$additional_libdir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + if test -d "$additional_libdir"; then + dnl Really add $additional_libdir to $LTLIBNAME. + LTLIB[]NAME="${LTLIB[]NAME}${LTLIB[]NAME:+ }-L$additional_libdir" + fi + fi + fi + fi + ;; + -R*) + dir=`echo "X$dep" | sed -e 's/^X-R//'` + if test "$enable_rpath" != no; then + dnl Potentially add DIR to rpathdirs. + dnl The rpathdirs will be appended to $LIBNAME at the end. + haveit= + for x in $rpathdirs; do + if test "X$x" = "X$dir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + rpathdirs="$rpathdirs $dir" + fi + dnl Potentially add DIR to ltrpathdirs. + dnl The ltrpathdirs will be appended to $LTLIBNAME at the end. + haveit= + for x in $ltrpathdirs; do + if test "X$x" = "X$dir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + ltrpathdirs="$ltrpathdirs $dir" + fi + fi + ;; + -l*) + dnl Handle this in the next round. + names_next_round="$names_next_round "`echo "X$dep" | sed -e 's/^X-l//'` + ;; + *.la) + dnl Handle this in the next round. Throw away the .la's + dnl directory; it is already contained in a preceding -L + dnl option. + names_next_round="$names_next_round "`echo "X$dep" | sed -e 's,^X.*/,,' -e 's,^lib,,' -e 's,\.la$,,'` + ;; + *) + dnl Most likely an immediate library name. + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }$dep" + LTLIB[]NAME="${LTLIB[]NAME}${LTLIB[]NAME:+ }$dep" + ;; + esac + done + fi + else + dnl Didn't find the library; assume it is in the system directories + dnl known to the linker and runtime loader. (All the system + dnl directories known to the linker should also be known to the + dnl runtime loader, otherwise the system is severely misconfigured.) + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }-l$name" + LTLIB[]NAME="${LTLIB[]NAME}${LTLIB[]NAME:+ }-l$name" + fi + fi + fi + done + done + if test "X$rpathdirs" != "X"; then + if test -n "$acl_hardcode_libdir_separator"; then + dnl Weird platform: only the last -rpath option counts, the user must + dnl pass all path elements in one option. We can arrange that for a + dnl single library, but not when more than one $LIBNAMEs are used. + alldirs= + for found_dir in $rpathdirs; do + alldirs="${alldirs}${alldirs:+$acl_hardcode_libdir_separator}$found_dir" + done + dnl Note: acl_hardcode_libdir_flag_spec uses $libdir and $wl. + acl_save_libdir="$libdir" + libdir="$alldirs" + eval flag=\"$acl_hardcode_libdir_flag_spec\" + libdir="$acl_save_libdir" + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }$flag" + else + dnl The -rpath options are cumulative. + for found_dir in $rpathdirs; do + acl_save_libdir="$libdir" + libdir="$found_dir" + eval flag=\"$acl_hardcode_libdir_flag_spec\" + libdir="$acl_save_libdir" + LIB[]NAME="${LIB[]NAME}${LIB[]NAME:+ }$flag" + done + fi + fi + if test "X$ltrpathdirs" != "X"; then + dnl When using libtool, the option that works for both libraries and + dnl executables is -R. The -R options are cumulative. + for found_dir in $ltrpathdirs; do + LTLIB[]NAME="${LTLIB[]NAME}${LTLIB[]NAME:+ }-R$found_dir" + done + fi + popdef([P_A_C_K]) + popdef([PACKLIBS]) + popdef([PACKUP]) + popdef([PACK]) + popdef([NAME]) +]) + +dnl AC_LIB_APPENDTOVAR(VAR, CONTENTS) appends the elements of CONTENTS to VAR, +dnl unless already present in VAR. +dnl Works only for CPPFLAGS, not for LIB* variables because that sometimes +dnl contains two or three consecutive elements that belong together. +AC_DEFUN([AC_LIB_APPENDTOVAR], +[ + for element in [$2]; do + haveit= + for x in $[$1]; do + AC_LIB_WITH_FINAL_PREFIX([eval x=\"$x\"]) + if test "X$x" = "X$element"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + [$1]="${[$1]}${[$1]:+ }$element" + fi + done +]) + +dnl For those cases where a variable contains several -L and -l options +dnl referring to unknown libraries and directories, this macro determines the +dnl necessary additional linker options for the runtime path. +dnl AC_LIB_LINKFLAGS_FROM_LIBS([LDADDVAR], [LIBSVALUE], [USE-LIBTOOL]) +dnl sets LDADDVAR to linker options needed together with LIBSVALUE. +dnl If USE-LIBTOOL evaluates to non-empty, linking with libtool is assumed, +dnl otherwise linking without libtool is assumed. +AC_DEFUN([AC_LIB_LINKFLAGS_FROM_LIBS], +[ + AC_REQUIRE([AC_LIB_RPATH]) + AC_REQUIRE([AC_LIB_PREPARE_MULTILIB]) + $1= + if test "$enable_rpath" != no; then + if test -n "$acl_hardcode_libdir_flag_spec" && test "$acl_hardcode_minus_L" = no; then + dnl Use an explicit option to hardcode directories into the resulting + dnl binary. + rpathdirs= + next= + for opt in $2; do + if test -n "$next"; then + dir="$next" + dnl No need to hardcode the standard /usr/lib. + if test "X$dir" != "X/usr/$acl_libdirstem" \ + && test "X$dir" != "X/usr/$acl_libdirstem2"; then + rpathdirs="$rpathdirs $dir" + fi + next= + else + case $opt in + -L) next=yes ;; + -L*) dir=`echo "X$opt" | sed -e 's,^X-L,,'` + dnl No need to hardcode the standard /usr/lib. + if test "X$dir" != "X/usr/$acl_libdirstem" \ + && test "X$dir" != "X/usr/$acl_libdirstem2"; then + rpathdirs="$rpathdirs $dir" + fi + next= ;; + *) next= ;; + esac + fi + done + if test "X$rpathdirs" != "X"; then + if test -n ""$3""; then + dnl libtool is used for linking. Use -R options. + for dir in $rpathdirs; do + $1="${$1}${$1:+ }-R$dir" + done + else + dnl The linker is used for linking directly. + if test -n "$acl_hardcode_libdir_separator"; then + dnl Weird platform: only the last -rpath option counts, the user + dnl must pass all path elements in one option. + alldirs= + for dir in $rpathdirs; do + alldirs="${alldirs}${alldirs:+$acl_hardcode_libdir_separator}$dir" + done + acl_save_libdir="$libdir" + libdir="$alldirs" + eval flag=\"$acl_hardcode_libdir_flag_spec\" + libdir="$acl_save_libdir" + $1="$flag" + else + dnl The -rpath options are cumulative. + for dir in $rpathdirs; do + acl_save_libdir="$libdir" + libdir="$dir" + eval flag=\"$acl_hardcode_libdir_flag_spec\" + libdir="$acl_save_libdir" + $1="${$1}${$1:+ }$flag" + done + fi + fi + fi + fi + fi + AC_SUBST([$1]) +]) diff --git a/m4/lib-prefix.m4 b/m4/lib-prefix.m4 new file mode 100644 index 000000000..4b7ee3358 --- /dev/null +++ b/m4/lib-prefix.m4 @@ -0,0 +1,224 @@ +# lib-prefix.m4 serial 7 (gettext-0.18) +dnl Copyright (C) 2001-2005, 2008-2009 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +dnl AC_LIB_ARG_WITH is synonymous to AC_ARG_WITH in autoconf-2.13, and +dnl similar to AC_ARG_WITH in autoconf 2.52...2.57 except that is doesn't +dnl require excessive bracketing. +ifdef([AC_HELP_STRING], +[AC_DEFUN([AC_LIB_ARG_WITH], [AC_ARG_WITH([$1],[[$2]],[$3],[$4])])], +[AC_DEFUN([AC_][LIB_ARG_WITH], [AC_ARG_WITH([$1],[$2],[$3],[$4])])]) + +dnl AC_LIB_PREFIX adds to the CPPFLAGS and LDFLAGS the flags that are needed +dnl to access previously installed libraries. The basic assumption is that +dnl a user will want packages to use other packages he previously installed +dnl with the same --prefix option. +dnl This macro is not needed if only AC_LIB_LINKFLAGS is used to locate +dnl libraries, but is otherwise very convenient. +AC_DEFUN([AC_LIB_PREFIX], +[ + AC_BEFORE([$0], [AC_LIB_LINKFLAGS]) + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([AC_LIB_PREPARE_MULTILIB]) + AC_REQUIRE([AC_LIB_PREPARE_PREFIX]) + dnl By default, look in $includedir and $libdir. + use_additional=yes + AC_LIB_WITH_FINAL_PREFIX([ + eval additional_includedir=\"$includedir\" + eval additional_libdir=\"$libdir\" + ]) + AC_LIB_ARG_WITH([lib-prefix], +[ --with-lib-prefix[=DIR] search for libraries in DIR/include and DIR/lib + --without-lib-prefix don't search for libraries in includedir and libdir], +[ + if test "X$withval" = "Xno"; then + use_additional=no + else + if test "X$withval" = "X"; then + AC_LIB_WITH_FINAL_PREFIX([ + eval additional_includedir=\"$includedir\" + eval additional_libdir=\"$libdir\" + ]) + else + additional_includedir="$withval/include" + additional_libdir="$withval/$acl_libdirstem" + fi + fi +]) + if test $use_additional = yes; then + dnl Potentially add $additional_includedir to $CPPFLAGS. + dnl But don't add it + dnl 1. if it's the standard /usr/include, + dnl 2. if it's already present in $CPPFLAGS, + dnl 3. if it's /usr/local/include and we are using GCC on Linux, + dnl 4. if it doesn't exist as a directory. + if test "X$additional_includedir" != "X/usr/include"; then + haveit= + for x in $CPPFLAGS; do + AC_LIB_WITH_FINAL_PREFIX([eval x=\"$x\"]) + if test "X$x" = "X-I$additional_includedir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + if test "X$additional_includedir" = "X/usr/local/include"; then + if test -n "$GCC"; then + case $host_os in + linux* | gnu* | k*bsd*-gnu) haveit=yes;; + esac + fi + fi + if test -z "$haveit"; then + if test -d "$additional_includedir"; then + dnl Really add $additional_includedir to $CPPFLAGS. + CPPFLAGS="${CPPFLAGS}${CPPFLAGS:+ }-I$additional_includedir" + fi + fi + fi + fi + dnl Potentially add $additional_libdir to $LDFLAGS. + dnl But don't add it + dnl 1. if it's the standard /usr/lib, + dnl 2. if it's already present in $LDFLAGS, + dnl 3. if it's /usr/local/lib and we are using GCC on Linux, + dnl 4. if it doesn't exist as a directory. + if test "X$additional_libdir" != "X/usr/$acl_libdirstem"; then + haveit= + for x in $LDFLAGS; do + AC_LIB_WITH_FINAL_PREFIX([eval x=\"$x\"]) + if test "X$x" = "X-L$additional_libdir"; then + haveit=yes + break + fi + done + if test -z "$haveit"; then + if test "X$additional_libdir" = "X/usr/local/$acl_libdirstem"; then + if test -n "$GCC"; then + case $host_os in + linux*) haveit=yes;; + esac + fi + fi + if test -z "$haveit"; then + if test -d "$additional_libdir"; then + dnl Really add $additional_libdir to $LDFLAGS. + LDFLAGS="${LDFLAGS}${LDFLAGS:+ }-L$additional_libdir" + fi + fi + fi + fi + fi +]) + +dnl AC_LIB_PREPARE_PREFIX creates variables acl_final_prefix, +dnl acl_final_exec_prefix, containing the values to which $prefix and +dnl $exec_prefix will expand at the end of the configure script. +AC_DEFUN([AC_LIB_PREPARE_PREFIX], +[ + dnl Unfortunately, prefix and exec_prefix get only finally determined + dnl at the end of configure. + if test "X$prefix" = "XNONE"; then + acl_final_prefix="$ac_default_prefix" + else + acl_final_prefix="$prefix" + fi + if test "X$exec_prefix" = "XNONE"; then + acl_final_exec_prefix='${prefix}' + else + acl_final_exec_prefix="$exec_prefix" + fi + acl_save_prefix="$prefix" + prefix="$acl_final_prefix" + eval acl_final_exec_prefix=\"$acl_final_exec_prefix\" + prefix="$acl_save_prefix" +]) + +dnl AC_LIB_WITH_FINAL_PREFIX([statement]) evaluates statement, with the +dnl variables prefix and exec_prefix bound to the values they will have +dnl at the end of the configure script. +AC_DEFUN([AC_LIB_WITH_FINAL_PREFIX], +[ + acl_save_prefix="$prefix" + prefix="$acl_final_prefix" + acl_save_exec_prefix="$exec_prefix" + exec_prefix="$acl_final_exec_prefix" + $1 + exec_prefix="$acl_save_exec_prefix" + prefix="$acl_save_prefix" +]) + +dnl AC_LIB_PREPARE_MULTILIB creates +dnl - a variable acl_libdirstem, containing the basename of the libdir, either +dnl "lib" or "lib64" or "lib/64", +dnl - a variable acl_libdirstem2, as a secondary possible value for +dnl acl_libdirstem, either the same as acl_libdirstem or "lib/sparcv9" or +dnl "lib/amd64". +AC_DEFUN([AC_LIB_PREPARE_MULTILIB], +[ + dnl There is no formal standard regarding lib and lib64. + dnl On glibc systems, the current practice is that on a system supporting + dnl 32-bit and 64-bit instruction sets or ABIs, 64-bit libraries go under + dnl $prefix/lib64 and 32-bit libraries go under $prefix/lib. We determine + dnl the compiler's default mode by looking at the compiler's library search + dnl path. If at least one of its elements ends in /lib64 or points to a + dnl directory whose absolute pathname ends in /lib64, we assume a 64-bit ABI. + dnl Otherwise we use the default, namely "lib". + dnl On Solaris systems, the current practice is that on a system supporting + dnl 32-bit and 64-bit instruction sets or ABIs, 64-bit libraries go under + dnl $prefix/lib/64 (which is a symlink to either $prefix/lib/sparcv9 or + dnl $prefix/lib/amd64) and 32-bit libraries go under $prefix/lib. + AC_REQUIRE([AC_CANONICAL_HOST]) + acl_libdirstem=lib + acl_libdirstem2= + case "$host_os" in + solaris*) + dnl See Solaris 10 Software Developer Collection > Solaris 64-bit Developer's Guide > The Development Environment + dnl . + dnl "Portable Makefiles should refer to any library directories using the 64 symbolic link." + dnl But we want to recognize the sparcv9 or amd64 subdirectory also if the + dnl symlink is missing, so we set acl_libdirstem2 too. + AC_CACHE_CHECK([for 64-bit host], [gl_cv_solaris_64bit], + [AC_EGREP_CPP([sixtyfour bits], [ +#ifdef _LP64 +sixtyfour bits +#endif + ], [gl_cv_solaris_64bit=yes], [gl_cv_solaris_64bit=no]) + ]) + if test $gl_cv_solaris_64bit = yes; then + acl_libdirstem=lib/64 + case "$host_cpu" in + sparc*) acl_libdirstem2=lib/sparcv9 ;; + i*86 | x86_64) acl_libdirstem2=lib/amd64 ;; + esac + fi + ;; + *) + searchpath=`(LC_ALL=C $CC -print-search-dirs) 2>/dev/null | sed -n -e 's,^libraries: ,,p' | sed -e 's,^=,,'` + if test -n "$searchpath"; then + acl_save_IFS="${IFS= }"; IFS=":" + for searchdir in $searchpath; do + if test -d "$searchdir"; then + case "$searchdir" in + */lib64/ | */lib64 ) acl_libdirstem=lib64 ;; + */../ | */.. ) + # Better ignore directories of this form. They are misleading. + ;; + *) searchdir=`cd "$searchdir" && pwd` + case "$searchdir" in + */lib64 ) acl_libdirstem=lib64 ;; + esac ;; + esac + fi + done + IFS="$acl_save_IFS" + fi + ;; + esac + test -n "$acl_libdirstem2" || acl_libdirstem2="$acl_libdirstem" +]) diff --git a/m4/libunistring.m4 b/m4/libunistring.m4 new file mode 100644 index 000000000..52ff06b61 --- /dev/null +++ b/m4/libunistring.m4 @@ -0,0 +1,37 @@ +# libunistring.m4 serial 1 +dnl Copyright (C) 2009 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl gl_LIBUNISTRING +dnl Searches for an installed libunistring. +dnl If found, it sets and AC_SUBSTs HAVE_LIBUNISTRING=yes and the LIBUNISTRING +dnl and LTLIBUNISTRING variables and augments the CPPFLAGS variable, and +dnl #defines HAVE_LIBUNISTRING to 1. Otherwise, it sets and AC_SUBSTs +dnl HAVE_LIBUNISTRING=no and LIBUNINSTRING and LTLIBUNISTRING to empty. + +AC_DEFUN([gl_LIBUNISTRING], +[ + dnl First, try to link without -liconv. libunistring often depends on + dnl libiconv, but we don't know (and often don't need to know) where + dnl libiconv is installed. + AC_LIB_HAVE_LINKFLAGS([unistring], [], + [#include ], [u8_strconv_from_locale((char*)0);], + [no, consider installing GNU libunistring]) + if test "$ac_cv_libunistring" != yes; then + dnl Second try, with -liconv. + AC_REQUIRE([AM_ICONV]) + if test -n "$LIBICONV"; then + glus_save_LIBS="$LIBS" + LIBS="$LIBS $LIBICONV" + AC_LIB_HAVE_LINKFLAGS([unistring], [], + [#include ], [u8_strconv_from_locale((char*)0);], + [no, consider installing GNU libunistring]) + if test -n "$LIBUNISTRING"; then + LIBUNISTRING="$LIBUNISTRING $LIBICONV" + fi + LIBS="$glus_save_LIBS" + fi + fi +]) diff --git a/m4/string_h.m4 b/m4/string_h.m4 new file mode 100644 index 000000000..2d5553c37 --- /dev/null +++ b/m4/string_h.m4 @@ -0,0 +1,92 @@ +# Configure a GNU-like replacement for . + +# Copyright (C) 2007, 2008 Free Software Foundation, Inc. +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 6 + +# Written by Paul Eggert. + +AC_DEFUN([gl_HEADER_STRING_H], +[ + dnl Use AC_REQUIRE here, so that the default behavior below is expanded + dnl once only, before all statements that occur in other macros. + AC_REQUIRE([gl_HEADER_STRING_H_BODY]) +]) + +AC_DEFUN([gl_HEADER_STRING_H_BODY], +[ + AC_REQUIRE([AC_C_RESTRICT]) + AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS]) + gl_CHECK_NEXT_HEADERS([string.h]) +]) + +AC_DEFUN([gl_STRING_MODULE_INDICATOR], +[ + dnl Use AC_REQUIRE here, so that the default settings are expanded once only. + AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS]) + GNULIB_[]m4_translit([$1],[abcdefghijklmnopqrstuvwxyz./-],[ABCDEFGHIJKLMNOPQRSTUVWXYZ___])=1 +]) + +AC_DEFUN([gl_HEADER_STRING_H_DEFAULTS], +[ + GNULIB_MEMMEM=0; AC_SUBST([GNULIB_MEMMEM]) + GNULIB_MEMPCPY=0; AC_SUBST([GNULIB_MEMPCPY]) + GNULIB_MEMRCHR=0; AC_SUBST([GNULIB_MEMRCHR]) + GNULIB_RAWMEMCHR=0; AC_SUBST([GNULIB_RAWMEMCHR]) + GNULIB_STPCPY=0; AC_SUBST([GNULIB_STPCPY]) + GNULIB_STPNCPY=0; AC_SUBST([GNULIB_STPNCPY]) + GNULIB_STRCHRNUL=0; AC_SUBST([GNULIB_STRCHRNUL]) + GNULIB_STRDUP=0; AC_SUBST([GNULIB_STRDUP]) + GNULIB_STRNDUP=0; AC_SUBST([GNULIB_STRNDUP]) + GNULIB_STRNLEN=0; AC_SUBST([GNULIB_STRNLEN]) + GNULIB_STRPBRK=0; AC_SUBST([GNULIB_STRPBRK]) + GNULIB_STRSEP=0; AC_SUBST([GNULIB_STRSEP]) + GNULIB_STRSTR=0; AC_SUBST([GNULIB_STRSTR]) + GNULIB_STRCASESTR=0; AC_SUBST([GNULIB_STRCASESTR]) + GNULIB_STRTOK_R=0; AC_SUBST([GNULIB_STRTOK_R]) + GNULIB_MBSLEN=0; AC_SUBST([GNULIB_MBSLEN]) + GNULIB_MBSNLEN=0; AC_SUBST([GNULIB_MBSNLEN]) + GNULIB_MBSCHR=0; AC_SUBST([GNULIB_MBSCHR]) + GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR]) + GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR]) + GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP]) + GNULIB_MBSNCASECMP=0; AC_SUBST([GNULIB_MBSNCASECMP]) + GNULIB_MBSPCASECMP=0; AC_SUBST([GNULIB_MBSPCASECMP]) + GNULIB_MBSCASESTR=0; AC_SUBST([GNULIB_MBSCASESTR]) + GNULIB_MBSCSPN=0; AC_SUBST([GNULIB_MBSCSPN]) + GNULIB_MBSPBRK=0; AC_SUBST([GNULIB_MBSPBRK]) + GNULIB_MBSSPN=0; AC_SUBST([GNULIB_MBSSPN]) + GNULIB_MBSSEP=0; AC_SUBST([GNULIB_MBSSEP]) + GNULIB_MBSTOK_R=0; AC_SUBST([GNULIB_MBSTOK_R]) + GNULIB_STRERROR=0; AC_SUBST([GNULIB_STRERROR]) + GNULIB_STRSIGNAL=0; AC_SUBST([GNULIB_STRSIGNAL]) + GNULIB_STRVERSCMP=0; AC_SUBST([GNULIB_STRVERSCMP]) + dnl Assume proper GNU behavior unless another module says otherwise. + HAVE_DECL_MEMMEM=1; AC_SUBST([HAVE_DECL_MEMMEM]) + HAVE_MEMPCPY=1; AC_SUBST([HAVE_MEMPCPY]) + HAVE_DECL_MEMRCHR=1; AC_SUBST([HAVE_DECL_MEMRCHR]) + HAVE_RAWMEMCHR=1; AC_SUBST([HAVE_RAWMEMCHR]) + HAVE_STPCPY=1; AC_SUBST([HAVE_STPCPY]) + HAVE_STPNCPY=1; AC_SUBST([HAVE_STPNCPY]) + HAVE_STRCHRNUL=1; AC_SUBST([HAVE_STRCHRNUL]) + HAVE_DECL_STRDUP=1; AC_SUBST([HAVE_DECL_STRDUP]) + HAVE_STRNDUP=1; AC_SUBST([HAVE_STRNDUP]) + HAVE_DECL_STRNDUP=1; AC_SUBST([HAVE_DECL_STRNDUP]) + HAVE_DECL_STRNLEN=1; AC_SUBST([HAVE_DECL_STRNLEN]) + HAVE_STRPBRK=1; AC_SUBST([HAVE_STRPBRK]) + HAVE_STRSEP=1; AC_SUBST([HAVE_STRSEP]) + HAVE_STRCASESTR=1; AC_SUBST([HAVE_STRCASESTR]) + HAVE_DECL_STRTOK_R=1; AC_SUBST([HAVE_DECL_STRTOK_R]) + HAVE_DECL_STRERROR=1; AC_SUBST([HAVE_DECL_STRERROR]) + HAVE_DECL_STRSIGNAL=1; AC_SUBST([HAVE_DECL_STRSIGNAL]) + HAVE_STRVERSCMP=1; AC_SUBST([HAVE_STRVERSCMP]) + REPLACE_MEMMEM=0; AC_SUBST([REPLACE_MEMMEM]) + REPLACE_STRDUP=0; AC_SUBST([REPLACE_STRDUP]) + REPLACE_STRSTR=0; AC_SUBST([REPLACE_STRSTR]) + REPLACE_STRCASESTR=0; AC_SUBST([REPLACE_STRCASESTR]) + REPLACE_STRERROR=0; AC_SUBST([REPLACE_STRERROR]) + REPLACE_STRSIGNAL=0; AC_SUBST([REPLACE_STRSIGNAL]) +]) -- 2.20.1