Sync with fileutils

This commit is contained in:
Paul Eggert
2002-02-11 14:28:09 +00:00
parent 4bfdfaa270
commit 9ded0a0567

View File

@@ -1,23 +1,27 @@
/* Unicode character output to streams with locale dependent encoding. /* Unicode character output to streams with locale dependent encoding.
Copyright (C) 2000, 2001 Free Software Foundation, Inc. Copyright (C) 2000-2002 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify it
it under the terms of the GNU General Public License as published by under the terms of the GNU Library General Public License as published
the Free Software Foundation; either version 2, or (at your option) by the Free Software Foundation; either version 2, or (at your option)
any later version. any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
GNU General Public License for more details. Library General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU Library General Public
along with this program; if not, write to the Free Software Foundation, License along with this program; if not, write to the Free Software
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA. */
/* Written by Bruno Haible <haible@clisp.cons.org>. */ /* Written by Bruno Haible <haible@clisp.cons.org>. */
/* Note: This file requires the locale_charset() function. See
libiconv-1.7/libcharset/INTEGRATE for how to obtain it. */
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include <config.h> # include <config.h>
#endif #endif
@@ -42,20 +46,17 @@ extern int errno;
# include <iconv.h> # include <iconv.h>
#endif #endif
/* Some systems, like SunOS 4, don't have EILSEQ. On these systems, #include <error.h>
define EILSEQ to some value other than EINVAL, because our invokers
may want to distinguish EINVAL from EILSEQ. */
#ifndef EILSEQ
# define EILSEQ ENOENT
#endif
#ifndef ENOTSUP
# define ENOTSUP EINVAL
#endif
#if HAVE_LANGINFO_CODESET && ! USE_INCLUDED_LIBINTL #if ENABLE_NLS
# include <langinfo.h> # include <libintl.h>
#else
# define gettext(Text) Text
#endif #endif
#define _(Text) gettext (Text)
#define N_(Text) Text
/* Specification. */
#include "unicodeio.h" #include "unicodeio.h"
/* When we pass a Unicode character to iconv(), we must pass it in a /* When we pass a Unicode character to iconv(), we must pass it in a
@@ -110,17 +111,17 @@ utf8_wctomb (unsigned char *r, unsigned int wc)
#define UTF8_NAME "UTF-8" #define UTF8_NAME "UTF-8"
/* Converts the Unicode character CODE to its multibyte representation /* Converts the Unicode character CODE to its multibyte representation
in the current locale and calls SUCCESS on the resulting byte in the current locale and calls the SUCCESS callback on the resulting
sequence. If an error occurs, invoke FAILURE instead, byte sequence. If an error occurs, invokes the FAILURE callback instead,
passing it CODE with errno set appropriately. passing it CODE and an English error string.
Assumes that the locale doesn't change between two calls. Returns whatever the callback returned.
Return whatever the SUCCESS or FAILURE returns. */ Assumes that the locale doesn't change between two calls. */
int long
unicode_to_mb (unsigned int code, unicode_to_mb (unsigned int code,
int (*success) PARAMS((const char *buf, size_t buflen, long (*success) PARAMS ((const char *buf, size_t buflen,
void *callback_arg)), void *callback_arg)),
int (*failure) PARAMS((unsigned int code, long (*failure) PARAMS ((unsigned int code, const char *msg,
void *callback_arg)), void *callback_arg)),
void *callback_arg) void *callback_arg)
{ {
static int initialized; static int initialized;
@@ -134,18 +135,8 @@ unicode_to_mb (unsigned int code,
if (!initialized) if (!initialized)
{ {
const char *charset;
#if USE_INCLUDED_LIBINTL
extern const char *locale_charset PARAMS ((void)); extern const char *locale_charset PARAMS ((void));
charset = locale_charset (); const char *charset = locale_charset ();
#else
# if HAVE_LANGINFO_CODESET
charset = nl_langinfo (CODESET);
# else
charset = "";
# endif
#endif
is_utf8 = !strcmp (charset, UTF8_NAME); is_utf8 = !strcmp (charset, UTF8_NAME);
#if HAVE_ICONV #if HAVE_ICONV
@@ -153,32 +144,32 @@ unicode_to_mb (unsigned int code,
{ {
utf8_to_local = iconv_open (charset, UTF8_NAME); utf8_to_local = iconv_open (charset, UTF8_NAME);
if (utf8_to_local == (iconv_t)(-1)) if (utf8_to_local == (iconv_t)(-1))
{ /* For an unknown encoding, assume ASCII. */
/* For an unknown encoding, assume ASCII. */ utf8_to_local = iconv_open ("ASCII", UTF8_NAME);
utf8_to_local = iconv_open ("ASCII", UTF8_NAME);
if (utf8_to_local == (iconv_t)(-1))
return failure (code, callback_arg);
}
} }
#endif #endif
initialized = 1; initialized = 1;
} }
/* Test whether the utf8_to_local converter is available at all. */
if (!is_utf8)
{
#if HAVE_ICONV
if (utf8_to_local == (iconv_t)(-1))
return failure (code, N_("iconv function not usable"), callback_arg);
#else
return failure (code, N_("iconv function not available"), callback_arg);
#endif
}
/* Convert the character to UTF-8. */ /* Convert the character to UTF-8. */
count = utf8_wctomb ((unsigned char *) inbuf, code); count = utf8_wctomb ((unsigned char *) inbuf, code);
if (count < 0) if (count < 0)
{ return failure (code, N_("character out of range"), callback_arg);
errno = EILSEQ;
return failure (code, callback_arg);
}
if (is_utf8)
{
return success (inbuf, count, callback_arg);
}
else
{
#if HAVE_ICONV #if HAVE_ICONV
if (!is_utf8)
{
char outbuf[25]; char outbuf[25];
const char *inptr; const char *inptr;
size_t inbytesleft; size_t inbytesleft;
@@ -201,11 +192,7 @@ unicode_to_mb (unsigned int code,
|| (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0') || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0')
# endif # endif
) )
{ return failure (code, NULL, callback_arg);
if (res != (size_t)(-1))
errno = EILSEQ;
return failure (code, callback_arg);
}
/* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
# if defined _LIBICONV_VERSION \ # if defined _LIBICONV_VERSION \
@@ -214,46 +201,63 @@ unicode_to_mb (unsigned int code,
/* Get back to the initial shift state. */ /* Get back to the initial shift state. */
res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft); res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft);
if (res == (size_t)(-1)) if (res == (size_t)(-1))
return failure (code, callback_arg); return failure (code, NULL, callback_arg);
# endif # endif
return success (outbuf, outptr - outbuf, callback_arg); return success (outbuf, outptr - outbuf, callback_arg);
#else
errno = ENOTSUP;
return failure (code, callback_arg);
#endif
} }
#endif
/* At this point, is_utf8 is true, so no conversion is needed. */
return success (inbuf, count, callback_arg);
} }
/* Simple success callback that outputs the converted string. /* Simple success callback that outputs the converted string.
The STREAM is passed as callback_arg. */ The STREAM is passed as callback_arg. */
int long
print_unicode_success (const char *buf, size_t buflen, void *callback_arg) fwrite_success_callback (const char *buf, size_t buflen, void *callback_arg)
{ {
FILE *stream = (FILE *) callback_arg; FILE *stream = (FILE *) callback_arg;
return fwrite (buf, 1, buflen, stream) == 0 ? -1 : 0; fwrite (buf, 1, buflen, stream);
return 0;
} }
/* Simple failure callback that prints an ASCII representation, using /* Simple failure callback that displays an error and exits. */
the same notation as C99 strings. */ static long
int exit_failure_callback (unsigned int code, const char *msg, void *callback_arg)
print_unicode_failure (unsigned int code, void *callback_arg)
{ {
int e = errno; if (msg == NULL)
FILE *stream = callback_arg; error (1, 0, _("cannot convert U+%04X to local character set"), code);
else
fprintf (stream, code < 0x10000 ? "\\u%04X" : "\\U%08X", code); error (1, 0, _("cannot convert U+%04X to local character set: %s"), code,
errno = e; gettext (msg));
return -1;
}
/* Simple failure callback that displays a fallback representation in plain
ASCII, using the same notation as ISO C99 strings. */
static long
fallback_failure_callback (unsigned int code, const char *msg, void *callback_arg)
{
FILE *stream = (FILE *) callback_arg;
if (code < 0x10000)
fprintf (stream, "\\u%04X", code);
else
fprintf (stream, "\\U%08X", code);
return -1; return -1;
} }
/* Outputs the Unicode character CODE to the output stream STREAM. /* Outputs the Unicode character CODE to the output stream STREAM.
Returns zero if successful, -1 (setting errno) otherwise. Upon failure, exit if exit_on_error is true, otherwise output a fallback
Assumes that the locale doesn't change between two calls. */ notation. */
int void
print_unicode_char (FILE *stream, unsigned int code) print_unicode_char (FILE *stream, unsigned int code, int exit_on_error)
{ {
return unicode_to_mb (code, print_unicode_success, print_unicode_failure, unicode_to_mb (code, fwrite_success_callback,
stream); exit_on_error
? exit_failure_callback
: fallback_failure_callback,
stream);
} }