Parse in a more locale-independent way

update submodules to latest
* gnulib.modules: Add c-ctype.
* lib/wordsplit.c, src/buffer.c, src/exclist.c, src/incremen.c:
* src/list.c, src/misc.c, src/names.c, src/sparse.c, src/tar.c:
* src/xheader.c:
Include c-ctype.h, and use its API rather than ctype.h’s.
This is more likely to work when oddball locales are used.
* src/transform.c: Include ctype.h, since this module still uses
tolower and toupper (this is probably wrong; it should handle multi-byte characters).
This commit is contained in:
Paul Eggert
2023-09-12 00:15:52 -05:00
parent 05fcfaafb6
commit 01f986b921
14 changed files with 46 additions and 42 deletions

View File

@@ -18,8 +18,9 @@
# include <config.h>
#endif
#include <wordsplit.h>
#include <errno.h>
#include <ctype.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
@@ -28,6 +29,8 @@
#include <pwd.h>
#include <glob.h>
#include <c-ctype.h>
#if ENABLE_NLS
# include <gettext.h>
#else
@@ -36,22 +39,12 @@
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
#include <wordsplit.h>
#define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
#define ISDELIM(ws,c) \
(strchr ((ws)->ws_delim, (c)) != NULL)
#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL)
#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
#define ISVARBEG(c) (ISALPHA(c) || c == '_')
#define ISVARCHR(c) (ISALNUM(c) || c == '_')
#define ISVARBEG(c) (c_isalpha (c) || c == '_')
#define ISVARCHR(c) (c_isalnum (c) || c == '_')
#define WSP_RETURN_DELIMS(wsp) \
((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))
@@ -1891,7 +1884,7 @@ skip_sed_expr (const char *command, size_t i, size_t len)
if (command[i] == ';')
i++;
if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])))
if (!(command[i] == 's' && i + 3 < len && c_ispunct (command[i + 1])))
break;
delim = command[++i];
@@ -1900,7 +1893,7 @@ skip_sed_expr (const char *command, size_t i, size_t len)
{
if (state == 3)
{
if (command[i] == delim || !ISALNUM (command[i]))
if (command[i] == delim || !c_isalnum (command[i]))
break;
}
else if (command[i] == '\\')
@@ -1987,7 +1980,7 @@ scan_word (struct wordsplit *wsp, size_t start, int consume_all)
start = i;
if (wsp->ws_flags & WRDSF_SED_EXPR
&& command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
&& command[i] == 's' && i + 3 < len && c_ispunct (command[i + 1]))
{
flags = _WSNF_SEXP;
i = skip_sed_expr (command, i, len);
@@ -2080,7 +2073,7 @@ scan_word (struct wordsplit *wsp, size_t start, int consume_all)
}
#define to_num(c) \
(ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
(c_isdigit(c) ? c - '0' : c_isxdigit (c) ? c_toupper (c) - 'A' + 10 : 255)
static int
xtonum (int *pval, const char *src, int base, int cnt)
@@ -2113,7 +2106,7 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
len++;
else if (*str == '"')
len += 2;
else if (*str != '\t' && *str != '\\' && ISPRINT (*str))
else if (*str != '\t' && *str != '\\' && c_isprint (*str))
len++;
else if (quote_hex)
len += 3;
@@ -2201,7 +2194,7 @@ wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
}
}
else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
&& (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
&& (unsigned char) src[i] < 128 && c_isdigit (src[i]))
{
if (n - i < 1)
{
@@ -2251,7 +2244,7 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
*dst++ = '\\';
*dst++ = *src;
}
else if (*src != '\t' && *src != '\\' && ISPRINT (*src))
else if (*src != '\t' && *src != '\\' && c_isprint (*src))
*dst++ = *src;
else
{