Sync wordsplit and paxutils with the most recent versions

This commit is contained in:
Sergey Poznyakoff
2018-07-31 11:53:00 +03:00
parent e81c89ddd6
commit c7b3f0217f
3 changed files with 1465 additions and 433 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
/* wordsplit - a word splitter /* wordsplit - a word splitter
Copyright (C) 2009-2014, 2016-2017 Free Software Foundation, Inc. Copyright (C) 2009-2018 Sergey Poznyakoff
This program is free software; you can redistribute it and/or modify it This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the under the terms of the GNU General Public License as published by the
@@ -12,10 +12,7 @@
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>. with this program. If not, see <http://www.gnu.org/licenses/>. */
Written by Sergey Poznyakoff
*/
#ifndef __WORDSPLIT_H #ifndef __WORDSPLIT_H
#define __WORDSPLIT_H #define __WORDSPLIT_H
@@ -28,42 +25,106 @@
# define __WORDSPLIT_ATTRIBUTE_FORMAT(spec) /* empty */ # define __WORDSPLIT_ATTRIBUTE_FORMAT(spec) /* empty */
#endif #endif
struct wordsplit typedef struct wordsplit wordsplit_t;
/* Structure used to direct the splitting. Members marked with [Input]
can be defined before calling wordsplit(), those marked with [Output]
provide return values when the function returns. If neither mark is
used, the member is internal and must not be used by the caller.
In the comments below, the identifiers in parentheses indicate bits that
must be set (or unset, if starting with !) in ws_flags (if starting with
WRDSF_) or ws_options (if starting with WRDSO_) to initialize or use the
given member.
If not redefined explicitly, most of them are set to some reasonable
default value upon entry to wordsplit(). */
struct wordsplit
{ {
size_t ws_wordc; size_t ws_wordc; /* [Output] Number of words in ws_wordv. */
char **ws_wordv; char **ws_wordv; /* [Output] Array of parsed out words. */
size_t ws_offs; size_t ws_offs; /* [Input] (WRDSF_DOOFFS) Number of initial
size_t ws_wordn; elements in ws_wordv to fill with NULLs. */
int ws_flags; size_t ws_wordn; /* Number of elements ws_wordv can accommodate. */
const char *ws_delim; int ws_flags; /* [Input] Flags passed to wordsplit. */
const char *ws_comment; int ws_options; /* [Input] (WRDSF_OPTIONS)
const char *ws_escape; Additional options. */
void (*ws_alloc_die) (struct wordsplit * wsp); size_t ws_maxwords; /* [Input] (WRDSO_MAXWORDS) Return at most that
many words */
size_t ws_wordi; /* [Output] (WRDSF_INCREMENTAL) Total number of
words returned so far */
const char *ws_delim; /* [Input] (WRDSF_DELIM) Word delimiters. */
const char *ws_comment; /* [Input] (WRDSF_COMMENT) Comment characters. */
const char *ws_escape[2]; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
with backslash. */
void (*ws_alloc_die) (wordsplit_t *wsp);
/* [Input] (WRDSF_ALLOC_DIE) Function called when
out of memory. Must not return. */
void (*ws_error) (const char *, ...) void (*ws_error) (const char *, ...)
__WORDSPLIT_ATTRIBUTE_FORMAT ((__printf__, 1, 2)); __attribute__ ((__format__ (__printf__, 1, 2)));
/* [Input] (WRDSF_ERROR) Function used for error
reporting */
void (*ws_debug) (const char *, ...) void (*ws_debug) (const char *, ...)
__WORDSPLIT_ATTRIBUTE_FORMAT ((__printf__, 1, 2)); __attribute__ ((__format__ (__printf__, 1, 2)));
/* [Input] (WRDSF_DEBUG) Function used for debug
output. */
const char **ws_env; /* [Input] (WRDSF_ENV, !WRDSF_NOVAR) Array of
environment variables. */
const char **ws_env; char **ws_envbuf;
const char *(*ws_getvar) (const char *, size_t, void *); size_t ws_envidx;
void *ws_closure; size_t ws_envsiz;
int (*ws_getvar) (char **ret, const char *var, size_t len, void *clos);
/* [Input] (WRDSF_GETVAR, !WRDSF_NOVAR) Looks up
the name VAR (LEN bytes long) in the table of
variables and if found returns in memory
location pointed to by RET the value of that
variable. Returns WRDSE_OK (0) on success,
and an error code (see WRDSE_* defines below)
on error. User-specific errors can be returned
by storing the error diagnostic string in RET
and returning WRDSE_USERERR.
Whatever is stored in RET, it must be allocated
using malloc(3). */
void *ws_closure; /* [Input] (WRDSF_CLOSURE) Passed as the CLOS
argument to ws_getvar and ws_command. */
int (*ws_command) (char **ret, const char *cmd, size_t len, char **argv,
void *clos);
/* [Input] (!WRDSF_NOCMD) Returns in the memory
location pointed to by RET the expansion of
the command CMD (LEN bytes long). If WRDSO_ARGV
option is set, ARGV contains CMD split out to
words. Otherwise ARGV is NULL.
const char *ws_input; See ws_getvar for a discussion of possible
size_t ws_len; return values. */
size_t ws_endp;
int ws_errno; const char *ws_input; /* Input string (the S argument to wordsplit). */
size_t ws_len; /* Length of ws_input. */
size_t ws_endp; /* Points past the last processed byte in
ws_input. */
int ws_errno; /* [Output] Error code, if an error occurred. */
char *ws_usererr; /* Points to textual description of
the error, if ws_errno is WRDSE_USERERR. Must
be allocated with malloc(3). */
struct wordsplit_node *ws_head, *ws_tail; struct wordsplit_node *ws_head, *ws_tail;
/* Doubly-linked list of parsed out nodes. */
int ws_lvl; /* Invocation nesting level. */
}; };
/* Wordsplit flags. Only 2 bits of a 32-bit word remain unused. /* Initial size for ws_env, if allocated automatically */
It is getting crowded... */ #define WORDSPLIT_ENV_INIT 16
/* Wordsplit flags. */
/* Append the words found to the array resulting from a previous /* Append the words found to the array resulting from a previous
call. */ call. */
#define WRDSF_APPEND 0x00000001 #define WRDSF_APPEND 0x00000001
/* Insert we_offs initial NULLs in the array ws_wordv. /* Insert ws_offs initial NULLs in the array ws_wordv.
(These are not counted in the returned ws_wordc.) */ (These are not counted in the returned ws_wordc.) */
#define WRDSF_DOOFFS 0x00000002 #define WRDSF_DOOFFS 0x00000002
/* Don't do command substitution. Reserved for future use. */ /* Don't do command substitution. */
#define WRDSF_NOCMD 0x00000004 #define WRDSF_NOCMD 0x00000004
/* The parameter p resulted from a previous call to /* The parameter p resulted from a previous call to
wordsplit(), and wordsplit_free() was not called. Reuse the wordsplit(), and wordsplit_free() was not called. Reuse the
@@ -71,10 +132,8 @@ struct wordsplit
#define WRDSF_REUSE 0x00000008 #define WRDSF_REUSE 0x00000008
/* Print errors */ /* Print errors */
#define WRDSF_SHOWERR 0x00000010 #define WRDSF_SHOWERR 0x00000010
/* Consider it an error if an undefined shell variable /* Consider it an error if an undefined variable is expanded. */
is expanded. */
#define WRDSF_UNDEF 0x00000020 #define WRDSF_UNDEF 0x00000020
/* Don't do variable expansion. */ /* Don't do variable expansion. */
#define WRDSF_NOVAR 0x00000040 #define WRDSF_NOVAR 0x00000040
/* Abort on ENOMEM error */ /* Abort on ENOMEM error */
@@ -85,7 +144,7 @@ struct wordsplit
#define WRDSF_SQUOTE 0x00000200 #define WRDSF_SQUOTE 0x00000200
/* Handle double quotes */ /* Handle double quotes */
#define WRDSF_DQUOTE 0x00000400 #define WRDSF_DQUOTE 0x00000400
/* Handle quotes and escape directives */ /* Handle single and double quotes */
#define WRDSF_QUOTE (WRDSF_SQUOTE|WRDSF_DQUOTE) #define WRDSF_QUOTE (WRDSF_SQUOTE|WRDSF_DQUOTE)
/* Replace each input sequence of repeated delimiters with a single /* Replace each input sequence of repeated delimiters with a single
delimiter */ delimiter */
@@ -113,56 +172,106 @@ struct wordsplit
/* Don't split input into words. Useful for side effects. */ /* Don't split input into words. Useful for side effects. */
#define WRDSF_NOSPLIT 0x00400000 #define WRDSF_NOSPLIT 0x00400000
/* Keep undefined variables in place, instead of expanding them to /* Keep undefined variables in place, instead of expanding them to
empty string */ empty strings. */
#define WRDSF_KEEPUNDEF 0x00800000 #define WRDSF_KEEPUNDEF 0x00800000
/* Warn about undefined variables */ /* Warn about undefined variables */
#define WRDSF_WARNUNDEF 0x01000000 #define WRDSF_WARNUNDEF 0x01000000
/* Handle C escapes */ /* Handle C escapes */
#define WRDSF_CESCAPES 0x02000000 #define WRDSF_CESCAPES 0x02000000
/* ws_closure is set */ /* ws_closure is set */
#define WRDSF_CLOSURE 0x04000000 #define WRDSF_CLOSURE 0x04000000
/* ws_env is a Key/Value environment, i.e. the value of a variable is /* ws_env is a Key/Value environment, i.e. the value of a variable is
stored in the element that follows its name. */ stored in the element that follows its name. */
#define WRDSF_ENV_KV 0x08000000 #define WRDSF_ENV_KV 0x08000000
/* ws_escape is set */ /* ws_escape is set */
#define WRDSF_ESCAPE 0x10000000 #define WRDSF_ESCAPE 0x10000000
/* Incremental mode */ /* Incremental mode */
#define WRDSF_INCREMENTAL 0x20000000 #define WRDSF_INCREMENTAL 0x20000000
/* Perform pathname and tilde expansion */
#define WRDSF_PATHEXPAND 0x40000000
/* ws_options is initialized */
#define WRDSF_OPTIONS 0x80000000
#define WRDSF_DEFFLAGS \ #define WRDSF_DEFFLAGS \
(WRDSF_NOVAR | WRDSF_NOCMD | \ (WRDSF_NOVAR | WRDSF_NOCMD | \
WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES) WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
#define WRDSE_EOF 0 /* Remove the word that produces empty string after path expansion */
#define WRDSO_NULLGLOB 0x00000001
/* Print error message if path expansion produces empty string */
#define WRDSO_FAILGLOB 0x00000002
/* Allow a leading period to be matched by metacharacters. */
#define WRDSO_DOTGLOB 0x00000004
/* ws_command needs argv parameter */
#define WRDSO_ARGV 0x00000008
/* Keep backslash in unrecognized escape sequences in words */
#define WRDSO_BSKEEP_WORD 0x00000010
/* Handle octal escapes in words */
#define WRDSO_OESC_WORD 0x00000020
/* Handle hex escapes in words */
#define WRDSO_XESC_WORD 0x00000040
/* ws_maxwords field is initialized */
#define WRDSO_MAXWORDS 0x00000080
/* Keep backslash in unrecognized escape sequences in quoted strings */
#define WRDSO_BSKEEP_QUOTE 0x00000100
/* Handle octal escapes in quoted strings */
#define WRDSO_OESC_QUOTE 0x00000200
/* Handle hex escapes in quoted strings */
#define WRDSO_XESC_QUOTE 0x00000400
#define WRDSO_BSKEEP WRDSO_BSKEEP_WORD
#define WRDSO_OESC WRDSO_OESC_WORD
#define WRDSO_XESC WRDSO_XESC_WORD
/* Indices into ws_escape */
#define WRDSX_WORD 0
#define WRDSX_QUOTE 1
/* Set escape option F in WS for words (Q==0) or quoted strings (Q==1) */
#define WRDSO_ESC_SET(ws,q,f) ((ws)->ws_options |= ((f) << 4*(q)))
/* Test WS for escape option F for words (Q==0) or quoted strings (Q==1) */
#define WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << 4*(q)))
#define WRDSE_OK 0
#define WRDSE_EOF WRDSE_OK
#define WRDSE_QUOTE 1 #define WRDSE_QUOTE 1
#define WRDSE_NOSPACE 2 #define WRDSE_NOSPACE 2
#define WRDSE_NOSUPP 3 #define WRDSE_USAGE 3
#define WRDSE_USAGE 4 #define WRDSE_CBRACE 4
#define WRDSE_CBRACE 5 #define WRDSE_UNDEF 5
#define WRDSE_UNDEF 6 #define WRDSE_NOINPUT 6
#define WRDSE_NOINPUT 7 #define WRDSE_PAREN 7
#define WRDSE_GLOBERR 8
#define WRDSE_USERERR 9
int wordsplit (const char *s, struct wordsplit *p, int flags); int wordsplit (const char *s, wordsplit_t *ws, int flags);
int wordsplit_len (const char *s, size_t len, int wordsplit_len (const char *s, size_t len, wordsplit_t *ws, int flags);
struct wordsplit *p, int flags); void wordsplit_free (wordsplit_t *ws);
void wordsplit_free (struct wordsplit *p); void wordsplit_free_words (wordsplit_t *ws);
void wordsplit_free_words (struct wordsplit *ws); void wordsplit_free_envbuf (wordsplit_t *ws);
int wordsplit_get_words (wordsplit_t *ws, size_t *wordc, char ***wordv);
/* Deprecated spelling of wordsplit_get_words().  Forwards WS, WORDC and
   WORDV unchanged; the int status returned by wordsplit_get_words() is
   discarded, since this wrapper returns void.  */
__attribute__ ((deprecated))
static inline void
wordsplit_getwords (wordsplit_t *ws, size_t *wordc, char ***wordv)
{
  (void) wordsplit_get_words (ws, wordc, wordv);
}
int wordsplit_append (wordsplit_t *wsp, int argc, char **argv);
int wordsplit_c_unquote_char (int c); int wordsplit_c_unquote_char (int c);
int wordsplit_c_quote_char (int c); int wordsplit_c_quote_char (int c);
size_t wordsplit_c_quoted_length (const char *str, int quote_hex, size_t wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote);
int *quote);
void wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
const char *escapable);
void wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n);
void wordsplit_c_unquote_copy (char *dst, const char *src, size_t n);
void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex); void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
void wordsplit_perror (struct wordsplit *ws); void wordsplit_perror (wordsplit_t *ws);
const char *wordsplit_strerror (struct wordsplit *ws); const char *wordsplit_strerror (wordsplit_t *ws);
void wordsplit_clearerr (wordsplit_t *ws);
#endif #endif