ota: Merge one true awk 20240422 (a3b68e649d2d)

Apr 22, 2024:
	Fixed a regex engine gototab reallocation issue that was
	introduced during the Nov 24 rewrite. Thanks to Arnold Robbins.
	Fixed a scan bug in split in the case where the separator is a
	single character. Thanks to Oguz Ismail for spotting the issue.

Mar 10, 2024:
	Fixed a use-after-free bug in fnematch due to adjbuf invalidating
	the pointers to buf. Thanks to GitHub user caffe3 for spotting
	the issue and providing a fix, and to Miguel Pineiro Jr.
	for the alternative fix.
	MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max.
	Thanks to Miguel Pineiro Jr.

Sponsored by:		Netflix
This commit is contained in:
Warner Losh
2024-05-04 15:50:33 -06:00
14 changed files with 44 additions and 395 deletions
-24
View File
@@ -47,30 +47,6 @@
* test/T.lilly: Remove gawk warnings from output, improves * test/T.lilly: Remove gawk warnings from output, improves
portability. portability.
2019-10-17 Arnold D. Robbins <arnold@skeeve.com>
Pull in systime() and strftime() from the NetBSD awk.
* awk.1: Document the functions.
* run.c (bltin): Implement the functions.
* awk.h: Add defines for systime and strftime.
* lex.c: Add support for systime and strftime.
2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
Integrate features from different *BSD versions of awk.
Gensub support from NetBSD. Bitwise functions from OpenBSD.
* awk.h: Add defines for and, or, xor, compl, lshift and rshift.
* awkgram.y: Add support for gensub.
* maketab.c: Ditto.
* lex.c: Add support for gensub and bitwise functions.
* parse.c (node5, op5): New functions.
* proto.h (node5, op5): New declarations.
* run.c (bltin): Implement the bitwise functions.
(gensub): New function.
* awk.1: Document additional functions.
2019-10-07 Arnold D. Robbins <arnold@skeeve.com> 2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
* b.c (fnematch): Change type of pbuf from unsigned char to char. * b.c (fnematch): Change type of pbuf from unsigned char to char.
+14
View File
@@ -25,6 +25,20 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the This file lists all bug fixes, changes, etc., made since the
second edition of the AWK book was published in September 2023. second edition of the AWK book was published in September 2023.
Apr 22, 2024:
fixed regex engine gototab reallocation issue that was
introduced during the Nov 24 rewrite. Thanks to Arnold Robbins.
Fixed a scan bug in split in the case the separator is a single
character. thanks to Oguz Ismail for spotting the issue.
Mar 10, 2024:
fixed use-after-free bug in fnematch due to adjbuf invalidating
the pointers to buf. thanks to github user caffe3 for spotting
the issue and providing a fix, and to Miguel Pineiro Jr.
for the alternative fix.
MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max.
thanks to Miguel Pineiro Jr.
Jan 22, 2024: Jan 22, 2024:
Restore the ability to compile with g++. Thanks to Restore the ability to compile with g++. Thanks to
Arnold Robbins. Arnold Robbins.
-52
View File
@@ -305,25 +305,6 @@ and
.B gsub .B gsub
return the number of replacements. return the number of replacements.
.TP .TP
\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR
replaces instances of
.I pat
in
.I target
with
.IR repl .
If
.I how
is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise,
.I how
is a number indicating which occurrence to replace. If no
.IR target ,
use
.BR $0 .
Return the resulting string;
.I target
is not modified.
.TP
.BI sprintf( fmt , " expr" , " ...\fB) .BI sprintf( fmt , " expr" , " ...\fB)
the string resulting from formatting the string resulting from formatting
.I expr ... .I expr ...
@@ -332,28 +313,6 @@ according to the
format format
.IR fmt . .IR fmt .
.TP .TP
.B systime()
returns the current date and time as a standard
``seconds since the epoch'' value.
.TP
.BI strftime( fmt ", " timestamp\^ )
formats
.I timestamp
(a value in seconds since the epoch)
according to
.IR fmt ,
which is a format string as supported by
.IR strftime (3).
Both
.I timestamp
and
.I fmt
may be omitted; if no
.IR timestamp ,
the current time of day is used, and if no
.IR fmt ,
a default format of \fB"%a %b %e %H:%M:%S %Z %Y"\fR is used.
.TP
.BI system( cmd ) .BI system( cmd )
executes executes
.I cmd .I cmd
@@ -413,17 +372,6 @@ In all cases,
returns 1 for a successful input, returns 1 for a successful input,
0 for end of file, and \-1 for an error. 0 for end of file, and \-1 for an error.
.PP .PP
The functions
.BR compl ,
.BR and ,
.BR or ,
.BR xor ,
.BR lshift ,
and
.B rshift
perform the corresponding bitwise operations on their
operands, which are first truncated to integer.
.PP
Patterns are arbitrary Boolean combinations Patterns are arbitrary Boolean combinations
(with (with
.BR "! || &&" ) .BR "! || &&" )
-8
View File
@@ -154,14 +154,6 @@ extern Cell *symtabloc; /* SYMTAB */
#define FTOUPPER 12 #define FTOUPPER 12
#define FTOLOWER 13 #define FTOLOWER 13
#define FFLUSH 14 #define FFLUSH 14
#define FAND 15
#define FFOR 16
#define FXOR 17
#define FCOMPL 18
#define FLSHIFT 19
#define FRSHIFT 20
#define FSYSTIME 21
#define FSTRFTIME 22
/* Node: parse tree is made of nodes, with Cell's at bottom */ /* Node: parse tree is made of nodes, with Cell's at bottom */
+1 -19
View File
@@ -53,7 +53,7 @@ Node *arglist = 0; /* list of args for current function */
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN %token <i> AND BOR APPEND EQ GE GT LE LT NE IN
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE %token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> ADD MINUS MULT DIVIDE MOD %token <i> ADD MINUS MULT DIVIDE MOD
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token <i> PRINT PRINTF SPRINTF %token <i> PRINT PRINTF SPRINTF
@@ -377,24 +377,6 @@ term:
| INCR var { $$ = op1(PREINCR, $2); } | INCR var { $$ = op1(PREINCR, $2); }
| var DECR { $$ = op1(POSTDECR, $1); } | var DECR { $$ = op1(POSTDECR, $1); }
| var INCR { $$ = op1(POSTINCR, $1); } | var INCR { $$ = op1(POSTINCR, $1); }
| GENSUB '(' reg_expr comma pattern comma pattern ')'
{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
| GENSUB '(' pattern comma pattern comma pattern ')'
{ if (constnode($3)) {
$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
free($3);
} else
$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
}
| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
{ if (constnode($3)) {
$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
free($3);
} else
$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
}
| GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); } | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
| GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); } | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); } | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
+20 -14
View File
@@ -651,8 +651,8 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implem
if (tab->inuse + 1 >= tab->allocated) if (tab->inuse + 1 >= tab->allocated)
resize_gototab(f, state); resize_gototab(f, state);
f->gototab[state].entries[f->gototab[state].inuse-1].ch = ch; f->gototab[state].entries[f->gototab[state].inuse].ch = ch;
f->gototab[state].entries[f->gototab[state].inuse-1].state = val; f->gototab[state].entries[f->gototab[state].inuse].state = val;
f->gototab[state].inuse++; f->gototab[state].inuse++;
return val; return val;
} else { } else {
@@ -677,9 +677,9 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implem
gtt *tab = & f->gototab[state]; gtt *tab = & f->gototab[state];
if (tab->inuse + 1 >= tab->allocated) if (tab->inuse + 1 >= tab->allocated)
resize_gototab(f, state); resize_gototab(f, state);
++tab->inuse;
f->gototab[state].entries[tab->inuse].ch = ch; f->gototab[state].entries[tab->inuse].ch = ch;
f->gototab[state].entries[tab->inuse].state = val; f->gototab[state].entries[tab->inuse].state = val;
++tab->inuse;
qsort(f->gototab[state].entries, qsort(f->gototab[state].entries,
f->gototab[state].inuse, sizeof(gtte), entry_cmp); f->gototab[state].inuse, sizeof(gtte), entry_cmp);
@@ -830,8 +830,6 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
} }
#define MAX_UTF_BYTES 4 // UTF-8 is up to 4 bytes long
/* /*
* NAME * NAME
* fnematch * fnematch
@@ -868,16 +866,28 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
do { do {
/* /*
* Call u8_rune with at least MAX_UTF_BYTES ahead in * Call u8_rune with at least awk_mb_cur_max ahead in
* the buffer until EOF interferes. * the buffer until EOF interferes.
*/ */
if (k - j < MAX_UTF_BYTES) { if (k - j < awk_mb_cur_max) {
if (k + MAX_UTF_BYTES > buf + bufsize) { if (k + awk_mb_cur_max > buf + bufsize) {
char *obuf = buf;
adjbuf((char **) &buf, &bufsize, adjbuf((char **) &buf, &bufsize,
bufsize + MAX_UTF_BYTES, bufsize + awk_mb_cur_max,
quantum, 0, "fnematch"); quantum, 0, "fnematch");
/* buf resized, maybe moved. update pointers */
*pbufsize = bufsize;
if (obuf != buf) {
i = buf + (i - obuf);
j = buf + (j - obuf);
k = buf + (k - obuf);
*pbuf = buf;
if (patlen)
patbeg = buf + (patbeg - obuf);
} }
for (n = MAX_UTF_BYTES ; n > 0; n--) { }
for (n = awk_mb_cur_max ; n > 0; n--) {
*k++ = (c = getc(f)) != EOF ? c : 0; *k++ = (c = getc(f)) != EOF ? c : 0;
if (c == EOF) { if (c == EOF) {
if (ferror(f)) if (ferror(f))
@@ -914,10 +924,6 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
s = 2; s = 2;
} while (1); } while (1);
/* adjbuf() may have relocated a resized buffer. Inform the world. */
*pbuf = buf;
*pbufsize = bufsize;
if (patlen) { if (patlen) {
/* /*
* Under no circumstances is the last character fed to * Under no circumstances is the last character fed to
+1 -1
View File
@@ -27,6 +27,6 @@ do
then then
rm -f $OUT rm -f $OUT
else else
echo '++++ $i failed!' echo "+++ $i failed!"
fi fi
done done
@@ -0,0 +1,3 @@
normal status 42
death by signal status 257
death by signal with core dump status 262
-9
View File
@@ -47,11 +47,9 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "BEGIN", XBEGIN, XBEGIN }, { "BEGIN", XBEGIN, XBEGIN },
{ "END", XEND, XEND }, { "END", XEND, XEND },
{ "NF", VARNF, VARNF }, { "NF", VARNF, VARNF },
{ "and", FAND, BLTIN },
{ "atan2", FATAN, BLTIN }, { "atan2", FATAN, BLTIN },
{ "break", BREAK, BREAK }, { "break", BREAK, BREAK },
{ "close", CLOSE, CLOSE }, { "close", CLOSE, CLOSE },
{ "compl", FCOMPL, BLTIN },
{ "continue", CONTINUE, CONTINUE }, { "continue", CONTINUE, CONTINUE },
{ "cos", FCOS, BLTIN }, { "cos", FCOS, BLTIN },
{ "delete", DELETE, DELETE }, { "delete", DELETE, DELETE },
@@ -63,7 +61,6 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "for", FOR, FOR }, { "for", FOR, FOR },
{ "func", FUNC, FUNC }, { "func", FUNC, FUNC },
{ "function", FUNC, FUNC }, { "function", FUNC, FUNC },
{ "gensub", GENSUB, GENSUB },
{ "getline", GETLINE, GETLINE }, { "getline", GETLINE, GETLINE },
{ "gsub", GSUB, GSUB }, { "gsub", GSUB, GSUB },
{ "if", IF, IF }, { "if", IF, IF },
@@ -72,30 +69,24 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "int", FINT, BLTIN }, { "int", FINT, BLTIN },
{ "length", FLENGTH, BLTIN }, { "length", FLENGTH, BLTIN },
{ "log", FLOG, BLTIN }, { "log", FLOG, BLTIN },
{ "lshift", FLSHIFT, BLTIN },
{ "match", MATCHFCN, MATCHFCN }, { "match", MATCHFCN, MATCHFCN },
{ "next", NEXT, NEXT }, { "next", NEXT, NEXT },
{ "nextfile", NEXTFILE, NEXTFILE }, { "nextfile", NEXTFILE, NEXTFILE },
{ "or", FFOR, BLTIN },
{ "print", PRINT, PRINT }, { "print", PRINT, PRINT },
{ "printf", PRINTF, PRINTF }, { "printf", PRINTF, PRINTF },
{ "rand", FRAND, BLTIN }, { "rand", FRAND, BLTIN },
{ "return", RETURN, RETURN }, { "return", RETURN, RETURN },
{ "rshift", FRSHIFT, BLTIN },
{ "sin", FSIN, BLTIN }, { "sin", FSIN, BLTIN },
{ "split", SPLIT, SPLIT }, { "split", SPLIT, SPLIT },
{ "sprintf", SPRINTF, SPRINTF }, { "sprintf", SPRINTF, SPRINTF },
{ "sqrt", FSQRT, BLTIN }, { "sqrt", FSQRT, BLTIN },
{ "srand", FSRAND, BLTIN }, { "srand", FSRAND, BLTIN },
{ "strftime", FSTRFTIME, BLTIN },
{ "sub", SUB, SUB }, { "sub", SUB, SUB },
{ "substr", SUBSTR, SUBSTR }, { "substr", SUBSTR, SUBSTR },
{ "system", FSYSTEM, BLTIN }, { "system", FSYSTEM, BLTIN },
{ "systime", FSYSTIME, BLTIN },
{ "tolower", FTOLOWER, BLTIN }, { "tolower", FTOLOWER, BLTIN },
{ "toupper", FTOUPPER, BLTIN }, { "toupper", FTOUPPER, BLTIN },
{ "while", WHILE, WHILE }, { "while", WHILE, WHILE },
{ "xor", FXOR, BLTIN },
}; };
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
+1 -1
View File
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE. THIS SOFTWARE.
****************************************************************/ ****************************************************************/
const char *version = "version 20240122"; const char *version = "version 20240422";
#define DEBUG #define DEBUG
#include <stdio.h> #include <stdio.h>
-1
View File
@@ -104,7 +104,6 @@ struct xx
{ ARG, "arg", "arg" }, { ARG, "arg", "arg" },
{ VARNF, "getnf", "NF" }, { VARNF, "getnf", "NF" },
{ GETLINE, "awkgetline", "getline" }, { GETLINE, "awkgetline", "getline" },
{ GENSUB, "gensub", "gensub" },
{ 0, "", "" }, { 0, "", "" },
}; };
-23
View File
@@ -93,20 +93,6 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e)
return(x); return(x);
} }
Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
{
Node *x;
x = nodealloc(5);
x->nobj = a;
x->narg[0] = b;
x->narg[1] = c;
x->narg[2] = d;
x->narg[3] = e;
x->narg[4] = f;
return(x);
}
Node *stat1(int a, Node *b) Node *stat1(int a, Node *b)
{ {
Node *x; Node *x;
@@ -179,15 +165,6 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e)
return(x); return(x);
} }
Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
{
Node *x;
x = node5(a,b,c,d,e,f);
x->ntype = NEXPR;
return(x);
}
Node *celltonode(Cell *a, int b) Node *celltonode(Cell *a, int b)
{ {
Node *x; Node *x;
-3
View File
@@ -73,14 +73,12 @@ extern Node *node1(int, Node *);
extern Node *node2(int, Node *, Node *); extern Node *node2(int, Node *, Node *);
extern Node *node3(int, Node *, Node *, Node *); extern Node *node3(int, Node *, Node *, Node *);
extern Node *node4(int, Node *, Node *, Node *, Node *); extern Node *node4(int, Node *, Node *, Node *, Node *);
extern Node *node5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat3(int, Node *, Node *, Node *); extern Node *stat3(int, Node *, Node *, Node *);
extern Node *op2(int, Node *, Node *); extern Node *op2(int, Node *, Node *);
extern Node *op1(int, Node *); extern Node *op1(int, Node *);
extern Node *stat1(int, Node *); extern Node *stat1(int, Node *);
extern Node *op3(int, Node *, Node *, Node *); extern Node *op3(int, Node *, Node *, Node *);
extern Node *op4(int, Node *, Node *, Node *, Node *); extern Node *op4(int, Node *, Node *, Node *, Node *);
extern Node *op5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat2(int, Node *, Node *); extern Node *stat2(int, Node *, Node *);
extern Node *stat4(int, Node *, Node *, Node *, Node *); extern Node *stat4(int, Node *, Node *, Node *, Node *);
extern Node *celltonode(Cell *, int); extern Node *celltonode(Cell *, int);
@@ -199,7 +197,6 @@ extern const char *filename(FILE *);
extern Cell *closefile(Node **, int); extern Cell *closefile(Node **, int);
extern void closeall(void); extern void closeall(void);
extern Cell *dosub(Node **, int); extern Cell *dosub(Node **, int);
extern Cell *gensub(Node **, int);
extern FILE *popen(const char *, const char *); extern FILE *popen(const char *, const char *);
extern int pclose(FILE *); extern int pclose(FILE *);
+4 -240
View File
@@ -1827,7 +1827,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
for (;;) { for (;;) {
n++; n++;
t = s; t = s;
while (*s != sep && *s != '\n' && *s != '\0') while (*s != sep && *s != '\0')
s++; s++;
temp = *s; temp = *s;
setptr(s, '\0'); setptr(s, '\0');
@@ -2062,14 +2062,12 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
{ {
Cell *x, *y; Cell *x, *y;
Awkfloat u; Awkfloat u;
int t, sz; int t;
Awkfloat tmp; Awkfloat tmp;
char *buf, *fmt; char *buf;
Node *nextarg; Node *nextarg;
FILE *fp; FILE *fp;
int status = 0; int status = 0;
time_t tv;
struct tm *tm;
int estatus = 0; int estatus = 0;
t = ptoi(a[0]); t = ptoi(a[0]);
@@ -2111,64 +2109,6 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
nextarg = nextarg->nnext; nextarg = nextarg->nnext;
} }
break; break;
case FCOMPL:
u = ~((int)getfval(x));
break;
case FAND:
if (nextarg == 0) {
WARNING("and requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) & ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FFOR:
if (nextarg == 0) {
WARNING("or requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) | ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FXOR:
if (nextarg == 0) {
WARNING("xor requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) ^ ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FLSHIFT:
if (nextarg == 0) {
WARNING("lshift requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) << ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FRSHIFT:
if (nextarg == 0) {
WARNING("rshift requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) >> ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FSYSTEM: case FSYSTEM:
fflush(stdout); /* in case something is buffered already */ fflush(stdout); /* in case something is buffered already */
estatus = status = system(getsval(x)); estatus = status = system(getsval(x));
@@ -2223,41 +2163,6 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
else else
u = fflush(fp); u = fflush(fp);
break; break;
case FSYSTIME:
u = time((time_t *) 0);
break;
case FSTRFTIME:
/* strftime([format [,timestamp]]) */
if (nextarg) {
y = execute(nextarg);
nextarg = nextarg->nnext;
tv = (time_t) getfval(y);
tempfree(y);
} else
tv = time((time_t *) 0);
tm = localtime(&tv);
if (tm == NULL)
FATAL("bad time %ld", (long)tv);
if (isrec(x)) {
/* format argument not provided, use default */
fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
} else
fmt = tostring(getsval(x));
sz = 32;
buf = NULL;
do {
if ((buf = realloc(buf, (sz *= 2))) == NULL)
FATAL("out of memory in strftime");
} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
y = gettemp();
setsval(y, buf);
free(fmt);
free(buf);
return y;
default: /* can't happen */ default: /* can't happen */
FATAL("illegal function type %d", t); FATAL("illegal function type %d", t);
break; break;
@@ -2501,7 +2406,7 @@ void backsub(char **pb_ptr, const char **sptr_ptr);
Cell *dosub(Node **a, int subop) /* sub and gsub */ Cell *dosub(Node **a, int subop) /* sub and gsub */
{ {
fa *pfa; fa *pfa;
int tempstat; int tempstat = 0;
char *repl; char *repl;
Cell *x; Cell *x;
@@ -2637,147 +2542,6 @@ Cell *dosub(Node **a, int subop) /* sub and gsub */
return x; return x;
} }
Cell *gensub(Node **a, int nnn) /* global selective substitute */
/* XXX incomplete - doesn't support backreferences \0 ... \9 */
{
Cell *x, *y, *res, *h;
char *rptr;
const char *sptr;
char *buf, *pb;
const char *t, *q;
fa *pfa;
int mflag, tempstat, num, whichm;
int bufsz = recsize;
if ((buf = malloc(bufsz)) == NULL)
FATAL("out of memory in gensub");
mflag = 0; /* if mflag == 0, can replace empty string */
num = 0;
x = execute(a[4]); /* source string */
t = getsval(x);
res = copycell(x); /* target string - initially copy of source */
res->csub = CTEMP; /* result values are temporary */
if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1]; /* regular expression */
else {
y = execute(a[1]);
pfa = makedfa(getsval(y), 1);
tempfree(y);
}
y = execute(a[2]); /* replacement string */
h = execute(a[3]); /* which matches should be replaced */
sptr = getsval(h);
if (sptr[0] == 'g' || sptr[0] == 'G')
whichm = -1;
else {
/*
* The specified number is index of replacement, starting
* from 1. GNU awk treats index lower than 0 same as
* 1, we do same for compatibility.
*/
whichm = (int) getfval(h) - 1;
if (whichm < 0)
whichm = 0;
}
tempfree(h);
if (pmatch(pfa, t)) {
char *sl;
tempstat = pfa->initstat;
pfa->initstat = 2;
pb = buf;
rptr = getsval(y);
/*
* XXX if there are any backreferences in subst string,
* complain now.
*/
for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
if (strchr("0123456789", sl[1])) {
FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
}
}
do {
if (whichm >= 0 && whichm != num) {
num++;
adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
/* copy the part of string up to and including
* match to output buffer */
while (t < patbeg + patlen)
*pb++ = *t++;
continue;
}
if (patlen == 0 && *patbeg != 0) { /* matched empty string */
if (mflag == 0) { /* can replace empty */
num++;
sptr = rptr;
while (*sptr != 0) {
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
if (*sptr == '\\') {
backsub(&pb, &sptr);
} else if (*sptr == '&') {
sptr++;
adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
for (q = patbeg; q < patbeg+patlen; )
*pb++ = *q++;
} else
*pb++ = *sptr++;
}
}
if (*t == 0) /* at end */
goto done;
adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
*pb++ = *t++;
if (pb > buf + bufsz) /* BUG: not sure of this test */
FATAL("gensub result0 %.30s too big; can't happen", buf);
mflag = 0;
}
else { /* matched nonempty string */
num++;
sptr = t;
adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
while (sptr < patbeg)
*pb++ = *sptr++;
sptr = rptr;
while (*sptr != 0) {
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
if (*sptr == '\\') {
backsub(&pb, &sptr);
} else if (*sptr == '&') {
sptr++;
adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
for (q = patbeg; q < patbeg+patlen; )
*pb++ = *q++;
} else
*pb++ = *sptr++;
}
t = patbeg + patlen;
if (patlen == 0 || *t == 0 || *(t-1) == 0)
goto done;
if (pb > buf + bufsz)
FATAL("gensub result1 %.30s too big; can't happen", buf);
mflag = 1;
}
} while (pmatch(pfa,t));
sptr = t;
adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
while ((*pb++ = *sptr++) != 0)
;
done: if (pb > buf + bufsz)
FATAL("gensub result2 %.30s too big; can't happen", buf);
*pb = '\0';
setsval(res, buf);
pfa->initstat = tempstat;
}
tempfree(x);
tempfree(y);
free(buf);
return(res);
}
void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
{ /* sptr[0] == '\\' */ { /* sptr[0] == '\\' */
char *pb = *pb_ptr; char *pb = *pb_ptr;