ota: Merge one true awk 20240422 (a3b68e649d2d)
Apr 22, 2024: Fixed regex engine gototab reallocation issue that was introduced during the Nov 24 rewrite. Thanks to Arnold Robbins. Fixed a scan bug in split in the case where the separator is a single character. Thanks to Oguz Ismail for spotting the issue. Mar 10, 2024: Fixed use-after-free bug in fnematch due to adjbuf invalidating the pointers to buf. Thanks to GitHub user caffe3 for spotting the issue and providing a fix, and to Miguel Pineiro Jr. for the alternative fix. MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max. Thanks to Miguel Pineiro Jr. Sponsored by: Netflix
This commit is contained in:
@@ -47,30 +47,6 @@
|
|||||||
* test/T.lilly: Remove gawk warnings from output, improves
|
* test/T.lilly: Remove gawk warnings from output, improves
|
||||||
portability.
|
portability.
|
||||||
|
|
||||||
2019-10-17 Arnold D. Robbins <arnold@skeeve.com>
|
|
||||||
|
|
||||||
Pull in systime() and strftime() from the NetBSD awk.
|
|
||||||
|
|
||||||
* awk.1: Document the functions.
|
|
||||||
* run.c (bltin): Implement the functions.
|
|
||||||
* awk.h: Add defines for systime and strftime.
|
|
||||||
* lex.c: Add support for systime and strftime.
|
|
||||||
|
|
||||||
2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
|
|
||||||
|
|
||||||
Integrate features from different *BSD versions of awk.
|
|
||||||
Gensub support from NetBSD. Bitwise functions from OpenBSD.
|
|
||||||
|
|
||||||
* awk.h: Add defines for and, or, xor, compl, lshift and rshift.
|
|
||||||
* awkgram.y: Add support for gensub.
|
|
||||||
* maketab.c: Ditto.
|
|
||||||
* lex.c: Add support for gensub and bitwise functions.
|
|
||||||
* parse.c (node5, op5): New functions.
|
|
||||||
* proto.h (node5, op5): New declarations.
|
|
||||||
* run.c (bltin): Implement the bitwise functions.
|
|
||||||
(gensub): New function.
|
|
||||||
* awk.1: Document additional functions.
|
|
||||||
|
|
||||||
2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
|
2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
|
||||||
|
|
||||||
* b.c (fnematch): Change type of pbuf from unsigned char to char.
|
* b.c (fnematch): Change type of pbuf from unsigned char to char.
|
||||||
|
|||||||
@@ -25,6 +25,20 @@ THIS SOFTWARE.
|
|||||||
This file lists all bug fixes, changes, etc., made since the
|
This file lists all bug fixes, changes, etc., made since the
|
||||||
second edition of the AWK book was published in September 2023.
|
second edition of the AWK book was published in September 2023.
|
||||||
|
|
||||||
|
Apr 22, 2024:
|
||||||
|
fixed regex engine gototab reallocation issue that was
|
||||||
|
introduced during the Nov 24 rewrite. Thanks to Arnold Robbins.
|
||||||
|
Fixed a scan bug in split in the case the separator is a single
|
||||||
|
character. Thanks to Oguz Ismail for spotting the issue.
|
||||||
|
|
||||||
|
Mar 10, 2024:
|
||||||
|
fixed use-after-free bug in fnematch due to adjbuf invalidating
|
||||||
|
the pointers to buf. Thanks to GitHub user caffe3 for spotting
|
||||||
|
the issue and providing a fix, and to Miguel Pineiro Jr.
|
||||||
|
for the alternative fix.
|
||||||
|
MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max.
|
||||||
|
Thanks to Miguel Pineiro Jr.
|
||||||
|
|
||||||
Jan 22, 2024:
|
Jan 22, 2024:
|
||||||
Restore the ability to compile with g++. Thanks to
|
Restore the ability to compile with g++. Thanks to
|
||||||
Arnold Robbins.
|
Arnold Robbins.
|
||||||
|
|||||||
@@ -305,25 +305,6 @@ and
|
|||||||
.B gsub
|
.B gsub
|
||||||
return the number of replacements.
|
return the number of replacements.
|
||||||
.TP
|
.TP
|
||||||
\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR
|
|
||||||
replaces instances of
|
|
||||||
.I pat
|
|
||||||
in
|
|
||||||
.I target
|
|
||||||
with
|
|
||||||
.IR repl .
|
|
||||||
If
|
|
||||||
.I how
|
|
||||||
is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise,
|
|
||||||
.I how
|
|
||||||
is a number indicating which occurrence to replace. If no
|
|
||||||
.IR target ,
|
|
||||||
use
|
|
||||||
.BR $0 .
|
|
||||||
Return the resulting string;
|
|
||||||
.I target
|
|
||||||
is not modified.
|
|
||||||
.TP
|
|
||||||
.BI sprintf( fmt , " expr" , " ...\fB)
|
.BI sprintf( fmt , " expr" , " ...\fB)
|
||||||
the string resulting from formatting
|
the string resulting from formatting
|
||||||
.I expr ...
|
.I expr ...
|
||||||
@@ -332,28 +313,6 @@ according to the
|
|||||||
format
|
format
|
||||||
.IR fmt .
|
.IR fmt .
|
||||||
.TP
|
.TP
|
||||||
.B systime()
|
|
||||||
returns the current date and time as a standard
|
|
||||||
``seconds since the epoch'' value.
|
|
||||||
.TP
|
|
||||||
.BI strftime( fmt ", " timestamp\^ )
|
|
||||||
formats
|
|
||||||
.I timestamp
|
|
||||||
(a value in seconds since the epoch)
|
|
||||||
according to
|
|
||||||
.IR fmt ,
|
|
||||||
which is a format string as supported by
|
|
||||||
.IR strftime (3).
|
|
||||||
Both
|
|
||||||
.I timestamp
|
|
||||||
and
|
|
||||||
.I fmt
|
|
||||||
may be omitted; if no
|
|
||||||
.IR timestamp ,
|
|
||||||
the current time of day is used, and if no
|
|
||||||
.IR fmt ,
|
|
||||||
a default format of \fB"%a %b %e %H:%M:%S %Z %Y"\fR is used.
|
|
||||||
.TP
|
|
||||||
.BI system( cmd )
|
.BI system( cmd )
|
||||||
executes
|
executes
|
||||||
.I cmd
|
.I cmd
|
||||||
@@ -413,17 +372,6 @@ In all cases,
|
|||||||
returns 1 for a successful input,
|
returns 1 for a successful input,
|
||||||
0 for end of file, and \-1 for an error.
|
0 for end of file, and \-1 for an error.
|
||||||
.PP
|
.PP
|
||||||
The functions
|
|
||||||
.BR compl ,
|
|
||||||
.BR and ,
|
|
||||||
.BR or ,
|
|
||||||
.BR xor ,
|
|
||||||
.BR lshift ,
|
|
||||||
and
|
|
||||||
.B rshift
|
|
||||||
perform the corresponding bitwise operations on their
|
|
||||||
operands, which are first truncated to integer.
|
|
||||||
.PP
|
|
||||||
Patterns are arbitrary Boolean combinations
|
Patterns are arbitrary Boolean combinations
|
||||||
(with
|
(with
|
||||||
.BR "! || &&" )
|
.BR "! || &&" )
|
||||||
|
|||||||
@@ -154,14 +154,6 @@ extern Cell *symtabloc; /* SYMTAB */
|
|||||||
#define FTOUPPER 12
|
#define FTOUPPER 12
|
||||||
#define FTOLOWER 13
|
#define FTOLOWER 13
|
||||||
#define FFLUSH 14
|
#define FFLUSH 14
|
||||||
#define FAND 15
|
|
||||||
#define FFOR 16
|
|
||||||
#define FXOR 17
|
|
||||||
#define FCOMPL 18
|
|
||||||
#define FLSHIFT 19
|
|
||||||
#define FRSHIFT 20
|
|
||||||
#define FSYSTIME 21
|
|
||||||
#define FSTRFTIME 22
|
|
||||||
|
|
||||||
/* Node: parse tree is made of nodes, with Cell's at bottom */
|
/* Node: parse tree is made of nodes, with Cell's at bottom */
|
||||||
|
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ Node *arglist = 0; /* list of args for current function */
|
|||||||
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
|
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
|
||||||
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
|
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
|
||||||
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
|
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
|
||||||
%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
|
%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
|
||||||
%token <i> ADD MINUS MULT DIVIDE MOD
|
%token <i> ADD MINUS MULT DIVIDE MOD
|
||||||
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
|
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
|
||||||
%token <i> PRINT PRINTF SPRINTF
|
%token <i> PRINT PRINTF SPRINTF
|
||||||
@@ -377,24 +377,6 @@ term:
|
|||||||
| INCR var { $$ = op1(PREINCR, $2); }
|
| INCR var { $$ = op1(PREINCR, $2); }
|
||||||
| var DECR { $$ = op1(POSTDECR, $1); }
|
| var DECR { $$ = op1(POSTDECR, $1); }
|
||||||
| var INCR { $$ = op1(POSTINCR, $1); }
|
| var INCR { $$ = op1(POSTINCR, $1); }
|
||||||
| GENSUB '(' reg_expr comma pattern comma pattern ')'
|
|
||||||
{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
|
|
||||||
| GENSUB '(' pattern comma pattern comma pattern ')'
|
|
||||||
{ if (constnode($3)) {
|
|
||||||
$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
|
|
||||||
free($3);
|
|
||||||
} else
|
|
||||||
$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
|
|
||||||
}
|
|
||||||
| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
|
|
||||||
{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
|
|
||||||
| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
|
|
||||||
{ if (constnode($3)) {
|
|
||||||
$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
|
|
||||||
free($3);
|
|
||||||
} else
|
|
||||||
$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
|
|
||||||
}
|
|
||||||
| GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
|
| GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
|
||||||
| GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
|
| GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
|
||||||
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
|
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
|
||||||
|
|||||||
+20
-14
@@ -651,8 +651,8 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implem
|
|||||||
if (tab->inuse + 1 >= tab->allocated)
|
if (tab->inuse + 1 >= tab->allocated)
|
||||||
resize_gototab(f, state);
|
resize_gototab(f, state);
|
||||||
|
|
||||||
f->gototab[state].entries[f->gototab[state].inuse-1].ch = ch;
|
f->gototab[state].entries[f->gototab[state].inuse].ch = ch;
|
||||||
f->gototab[state].entries[f->gototab[state].inuse-1].state = val;
|
f->gototab[state].entries[f->gototab[state].inuse].state = val;
|
||||||
f->gototab[state].inuse++;
|
f->gototab[state].inuse++;
|
||||||
return val;
|
return val;
|
||||||
} else {
|
} else {
|
||||||
@@ -677,9 +677,9 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implem
|
|||||||
gtt *tab = & f->gototab[state];
|
gtt *tab = & f->gototab[state];
|
||||||
if (tab->inuse + 1 >= tab->allocated)
|
if (tab->inuse + 1 >= tab->allocated)
|
||||||
resize_gototab(f, state);
|
resize_gototab(f, state);
|
||||||
++tab->inuse;
|
|
||||||
f->gototab[state].entries[tab->inuse].ch = ch;
|
f->gototab[state].entries[tab->inuse].ch = ch;
|
||||||
f->gototab[state].entries[tab->inuse].state = val;
|
f->gototab[state].entries[tab->inuse].state = val;
|
||||||
|
++tab->inuse;
|
||||||
|
|
||||||
qsort(f->gototab[state].entries,
|
qsort(f->gototab[state].entries,
|
||||||
f->gototab[state].inuse, sizeof(gtte), entry_cmp);
|
f->gototab[state].inuse, sizeof(gtte), entry_cmp);
|
||||||
@@ -830,8 +830,6 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#define MAX_UTF_BYTES 4 // UTF-8 is up to 4 bytes long
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NAME
|
* NAME
|
||||||
* fnematch
|
* fnematch
|
||||||
@@ -868,16 +866,28 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
/*
|
/*
|
||||||
* Call u8_rune with at least MAX_UTF_BYTES ahead in
|
* Call u8_rune with at least awk_mb_cur_max ahead in
|
||||||
* the buffer until EOF interferes.
|
* the buffer until EOF interferes.
|
||||||
*/
|
*/
|
||||||
if (k - j < MAX_UTF_BYTES) {
|
if (k - j < awk_mb_cur_max) {
|
||||||
if (k + MAX_UTF_BYTES > buf + bufsize) {
|
if (k + awk_mb_cur_max > buf + bufsize) {
|
||||||
|
char *obuf = buf;
|
||||||
adjbuf((char **) &buf, &bufsize,
|
adjbuf((char **) &buf, &bufsize,
|
||||||
bufsize + MAX_UTF_BYTES,
|
bufsize + awk_mb_cur_max,
|
||||||
quantum, 0, "fnematch");
|
quantum, 0, "fnematch");
|
||||||
|
|
||||||
|
/* buf resized, maybe moved. update pointers */
|
||||||
|
*pbufsize = bufsize;
|
||||||
|
if (obuf != buf) {
|
||||||
|
i = buf + (i - obuf);
|
||||||
|
j = buf + (j - obuf);
|
||||||
|
k = buf + (k - obuf);
|
||||||
|
*pbuf = buf;
|
||||||
|
if (patlen)
|
||||||
|
patbeg = buf + (patbeg - obuf);
|
||||||
}
|
}
|
||||||
for (n = MAX_UTF_BYTES ; n > 0; n--) {
|
}
|
||||||
|
for (n = awk_mb_cur_max ; n > 0; n--) {
|
||||||
*k++ = (c = getc(f)) != EOF ? c : 0;
|
*k++ = (c = getc(f)) != EOF ? c : 0;
|
||||||
if (c == EOF) {
|
if (c == EOF) {
|
||||||
if (ferror(f))
|
if (ferror(f))
|
||||||
@@ -914,10 +924,6 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
|
|||||||
s = 2;
|
s = 2;
|
||||||
} while (1);
|
} while (1);
|
||||||
|
|
||||||
/* adjbuf() may have relocated a resized buffer. Inform the world. */
|
|
||||||
*pbuf = buf;
|
|
||||||
*pbufsize = bufsize;
|
|
||||||
|
|
||||||
if (patlen) {
|
if (patlen) {
|
||||||
/*
|
/*
|
||||||
* Under no circumstances is the last character fed to
|
* Under no circumstances is the last character fed to
|
||||||
|
|||||||
@@ -27,6 +27,6 @@ do
|
|||||||
then
|
then
|
||||||
rm -f $OUT
|
rm -f $OUT
|
||||||
else
|
else
|
||||||
echo '++++ $i failed!'
|
echo "+++ $i failed!"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
normal status 42
|
||||||
|
death by signal status 257
|
||||||
|
death by signal with core dump status 262
|
||||||
@@ -47,11 +47,9 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
|
|||||||
{ "BEGIN", XBEGIN, XBEGIN },
|
{ "BEGIN", XBEGIN, XBEGIN },
|
||||||
{ "END", XEND, XEND },
|
{ "END", XEND, XEND },
|
||||||
{ "NF", VARNF, VARNF },
|
{ "NF", VARNF, VARNF },
|
||||||
{ "and", FAND, BLTIN },
|
|
||||||
{ "atan2", FATAN, BLTIN },
|
{ "atan2", FATAN, BLTIN },
|
||||||
{ "break", BREAK, BREAK },
|
{ "break", BREAK, BREAK },
|
||||||
{ "close", CLOSE, CLOSE },
|
{ "close", CLOSE, CLOSE },
|
||||||
{ "compl", FCOMPL, BLTIN },
|
|
||||||
{ "continue", CONTINUE, CONTINUE },
|
{ "continue", CONTINUE, CONTINUE },
|
||||||
{ "cos", FCOS, BLTIN },
|
{ "cos", FCOS, BLTIN },
|
||||||
{ "delete", DELETE, DELETE },
|
{ "delete", DELETE, DELETE },
|
||||||
@@ -63,7 +61,6 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
|
|||||||
{ "for", FOR, FOR },
|
{ "for", FOR, FOR },
|
||||||
{ "func", FUNC, FUNC },
|
{ "func", FUNC, FUNC },
|
||||||
{ "function", FUNC, FUNC },
|
{ "function", FUNC, FUNC },
|
||||||
{ "gensub", GENSUB, GENSUB },
|
|
||||||
{ "getline", GETLINE, GETLINE },
|
{ "getline", GETLINE, GETLINE },
|
||||||
{ "gsub", GSUB, GSUB },
|
{ "gsub", GSUB, GSUB },
|
||||||
{ "if", IF, IF },
|
{ "if", IF, IF },
|
||||||
@@ -72,30 +69,24 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
|
|||||||
{ "int", FINT, BLTIN },
|
{ "int", FINT, BLTIN },
|
||||||
{ "length", FLENGTH, BLTIN },
|
{ "length", FLENGTH, BLTIN },
|
||||||
{ "log", FLOG, BLTIN },
|
{ "log", FLOG, BLTIN },
|
||||||
{ "lshift", FLSHIFT, BLTIN },
|
|
||||||
{ "match", MATCHFCN, MATCHFCN },
|
{ "match", MATCHFCN, MATCHFCN },
|
||||||
{ "next", NEXT, NEXT },
|
{ "next", NEXT, NEXT },
|
||||||
{ "nextfile", NEXTFILE, NEXTFILE },
|
{ "nextfile", NEXTFILE, NEXTFILE },
|
||||||
{ "or", FFOR, BLTIN },
|
|
||||||
{ "print", PRINT, PRINT },
|
{ "print", PRINT, PRINT },
|
||||||
{ "printf", PRINTF, PRINTF },
|
{ "printf", PRINTF, PRINTF },
|
||||||
{ "rand", FRAND, BLTIN },
|
{ "rand", FRAND, BLTIN },
|
||||||
{ "return", RETURN, RETURN },
|
{ "return", RETURN, RETURN },
|
||||||
{ "rshift", FRSHIFT, BLTIN },
|
|
||||||
{ "sin", FSIN, BLTIN },
|
{ "sin", FSIN, BLTIN },
|
||||||
{ "split", SPLIT, SPLIT },
|
{ "split", SPLIT, SPLIT },
|
||||||
{ "sprintf", SPRINTF, SPRINTF },
|
{ "sprintf", SPRINTF, SPRINTF },
|
||||||
{ "sqrt", FSQRT, BLTIN },
|
{ "sqrt", FSQRT, BLTIN },
|
||||||
{ "srand", FSRAND, BLTIN },
|
{ "srand", FSRAND, BLTIN },
|
||||||
{ "strftime", FSTRFTIME, BLTIN },
|
|
||||||
{ "sub", SUB, SUB },
|
{ "sub", SUB, SUB },
|
||||||
{ "substr", SUBSTR, SUBSTR },
|
{ "substr", SUBSTR, SUBSTR },
|
||||||
{ "system", FSYSTEM, BLTIN },
|
{ "system", FSYSTEM, BLTIN },
|
||||||
{ "systime", FSYSTIME, BLTIN },
|
|
||||||
{ "tolower", FTOLOWER, BLTIN },
|
{ "tolower", FTOLOWER, BLTIN },
|
||||||
{ "toupper", FTOUPPER, BLTIN },
|
{ "toupper", FTOUPPER, BLTIN },
|
||||||
{ "while", WHILE, WHILE },
|
{ "while", WHILE, WHILE },
|
||||||
{ "xor", FXOR, BLTIN },
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
|
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
|||||||
THIS SOFTWARE.
|
THIS SOFTWARE.
|
||||||
****************************************************************/
|
****************************************************************/
|
||||||
|
|
||||||
const char *version = "version 20240122";
|
const char *version = "version 20240422";
|
||||||
|
|
||||||
#define DEBUG
|
#define DEBUG
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|||||||
@@ -104,7 +104,6 @@ struct xx
|
|||||||
{ ARG, "arg", "arg" },
|
{ ARG, "arg", "arg" },
|
||||||
{ VARNF, "getnf", "NF" },
|
{ VARNF, "getnf", "NF" },
|
||||||
{ GETLINE, "awkgetline", "getline" },
|
{ GETLINE, "awkgetline", "getline" },
|
||||||
{ GENSUB, "gensub", "gensub" },
|
|
||||||
{ 0, "", "" },
|
{ 0, "", "" },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -93,20 +93,6 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e)
|
|||||||
return(x);
|
return(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
|
|
||||||
{
|
|
||||||
Node *x;
|
|
||||||
|
|
||||||
x = nodealloc(5);
|
|
||||||
x->nobj = a;
|
|
||||||
x->narg[0] = b;
|
|
||||||
x->narg[1] = c;
|
|
||||||
x->narg[2] = d;
|
|
||||||
x->narg[3] = e;
|
|
||||||
x->narg[4] = f;
|
|
||||||
return(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
Node *stat1(int a, Node *b)
|
Node *stat1(int a, Node *b)
|
||||||
{
|
{
|
||||||
Node *x;
|
Node *x;
|
||||||
@@ -179,15 +165,6 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e)
|
|||||||
return(x);
|
return(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
|
|
||||||
{
|
|
||||||
Node *x;
|
|
||||||
|
|
||||||
x = node5(a,b,c,d,e,f);
|
|
||||||
x->ntype = NEXPR;
|
|
||||||
return(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
Node *celltonode(Cell *a, int b)
|
Node *celltonode(Cell *a, int b)
|
||||||
{
|
{
|
||||||
Node *x;
|
Node *x;
|
||||||
|
|||||||
@@ -73,14 +73,12 @@ extern Node *node1(int, Node *);
|
|||||||
extern Node *node2(int, Node *, Node *);
|
extern Node *node2(int, Node *, Node *);
|
||||||
extern Node *node3(int, Node *, Node *, Node *);
|
extern Node *node3(int, Node *, Node *, Node *);
|
||||||
extern Node *node4(int, Node *, Node *, Node *, Node *);
|
extern Node *node4(int, Node *, Node *, Node *, Node *);
|
||||||
extern Node *node5(int, Node *, Node *, Node *, Node *, Node *);
|
|
||||||
extern Node *stat3(int, Node *, Node *, Node *);
|
extern Node *stat3(int, Node *, Node *, Node *);
|
||||||
extern Node *op2(int, Node *, Node *);
|
extern Node *op2(int, Node *, Node *);
|
||||||
extern Node *op1(int, Node *);
|
extern Node *op1(int, Node *);
|
||||||
extern Node *stat1(int, Node *);
|
extern Node *stat1(int, Node *);
|
||||||
extern Node *op3(int, Node *, Node *, Node *);
|
extern Node *op3(int, Node *, Node *, Node *);
|
||||||
extern Node *op4(int, Node *, Node *, Node *, Node *);
|
extern Node *op4(int, Node *, Node *, Node *, Node *);
|
||||||
extern Node *op5(int, Node *, Node *, Node *, Node *, Node *);
|
|
||||||
extern Node *stat2(int, Node *, Node *);
|
extern Node *stat2(int, Node *, Node *);
|
||||||
extern Node *stat4(int, Node *, Node *, Node *, Node *);
|
extern Node *stat4(int, Node *, Node *, Node *, Node *);
|
||||||
extern Node *celltonode(Cell *, int);
|
extern Node *celltonode(Cell *, int);
|
||||||
@@ -199,7 +197,6 @@ extern const char *filename(FILE *);
|
|||||||
extern Cell *closefile(Node **, int);
|
extern Cell *closefile(Node **, int);
|
||||||
extern void closeall(void);
|
extern void closeall(void);
|
||||||
extern Cell *dosub(Node **, int);
|
extern Cell *dosub(Node **, int);
|
||||||
extern Cell *gensub(Node **, int);
|
|
||||||
|
|
||||||
extern FILE *popen(const char *, const char *);
|
extern FILE *popen(const char *, const char *);
|
||||||
extern int pclose(FILE *);
|
extern int pclose(FILE *);
|
||||||
|
|||||||
+4
-240
@@ -1827,7 +1827,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
|||||||
for (;;) {
|
for (;;) {
|
||||||
n++;
|
n++;
|
||||||
t = s;
|
t = s;
|
||||||
while (*s != sep && *s != '\n' && *s != '\0')
|
while (*s != sep && *s != '\0')
|
||||||
s++;
|
s++;
|
||||||
temp = *s;
|
temp = *s;
|
||||||
setptr(s, '\0');
|
setptr(s, '\0');
|
||||||
@@ -2062,14 +2062,12 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
|||||||
{
|
{
|
||||||
Cell *x, *y;
|
Cell *x, *y;
|
||||||
Awkfloat u;
|
Awkfloat u;
|
||||||
int t, sz;
|
int t;
|
||||||
Awkfloat tmp;
|
Awkfloat tmp;
|
||||||
char *buf, *fmt;
|
char *buf;
|
||||||
Node *nextarg;
|
Node *nextarg;
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
int status = 0;
|
int status = 0;
|
||||||
time_t tv;
|
|
||||||
struct tm *tm;
|
|
||||||
int estatus = 0;
|
int estatus = 0;
|
||||||
|
|
||||||
t = ptoi(a[0]);
|
t = ptoi(a[0]);
|
||||||
@@ -2111,64 +2109,6 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
|||||||
nextarg = nextarg->nnext;
|
nextarg = nextarg->nnext;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case FCOMPL:
|
|
||||||
u = ~((int)getfval(x));
|
|
||||||
break;
|
|
||||||
case FAND:
|
|
||||||
if (nextarg == 0) {
|
|
||||||
WARNING("and requires two arguments; returning 0");
|
|
||||||
u = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
y = execute(a[1]->nnext);
|
|
||||||
u = ((int)getfval(x)) & ((int)getfval(y));
|
|
||||||
tempfree(y);
|
|
||||||
nextarg = nextarg->nnext;
|
|
||||||
break;
|
|
||||||
case FFOR:
|
|
||||||
if (nextarg == 0) {
|
|
||||||
WARNING("or requires two arguments; returning 0");
|
|
||||||
u = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
y = execute(a[1]->nnext);
|
|
||||||
u = ((int)getfval(x)) | ((int)getfval(y));
|
|
||||||
tempfree(y);
|
|
||||||
nextarg = nextarg->nnext;
|
|
||||||
break;
|
|
||||||
case FXOR:
|
|
||||||
if (nextarg == 0) {
|
|
||||||
WARNING("xor requires two arguments; returning 0");
|
|
||||||
u = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
y = execute(a[1]->nnext);
|
|
||||||
u = ((int)getfval(x)) ^ ((int)getfval(y));
|
|
||||||
tempfree(y);
|
|
||||||
nextarg = nextarg->nnext;
|
|
||||||
break;
|
|
||||||
case FLSHIFT:
|
|
||||||
if (nextarg == 0) {
|
|
||||||
WARNING("lshift requires two arguments; returning 0");
|
|
||||||
u = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
y = execute(a[1]->nnext);
|
|
||||||
u = ((int)getfval(x)) << ((int)getfval(y));
|
|
||||||
tempfree(y);
|
|
||||||
nextarg = nextarg->nnext;
|
|
||||||
break;
|
|
||||||
case FRSHIFT:
|
|
||||||
if (nextarg == 0) {
|
|
||||||
WARNING("rshift requires two arguments; returning 0");
|
|
||||||
u = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
y = execute(a[1]->nnext);
|
|
||||||
u = ((int)getfval(x)) >> ((int)getfval(y));
|
|
||||||
tempfree(y);
|
|
||||||
nextarg = nextarg->nnext;
|
|
||||||
break;
|
|
||||||
case FSYSTEM:
|
case FSYSTEM:
|
||||||
fflush(stdout); /* in case something is buffered already */
|
fflush(stdout); /* in case something is buffered already */
|
||||||
estatus = status = system(getsval(x));
|
estatus = status = system(getsval(x));
|
||||||
@@ -2223,41 +2163,6 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
|||||||
else
|
else
|
||||||
u = fflush(fp);
|
u = fflush(fp);
|
||||||
break;
|
break;
|
||||||
case FSYSTIME:
|
|
||||||
u = time((time_t *) 0);
|
|
||||||
break;
|
|
||||||
case FSTRFTIME:
|
|
||||||
/* strftime([format [,timestamp]]) */
|
|
||||||
if (nextarg) {
|
|
||||||
y = execute(nextarg);
|
|
||||||
nextarg = nextarg->nnext;
|
|
||||||
tv = (time_t) getfval(y);
|
|
||||||
tempfree(y);
|
|
||||||
} else
|
|
||||||
tv = time((time_t *) 0);
|
|
||||||
tm = localtime(&tv);
|
|
||||||
if (tm == NULL)
|
|
||||||
FATAL("bad time %ld", (long)tv);
|
|
||||||
|
|
||||||
if (isrec(x)) {
|
|
||||||
/* format argument not provided, use default */
|
|
||||||
fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
|
|
||||||
} else
|
|
||||||
fmt = tostring(getsval(x));
|
|
||||||
|
|
||||||
sz = 32;
|
|
||||||
buf = NULL;
|
|
||||||
do {
|
|
||||||
if ((buf = realloc(buf, (sz *= 2))) == NULL)
|
|
||||||
FATAL("out of memory in strftime");
|
|
||||||
} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
|
|
||||||
|
|
||||||
y = gettemp();
|
|
||||||
setsval(y, buf);
|
|
||||||
free(fmt);
|
|
||||||
free(buf);
|
|
||||||
|
|
||||||
return y;
|
|
||||||
default: /* can't happen */
|
default: /* can't happen */
|
||||||
FATAL("illegal function type %d", t);
|
FATAL("illegal function type %d", t);
|
||||||
break;
|
break;
|
||||||
@@ -2501,7 +2406,7 @@ void backsub(char **pb_ptr, const char **sptr_ptr);
|
|||||||
Cell *dosub(Node **a, int subop) /* sub and gsub */
|
Cell *dosub(Node **a, int subop) /* sub and gsub */
|
||||||
{
|
{
|
||||||
fa *pfa;
|
fa *pfa;
|
||||||
int tempstat;
|
int tempstat = 0;
|
||||||
char *repl;
|
char *repl;
|
||||||
Cell *x;
|
Cell *x;
|
||||||
|
|
||||||
@@ -2637,147 +2542,6 @@ Cell *dosub(Node **a, int subop) /* sub and gsub */
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
Cell *gensub(Node **a, int nnn) /* global selective substitute */
|
|
||||||
/* XXX incomplete - doesn't support backreferences \0 ... \9 */
|
|
||||||
{
|
|
||||||
Cell *x, *y, *res, *h;
|
|
||||||
char *rptr;
|
|
||||||
const char *sptr;
|
|
||||||
char *buf, *pb;
|
|
||||||
const char *t, *q;
|
|
||||||
fa *pfa;
|
|
||||||
int mflag, tempstat, num, whichm;
|
|
||||||
int bufsz = recsize;
|
|
||||||
|
|
||||||
if ((buf = malloc(bufsz)) == NULL)
|
|
||||||
FATAL("out of memory in gensub");
|
|
||||||
mflag = 0; /* if mflag == 0, can replace empty string */
|
|
||||||
num = 0;
|
|
||||||
x = execute(a[4]); /* source string */
|
|
||||||
t = getsval(x);
|
|
||||||
res = copycell(x); /* target string - initially copy of source */
|
|
||||||
res->csub = CTEMP; /* result values are temporary */
|
|
||||||
if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
|
|
||||||
pfa = (fa *) a[1]; /* regular expression */
|
|
||||||
else {
|
|
||||||
y = execute(a[1]);
|
|
||||||
pfa = makedfa(getsval(y), 1);
|
|
||||||
tempfree(y);
|
|
||||||
}
|
|
||||||
y = execute(a[2]); /* replacement string */
|
|
||||||
h = execute(a[3]); /* which matches should be replaced */
|
|
||||||
sptr = getsval(h);
|
|
||||||
if (sptr[0] == 'g' || sptr[0] == 'G')
|
|
||||||
whichm = -1;
|
|
||||||
else {
|
|
||||||
/*
|
|
||||||
* The specified number is index of replacement, starting
|
|
||||||
* from 1. GNU awk treats index lower than 0 same as
|
|
||||||
* 1, we do same for compatibility.
|
|
||||||
*/
|
|
||||||
whichm = (int) getfval(h) - 1;
|
|
||||||
if (whichm < 0)
|
|
||||||
whichm = 0;
|
|
||||||
}
|
|
||||||
tempfree(h);
|
|
||||||
|
|
||||||
if (pmatch(pfa, t)) {
|
|
||||||
char *sl;
|
|
||||||
|
|
||||||
tempstat = pfa->initstat;
|
|
||||||
pfa->initstat = 2;
|
|
||||||
pb = buf;
|
|
||||||
rptr = getsval(y);
|
|
||||||
/*
|
|
||||||
* XXX if there are any backreferences in subst string,
|
|
||||||
* complain now.
|
|
||||||
*/
|
|
||||||
for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
|
|
||||||
if (strchr("0123456789", sl[1])) {
|
|
||||||
FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (whichm >= 0 && whichm != num) {
|
|
||||||
num++;
|
|
||||||
adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
|
|
||||||
|
|
||||||
/* copy the part of string up to and including
|
|
||||||
* match to output buffer */
|
|
||||||
while (t < patbeg + patlen)
|
|
||||||
*pb++ = *t++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (patlen == 0 && *patbeg != 0) { /* matched empty string */
|
|
||||||
if (mflag == 0) { /* can replace empty */
|
|
||||||
num++;
|
|
||||||
sptr = rptr;
|
|
||||||
while (*sptr != 0) {
|
|
||||||
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
|
|
||||||
if (*sptr == '\\') {
|
|
||||||
backsub(&pb, &sptr);
|
|
||||||
} else if (*sptr == '&') {
|
|
||||||
sptr++;
|
|
||||||
adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
|
|
||||||
for (q = patbeg; q < patbeg+patlen; )
|
|
||||||
*pb++ = *q++;
|
|
||||||
} else
|
|
||||||
*pb++ = *sptr++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (*t == 0) /* at end */
|
|
||||||
goto done;
|
|
||||||
adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
|
|
||||||
*pb++ = *t++;
|
|
||||||
if (pb > buf + bufsz) /* BUG: not sure of this test */
|
|
||||||
FATAL("gensub result0 %.30s too big; can't happen", buf);
|
|
||||||
mflag = 0;
|
|
||||||
}
|
|
||||||
else { /* matched nonempty string */
|
|
||||||
num++;
|
|
||||||
sptr = t;
|
|
||||||
adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
|
|
||||||
while (sptr < patbeg)
|
|
||||||
*pb++ = *sptr++;
|
|
||||||
sptr = rptr;
|
|
||||||
while (*sptr != 0) {
|
|
||||||
adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
|
|
||||||
if (*sptr == '\\') {
|
|
||||||
backsub(&pb, &sptr);
|
|
||||||
} else if (*sptr == '&') {
|
|
||||||
sptr++;
|
|
||||||
adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
|
|
||||||
for (q = patbeg; q < patbeg+patlen; )
|
|
||||||
*pb++ = *q++;
|
|
||||||
} else
|
|
||||||
*pb++ = *sptr++;
|
|
||||||
}
|
|
||||||
t = patbeg + patlen;
|
|
||||||
if (patlen == 0 || *t == 0 || *(t-1) == 0)
|
|
||||||
goto done;
|
|
||||||
if (pb > buf + bufsz)
|
|
||||||
FATAL("gensub result1 %.30s too big; can't happen", buf);
|
|
||||||
mflag = 1;
|
|
||||||
}
|
|
||||||
} while (pmatch(pfa,t));
|
|
||||||
sptr = t;
|
|
||||||
adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
|
|
||||||
while ((*pb++ = *sptr++) != 0)
|
|
||||||
;
|
|
||||||
done: if (pb > buf + bufsz)
|
|
||||||
FATAL("gensub result2 %.30s too big; can't happen", buf);
|
|
||||||
*pb = '\0';
|
|
||||||
setsval(res, buf);
|
|
||||||
pfa->initstat = tempstat;
|
|
||||||
}
|
|
||||||
tempfree(x);
|
|
||||||
tempfree(y);
|
|
||||||
free(buf);
|
|
||||||
return(res);
|
|
||||||
}
|
|
||||||
|
|
||||||
void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
|
void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
|
||||||
{ /* sptr[0] == '\\' */
|
{ /* sptr[0] == '\\' */
|
||||||
char *pb = *pb_ptr;
|
char *pb = *pb_ptr;
|
||||||
|
|||||||
Reference in New Issue
Block a user