tests/ipfw: fix log:bpf test flakiness

There were several problems:

o Using 'netstat -B' is not a reliable way to make sure that all tcpdumps
  have attached to bpf(4).  The problem is that tcpdump (via libpcap)
  issues several ioctl(2)s after the attach, including two BIOCSETF, and
  each BIOCSETF flushes the input buffer.  So we can see tcpdump attached
  in 'netstat -B' and start sending packets, but a packet captured by
  bpf(4) before a BIOCSETF is freed by the flush and tcpdump never reads
  it.  Instead of using netstat(1), use ps(1) and make sure each tcpdump
  is blocked on the "bpf" wait channel, which guarantees that it is done
  with its ioctl(2)s and is now blocked in read(2).
o Using 'nc -w 0' sets a timeout not only on connect(2) (as documented)
  but also on poll(2), which is not documented.  There is a race in the
  shell: stdin may not yet have been filled by 'echo foo' when nc(1)
  calls poll(2).  With a zero timeout, this poll(2) returns immediately
  and nc exits without sending anything.
o The waiting loop had two errors: it used the wrong variable name, and it
  invoked a subshell, which cannot actually wait on the pid.
o The reading tcpdump lacked the '-q' option, which prevents any protocol
  interpretation.  Sometimes, when the random port chosen by nc(1) matched
  a port well-known to tcpdump, the output would differ from what was
  expected.

PR:	293241
This commit is contained in:
Gleb Smirnoff
2026-02-18 18:39:00 -08:00
parent d60082f16e
commit 38edf96b17
+11 -8
View File
@@ -57,31 +57,34 @@ bpf_body()
pids="${pids} $!"
done
# wait for tcpdumps to attach, include netstat(1) header in ${count}
count=$(( $(echo ${rules} ${auto} | wc -w) + 1))
while [ $(jexec alcatraz netstat -B | wc -l) -ne ${count} ]; do
sleep 0.01;
# wait for tcpdumps to fully attach and block in bpfread()
for p in ${pids}; do
while [ $(ps -o wchan ${p} | tr "\n" " " | cut -w -f 2) != \
"bpf" ]; do
sleep 0.01;
done
done
for p in ${rules} 666; do
echo foo | nc -u 192.0.2.1 10${p} -w 0
echo foo | nc -u 192.0.2.1 10${p}
done
for p in ${pids}; do
atf_check -s exit:0 sh -c "wait $pid; exit $?"
wait ${p}
atf_check_equal 0 $?
done
# statically numbered taps
for p in ${rules}; do
atf_check -o match:"192.0.2.0.[0-9]+ > 192.0.2.1.10${p}: UDP" \
-e match:"reading from file [a-zA-Z0-9/.]+${p}.pcap" \
tcpdump -nr ${PWD}/${p}.pcap
tcpdump -qnr ${PWD}/${p}.pcap
done
# autonumbered tap with 10666 port
atf_check -o match:"192.0.2.0.[0-9]+ > 192.0.2.1.10666: UDP" \
-e match:"reading from file [a-zA-Z0-9/.]+${auto}.pcap" \
tcpdump -nr ${PWD}/${auto}.pcap
tcpdump -qnr ${PWD}/${auto}.pcap
}
bpf_cleanup()