libc/amd64/strrchr.S: rewrite and fix scalar implementation
The original scalar implementation of strrchr() had incorrect logic that failed if the character searched for was the NUL character. It was also possibly affected by the issue fixed in 3d8ef251a for strchrnul(). Rewrite the function with logic that actually works. We defer checking for the character until after we have checked for NUL. When we encounter the final NUL byte, we mask out the characters beyond the tail before checking for a match. This bug only affects users running on amd64 with ARCHLEVEL=scalar (cf. simd(7)). The default configuration is not affected. The bug was unfortunately not caught by the unit test inherited from NetBSD. An extended unit test catching the issue is proposed in D56037. PR: 293915 Reported by: safonov.paul@gmail.com Tested by: safonov.paul@gmail.com Fixes: 2ed514a220 See also: https://reviews.freebsd.org/D56037 MFC after: 1 week
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
/*-
|
||||
* Copyright (c) 2023 The FreeBSD Foundation
|
||||
* Copyright (c) 2026 Robert Clausecker <fuz@FreeBSD.org>
|
||||
*
|
||||
* This software was developed by Robert Clausecker <fuz@FreeBSD.org>
|
||||
* under sponsorship from the FreeBSD Foundation.
|
||||
@@ -65,77 +66,50 @@ ARCHENTRY(strrchr, scalar)
|
||||
xor %rax, %rcx # str ^ c
|
||||
or %r10, %rax # ensure str != 0 before string
|
||||
or %r10, %rcx # ensure str^c != 0 before string
|
||||
bswap %rcx # in reverse order, to find last match
|
||||
mov %rdi, %r10 # location of initial mismatch (if any)
|
||||
xor %r11, %r11 # initial mismatch (none)
|
||||
xor %r11, %r11 # vector of last match (0 -> no match)
|
||||
add $8, %rdi # advance to next iteration
|
||||
lea (%rax, %r8, 1), %rdx # str - 0x01..01
|
||||
not %rax # ~str
|
||||
and %rdx, %rax # (str - 0x01..01) & ~str
|
||||
and %r9, %rax # not including junk bits
|
||||
jnz 1f # end of string?
|
||||
|
||||
lea (%rcx, %r8, 1), %rdx # (str ^ c) - 0x01..01
|
||||
not %rcx # ~(str ^ c)
|
||||
and %rdx, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c)
|
||||
and %r9, %rcx # not including junk bits
|
||||
mov %rcx, %r11 # remember mismatch in head
|
||||
jmp 0f
|
||||
|
||||
/* main loop unrolled twice */
|
||||
ALIGN_TEXT
|
||||
3: lea (%rcx, %r8, 1), %rdx # (str ^ c) - 0x01..01
|
||||
not %rcx # ~(str ^ c)
|
||||
and %rdx, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c)
|
||||
and %r9, %rcx # not including junk bits
|
||||
lea -8(%rdi), %rdx
|
||||
cmovnz %rdx, %r10 # remember location of current mismatch
|
||||
cmovnz %rcx, %r11
|
||||
|
||||
0: mov (%rdi), %rax # str
|
||||
mov %rsi, %rcx
|
||||
xor %rax, %rcx # str ^ c
|
||||
bswap %rcx # in reverse order, to find last match
|
||||
lea (%rax, %r8, 1), %rdx # str - 0x01..01
|
||||
not %rax # ~str
|
||||
and %rdx, %rax # (str - 0x01..01) & ~str
|
||||
and %r9, %rax # not including junk bits
|
||||
and %r9, %rax # NUL bytes in str, not including junk bits
|
||||
jnz 2f # end of string?
|
||||
|
||||
/* main loop */
|
||||
ALIGN_TEXT
|
||||
3: mov (%rdi), %rax # str
|
||||
bswap %rcx # (str ^ c) in reverse order, to find last match
|
||||
lea (%rcx, %r8, 1), %rdx # (str ^ c) - 0x01..01
|
||||
not %rcx # ~(str ^ c)
|
||||
and %rdx, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c)
|
||||
and %r9, %rcx # not including junk bits
|
||||
cmovnz %rdi, %r10 # remember location of current mismatch
|
||||
cmovnz %rcx, %r11
|
||||
and %r9, %rcx # matches in str, not including junk bits
|
||||
cmovnz %rdi, %r11 # if match found, update match vector
|
||||
cmovnz %rcx, %r10 # ... and match pointer
|
||||
|
||||
mov 8(%rdi), %rax # str
|
||||
add $16, %rdi
|
||||
add $8, %rdi # advance to next iteration
|
||||
mov %rsi, %rcx
|
||||
xor %rax, %rcx # str ^ c
|
||||
bswap %rcx
|
||||
lea (%rax, %r8, 1), %rdx # str - 0x01..01
|
||||
not %rax # ~str
|
||||
and %rdx, %rax # (str - 0x01..01) & ~str
|
||||
and %r9, %rax # not including junk bits
|
||||
and %r9, %rax # NUL bytes in str, not including junk bits
|
||||
jz 3b # end of string?
|
||||
|
||||
/* NUL found */
|
||||
1: sub $8, %rdi # undo advance past buffer
|
||||
2: lea (%rcx, %r8, 1), %rdx # (str ^ c) - 0x01..01
|
||||
/* NUL found, check for match in tail */
|
||||
2: mov %rax, %rdx
|
||||
neg %rax
|
||||
xor %rdx, %rax # all bytes behind the NUL byte
|
||||
or %rax, %rcx # (str ^ c) without matches behind NUL byte
|
||||
bswap %rcx # (src ^ c) in reverse order, to find last match
|
||||
lea (%rcx, %r8, 1), %rdx # (str ^ c) - 0x01..01
|
||||
not %rcx # ~(str ^ c)
|
||||
and %rdx, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c)
|
||||
and %r9, %rcx # not including junk bits
|
||||
lea -1(%rax), %rdx
|
||||
xor %rdx, %rax # mask of bytes in the string
|
||||
bswap %rdx # in reverse order
|
||||
and %rdx, %rcx # c found in the tail?
|
||||
cmovnz %rdi, %r10
|
||||
cmovnz %rcx, %r11
|
||||
bswap %r11 # unreverse byte order
|
||||
bsr %r11, %rcx # last location of c in (R10)
|
||||
shr $3, %rcx # as byte offset
|
||||
lea (%r10, %rcx, 1), %rax # pointer to match
|
||||
and %r9, %rcx # matches in str, not including junk bits
|
||||
cmovnz %rdi, %r11 # if match found, update match vector
|
||||
cmovnz %rcx, %r10 # ... and match pointer
|
||||
tzcnt %r11, %rcx # location of last match
|
||||
lea -1(%r10), %rax # address of last character in vector
|
||||
shr $3, %ecx # as byte offset
|
||||
sub %rcx, %rax # subtract character offset
|
||||
test %r11, %r11 # was there actually a match?
|
||||
cmovz %r11, %rax # if not, return null pointer
|
||||
ret
|
||||
|
||||
Reference in New Issue
Block a user