libc: scalar memset() in RISC-V assembly

Adds a scalar implementation of memset() for RISC-V
and updates the relevant manpage.

os: FreeBSD
arch: riscv
        │ ./results/memset/memset_baseline │   ./results/memset/memset_scalar    │
        │              sec/op              │   sec/op     vs base                │
40                             527.5µ ± 1%   479.4µ ± 1%   -9.12% (p=0.000 n=20)
168                            254.5µ ± 1%   216.7µ ± 1%  -14.86% (p=0.000 n=20)
2k                             169.5µ ± 1%   128.4µ ± 0%  -24.24% (p=0.000 n=20)
256k                           161.2µ ± 1%   118.6µ ± 1%  -26.42% (p=0.000 n=20)
16m                            56.58m ± 0%   53.91m ± 0%   -4.72% (p=0.000 n=20)
geomean                        730.2µ        611.2µ       -16.29%

        │ ./results/memset/memset_baseline │    ./results/memset/memset_scalar     │
        │               B/s                │      B/s       vs base                │
40                            452.0Mi ± 1%    497.3Mi ± 1%  +10.04% (p=0.000 n=20)
168                           936.9Mi ± 1%   1100.4Mi ± 1%  +17.45% (p=0.000 n=20)
2k                            1.373Gi ± 1%    1.813Gi ± 0%  +32.00% (p=0.000 n=20)
256k                          1.444Gi ± 1%    1.962Gi ± 1%  +35.91% (p=0.000 n=20)
16m                           269.7Mi ± 0%    283.1Mi ± 0%   +4.96% (p=0.000 n=20)
geomean                       750.1Mi         896.1Mi       +19.47%

MFC after:	1 month
MFC to:		stable/15
Approved by:	mhorne, markj (mentor)
Reviewed by:	fuz
Sponsored by:	Google LLC (GSoC 2024)
Differential Revision:	https://reviews.freebsd.org/D45730
This commit is contained in:
Strahinja Stanišić
2024-06-21 17:43:45 +02:00
committed by Robert Clausecker
parent d2c23f5953
commit 40a958d585
2 changed files with 96 additions and 0 deletions
+1
View File
@@ -1,3 +1,4 @@
# Assembly (.S) sources appended to MDSRCS.
# NOTE(review): MDSRCS is presumably the machine-dependent source list
# consumed by the including Makefile -- confirm there.
MDSRCS+= \
memchr.S \
memset.S \
strrchr.S
+95
View File
@@ -0,0 +1,95 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
*/
#include <machine/asm.h>
/*
 * void *memset(void *dest, int c, size_t len)
 *
 * register a0 - void *dest (never written, so it is also the return value)
 * register a1 - int c (reduced to its low byte, then replicated 8 times)
 * register a2 - size_t len (number of bytes still to be written)
 *
 * t0 is the write cursor; t1-t4 are scratch.
 *
 * Three unrolled store tables are entered through computed jumps.  The
 * jump arithmetic assumes that every table entry is a single 4-byte
 * instruction, so the tables are assembled with compressed instructions
 * disabled.
 */
ENTRY(memset)
	andi	a1, a1, 0xFF		/* only the low byte of c is stored */

	/* Fewer than 8 bytes: go straight to the byte-wise tail. */
	sltiu	t1, a2, 8
	mv	t0, a0
	bnez	t1, .Lend

	/* Replicate the fill byte into all 8 bytes of a1. */
	li	t1, 0x0101010101010101
	mul	a1, a1, t1

	andi	t1, a0, 0b111		/* t1 = misalignment of dest (0..7) */
	andi	t0, a0, ~0b111		/* t0 = dest rounded down to 8 bytes */

	beqz	t1, .Lloop_store_64

	/*
	 * Write the 8 - t1 bytes up to the next 8-byte boundary by entering
	 * the table below at .Lduff_start + 4 * (t1 - 1).
	 */
	lla	t2, .Lduff_start
	slli	t3, t1, 2
	add	t2, t2, t3
	jr	-4(t2)
	.option	push
	.option	norvc
.Lduff_start:
	sb	a1, 1(t0)
	sb	a1, 2(t0)
	sb	a1, 3(t0)
	sb	a1, 4(t0)
	sb	a1, 5(t0)
	sb	a1, 6(t0)
	sb	a1, 7(t0)
	.option	pop

	/* a2 = a2 - (8 - t1) <=> a2 = a2 + (t1 - 8) */
	addi	t1, t1, -8
	add	a2, a2, t1
	addi	t0, t0, 8

.Lloop_store_64:
	/* len is a size_t, so the comparison has to be unsigned. */
	sltiu	t1, a2, 64
	bnez	t1, .Lstore_rest

	sd	a1, 0(t0)
	sd	a1, 8(t0)
	sd	a1, 16(t0)
	sd	a1, 24(t0)
	sd	a1, 32(t0)
	sd	a1, 40(t0)
	sd	a1, 48(t0)
	sd	a1, 56(t0)

	addi	a2, a2, -64
	addi	t0, t0, 64
	j	.Lloop_store_64

.Lstore_rest:
	/*
	 * 0 <= a2 < 64 here.  t3 = a2 rounded down to a multiple of 8 is
	 * the number of bytes still written as doublewords.  That takes
	 * t3 / 8 stores of 4 code bytes each, so jump t3 / 2 bytes back
	 * from .Lduff_rest.  At most 7 stores (56 bytes) are ever needed.
	 */
	lla	t2, .Lduff_rest
	andi	t3, a2, ~0b111
	srli	t4, t3, 1
	sub	t2, t2, t4
	jr	t2
	.option	push
	.option	norvc
	sd	a1, 48(t0)
	sd	a1, 40(t0)
	sd	a1, 32(t0)
	sd	a1, 24(t0)
	sd	a1, 16(t0)
	sd	a1, 8(t0)
	sd	a1, 0(t0)
	.option	pop
.Lduff_rest:
	add	t0, t0, t3
	sub	a2, a2, t3

.Lend:
	/*
	 * 0 <= a2 < 8 here.  Each remaining byte takes one 4-byte store, so
	 * jump 4 * a2 bytes back from .Lduff_end.
	 */
	slli	a2, a2, 2
	lla	t2, .Lduff_end
	sub	t2, t2, a2
	jr	t2
	.option	push
	.option	norvc
	sb	a1, 6(t0)
	sb	a1, 5(t0)
	sb	a1, 4(t0)
	sb	a1, 3(t0)
	sb	a1, 2(t0)
	sb	a1, 1(t0)
	sb	a1, (t0)
	.option	pop
.Lduff_end:
	ret
END(memset)