Merge ^/head r305892 through r306302.
This commit is contained in:
@@ -31,6 +31,16 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 12.x IS SLOW:
|
||||
disable the most expensive debugging functionality run
|
||||
"ln -s 'abort:false,junk:false' /etc/malloc.conf".)
|
||||
|
||||
20160918:
|
||||
GNU rcs has been turned off by default. It can (temporarily) be built
|
||||
again by adding WITH_RCS knob in src.conf.
|
||||
Otherwise, GNU rcs is available from packages:
|
||||
- rcs: Latest GPLv3 GNU rcs version.
|
||||
- rcs57: Copy of the latest version of GNU rcs (GPLv2) from base.
|
||||
|
||||
20160918:
|
||||
The backup_uses_rcs functionality has been removed from rc.subr.
|
||||
|
||||
20160908:
|
||||
The queue(3) debugging macro, QUEUE_MACRO_DEBUG, has been split into
|
||||
two separate components, QUEUE_MACRO_DEBUG_TRACE and
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
*.a
|
||||
*.o
|
||||
*.la
|
||||
*.lo
|
||||
*.png
|
||||
*.pyc
|
||||
.deps
|
||||
.dirstamp
|
||||
.libs
|
||||
try-*
|
||||
cache.txt
|
||||
@@ -0,0 +1,327 @@
|
||||
# Copyright (c) 2011, Linaro Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the Linaro nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
|
||||
# Top level Makefile for cortex-strings
|
||||
|
||||
# Used to record the compiler version in the executables
|
||||
COMPILER = $(shell $(CC) --version 2>&1 | head -n1)
|
||||
|
||||
# The main library
|
||||
lib_LTLIBRARIES = \
|
||||
libcortex-strings.la
|
||||
|
||||
## Test suite
|
||||
check_PROGRAMS = \
|
||||
tests/test-memchr \
|
||||
tests/test-memcmp \
|
||||
tests/test-memcpy \
|
||||
tests/test-memmove \
|
||||
tests/test-memset \
|
||||
tests/test-strchr \
|
||||
tests/test-strcmp \
|
||||
tests/test-strcpy \
|
||||
tests/test-strlen \
|
||||
tests/test-strncmp \
|
||||
tests/test-strnlen
|
||||
|
||||
# Options for the tests
|
||||
tests_cflags = -I$(srcdir)/tests $(AM_CFLAGS)
|
||||
tests_ldadd = libcortex-strings.la
|
||||
tests_test_memchr_LDADD = $(tests_ldadd)
|
||||
tests_test_memchr_CFLAGS = $(tests_cflags)
|
||||
tests_test_memcmp_LDADD = $(tests_ldadd)
|
||||
tests_test_memcmp_CFLAGS = $(tests_cflags)
|
||||
tests_test_memcpy_LDADD = $(tests_ldadd)
|
||||
tests_test_memcpy_CFLAGS = $(tests_cflags)
|
||||
tests_test_memmove_LDADD = $(tests_ldadd)
|
||||
tests_test_memmove_CFLAGS = $(tests_cflags)
|
||||
tests_test_memset_LDADD = $(tests_ldadd)
|
||||
tests_test_memset_CFLAGS = $(tests_cflags)
|
||||
tests_test_strchr_LDADD = $(tests_ldadd)
|
||||
tests_test_strchr_CFLAGS = $(tests_cflags)
|
||||
tests_test_strcmp_LDADD = $(tests_ldadd)
|
||||
tests_test_strcmp_CFLAGS = $(tests_cflags)
|
||||
tests_test_strcpy_LDADD = $(tests_ldadd)
|
||||
tests_test_strcpy_CFLAGS = $(tests_cflags)
|
||||
tests_test_strlen_LDADD = $(tests_ldadd)
|
||||
tests_test_strlen_CFLAGS = $(tests_cflags)
|
||||
tests_test_strncmp_LDADD = $(tests_ldadd)
|
||||
tests_test_strncmp_CFLAGS = $(tests_cflags)
# tests/test-strnlen is listed in check_PROGRAMS, so give it the same
# per-program options as the other tests; without these it is not linked
# against libcortex-strings.la.
tests_test_strnlen_LDADD = $(tests_ldadd)
tests_test_strnlen_CFLAGS = $(tests_cflags)
|
||||
|
||||
TESTS = $(check_PROGRAMS)
|
||||
|
||||
## Benchmarks
|
||||
noinst_PROGRAMS = \
|
||||
dhry \
|
||||
dhry-native \
|
||||
try-none \
|
||||
try-this \
|
||||
try-plain \
|
||||
try-newlib-c \
|
||||
try-bionic-c \
|
||||
try-glibc-c
|
||||
|
||||
# Good ol' Dhrystone
|
||||
dhry_SOURCES = \
|
||||
benchmarks/dhry/dhry_1.c \
|
||||
benchmarks/dhry/dhry_2.c \
|
||||
benchmarks/dhry/dhry.h
|
||||
|
||||
dhry_CFLAGS = -Dcompiler="\"$(COMPILER)\"" -Doptions="\"$(CFLAGS)\""
|
||||
dhry_LDADD = libcortex-strings.la
|
||||
|
||||
dhry_native_SOURCES = $(dhry_SOURCES)
|
||||
dhry_native_CFLAGS = $(dhry_CFLAGS)
|
||||
|
||||
# Benchmark harness
|
||||
noinst_LIBRARIES = \
|
||||
libmulti.a \
|
||||
libbionic-c.a \
|
||||
libglibc-c.a \
|
||||
libnewlib-c.a \
|
||||
libplain.a
|
||||
|
||||
libmulti_a_SOURCES = \
|
||||
benchmarks/multi/harness.c
|
||||
|
||||
libmulti_a_CFLAGS = -DVERSION=\"$(VERSION)\" $(AM_CFLAGS)
|
||||
|
||||
## Other architecture independent implementations
|
||||
libbionic_c_a_SOURCES = \
|
||||
reference/bionic-c/bcopy.c \
|
||||
reference/bionic-c/memchr.c \
|
||||
reference/bionic-c/memcmp.c \
|
||||
reference/bionic-c/memcpy.c \
|
||||
reference/bionic-c/memset.c \
|
||||
reference/bionic-c/strchr.c \
|
||||
reference/bionic-c/strcmp.c \
|
||||
reference/bionic-c/strcpy.c \
|
||||
reference/bionic-c/strlen.c
|
||||
|
||||
libglibc_c_a_SOURCES = \
|
||||
reference/glibc-c/memchr.c \
|
||||
reference/glibc-c/memcmp.c \
|
||||
reference/glibc-c/memcpy.c \
|
||||
reference/glibc-c/memset.c \
|
||||
reference/glibc-c/strchr.c \
|
||||
reference/glibc-c/strcmp.c \
|
||||
reference/glibc-c/strcpy.c \
|
||||
reference/glibc-c/strlen.c \
|
||||
reference/glibc-c/wordcopy.c \
|
||||
reference/glibc-c/memcopy.h \
|
||||
reference/glibc-c/pagecopy.h
|
||||
|
||||
libnewlib_c_a_SOURCES = \
|
||||
reference/newlib-c/memchr.c \
|
||||
reference/newlib-c/memcmp.c \
|
||||
reference/newlib-c/memcpy.c \
|
||||
reference/newlib-c/memset.c \
|
||||
reference/newlib-c/strchr.c \
|
||||
reference/newlib-c/strcmp.c \
|
||||
reference/newlib-c/strcpy.c \
|
||||
reference/newlib-c/strlen.c \
|
||||
reference/newlib-c/shim.h
|
||||
|
||||
libplain_a_SOURCES = \
|
||||
reference/plain/memset.c \
|
||||
reference/plain/memcpy.c \
|
||||
reference/plain/strcmp.c \
|
||||
reference/plain/strcpy.c
|
||||
|
||||
try_none_SOURCES =
|
||||
try_none_LDADD = libmulti.a -lrt
|
||||
try_this_SOURCES =
|
||||
try_this_LDADD = libmulti.a libcortex-strings.la -lrt
|
||||
try_bionic_c_SOURCES =
|
||||
try_bionic_c_LDADD = libmulti.a libbionic-c.a -lrt
|
||||
try_glibc_c_SOURCES =
|
||||
try_glibc_c_LDADD = libmulti.a libglibc-c.a -lrt
|
||||
try_newlib_c_SOURCES =
|
||||
try_newlib_c_LDADD = libmulti.a libnewlib-c.a -lrt
|
||||
try_plain_SOURCES =
|
||||
try_plain_LDADD = libmulti.a libplain.a -lrt
|
||||
|
||||
# Architecture specific
|
||||
|
||||
if HOST_AARCH32
|
||||
|
||||
if WITH_NEON
|
||||
# Pull in the NEON specific files
|
||||
neon_bionic_a9_sources = \
|
||||
reference/bionic-a9/memcpy.S \
|
||||
reference/bionic-a9/memset.S
|
||||
neon_bionic_a15_sources = \
|
||||
reference/bionic-a15/memcpy.S \
|
||||
reference/bionic-a15/memset.S
|
||||
fpu_flags = -mfpu=neon
|
||||
else
|
||||
if WITH_VFP
|
||||
fpu_flags = -mfpu=vfp
|
||||
else
|
||||
fpu_flags = -msoft-float
|
||||
endif
|
||||
endif
|
||||
|
||||
# Benchmarks and example programs
|
||||
noinst_PROGRAMS += \
|
||||
try-bionic-a9 \
|
||||
try-bionic-a15 \
|
||||
try-csl \
|
||||
try-glibc \
|
||||
try-newlib \
|
||||
try-newlib-xscale
|
||||
|
||||
# Libraries used in the benchmarks and examples
|
||||
noinst_LIBRARIES += \
|
||||
libbionic-a9.a \
|
||||
libbionic-a15.a \
|
||||
libcsl.a \
|
||||
libglibc.a \
|
||||
libnewlib.a \
|
||||
libnewlib-xscale.a
|
||||
|
||||
# Main library
|
||||
libcortex_strings_la_SOURCES = \
|
||||
src/thumb-2/strcpy.c \
|
||||
src/arm/memchr.S \
|
||||
src/arm/strchr.S \
|
||||
src/thumb-2/strlen.S \
|
||||
src/arm/memset.S \
|
||||
src/arm/memcpy.S \
|
||||
src/arm/strcmp.S
|
||||
|
||||
# Libraries containing the different reference versions
|
||||
libbionic_a9_a_SOURCES = \
|
||||
$(neon_bionic_a9_sources) \
|
||||
reference/bionic-a9/memcmp.S \
|
||||
reference/bionic-a9/strcmp.S \
|
||||
reference/bionic-a9/strcpy.S \
|
||||
reference/bionic-a9/strlen.c
|
||||
|
||||
libbionic_a9_a_CFLAGS = -Wa,-mimplicit-it=thumb
|
||||
|
||||
libbionic_a15_a_SOURCES = \
|
||||
$(neon_bionic_a15_sources) \
|
||||
reference/bionic-a15/memcmp.S \
|
||||
reference/bionic-a15/strcmp.S \
|
||||
reference/bionic-a15/strcpy.S \
|
||||
reference/bionic-a15/strlen.c
|
||||
|
||||
libbionic_a15_a_CFLAGS = -Wa,-mimplicit-it=thumb
|
||||
|
||||
libcsl_a_SOURCES = \
|
||||
reference/csl/memcpy.c \
|
||||
reference/csl/memset.c \
|
||||
reference/csl/arm_asm.h
|
||||
|
||||
libglibc_a_SOURCES = \
|
||||
reference/glibc/memcpy.S \
|
||||
reference/glibc/memset.S \
|
||||
reference/glibc/strchr.S \
|
||||
reference/glibc/strlen.S
|
||||
|
||||
libnewlib_a_SOURCES = \
|
||||
reference/newlib/memcpy.S \
|
||||
reference/newlib/strcmp.S \
|
||||
reference/newlib/strcpy.c \
|
||||
reference/newlib/strlen.c \
|
||||
reference/newlib/arm_asm.h \
|
||||
reference/newlib/shim.h
|
||||
|
||||
libnewlib_xscale_a_SOURCES = \
|
||||
reference/newlib-xscale/memchr.c \
|
||||
reference/newlib-xscale/memcpy.c \
|
||||
reference/newlib-xscale/memset.c \
|
||||
reference/newlib-xscale/strchr.c \
|
||||
reference/newlib-xscale/strcmp.c \
|
||||
reference/newlib-xscale/strcpy.c \
|
||||
reference/newlib-xscale/strlen.c \
|
||||
reference/newlib-xscale/xscale.h
|
||||
|
||||
# Flags for the benchmark helpers
|
||||
try_bionic_a9_SOURCES =
|
||||
try_bionic_a9_LDADD = libmulti.a libbionic-a9.a -lrt
|
||||
try_bionic_a15_SOURCES =
|
||||
try_bionic_a15_LDADD = libmulti.a libbionic-a15.a -lrt
|
||||
try_csl_SOURCES =
|
||||
try_csl_LDADD = libmulti.a libcsl.a -lrt
|
||||
try_glibc_SOURCES =
|
||||
try_glibc_LDADD = libmulti.a libglibc.a -lrt
|
||||
try_newlib_SOURCES =
|
||||
try_newlib_LDADD = libmulti.a libnewlib.a -lrt
|
||||
try_newlib_xscale_SOURCES =
|
||||
try_newlib_xscale_LDADD = libmulti.a libnewlib-xscale.a -lrt
|
||||
|
||||
AM_CPPFLAGS = $(fpu_flags)
|
||||
AM_LDFLAGS = $(fpu_flags)
|
||||
|
||||
endif
|
||||
|
||||
# aarch64 specific
|
||||
if HOST_AARCH64
|
||||
|
||||
libcortex_strings_la_SOURCES = \
|
||||
src/aarch64/memchr.S \
|
||||
src/aarch64/memcmp.S \
|
||||
src/aarch64/memcpy.S \
|
||||
src/aarch64/memmove.S \
|
||||
src/aarch64/memset.S \
|
||||
src/aarch64/strchr.S \
|
||||
src/aarch64/strchrnul.S \
|
||||
src/aarch64/strcmp.S \
|
||||
src/aarch64/strcpy.S \
|
||||
src/aarch64/strlen.S \
|
||||
src/aarch64/strncmp.S \
|
||||
src/aarch64/strnlen.S
|
||||
|
||||
endif
|
||||
|
||||
libcortex_strings_la_LDFLAGS = -version-info 1:0:0
|
||||
|
||||
AM_CFLAGS = \
|
||||
-std=gnu99 -Wall \
|
||||
-fno-builtin -fno-stack-protector -U_FORTIFY_SOURCE \
|
||||
$(AM_CPPFLAGS)
|
||||
|
||||
if WITH_SUBMACHINE
|
||||
AM_CFLAGS += \
|
||||
-mtune=$(submachine)
|
||||
endif
|
||||
|
||||
EXTRA_DIST = \
|
||||
tests/hp-timing.h \
|
||||
tests/test-string.h \
|
||||
tests/test-skeleton.c \
|
||||
scripts/add-license.sh \
|
||||
scripts/bench.py \
|
||||
scripts/fixup.py \
|
||||
scripts/libplot.py \
|
||||
scripts/plot-align.py \
|
||||
scripts/plot.py \
|
||||
scripts/plot-sizes.py \
|
||||
scripts/plot-top.py \
|
||||
scripts/trim.sh \
|
||||
autogen.sh
|
||||
@@ -0,0 +1,111 @@
|
||||
= Cortex-A String Routines =
|
||||
|
||||
This package contains optimised string routines including memcpy(), memset(),
|
||||
strcpy(), strlen() for the ARM Cortex-A series of cores.
|
||||
|
||||
Various implementations of these routines are provided, including generic
|
||||
implementations for ARMv7-A cores with/without Neon, Thumb2 implementations
|
||||
and generic implementations for cores supporting AArch64.
|
||||
|
||||
== Getting started ==
|
||||
First configure and then install libcortex-strings.so. To make other
|
||||
applications use this library, either add -lcortex-strings to the link
|
||||
command or use LD_PRELOAD to load the library into existing applications.
|
||||
|
||||
Our intent is to get these routines into the common C libraries such
|
||||
as GLIBC, Bionic, and Newlib. Your system may already include them!
|
||||
|
||||
== Contents ==
|
||||
* src/ contains the routines themselves
|
||||
* tests/ contains the unit tests
|
||||
* reference/ contains reference copies of other ARM-focused
|
||||
implementations gathered from around the Internet
|
||||
* benchmarks/ contains various benchmarks, tools, and scripts used to
|
||||
check and report on the different implementations.
|
||||
|
||||
The src directory contains different variants organised by the
|
||||
implementation they run on and optional features used. For example:
|
||||
* src/thumb-2 contains generic non-NEON routines for AArch32 (with Thumb-2).
|
||||
* src/arm contains tuned routines for Cortex-A class processors.
|
||||
* src/aarch64 contains generic routines for AArch64.
|
||||
* src/thumb contains generic routines for armv6-M (with Thumb).
|
||||
|
||||
== Reference versions ==
|
||||
reference/ contains versions collected from various popular Open
|
||||
Source libraries. These have been modified for use in benchmarking.
|
||||
Please refer to the individual files for any licensing terms.
|
||||
|
||||
The routines were collected from the following releases:
|
||||
* EGLIBC 2.13
|
||||
* Newlib 1.19.0
|
||||
* Bionic android-2.3.5_r1
|
||||
|
||||
== Licensing ==
|
||||
All Linaro-authored routines are under the modified BSD license:
|
||||
|
||||
Copyright (c) 2011, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
All ARM-authored routines are under the modified BSD license:
|
||||
|
||||
Copyright (c) 2014 ARM Ltd
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
All third party routines are under a GPL compatible license.
|
||||
|
||||
== Notes and Limitations ==
|
||||
Some of the implementations have been collected from other
|
||||
projects and have a variety of licenses and copyright holders.
|
||||
|
||||
== Style ==
|
||||
Assembly code attempts to follow the GLIBC coding conventions. They
|
||||
are:
|
||||
* Copyright headers in C style comment blocks
|
||||
* Instructions indented with one tab
|
||||
* Operands indented with one tab
|
||||
* Text is wrapped at 70 characters
|
||||
* End of line comments are fine
|
||||
Executable
+69
@@ -0,0 +1,69 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# autogen.sh glue for hplip
|
||||
#
|
||||
# HPLIP used to have five or so different autotools trees. Upstream
|
||||
# has reduced it to two. Still, this script is capable of cleaning
|
||||
# just about any possible mess of autoconf files.
|
||||
#
|
||||
# BE CAREFUL with trees that are not completely automake-generated,
|
||||
# this script deletes all Makefile.in files it can find.
|
||||
#
|
||||
# Requires: automake 1.9, autoconf 2.57+
|
||||
# Conflicts: autoconf 2.13
|
||||
set -e
|
||||
|
||||
# Refresh GNU autotools toolchain.
|
||||
echo Cleaning autotools files...
|
||||
# Remove stale autom4te caches. No trailing "\;" here: that terminator
# belongs to "find -exec"; with xargs it was handed to rm as a file name.
find -type d -name autom4te.cache -print0 | xargs -0 rm -rf
|
||||
find -type f \( -name missing -o -name install-sh -o -name mkinstalldirs \
|
||||
-o -name depcomp -o -name ltmain.sh -o -name configure \
|
||||
-o -name config.sub -o -name config.guess \
|
||||
-o -name Makefile.in \) -print0 | xargs -0 rm -f
|
||||
|
||||
echo Running autoreconf...
|
||||
autoreconf --force --install
|
||||
|
||||
# For the Debian package build
|
||||
test -d debian && {
|
||||
# link these in Debian builds
|
||||
rm -f config.sub config.guess
|
||||
ln -s /usr/share/misc/config.sub .
|
||||
ln -s /usr/share/misc/config.guess .
|
||||
|
||||
# refresh list of executable scripts, to avoid possible breakage if
|
||||
# upstream tarball does not include the file or if it is mispackaged
|
||||
# for whatever reason.
|
||||
[ "$1" = "updateexec" ] && {
|
||||
echo Generating list of executable files...
|
||||
rm -f debian/executable.files
|
||||
# "-perm /111" matches files with any execute bit set; the older
# "-perm +111" spelling is no longer supported by GNU find.
find -type f -perm /111 ! -name '.*' -fprint debian/executable.files
|
||||
}
|
||||
|
||||
# Remove any files in upstream tarball that we don't have in the Debian
|
||||
# package (because diff cannot remove files)
|
||||
version=`dpkg-parsechangelog | awk '/Version:/ { print $2 }' | sed -e 's/-[^-]\+$//'`
|
||||
source=`dpkg-parsechangelog | awk '/Source:/ { print $2 }' | tr -d ' '`
|
||||
if test -r ../${source}_${version}.orig.tar.gz ; then
|
||||
echo Generating list of files that should be removed...
|
||||
rm -f debian/deletable.files
|
||||
touch debian/deletable.files
|
||||
[ -e debian/tmp ] && rm -rf debian/tmp
|
||||
mkdir debian/tmp
|
||||
( cd debian/tmp ; tar -zxf ../../../${source}_${version}.orig.tar.gz )
|
||||
find debian/tmp/ -type f ! -name '.*' -print0 | xargs -0 -ri echo '{}' | \
|
||||
while read -r i ; do
|
||||
if test -e "${i}" ; then
|
||||
filename=$(echo "${i}" | sed -e 's#.*debian/tmp/[^/]\+/##')
|
||||
test -e "${filename}" || echo "${filename}" >>debian/deletable.files
|
||||
fi
|
||||
done
|
||||
rm -fr debian/tmp
|
||||
else
|
||||
echo Emptying list of files that should be deleted...
|
||||
rm -f debian/deletable.files
|
||||
touch debian/deletable.files
|
||||
fi
|
||||
}
|
||||
|
||||
exit 0
|
||||
@@ -0,0 +1,311 @@
|
||||
/*
|
||||
**************************************************************************
|
||||
* DHRYSTONE 2.1 BENCHMARK PC VERSION
|
||||
**************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry.h (part 1 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
* Siemens AG, AUT E 51
|
||||
* Postfach 3220
|
||||
* 8520 Erlangen
|
||||
* Germany (West)
|
||||
* Phone: [+49]-9131-7-20330
|
||||
* (8-17 Central European Time)
|
||||
* Usenet: ..!mcsun!unido!estevax!weicker
|
||||
*
|
||||
* Original Version (in Ada) published in
|
||||
* "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
|
||||
* pp. 1013 - 1030, together with the statistics
|
||||
* on which the distribution of statements etc. is based.
|
||||
*
|
||||
* In this C version, the following C library functions are used:
|
||||
* - strcpy, strcmp (inside the measurement loop)
|
||||
* - printf, scanf (outside the measurement loop)
|
||||
* In addition, Berkeley UNIX system calls "times ()" or "time ()"
|
||||
* are used for execution time measurement. For measurements
|
||||
* on other systems, these calls have to be changed.
|
||||
*
|
||||
* Collection of Results:
|
||||
* Reinhold Weicker (address see above) and
|
||||
*
|
||||
* Rick Richardson
|
||||
* PC Research. Inc.
|
||||
* 94 Apple Orchard Drive
|
||||
* Tinton Falls, NJ 07724
|
||||
* Phone: (201) 389-8963 (9-17 EST)
|
||||
* Usenet: ...!uunet!pcrat!rick
|
||||
*
|
||||
* Please send results to Rick Richardson and/or Reinhold Weicker.
|
||||
* Complete information should be given on hardware and software used.
|
||||
* Hardware information includes: Machine type, CPU, type and size
|
||||
* of caches; for microprocessors: clock frequency, memory speed
|
||||
* (number of wait states).
|
||||
* Software information includes: Compiler (and runtime library)
|
||||
* manufacturer and version, compilation switches, OS version.
|
||||
* The Operating System version may give an indication about the
|
||||
* compiler; Dhrystone itself performs no OS calls in the measurement
|
||||
* loop.
|
||||
*
|
||||
* The complete output generated by the program should be mailed
|
||||
* such that at least some checks for correctness can be made.
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* This version has changes made by Roy Longbottom to conform to a common
|
||||
* format for a series of standard benchmarks for PCs:
|
||||
*
|
||||
* Running time greater than 5 seconds due to inaccuracy of the PC clock.
|
||||
*
|
||||
* Automatic adjustment of run time, no manually inserted parameters.
|
||||
*
|
||||
* Initial display of calibration times to confirm linearity.
|
||||
*
|
||||
* Display of results within one screen (or at a slow speed as the test
|
||||
* progresses) so that it can be seen to have run successfully.
|
||||
*
|
||||
* Facilities to type in details of system used etc.
|
||||
*
|
||||
* All results and details appended to a results file.
|
||||
*
|
||||
*
|
||||
* Roy Longbottom
|
||||
* 101323.2241@compuserve.com
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* For details of history, changes, other defines, benchmark construction
|
||||
* statistics see official versions from ftp.nosc.mil/pub/aburto where
|
||||
* the latest table of results (dhry.tbl) are available. See also
|
||||
* netlib@ornl.gov
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* Defines: The following "Defines" are possible:
|
||||
* -DREG=register (default: Not defined)
|
||||
* As an approximation to what an average C programmer
|
||||
* might do, the "register" storage class is applied
|
||||
* (if enabled by -DREG=register)
|
||||
* - for local variables, if they are used (dynamically)
|
||||
* five or more times
|
||||
* - for parameters if they are used (dynamically)
|
||||
* six or more times
|
||||
* Note that an optimal "register" strategy is
|
||||
* compiler-dependent, and that "register" declarations
|
||||
* do not necessarily lead to faster execution.
|
||||
* -DNOSTRUCTASSIGN (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* assignment of structures.
|
||||
* -DNOENUM (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* enumeration types.
|
||||
***************************************************************************
|
||||
*
|
||||
* Compilation model and measurement (IMPORTANT):
|
||||
*
|
||||
* This C version of Dhrystone consists of three files:
|
||||
* - dhry.h (this file, containing global definitions and comments)
|
||||
* - dhry_1.c (containing the code corresponding to Ada package Pack_1)
|
||||
* - dhry_2.c (containing the code corresponding to Ada package Pack_2)
|
||||
*
|
||||
* The following "ground rules" apply for measurements:
|
||||
* - Separate compilation
|
||||
* - No procedure merging
|
||||
* - Otherwise, compiler optimizations are allowed but should be indicated
|
||||
* - Default results are those without register declarations
|
||||
* See the companion paper "Rationale for Dhrystone Version 2" for a more
|
||||
* detailed discussion of these ground rules.
|
||||
*
|
||||
* For 16-Bit processors (e.g. 80186, 80286), times for all compilation
|
||||
* models ("small", "medium", "large" etc.) should be given if possible,
|
||||
* together with a definition of these models for the compiler system used.
|
||||
*
|
||||
**************************************************************************
|
||||
* Examples of Pentium Results
|
||||
*
|
||||
* Dhrystone Benchmark Version 2.1 (Language: C)
|
||||
*
|
||||
* Month run 4/1996
|
||||
* PC model Escom
|
||||
* CPU Pentium
|
||||
* Clock MHz 100
|
||||
* Cache 256K
|
||||
* Options Neptune chipset
|
||||
* OS/DOS Windows 95
|
||||
* Compiler Watcom C/ C++ 10.5 Win386
|
||||
* OptLevel -otexan -zp8 -fp5 -5r
|
||||
* Run by Roy Longbottom
|
||||
* From UK
|
||||
* Mail 101323.2241@compuserve.com
|
||||
*
|
||||
* Final values (* implementation-dependent):
|
||||
*
|
||||
* Int_Glob: O.K. 5
|
||||
* Bool_Glob: O.K. 1
|
||||
* Ch_1_Glob: O.K. A
|
||||
* Ch_2_Glob: O.K. B
|
||||
* Arr_1_Glob[8]: O.K. 7
|
||||
* Arr_2_Glob8/7: O.K. 1600010
|
||||
* Ptr_Glob->
|
||||
* Ptr_Comp: * 98008
|
||||
* Discr: O.K. 0
|
||||
* Enum_Comp: O.K. 2
|
||||
* Int_Comp: O.K. 17
|
||||
* Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
|
||||
* Next_Ptr_Glob->
|
||||
* Ptr_Comp: * 98008 same as above
|
||||
* Discr: O.K. 0
|
||||
* Enum_Comp: O.K. 1
|
||||
* Int_Comp: O.K. 18
|
||||
* Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
|
||||
* Int_1_Loc: O.K. 5
|
||||
* Int_2_Loc: O.K. 13
|
||||
* Int_3_Loc: O.K. 7
|
||||
* Enum_Loc: O.K. 1
|
||||
* Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
|
||||
* Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
|
||||
*
|
||||
* Register option Selected.
|
||||
*
|
||||
* Microseconds 1 loop: 4.53
|
||||
* Dhrystones / second: 220690
|
||||
* VAX MIPS rating: 125.61
|
||||
*
|
||||
*
|
||||
* Dhrystone Benchmark Version 2.1 (Language: C)
|
||||
*
|
||||
* Month run 4/1996
|
||||
* PC model Escom
|
||||
* CPU Pentium
|
||||
* Clock MHz 100
|
||||
* Cache 256K
|
||||
* Options Neptune chipset
|
||||
* OS/DOS Windows 95
|
||||
* Compiler Watcom C/ C++ 10.5 Win386
|
||||
* OptLevel No optimisation
|
||||
* Run by Roy Longbottom
|
||||
* From UK
|
||||
* Mail 101323.2241@compuserve.com
|
||||
*
|
||||
* Final values (* implementation-dependent):
|
||||
*
|
||||
* Int_Glob: O.K. 5
|
||||
* Bool_Glob: O.K. 1
|
||||
* Ch_1_Glob: O.K. A
|
||||
* Ch_2_Glob: O.K. B
|
||||
* Arr_1_Glob[8]: O.K. 7
|
||||
* Arr_2_Glob8/7: O.K. 320010
|
||||
* Ptr_Glob->
|
||||
* Ptr_Comp: * 98004
|
||||
* Discr: O.K. 0
|
||||
* Enum_Comp: O.K. 2
|
||||
* Int_Comp: O.K. 17
|
||||
* Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
|
||||
* Next_Ptr_Glob->
|
||||
* Ptr_Comp: * 98004 same as above
|
||||
* Discr: O.K. 0
|
||||
* Enum_Comp: O.K. 1
|
||||
* Int_Comp: O.K. 18
|
||||
* Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
|
||||
* Int_1_Loc: O.K. 5
|
||||
* Int_2_Loc: O.K. 13
|
||||
* Int_3_Loc: O.K. 7
|
||||
* Enum_Loc: O.K. 1
|
||||
* Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
|
||||
* Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
|
||||
*
|
||||
* Register option Not selected.
|
||||
*
|
||||
* Microseconds 1 loop: 20.06
|
||||
* Dhrystones / second: 49844
|
||||
* VAX MIPS rating: 28.37
|
||||
*
|
||||
**************************************************************************
|
||||
*/
|
||||
|
||||
/* Compiler and system dependent definitions: */
|
||||
|
||||
#ifndef TIME
|
||||
#define TIMES
|
||||
#endif
|
||||
/* Use times(2) time function unless */
|
||||
/* explicitly defined otherwise */
|
||||
|
||||
#ifdef TIMES
|
||||
/* #include <sys/types.h>
|
||||
#include <sys/times.h> */
|
||||
/* for "times" */
|
||||
#endif
|
||||
|
||||
#define Mic_secs_Per_Second 1000000.0
|
||||
/* Berkeley UNIX C returns process times in seconds/HZ */
|
||||
|
||||
#ifdef NOSTRUCTASSIGN
|
||||
#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
|
||||
#else
|
||||
/* Direct struct assignment; arguments and the whole expansion are
   parenthesized so the macro stays a single expression wherever it is used. */
#define structassign(d, s) ((d) = (s))
|
||||
#endif
|
||||
|
||||
#ifdef NOENUM
|
||||
#define Ident_1 0
|
||||
#define Ident_2 1
|
||||
#define Ident_3 2
|
||||
#define Ident_4 3
|
||||
#define Ident_5 4
|
||||
typedef int Enumeration;
|
||||
#else
|
||||
typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
|
||||
Enumeration;
|
||||
#endif
|
||||
/* for boolean and enumeration types in Ada, Pascal */
|
||||
|
||||
/* General definitions: */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* for strcpy, strcmp */
|
||||
|
||||
#define Null 0
|
||||
/* Value of a Null pointer */
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
typedef int One_Thirty;
|
||||
typedef int One_Fifty;
|
||||
typedef char Capital_Letter;
|
||||
typedef int Boolean;
|
||||
typedef char Str_30 [31];
|
||||
typedef int Arr_1_Dim [50];
|
||||
typedef int Arr_2_Dim [50] [50];
|
||||
|
||||
typedef struct record
|
||||
{
|
||||
struct record *Ptr_Comp;
|
||||
Enumeration Discr;
|
||||
union {
|
||||
struct {
|
||||
Enumeration Enum_Comp;
|
||||
int Int_Comp;
|
||||
char Str_Comp [31];
|
||||
} var_1;
|
||||
struct {
|
||||
Enumeration E_Comp_2;
|
||||
char Str_2_Comp [31];
|
||||
} var_2;
|
||||
struct {
|
||||
char Ch_1_Comp;
|
||||
char Ch_2_Comp;
|
||||
} var_3;
|
||||
} variant;
|
||||
} Rec_Type, *Rec_Pointer;
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,778 @@
|
||||
/*
|
||||
*************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry_1.c (part 2 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
*
|
||||
*************************************************************************
|
||||
*/
|
||||
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "dhry.h"
|
||||
/*COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER*/
|
||||
|
||||
/*
 * Build identification strings printed on the console and written to the
 * results file.  Define exactly one of the macros below on the compiler
 * command line to select a (compiler, options) pair.
 */
#ifdef COW
#define compiler "Watcom C/C++ 10.5 Win386"
#define options " -otexan -zp8 -5r -ms"
#endif
#ifdef CNW
#define compiler "Watcom C/C++ 10.5 Win386"
#define options " No optimisation"
#endif
#ifdef COD
#define compiler "Watcom C/C++ 10.5 Dos4GW"
#define options " -otexan -zp8 -5r -ms"
#endif
#ifdef CND
#define compiler "Watcom C/C++ 10.5 Dos4GW"
#define options " No optimisation"
#endif
#ifdef CONT
#define compiler "Watcom C/C++ 10.5 Win32NT"
#define options " -otexan -zp8 -5r -ms"
#endif
#ifdef CNNT
#define compiler "Watcom C/C++ 10.5 Win32NT"
#define options " No optimisation"
#endif
#ifdef COO2
#define compiler "Watcom C/C++ 10.5 OS/2-32"
#define options " -otexan -zp8 -5r -ms"
#endif
#ifdef CNO2
#define compiler "Watcom C/C++ 10.5 OS/2-32"
#define options " No optimisation"
#endif

/*
 * Fallbacks: without them the program does not compile unless one of the
 * selector macros above is defined, because main prints both strings.
 * They may also be supplied directly, e.g. -Dcompiler='"..."'.
 */
#ifndef compiler
#define compiler "Unknown compiler"
#endif
#ifndef options
#define options " Unknown"
#endif
|
||||
|
||||
|
||||
/* Global Variables: */
|
||||
|
||||
Rec_Pointer Ptr_Glob,
|
||||
Next_Ptr_Glob;
|
||||
int Int_Glob;
|
||||
Boolean Bool_Glob;
|
||||
char Ch_1_Glob,
|
||||
Ch_2_Glob;
|
||||
int Arr_1_Glob [50];
|
||||
int Arr_2_Glob [50] [50];
|
||||
int getinput = 1;
|
||||
|
||||
|
||||
char Reg_Define[100] = "Register option Selected.";
|
||||
|
||||
Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
|
||||
Capital_Letter Ch_2_Par_Val);
|
||||
/*
|
||||
forward declaration necessary since Enumeration may not simply be int
|
||||
*/
|
||||
|
||||
#ifndef ROPT
|
||||
#define REG
|
||||
/* REG becomes defined as empty */
|
||||
/* i.e. no register variables */
|
||||
#else
|
||||
#define REG register
|
||||
#endif
|
||||
|
||||
void Proc_1 (REG Rec_Pointer Ptr_Val_Par);
|
||||
void Proc_2 (One_Fifty *Int_Par_Ref);
|
||||
void Proc_3 (Rec_Pointer *Ptr_Ref_Par);
|
||||
void Proc_4 ();
|
||||
void Proc_5 ();
|
||||
void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par);
|
||||
void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
|
||||
One_Fifty *Int_Par_Ref);
|
||||
void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
|
||||
int Int_1_Par_Val, int Int_2_Par_Val);
|
||||
|
||||
Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref);
|
||||
|
||||
|
||||
/* variables for time measurement: */
|
||||
|
||||
/* Measurements should last at least 2 seconds */
#define Too_Small_Time 2

/* dtime() readings taken around the timed loop, and their difference */
double          Begin_Time;
double          End_Time;
double          User_Time;

/* Results main derives from User_Time and the number of runs */
double          Microseconds;
double          Dhrystones_Per_Second;
double          Vax_Mips;
|
||||
|
||||
/* end of variables for time measurement */
|
||||
|
||||
|
||||
void main (int argc, char *argv[])
|
||||
/*****/
|
||||
|
||||
/* main program, corresponds to procedures */
|
||||
/* Main and Proc_0 in the Ada version */
|
||||
{
|
||||
double dtime();
|
||||
|
||||
One_Fifty Int_1_Loc;
|
||||
REG One_Fifty Int_2_Loc;
|
||||
One_Fifty Int_3_Loc;
|
||||
REG char Ch_Index;
|
||||
Enumeration Enum_Loc;
|
||||
Str_30 Str_1_Loc;
|
||||
Str_30 Str_2_Loc;
|
||||
REG int Run_Index;
|
||||
REG int Number_Of_Runs;
|
||||
int endit, count = 10;
|
||||
FILE *Ap;
|
||||
char general[9][80] = {" "};
|
||||
|
||||
/* Initializations */
|
||||
if (argc > 1)
|
||||
{
|
||||
switch (argv[1][0])
|
||||
{
|
||||
case 'N':
|
||||
getinput = 0;
|
||||
break;
|
||||
case 'n':
|
||||
getinput = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ((Ap = fopen("Dhry.txt","a+")) == NULL)
|
||||
{
|
||||
printf("Can not open Dhry.txt\n\n");
|
||||
printf("Press any key\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* Change for compiler and optimisation used *
|
||||
***********************************************************************/
|
||||
|
||||
Next_Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
|
||||
Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
|
||||
|
||||
Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
|
||||
Ptr_Glob->Discr = Ident_1;
|
||||
Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
|
||||
Ptr_Glob->variant.var_1.Int_Comp = 40;
|
||||
strcpy (Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING");
|
||||
strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
|
||||
|
||||
Arr_2_Glob [8][7] = 10;
|
||||
/* Was missing in published program. Without this statement, */
|
||||
/* Arr_2_Glob [8][7] would have an undefined value. */
|
||||
/* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
|
||||
/* overflow may occur for this array element. */
|
||||
|
||||
printf ("\n");
|
||||
printf ("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n");
|
||||
printf ("\n");
|
||||
|
||||
if (getinput == 0)
|
||||
{
|
||||
printf ("No run time input data\n\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
printf ("With run time input data\n\n");
|
||||
}
|
||||
|
||||
printf ("Compiler %s\n", compiler);
|
||||
printf ("Optimisation %s\n", options);
|
||||
#ifdef ROPT
|
||||
printf ("Register option selected\n\n");
|
||||
#else
|
||||
printf ("Register option not selected\n\n");
|
||||
strcpy(Reg_Define, "Register option Not selected.");
|
||||
#endif
|
||||
|
||||
/*
|
||||
if (Reg)
|
||||
{
|
||||
printf ("Program compiled with 'register' attribute\n");
|
||||
printf ("\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
printf ("Program compiled without 'register' attribute\n");
|
||||
printf ("\n");
|
||||
}
|
||||
|
||||
printf ("Please give the number of runs through the benchmark: ");
|
||||
{
|
||||
int n;
|
||||
scanf ("%d", &n);
|
||||
Number_Of_Runs = n;
|
||||
}
|
||||
printf ("\n");
|
||||
printf ("Execution starts, %d runs through Dhrystone\n",
|
||||
Number_Of_Runs);
|
||||
*/
|
||||
|
||||
Number_Of_Runs = 5000;
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
Number_Of_Runs = Number_Of_Runs * 2;
|
||||
count = count - 1;
|
||||
Arr_2_Glob [8][7] = 10;
|
||||
|
||||
/***************/
|
||||
/* Start timer */
|
||||
/***************/
|
||||
|
||||
Begin_Time = dtime();
|
||||
|
||||
for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
|
||||
{
|
||||
|
||||
Proc_5();
|
||||
Proc_4();
|
||||
/* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
|
||||
Int_1_Loc = 2;
|
||||
Int_2_Loc = 3;
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
|
||||
Enum_Loc = Ident_2;
|
||||
Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
|
||||
/* Bool_Glob == 1 */
|
||||
while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
|
||||
{
|
||||
Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
|
||||
/* Int_3_Loc == 7 */
|
||||
Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
|
||||
/* Int_3_Loc == 7 */
|
||||
Int_1_Loc += 1;
|
||||
} /* while */
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
|
||||
/* Int_Glob == 5 */
|
||||
Proc_1 (Ptr_Glob);
|
||||
for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
|
||||
/* loop body executed twice */
|
||||
{
|
||||
if (Enum_Loc == Func_1 (Ch_Index, 'C'))
|
||||
/* then, not executed */
|
||||
{
|
||||
Proc_6 (Ident_1, &Enum_Loc);
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
|
||||
Int_2_Loc = Run_Index;
|
||||
Int_Glob = Run_Index;
|
||||
}
|
||||
}
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Int_2_Loc = Int_2_Loc * Int_1_Loc;
|
||||
Int_1_Loc = Int_2_Loc / Int_3_Loc;
|
||||
Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
|
||||
/* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
|
||||
Proc_2 (&Int_1_Loc);
|
||||
/* Int_1_Loc == 5 */
|
||||
|
||||
} /* loop "for Run_Index" */
|
||||
|
||||
/**************/
|
||||
/* Stop timer */
|
||||
/**************/
|
||||
|
||||
End_Time = dtime();
|
||||
User_Time = End_Time - Begin_Time;
|
||||
|
||||
printf ("%12.0f runs %6.2f seconds \n",(double) Number_Of_Runs, User_Time);
|
||||
if (User_Time > 5)
|
||||
{
|
||||
count = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (User_Time < 0.1)
|
||||
{
|
||||
Number_Of_Runs = Number_Of_Runs * 5;
|
||||
}
|
||||
}
|
||||
} /* calibrate/run do while */
|
||||
while (count >0);
|
||||
|
||||
printf ("\n");
|
||||
printf ("Final values (* implementation-dependent):\n");
|
||||
printf ("\n");
|
||||
printf ("Int_Glob: ");
|
||||
if (Int_Glob == 5) printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d ", Int_Glob);
|
||||
|
||||
printf ("Bool_Glob: ");
|
||||
if (Bool_Glob == 1) printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d\n", Bool_Glob);
|
||||
|
||||
printf ("Ch_1_Glob: ");
|
||||
if (Ch_1_Glob == 'A') printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%c ", Ch_1_Glob);
|
||||
|
||||
printf ("Ch_2_Glob: ");
|
||||
if (Ch_2_Glob == 'B') printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%c\n", Ch_2_Glob);
|
||||
|
||||
printf ("Arr_1_Glob[8]: ");
|
||||
if (Arr_1_Glob[8] == 7) printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d ", Arr_1_Glob[8]);
|
||||
|
||||
printf ("Arr_2_Glob8/7: ");
|
||||
if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%10d\n", Arr_2_Glob[8][7]);
|
||||
|
||||
printf ("Ptr_Glob-> ");
|
||||
printf (" Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
|
||||
|
||||
printf (" Discr: ");
|
||||
if (Ptr_Glob->Discr == 0) printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d ", Ptr_Glob->Discr);
|
||||
|
||||
printf ("Enum_Comp: ");
|
||||
if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
|
||||
printf (" Int_Comp: ");
|
||||
if (Ptr_Glob->variant.var_1.Int_Comp == 17) printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d ", Ptr_Glob->variant.var_1.Int_Comp);
|
||||
|
||||
printf ("Str_Comp: ");
|
||||
if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING") == 0)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%s\n", Ptr_Glob->variant.var_1.Str_Comp);
|
||||
|
||||
printf ("Next_Ptr_Glob-> ");
|
||||
printf (" Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
|
||||
printf (" same as above\n");
|
||||
|
||||
printf (" Discr: ");
|
||||
if (Next_Ptr_Glob->Discr == 0)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d ", Next_Ptr_Glob->Discr);
|
||||
|
||||
printf ("Enum_Comp: ");
|
||||
if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
|
||||
printf (" Int_Comp: ");
|
||||
if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp);
|
||||
|
||||
printf ("Str_Comp: ");
|
||||
if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING") == 0)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
|
||||
|
||||
printf ("Int_1_Loc: ");
|
||||
if (Int_1_Loc == 5)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d ", Int_1_Loc);
|
||||
|
||||
printf ("Int_2_Loc: ");
|
||||
if (Int_2_Loc == 13)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d\n", Int_2_Loc);
|
||||
|
||||
printf ("Int_3_Loc: ");
|
||||
if (Int_3_Loc == 7)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d ", Int_3_Loc);
|
||||
|
||||
printf ("Enum_Loc: ");
|
||||
if (Enum_Loc == 1)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%d\n", Enum_Loc);
|
||||
|
||||
printf ("Str_1_Loc: ");
|
||||
if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%s\n", Str_1_Loc);
|
||||
|
||||
printf ("Str_2_Loc: ");
|
||||
if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
|
||||
printf ("O.K. ");
|
||||
else printf ("WRONG ");
|
||||
printf ("%s\n", Str_2_Loc);
|
||||
|
||||
printf ("\n");
|
||||
|
||||
|
||||
if (User_Time < Too_Small_Time)
|
||||
{
|
||||
printf ("Measured time too small to obtain meaningful results\n");
|
||||
printf ("Please increase number of runs\n");
|
||||
printf ("\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
Microseconds = User_Time * Mic_secs_Per_Second
|
||||
/ (double) Number_Of_Runs;
|
||||
Dhrystones_Per_Second = (double) Number_Of_Runs / User_Time;
|
||||
Vax_Mips = Dhrystones_Per_Second / 1757.0;
|
||||
|
||||
printf ("Microseconds for one run through Dhrystone: ");
|
||||
printf ("%12.2lf \n", Microseconds);
|
||||
printf ("Dhrystones per Second: ");
|
||||
printf ("%10.0lf \n", Dhrystones_Per_Second);
|
||||
printf ("VAX MIPS rating = ");
|
||||
printf ("%12.2lf \n",Vax_Mips);
|
||||
printf ("\n");
|
||||
|
||||
/************************************************************************
|
||||
* Type details of hardware, software etc. *
|
||||
************************************************************************/
|
||||
|
||||
if (getinput == 1)
|
||||
{
|
||||
printf ("Enter the following which will be added with results to file DHRY.TXT\n");
|
||||
printf ("When submitting a number of results you need only provide details once\n");
|
||||
printf ("but a cross reference such as an abbreviated CPU type would be useful.\n");
|
||||
printf ("You can kill (exit or close) the program now and no data will be added.\n\n");
|
||||
|
||||
printf ("PC Supplier/model ? ");
|
||||
gets(general[1]);
|
||||
|
||||
printf ("CPU chip ? ");
|
||||
gets(general[2]);
|
||||
|
||||
printf ("Clock MHz ? ");
|
||||
gets(general[3]);
|
||||
|
||||
printf ("Cache size ? ");
|
||||
gets(general[4]);
|
||||
|
||||
printf ("Chipset & H/W options ? ");
|
||||
gets(general[5]);
|
||||
|
||||
printf ("OS/DOS version ? ");
|
||||
gets(general[6]);
|
||||
|
||||
printf ("Your name ? ");
|
||||
gets(general[7]);
|
||||
|
||||
printf ("Company/Location ? ");
|
||||
gets(general[8]);
|
||||
|
||||
printf ("E-mail address ? ");
|
||||
gets(general[0]);
|
||||
}
|
||||
/************************************************************************
|
||||
* Add results to output file Dhry.txt *
|
||||
************************************************************************/
|
||||
fprintf (Ap, "-------------------- -----------------------------------"
|
||||
"\n");
|
||||
fprintf (Ap, "Dhrystone Benchmark Version 2.1 (Language: C++)\n\n");
|
||||
fprintf (Ap, "PC model %s\n", general[1]);
|
||||
fprintf (Ap, "CPU %s\n", general[2]);
|
||||
fprintf (Ap, "Clock MHz %s\n", general[3]);
|
||||
fprintf (Ap, "Cache %s\n", general[4]);
|
||||
fprintf (Ap, "Options %s\n", general[5]);
|
||||
fprintf (Ap, "OS/DOS %s\n", general[6]);
|
||||
fprintf (Ap, "Compiler %s\n", compiler);
|
||||
fprintf (Ap, "OptLevel %s\n", options);
|
||||
fprintf (Ap, "Run by %s\n", general[7]);
|
||||
fprintf (Ap, "From %s\n", general[8]);
|
||||
fprintf (Ap, "Mail %s\n\n", general[0]);
|
||||
|
||||
fprintf (Ap, "Final values (* implementation-dependent):\n");
|
||||
fprintf (Ap, "\n");
|
||||
fprintf (Ap, "Int_Glob: ");
|
||||
if (Int_Glob == 5) fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Int_Glob);
|
||||
|
||||
fprintf (Ap, "Bool_Glob: ");
|
||||
if (Bool_Glob == 1) fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Bool_Glob);
|
||||
|
||||
fprintf (Ap, "Ch_1_Glob: ");
|
||||
if (Ch_1_Glob == 'A') fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%c\n", Ch_1_Glob);
|
||||
|
||||
fprintf (Ap, "Ch_2_Glob: ");
|
||||
if (Ch_2_Glob == 'B') fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%c\n", Ch_2_Glob);
|
||||
|
||||
fprintf (Ap, "Arr_1_Glob[8]: ");
|
||||
if (Arr_1_Glob[8] == 7) fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Arr_1_Glob[8]);
|
||||
|
||||
fprintf (Ap, "Arr_2_Glob8/7: ");
|
||||
if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%10d\n", Arr_2_Glob[8][7]);
|
||||
|
||||
fprintf (Ap, "Ptr_Glob-> \n");
|
||||
fprintf (Ap, " Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
|
||||
|
||||
fprintf (Ap, " Discr: ");
|
||||
if (Ptr_Glob->Discr == 0) fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Ptr_Glob->Discr);
|
||||
|
||||
fprintf (Ap, " Enum_Comp: ");
|
||||
if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
|
||||
fprintf (Ap, " Int_Comp: ");
|
||||
if (Ptr_Glob->variant.var_1.Int_Comp == 17) fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Int_Comp);
|
||||
|
||||
fprintf (Ap, " Str_Comp: ");
|
||||
if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING") == 0)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%s\n", Ptr_Glob->variant.var_1.Str_Comp);
|
||||
|
||||
fprintf (Ap, "Next_Ptr_Glob-> \n");
|
||||
fprintf (Ap, " Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
|
||||
fprintf (Ap, " same as above\n");
|
||||
|
||||
fprintf (Ap, " Discr: ");
|
||||
if (Next_Ptr_Glob->Discr == 0)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Next_Ptr_Glob->Discr);
|
||||
|
||||
fprintf (Ap, " Enum_Comp: ");
|
||||
if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
|
||||
fprintf (Ap, " Int_Comp: ");
|
||||
if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
|
||||
|
||||
fprintf (Ap, " Str_Comp: ");
|
||||
if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING") == 0)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
|
||||
|
||||
fprintf (Ap, "Int_1_Loc: ");
|
||||
if (Int_1_Loc == 5)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Int_1_Loc);
|
||||
|
||||
fprintf (Ap, "Int_2_Loc: ");
|
||||
if (Int_2_Loc == 13)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Int_2_Loc);
|
||||
|
||||
fprintf (Ap, "Int_3_Loc: ");
|
||||
if (Int_3_Loc == 7)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Int_3_Loc);
|
||||
|
||||
fprintf (Ap, "Enum_Loc: ");
|
||||
if (Enum_Loc == 1)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%d\n", Enum_Loc);
|
||||
|
||||
fprintf (Ap, "Str_1_Loc: ");
|
||||
if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%s\n", Str_1_Loc);
|
||||
|
||||
fprintf (Ap, "Str_2_Loc: ");
|
||||
if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
|
||||
fprintf (Ap, "O.K. ");
|
||||
else fprintf (Ap, "WRONG ");
|
||||
fprintf (Ap, "%s\n", Str_2_Loc);
|
||||
|
||||
|
||||
fprintf (Ap, "\n");
|
||||
fprintf(Ap,"%s\n",Reg_Define);
|
||||
fprintf (Ap, "\n");
|
||||
fprintf(Ap,"Microseconds 1 loop: %12.2lf\n",Microseconds);
|
||||
fprintf(Ap,"Dhrystones / second: %10.0lf\n",Dhrystones_Per_Second);
|
||||
fprintf(Ap,"VAX MIPS rating: %12.2lf\n\n",Vax_Mips);
|
||||
fclose(Ap);
|
||||
}
|
||||
|
||||
printf ("\n");
|
||||
printf ("A new results file will have been created in the same directory as the\n");
|
||||
printf (".EXE files if one did not already exist. If you made a mistake on input, \n");
|
||||
printf ("you can use a text editor to correct it, delete the results or copy \n");
|
||||
printf ("them to a different file name. If you intend to run multiple tests you\n");
|
||||
printf ("you may wish to rename DHRY.TXT with a more informative title.\n\n");
|
||||
printf ("Please submit feedback and results files as a posting in Section 12\n");
|
||||
printf ("or to Roy_Longbottom@compuserve.com\n\n");
|
||||
|
||||
if (getinput == 1)
|
||||
{
|
||||
printf("Press any key to exit\n");
|
||||
printf ("\nIf this is displayed you must close the window in the normal way\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Proc_1 (REG Rec_Pointer Ptr_Val_Par)
/******************/
    /* executed once */
{
  /* Successor of the argument record; plays the role of a "rename" in
     Ada or a "with" in Pascal. */
  REG Rec_Pointer Succ = Ptr_Val_Par->Ptr_Comp;

  /* Start the successor off as a copy of the global record. */
  structassign (*Succ, *Ptr_Glob);
  Ptr_Val_Par->variant.var_1.Int_Comp = 5;
  Succ->variant.var_1.Int_Comp = Ptr_Val_Par->variant.var_1.Int_Comp;
  Succ->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
  Proc_3 (&Succ->Ptr_Comp);
    /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp == Ptr_Glob->Ptr_Comp */

  if (Succ->Discr != Ident_1)
    {
      /* not executed */
      structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
      return;
    }

  /* executed */
  Succ->variant.var_1.Int_Comp = 6;
  Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
          &Succ->variant.var_1.Enum_Comp);
  Succ->Ptr_Comp = Ptr_Glob->Ptr_Comp;
  Proc_7 (Succ->variant.var_1.Int_Comp, 10,
          &Succ->variant.var_1.Int_Comp);
} /* Proc_1 */
|
||||
|
||||
|
||||
void Proc_2 (One_Fifty *Int_Par_Ref)
/******************/
    /* executed once */
    /* *Int_Par_Ref == 1, becomes 5 (with Int_Glob == 5, as main checks) */
{
  One_Fifty   Int_Loc;
  /* Start with a value other than Ident_1 so that the loop condition
     never reads an indeterminate value when Ch_1_Glob != 'A'. */
  Enumeration Enum_Loc = Ident_2;

  Int_Loc = *Int_Par_Ref + 10;
  do /* executed once */
    if (Ch_1_Glob == 'A')
      /* then, executed */
      {
        Int_Loc -= 1;
        *Int_Par_Ref = Int_Loc - Int_Glob;
        Enum_Loc = Ident_1;
      } /* if */
  while (Enum_Loc != Ident_1); /* leaves once Enum_Loc == Ident_1 */
} /* Proc_2 */
|
||||
|
||||
|
||||
void Proc_3 (Rec_Pointer *Ptr_Ref_Par)
/******************/
    /* executed once */
    /* *Ptr_Ref_Par becomes Ptr_Glob->Ptr_Comp */
{
  int *Int_Target;

  if (Ptr_Glob != Null)
    {
      /* then, executed */
      *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
    }

  /* Proc_7 stores Int_Glob + 12 into the global record's Int_Comp. */
  Int_Target = &Ptr_Glob->variant.var_1.Int_Comp;
  Proc_7 (10, Int_Glob, Int_Target);
} /* Proc_3 */
|
||||
|
||||
|
||||
void Proc_4 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Boolean Bool_Loc;
|
||||
|
||||
Bool_Loc = Ch_1_Glob == 'A';
|
||||
Bool_Glob = Bool_Loc | Bool_Glob;
|
||||
Ch_2_Glob = 'B';
|
||||
} /* Proc_4 */
|
||||
|
||||
|
||||
void Proc_5 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Ch_1_Glob = 'A';
|
||||
Bool_Glob = false;
|
||||
} /* Proc_5 */
|
||||
|
||||
|
||||
/* Assignment of structures, if the C compiler doesn't support this     */
/* feature, is done by structassign() through memcpy().                 */
#ifdef NOSTRUCTASSIGN
/*
 * No private memcpy is defined here any more: dhry.h includes
 * <string.h>, whose memcpy prototype conflicts with an old-style
 * definition that has an implicit int return type and returns no value.
 * The standard library function is used instead.
 */
#endif
|
||||
|
||||
|
||||
/*
 * Return the processor time used by the program so far, in seconds,
 * as reported by clock().  The result is negative when clock() cannot
 * supply the time, i.e. returns (clock_t) -1.
 */
double dtime (void)
{
  clock_t tnow = clock ();

  /* CLOCKS_PER_SEC is used directly; a local HZ macro would leak out of
     the function and can collide with a system definition of HZ. */
  return (double) tnow / (double) CLOCKS_PER_SEC;
}
|
||||
@@ -0,0 +1,186 @@
|
||||
/*
|
||||
*************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry_2.c (part 3 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
*
|
||||
*************************************************************************
|
||||
*/
|
||||
|
||||
#include "dhry.h"
|
||||
|
||||
/*
 * REG expands to "register" when register variables are requested and
 * to nothing otherwise.  Register variables are requested either by
 * defining REG on the command line or by defining ROPT, the switch
 * dhry_1.c uses.  A REG supplied by the caller is dropped before it is
 * redefined so that the macro is never redefined with a different body.
 */
#if defined(REG)
#undef REG
#define REG register
#elif defined(ROPT)
#define REG register
#else
#define REG
        /* REG becomes defined as empty */
        /* i.e. no register variables   */
#endif
|
||||
|
||||
extern int Int_Glob;
|
||||
extern char Ch_1_Glob;
|
||||
|
||||
Boolean Func_3 (Enumeration Enum_Par_Val);
|
||||
|
||||
void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par)
/*********************************/
    /* executed once */
    /* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
{
  /* Provisional result: the input itself when Func_3 accepts it,
     Ident_4 otherwise (not taken in the benchmark run). */
  *Enum_Ref_Par = Func_3 (Enum_Val_Par) ? Enum_Val_Par : Ident_4;

  switch (Enum_Val_Par)
    {
    case Ident_1:
      *Enum_Ref_Par = Ident_1;
      break;
    case Ident_2:
      *Enum_Ref_Par = (Int_Glob > 100) ? Ident_1 : Ident_4;
      break;
    case Ident_3: /* executed */
      *Enum_Ref_Par = Ident_2;
      break;
    case Ident_4:
      /* keeps the provisional result */
      break;
    case Ident_5:
      *Enum_Ref_Par = Ident_3;
      break;
    default:
      /* any other value keeps the provisional result */
      break;
    } /* switch */
} /* Proc_6 */
|
||||
|
||||
|
||||
void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
             One_Fifty *Int_Par_Ref)
/**********************************************/
    /* executed three times                                      */
    /* first call:  Int_1_Par_Val == 2, Int_2_Par_Val == 3,      */
    /*              Int_Par_Ref becomes 7                        */
    /* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5,     */
    /*              Int_Par_Ref becomes 17                       */
    /* third call:  Int_1_Par_Val == 6, Int_2_Par_Val == 10,     */
    /*              Int_Par_Ref becomes 18                       */
{
  /* Result: second argument plus (first argument + 2). */
  *Int_Par_Ref = Int_2_Par_Val + (Int_1_Par_Val + 2);
} /* Proc_7 */
|
||||
|
||||
|
||||
void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
             int Int_1_Par_Val, int Int_2_Par_Val)
/*********************************************************************/
    /* executed once      */
    /* Int_1_Par_Val == 3 */
    /* Int_2_Par_Val == 7 */
{
  REG One_Fifty Col;
  REG One_Fifty Base;

  Base = Int_1_Par_Val + 5;

  /* One-dimensional array: three stores relative to Base. */
  Arr_1_Par_Ref [Base]      = Int_2_Par_Val;
  Arr_1_Par_Ref [Base + 1]  = Arr_1_Par_Ref [Base];
  Arr_1_Par_Ref [Base + 30] = Base;

  /* Two-dimensional array: fill columns Base and Base + 1 of row Base. */
  Col = Base;
  while (Col <= Base + 1)
    {
      Arr_2_Par_Ref [Base] [Col] = Base;
      ++Col;
    }

  /* main checks this element against Number_Of_Runs + 10. */
  Arr_2_Par_Ref [Base] [Base - 1] += 1;
  Arr_2_Par_Ref [Base + 20] [Base] = Arr_1_Par_Ref [Base];
  Int_Glob = 5;
} /* Proc_8 */
|
||||
|
||||
|
||||
Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
                    Capital_Letter Ch_2_Par_Val)
/*************************************************/
    /* executed three times                                         */
    /* first call:  Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R'        */
    /* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C'        */
    /* third call:  Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C'        */
{
  Capital_Letter First = Ch_1_Par_Val;
  Capital_Letter Copy  = First;

  /* Characters differ: Ident_1, Ch_1_Glob untouched (executed). */
  if (Copy != Ch_2_Par_Val)
    {
      return (Ident_1);
    }

  /* Characters match: record the character and answer Ident_2
     (not executed). */
  Ch_1_Glob = First;
  return (Ident_2);
} /* Func_1 */
|
||||
|
||||
|
||||
Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref)
/*************************************************/
    /* executed once */
    /* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
    /* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
{
  REG One_Thirty  Pos;
  Capital_Letter  Mark;

  /* Compare character Pos of the first string with character Pos + 1 of
     the second; the loop ends once Func_1 reports Ident_1. */
  for (Pos = 2; Pos <= 2; )  /* loop body executed once */
    {
      if (Func_1 (Str_1_Par_Ref[Pos], Str_2_Par_Ref[Pos + 1]) == Ident_1)
        {
          /* then, executed */
          Mark = 'A';
          Pos += 1;
        }
    }

  if (Mark >= 'W' && Mark < 'Z')
    {
      /* then, not executed */
      Pos = 7;
    }

  if (Mark == 'R')
    {
      /* then, not executed */
      return (true);
    }

  /* executed */
  if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
    {
      /* then, not executed */
      Pos += 7;
      Int_Glob = Pos;
      return (true);
    }

  /* executed */
  return (false);
} /* Func_2 */
|
||||
|
||||
|
||||
Boolean Func_3 (Enumeration Enum_Par_Val)
/***************************/
    /* executed once */
    /* Enum_Par_Val == Ident_3 */
{
  Enumeration Candidate = Enum_Par_Val;

  /* true only for Ident_3 (the executed case), false for anything else */
  return ((Candidate == Ident_3) ? true : false);
} /* Func_3 */
|
||||
@@ -0,0 +1,407 @@
|
||||
/*
|
||||
* Copyright (c) 2011, Linaro Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the Linaro nor the
|
||||
* names of its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** A simple harness that times how long a string function takes to
|
||||
* run.
|
||||
*/
|
||||
|
||||
/* PENDING: Add EPL */
|
||||
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
/** Number of elements in a true array (not valid for a pointer) */
#define NUM_ELEMS(arr) (sizeof(arr) / sizeof(*(arr)))
|
||||
|
||||
/** Version string; used as-is when the build already defines VERSION */
#if !defined(VERSION)
#define VERSION "(unknown version)"
#endif
|
||||
|
||||
/** Make sure a function is called by using the return value.
 *
 * The value is stored in a volatile local and read back.  The expansion
 * is a single do { } while (0) statement, so the macro can be used more
 * than once in a scope and as the body of an unbraced if/else, and the
 * local has a name that does not capture identifiers in the argument.
 */
#define SPOIL(_x) \
  do { volatile long spoil_sink_ = (long)(_x); (void)spoil_sink_; } while (0)
|
||||
|
||||
/** Signature shared by every stub the harness can time: a destination
 *  buffer, a source buffer and a byte count.
 */
typedef void (*stub_t)(void *dest, void *src, size_t n);

/** Description of one test: its name and the stub that runs it */
struct test
{
  const char *name;   /**< Test name */
  stub_t stub;        /**< Function to test */
};
|
||||
|
||||
/** Flush the cache by reading a chunk of memory.
 *
 * Performs one volatile read every 64 bytes across a 48 KiB window that
 * starts 768 KiB into `against` (768 reads in total), so the buffer must
 * be at least 816 KiB long.
 */
static void empty(volatile char *against)
{
    const int stride = 64;
    const int first = (1024 - 256)*1024;
    const int last = first + 16*1024*3;
    int pos = first;

    while (pos < last)
    {
        /* The volatile qualifier forces the load to happen */
        (void)against[pos];
        pos += stride;
    }
}
|
||||
|
||||
/** Calibration stub: ignores its arguments and only touches a volatile */
static void xbounce(void *dest, void *src, size_t n)
{
    volatile long sink = 0L;

    (void)sink;
}
|
||||
|
||||
/** Stub that copies n bytes from src to dest with memcpy */
static void xmemcpy(void *dest, void *src, size_t n)
{
    /* Keep the result in a volatile so the call cannot be optimised away */
    volatile long sink = (long)memcpy(dest, src, n);

    (void)sink;
}
|
||||
|
||||
/** Stub that zeroes n bytes of dest with memset; src is unused */
static void xmemset(void *dest, void *src, size_t n)
{
    /* Keep the result in a volatile so the call cannot be optimised away */
    volatile long sink = (long)memset(dest, 0, n);

    (void)sink;
}
|
||||
|
||||
/** Stub that compares the first n bytes of dest and src with memcmp */
static void xmemcmp(void *dest, void *src, size_t n)
{
    /* Keep the result in a volatile so the call cannot be optimised away */
    volatile long sink = (long)memcmp(dest, src, n);

    (void)sink;
}
|
||||
|
||||
/** Stub that copies the string at src to dest with strcpy; n is unused */
static void xstrcpy(void *dest, void *src, size_t n)
{
    /* Keep the result in a volatile so the call cannot be optimised away */
    volatile long sink = (long)strcpy(dest, src);

    (void)sink;
}
|
||||
|
||||
/** Stub that measures the string at dest with strlen; src and n are unused */
static void xstrlen(void *dest, void *src, size_t n)
{
    /* Keep the result in a volatile so the call cannot be optimised away */
    volatile long sink = (long)strlen(dest);

    (void)sink;
}
|
||||
|
||||
/** Stub that compares the strings at dest and src with strcmp; n is unused */
static void xstrcmp(void *dest, void *src, size_t n)
{
    /* Keep the result in a volatile so the call cannot be optimised away */
    volatile long sink = (long)strcmp(dest, src);

    (void)sink;
}
|
||||
|
||||
/** Stub that calls strchr.
 *
 * Plants a space in the last of the n bytes of src and then searches the
 * string for it.  dest is unused.
 */
static void xstrchr(void *dest, void *src, size_t n)
{
    char *text = src;

    /* Put the character at the end of the string and before the null.
       With n == 0 there is no such byte: n - 1 would wrap around and
       write outside the buffer, so skip the store. */
    if (n > 0)
    {
        text[n - 1] = 32;
    }

    /* Keep the result in a volatile so the call cannot be optimised away */
    volatile long sink = (long)strchr(text, 32);

    (void)sink;
}
|
||||
|
||||
/** Stub that calls memchr.
 *
 * Plants a space in the last of the n bytes of src and then searches the
 * block for it.  dest is unused.
 */
static void xmemchr(void *dest, void *src, size_t n)
{
    char *block = src;

    /* Put the character at the end of the block.  With n == 0 there is
       no such byte: n - 1 would wrap around and write outside the
       buffer, so skip the store. */
    if (n > 0)
    {
        block[n - 1] = 32;
    }

    /* Keep the result in a volatile so the call cannot be optimised away */
    volatile long sink = (long)memchr(block, 32, n);

    (void)sink;
}
|
||||
|
||||
/** All functions that can be tested */
|
||||
static const struct test tests[] =
|
||||
{
|
||||
{ "bounce", xbounce },
|
||||
{ "memchr", xmemchr },
|
||||
{ "memcpy", xmemcpy },
|
||||
{ "memset", xmemset },
|
||||
{ "memcmp", xmemcmp },
|
||||
{ "strchr", xstrchr },
|
||||
{ "strcmp", xstrcmp },
|
||||
{ "strcpy", xstrcpy },
|
||||
{ "strlen", xstrlen },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/** Show basic usage */
|
||||
static void usage(const char* name)
|
||||
{
|
||||
printf("%s %s: run a string related benchmark.\n"
|
||||
"usage: %s [-c block-size] [-l loop-count] [-a alignment|src_alignment:dst_alignment] [-f] [-t test-name] [-r run-id]\n"
|
||||
, name, VERSION, name);
|
||||
|
||||
printf("Tests:");
|
||||
|
||||
for (const struct test *ptest = tests; ptest->name != NULL; ptest++)
|
||||
{
|
||||
printf(" %s", ptest->name);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/** Find the test by name */
|
||||
static const struct test *find_test(const char *name)
|
||||
{
|
||||
if (name == NULL)
|
||||
{
|
||||
return tests + 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const struct test *p = tests; p->name != NULL; p++)
|
||||
{
|
||||
if (strcmp(p->name, name) == 0)
|
||||
{
|
||||
return p;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/** Default buffer size; parenthesised so it is safe inside any expression */
#define MIN_BUFFER_SIZE (1024*1024)
/** Boundary that buffers are rounded up to; must be a power of two */
#define MAX_ALIGNMENT 256

/** Take a pointer and ensure that the lower bits == alignment.
 *
 * The pointer is rounded up to the next MAX_ALIGNMENT boundary and then
 * advanced by `alignment` bytes, so the result can be up to
 * (MAX_ALIGNMENT - 1) + alignment bytes past `p`.  The caller must have
 * allocated that much slack.
 */
static char *realign(char *p, int alignment)
{
    const uintptr_t mask = (uintptr_t)MAX_ALIGNMENT - 1;
    uintptr_t rounded = ((uintptr_t)p + mask) & ~mask;

    return (char *)(rounded + alignment);
}
|
||||
|
||||
/** Parse a command line argument as an int.
 *
 * Accepts decimal, octal (leading 0) and hexadecimal (leading 0x) input.
 * Shows the usage text, which exits, when the argument is empty, has
 * trailing characters, or does not fit in an int.
 *
 * @param arg       Text to parse.
 * @param exe_name  Program name passed on to usage().
 * @return The parsed value.
 */
static int parse_int_arg(const char *arg, const char *exe_name)
{
    char *end = NULL;
    long int ret;

    errno = 0;
    ret = strtol(arg, &end, 0);

    /* end == arg: no digits at all; *end != 0: trailing junk; the round
       trip through int catches values that would be silently truncated */
    if (errno != 0 || end == arg || *end != '\0' || (long int)(int)ret != ret)
    {
        usage(exe_name);
    }

    return (int)ret;
}
|
||||
|
||||
/** Parse one alignment value in the range [1, 256].
 *
 * Shows the usage text, which exits, on a conversion error or an out of
 * range value.  256 is reported as 0.
 *
 * @param text      Text to parse.
 * @param exe_name  Program name passed on to usage().
 * @param end       If not NULL, receives the first unparsed character.
 */
static int parse_one_alignment(const char *text, const char *exe_name,
                               char **end)
{
    long int ret;

    errno = 0;
    ret = strtol(text, end, 0);

    if (errno)
    {
        usage(exe_name);
    }

    if (ret > 256 || ret < 1)
    {
        printf("Alignment should be in the range [1, 256].\n");
        usage(exe_name);
    }

    return (ret == 256) ? 0 : (int)ret;
}

/** Parse an "alignment" or "src_alignment:dst_alignment" argument.
 *
 * With a single value both outputs receive it.  Each value must be in
 * [1, 256]; 256 is stored as 0 for both the source and the destination
 * (previously only the destination was normalised).
 *
 * @param arg            Text to parse.
 * @param exe_name       Program name passed on to usage().
 * @param src_alignment  Receives the source alignment.
 * @param dst_alignment  Receives the destination alignment.
 */
static void parse_alignment_arg(const char *arg, const char *exe_name,
                                int *src_alignment, int *dst_alignment)
{
    char *endptr = NULL;
    int src = parse_one_alignment(arg, exe_name, &endptr);
    int dst = src;

    if (endptr && *endptr == ':')
    {
        dst = parse_one_alignment(endptr + 1, exe_name, NULL);
    }

    *src_alignment = src;
    *dst_alignment = dst;
}
|
||||
|
||||
/** Setup and run a test */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
/* Size of src and dest buffers */
|
||||
size_t buffer_size = MIN_BUFFER_SIZE;
|
||||
|
||||
/* Number of bytes per call */
|
||||
int count = 31;
|
||||
/* Number of times to run */
|
||||
int loops = 10000000;
|
||||
/* True to flush the cache each time */
|
||||
int flush = 0;
|
||||
/* Name of the test */
|
||||
const char *name = NULL;
|
||||
/* Alignment of buffers */
|
||||
int src_alignment = 8;
|
||||
int dst_alignment = 8;
|
||||
/* Name of the run */
|
||||
const char *run_id = "0";
|
||||
|
||||
int opt;
|
||||
|
||||
while ((opt = getopt(argc, argv, "c:l:ft:r:hva:")) > 0)
|
||||
{
|
||||
switch (opt)
|
||||
{
|
||||
case 'c':
|
||||
count = parse_int_arg(optarg, argv[0]);
|
||||
break;
|
||||
case 'l':
|
||||
loops = parse_int_arg(optarg, argv[0]);
|
||||
break;
|
||||
case 'a':
|
||||
parse_alignment_arg(optarg, argv[0], &src_alignment, &dst_alignment);
|
||||
break;
|
||||
case 'f':
|
||||
flush = 1;
|
||||
break;
|
||||
case 't':
|
||||
name = strdup(optarg);
|
||||
break;
|
||||
case 'r':
|
||||
run_id = strdup(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
usage(argv[0]);
|
||||
break;
|
||||
default:
|
||||
usage(argv[0]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find the test by name */
|
||||
const struct test *ptest = find_test(name);
|
||||
|
||||
if (ptest == NULL)
|
||||
{
|
||||
usage(argv[0]);
|
||||
}
|
||||
|
||||
if (count + MAX_ALIGNMENT * 2 > MIN_BUFFER_SIZE)
|
||||
{
|
||||
buffer_size = count + MAX_ALIGNMENT * 2;
|
||||
}
|
||||
|
||||
/* Buffers to read and write from */
|
||||
char *src = malloc(buffer_size);
|
||||
char *dest = malloc(buffer_size);
|
||||
|
||||
assert(src != NULL && dest != NULL);
|
||||
|
||||
src = realign(src, src_alignment);
|
||||
dest = realign(dest, dst_alignment);
|
||||
|
||||
/* Fill the buffer with non-zero, reproducable random data */
|
||||
srandom(1539);
|
||||
|
||||
for (int i = 0; i < buffer_size; i++)
|
||||
{
|
||||
src[i] = (char)random() | 1;
|
||||
dest[i] = src[i];
|
||||
}
|
||||
|
||||
/* Make sure the buffers are null terminated for any string tests */
|
||||
src[count] = 0;
|
||||
dest[count] = 0;
|
||||
|
||||
struct timespec start, end;
|
||||
int err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
|
||||
assert(err == 0);
|
||||
|
||||
/* Preload */
|
||||
stub_t stub = ptest->stub;
|
||||
|
||||
/* Run two variants to reduce the cost of testing for the flush */
|
||||
if (flush == 0)
|
||||
{
|
||||
for (int i = 0; i < loops; i++)
|
||||
{
|
||||
(*stub)(dest, src, count);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < loops; i++)
|
||||
{
|
||||
(*stub)(dest, src, count);
|
||||
empty(dest);
|
||||
}
|
||||
}
|
||||
|
||||
err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
|
||||
assert(err == 0);
|
||||
|
||||
/* Drop any leading path and pull the variant name out of the executable */
|
||||
char *variant = strrchr(argv[0], '/');
|
||||
|
||||
if (variant == NULL)
|
||||
{
|
||||
variant = argv[0];
|
||||
}
|
||||
|
||||
variant = strstr(variant, "try-");
|
||||
assert(variant != NULL);
|
||||
|
||||
double elapsed = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9;
|
||||
/* Estimate the bounce time. Measured on a Panda. */
|
||||
double bounced = 0.448730 * loops / 50000000;
|
||||
|
||||
/* Dump both machine and human readable versions */
|
||||
printf("%s:%s:%u:%u:%d:%d:%s:%.6f: took %.6f s for %u calls to %s of %u bytes. ~%.3f MB/s corrected.\n",
|
||||
variant + 4, ptest->name,
|
||||
count, loops, src_alignment, dst_alignment, run_id,
|
||||
elapsed,
|
||||
elapsed, loops, ptest->name, count,
|
||||
(double)loops*count/(elapsed - bounced)/(1024*1024));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
# Copyright (c) 2011-2012, Linaro Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the Linaro nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
AC_INIT(cortex-strings, 1.1-2012.06~dev)
AM_INIT_AUTOMAKE(foreign subdir-objects color-tests dist-bzip2)
AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES(Makefile)
AC_CANONICAL_HOST
AM_PROG_AS
AC_PROG_CC
AC_PROG_LIBTOOL

dnl CPU variant used when --with-cpu is not given; only set for 32-bit ARM.
default_submachine=

dnl Map the canonical host onto the set of routines to build.
case $host in
aarch64*-*-*)
  arch=aarch64
  ;;
arm*-*-*)
  arch=aarch32
  default_submachine=cortex-a9
  ;;
dnl Two wildcards also match three-part triplets such as
dnl x86_64-unknown-freebsd11.0, which the four-part pattern rejected.
x86_64-*-*)
  arch=generic
  ;;
*)
  AC_MSG_ERROR([unknown architecture $host])
  ;;
esac

AM_CONDITIONAL([HOST_AARCH32], [test "x$arch" = xaarch32])
AM_CONDITIONAL([HOST_AARCH64], [test "x$arch" = xaarch64])
AM_CONDITIONAL([HOST_GENERIC], [test "x$arch" = xgeneric])

dnl --with-cpu=CPU selects the CPU specific code; --without-cpu leaves
dnl submachine unset.
AC_ARG_WITH([cpu],
	    AS_HELP_STRING([--with-cpu=CPU],
			   [select code for CPU variant @<:@default=cortex-a9@:>@]),
[dnl
  case "$withval" in
  yes|'') AC_MSG_ERROR([--with-cpu requires an argument]) ;;
  no) ;;
  *) submachine="$withval" ;;
  esac
],
[submachine=$default_submachine])

AC_SUBST(submachine)
AM_CONDITIONAL([WITH_SUBMACHINE], [test "x$submachine" != x])

AC_ARG_WITH([neon],
	    AS_HELP_STRING([--with-neon],
			   [include NEON specific routines @<:@default=yes@:>@]),
	    [with_neon=$withval],
	    [with_neon=yes])
AC_SUBST(with_neon)
AM_CONDITIONAL(WITH_NEON, test "x$with_neon" = xyes)

AC_ARG_WITH([vfp],
	    AS_HELP_STRING([--with-vfp],
			   [include VFP specific routines @<:@default=yes@:>@]),
	    [with_vfp=$withval],
	    [with_vfp=yes])
AC_SUBST(with_vfp)
AM_CONDITIONAL(WITH_VFP, test "x$with_vfp" = xyes)

AC_OUTPUT
|
||||
+79
@@ -0,0 +1,79 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Add the modified BSD license to a file
|
||||
#
|
||||
|
||||
# Scratch directory for the generated headers; removed on exit
f=$(mktemp -d)
trap 'rm -rf "$f"' EXIT

year=$(date +%Y)
cat > "$f/original" <<EOF
Copyright (c) $year, Linaro Limited
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Linaro nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
EOF

# Translate it to C style.  -E is the extended regex flag understood by
# both GNU and BSD sed; the second sed strips trailing blanks.
echo "/*" > "$f/c"
sed -E 's/(.*)/ * \1/' "$f/original" | sed -E 's/ +$//' >> "$f/c"
echo " */" >> "$f/c"
echo >> "$f/c"

# ...and shell style
sed -E 's/(.*)/# \1/' "$f/original" | sed -E 's/ +$//' > "$f/shell"
echo '#' >> "$f/shell"
echo >> "$f/shell"

# "$@" keeps file names containing spaces or glob characters intact
for name in "$@"; do
    if grep -q Copyright "$name"; then
	echo "$name already has some type of copyright"
	continue
    fi

    case $name in
	# These files don't have an explicit license
	*autogen.sh*)
	    continue;;
	*reference/newlib/*)
	    continue;;
	*reference/newlib-xscale/*)
	    continue;;
	*/dhry/*)
	    continue;;

	*.c)
	    src=$f/c
	    ;;
	*.sh|*.am|*.ac)
	    src=$f/shell
	    ;;
	*)
	    echo "Unrecognised extension on $name"
	    continue
	    ;;
    esac

    # Write back through the existing file rather than mv-ing the
    # temporary over it, so the original permission bits are kept.
    cat "$src" "$name" > "$f/next"
    cat "$f/next" > "$name"
    echo "Updated $name"
done
|
||||
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""Simple harness that benchmarks different variants of the routines,
|
||||
caches the results, and emits all of the records at the end.
|
||||
|
||||
Results are generated for different values of:
|
||||
* Source
|
||||
* Routine
|
||||
* Length
|
||||
* Alignment
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import math
|
||||
import sys
|
||||
|
||||
# Prefix to the executables; the variant name is appended to it
build = '../build/try-'

# Every routine except the 'bounce' calibration stub
ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen'

# Routines provided by each library variant, as a space separated string
HAS = {
    'this': 'bounce memchr memcpy memset strchr strcmp strcpy strlen',
    'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen',
    'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen',
    'bionic-c': ALL,
    'csl': 'memcpy memset',
    'glibc': 'memcpy memset strchr strlen',
    'glibc-c': ALL,
    'newlib': 'memcpy strcmp strcpy strlen',
    'newlib-c': ALL,
    'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
    'plain': 'memset memcpy strcmp strcpy',
}

# Values handed to the harness's -a option
BOUNCE_ALIGNMENTS = ['1']
# Powers of two from 1 to 32
SINGLE_BUFFER_ALIGNMENTS = list(str(2 ** shift) for shift in range(6))
# Same source alignments, each paired with a destination alignment of 32
DUAL_BUFFER_ALIGNMENTS = list('%s:32' % value
                              for value in SINGLE_BUFFER_ALIGNMENTS)

# Alignments to try for each routine
ALIGNMENTS = {'bounce': BOUNCE_ALIGNMENTS}
ALIGNMENTS.update((routine, SINGLE_BUFFER_ALIGNMENTS)
                  for routine in 'memchr memset strchr strlen'.split())
ALIGNMENTS.update((routine, DUAL_BUFFER_ALIGNMENTS)
                  for routine in 'memcmp memcpy strcmp strcpy'.split())

VARIANTS = sorted(HAS.keys())
FUNCTIONS = sorted(ALIGNMENTS.keys())

# Number of timed runs per configuration
NUM_RUNS = 5
|
||||
|
||||
def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False):
    """Perform a single run, exercising the cache as appropriate.

    cache maps 'variant:function:bytes:loops:src:dst:run_id' to the full
    record printed by the harness and is updated in place.  alignment is
    the value for the harness's -a option, either 'N' or 'SRC:DST'.

    Returns the elapsed time in seconds taken from the record.  Raises
    AssertionError if the executable cannot be started.
    """
    # The harness always prints separate source and destination
    # alignments, and main() rebuilds keys from those printed fields, so
    # expand a single value to 'N:N' to match a reloaded cache.
    alignment_key = '%s' % alignment

    if ':' not in alignment_key:
        alignment_key = '%s:%s' % (alignment_key, alignment_key)

    key = ':'.join('%s' % x for x in (variant, function, bytes, loops,
                                      alignment_key, run_id))

    if key in cache:
        got = cache[key]
    else:
        cmd = '%s%s -t %s -c %s -l %s -a %s -r %s' % (
            build, variant, function, bytes, loops, alignment, run_id)

        try:
            # universal_newlines gives text rather than bytes on Python 3
            got = subprocess.check_output(cmd.split(),
                                          universal_newlines=True).strip()
        except OSError as ex:
            raise AssertionError('Error %s while running %s' % (ex, cmd))

    # The eighth field of the record is the elapsed time
    took = float(got.split(':')[7])

    cache[key] = got

    if not quiet:
        print(got)
        sys.stdout.flush()

    return took
|
||||
|
||||
def run_many(cache, variants, bytes, all_functions):
    """Run every function/alignment/variant combination over all sizes.

    bytes is the list of block sizes to test.  An empty all_functions
    means every function known to HAS, with those of 'this' first.
    """
    # We want the data to come out in a useful order.  So fix an
    # alignment and function, and do all sizes for a variant first
    sizes = sorted(bytes)
    mid = sizes[int(len(sizes)/1.5)]

    if not all_functions:
        # Use the ordering in 'this' as the default
        all_functions = HAS['this'].split()

        # Find all other functions
        for names in HAS.values():
            for name in names.split():
                if name not in all_functions:
                    all_functions.append(name)

    for function in all_functions:
        for alignment in ALIGNMENTS[function]:
            for variant in variants:
                if function not in HAS[variant].split():
                    continue

                # Run a tracer through and see how long it takes and
                # adjust the number of loops based on that.  Not great
                # for memchr() and similar which are O(n), but it will
                # do
                budget = 50000000
                want = 5.0

                tracer_loops = int(budget / math.sqrt(max(1, mid)))
                took = run(cache, variant, function, mid, tracer_loops,
                           alignment, 0, quiet=True)
                # Keep it reasonable for silly routines like bounce
                budget = budget * min(20, max(0.05, want/took))

                # Round the budget to a few significant figures
                scale = 10**int(math.log10(budget) - 1)
                budget = scale*int(budget/scale)

                for size in sizes:
                    # Figure out the number of loops to give a roughly
                    # consistent run
                    loops = int(budget / math.sqrt(max(1, size)))

                    for run_id in range(0, NUM_RUNS):
                        run(cache, variant, function, size, loops, alignment,
                            run_id)
|
||||
|
||||
def run_top(cache):
    """Parse the command line, build the list of sizes and run them all."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--variants", nargs="+",
                        help="library variant to run (run all if not specified)",
                        default=VARIANTS, choices=VARIANTS)
    parser.add_argument("-f", "--functions", nargs="+",
                        help="function to run (run all if not specified)",
                        default=FUNCTIONS, choices=FUNCTIONS)
    parser.add_argument("-l", "--limit", type=int,
                        help="upper limit to test to (in bytes)",
                        default=512*1024)
    args = parser.parse_args()

    sizes = []

    # Test all powers of 2, then the intermediate powers of 1.4
    for base in (2.0, 1.4):
        # Figure out how many steps get us up to the top
        steps = int(round(math.log(args.limit) / math.log(base)))
        sizes.extend(int(base**exponent) for exponent in range(0, steps+1))

    run_many(cache, args.variants, sizes, args.functions)
|
||||
|
||||
def main():
    """Load cached records, run the benchmarks and save the cache again.

    The cache file is rewritten even when the run is interrupted, so
    results gathered so far are kept.
    """
    cachename = 'cache.txt'

    cache = {}

    try:
        with open(cachename) as f:
            for line in f:
                line = line.strip()

                # A blank line would otherwise be stored under an empty key
                if not line:
                    continue

                # The first seven fields of a record identify the run
                parts = line.split(':')
                cache[':'.join(parts[:7])] = line
    except IOError:
        # No readable cache yet: start from scratch
        pass

    try:
        run_top(cache)
    finally:
        with open(cachename, 'w') as f:
            for line in sorted(cache.values()):
                f.write(line + '\n')

if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,27 @@
|
||||
"""Simple script that enables target specific blocks based on the first argument.
|
||||
|
||||
Matches comment blocks like this:
|
||||
|
||||
/* For Foo: abc
|
||||
def
|
||||
*/
|
||||
|
||||
and de-comments them giving:
|
||||
abc
|
||||
def
|
||||
"""
|
||||
import re
|
||||
import sys
|
||||
|
||||
def main():
    """De-comment the blocks for one target in the given files.

    sys.argv[1] is the target name as it appears after 'For ' in the
    comment; the remaining arguments are files that are rewritten in
    place.
    """
    key = sys.argv[1]
    # Escape the key so characters such as '+' or '.' in a target name
    # are matched literally rather than as regex operators
    expr = re.compile(r'/\* For %s:\s([^*]+)\*/' % re.escape(key), re.M)

    for arg in sys.argv[2:]:
        with open(arg) as f:
            body = f.read()
        with open(arg, 'w') as f:
            f.write(expr.sub(r'\1', body))

if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,78 @@
|
||||
"""Shared routines for the plotters."""
|
||||
|
||||
import fileinput
|
||||
import collections
|
||||
|
||||
# One parsed line of harness output; 'rest' is the human readable tail
Record = collections.namedtuple('Record', [
    'variant', 'function', 'bytes', 'loops', 'src_alignment',
    'dst_alignment', 'run_id', 'elapsed', 'rest'])
|
||||
|
||||
|
||||
def make_colours():
    """Return a fresh iterator over the eleven plot colours, in order."""
    palette = ['m', 'b', 'g', 'r', 'c', 'y', 'k',
               'pink', 'orange', 'brown', 'grey']
    return iter(palette)
|
||||
|
||||
def parse_value(v):
    """Turn text into a primitive.

    Text containing a '.' is tried as a float, anything else as an int.
    Text that does not convert is returned unchanged.
    """
    convert = float if '.' in v else int

    try:
        return convert(v)
    except ValueError:
        return v
|
||||
|
||||
def create_column_tuple(record, names):
    """Return the named attributes of record as a tuple, in the given order."""
    return tuple(getattr(record, name) for name in names)
|
||||
|
||||
def unique(records, name, prefer=''):
    """Return the unique values of a column in the records.

    name is either one field name or a tuple of field names, in which
    case each value is a tuple of those fields.  The result is sorted;
    strings found inside `prefer` sort ahead of the others.
    """
    if type(name) == tuple:
        seen = set(tuple(getattr(record, field) for field in name)
                   for record in records)
    else:
        seen = set(getattr(record, name) for record in records)

    values = list(seen)

    if not values:
        return values

    if type(values[0]) != str:
        return sorted(values)

    def rank(value):
        # prefer.find() is -1 when the value does not occur in prefer
        return '%-06d|%s' % (-prefer.find(value), value)

    return sorted(values, key=rank)
|
||||
|
||||
def alignments_equal(alignments):
    """Return True if every (src, dst) pair has matching alignments."""
    return all(pair[0] == pair[1] for pair in alignments)
|
||||
|
||||
def parse_row(line):
    """Split one colon separated line into a Record of primitive values."""
    fields = line.split(':')
    return Record(*[parse_value(field) for field in fields])
|
||||
|
||||
def parse():
    """Parse a record file into named tuples, correcting for loop
    overhead along the way.

    Input comes from fileinput.  'bounce' records are dropped from the
    result; their elapsed time is subtracted from every other record
    with the same byte and loop counts.
    """
    records = [parse_row(x) for x in fileinput.input()]

    # Pull out any bounce values; a later record replaces an earlier one
    costs = dict(((x.bytes, x.loops), x.elapsed)
                 for x in records if x.function == 'bounce')

    # Fix up all of the records for cost
    out = []

    for record in records:
        if record.function == 'bounce':
            continue

        cost = costs.get((record.bytes, record.loops), None)

        if cost:
            record = record._replace(elapsed=record.elapsed - cost)

        out.append(record)

    return out
|
||||
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""Plot the performance of different variants of one routine versus alignment.
|
||||
"""
|
||||
|
||||
import libplot
|
||||
|
||||
import pylab
|
||||
|
||||
|
||||
def plot(records, bytes, function):
    """Save a bar chart of rate against alignment for one function and size.

    One group of bars is drawn per alignment pair and one bar per
    variant.  The image is written to alignment-<function>-<bytes>.png.
    """
    records = [x for x in records if x.bytes==bytes and x.function==function]

    variants = libplot.unique(records, 'variant', prefer='this')
    alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))

    X = pylab.arange(len(alignments))
    width = 1.0/(len(variants)+1)

    colours = libplot.make_colours()

    pylab.figure(1).set_size_inches((16, 12))
    pylab.clf()

    for i, variant in enumerate(variants):
        heights = []

        for alignment in alignments:
            matches = [x for x in records
                       if x.variant==variant
                       and x.src_alignment==alignment[0]
                       and x.dst_alignment==alignment[1]]

            if matches:
                # Rate in MB/s, averaged over the repeated runs
                vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for
                        match in matches]
                mean = sum(vals)/len(vals)
                heights.append(mean)
            else:
                heights.append(0)

        # next() works on Python 2.6+ and 3, unlike the .next() method
        pylab.bar(X+i*width, heights, width, color=next(colours), label=variant)

    axes = pylab.axes()
    if libplot.alignments_equal(alignments):
        alignment_labels = ["%s" % x[0] for x in alignments]
    else:
        alignment_labels = ["%s:%s" % (x[0], x[1]) for x in alignments]
    axes.set_xticklabels(alignment_labels)
    axes.set_xticks(X + 0.5)

    pylab.title('Performance of different variants of %(function)s for %(bytes)d byte blocks' % locals())
    pylab.xlabel('Alignment')
    pylab.ylabel('Rate (MB/s)')
    pylab.legend(loc='lower right', ncol=3)
    pylab.grid()
    pylab.savefig('alignment-%(function)s-%(bytes)d.png' % locals(), dpi=72)
|
||||
|
||||
def main():
    """Plot every function/size combination found in the input records."""
    records = libplot.parse()
    sizes = libplot.unique(records, 'bytes')

    for function in libplot.unique(records, 'function'):
        for size in sizes:
            plot(records, size, function)

    pylab.show()

if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""Plot the performance for different block sizes of one function across
|
||||
variants.
|
||||
"""
|
||||
|
||||
import libplot
|
||||
|
||||
import pylab
|
||||
import pdb
|
||||
import math
|
||||
|
||||
def pretty_kb(v):
    """Format a byte count as a short label.

    Values below 1024 are shown as they are, exact multiples of 1024 as
    'N k' and anything else as kilobytes with one decimal place.
    """
    if v < 1024:
        return '%d' % v

    if v % 1024 == 0:
        return '%d k' % (v//1024)

    # Divide by a float so Python 2 does not truncate the fraction
    return '%.1f k' % (v/1024.0)
|
||||
|
||||
def plot(records, function, alignment=None, scale=1):
    """Plot rate against block size for one function, one line per variant.

    alignment is an optional (src, dst) pair used to filter the records.
    scale multiplies the 6.4 x 4.8 inch figure size.

    Returns True when a figure was drawn, and False when the records do
    not reduce to exactly one alignment pair or leave nothing to plot.
    """
    variants = libplot.unique(records, 'variant', prefer='this')
    records = [x for x in records if x.function==function]

    if alignment != None:
        records = [x for x in records if x.src_alignment==alignment[0] and
                   x.dst_alignment==alignment[1]]

    alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
    if len(alignments) != 1:
        return False
    if libplot.alignments_equal(alignments):
        aalignment = alignments[0][0]
    else:
        aalignment = "%s:%s" % (alignments[0][0], alignments[0][1])

    colours = libplot.make_colours()
    all_x = []

    pylab.figure(1).set_size_inches((6.4*scale, 4.8*scale))
    pylab.clf()

    if 'str' in function:
        # The harness fills out to 16k.  Anything past that is an
        # early match
        top = 16384
    else:
        top = 2**31

    for variant in variants:
        matches = [x for x in records if x.variant==variant and x.bytes <= top]
        matches.sort(key=lambda x: x.bytes)

        X = sorted(list(set([x.bytes for x in matches])))
        Y = []
        Yerr = []
        for xbytes in X:
            vals = [x.bytes*x.loops/x.elapsed/(1024*1024) for x in matches if x.bytes == xbytes]
            if len(vals) > 1:
                mean = sum(vals)/len(vals)
                Y.append(mean)
                if len(Yerr) == 0:
                    Yerr = [[], []]
                # Asymmetric error bars: distance to the slowest and
                # fastest of the repeated runs
                err1 = max(vals) - mean
                assert err1 >= 0
                err2 = min(vals) - mean
                assert err2 <= 0
                Yerr[0].append(abs(err2))
                Yerr[1].append(err1)
            else:
                Y.append(vals[0])

        all_x.extend(X)
        # next() works on Python 2.6+ and 3, unlike the .next() method
        colour = next(colours)

        if X:
            pylab.plot(X, Y, c=colour)
            if len(Yerr) > 0:
                pylab.errorbar(X, Y, yerr=Yerr, c=colour, label=variant, fmt='o')
            else:
                pylab.scatter(X, Y, c=colour, label=variant, edgecolors='none')

    # Every size was above the cut-off: there is nothing to scale the
    # axes by, and max() below would raise on the empty list
    if not all_x:
        return False

    pylab.legend(loc='upper left', ncol=3, prop={'size': 'small'})
    pylab.grid()
    pylab.title('%(function)s of %(aalignment)s byte aligned blocks' % locals())
    pylab.xlabel('Size (B)')
    pylab.ylabel('Rate (MB/s)')

    # Figure out how high the range goes
    top = max(all_x)

    power = int(round(math.log(max(all_x)) / math.log(2)))

    pylab.semilogx()

    pylab.axes().set_xticks([2**x for x in range(0, power+1)])
    pylab.axes().set_xticklabels([pretty_kb(2**x) for x in range(0, power+1)])
    pylab.xlim(0, top)
    pylab.ylim(0, pylab.ylim()[1])
    return True
|
||||
|
||||
def main():
    """Plot each function/alignment pair at two scales and save the images."""
    records = libplot.parse()

    functions = libplot.unique(records, 'function')
    alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))

    for function in functions:
        for alignment in alignments:
            for scale in [1, 2.5]:
                # plot() returns False when there was nothing to draw
                if not plot(records, function, alignment, scale):
                    continue

                filename = 'sizes-%s-%02d-%02d-%.1f.png' % (
                    function, alignment[0], alignment[1], scale)
                pylab.savefig(filename, dpi=72)

    pylab.show()

if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""Plot the performance of different variants of the string routines
|
||||
for one size.
|
||||
"""
|
||||
|
||||
import libplot
|
||||
|
||||
import pylab
|
||||
|
||||
|
||||
def plot(records, bytes):
    """Save a bar chart comparing the variants of every function at one size.

    Only records with a source alignment of 8 are used.  The image is
    written to top-<bytes>.png.
    """
    records = [x for x in records if x.bytes==bytes]

    variants = libplot.unique(records, 'variant', prefer='this')
    functions = libplot.unique(records, 'function')

    X = pylab.arange(len(functions))
    width = 1.0/(len(variants)+1)

    colours = libplot.make_colours()

    pylab.figure(1).set_size_inches((16, 12))
    pylab.clf()

    for i, variant in enumerate(variants):
        heights = []

        for function in functions:
            matches = [x for x in records
                       if x.variant==variant
                       and x.function==function
                       and x.src_alignment==8]

            if matches:
                # Rate in MB/s, averaged over the repeated runs
                vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for
                        match in matches]
                mean = sum(vals)/len(vals)
                heights.append(mean)
            else:
                heights.append(0)

        # next() works on Python 2.6+ and 3, unlike the .next() method
        pylab.bar(X+i*width, heights, width, color=next(colours), label=variant)

    axes = pylab.axes()
    axes.set_xticklabels(functions)
    axes.set_xticks(X + 0.5)

    pylab.title('Performance of different variants for %d byte blocks' % bytes)
    pylab.ylabel('Rate (MB/s)')
    pylab.legend(loc='upper left', ncol=3)
    pylab.grid()
    pylab.savefig('top-%06d.png' % bytes, dpi=72)
|
||||
|
||||
def main():
    """Plot one chart for every block size found in the input records."""
    records = libplot.parse()
    sizes = libplot.unique(records, 'bytes')

    for size in sizes:
        plot(records, size)

    pylab.show()

if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,123 @@
|
||||
"""Plot the results for each test. Spits out a set of images into the
|
||||
current directory.
|
||||
"""
|
||||
|
||||
import libplot
|
||||
|
||||
import fileinput
|
||||
import collections
|
||||
import pprint
|
||||
|
||||
import pylab
|
||||
|
||||
Record = collections.namedtuple('Record', 'variant test size loops src_alignment dst_alignment run_id rawtime comment time bytes rate')
|
||||
|
||||
def unique(rows, name):
    """Return the distinct values of attribute `name` found on `rows`,
    as a list sorted in ascending order.
    """
    seen = set()
    for row in rows:
        seen.add(getattr(row, name))
    return sorted(seen)
|
||||
|
||||
def to_float(v):
    """Convert a string into a better type.

    Strings containing a '.' are converted with float(), all others with
    int().  Anything that cannot be converted is returned unchanged.

    >>> to_float('foo')
    'foo'
    >>> to_float('1.23')
    1.23
    >>> to_float('45')
    45
    """
    try:
        if '.' in v:
            return float(v)
        else:
            return int(v)
    except (TypeError, ValueError):
        # ValueError: not a number; TypeError: `v` is not a string at all.
        # Anything else (e.g. KeyboardInterrupt) must propagate.
        return v
|
||||
|
||||
def parse():
    """Read the colon separated results from fileinput and return Records.

    Each non-blank input line supplies the leading fields of a Record; the
    trailing three fields (time, bytes, rate) are computed here.  The rows
    whose test is 'bounce' are used to fit a straight line of rawtime
    against loops, and that fitted value is subtracted from every row's
    rawtime to give its time.
    """
    # Split the input up.  Blank lines (such as a trailing newline) are
    # skipped as they cannot be unpacked into a Record
    rows = [x.strip().split(':') for x in fileinput.input() if x.strip()]
    # Automatically turn numbers into the base type
    rows = [[to_float(y) for y in x] for x in rows]

    # Scan once to calculate the overhead
    r = [Record(*(x + [0, 0, 0])) for x in rows]
    bounces = pylab.array([(x.loops, x.rawtime) for x in r if x.test == 'bounce'])
    fit = pylab.polyfit(bounces[:,0], bounces[:,1], 1)

    records = []

    for row in rows:
        # Make a dummy record so we can use the names
        r1 = Record(*(row + [0, 0, 0]))

        bytes = r1.size * r1.loops
        # Calculate the bounce time
        delta = pylab.polyval(fit, [r1.loops])
        time = r1.rawtime - delta
        rate = bytes / time

        records.append(Record(*(row + [time, bytes, rate])))

    return records
|
||||
|
||||
def plot(records, field, scale, ylabel):
|
||||
variants = unique(records, 'variant')
|
||||
tests = unique(records, 'test')
|
||||
|
||||
colours = libplot.make_colours()
|
||||
|
||||
# A little hack. We want the 'all' record to be drawn last so
|
||||
# that it's obvious on the graph. Assume that no tests come
|
||||
# before it alphabetically
|
||||
variants.reverse()
|
||||
|
||||
for test in tests:
|
||||
for variant in variants:
|
||||
v = [x for x in records if x.test==test and x.variant==variant]
|
||||
v.sort(key=lambda x: x.size)
|
||||
V = pylab.array([(x.size, getattr(x, field)) for x in v])
|
||||
|
||||
# Ensure our results appear
|
||||
order = 1 if variant == 'this' else 0
|
||||
|
||||
try:
|
||||
# A little hack. We want the 'all' to be obvious on
|
||||
# the graph
|
||||
if variant == 'all':
|
||||
pylab.scatter(V[:,0], V[:,1]/scale, label=variant)
|
||||
pylab.plot(V[:,0], V[:,1]/scale)
|
||||
else:
|
||||
pylab.plot(V[:,0], V[:,1]/scale, label=variant,
|
||||
zorder=order, c = colours.next())
|
||||
|
||||
except Exception, ex:
|
||||
# michaelh1 likes to run this script while the test is
|
||||
# still running which can lead to bad data
|
||||
print ex, 'on %s of %s' % (variant, test)
|
||||
|
||||
pylab.legend(loc='lower right', ncol=2, prop={'size': 'small'})
|
||||
pylab.xlabel('Block size (B)')
|
||||
pylab.ylabel(ylabel)
|
||||
pylab.title('%s %s' % (test, field))
|
||||
pylab.grid()
|
||||
|
||||
pylab.savefig('%s-%s.png' % (test, field), dpi=100)
|
||||
pylab.semilogx(basex=2)
|
||||
pylab.savefig('%s-%s-semilog.png' % (test, field), dpi=100)
|
||||
pylab.clf()
|
||||
|
||||
def test():
    """Run the doctests embedded in this module's docstrings."""
    from doctest import testmod
    testmod()
|
||||
|
||||
def main():
    """Parse the results, then write the rate plots followed by the time plots."""
    records = parse()

    jobs = (
        ('rate', 1024**2, 'Rate (MB/s)'),
        ('time', 1, 'Total time (s)'),
    )
    for field, scale, ylabel in jobs:
        plot(records, field, scale, ylabel)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Executable
+9
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Trims the whitespace from around any given images
|
||||
#
|
||||
|
||||
# Trim every image named on the command line, replacing it in place.
# "$@" and every use of $i are quoted so that paths containing spaces or
# glob characters are passed through intact.  The temporary file is created
# next to the image (dir/a.png -> dir/next-a.png) so arguments with a
# directory component work, and the original is only overwritten when
# convert succeeded.
for i in "$@"; do
    tmp="$(dirname -- "$i")/next-$(basename -- "$i")"
    convert "$i" -bordercolor white -border 1x1 -trim +repage -alpha off +dither -colors 32 "PNG8:$tmp" &&
        mv -- "$tmp" "$i"
done
|
||||
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* memchr - find a character in a memory zone
|
||||
*
|
||||
* Copyright (c) 2014, ARM Limited
|
||||
* All rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
* Neon Available.
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
|
||||
#define srcin x0
|
||||
#define chrin w1
|
||||
#define cntin x2
|
||||
|
||||
#define result x0
|
||||
|
||||
#define src x3
|
||||
#define tmp x4
|
||||
#define wtmp2 w5
|
||||
#define synd x6
|
||||
#define soff x9
|
||||
#define cntrem x10
|
||||
|
||||
#define vrepchr v0
|
||||
#define vdata1 v1
|
||||
#define vdata2 v2
|
||||
#define vhas_chr1 v3
|
||||
#define vhas_chr2 v4
|
||||
#define vrepmask v5
|
||||
#define vend v6
|
||||
|
||||
/*
|
||||
* Core algorithm:
|
||||
*
|
||||
* For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
|
||||
* per byte. For each tuple, bit 0 is set if the relevant byte matched the
|
||||
* requested character and bit 1 is not used (faster than using a 32bit
|
||||
* syndrome). Since the bits in the syndrome reflect exactly the order in which
|
||||
* things occur in the original string, counting trailing zeros allows us to
|
||||
* identify exactly which byte has matched.
|
||||
*/
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/*
 * memchr(srcin = x0, chrin = w1, cntin = x2)
 *
 * Scans cntin bytes starting at srcin for the low byte of chrin and
 * returns in x0 the address of the first match, or 0 when there is no
 * match (including when cntin is 0).  Data is always loaded as whole
 * 32-byte aligned blocks; bytes in front of srcin and past the end of
 * the range are masked out of the syndrome before it is examined.
 */
def_fn memchr
|
||||
/* Do not dereference srcin if no bytes to compare. */
|
||||
cbz cntin, .Lzero_length
|
||||
/*
|
||||
* Magic constant 0x40100401 allows us to identify which lane matches
|
||||
* the requested byte.
|
||||
*/
|
||||
mov wtmp2, #0x0401
|
||||
movk wtmp2, #0x4010, lsl #16
|
||||
dup vrepchr.16b, chrin
|
||||
/* Work with aligned 32-byte chunks */
|
||||
bic src, srcin, #31
|
||||
dup vrepmask.4s, wtmp2
|
||||
/* soff = offset of srcin inside its 32-byte block; sets Z when aligned. */
ands soff, srcin, #31
|
||||
/* 'and' leaves the flags alone, so b.eq still tests the 'ands' above. */
and cntrem, cntin, #31
|
||||
b.eq .Lloop
|
||||
|
||||
/*
|
||||
* Input string is not 32-byte aligned. We calculate the syndrome
|
||||
* value for the aligned 32 bytes block containing the first bytes
|
||||
* and mask the irrelevant part.
|
||||
*/
|
||||
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
sub tmp, soff, #32
|
||||
/*
 * cntin -= (32 - soff).  The flags set here are consumed by the
 * b.ls .Lmasklast below; none of the instructions in between is a
 * flag-setting form.
 */
adds cntin, cntin, tmp
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
|
||||
addp vend.16b, vend.16b, vend.16b /* 128->64 */
|
||||
mov synd, vend.d[0]
|
||||
/* Clear the soff*2 lower bits */
|
||||
lsl tmp, soff, #1
|
||||
lsr synd, synd, tmp
|
||||
lsl synd, synd, tmp
|
||||
/* The first block can also be the last */
|
||||
b.ls .Lmasklast
|
||||
/* Have we found something already? */
|
||||
cbnz synd, .Ltail
|
||||
|
||||
.Lloop:
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
subs cntin, cntin, #32
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
/* If we're out of data we finish regardless of the result */
|
||||
b.ls .Lend
|
||||
/* Use a fast check for the termination condition */
|
||||
orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
|
||||
addp vend.2d, vend.2d, vend.2d
|
||||
mov synd, vend.d[0]
|
||||
/* We're not out of data, loop if we haven't found the character */
|
||||
cbz synd, .Lloop
|
||||
|
||||
.Lend:
|
||||
/* Termination condition found, let's calculate the syndrome value */
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
|
||||
addp vend.16b, vend.16b, vend.16b /* 128->64 */
|
||||
mov synd, vend.d[0]
|
||||
/* Only do the clear for the last possible block */
|
||||
/* The flags tested here are still those of the subs in .Lloop. */
b.hi .Ltail
|
||||
|
||||
.Lmasklast:
|
||||
/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
|
||||
add tmp, cntrem, soff
|
||||
and tmp, tmp, #31
|
||||
sub tmp, tmp, #32
|
||||
neg tmp, tmp, lsl #1
|
||||
lsl synd, synd, tmp
|
||||
lsr synd, synd, tmp
|
||||
|
||||
.Ltail:
|
||||
/* Count the trailing zeros using bit reversing */
|
||||
rbit synd, synd
|
||||
/* Compensate the last post-increment */
|
||||
sub src, src, #32
|
||||
/* Check that we have found a character */
|
||||
cmp synd, #0
|
||||
/* And count the leading zeros */
|
||||
clz synd, synd
|
||||
/* Compute the potential result */
|
||||
/* Two syndrome bits per byte, hence the shift right by one. */
add result, src, synd, lsr #1
|
||||
/* Select result or NULL */
|
||||
csel result, xzr, result, eq
|
||||
ret
|
||||
|
||||
.Lzero_length:
|
||||
mov result, #0
|
||||
ret
|
||||
|
||||
.size memchr, . - memchr
|
||||
@@ -0,0 +1,162 @@
|
||||
/* memcmp - compare memory
|
||||
|
||||
Copyright (c) 2013, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
*/
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* Parameters and result. */
|
||||
#define src1 x0
|
||||
#define src2 x1
|
||||
#define limit x2
|
||||
#define result x0
|
||||
|
||||
/* Internal variables. */
|
||||
#define data1 x3
|
||||
#define data1w w3
|
||||
#define data2 x4
|
||||
#define data2w w4
|
||||
#define has_nul x5
|
||||
#define diff x6
|
||||
#define endloop x7
|
||||
#define tmp1 x8
|
||||
#define tmp2 x9
|
||||
#define tmp3 x10
|
||||
#define pos x11
|
||||
#define limit_wd x12
|
||||
#define mask x13
|
||||
|
||||
/*
 * memcmp(src1 = x0, src2 = x1, limit = x2)
 *
 * Compares limit bytes and returns in x0 the difference between the
 * first pair of differing bytes (each taken as an unsigned value), or 0
 * when limit is 0 or all bytes are equal.  When both pointers share the
 * same alignment modulo 8 the comparison runs on aligned 8-byte words;
 * otherwise it falls back to the byte loop at .Lmisaligned8.
 */
def_fn memcmp p2align=6
|
||||
cbz limit, .Lret0
|
||||
eor tmp1, src1, src2
|
||||
tst tmp1, #7
|
||||
b.ne .Lmisaligned8
|
||||
ands tmp1, src1, #7
|
||||
b.ne .Lmutual_align
|
||||
/* limit_wd = number of 8-byte words needed to cover limit bytes. */
add limit_wd, limit, #7
|
||||
lsr limit_wd, limit_wd, #3
|
||||
/* Start of performance-critical section -- one 64B cache line. */
|
||||
.Lloop_aligned:
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
.Lstart_realigned:
|
||||
subs limit_wd, limit_wd, #1
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
/*
 * endloop = diff while words remain (ne after the subs), all ones
 * otherwise, so the cbz falls through on a difference or on the
 * last word.
 */
csinv endloop, diff, xzr, ne /* Last Dword or differences. */
|
||||
cbz endloop, .Lloop_aligned
|
||||
/* End of performance-critical section -- one 64B cache line. */
|
||||
|
||||
/* Not reached the limit, must have found a diff. */
|
||||
cbnz limit_wd, .Lnot_limit
|
||||
|
||||
/* Limit % 8 == 0 => all bytes significant. */
|
||||
ands limit, limit, #7
|
||||
b.eq .Lnot_limit
|
||||
|
||||
lsl limit, limit, #3 /* Bytes -> bits. */
|
||||
mov mask, #~0
|
||||
#ifdef __AARCH64EB__
|
||||
lsr mask, mask, limit
|
||||
#else
|
||||
lsl mask, mask, limit
|
||||
#endif
|
||||
/* mask covers the bytes beyond the limit: drop them from both words. */
bic data1, data1, mask
|
||||
bic data2, data2, mask
|
||||
|
||||
/* Force diff non-zero there so that the clz below stops at the limit. */
orr diff, diff, mask
|
||||
.Lnot_limit:
|
||||
|
||||
#ifndef __AARCH64EB__
|
||||
rev diff, diff
|
||||
rev data1, data1
|
||||
rev data2, data2
|
||||
#endif
|
||||
/* The MS-non-zero bit of DIFF marks either the first bit
|
||||
that is different, or the end of the significant data.
|
||||
Shifting left now will bring the critical information into the
|
||||
top bits. */
|
||||
clz pos, diff
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/* But we need to zero-extend (char is unsigned) the value and then
|
||||
perform a signed 32-bit subtraction. */
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
|
||||
.Lmutual_align:
|
||||
/* Sources are mutually aligned, but are not currently at an
|
||||
alignment boundary. Round down the addresses and then mask off
|
||||
the bytes that precede the start point. */
|
||||
bic src1, src1, #7
|
||||
bic src2, src2, #7
|
||||
add limit, limit, tmp1 /* Adjust the limit for the extra. */
|
||||
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
||||
ldr data1, [src1], #8
|
||||
neg tmp1, tmp1 /* Bits to alignment -64. */
|
||||
ldr data2, [src2], #8
|
||||
mov tmp2, #~0
|
||||
#ifdef __AARCH64EB__
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#else
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#endif
|
||||
add limit_wd, limit, #7
|
||||
/* Set the bytes in front of the start point to 0xff in both words so
   that they always compare equal. */
orr data1, data1, tmp2
|
||||
orr data2, data2, tmp2
|
||||
lsr limit_wd, limit_wd, #3
|
||||
b .Lstart_realigned
|
||||
|
||||
.Lret0:
|
||||
mov result, #0
|
||||
ret
|
||||
|
||||
.p2align 6
|
||||
.Lmisaligned8:
|
||||
/* Pre-decrement so that the subs in the loop borrows (carry clear)
   exactly when the byte pair just loaded is the last one. */
sub limit, limit, #1
|
||||
1:
|
||||
/* Perhaps we can do better than this. */
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
subs limit, limit, #1
|
||||
/* Compare the bytes only while more remain (cs); otherwise force
   NZCV to 0 so that b.eq is not taken and the loop ends. */
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||
b.eq 1b
|
||||
sub result, data1, data2
|
||||
ret
|
||||
.size memcmp, . - memcmp
|
||||
@@ -0,0 +1,225 @@
|
||||
/* Copyright (c) 2012, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses.
|
||||
*
|
||||
*/
|
||||
|
||||
#define dstin x0
|
||||
#define src x1
|
||||
#define count x2
|
||||
#define dst x3
|
||||
#define srcend x4
|
||||
#define dstend x5
|
||||
#define A_l x6
|
||||
#define A_lw w6
|
||||
#define A_h x7
|
||||
#define A_hw w7
|
||||
#define B_l x8
|
||||
#define B_lw w8
|
||||
#define B_h x9
|
||||
#define C_l x10
|
||||
#define C_h x11
|
||||
#define D_l x12
|
||||
#define D_h x13
|
||||
#define E_l src
|
||||
#define E_h count
|
||||
#define F_l srcend
|
||||
#define F_h dst
|
||||
#define tmp1 x9
|
||||
|
||||
#define L(l) .L ## l
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* Copies are split into 3 main cases: small copies of up to 16 bytes,
|
||||
medium copies of 17..96 bytes which are fully unrolled. Large copies
|
||||
of more than 96 bytes align the destination and use an unrolled loop
|
||||
processing 64 bytes per iteration.
|
||||
Small and medium copies read all data before writing, allowing any
|
||||
kind of overlap, and memmove tailcalls memcpy for these cases as
|
||||
well as non-overlapping copies.
|
||||
*/
|
||||
|
||||
/*
 * memcpy(dstin = x0, src = x1, count = x2)
 *
 * Copies count bytes from src to dstin.  x0 is never written, so it
 * still holds dstin on return.  srcend/dstend point one past the end of
 * the source/destination so that the tail can be addressed with
 * negative offsets.
 */
def_fn memcpy p2align=6
|
||||
prfm PLDL1KEEP, [src]
|
||||
add srcend, src, count
|
||||
add dstend, dstin, count
|
||||
cmp count, 16
|
||||
b.ls L(copy16)
|
||||
cmp count, 96
|
||||
b.hi L(copy_long)
|
||||
|
||||
/* Medium copies: 17..96 bytes. */
|
||||
/* Bits 6 and 5 of (count - 1) select the 65..96 and 33..64 cases. */
sub tmp1, count, 1
|
||||
ldp A_l, A_h, [src]
|
||||
tbnz tmp1, 6, L(copy96)
|
||||
ldp D_l, D_h, [srcend, -16]
|
||||
tbz tmp1, 5, 1f
|
||||
ldp B_l, B_h, [src, 16]
|
||||
ldp C_l, C_h, [srcend, -32]
|
||||
stp B_l, B_h, [dstin, 16]
|
||||
stp C_l, C_h, [dstend, -32]
|
||||
1:
|
||||
stp A_l, A_h, [dstin]
|
||||
stp D_l, D_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Small copies: 0..16 bytes. */
|
||||
L(copy16):
|
||||
cmp count, 8
|
||||
b.lo 1f
|
||||
/* 8..16 bytes: two 8-byte accesses that may overlap. */
ldr A_l, [src]
|
||||
ldr A_h, [srcend, -8]
|
||||
str A_l, [dstin]
|
||||
str A_h, [dstend, -8]
|
||||
ret
|
||||
.p2align 4
|
||||
1:
|
||||
/* count < 8 here: bit 2 set means 4..7 bytes, two 4-byte accesses. */
tbz count, 2, 1f
|
||||
ldr A_lw, [src]
|
||||
ldr A_hw, [srcend, -4]
|
||||
str A_lw, [dstin]
|
||||
str A_hw, [dstend, -4]
|
||||
ret
|
||||
|
||||
/* Copy 0..3 bytes. Use a branchless sequence that copies the same
|
||||
byte 3 times if count==1, or the 2nd byte twice if count==2. */
|
||||
1:
|
||||
cbz count, 2f
|
||||
lsr tmp1, count, 1
|
||||
ldrb A_lw, [src]
|
||||
ldrb A_hw, [srcend, -1]
|
||||
ldrb B_lw, [src, tmp1]
|
||||
strb A_lw, [dstin]
|
||||
strb B_lw, [dstin, tmp1]
|
||||
strb A_hw, [dstend, -1]
|
||||
2: ret
|
||||
|
||||
.p2align 4
|
||||
/* Copy 65..96 bytes (bit 6 of count - 1 is set). Copy 64 bytes from the start and
|
||||
32 bytes from the end. */
|
||||
L(copy96):
|
||||
ldp B_l, B_h, [src, 16]
|
||||
ldp C_l, C_h, [src, 32]
|
||||
ldp D_l, D_h, [src, 48]
|
||||
/* E_* and F_* alias src/count and srcend/dst: these two loads overwrite
   those registers, which are not read again afterwards. */
ldp E_l, E_h, [srcend, -32]
|
||||
ldp F_l, F_h, [srcend, -16]
|
||||
stp A_l, A_h, [dstin]
|
||||
stp B_l, B_h, [dstin, 16]
|
||||
stp C_l, C_h, [dstin, 32]
|
||||
stp D_l, D_h, [dstin, 48]
|
||||
stp E_l, E_h, [dstend, -32]
|
||||
stp F_l, F_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
/* Align DST to 16 byte alignment so that we don't cross cache line
|
||||
boundaries on both loads and stores. There are at least 96 bytes
|
||||
to copy, so copy 16 bytes unaligned and then align. The loop
|
||||
copies 64 bytes per iteration and prefetches one iteration ahead. */
|
||||
|
||||
.p2align 4
|
||||
L(copy_long):
|
||||
and tmp1, dstin, 15
|
||||
bic dst, dstin, 15
|
||||
ldp D_l, D_h, [src]
|
||||
/* Move src back by the same amount as dst so both use equal offsets. */
sub src, src, tmp1
|
||||
add count, count, tmp1 /* Count is now 16 too large. */
|
||||
ldp A_l, A_h, [src, 16]
|
||||
stp D_l, D_h, [dstin]
|
||||
ldp B_l, B_h, [src, 32]
|
||||
ldp C_l, C_h, [src, 48]
|
||||
ldp D_l, D_h, [src, 64]!
|
||||
subs count, count, 128 + 16 /* Test and readjust count. */
|
||||
b.ls 2f
|
||||
1:
|
||||
/* Store the 64 bytes loaded previously while loading the next 64. */
stp A_l, A_h, [dst, 16]
|
||||
ldp A_l, A_h, [src, 16]
|
||||
stp B_l, B_h, [dst, 32]
|
||||
ldp B_l, B_h, [src, 32]
|
||||
stp C_l, C_h, [dst, 48]
|
||||
ldp C_l, C_h, [src, 48]
|
||||
stp D_l, D_h, [dst, 64]!
|
||||
ldp D_l, D_h, [src, 64]!
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
|
||||
/* Write the last full set of 64 bytes. The remainder is at most 64
|
||||
bytes, so it is safe to always copy 64 bytes from the end even if
|
||||
there is just 1 byte left. */
|
||||
2:
|
||||
ldp E_l, E_h, [srcend, -64]
|
||||
stp A_l, A_h, [dst, 16]
|
||||
ldp A_l, A_h, [srcend, -48]
|
||||
stp B_l, B_h, [dst, 32]
|
||||
ldp B_l, B_h, [srcend, -32]
|
||||
stp C_l, C_h, [dst, 48]
|
||||
ldp C_l, C_h, [srcend, -16]
|
||||
stp D_l, D_h, [dst, 64]
|
||||
stp E_l, E_h, [dstend, -64]
|
||||
stp A_l, A_h, [dstend, -48]
|
||||
stp B_l, B_h, [dstend, -32]
|
||||
stp C_l, C_h, [dstend, -16]
|
||||
ret
|
||||
|
||||
.size memcpy, . - memcpy
|
||||
@@ -0,0 +1,150 @@
|
||||
/* Copyright (c) 2013, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses
|
||||
*/
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* Parameters and result. */
|
||||
#define dstin x0
|
||||
#define src x1
|
||||
#define count x2
|
||||
#define srcend x3
|
||||
#define dstend x4
|
||||
#define tmp1 x5
|
||||
#define A_l x6
|
||||
#define A_h x7
|
||||
#define B_l x8
|
||||
#define B_h x9
|
||||
#define C_l x10
|
||||
#define C_h x11
|
||||
#define D_l x12
|
||||
#define D_h x13
|
||||
#define E_l count
|
||||
#define E_h tmp1
|
||||
|
||||
/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
|
||||
Larger backwards copies are also handled by memcpy. The only remaining
|
||||
case is forward large copies. The destination is aligned, and an
|
||||
unrolled loop processes 64 bytes per iteration.
|
||||
*/
|
||||
|
||||
/*
 * memmove(dstin = x0, src = x1, count = x2)
 *
 * Copies count bytes from src to dstin, allowing the two ranges to
 * overlap.  x0 is never written, so it still holds dstin on return.
 * Copies of up to 96 bytes, and larger ones where (dstin - src) is not
 * below count as an unsigned value, are tail-called to memcpy.  The code
 * that remains handles dstin inside [src, src + count) and therefore
 * walks from the end of the buffers towards the start.
 */
def_fn memmove, 6
|
||||
sub tmp1, dstin, src
|
||||
cmp count, 96
|
||||
/* count > 96: compare (dstin - src) with count; otherwise force the
   carry flag (NZCV = 2) so that the b.hs below is taken. */
ccmp tmp1, count, 2, hi
|
||||
b.hs memcpy
|
||||
|
||||
/* dstin == src: nothing to copy. */
cbz tmp1, 3f
|
||||
add dstend, dstin, count
|
||||
add srcend, src, count
|
||||
|
||||
/* Align dstend to 16 byte alignment so that we don't cross cache line
|
||||
boundaries on both loads and stores. There are at least 96 bytes
|
||||
to copy, so copy 16 bytes unaligned and then align. The loop
|
||||
copies 64 bytes per iteration and prefetches one iteration ahead. */
|
||||
|
||||
and tmp1, dstend, 15
|
||||
ldp D_l, D_h, [srcend, -16]
|
||||
/* Move srcend, count and (below) dstend back by the misalignment. */
sub srcend, srcend, tmp1
|
||||
sub count, count, tmp1
|
||||
ldp A_l, A_h, [srcend, -16]
|
||||
stp D_l, D_h, [dstend, -16]
|
||||
ldp B_l, B_h, [srcend, -32]
|
||||
ldp C_l, C_h, [srcend, -48]
|
||||
ldp D_l, D_h, [srcend, -64]!
|
||||
sub dstend, dstend, tmp1
|
||||
subs count, count, 128
|
||||
b.ls 2f
|
||||
nop
|
||||
1:
|
||||
/* Store the 64 bytes loaded previously while loading the 64 below them. */
stp A_l, A_h, [dstend, -16]
|
||||
ldp A_l, A_h, [srcend, -16]
|
||||
stp B_l, B_h, [dstend, -32]
|
||||
ldp B_l, B_h, [srcend, -32]
|
||||
stp C_l, C_h, [dstend, -48]
|
||||
ldp C_l, C_h, [srcend, -48]
|
||||
stp D_l, D_h, [dstend, -64]!
|
||||
ldp D_l, D_h, [srcend, -64]!
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
|
||||
/* Write the last full set of 64 bytes. The remainder is at most 64
|
||||
bytes, so it is safe to always copy 64 bytes from the start even if
|
||||
there is just 1 byte left. */
|
||||
2:
|
||||
/* E_l/E_h alias count and tmp1, neither of which is read after this load. */
ldp E_l, E_h, [src, 48]
|
||||
stp A_l, A_h, [dstend, -16]
|
||||
ldp A_l, A_h, [src, 32]
|
||||
stp B_l, B_h, [dstend, -32]
|
||||
ldp B_l, B_h, [src, 16]
|
||||
stp C_l, C_h, [dstend, -48]
|
||||
ldp C_l, C_h, [src]
|
||||
stp D_l, D_h, [dstend, -64]
|
||||
stp E_l, E_h, [dstin, 48]
|
||||
stp A_l, A_h, [dstin, 32]
|
||||
stp B_l, B_h, [dstin, 16]
|
||||
stp C_l, C_h, [dstin]
|
||||
3: ret
|
||||
|
||||
.size memmove, . - memmove
|
||||
@@ -0,0 +1,235 @@
|
||||
/* Copyright (c) 2012, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#define dstin x0
|
||||
#define val x1
|
||||
#define valw w1
|
||||
#define count x2
|
||||
#define dst x3
|
||||
#define dstend x4
|
||||
#define tmp1 x5
|
||||
#define tmp1w w5
|
||||
#define tmp2 x6
|
||||
#define tmp2w w6
|
||||
#define zva_len x7
|
||||
#define zva_lenw w7
|
||||
|
||||
#define L(l) .L ## l
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* memset: store the low byte of valw into count bytes starting at dstin.
   dstin (x0) is never written by this routine.  NOTE(review): presumably
   it doubles as the return value -- confirm against the calling
   convention. */
def_fn memset p2align=6
|
||||
|
||||
/* Replicate the fill byte into all 16 byte lanes of v0 (stored as q0). */
dup v0.16B, valw
|
||||
/* dstend = one past the last byte to set. */
add dstend, dstin, count
|
||||
|
||||
/* More than 96 bytes: take the bulk path. */
cmp count, 96
|
||||
b.hi L(set_long)
|
||||
/* 16..96 bytes: take the medium path. */
cmp count, 16
|
||||
b.hs L(set_medium)
|
||||
/* val = low 64 bits of v0, i.e. the fill byte repeated 8 times. */
mov val, v0.D[0]
|
||||
|
||||
/* Set 0..15 bytes. */
|
||||
/* count < 16 here, so bit 3 clear means count < 8. */
tbz count, 3, 1f
|
||||
/* 8..15 bytes: one 8-byte store from each end; the two may overlap. */
str val, [dstin]
|
||||
str val, [dstend, -8]
|
||||
ret
|
||||
nop
|
||||
/* count < 8 here, so bit 2 clear means count < 4. */
1: tbz count, 2, 2f
|
||||
/* 4..7 bytes: one 4-byte store from each end; the two may overlap. */
str valw, [dstin]
|
||||
str valw, [dstend, -4]
|
||||
ret
|
||||
/* count == 0: nothing to store. */
2: cbz count, 3f
|
||||
/* 1..3 bytes: always write the first byte. */
strb valw, [dstin]
|
||||
/* Bit 1 clear means count == 1, so we are done. */
tbz count, 1, 3f
|
||||
/* count is 2 or 3: write the last two bytes. */
strh valw, [dstend, -2]
|
||||
3: ret
|
||||
|
||||
/* Set 16..96 bytes. */
|
||||
/* Reached from the memset entry code with 16 <= count <= 96.  q0 holds the
   fill byte in every lane and dstend points one past the last byte. */
L(set_medium):
|
||||
/* The first 16 bytes are always written. */
str q0, [dstin]
|
||||
/* Bit 6 set means count >= 64: handled by set96. */
tbnz count, 6, L(set96)
|
||||
/* 16..63 bytes: write the last 16 bytes (may overlap the first store). */
str q0, [dstend, -16]
|
||||
/* Bit 5 clear means count < 32: the two stores above cover everything. */
tbz count, 5, 1f
|
||||
/* 32..63 bytes: also write the second and the second-to-last 16 bytes. */
str q0, [dstin, 16]
|
||||
str q0, [dstend, -32]
|
||||
1: ret
|
||||
|
||||
.p2align 4
|
||||
/* Set 64..96 bytes. Write 64 bytes from the start and
|
||||
32 bytes from the end. */
|
||||
L(set96):
|
||||
str q0, [dstin, 16]
|
||||
stp q0, q0, [dstin, 32]
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
|
||||
.p2align 3
|
||||
nop
|
||||
L(set_long):
|
||||
and valw, valw, 255
|
||||
bic dst, dstin, 15
|
||||
str q0, [dstin]
|
||||
cmp count, 256
|
||||
ccmp valw, 0, 0, cs
|
||||
b.eq L(try_zva)
|
||||
L(no_zva):
|
||||
sub count, dstend, dst /* Count is 16 too large. */
|
||||
add dst, dst, 16
|
||||
sub count, count, 64 + 16 /* Adjust count and bias for loop. */
|
||||
1: stp q0, q0, [dst], 64
|
||||
stp q0, q0, [dst, -32]
|
||||
L(tail64):
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
2: stp q0, q0, [dstend, -64]
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
|
||||
.p2align 3
|
||||
L(try_zva):
|
||||
mrs tmp1, dczid_el0
|
||||
tbnz tmp1w, 4, L(no_zva)
|
||||
and tmp1w, tmp1w, 15
|
||||
cmp tmp1w, 4 /* ZVA size is 64 bytes. */
|
||||
b.ne L(zva_128)
|
||||
|
||||
/* Write the first and last 64 byte aligned block using stp rather
|
||||
than using DC ZVA. This is faster on some cores.
|
||||
*/
|
||||
L(zva_64):
|
||||
str q0, [dst, 16]
|
||||
stp q0, q0, [dst, 32]
|
||||
bic dst, dst, 63
|
||||
stp q0, q0, [dst, 64]
|
||||
stp q0, q0, [dst, 96]
|
||||
sub count, dstend, dst /* Count is now 128 too large. */
|
||||
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
||||
add dst, dst, 128
|
||||
nop
|
||||
1: dc zva, dst
|
||||
add dst, dst, 64
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
stp q0, q0, [dst, 0]
|
||||
stp q0, q0, [dst, 32]
|
||||
stp q0, q0, [dstend, -64]
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
|
||||
.p2align 3
|
||||
L(zva_128):
|
||||
cmp tmp1w, 5 /* ZVA size is 128 bytes. */
|
||||
b.ne L(zva_other)
|
||||
|
||||
str q0, [dst, 16]
|
||||
stp q0, q0, [dst, 32]
|
||||
stp q0, q0, [dst, 64]
|
||||
stp q0, q0, [dst, 96]
|
||||
bic dst, dst, 127
|
||||
sub count, dstend, dst /* Count is now 128 too large. */
|
||||
sub count, count, 128+128 /* Adjust count and bias for loop. */
|
||||
add dst, dst, 128
|
||||
1: dc zva, dst
|
||||
add dst, dst, 128
|
||||
subs count, count, 128
|
||||
b.hi 1b
|
||||
stp q0, q0, [dstend, -128]
|
||||
stp q0, q0, [dstend, -96]
|
||||
stp q0, q0, [dstend, -64]
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
|
||||
L(zva_other):
|
||||
mov tmp2w, 4
|
||||
lsl zva_lenw, tmp2w, tmp1w
|
||||
add tmp1, zva_len, 64 /* Max alignment bytes written. */
|
||||
cmp count, tmp1
|
||||
blo L(no_zva)
|
||||
|
||||
sub tmp2, zva_len, 1
|
||||
add tmp1, dst, zva_len
|
||||
add dst, dst, 16
|
||||
subs count, tmp1, dst /* Actual alignment bytes to write. */
|
||||
bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
|
||||
beq 2f
|
||||
1: stp q0, q0, [dst], 64
|
||||
stp q0, q0, [dst, -32]
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
2: mov dst, tmp1
|
||||
sub count, dstend, tmp1 /* Remaining bytes to write. */
|
||||
subs count, count, zva_len
|
||||
b.lo 4f
|
||||
3: dc zva, dst
|
||||
add dst, dst, zva_len
|
||||
subs count, count, zva_len
|
||||
b.hs 3b
|
||||
4: add count, count, zva_len
|
||||
b L(tail64)
|
||||
|
||||
.size memset, . - memset
|
||||
@@ -0,0 +1,165 @@
|
||||
/*
|
||||
strchr - find a character in a string
|
||||
|
||||
Copyright (c) 2014, ARM Limited
|
||||
All rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the company nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
* Neon Available.
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
|
||||
#define srcin x0
|
||||
#define chrin w1
|
||||
|
||||
#define result x0
|
||||
|
||||
#define src x2
|
||||
#define tmp1 x3
|
||||
#define wtmp2 w4
|
||||
#define tmp3 x5
|
||||
|
||||
#define vrepchr v0
|
||||
#define vdata1 v1
|
||||
#define vdata2 v2
|
||||
#define vhas_nul1 v3
|
||||
#define vhas_nul2 v4
|
||||
#define vhas_chr1 v5
|
||||
#define vhas_chr2 v6
|
||||
#define vrepmask_0 v7
|
||||
#define vrepmask_c v16
|
||||
#define vend1 v17
|
||||
#define vend2 v18
|
||||
|
||||
/* Core algorithm.
|
||||
|
||||
For each 32-byte hunk we calculate a 64-bit syndrome value, with
|
||||
two bits per byte (LSB is always in bits 0 and 1, for both big
|
||||
and little-endian systems). For each tuple, bit 0 is set iff
|
||||
the relevant byte matched the requested character; bit 1 is set
|
||||
iff the relevant byte matched the NUL end of string (we trigger
|
||||
off bit0 for the special case of looking for NUL). Since the bits
|
||||
in the syndrome reflect exactly the order in which things occur
|
||||
in the original string a count_trailing_zeros() operation will
|
||||
identify exactly which byte is causing the termination, and why. */
|
||||
|
||||
/* Locals and temporaries. */
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
.macro def_alias f a
|
||||
.weak \a
|
||||
.set \a,\f
|
||||
.endm
|
||||
|
||||
def_fn strchr
|
||||
def_alias strchr index
|
||||
/* Magic constant 0x40100401 to allow us to identify which lane
|
||||
matches the requested byte. Magic constant 0x80200802 used
|
||||
similarly for NUL termination. */
|
||||
mov wtmp2, #0x0401
|
||||
movk wtmp2, #0x4010, lsl #16
|
||||
dup vrepchr.16b, chrin
|
||||
bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
|
||||
dup vrepmask_c.4s, wtmp2
|
||||
ands tmp1, srcin, #31
|
||||
add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
|
||||
b.eq .Lloop
|
||||
|
||||
/* Input string is not 32-byte aligned. Rather than forcing
|
||||
the padding bytes to a safe value, we calculate the syndrome
|
||||
for all the bytes, but then mask off those bits of the
|
||||
syndrome that are related to the padding. */
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
neg tmp1, tmp1
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||
orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||
lsl tmp1, tmp1, #1
|
||||
addp vend1.16b, vend1.16b, vend2.16b // 256->128
|
||||
mov tmp3, #~0
|
||||
addp vend1.16b, vend1.16b, vend2.16b // 128->64
|
||||
lsr tmp1, tmp3, tmp1
|
||||
|
||||
mov tmp3, vend1.d[0]
|
||||
bic tmp1, tmp3, tmp1 // Mask padding bits.
|
||||
cbnz tmp1, .Ltail
|
||||
|
||||
.Lloop:
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
/* Use a fast check for the termination condition. */
|
||||
orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||
orr vend1.16b, vend1.16b, vend2.16b
|
||||
addp vend1.2d, vend1.2d, vend1.2d
|
||||
mov tmp1, vend1.d[0]
|
||||
cbz tmp1, .Lloop
|
||||
|
||||
/* Termination condition found. Now need to establish exactly why
|
||||
we terminated. */
|
||||
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
|
||||
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
|
||||
orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||
addp vend1.16b, vend1.16b, vend2.16b // 256->128
|
||||
addp vend1.16b, vend1.16b, vend2.16b // 128->64
|
||||
|
||||
mov tmp1, vend1.d[0]
|
||||
/* On entry tmp1 holds the non-zero 64-bit syndrome for the 32-byte hunk
   just loaded, and src has already been advanced past that hunk. */
.Ltail:
|
||||
/* Count the trailing zeros, by bit reversing... */
|
||||
rbit tmp1, tmp1
|
||||
/* Re-bias source. */
|
||||
sub src, src, #32
|
||||
clz tmp1, tmp1 /* ... and counting the leading zeros. */
|
||||
/* Tmp1 is even if the target character was found first. Otherwise
|
||||
we've found the end of string and we weren't looking for NUL. */
|
||||
tst tmp1, #1
|
||||
/* Two syndrome bits per byte, so the byte offset is tmp1 / 2. */
add result, src, tmp1, lsr #1
|
||||
/* Keep the address when tmp1 is even; otherwise return 0 (NULL). */
csel result, result, xzr, eq
|
||||
ret
|
||||
|
||||
.size strchr, . - strchr
|
||||
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
strchrnul - find a character or nul in a string
|
||||
|
||||
Copyright (c) 2014, ARM Limited
|
||||
All rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the company nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
* Neon Available.
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
|
||||
#define srcin x0
|
||||
#define chrin w1
|
||||
|
||||
#define result x0
|
||||
|
||||
#define src x2
|
||||
#define tmp1 x3
|
||||
#define wtmp2 w4
|
||||
#define tmp3 x5
|
||||
|
||||
#define vrepchr v0
|
||||
#define vdata1 v1
|
||||
#define vdata2 v2
|
||||
#define vhas_nul1 v3
|
||||
#define vhas_nul2 v4
|
||||
#define vhas_chr1 v5
|
||||
#define vhas_chr2 v6
|
||||
#define vrepmask v7
|
||||
#define vend1 v16
|
||||
|
||||
/* Core algorithm.
|
||||
|
||||
For each 32-byte hunk we calculate a 64-bit syndrome value, with
|
||||
two bits per byte (LSB is always in bits 0 and 1, for both big
|
||||
and little-endian systems). For each tuple, bit 0 is set iff
|
||||
the relevant byte matched the requested character or nul. Since the
|
||||
bits in the syndrome reflect exactly the order in which things occur
|
||||
in the original string a count_trailing_zeros() operation will
|
||||
identify exactly which byte is causing the termination. */
|
||||
|
||||
/* Locals and temporaries. */
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
def_fn strchrnul
|
||||
/* Magic constant 0x40100401 to allow us to identify which lane
|
||||
matches the termination condition. */
|
||||
mov wtmp2, #0x0401
|
||||
movk wtmp2, #0x4010, lsl #16
|
||||
dup vrepchr.16b, chrin
|
||||
bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
|
||||
dup vrepmask.4s, wtmp2
|
||||
ands tmp1, srcin, #31
|
||||
b.eq .Lloop
|
||||
|
||||
/* Input string is not 32-byte aligned. Rather than forcing
|
||||
the padding bytes to a safe value, we calculate the syndrome
|
||||
for all the bytes, but then mask off those bits of the
|
||||
syndrome that are related to the padding. */
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
neg tmp1, tmp1
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
|
||||
orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||
lsl tmp1, tmp1, #1
|
||||
addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||
mov tmp3, #~0
|
||||
addp vend1.16b, vend1.16b, vend1.16b // 128->64
|
||||
lsr tmp1, tmp3, tmp1
|
||||
|
||||
mov tmp3, vend1.d[0]
|
||||
bic tmp1, tmp3, tmp1 // Mask padding bits.
|
||||
cbnz tmp1, .Ltail
|
||||
|
||||
.Lloop:
|
||||
ld1 {vdata1.16b, vdata2.16b}, [src], #32
|
||||
cmeq vhas_nul1.16b, vdata1.16b, #0
|
||||
cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
|
||||
cmeq vhas_nul2.16b, vdata2.16b, #0
|
||||
cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
|
||||
/* Use a fast check for the termination condition. */
|
||||
orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
|
||||
orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
|
||||
orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b
|
||||
addp vend1.2d, vend1.2d, vend1.2d
|
||||
mov tmp1, vend1.d[0]
|
||||
cbz tmp1, .Lloop
|
||||
|
||||
/* Termination condition found. Now need to establish exactly why
|
||||
we terminated. */
|
||||
and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
|
||||
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
|
||||
addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
|
||||
addp vend1.16b, vend1.16b, vend1.16b // 128->64
|
||||
|
||||
mov tmp1, vend1.d[0]
|
||||
.Ltail:
|
||||
/* Count the trailing zeros, by bit reversing... */
|
||||
rbit tmp1, tmp1
|
||||
/* Re-bias source. */
|
||||
sub src, src, #32
|
||||
clz tmp1, tmp1 /* ... and counting the leading zeros. */
|
||||
/* tmp1 is twice the offset into the fragment. */
|
||||
add result, src, tmp1, lsr #1
|
||||
ret
|
||||
|
||||
.size strchrnul, . - strchrnul
|
||||
@@ -0,0 +1,166 @@
|
||||
/* Copyright (c) 2012, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
*/
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* Parameters and result. */
|
||||
#define src1 x0
|
||||
#define src2 x1
|
||||
#define result x0
|
||||
|
||||
/* Internal variables. */
|
||||
#define data1 x2
|
||||
#define data1w w2
|
||||
#define data2 x3
|
||||
#define data2w w3
|
||||
#define has_nul x4
|
||||
#define diff x5
|
||||
#define syndrome x6
|
||||
#define tmp1 x7
|
||||
#define tmp2 x8
|
||||
#define tmp3 x9
|
||||
#define zeroones x10
|
||||
#define pos x11
|
||||
|
||||
/* Start of performance-critical section -- one 64B cache line. */
|
||||
def_fn strcmp p2align=6
|
||||
eor tmp1, src1, src2
|
||||
mov zeroones, #REP8_01
|
||||
tst tmp1, #7
|
||||
b.ne .Lmisaligned8
|
||||
ands tmp1, src1, #7
|
||||
b.ne .Lmutual_align
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. */
|
||||
.Lloop_aligned:
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
.Lstart_realigned:
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
orr syndrome, diff, has_nul
|
||||
cbz syndrome, .Lloop_aligned
|
||||
/* End of performance-critical section -- one 64B cache line. */
|
||||
|
||||
#ifndef __AARCH64EB__
|
||||
rev syndrome, syndrome
|
||||
rev data1, data1
|
||||
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
that is different, or the top bit of the first zero byte.
|
||||
Shifting left now will bring the critical information into the
|
||||
top bits. */
|
||||
clz pos, syndrome
|
||||
rev data2, data2
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/* But we need to zero-extend (char is unsigned) the value and then
|
||||
perform a signed 32-bit subtraction. */
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
#else
|
||||
/* For big-endian we cannot use the trick with the syndrome value
|
||||
as carry-propagation can corrupt the upper bits if the trailing
|
||||
bytes in the string contain 0x01. */
|
||||
/* However, if there is no NUL byte in the dword, we can generate
|
||||
the result directly. We can't just subtract the bytes as the
|
||||
MSB might be significant. */
|
||||
cbnz has_nul, 1f
|
||||
cmp data1, data2
|
||||
cset result, ne
|
||||
cneg result, result, lo
|
||||
ret
|
||||
1:
|
||||
/* Re-compute the NUL-byte detection, using a byte-reversed value. */
|
||||
rev tmp3, data1
|
||||
sub tmp1, tmp3, zeroones
|
||||
orr tmp2, tmp3, #REP8_7f
|
||||
bic has_nul, tmp1, tmp2
|
||||
rev has_nul, has_nul
|
||||
orr syndrome, diff, has_nul
|
||||
clz pos, syndrome
|
||||
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
that is different, or the top bit of the first zero byte.
|
||||
Shifting left now will bring the critical information into the
|
||||
top bits. */
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/* But we need to zero-extend (char is unsigned) the value and then
|
||||
perform a signed 32-bit subtraction. */
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
#endif
|
||||
|
||||
/* On entry tmp1 = src1 & 7 (non-zero), as computed by the ands at the top
   of strcmp. */
.Lmutual_align:
|
||||
/* Sources are mutually aligned, but are not currently at an
|
||||
alignment boundary. Round down the addresses and then mask off
|
||||
the bytes that precede the start point. */
|
||||
bic src1, src1, #7
|
||||
bic src2, src2, #7
|
||||
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
||||
ldr data1, [src1], #8
|
||||
neg tmp1, tmp1 /* Bits to alignment -64. */
|
||||
ldr data2, [src2], #8
|
||||
/* Start from an all-ones mask and shift it so that only the bits of the
   bytes before the start point remain set. */
mov tmp2, #~0
|
||||
#ifdef __AARCH64EB__
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#else
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#endif
|
||||
/* Force the leading bytes to 0xff in both words: they then compare equal
   and cannot be mistaken for a NUL terminator. */
orr data1, data1, tmp2
|
||||
orr data2, data2, tmp2
|
||||
/* Join the aligned loop after its loads. */
b .Lstart_realigned
|
||||
|
||||
/* Reached when src1 and src2 differ in their low three address bits (the
   tst tmp1, #7 at function entry).  Compares one byte per iteration. */
.Lmisaligned8:
|
||||
/* We can do better than this. */
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
/* Carry is set iff data1w >= 1, i.e. the byte from src1 is not NUL. */
cmp data1w, #1
|
||||
/* If carry is set, compare the two bytes; otherwise force the flags to
   the immediate value so that the b.eq below is not taken. */
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||
/* Loop while the bytes are equal and not NUL. */
b.eq .Lmisaligned8
|
||||
/* Result is the difference of the two zero-extended bytes. */
sub result, data1, data2
|
||||
ret
|
||||
@@ -0,0 +1,336 @@
|
||||
/*
|
||||
strcpy/stpcpy - copy a string returning pointer to start/end.
|
||||
|
||||
Copyright (c) 2013, 2014, 2015 ARM Ltd.
|
||||
All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the company nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses, min page size 4k.
|
||||
*/
|
||||
|
||||
/* To build as stpcpy, define BUILD_STPCPY before compiling this file.
|
||||
|
||||
To test the page crossing code path more thoroughly, compile with
|
||||
-DSTRCPY_TEST_PAGE_CROSS - this will force all copies through the slower
|
||||
entry path. This option is not intended for production use. */
|
||||
|
||||
/* Arguments and results. */
|
||||
#define dstin x0
|
||||
#define srcin x1
|
||||
|
||||
/* Locals and temporaries. */
|
||||
#define src x2
|
||||
#define dst x3
|
||||
#define data1 x4
|
||||
#define data1w w4
|
||||
#define data2 x5
|
||||
#define data2w w5
|
||||
#define has_nul1 x6
|
||||
#define has_nul2 x7
|
||||
#define tmp1 x8
|
||||
#define tmp2 x9
|
||||
#define tmp3 x10
|
||||
#define tmp4 x11
|
||||
#define zeroones x12
|
||||
#define data1a x13
|
||||
#define data2a x14
|
||||
#define pos x15
|
||||
#define len x16
|
||||
#define to_align x17
|
||||
|
||||
#ifdef BUILD_STPCPY
|
||||
#define STRCPY stpcpy
|
||||
#else
|
||||
#define STRCPY strcpy
|
||||
#endif
|
||||
|
||||
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. */
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* AArch64 systems have a minimum page size of 4k. We can do a quick
|
||||
page size check for crossing this boundary on entry and if we
|
||||
do not, then we can short-circuit much of the entry code. We
|
||||
expect early page-crossing strings to be rare (probability of
|
||||
16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite
|
||||
predictable, even with random strings.
|
||||
|
||||
We don't bother checking for larger page sizes, the cost of setting
|
||||
up the correct page size is just not worth the extra gain from
|
||||
a small reduction in the cases taking the slow path. Note that
|
||||
we only care about whether the first fetch, which may be
|
||||
misaligned, crosses a page boundary - after that we move to aligned
|
||||
fetches for the remainder of the string. */
|
||||
|
||||
#ifdef STRCPY_TEST_PAGE_CROSS
|
||||
/* Make everything that isn't Qword aligned look like a page cross. */
|
||||
#define MIN_PAGE_P2 4
|
||||
#else
|
||||
#define MIN_PAGE_P2 12
|
||||
#endif
|
||||
|
||||
#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
|
||||
|
||||
def_fn STRCPY p2align=6
|
||||
/* For moderately short strings, the fastest way to do the copy is to
|
||||
calculate the length of the string in the same way as strlen, then
|
||||
essentially do a memcpy of the result. This avoids the need for
|
||||
multiple byte copies and further means that by the time we
|
||||
reach the bulk copy loop we know we can always use DWord
|
||||
accesses. We expect strcpy to rarely be called repeatedly
|
||||
with the same source string, so branch prediction is likely to
|
||||
always be difficult - we mitigate against this by preferring
|
||||
conditional select operations over branches whenever this is
|
||||
feasible. */
|
||||
and tmp2, srcin, #(MIN_PAGE_SIZE - 1)
|
||||
mov zeroones, #REP8_01
|
||||
and to_align, srcin, #15
|
||||
cmp tmp2, #(MIN_PAGE_SIZE - 16)
|
||||
neg tmp1, to_align
|
||||
/* The first fetch will straddle a (possible) page boundary iff
|
||||
srcin + 15 causes bit[MIN_PAGE_P2] to change value. A 16-byte
|
||||
aligned string will never fail the page align check, so will
|
||||
always take the fast path. */
|
||||
b.gt .Lpage_cross
|
||||
|
||||
.Lpage_cross_ok:
|
||||
ldp data1, data2, [srcin]
|
||||
#ifdef __AARCH64EB__
|
||||
/* Because we expect the end to be found within 16 characters
|
||||
(profiling shows this is the most common case), it's worth
|
||||
swapping the bytes now to save having to recalculate the
|
||||
termination syndrome later. We preserve data1 and data2
|
||||
so that we can re-use the values later on. */
|
||||
rev tmp2, data1
|
||||
sub tmp1, tmp2, zeroones
|
||||
orr tmp2, tmp2, #REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
b.ne .Lfp_le8
|
||||
rev tmp4, data2
|
||||
sub tmp3, tmp4, zeroones
|
||||
orr tmp4, tmp4, #REP8_7f
|
||||
#else
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
b.ne .Lfp_le8
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
#endif
|
||||
bics has_nul2, tmp3, tmp4
|
||||
b.eq .Lbulk_entry
|
||||
|
||||
/* The string is short (<=16 bytes). We don't know exactly how
|
||||
short though, yet. Work out the exact length so that we can
|
||||
quickly select the optimal copy strategy. */
|
||||
.Lfp_gt8:
|
||||
rev has_nul2, has_nul2
|
||||
clz pos, has_nul2
|
||||
mov tmp2, #56
|
||||
add dst, dstin, pos, lsr #3 /* Bits to bytes. */
|
||||
sub pos, tmp2, pos
|
||||
#ifdef __AARCH64EB__
|
||||
lsr data2, data2, pos
|
||||
#else
|
||||
lsl data2, data2, pos
|
||||
#endif
|
||||
str data2, [dst, #1]
|
||||
str data1, [dstin]
|
||||
#ifdef BUILD_STPCPY
|
||||
add dstin, dst, #8
|
||||
#endif
|
||||
ret
|
||||
|
||||
.Lfp_le8:
|
||||
rev has_nul1, has_nul1
|
||||
clz pos, has_nul1
|
||||
add dst, dstin, pos, lsr #3 /* Bits to bytes. */
|
||||
subs tmp2, pos, #24 /* Pos in bits. */
|
||||
b.lt .Lfp_lt4
|
||||
#ifdef __AARCH64EB__
|
||||
mov tmp2, #56
|
||||
sub pos, tmp2, pos
|
||||
lsr data2, data1, pos
|
||||
lsr data1, data1, #32
|
||||
#else
|
||||
lsr data2, data1, tmp2
|
||||
#endif
|
||||
/* 4->7 bytes to copy. */
|
||||
str data2w, [dst, #-3]
|
||||
str data1w, [dstin]
|
||||
#ifdef BUILD_STPCPY
|
||||
mov dstin, dst
|
||||
#endif
|
||||
ret
|
||||
.Lfp_lt4:
|
||||
cbz pos, .Lfp_lt2
|
||||
/* 2->3 bytes to copy. */
|
||||
#ifdef __AARCH64EB__
|
||||
lsr data1, data1, #48
|
||||
#endif
|
||||
strh data1w, [dstin]
|
||||
/* Fall-through, one byte (max) to go. */
|
||||
.Lfp_lt2:
|
||||
/* Null-terminated string. Last character must be zero! */
|
||||
strb wzr, [dst]
|
||||
#ifdef BUILD_STPCPY
|
||||
mov dstin, dst
|
||||
#endif
|
||||
ret
|
||||
|
||||
.p2align 6
|
||||
/* Aligning here ensures that the entry code and main loop all lie
|
||||
within one 64-byte cache line. */
|
||||
.Lbulk_entry:
|
||||
sub to_align, to_align, #16
|
||||
stp data1, data2, [dstin]
|
||||
sub src, srcin, to_align
|
||||
sub dst, dstin, to_align
|
||||
b .Lentry_no_page_cross
|
||||
|
||||
/* The inner loop deals with two Dwords at a time. This has a
|
||||
slightly higher start-up cost, but we should win quite quickly,
|
||||
especially on cores with a high number of issue slots per
|
||||
cycle, as we get much better parallelism out of the operations. */
|
||||
.Lmain_loop:
|
||||
stp data1, data2, [dst], #16
|
||||
.Lentry_no_page_cross:
|
||||
ldp data1, data2, [src], #16
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
bics has_nul2, tmp3, tmp4
|
||||
ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
|
||||
b.eq .Lmain_loop
|
||||
|
||||
/* Since we know we are copying at least 16 bytes, the fastest way
|
||||
to deal with the tail is to determine the location of the
|
||||
trailing NUL, then (re)copy the 16 bytes leading up to that. */
|
||||
cmp has_nul1, #0
|
||||
#ifdef __AARCH64EB__
|
||||
/* For big-endian, carry propagation (if the final byte in the
|
||||
string is 0x01) means we cannot use has_nul directly. The
|
||||
easiest way to get the correct byte is to byte-swap the data
|
||||
and calculate the syndrome a second time. */
|
||||
csel data1, data1, data2, ne
|
||||
rev data1, data1
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
#else
|
||||
csel has_nul1, has_nul1, has_nul2, ne
|
||||
#endif
|
||||
rev has_nul1, has_nul1
|
||||
clz pos, has_nul1
|
||||
add tmp1, pos, #72
|
||||
add pos, pos, #8
|
||||
csel pos, pos, tmp1, ne
|
||||
add src, src, pos, lsr #3
|
||||
add dst, dst, pos, lsr #3
|
||||
ldp data1, data2, [src, #-32]
|
||||
stp data1, data2, [dst, #-16]
|
||||
#ifdef BUILD_STPCPY
|
||||
sub dstin, dst, #1
|
||||
#endif
|
||||
ret
|
||||
|
||||
.Lpage_cross:
|
||||
bic src, srcin, #15
|
||||
/* Start by loading two words at [srcin & ~15], then forcing the
|
||||
bytes that precede srcin to 0xff. This means they never look
|
||||
like termination bytes. */
|
||||
ldp data1, data2, [src]
|
||||
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
||||
tst to_align, #7
|
||||
csetm tmp2, ne
|
||||
#ifdef __AARCH64EB__
|
||||
lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#else
|
||||
lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
|
||||
#endif
|
||||
orr data1, data1, tmp2
|
||||
orr data2a, data2, tmp2
|
||||
cmp to_align, #8
|
||||
csinv data1, data1, xzr, lt
|
||||
csel data2, data2, data2a, lt
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
bics has_nul2, tmp3, tmp4
|
||||
ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
|
||||
b.eq .Lpage_cross_ok
|
||||
/* We now need to make data1 and data2 look like they've been
|
||||
loaded directly from srcin. Do a rotate on the 128-bit value. */
|
||||
lsl tmp1, to_align, #3 /* Bytes->bits. */
|
||||
neg tmp2, to_align, lsl #3
|
||||
#ifdef __AARCH64EB__
|
||||
lsl data1a, data1, tmp1
|
||||
lsr tmp4, data2, tmp2
|
||||
lsl data2, data2, tmp1
|
||||
orr tmp4, tmp4, data1a
|
||||
cmp to_align, #8
|
||||
csel data1, tmp4, data2, lt
|
||||
rev tmp2, data1
|
||||
rev tmp4, data2
|
||||
sub tmp1, tmp2, zeroones
|
||||
orr tmp2, tmp2, #REP8_7f
|
||||
sub tmp3, tmp4, zeroones
|
||||
orr tmp4, tmp4, #REP8_7f
|
||||
#else
|
||||
lsr data1a, data1, tmp1
|
||||
lsl tmp4, data2, tmp2
|
||||
lsr data2, data2, tmp1
|
||||
orr tmp4, tmp4, data1a
|
||||
cmp to_align, #8
|
||||
csel data1, tmp4, data2, lt
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
#endif
|
||||
bic has_nul1, tmp1, tmp2
|
||||
cbnz has_nul1, .Lfp_le8
|
||||
bic has_nul2, tmp3, tmp4
|
||||
b .Lfp_gt8
|
||||
|
||||
.size STRCPY, . - STRCPY
|
||||
@@ -0,0 +1,233 @@
|
||||
/* Copyright (c) 2013-2015, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64, unaligned accesses, min page size 4k.
|
||||
*/
|
||||
|
||||
/* To test the page crossing code path more thoroughly, compile with
|
||||
-DTEST_PAGE_CROSS - this will force all calls through the slower
|
||||
entry path. This option is not intended for production use. */
|
||||
|
||||
/* Arguments and results. */
|
||||
#define srcin x0
|
||||
#define len x0
|
||||
|
||||
/* Locals and temporaries. */
|
||||
#define src x1
|
||||
#define data1 x2
|
||||
#define data2 x3
|
||||
#define has_nul1 x4
|
||||
#define has_nul2 x5
|
||||
#define tmp1 x4
|
||||
#define tmp2 x5
|
||||
#define tmp3 x6
|
||||
#define tmp4 x7
|
||||
#define zeroones x8
|
||||
|
||||
#define L(l) .L ## l
|
||||
|
||||
/* def_fn NAME [p2align=N]: boilerplate that opens a global function.
   Switches to the .text section, passes N to .p2align (N defaults to 0),
   exports NAME with .global, marks it as %function and finally emits
   the NAME: label, so the function body follows the macro invocation.  */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. A faster check
|
||||
(X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
|
||||
false hits for characters 129..255. */
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
#ifdef TEST_PAGE_CROSS
|
||||
# define MIN_PAGE_SIZE 15
|
||||
#else
|
||||
# define MIN_PAGE_SIZE 4096
|
||||
#endif
|
||||
|
||||
/* Since strings are short on average, we check the first 16 bytes
|
||||
of the string for a NUL character. In order to do an unaligned ldp
|
||||
safely we have to do a page cross check first. If there is a NUL
|
||||
byte we calculate the length from the 2 8-byte words using
|
||||
conditional select to reduce branch mispredictions (it is unlikely
|
||||
strlen will be repeatedly called on strings with the same length).
|
||||
|
||||
If the string is longer than 16 bytes, we align src so we don't need
|
||||
further page cross checks, and process 32 bytes per iteration
|
||||
using the fast NUL check. If we encounter non-ASCII characters,
|
||||
fall back to a second loop using the full NUL check.
|
||||
|
||||
If the page cross check fails, we read 16 bytes from an aligned
|
||||
address, remove any characters before the string, and continue
|
||||
in the main loop using aligned loads. Since strings crossing a
|
||||
page in the first 16 bytes are rare (probability of
|
||||
16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
|
||||
|
||||
AArch64 systems have a minimum page size of 4k. We don't bother
|
||||
checking for larger page sizes - the cost of setting up the correct
|
||||
page size is just not worth the extra gain from a small reduction in
|
||||
the cases taking the slow path. Note that we only care about
|
||||
whether the first fetch, which may be misaligned, crosses a page
|
||||
boundary. */
|
||||
|
||||
def_fn strlen p2align=6
|
||||
and tmp1, srcin, MIN_PAGE_SIZE - 1
|
||||
mov zeroones, REP8_01
|
||||
cmp tmp1, MIN_PAGE_SIZE - 16
|
||||
b.gt L(page_cross)
|
||||
ldp data1, data2, [srcin]
|
||||
#ifdef __AARCH64EB__
|
||||
/* For big-endian, carry propagation (if the final byte in the
|
||||
string is 0x01) means we cannot use has_nul1/2 directly.
|
||||
Since we expect strings to be small and early-exit,
|
||||
byte-swap the data now so has_nul1/2 will be correct. */
|
||||
rev data1, data1
|
||||
rev data2, data2
|
||||
#endif
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
ccmp has_nul2, 0, 0, eq
|
||||
beq L(main_loop_entry)
|
||||
|
||||
/* Enter with C = has_nul1 == 0. */
|
||||
csel has_nul1, has_nul1, has_nul2, cc
|
||||
mov len, 8
|
||||
rev has_nul1, has_nul1
|
||||
clz tmp1, has_nul1
|
||||
csel len, xzr, len, cc
|
||||
add len, len, tmp1, lsr 3
|
||||
ret
|
||||
|
||||
/* The inner loop processes 32 bytes per iteration and uses the fast
|
||||
NUL check. If we encounter non-ASCII characters, use a second
|
||||
loop with the accurate NUL check. */
|
||||
.p2align 4
|
||||
L(main_loop_entry):
|
||||
bic src, srcin, 15
|
||||
sub src, src, 16
|
||||
L(main_loop):
|
||||
ldp data1, data2, [src, 32]!
|
||||
.Lpage_cross_entry:
|
||||
sub tmp1, data1, zeroones
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp2, tmp1, tmp3
|
||||
tst tmp2, zeroones, lsl 7
|
||||
bne 1f
|
||||
ldp data1, data2, [src, 16]
|
||||
sub tmp1, data1, zeroones
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp2, tmp1, tmp3
|
||||
tst tmp2, zeroones, lsl 7
|
||||
beq L(main_loop)
|
||||
add src, src, 16
|
||||
1:
|
||||
/* The fast check failed, so do the slower, accurate NUL check. */
|
||||
orr tmp2, data1, REP8_7f
|
||||
orr tmp4, data2, REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
ccmp has_nul2, 0, 0, eq
|
||||
beq L(nonascii_loop)
|
||||
|
||||
/* Enter with C = has_nul1 == 0. */
|
||||
L(tail):
|
||||
#ifdef __AARCH64EB__
|
||||
/* For big-endian, carry propagation (if the final byte in the
|
||||
string is 0x01) means we cannot use has_nul1/2 directly. The
|
||||
easiest way to get the correct byte is to byte-swap the data
|
||||
and calculate the syndrome a second time. */
|
||||
csel data1, data1, data2, cc
|
||||
rev data1, data1
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
#else
|
||||
csel has_nul1, has_nul1, has_nul2, cc
|
||||
#endif
|
||||
sub len, src, srcin
|
||||
rev has_nul1, has_nul1
|
||||
add tmp2, len, 8
|
||||
clz tmp1, has_nul1
|
||||
csel len, len, tmp2, cc
|
||||
add len, len, tmp1, lsr 3
|
||||
ret
|
||||
|
||||
L(nonascii_loop):
|
||||
ldp data1, data2, [src, 16]!
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
ccmp has_nul2, 0, 0, eq
|
||||
bne L(tail)
|
||||
ldp data1, data2, [src, 16]!
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, REP8_7f
|
||||
bics has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
ccmp has_nul2, 0, 0, eq
|
||||
beq L(nonascii_loop)
|
||||
b L(tail)
|
||||
|
||||
/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
|
||||
srcin to 0x7f, so we ignore any NUL bytes before the string.
|
||||
Then continue in the aligned loop. */
|
||||
L(page_cross):
|
||||
bic src, srcin, 15
|
||||
ldp data1, data2, [src]
|
||||
lsl tmp1, srcin, 3
|
||||
mov tmp4, -1
|
||||
#ifdef __AARCH64EB__
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
|
||||
#else
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
|
||||
#endif
|
||||
orr tmp1, tmp1, REP8_80
|
||||
orn data1, data1, tmp1
|
||||
orn tmp2, data2, tmp1
|
||||
tst srcin, 8
|
||||
csel data1, data1, tmp4, eq
|
||||
csel data2, data2, tmp2, eq
|
||||
b L(page_cross_entry)
|
||||
|
||||
.size strlen, . - strlen
|
||||
@@ -0,0 +1,222 @@
|
||||
/* Copyright (c) 2013, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
*/
|
||||
|
||||
/* def_fn NAME [p2align=N]: boilerplate that opens a global function.
   Switches to the .text section, passes N to .p2align (N defaults to 0),
   exports NAME with .global, marks it as %function and finally emits
   the NAME: label, so the function body follows the macro invocation.  */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* Parameters and result. */
|
||||
#define src1 x0
|
||||
#define src2 x1
|
||||
#define limit x2
|
||||
#define result x0
|
||||
|
||||
/* Internal variables. */
|
||||
#define data1 x3
|
||||
#define data1w w3
|
||||
#define data2 x4
|
||||
#define data2w w4
|
||||
#define has_nul x5
|
||||
#define diff x6
|
||||
#define syndrome x7
|
||||
#define tmp1 x8
|
||||
#define tmp2 x9
|
||||
#define tmp3 x10
|
||||
#define zeroones x11
|
||||
#define pos x12
|
||||
#define limit_wd x13
|
||||
#define mask x14
|
||||
#define endloop x15
|
||||
|
||||
.text
|
||||
.p2align 6
|
||||
.rep 7
|
||||
nop /* Pad so that the loop below fits a cache line. */
|
||||
.endr
|
||||
def_fn strncmp
|
||||
cbz limit, .Lret0
|
||||
eor tmp1, src1, src2
|
||||
mov zeroones, #REP8_01
|
||||
tst tmp1, #7
|
||||
b.ne .Lmisaligned8
|
||||
ands tmp1, src1, #7
|
||||
b.ne .Lmutual_align
|
||||
/* Calculate the number of full and partial words -1. */
|
||||
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
|
||||
lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
|
||||
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. */
|
||||
/* Start of performance-critical section -- one 64B cache line. */
|
||||
.Lloop_aligned:
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
.Lstart_realigned:
|
||||
subs limit_wd, limit_wd, #1
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
csinv endloop, diff, xzr, pl /* Last Dword or differences. */
|
||||
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
ccmp endloop, #0, #0, eq
|
||||
b.eq .Lloop_aligned
|
||||
/* End of performance-critical section -- one 64B cache line. */
|
||||
|
||||
/* Not reached the limit, must have found the end or a diff. */
|
||||
tbz limit_wd, #63, .Lnot_limit
|
||||
|
||||
/* Limit % 8 == 0 => all bytes significant. */
|
||||
ands limit, limit, #7
|
||||
b.eq .Lnot_limit
|
||||
|
||||
lsl limit, limit, #3 /* Bytes -> bits. */
|
||||
mov mask, #~0
|
||||
#ifdef __AARCH64EB__
|
||||
lsr mask, mask, limit
|
||||
#else
|
||||
lsl mask, mask, limit
|
||||
#endif
|
||||
bic data1, data1, mask
|
||||
bic data2, data2, mask
|
||||
|
||||
/* Make sure that the NUL byte is marked in the syndrome. */
|
||||
orr has_nul, has_nul, mask
|
||||
|
||||
.Lnot_limit:
|
||||
orr syndrome, diff, has_nul
|
||||
|
||||
#ifndef __AARCH64EB__
|
||||
rev syndrome, syndrome
|
||||
rev data1, data1
|
||||
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
that is different, or the top bit of the first zero byte.
|
||||
Shifting left now will bring the critical information into the
|
||||
top bits. */
|
||||
clz pos, syndrome
|
||||
rev data2, data2
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/* But we need to zero-extend (char is unsigned) the value and then
|
||||
perform a signed 32-bit subtraction. */
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
#else
|
||||
/* For big-endian we cannot use the trick with the syndrome value
|
||||
as carry-propagation can corrupt the upper bits if the trailing
|
||||
bytes in the string contain 0x01. */
|
||||
/* However, if there is no NUL byte in the dword, we can generate
|
||||
the result directly. We can't just subtract the bytes as the
|
||||
MSB might be significant. */
|
||||
cbnz has_nul, 1f
|
||||
cmp data1, data2
|
||||
cset result, ne
|
||||
cneg result, result, lo
|
||||
ret
|
||||
1:
|
||||
/* Re-compute the NUL-byte detection, using a byte-reversed value. */
|
||||
rev tmp3, data1
|
||||
sub tmp1, tmp3, zeroones
|
||||
orr tmp2, tmp3, #REP8_7f
|
||||
bic has_nul, tmp1, tmp2
|
||||
rev has_nul, has_nul
|
||||
orr syndrome, diff, has_nul
|
||||
clz pos, syndrome
|
||||
/* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
that is different, or the top bit of the first zero byte.
|
||||
Shifting left now will bring the critical information into the
|
||||
top bits. */
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/* But we need to zero-extend (char is unsigned) the value and then
|
||||
perform a signed 32-bit subtraction. */
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
#endif
|
||||
|
||||
.Lmutual_align:
|
||||
/* Sources are mutually aligned, but are not currently at an
|
||||
alignment boundary. Round down the addresses and then mask off
|
||||
the bytes that precede the start point.
|
||||
We also need to adjust the limit calculations, but without
|
||||
overflowing if the limit is near ULONG_MAX. */
|
||||
bic src1, src1, #7
|
||||
bic src2, src2, #7
|
||||
ldr data1, [src1], #8
|
||||
neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
|
||||
ldr data2, [src2], #8
|
||||
mov tmp2, #~0
|
||||
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
|
||||
#ifdef __AARCH64EB__
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
lsl tmp2, tmp2, tmp3 /* Shift (tmp3 & 63). */
|
||||
#else
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
lsr tmp2, tmp2, tmp3 /* Shift (tmp3 & 63). */
|
||||
#endif
|
||||
and tmp3, limit_wd, #7
|
||||
lsr limit_wd, limit_wd, #3
|
||||
/* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
|
||||
add limit, limit, tmp1
|
||||
add tmp3, tmp3, tmp1
|
||||
orr data1, data1, tmp2
|
||||
orr data2, data2, tmp2
|
||||
add limit_wd, limit_wd, tmp3, lsr #3
|
||||
b .Lstart_realigned
|
||||
|
||||
.Lret0:
|
||||
mov result, #0
|
||||
ret
|
||||
|
||||
.p2align 6
|
||||
.Lmisaligned8:
|
||||
sub limit, limit, #1
|
||||
1:
|
||||
/* Perhaps we can do better than this. */
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
subs limit, limit, #1
|
||||
ccmp data1w, #1, #0, cs /* NZCV = 0b0000. */
|
||||
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||
b.eq 1b
|
||||
sub result, data1, data2
|
||||
ret
|
||||
.size strncmp, . - strncmp
|
||||
@@ -0,0 +1,181 @@
|
||||
/* strnlen - calculate the length of a string with limit.
|
||||
|
||||
Copyright (c) 2013, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Linaro nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* Assumptions:
|
||||
*
|
||||
* ARMv8-a, AArch64
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
|
||||
#define srcin x0
|
||||
#define len x0
|
||||
#define limit x1
|
||||
|
||||
/* Locals and temporaries. */
|
||||
#define src x2
|
||||
#define data1 x3
|
||||
#define data2 x4
|
||||
#define data2a x5
|
||||
#define has_nul1 x6
|
||||
#define has_nul2 x7
|
||||
#define tmp1 x8
|
||||
#define tmp2 x9
|
||||
#define tmp3 x10
|
||||
#define tmp4 x11
|
||||
#define zeroones x12
|
||||
#define pos x13
|
||||
#define limit_wd x14
|
||||
|
||||
/* def_fn NAME [p2align=N]: boilerplate that opens a global function.
   Switches to the .text section, passes N to .p2align (N defaults to 0),
   exports NAME with .global, marks it as %function and finally emits
   the NAME: label, so the function body follows the macro invocation.  */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
.text
|
||||
.p2align 6
|
||||
.Lstart:
|
||||
/* Pre-pad to ensure critical loop begins an icache line. */
|
||||
.rep 7
|
||||
nop
|
||||
.endr
|
||||
/* Put this code here to avoid wasting more space with pre-padding. */
|
||||
.Lhit_limit:
|
||||
mov len, limit
|
||||
ret
|
||||
|
||||
def_fn strnlen
|
||||
cbz limit, .Lhit_limit
|
||||
mov zeroones, #REP8_01
|
||||
bic src, srcin, #15
|
||||
ands tmp1, srcin, #15
|
||||
b.ne .Lmisaligned
|
||||
/* Calculate the number of full and partial words -1. */
|
||||
sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
|
||||
lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
|
||||
|
||||
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
can be done in parallel across the entire word. */
|
||||
/* The inner loop deals with two Dwords at a time. This has a
|
||||
slightly higher start-up cost, but we should win quite quickly,
|
||||
especially on cores with a high number of issue slots per
|
||||
cycle, as we get much better parallelism out of the operations. */
|
||||
|
||||
/* Start of critical section -- keep to one 64Byte cache line. */
|
||||
.Lloop:
|
||||
ldp data1, data2, [src], #16
|
||||
.Lrealigned:
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
subs limit_wd, limit_wd, #1
|
||||
orr tmp1, has_nul1, has_nul2
|
||||
ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
|
||||
b.eq .Lloop
|
||||
/* End of critical section -- keep to one 64Byte cache line. */
|
||||
|
||||
orr tmp1, has_nul1, has_nul2
|
||||
cbz tmp1, .Lhit_limit /* No null in final Qword. */
|
||||
|
||||
/* We know there's a null in the final Qword. The easiest thing
|
||||
to do now is work out the length of the string and return
|
||||
MIN (len, limit). */
|
||||
|
||||
sub len, src, srcin
|
||||
cbz has_nul1, .Lnul_in_data2
|
||||
#ifdef __AARCH64EB__
|
||||
mov data2, data1
|
||||
#endif
|
||||
sub len, len, #8
|
||||
mov has_nul2, has_nul1
|
||||
.Lnul_in_data2:
|
||||
#ifdef __AARCH64EB__
|
||||
/* For big-endian, carry propagation (if the final byte in the
|
||||
string is 0x01) means we cannot use has_nul directly. The
|
||||
easiest way to get the correct byte is to byte-swap the data
|
||||
and calculate the syndrome a second time. */
|
||||
rev data2, data2
|
||||
sub tmp1, data2, zeroones
|
||||
orr tmp2, data2, #REP8_7f
|
||||
bic has_nul2, tmp1, tmp2
|
||||
#endif
|
||||
sub len, len, #8
|
||||
rev has_nul2, has_nul2
|
||||
clz pos, has_nul2
|
||||
add len, len, pos, lsr #3 /* Bits to bytes. */
|
||||
cmp len, limit
|
||||
csel len, len, limit, ls /* Return the lower value. */
|
||||
ret
|
||||
|
||||
.Lmisaligned:
|
||||
/* Deal with a partial first word.
|
||||
We're doing two things in parallel here;
|
||||
1) Calculate the number of words (but avoiding overflow if
|
||||
limit is near ULONG_MAX) - to do this we need to work out
|
||||
limit + tmp1 - 1 as a 65-bit value before shifting it;
|
||||
2) Load and mask the initial data words - we force the bytes
|
||||
before the ones we are interested in to 0xff - this ensures
|
||||
early bytes will not hit any zero detection. */
|
||||
sub limit_wd, limit, #1
|
||||
neg tmp4, tmp1
|
||||
cmp tmp1, #8
|
||||
|
||||
and tmp3, limit_wd, #15
|
||||
lsr limit_wd, limit_wd, #4
|
||||
mov tmp2, #~0
|
||||
|
||||
ldp data1, data2, [src], #16
|
||||
lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
|
||||
add tmp3, tmp3, tmp1
|
||||
|
||||
#ifdef __AARCH64EB__
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
lsl tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
|
||||
#else
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
lsr tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
|
||||
#endif
|
||||
add limit_wd, limit_wd, tmp3, lsr #4
|
||||
|
||||
orr data1, data1, tmp2
|
||||
orr data2a, data2, tmp2
|
||||
|
||||
csinv data1, data1, xzr, le
|
||||
csel data2, data2, data2a, le
|
||||
b .Lrealigned
|
||||
.size strnlen, . - .Lstart /* Include pre-padding in size. */
|
||||
@@ -0,0 +1,155 @@
|
||||
/* Copyright (c) 2010-2011, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Written by Dave Gilbert <david.gilbert@linaro.org>
|
||||
|
||||
This memchr routine is optimised on a Cortex-A9 and should work on
|
||||
all ARMv7 processors. It has a fast path for short sizes, and has
|
||||
an optimised path for large data sets; the worst case is finding the
|
||||
match early in a large data set.
|
||||
|
||||
*/
|
||||
|
||||
@ 2011-02-07 david.gilbert@linaro.org
|
||||
@ Extracted from local git a5b438d861
|
||||
@ 2011-07-14 david.gilbert@linaro.org
|
||||
@ Import endianness fix from local git ea786f1b
|
||||
@ 2011-12-07 david.gilbert@linaro.org
|
||||
@ Removed unneeded cbz from align loop
|
||||
|
||||
.syntax unified
|
||||
.arch armv7-a
|
||||
|
||||
@ this lets us check a flag in a 00/ff byte easily in either endianness
|
||||
#ifdef __ARMEB__
|
||||
#define CHARTSTMASK(c) 1<<(31-(c*8))
|
||||
#else
|
||||
#define CHARTSTMASK(c) 1<<(c*8)
|
||||
#endif
|
||||
.text
|
||||
.thumb
|
||||
|
||||
@ ---------------------------------------------------------------------------
|
||||
.thumb_func
|
||||
.align 2
|
||||
.p2align 4,,15
|
||||
.global memchr
|
||||
.type memchr,%function
|
||||
memchr:
|
||||
@ r0 = start of memory to scan
|
||||
@ r1 = character to look for
|
||||
@ r2 = length
|
||||
@ returns r0 = pointer to character or NULL if not found
|
||||
and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
|
||||
|
||||
cmp r2,#16 @ If it's short don't bother with anything clever
|
||||
blt 20f
|
||||
|
||||
tst r0, #7 @ If it's already aligned skip the next bit
|
||||
beq 10f
|
||||
|
||||
@ Work up to an aligned point
|
||||
5:
|
||||
ldrb r3, [r0],#1
|
||||
subs r2, r2, #1
|
||||
cmp r3, r1
|
||||
beq 50f @ If it matches exit found
|
||||
tst r0, #7
|
||||
bne 5b @ If not aligned yet then do next byte
|
||||
|
||||
10:
|
||||
@ At this point, we are aligned, we know we have at least 8 bytes to work with
|
||||
push {r4,r5,r6,r7}
|
||||
orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
|
||||
orr r1, r1, r1, lsl #16
|
||||
bic r4, r2, #7 @ Number of double words to work with
|
||||
mvns r7, #0 @ all F's
|
||||
movs r3, #0
|
||||
|
||||
15:
|
||||
ldmia r0!,{r5,r6}
|
||||
subs r4, r4, #8
|
||||
eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target
|
||||
eor r6,r6, r1
|
||||
uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
|
||||
sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
|
||||
uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
|
||||
sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
|
||||
cbnz r6, 60f
|
||||
bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
|
||||
|
||||
pop {r4,r5,r6,r7}
|
||||
and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
|
||||
and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
|
||||
|
||||
20:
|
||||
cbz r2, 40f @ 0 length or hit the end already then not found
|
||||
|
||||
21: @ Post aligned section, or just a short call
|
||||
ldrb r3,[r0],#1
|
||||
subs r2,r2,#1
|
||||
eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
|
||||
cbz r3, 50f
|
||||
bne 21b @ on r2 flags
|
||||
|
||||
40:
|
||||
movs r0,#0 @ not found
|
||||
bx lr
|
||||
|
||||
50:
|
||||
subs r0,r0,#1 @ found
|
||||
bx lr
|
||||
|
||||
60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
|
||||
@ r0 points to the start of the double word after the one that was tested
|
||||
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
|
||||
cmp r5, #0
|
||||
itte eq
|
||||
moveq r5, r6 @ the end is in the 2nd word
|
||||
subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
|
||||
subne r0,r0,#7 @ or 2nd byte of 1st word
|
||||
|
||||
@ r0 currently points to the 2nd byte of the word containing the hit
|
||||
tst r5, # CHARTSTMASK(0) @ 1st character
|
||||
bne 61f
|
||||
adds r0,r0,#1
|
||||
tst r5, # CHARTSTMASK(1) @ 2nd character
|
||||
ittt eq
|
||||
addeq r0,r0,#1
|
||||
tsteq r5, # (3<<15) @ 2nd & 3rd character
|
||||
@ If not the 3rd must be the last one
|
||||
addeq r0,r0,#1
|
||||
|
||||
61:
|
||||
pop {r4,r5,r6,r7}
|
||||
subs r0,r0,#1
|
||||
bx lr
|
||||
@@ -0,0 +1,617 @@
|
||||
/* Copyright (c) 2013, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This memcpy routine is optimised for Cortex-A15 cores and takes advantage
|
||||
of VFP or NEON when built with the appropriate flags.
|
||||
|
||||
Assumptions:
|
||||
|
||||
ARMv6 (ARMv7-a if using Neon)
|
||||
ARM state
|
||||
Unaligned accesses
|
||||
|
||||
*/
|
||||
|
||||
.syntax unified
|
||||
/* This implementation requires ARM state. */
|
||||
.arm
|
||||
|
||||
#ifdef __ARM_NEON__
|
||||
|
||||
.fpu neon
|
||||
.arch armv7-a
|
||||
# define FRAME_SIZE 4
|
||||
# define USE_VFP
|
||||
# define USE_NEON
|
||||
|
||||
#elif !defined (__SOFTFP__)
|
||||
|
||||
.arch armv6
|
||||
.fpu vfpv2
|
||||
# define FRAME_SIZE 32
|
||||
# define USE_VFP
|
||||
|
||||
#else
|
||||
.arch armv6
|
||||
# define FRAME_SIZE 32
|
||||
|
||||
#endif
|
||||
|
||||
/* Old versions of GAS incorrectly implement the NEON align semantics. */
|
||||
#ifdef BROKEN_ASM_NEON_ALIGN
|
||||
#define ALIGN(addr, align) addr,:align
|
||||
#else
|
||||
#define ALIGN(addr, align) addr:align
|
||||
#endif
|
||||
|
||||
#define PC_OFFSET 8 /* PC pipeline compensation. */
|
||||
#define INSN_SIZE 4
|
||||
|
||||
/* Call parameters. */
|
||||
#define dstin r0
|
||||
#define src r1
|
||||
#define count r2
|
||||
|
||||
/* Locals. */
|
||||
#define tmp1 r3
|
||||
#define dst ip
|
||||
#define tmp2 r10
|
||||
|
||||
#ifndef USE_NEON
|
||||
/* For bulk copies using GP registers. */
|
||||
#define A_l r2 /* Call-clobbered. */
|
||||
#define A_h r3 /* Call-clobbered. */
|
||||
#define B_l r4
|
||||
#define B_h r5
|
||||
#define C_l r6
|
||||
#define C_h r7
|
||||
#define D_l r8
|
||||
#define D_h r9
|
||||
#endif
|
||||
|
||||
/* Number of lines ahead to pre-fetch data. If you change this the code
|
||||
below will need adjustment to compensate. */
|
||||
|
||||
#define prefetch_lines 5
|
||||
|
||||
#ifdef USE_VFP
|
||||
.macro cpy_line_vfp vreg, base /* Store one 64-byte line at dst + base from registers loaded earlier, reloading each register from src right after it is stored. */
|
||||
vstr \vreg, [dst, #\base] /* First doubleword of the line comes from the look-ahead register. */
|
||||
vldr \vreg, [src, #\base] /* The long-copy path advances src by 32 before use, so this reload feeds the store at base + 32 below. */
|
||||
vstr d0, [dst, #\base + 8]
|
||||
vldr d0, [src, #\base + 8]
|
||||
vstr d1, [dst, #\base + 16]
|
||||
vldr d1, [src, #\base + 16]
|
||||
vstr d2, [dst, #\base + 24]
|
||||
vldr d2, [src, #\base + 24]
|
||||
vstr \vreg, [dst, #\base + 32]
|
||||
vldr \vreg, [src, #\base + prefetch_lines * 64 - 32] /* Look-ahead load: refill the register from prefetch_lines lines further on (used in place of a PLD). */
|
||||
vstr d0, [dst, #\base + 40]
|
||||
vldr d0, [src, #\base + 40]
|
||||
vstr d1, [dst, #\base + 48]
|
||||
vldr d1, [src, #\base + 48]
|
||||
vstr d2, [dst, #\base + 56]
|
||||
vldr d2, [src, #\base + 56] /* d0-d2 now hold data for the next line's stores. */
|
||||
.endm
|
||||
|
||||
.macro cpy_tail_vfp vreg, base /* Same store/reload sequence as cpy_line_vfp, but without the look-ahead reload of the line register. */
|
||||
vstr \vreg, [dst, #\base]
|
||||
vldr \vreg, [src, #\base]
|
||||
vstr d0, [dst, #\base + 8]
|
||||
vldr d0, [src, #\base + 8]
|
||||
vstr d1, [dst, #\base + 16]
|
||||
vldr d1, [src, #\base + 16]
|
||||
vstr d2, [dst, #\base + 24]
|
||||
vldr d2, [src, #\base + 24]
|
||||
vstr \vreg, [dst, #\base + 32] /* No reload of the line register follows: nothing is fetched from prefetch_lines ahead here. */
|
||||
vstr d0, [dst, #\base + 40]
|
||||
vldr d0, [src, #\base + 40]
|
||||
vstr d1, [dst, #\base + 48]
|
||||
vldr d1, [src, #\base + 48]
|
||||
vstr d2, [dst, #\base + 56]
|
||||
vldr d2, [src, #\base + 56]
|
||||
.endm
|
||||
#endif
|
||||
|
||||
.macro def_fn f p2align=0 /* Open a global function: F is the symbol name, P2ALIGN the log2 of its alignment (default 0). */
|
||||
.text /* Emit into the text section. */
|
||||
.p2align \p2align /* Align the entry point to 2^p2align bytes. */
|
||||
.global \f /* Export the symbol. */
|
||||
.type \f, %function /* Mark the symbol as a function. */
|
||||
\f: /* Entry label; the function body follows the macro invocation. */
|
||||
.endm
|
||||
|
||||
def_fn memcpy p2align=6
|
||||
|
||||
mov dst, dstin /* Preserve dstin, we need to return it. */
|
||||
cmp count, #64
|
||||
bge .Lcpy_not_short
|
||||
/* Deal with small copies quickly by dropping straight into the
|
||||
exit block. */
|
||||
|
||||
.Ltail63unaligned:
|
||||
#ifdef USE_NEON
|
||||
and tmp1, count, #0x38
|
||||
rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
|
||||
add pc, pc, tmp1
|
||||
vld1.8 {d0}, [src]! /* 14 words to go. */
|
||||
vst1.8 {d0}, [dst]!
|
||||
vld1.8 {d0}, [src]! /* 12 words to go. */
|
||||
vst1.8 {d0}, [dst]!
|
||||
vld1.8 {d0}, [src]! /* 10 words to go. */
|
||||
vst1.8 {d0}, [dst]!
|
||||
vld1.8 {d0}, [src]! /* 8 words to go. */
|
||||
vst1.8 {d0}, [dst]!
|
||||
vld1.8 {d0}, [src]! /* 6 words to go. */
|
||||
vst1.8 {d0}, [dst]!
|
||||
vld1.8 {d0}, [src]! /* 4 words to go. */
|
||||
vst1.8 {d0}, [dst]!
|
||||
vld1.8 {d0}, [src]! /* 2 words to go. */
|
||||
vst1.8 {d0}, [dst]!
|
||||
|
||||
tst count, #4
|
||||
ldrne tmp1, [src], #4
|
||||
strne tmp1, [dst], #4
|
||||
#else
|
||||
/* Copy up to 15 full words of data. May not be aligned. */
|
||||
/* Cannot use VFP for unaligned data. */
|
||||
and tmp1, count, #0x3c
|
||||
add dst, dst, tmp1
|
||||
add src, src, tmp1
|
||||
rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2)
|
||||
/* Jump directly into the sequence below at the correct offset. */
|
||||
add pc, pc, tmp1, lsl #1
|
||||
|
||||
ldr tmp1, [src, #-60] /* 15 words to go. */
|
||||
str tmp1, [dst, #-60]
|
||||
|
||||
ldr tmp1, [src, #-56] /* 14 words to go. */
|
||||
str tmp1, [dst, #-56]
|
||||
ldr tmp1, [src, #-52]
|
||||
str tmp1, [dst, #-52]
|
||||
|
||||
ldr tmp1, [src, #-48] /* 12 words to go. */
|
||||
str tmp1, [dst, #-48]
|
||||
ldr tmp1, [src, #-44]
|
||||
str tmp1, [dst, #-44]
|
||||
|
||||
ldr tmp1, [src, #-40] /* 10 words to go. */
|
||||
str tmp1, [dst, #-40]
|
||||
ldr tmp1, [src, #-36]
|
||||
str tmp1, [dst, #-36]
|
||||
|
||||
ldr tmp1, [src, #-32] /* 8 words to go. */
|
||||
str tmp1, [dst, #-32]
|
||||
ldr tmp1, [src, #-28]
|
||||
str tmp1, [dst, #-28]
|
||||
|
||||
ldr tmp1, [src, #-24] /* 6 words to go. */
|
||||
str tmp1, [dst, #-24]
|
||||
ldr tmp1, [src, #-20]
|
||||
str tmp1, [dst, #-20]
|
||||
|
||||
ldr tmp1, [src, #-16] /* 4 words to go. */
|
||||
str tmp1, [dst, #-16]
|
||||
ldr tmp1, [src, #-12]
|
||||
str tmp1, [dst, #-12]
|
||||
|
||||
ldr tmp1, [src, #-8] /* 2 words to go. */
|
||||
str tmp1, [dst, #-8]
|
||||
ldr tmp1, [src, #-4]
|
||||
str tmp1, [dst, #-4]
|
||||
#endif
|
||||
|
||||
lsls count, count, #31
|
||||
ldrhcs tmp1, [src], #2
|
||||
ldrbne src, [src] /* Src is dead, use as a scratch. */
|
||||
strhcs tmp1, [dst], #2
|
||||
strbne src, [dst]
|
||||
bx lr
|
||||
|
||||
.Lcpy_not_short:
|
||||
/* At least 64 bytes to copy, but don't know the alignment yet. */
|
||||
str tmp2, [sp, #-FRAME_SIZE]!
|
||||
and tmp2, src, #7
|
||||
and tmp1, dst, #7
|
||||
cmp tmp1, tmp2
|
||||
bne .Lcpy_notaligned
|
||||
|
||||
#ifdef USE_VFP
|
||||
/* Magic dust alert! Force VFP on Cortex-A9. Experiments show
|
||||
that the FP pipeline is much better at streaming loads and
|
||||
stores. This is outside the critical loop. */
|
||||
vmov.f32 s0, s0
|
||||
#endif
|
||||
|
||||
/* SRC and DST have the same mutual 64-bit alignment, but we may
|
||||
still need to pre-copy some bytes to get to natural alignment.
|
||||
We bring SRC and DST into full 64-bit alignment. */
|
||||
lsls tmp2, dst, #29
|
||||
beq 1f
|
||||
rsbs tmp2, tmp2, #0
|
||||
sub count, count, tmp2, lsr #29
|
||||
ldrmi tmp1, [src], #4
|
||||
strmi tmp1, [dst], #4
|
||||
lsls tmp2, tmp2, #2
|
||||
ldrhcs tmp1, [src], #2
|
||||
ldrbne tmp2, [src], #1
|
||||
strhcs tmp1, [dst], #2
|
||||
strbne tmp2, [dst], #1
|
||||
|
||||
1:
|
||||
subs tmp2, count, #64 /* Use tmp2 for count. */
|
||||
blt .Ltail63aligned
|
||||
|
||||
cmp tmp2, #512
|
||||
bge .Lcpy_body_long
|
||||
|
||||
.Lcpy_body_medium: /* Count in tmp2. */
|
||||
#ifdef USE_VFP
|
||||
1:
|
||||
vldr d0, [src, #0]
|
||||
subs tmp2, tmp2, #64
|
||||
vldr d1, [src, #8]
|
||||
vstr d0, [dst, #0]
|
||||
vldr d0, [src, #16]
|
||||
vstr d1, [dst, #8]
|
||||
vldr d1, [src, #24]
|
||||
vstr d0, [dst, #16]
|
||||
vldr d0, [src, #32]
|
||||
vstr d1, [dst, #24]
|
||||
vldr d1, [src, #40]
|
||||
vstr d0, [dst, #32]
|
||||
vldr d0, [src, #48]
|
||||
vstr d1, [dst, #40]
|
||||
vldr d1, [src, #56]
|
||||
vstr d0, [dst, #48]
|
||||
add src, src, #64
|
||||
vstr d1, [dst, #56]
|
||||
add dst, dst, #64
|
||||
bge 1b
|
||||
tst tmp2, #0x3f
|
||||
beq .Ldone
|
||||
|
||||
.Ltail63aligned: /* Count in tmp2. */
|
||||
and tmp1, tmp2, #0x38
|
||||
add dst, dst, tmp1
|
||||
add src, src, tmp1
|
||||
rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
|
||||
add pc, pc, tmp1
|
||||
|
||||
vldr d0, [src, #-56] /* 14 words to go. */
|
||||
vstr d0, [dst, #-56]
|
||||
vldr d0, [src, #-48] /* 12 words to go. */
|
||||
vstr d0, [dst, #-48]
|
||||
vldr d0, [src, #-40] /* 10 words to go. */
|
||||
vstr d0, [dst, #-40]
|
||||
vldr d0, [src, #-32] /* 8 words to go. */
|
||||
vstr d0, [dst, #-32]
|
||||
vldr d0, [src, #-24] /* 6 words to go. */
|
||||
vstr d0, [dst, #-24]
|
||||
vldr d0, [src, #-16] /* 4 words to go. */
|
||||
vstr d0, [dst, #-16]
|
||||
vldr d0, [src, #-8] /* 2 words to go. */
|
||||
vstr d0, [dst, #-8]
|
||||
#else
|
||||
sub src, src, #8
|
||||
sub dst, dst, #8
|
||||
1:
|
||||
ldrd A_l, A_h, [src, #8]
|
||||
strd A_l, A_h, [dst, #8]
|
||||
ldrd A_l, A_h, [src, #16]
|
||||
strd A_l, A_h, [dst, #16]
|
||||
ldrd A_l, A_h, [src, #24]
|
||||
strd A_l, A_h, [dst, #24]
|
||||
ldrd A_l, A_h, [src, #32]
|
||||
strd A_l, A_h, [dst, #32]
|
||||
ldrd A_l, A_h, [src, #40]
|
||||
strd A_l, A_h, [dst, #40]
|
||||
ldrd A_l, A_h, [src, #48]
|
||||
strd A_l, A_h, [dst, #48]
|
||||
ldrd A_l, A_h, [src, #56]
|
||||
strd A_l, A_h, [dst, #56]
|
||||
ldrd A_l, A_h, [src, #64]!
|
||||
strd A_l, A_h, [dst, #64]!
|
||||
subs tmp2, tmp2, #64
|
||||
bge 1b
|
||||
tst tmp2, #0x3f
|
||||
bne 1f
|
||||
ldr tmp2,[sp], #FRAME_SIZE
|
||||
bx lr
|
||||
1:
|
||||
add src, src, #8
|
||||
add dst, dst, #8
|
||||
|
||||
.Ltail63aligned: /* Count in tmp2. */
|
||||
/* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but
|
||||
we know that the src and dest are 64-bit aligned so we can use
|
||||
LDRD/STRD to improve efficiency. */
|
||||
/* TMP2 is now negative, but we don't care about that. The bottom
|
||||
six bits still tell us how many bytes are left to copy. */
|
||||
|
||||
and tmp1, tmp2, #0x38
|
||||
add dst, dst, tmp1
|
||||
add src, src, tmp1
|
||||
rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
|
||||
add pc, pc, tmp1
|
||||
ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
|
||||
strd A_l, A_h, [dst, #-56]
|
||||
ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
|
||||
strd A_l, A_h, [dst, #-48]
|
||||
ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
|
||||
strd A_l, A_h, [dst, #-40]
|
||||
ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
|
||||
strd A_l, A_h, [dst, #-32]
|
||||
ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
|
||||
strd A_l, A_h, [dst, #-24]
|
||||
ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
|
||||
strd A_l, A_h, [dst, #-16]
|
||||
ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
|
||||
strd A_l, A_h, [dst, #-8]
|
||||
|
||||
#endif
|
||||
tst tmp2, #4
|
||||
ldrne tmp1, [src], #4
|
||||
strne tmp1, [dst], #4
|
||||
lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
|
||||
ldrhcs tmp1, [src], #2
|
||||
ldrbne tmp2, [src]
|
||||
strhcs tmp1, [dst], #2
|
||||
strbne tmp2, [dst]
|
||||
|
||||
.Ldone:
|
||||
ldr tmp2, [sp], #FRAME_SIZE
|
||||
bx lr
|
||||
|
||||
.Lcpy_body_long: /* Count in tmp2. */
|
||||
|
||||
/* Long copy. We know that there's at least (prefetch_lines * 64)
|
||||
bytes to go. */
|
||||
#ifdef USE_VFP
|
||||
/* Don't use PLD. Instead, read some data in advance of the current
|
||||
copy position into a register. This should act like a PLD
|
||||
operation but we won't have to repeat the transfer. */
|
||||
|
||||
vldr d3, [src, #0]
|
||||
vldr d4, [src, #64]
|
||||
vldr d5, [src, #128]
|
||||
vldr d6, [src, #192]
|
||||
vldr d7, [src, #256]
|
||||
|
||||
vldr d0, [src, #8]
|
||||
vldr d1, [src, #16]
|
||||
vldr d2, [src, #24]
|
||||
add src, src, #32
|
||||
|
||||
subs tmp2, tmp2, #prefetch_lines * 64 * 2
|
||||
blt 2f
|
||||
1:
|
||||
cpy_line_vfp d3, 0
|
||||
cpy_line_vfp d4, 64
|
||||
cpy_line_vfp d5, 128
|
||||
add dst, dst, #3 * 64
|
||||
add src, src, #3 * 64
|
||||
cpy_line_vfp d6, 0
|
||||
cpy_line_vfp d7, 64
|
||||
add dst, dst, #2 * 64
|
||||
add src, src, #2 * 64
|
||||
subs tmp2, tmp2, #prefetch_lines * 64
|
||||
bge 1b
|
||||
|
||||
2:
|
||||
cpy_tail_vfp d3, 0
|
||||
cpy_tail_vfp d4, 64
|
||||
cpy_tail_vfp d5, 128
|
||||
add src, src, #3 * 64
|
||||
add dst, dst, #3 * 64
|
||||
cpy_tail_vfp d6, 0
|
||||
vstr d7, [dst, #64]
|
||||
vldr d7, [src, #64]
|
||||
vstr d0, [dst, #64 + 8]
|
||||
vldr d0, [src, #64 + 8]
|
||||
vstr d1, [dst, #64 + 16]
|
||||
vldr d1, [src, #64 + 16]
|
||||
vstr d2, [dst, #64 + 24]
|
||||
vldr d2, [src, #64 + 24]
|
||||
vstr d7, [dst, #64 + 32]
|
||||
add src, src, #96
|
||||
vstr d0, [dst, #64 + 40]
|
||||
vstr d1, [dst, #64 + 48]
|
||||
vstr d2, [dst, #64 + 56]
|
||||
add dst, dst, #128
|
||||
add tmp2, tmp2, #prefetch_lines * 64
|
||||
b .Lcpy_body_medium
|
||||
#else
|
||||
/* Long copy. Use an SMS style loop to maximize the I/O
|
||||
bandwidth of the core. We don't have enough spare registers
|
||||
to synthesise prefetching, so use PLD operations. */
|
||||
/* Pre-bias src and dst. */
|
||||
sub src, src, #8
|
||||
sub dst, dst, #8
|
||||
pld [src, #8]
|
||||
pld [src, #72]
|
||||
subs tmp2, tmp2, #64
|
||||
pld [src, #136]
|
||||
ldrd A_l, A_h, [src, #8]
|
||||
strd B_l, B_h, [sp, #8]
|
||||
ldrd B_l, B_h, [src, #16]
|
||||
strd C_l, C_h, [sp, #16]
|
||||
ldrd C_l, C_h, [src, #24]
|
||||
strd D_l, D_h, [sp, #24]
|
||||
pld [src, #200]
|
||||
ldrd D_l, D_h, [src, #32]!
|
||||
b 1f
|
||||
.p2align 6
|
||||
2:
|
||||
pld [src, #232]
|
||||
strd A_l, A_h, [dst, #40]
|
||||
ldrd A_l, A_h, [src, #40]
|
||||
strd B_l, B_h, [dst, #48]
|
||||
ldrd B_l, B_h, [src, #48]
|
||||
strd C_l, C_h, [dst, #56]
|
||||
ldrd C_l, C_h, [src, #56]
|
||||
strd D_l, D_h, [dst, #64]!
|
||||
ldrd D_l, D_h, [src, #64]!
|
||||
subs tmp2, tmp2, #64
|
||||
1:
|
||||
strd A_l, A_h, [dst, #8]
|
||||
ldrd A_l, A_h, [src, #8]
|
||||
strd B_l, B_h, [dst, #16]
|
||||
ldrd B_l, B_h, [src, #16]
|
||||
strd C_l, C_h, [dst, #24]
|
||||
ldrd C_l, C_h, [src, #24]
|
||||
strd D_l, D_h, [dst, #32]
|
||||
ldrd D_l, D_h, [src, #32]
|
||||
bcs 2b
|
||||
/* Save the remaining bytes and restore the callee-saved regs. */
|
||||
strd A_l, A_h, [dst, #40]
|
||||
add src, src, #40
|
||||
strd B_l, B_h, [dst, #48]
|
||||
ldrd B_l, B_h, [sp, #8]
|
||||
strd C_l, C_h, [dst, #56]
|
||||
ldrd C_l, C_h, [sp, #16]
|
||||
strd D_l, D_h, [dst, #64]
|
||||
ldrd D_l, D_h, [sp, #24]
|
||||
add dst, dst, #72
|
||||
tst tmp2, #0x3f
|
||||
bne .Ltail63aligned
|
||||
ldr tmp2, [sp], #FRAME_SIZE
|
||||
bx lr
|
||||
#endif
|
||||
|
||||
.Lcpy_notaligned:
|
||||
pld [src]
|
||||
pld [src, #64]
|
||||
/* There's at least 64 bytes to copy, but there is no mutual
|
||||
alignment. */
|
||||
/* Bring DST to 64-bit alignment. */
|
||||
lsls tmp2, dst, #29
|
||||
pld [src, #(2 * 64)]
|
||||
beq 1f
|
||||
rsbs tmp2, tmp2, #0
|
||||
sub count, count, tmp2, lsr #29
|
||||
ldrmi tmp1, [src], #4
|
||||
strmi tmp1, [dst], #4
|
||||
lsls tmp2, tmp2, #2
|
||||
ldrbne tmp1, [src], #1
|
||||
ldrhcs tmp2, [src], #2
|
||||
strbne tmp1, [dst], #1
|
||||
strhcs tmp2, [dst], #2
|
||||
1:
|
||||
pld [src, #(3 * 64)]
|
||||
subs count, count, #64
|
||||
ldrmi tmp2, [sp], #FRAME_SIZE
|
||||
bmi .Ltail63unaligned
|
||||
pld [src, #(4 * 64)]
|
||||
|
||||
#ifdef USE_NEON
|
||||
vld1.8 {d0-d3}, [src]!
|
||||
vld1.8 {d4-d7}, [src]!
|
||||
subs count, count, #64
|
||||
bmi 2f
|
||||
1:
|
||||
pld [src, #(4 * 64)]
|
||||
vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
|
||||
vld1.8 {d0-d3}, [src]!
|
||||
vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
|
||||
vld1.8 {d4-d7}, [src]!
|
||||
subs count, count, #64
|
||||
bpl 1b
|
||||
2:
|
||||
vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
|
||||
vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
|
||||
ands count, count, #0x3f
|
||||
#else
|
||||
/* Use an SMS style loop to maximize the I/O bandwidth. */
|
||||
sub src, src, #4
|
||||
sub dst, dst, #8
|
||||
subs tmp2, count, #64 /* Use tmp2 for count. */
|
||||
ldr A_l, [src, #4]
|
||||
ldr A_h, [src, #8]
|
||||
strd B_l, B_h, [sp, #8]
|
||||
ldr B_l, [src, #12]
|
||||
ldr B_h, [src, #16]
|
||||
strd C_l, C_h, [sp, #16]
|
||||
ldr C_l, [src, #20]
|
||||
ldr C_h, [src, #24]
|
||||
strd D_l, D_h, [sp, #24]
|
||||
ldr D_l, [src, #28]
|
||||
ldr D_h, [src, #32]!
|
||||
b 1f
|
||||
.p2align 6
|
||||
2:
|
||||
pld [src, #(5 * 64) - (32 - 4)]
|
||||
strd A_l, A_h, [dst, #40]
|
||||
ldr A_l, [src, #36]
|
||||
ldr A_h, [src, #40]
|
||||
strd B_l, B_h, [dst, #48]
|
||||
ldr B_l, [src, #44]
|
||||
ldr B_h, [src, #48]
|
||||
strd C_l, C_h, [dst, #56]
|
||||
ldr C_l, [src, #52]
|
||||
ldr C_h, [src, #56]
|
||||
strd D_l, D_h, [dst, #64]!
|
||||
ldr D_l, [src, #60]
|
||||
ldr D_h, [src, #64]!
|
||||
subs tmp2, tmp2, #64
|
||||
1:
|
||||
strd A_l, A_h, [dst, #8]
|
||||
ldr A_l, [src, #4]
|
||||
ldr A_h, [src, #8]
|
||||
strd B_l, B_h, [dst, #16]
|
||||
ldr B_l, [src, #12]
|
||||
ldr B_h, [src, #16]
|
||||
strd C_l, C_h, [dst, #24]
|
||||
ldr C_l, [src, #20]
|
||||
ldr C_h, [src, #24]
|
||||
strd D_l, D_h, [dst, #32]
|
||||
ldr D_l, [src, #28]
|
||||
ldr D_h, [src, #32]
|
||||
bcs 2b
|
||||
|
||||
/* Save the remaining bytes and restore the callee-saved regs. */
|
||||
strd A_l, A_h, [dst, #40]
|
||||
add src, src, #36
|
||||
strd B_l, B_h, [dst, #48]
|
||||
ldrd B_l, B_h, [sp, #8]
|
||||
strd C_l, C_h, [dst, #56]
|
||||
ldrd C_l, C_h, [sp, #16]
|
||||
strd D_l, D_h, [dst, #64]
|
||||
ldrd D_l, D_h, [sp, #24]
|
||||
add dst, dst, #72
|
||||
ands count, tmp2, #0x3f
|
||||
#endif
|
||||
ldr tmp2, [sp], #FRAME_SIZE
|
||||
bne .Ltail63unaligned
|
||||
bx lr
|
||||
|
||||
.size memcpy, . - memcpy
|
||||
@@ -0,0 +1,122 @@
|
||||
/* Copyright (c) 2010-2011, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Written by Dave Gilbert <david.gilbert@linaro.org>
|
||||
|
||||
This memset routine is optimised on a Cortex-A9 and should work on
|
||||
all ARMv7 processors.
|
||||
|
||||
*/
|
||||
|
||||
.syntax unified
|
||||
.arch armv7-a
|
||||
|
||||
@ 2011-08-30 david.gilbert@linaro.org
|
||||
@ Extracted from local git 2f11b436
|
||||
|
||||
@ this lets us check a flag in a 00/ff byte easily in either endianness
|
||||
#ifdef __ARMEB__
|
||||
#define CHARTSTMASK(c) 1<<(31-(c*8))
|
||||
#else
|
||||
#define CHARTSTMASK(c) 1<<(c*8)
|
||||
#endif
|
||||
.text
|
||||
.thumb
|
||||
|
||||
@ ---------------------------------------------------------------------------
|
||||
.thumb_func
|
||||
.align 2
|
||||
.p2align 4,,15
|
||||
.global memset
|
||||
.type memset,%function
|
||||
memset:
|
||||
@ r0 = address
|
||||
@ r1 = character
|
||||
@ r2 = count
|
||||
@ returns original address in r0
|
||||
|
||||
mov r3, r0 @ Leave r0 alone
|
||||
cbz r2, 10f @ Exit if 0 length
|
||||
|
||||
tst r0, #7
|
||||
beq 2f @ Already aligned
|
||||
|
||||
@ Ok, so we're misaligned here
|
||||
1:
|
||||
strb r1, [r3], #1
|
||||
subs r2,r2,#1
|
||||
tst r3, #7
|
||||
cbz r2, 10f @ Exit if we hit the end
|
||||
bne 1b @ go round again if still misaligned
|
||||
|
||||
2:
|
||||
@ OK, so we're aligned
|
||||
push {r4,r5,r6,r7}
|
||||
bics r4, r2, #15 @ if less than 16 bytes then need to finish it off
|
||||
beq 5f
|
||||
|
||||
3:
|
||||
@ POSIX says that ch is cast to an unsigned char. A uxtb is one
|
||||
@ byte and takes two cycles, where an AND is four bytes but one
|
||||
@ cycle.
|
||||
and r1, #0xFF
|
||||
orr r1, r1, r1, lsl#8 @ Same character into all bytes
|
||||
orr r1, r1, r1, lsl#16
|
||||
mov r5,r1
|
||||
mov r6,r1
|
||||
mov r7,r1
|
||||
|
||||
4:
|
||||
subs r4,r4,#16
|
||||
stmia r3!,{r1,r5,r6,r7}
|
||||
bne 4b
|
||||
and r2,r2,#15
|
||||
|
||||
@ At this point we're still aligned and we have up to align-1 bytes left to write
|
||||
@ we can avoid some of the byte-at-a time now by testing for some big chunks
|
||||
tst r2,#8
|
||||
itt ne
|
||||
subne r2,r2,#8
|
||||
stmiane r3!,{r1,r5}
|
||||
|
||||
5:
|
||||
pop {r4,r5,r6,r7}
|
||||
cbz r2, 10f
|
||||
|
||||
@ Got to do any last < alignment bytes
|
||||
6:
|
||||
subs r2,r2,#1
|
||||
strb r1,[r3],#1
|
||||
bne 6b
|
||||
|
||||
10:
|
||||
bx lr @ goodbye
|
||||
@@ -0,0 +1,80 @@
|
||||
/* Copyright (c) 2010-2011, Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Written by Dave Gilbert <david.gilbert@linaro.org>
|
||||
|
||||
A very simple strchr routine, from benchmarks on A9 it's a bit faster than
|
||||
the current version in eglibc (2.12.1-0ubuntu14 package)
|
||||
I don't think doing a word at a time version is worth it since a lot
|
||||
of strchr cases are very short anyway.
|
||||
|
||||
*/
|
||||
|
||||
@ 2011-02-07 david.gilbert@linaro.org
|
||||
@ Extracted from local git a5b438d861
|
||||
|
||||
.syntax unified
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
.thumb
|
||||
|
||||
@ ---------------------------------------------------------------------------
|
||||
|
||||
.thumb_func
|
||||
.align 2
|
||||
.p2align 4,,15
|
||||
.global strchr
|
||||
.type strchr,%function
|
||||
strchr:
|
||||
@ r0 = start of string (scanned one byte at a time, post-incremented on every load)
|
||||
@ r1 = character to match
|
||||
|
||||
@ returns NULL for no match, or a pointer to the match
|
||||
and r1,r1, #255 @ keep only the low byte of the character argument
|
||||
|
||||
1:
|
||||
ldrb r2,[r0],#1 @ load the next byte and step r0 past it
|
||||
cmp r2,r1 @ Z set on a match; these flags are consumed by the bne below
|
||||
cbz r2,10f @ hit the terminator - cbz leaves the flags from the cmp untouched
|
||||
bne 1b @ (flags from the cmp above) not a match, keep scanning
|
||||
|
||||
@ We're here if it matched
|
||||
5:
|
||||
subs r0,r0,#1 @ r0 was post-incremented past the byte of interest; step back onto it
|
||||
bx lr
|
||||
|
||||
10:
|
||||
@ We're here if we ran off the end
|
||||
cmp r1, #0 @ Corner case - you're allowed to search for the nil and get a pointer to it
|
||||
beq 5b @ A bit messy, if it's common we should branch at the start to a special loop
|
||||
mov r0,#0 @ no match before the terminator: return NULL
|
||||
bx lr
|
||||
@@ -0,0 +1,500 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2014 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Implementation of strcmp for ARMv7 when DSP instructions are
|
||||
available. Use ldrd to support wider loads, provided the data
|
||||
is sufficiently aligned. Use saturating arithmetic to optimize
|
||||
the compares. */
|
||||
|
||||
/* Build Options:
|
||||
STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
|
||||
byte in the string. If comparing completely random strings
|
||||
the pre-check will save time, since there is a very high
|
||||
probability of a mismatch in the first character: we save
|
||||
significant overhead if this is the common case. However,
|
||||
if strings are likely to be identical (eg because we're
|
||||
verifying a hit in a hash table), then this check is largely
|
||||
redundant. */
|
||||
|
||||
#define STRCMP_NO_PRECHECK 0
|
||||
|
||||
/* This version uses Thumb-2 code. */
|
||||
.thumb
|
||||
.syntax unified
|
||||
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
#define S2LO lsl
|
||||
#define S2LOEQ lsleq
|
||||
#define S2HI lsr
|
||||
#define MSB 0x000000ff
|
||||
#define LSB 0xff000000
|
||||
#define BYTE0_OFFSET 24
|
||||
#define BYTE1_OFFSET 16
|
||||
#define BYTE2_OFFSET 8
|
||||
#define BYTE3_OFFSET 0
|
||||
#else /* not __ARM_BIG_ENDIAN */
|
||||
#define S2LO lsr
|
||||
#define S2LOEQ lsreq
|
||||
#define S2HI lsl
|
||||
#define BYTE0_OFFSET 0
|
||||
#define BYTE1_OFFSET 8
|
||||
#define BYTE2_OFFSET 16
|
||||
#define BYTE3_OFFSET 24
|
||||
#define MSB 0xff000000
|
||||
#define LSB 0x000000ff
|
||||
#endif /* not __ARM_BIG_ENDIAN */
|
||||
|
||||
/* def_fn f [p2align]: switch to .text, align to 2^p2align bytes
   (default 0), and open a global symbol named f typed as a function. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
/* Parameters and result. */
|
||||
#define src1 r0
|
||||
#define src2 r1
|
||||
#define result r0 /* Overlaps src1. */
|
||||
|
||||
/* Internal variables. */
|
||||
#define tmp1 r4
|
||||
#define tmp2 r5
|
||||
#define const_m1 r12
|
||||
|
||||
/* Additional internal variables for 64-bit aligned data. */
|
||||
#define data1a r2
|
||||
#define data1b r3
|
||||
#define data2a r6
|
||||
#define data2b r7
|
||||
#define syndrome_a tmp1
|
||||
#define syndrome_b tmp2
|
||||
|
||||
/* Additional internal variables for 32-bit aligned data. */
|
||||
#define data1 r2
|
||||
#define data2 r3
|
||||
#define syndrome tmp2
|
||||
|
||||
|
||||
/* Macro to compute and return the result value for word-aligned
|
||||
cases. */
|
||||
.macro strcmp_epilogue_aligned synd d1 d2 restore_r6
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
/* If data1 contains a zero byte, then syndrome will contain a 1 in
|
||||
bit 7 of that byte. Otherwise, the highest set bit in the
|
||||
syndrome will highlight the first different bit. It is therefore
|
||||
sufficient to extract the eight bits starting with the syndrome
|
||||
bit. */
|
||||
clz tmp1, \synd
|
||||
lsl r1, \d2, tmp1
|
||||
.if \restore_r6
|
||||
ldrd r6, r7, [sp, #8]
|
||||
.endif
|
||||
.cfi_restore 6
|
||||
.cfi_restore 7
|
||||
lsl \d1, \d1, tmp1
|
||||
.cfi_remember_state
|
||||
lsr result, \d1, #24
|
||||
ldrd r4, r5, [sp], #16
|
||||
.cfi_restore 4
|
||||
.cfi_restore 5
|
||||
sub result, result, r1, lsr #24
|
||||
bx lr
|
||||
#else
|
||||
/* To use the big-endian trick we'd have to reverse all three words.
|
||||
That's slower than this approach. */
|
||||
rev \synd, \synd
|
||||
clz tmp1, \synd
|
||||
bic tmp1, tmp1, #7
|
||||
lsr r1, \d2, tmp1
|
||||
.cfi_remember_state
|
||||
.if \restore_r6
|
||||
ldrd r6, r7, [sp, #8]
|
||||
.endif
|
||||
.cfi_restore 6
|
||||
.cfi_restore 7
|
||||
lsr \d1, \d1, tmp1
|
||||
and result, \d1, #255
|
||||
and r1, r1, #255
|
||||
ldrd r4, r5, [sp], #16
|
||||
.cfi_restore 4
|
||||
.cfi_restore 5
|
||||
sub result, result, r1
|
||||
|
||||
bx lr
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.text
|
||||
.p2align 5
|
||||
.Lstrcmp_start_addr:
|
||||
#if STRCMP_NO_PRECHECK == 0
|
||||
.Lfastpath_exit:
|
||||
sub r0, r2, r3
|
||||
bx lr
|
||||
nop
|
||||
#endif
|
||||
def_fn strcmp
|
||||
#if STRCMP_NO_PRECHECK == 0
|
||||
ldrb r2, [src1]
|
||||
ldrb r3, [src2]
|
||||
cmp r2, #1
|
||||
it cs
|
||||
cmpcs r2, r3
|
||||
bne .Lfastpath_exit
|
||||
#endif
|
||||
.cfi_startproc
|
||||
strd r4, r5, [sp, #-16]!
|
||||
.cfi_def_cfa_offset 16
|
||||
.cfi_offset 4, -16
|
||||
.cfi_offset 5, -12
|
||||
orr tmp1, src1, src2
|
||||
strd r6, r7, [sp, #8]
|
||||
.cfi_offset 6, -8
|
||||
.cfi_offset 7, -4
|
||||
mvn const_m1, #0
|
||||
lsl r2, tmp1, #29
|
||||
cbz r2, .Lloop_aligned8
|
||||
|
||||
.Lnot_aligned:
|
||||
eor tmp1, src1, src2
|
||||
tst tmp1, #7
|
||||
bne .Lmisaligned8
|
||||
|
||||
/* Deal with mutual misalignment by aligning downwards and then
|
||||
masking off the unwanted loaded data to prevent a difference. */
|
||||
and tmp1, src1, #7
|
||||
bic src1, src1, #7
|
||||
and tmp2, tmp1, #3
|
||||
bic src2, src2, #7
|
||||
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
|
||||
ldrd data1a, data1b, [src1], #16
|
||||
tst tmp1, #4
|
||||
ldrd data2a, data2b, [src2], #16
|
||||
/* In thumb code we can't use MVN with a register shift, but
|
||||
we do have ORN. */
|
||||
S2HI tmp1, const_m1, tmp2
|
||||
orn data1a, data1a, tmp1
|
||||
orn data2a, data2a, tmp1
|
||||
beq .Lstart_realigned8
|
||||
orn data1b, data1b, tmp1
|
||||
mov data1a, const_m1
|
||||
orn data2b, data2b, tmp1
|
||||
mov data2a, const_m1
|
||||
b .Lstart_realigned8
|
||||
|
||||
/* Unwind the inner loop by a factor of 2, giving 16 bytes per
|
||||
pass. */
|
||||
.p2align 5,,12 /* Don't start in the tail bytes of a cache line. */
|
||||
.p2align 2 /* Always word aligned. */
|
||||
.Lloop_aligned8:
|
||||
ldrd data1a, data1b, [src1], #16
|
||||
ldrd data2a, data2b, [src2], #16
|
||||
.Lstart_realigned8:
|
||||
uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
|
||||
eor syndrome_a, data1a, data2a
|
||||
sel syndrome_a, syndrome_a, const_m1
|
||||
cbnz syndrome_a, .Ldiff_in_a
|
||||
uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
|
||||
eor syndrome_b, data1b, data2b
|
||||
sel syndrome_b, syndrome_b, const_m1
|
||||
cbnz syndrome_b, .Ldiff_in_b
|
||||
|
||||
ldrd data1a, data1b, [src1, #-8]
|
||||
ldrd data2a, data2b, [src2, #-8]
|
||||
uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
|
||||
eor syndrome_a, data1a, data2a
|
||||
sel syndrome_a, syndrome_a, const_m1
|
||||
uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
|
||||
eor syndrome_b, data1b, data2b
|
||||
sel syndrome_b, syndrome_b, const_m1
|
||||
/* Can't use CBZ for backwards branch. */
|
||||
orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
|
||||
beq .Lloop_aligned8
|
||||
|
||||
.Ldiff_found:
|
||||
cbnz syndrome_a, .Ldiff_in_a
|
||||
|
||||
.Ldiff_in_b:
|
||||
strcmp_epilogue_aligned syndrome_b, data1b, data2b 1
|
||||
|
||||
.Ldiff_in_a:
|
||||
.cfi_restore_state
|
||||
strcmp_epilogue_aligned syndrome_a, data1a, data2a 1
|
||||
|
||||
.cfi_restore_state
|
||||
.Lmisaligned8:
|
||||
tst tmp1, #3
|
||||
bne .Lmisaligned4
|
||||
ands tmp1, src1, #3
|
||||
bne .Lmutual_align4
|
||||
|
||||
/* Unrolled by a factor of 2, to reduce the number of post-increment
|
||||
operations. */
|
||||
.Lloop_aligned4:
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
.Lstart_realigned4:
|
||||
uadd8 syndrome, data1, const_m1 /* Only need GE bits. */
|
||||
eor syndrome, data1, data2
|
||||
sel syndrome, syndrome, const_m1
|
||||
cbnz syndrome, .Laligned4_done
|
||||
ldr data1, [src1, #-4]
|
||||
ldr data2, [src2, #-4]
|
||||
uadd8 syndrome, data1, const_m1
|
||||
eor syndrome, data1, data2
|
||||
sel syndrome, syndrome, const_m1
|
||||
cmp syndrome, #0
|
||||
beq .Lloop_aligned4
|
||||
|
||||
.Laligned4_done:
|
||||
strcmp_epilogue_aligned syndrome, data1, data2, 0
|
||||
|
||||
.Lmutual_align4:
|
||||
.cfi_restore_state
|
||||
/* Deal with mutual misalignment by aligning downwards and then
|
||||
masking off the unwanted loaded data to prevent a difference. */
|
||||
lsl tmp1, tmp1, #3 /* Bytes -> bits. */
|
||||
bic src1, src1, #3
|
||||
ldr data1, [src1], #8
|
||||
bic src2, src2, #3
|
||||
ldr data2, [src2], #8
|
||||
|
||||
/* In thumb code we can't use MVN with a register shift, but
|
||||
we do have ORN. */
|
||||
S2HI tmp1, const_m1, tmp1
|
||||
orn data1, data1, tmp1
|
||||
orn data2, data2, tmp1
|
||||
b .Lstart_realigned4
|
||||
|
||||
.Lmisaligned4:
|
||||
ands tmp1, src1, #3
|
||||
beq .Lsrc1_aligned
|
||||
sub src2, src2, tmp1
|
||||
bic src1, src1, #3
|
||||
lsls tmp1, tmp1, #31
|
||||
ldr data1, [src1], #4
|
||||
beq .Laligned_m2
|
||||
bcs .Laligned_m1
|
||||
|
||||
#if STRCMP_NO_PRECHECK == 1
|
||||
ldrb data2, [src2, #1]
|
||||
uxtb tmp1, data1, ror #BYTE1_OFFSET
|
||||
subs tmp1, tmp1, data2
|
||||
bne .Lmisaligned_exit
|
||||
cbz data2, .Lmisaligned_exit
|
||||
|
||||
.Laligned_m2:
|
||||
ldrb data2, [src2, #2]
|
||||
uxtb tmp1, data1, ror #BYTE2_OFFSET
|
||||
subs tmp1, tmp1, data2
|
||||
bne .Lmisaligned_exit
|
||||
cbz data2, .Lmisaligned_exit
|
||||
|
||||
.Laligned_m1:
|
||||
ldrb data2, [src2, #3]
|
||||
uxtb tmp1, data1, ror #BYTE3_OFFSET
|
||||
subs tmp1, tmp1, data2
|
||||
bne .Lmisaligned_exit
|
||||
add src2, src2, #4
|
||||
cbnz data2, .Lsrc1_aligned
|
||||
#else /* STRCMP_NO_PRECHECK */
|
||||
/* If we've done the pre-check, then we don't need to check the
|
||||
first byte again here. */
|
||||
ldrb data2, [src2, #2]
|
||||
uxtb tmp1, data1, ror #BYTE2_OFFSET
|
||||
subs tmp1, tmp1, data2
|
||||
bne .Lmisaligned_exit
|
||||
cbz data2, .Lmisaligned_exit
|
||||
|
||||
.Laligned_m2:
|
||||
ldrb data2, [src2, #3]
|
||||
uxtb tmp1, data1, ror #BYTE3_OFFSET
|
||||
subs tmp1, tmp1, data2
|
||||
bne .Lmisaligned_exit
|
||||
cbnz data2, .Laligned_m1
|
||||
#endif
|
||||
|
||||
.Lmisaligned_exit:
|
||||
.cfi_remember_state
|
||||
mov result, tmp1
|
||||
ldr r4, [sp], #16
|
||||
.cfi_restore 4
|
||||
bx lr
|
||||
|
||||
#if STRCMP_NO_PRECHECK == 0
|
||||
.Laligned_m1:
|
||||
add src2, src2, #4
|
||||
#endif
|
||||
.Lsrc1_aligned:
|
||||
.cfi_restore_state
|
||||
/* src1 is word aligned, but src2 has no common alignment
|
||||
with it. */
|
||||
ldr data1, [src1], #4
|
||||
lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */
|
||||
|
||||
bic src2, src2, #3
|
||||
ldr data2, [src2], #4
|
||||
bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */
|
||||
bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */
|
||||
|
||||
/* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */
|
||||
.Loverlap3:
|
||||
bic tmp1, data1, #MSB
|
||||
uadd8 syndrome, data1, const_m1
|
||||
eors syndrome, tmp1, data2, S2LO #8
|
||||
sel syndrome, syndrome, const_m1
|
||||
bne 4f
|
||||
cbnz syndrome, 5f
|
||||
ldr data2, [src2], #4
|
||||
eor tmp1, tmp1, data1
|
||||
cmp tmp1, data2, S2HI #24
|
||||
bne 6f
|
||||
ldr data1, [src1], #4
|
||||
b .Loverlap3
|
||||
4:
|
||||
S2LO data2, data2, #8
|
||||
b .Lstrcmp_tail
|
||||
|
||||
5:
|
||||
bics syndrome, syndrome, #MSB
|
||||
bne .Lstrcmp_done_equal
|
||||
|
||||
/* We can only get here if the MSB of data1 contains 0, so
|
||||
fast-path the exit. */
|
||||
ldrb result, [src2]
|
||||
.cfi_remember_state
|
||||
ldrd r4, r5, [sp], #16
|
||||
.cfi_restore 4
|
||||
.cfi_restore 5
|
||||
/* R6/7 Not used in this sequence. */
|
||||
.cfi_restore 6
|
||||
.cfi_restore 7
|
||||
neg result, result
|
||||
bx lr
|
||||
|
||||
6:
|
||||
.cfi_restore_state
|
||||
S2LO data1, data1, #24
|
||||
and data2, data2, #LSB
|
||||
b .Lstrcmp_tail
|
||||
|
||||
.p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
|
||||
.Loverlap2:
|
||||
and tmp1, data1, const_m1, S2LO #16
|
||||
uadd8 syndrome, data1, const_m1
|
||||
eors syndrome, tmp1, data2, S2LO #16
|
||||
sel syndrome, syndrome, const_m1
|
||||
bne 4f
|
||||
cbnz syndrome, 5f
|
||||
ldr data2, [src2], #4
|
||||
eor tmp1, tmp1, data1
|
||||
cmp tmp1, data2, S2HI #16
|
||||
bne 6f
|
||||
ldr data1, [src1], #4
|
||||
b .Loverlap2
|
||||
4:
|
||||
S2LO data2, data2, #16
|
||||
b .Lstrcmp_tail
|
||||
5:
|
||||
ands syndrome, syndrome, const_m1, S2LO #16
|
||||
bne .Lstrcmp_done_equal
|
||||
|
||||
ldrh data2, [src2]
|
||||
S2LO data1, data1, #16
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
lsl data2, data2, #16
|
||||
#endif
|
||||
b .Lstrcmp_tail
|
||||
|
||||
6:
|
||||
S2LO data1, data1, #16
|
||||
and data2, data2, const_m1, S2LO #16
|
||||
b .Lstrcmp_tail
|
||||
|
||||
.p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
|
||||
.Loverlap1:
|
||||
and tmp1, data1, #LSB
|
||||
uadd8 syndrome, data1, const_m1
|
||||
eors syndrome, tmp1, data2, S2LO #24
|
||||
sel syndrome, syndrome, const_m1
|
||||
bne 4f
|
||||
cbnz syndrome, 5f
|
||||
ldr data2, [src2], #4
|
||||
eor tmp1, tmp1, data1
|
||||
cmp tmp1, data2, S2HI #8
|
||||
bne 6f
|
||||
ldr data1, [src1], #4
|
||||
b .Loverlap1
|
||||
4:
|
||||
S2LO data2, data2, #24
|
||||
b .Lstrcmp_tail
|
||||
5:
|
||||
tst syndrome, #LSB
|
||||
bne .Lstrcmp_done_equal
|
||||
ldr data2, [src2]
|
||||
6:
|
||||
S2LO data1, data1, #8
|
||||
bic data2, data2, #MSB
|
||||
b .Lstrcmp_tail
|
||||
|
||||
.Lstrcmp_done_equal:
|
||||
mov result, #0
|
||||
.cfi_remember_state
|
||||
ldrd r4, r5, [sp], #16
|
||||
.cfi_restore 4
|
||||
.cfi_restore 5
|
||||
/* R6/7 not used in this sequence. */
|
||||
.cfi_restore 6
|
||||
.cfi_restore 7
|
||||
bx lr
|
||||
|
||||
.Lstrcmp_tail:
|
||||
.cfi_restore_state
|
||||
#ifndef __ARM_BIG_ENDIAN
|
||||
rev data1, data1
|
||||
rev data2, data2
|
||||
/* Now everything looks big-endian... */
|
||||
#endif
|
||||
uadd8 tmp1, data1, const_m1
|
||||
eor tmp1, data1, data2
|
||||
sel syndrome, tmp1, const_m1
|
||||
clz tmp1, syndrome
|
||||
lsl data1, data1, tmp1
|
||||
lsl data2, data2, tmp1
|
||||
lsr result, data1, #24
|
||||
ldrd r4, r5, [sp], #16
|
||||
.cfi_restore 4
|
||||
.cfi_restore 5
|
||||
/* R6/7 not used in this sequence. */
|
||||
.cfi_restore 6
|
||||
.cfi_restore 7
|
||||
sub result, result, data2, lsr #24
|
||||
bx lr
|
||||
.cfi_endproc
|
||||
.size strcmp, . - .Lstrcmp_start_addr
|
||||
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Copyright (c) 2008 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* For GLIBC:
|
||||
#include <string.h>
|
||||
#include <memcopy.h>
|
||||
|
||||
#undef strcmp
|
||||
*/
|
||||
|
||||
#ifdef __thumb2__
|
||||
#define magic1(REG) "#0x01010101"
|
||||
#define magic2(REG) "#0x80808080"
|
||||
#else
|
||||
#define magic1(REG) #REG
|
||||
#define magic2(REG) #REG ", lsl #7"
|
||||
#endif
|
||||
|
||||
char* __attribute__((naked))
|
||||
strcpy (char* dst, const char* src)
|
||||
{
|
||||
asm (
|
||||
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
|
||||
(defined (__thumb__) && !defined (__thumb2__)))
|
||||
"pld [r1, #0]\n\t"
|
||||
"eor r2, r0, r1\n\t"
|
||||
"mov ip, r0\n\t"
|
||||
"tst r2, #3\n\t"
|
||||
"bne 4f\n\t"
|
||||
"tst r1, #3\n\t"
|
||||
"bne 3f\n"
|
||||
"5:\n\t"
|
||||
#ifndef __thumb2__
|
||||
"str r5, [sp, #-4]!\n\t"
|
||||
"mov r5, #0x01\n\t"
|
||||
"orr r5, r5, r5, lsl #8\n\t"
|
||||
"orr r5, r5, r5, lsl #16\n\t"
|
||||
#endif
|
||||
|
||||
"str r4, [sp, #-4]!\n\t"
|
||||
"tst r1, #4\n\t"
|
||||
"ldr r3, [r1], #4\n\t"
|
||||
"beq 2f\n\t"
|
||||
"sub r2, r3, "magic1(r5)"\n\t"
|
||||
"bics r2, r2, r3\n\t"
|
||||
"tst r2, "magic2(r5)"\n\t"
|
||||
"itt eq\n\t"
|
||||
"streq r3, [ip], #4\n\t"
|
||||
"ldreq r3, [r1], #4\n"
|
||||
"bne 1f\n\t"
|
||||
/* Inner loop. We now know that r1 is 64-bit aligned, so we
|
||||
can safely fetch up to two words. This allows us to avoid
|
||||
load stalls. */
|
||||
".p2align 2\n"
|
||||
"2:\n\t"
|
||||
"pld [r1, #8]\n\t"
|
||||
"ldr r4, [r1], #4\n\t"
|
||||
"sub r2, r3, "magic1(r5)"\n\t"
|
||||
"bics r2, r2, r3\n\t"
|
||||
"tst r2, "magic2(r5)"\n\t"
|
||||
"sub r2, r4, "magic1(r5)"\n\t"
|
||||
"bne 1f\n\t"
|
||||
"str r3, [ip], #4\n\t"
|
||||
"bics r2, r2, r4\n\t"
|
||||
"tst r2, "magic2(r5)"\n\t"
|
||||
"itt eq\n\t"
|
||||
"ldreq r3, [r1], #4\n\t"
|
||||
"streq r4, [ip], #4\n\t"
|
||||
"beq 2b\n\t"
|
||||
"mov r3, r4\n"
|
||||
"1:\n\t"
|
||||
#ifdef __ARMEB__
|
||||
"rors r3, r3, #24\n\t"
|
||||
#endif
|
||||
"strb r3, [ip], #1\n\t"
|
||||
"tst r3, #0xff\n\t"
|
||||
#ifdef __ARMEL__
|
||||
"ror r3, r3, #8\n\t"
|
||||
#endif
|
||||
"bne 1b\n\t"
|
||||
"ldr r4, [sp], #4\n\t"
|
||||
#ifndef __thumb2__
|
||||
"ldr r5, [sp], #4\n\t"
|
||||
#endif
|
||||
"BX LR\n"
|
||||
|
||||
/* Strings have the same offset from word alignment, but it's
|
||||
not zero. */
|
||||
"3:\n\t"
|
||||
"tst r1, #1\n\t"
|
||||
"beq 1f\n\t"
|
||||
"ldrb r2, [r1], #1\n\t"
|
||||
"strb r2, [ip], #1\n\t"
|
||||
"cmp r2, #0\n\t"
|
||||
"it eq\n"
|
||||
"BXEQ LR\n"
|
||||
"1:\n\t"
|
||||
"tst r1, #2\n\t"
|
||||
"beq 5b\n\t"
|
||||
"ldrh r2, [r1], #2\n\t"
|
||||
#ifdef __ARMEB__
|
||||
"tst r2, #0xff00\n\t"
|
||||
"iteet ne\n\t"
|
||||
"strneh r2, [ip], #2\n\t"
|
||||
"lsreq r2, r2, #8\n\t"
|
||||
"streqb r2, [ip]\n\t"
|
||||
"tstne r2, #0xff\n\t"
|
||||
#else
|
||||
"tst r2, #0xff\n\t"
|
||||
"itet ne\n\t"
|
||||
"strneh r2, [ip], #2\n\t"
|
||||
"streqb r2, [ip]\n\t"
|
||||
"tstne r2, #0xff00\n\t"
|
||||
#endif
|
||||
"bne 5b\n\t"
|
||||
"BX LR\n"
|
||||
|
||||
/* src and dst do not have a common word-alignment. Fall back to
|
||||
byte copying. */
|
||||
"4:\n\t"
|
||||
"ldrb r2, [r1], #1\n\t"
|
||||
"strb r2, [ip], #1\n\t"
|
||||
"cmp r2, #0\n\t"
|
||||
"bne 4b\n\t"
|
||||
"BX LR"
|
||||
|
||||
#elif !defined (__thumb__) || defined (__thumb2__)
|
||||
"mov r3, r0\n\t"
|
||||
"1:\n\t"
|
||||
"ldrb r2, [r1], #1\n\t"
|
||||
"strb r2, [r3], #1\n\t"
|
||||
"cmp r2, #0\n\t"
|
||||
"bne 1b\n\t"
|
||||
"BX LR"
|
||||
#else
|
||||
"mov r3, r0\n\t"
|
||||
"1:\n\t"
|
||||
"ldrb r2, [r1]\n\t"
|
||||
"add r1, r1, #1\n\t"
|
||||
"strb r2, [r3]\n\t"
|
||||
"add r3, r3, #1\n\t"
|
||||
"cmp r2, #0\n\t"
|
||||
"bne 1b\n\t"
|
||||
"BX LR"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
/* For GLIBC: libc_hidden_builtin_def (strcpy) */
|
||||
@@ -0,0 +1,150 @@
|
||||
/* Copyright (c) 2010-2011,2013 Linaro Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Linaro Limited nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Assumes:
|
||||
ARMv6T2, AArch32
|
||||
|
||||
*/
|
||||
|
||||
/* def_fn f [p2align]: switch to .text, align to 2^p2align bytes
   (default 0), and open a global symbol named f typed as a function. */
.macro def_fn f p2align=0
|
||||
.text
|
||||
.p2align \p2align
|
||||
.global \f
|
||||
.type \f, %function
|
||||
\f:
|
||||
.endm
|
||||
|
||||
#ifdef __ARMEB__
|
||||
#define S2LO lsl
|
||||
#define S2HI lsr
|
||||
#else
|
||||
#define S2LO lsr
|
||||
#define S2HI lsl
|
||||
#endif
|
||||
|
||||
/* This code requires Thumb. */
|
||||
.thumb
|
||||
.syntax unified
|
||||
|
||||
/* Parameters and result. */
|
||||
#define srcin r0
|
||||
#define result r0
|
||||
|
||||
/* Internal variables. */
|
||||
#define src r1
|
||||
#define data1a r2
|
||||
#define data1b r3
|
||||
#define const_m1 r12
|
||||
#define const_0 r4
|
||||
#define tmp1 r4 /* Overlaps const_0 */
|
||||
#define tmp2 r5
|
||||
|
||||
/* strlen: on entry r0 (srcin) points at the string; the length is
   returned in r0 (result).  The string is scanned 8 bytes per ldrd and
   32 bytes per pass of the aligned loop, always from an 8-byte aligned
   address.  r4 and r5 are saved on the stack and restored before return.  */
def_fn strlen p2align=6
|
||||
pld [srcin, #0]
|
||||
strd r4, r5, [sp, #-8]! /* Save r4/r5; reloaded at .Lnull_found. */
|
||||
bic src, srcin, #7 /* src = srcin rounded down to 8 bytes. */
|
||||
mvn const_m1, #0
|
||||
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
|
||||
pld [src, #32]
|
||||
bne.w .Lmisaligned8
|
||||
mov const_0, #0
|
||||
mov result, #-8 /* Pre-biased: 8 is added before each check. */
|
||||
.Lloop_aligned:
|
||||
/* Bytes 0-7. */
|
||||
ldrd data1a, data1b, [src]
|
||||
pld [src, #64]
|
||||
add result, result, #8
|
||||
.Lstart_realigned:
|
||||
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
|
||||
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
|
||||
uadd8 data1b, data1b, const_m1
|
||||
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
|
||||
cbnz data1b, .Lnull_found
|
||||
|
||||
/* Bytes 8-15. */
|
||||
ldrd data1a, data1b, [src, #8]
|
||||
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
|
||||
add result, result, #8
|
||||
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
|
||||
uadd8 data1b, data1b, const_m1
|
||||
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
|
||||
cbnz data1b, .Lnull_found
|
||||
|
||||
/* Bytes 16-23. */
|
||||
ldrd data1a, data1b, [src, #16]
|
||||
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
|
||||
add result, result, #8
|
||||
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
|
||||
uadd8 data1b, data1b, const_m1
|
||||
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
|
||||
cbnz data1b, .Lnull_found
|
||||
|
||||
/* Bytes 24-31. */
|
||||
ldrd data1a, data1b, [src, #24]
|
||||
add src, src, #32
|
||||
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
|
||||
add result, result, #8
|
||||
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
|
||||
uadd8 data1b, data1b, const_m1
|
||||
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
|
||||
cmp data1b, #0
|
||||
beq .Lloop_aligned
|
||||
|
||||
.Lnull_found:
|
||||
cmp data1a, #0 /* Zero means no NUL in the first word. */
|
||||
itt eq
|
||||
addeq result, result, #4
|
||||
moveq data1a, data1b
|
||||
#ifndef __ARMEB__
|
||||
rev data1a, data1a /* Little-endian: put the first byte in memory at the top for clz. */
|
||||
#endif
|
||||
clz data1a, data1a
|
||||
ldrd r4, r5, [sp], #8
|
||||
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
|
||||
bx lr
|
||||
|
||||
.Lmisaligned8:
|
||||
ldrd data1a, data1b, [src]
|
||||
and tmp2, tmp1, #3
|
||||
rsb result, tmp1, #0 /* result = -(srcin & 7): discount the bytes before srcin. */
|
||||
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
|
||||
tst tmp1, #4
|
||||
pld [src, #64]
|
||||
S2HI tmp2, const_m1, tmp2
|
||||
orn data1a, data1a, tmp2 /* Force the bytes before srcin to 0xff so they never look like NUL. */
|
||||
itt ne
|
||||
ornne data1b, data1b, tmp2
|
||||
movne data1a, const_m1 /* srcin is in the second word: ignore the whole first word. */
|
||||
mov const_0, #0
|
||||
b .Lstart_realigned
|
||||
.size strlen, . - strlen
|
||||
|
||||
@@ -0,0 +1,318 @@
|
||||
/*
|
||||
* Copyright (c) 2014 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* An executable stack is *not* required for these functions. */
|
||||
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
.previous
|
||||
.eabi_attribute 25, 1
|
||||
|
||||
/* ANSI concatenation macros. */
|
||||
|
||||
#define CONCAT1(a, b) CONCAT2(a, b)
|
||||
#define CONCAT2(a, b) a ## b
|
||||
|
||||
/* Use the right prefix for global labels. */
|
||||
|
||||
#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
|
||||
|
||||
#define TYPE(x) .type SYM(x),function
|
||||
#define SIZE(x) .size SYM(x), . - SYM(x)
|
||||
#define LSYM(x) .x
|
||||
|
||||
.macro cfi_start start_label, end_label
|
||||
.pushsection .debug_frame
|
||||
LSYM(Lstart_frame):
|
||||
.4byte LSYM(Lend_cie) - LSYM(Lstart_cie)
|
||||
LSYM(Lstart_cie):
|
||||
.4byte 0xffffffff
|
||||
.byte 0x1
|
||||
.ascii "\0"
|
||||
.uleb128 0x1
|
||||
.sleb128 -4
|
||||
.byte 0xe
|
||||
.byte 0xc
|
||||
.uleb128 0xd
|
||||
.uleb128 0x0
|
||||
|
||||
.align 2
|
||||
LSYM(Lend_cie):
|
||||
.4byte LSYM(Lend_fde)-LSYM(Lstart_fde)
|
||||
LSYM(Lstart_fde):
|
||||
.4byte LSYM(Lstart_frame)
|
||||
.4byte \start_label
|
||||
.4byte \end_label-\start_label
|
||||
.popsection
|
||||
.endm
|
||||
|
||||
.macro cfi_end end_label
|
||||
.pushsection .debug_frame
|
||||
.align 2
|
||||
LSYM(Lend_fde):
|
||||
.popsection
|
||||
\end_label:
|
||||
.endm
|
||||
|
||||
/* Division-by-zero tail: saves r0 and lr, calls __aeabi_idiv0 with
   r0 = 0, then returns to the original caller by popping the saved lr
   into pc (the saved r0 is popped into r1).  The name and signed
   arguments are not referenced in the body.  */
.macro THUMB_LDIV0 name signed
|
||||
push {r0, lr}
|
||||
movs r0, #0
|
||||
bl SYM(__aeabi_idiv0)
|
||||
pop {r1, pc}
|
||||
.endm
|
||||
|
||||
.macro FUNC_END name
|
||||
SIZE (__\name)
|
||||
.endm
|
||||
|
||||
.macro DIV_FUNC_END name signed
|
||||
cfi_start __\name, LSYM(Lend_div0)
|
||||
LSYM(Ldiv0):
|
||||
THUMB_LDIV0 \name \signed
|
||||
cfi_end LSYM(Lend_div0)
|
||||
FUNC_END \name
|
||||
.endm
|
||||
|
||||
.macro THUMB_FUNC_START name
|
||||
.globl SYM (\name)
|
||||
TYPE (\name)
|
||||
.thumb_func
|
||||
SYM (\name):
|
||||
.endm
|
||||
|
||||
.macro FUNC_START name
|
||||
.text
|
||||
.globl SYM (__\name)
|
||||
TYPE (__\name)
|
||||
.align 0
|
||||
.force_thumb
|
||||
.thumb_func
|
||||
.syntax unified
|
||||
SYM (__\name):
|
||||
.endm
|
||||
|
||||
.macro FUNC_ALIAS new old
|
||||
.globl SYM (__\new)
|
||||
.thumb_set SYM (__\new), SYM (__\old)
|
||||
.endm
|
||||
|
||||
/* Register aliases. */
|
||||
work .req r4
|
||||
dividend .req r0
|
||||
divisor .req r1
|
||||
overdone .req r2
|
||||
result .req r2
|
||||
curbit .req r3
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
/* Bodies of the division and modulo routines. */
|
||||
/* ------------------------------------------------------------------------ */
|
||||
/* Branch to label when (dividend >> n) is lower than divisor as an
   unsigned comparison.  Overwrites curbit and the flags.  */
.macro BranchToDiv n, label
|
||||
lsrs curbit, dividend, \n
|
||||
cmp curbit, divisor
|
||||
bcc \label
|
||||
.endm
|
||||
|
||||
/* One shift-and-subtract step for quotient bit n: when (dividend >> n)
   is not lower than divisor (unsigned), subtract (divisor << n) from
   dividend.  The closing adcs doubles result and adds the carry flag,
   which is clear when the subtraction was skipped via bcc.  Overwrites
   curbit and the flags.  */
.macro DoDiv n
|
||||
lsrs curbit, dividend, \n
|
||||
cmp curbit, divisor
|
||||
bcc 1f
|
||||
lsls curbit, divisor, \n
|
||||
subs dividend, dividend, curbit
|
||||
|
||||
1: adcs result, result
|
||||
.endm
|
||||
|
||||
.macro THUMB1_Div_Positive
|
||||
movs result, #0
|
||||
BranchToDiv #1, LSYM(Lthumb1_div1)
|
||||
BranchToDiv #4, LSYM(Lthumb1_div4)
|
||||
BranchToDiv #8, LSYM(Lthumb1_div8)
|
||||
BranchToDiv #12, LSYM(Lthumb1_div12)
|
||||
BranchToDiv #16, LSYM(Lthumb1_div16)
|
||||
LSYM(Lthumb1_div_large_positive):
|
||||
movs result, #0xff
|
||||
lsls divisor, divisor, #8
|
||||
rev result, result
|
||||
lsrs curbit, dividend, #16
|
||||
cmp curbit, divisor
|
||||
bcc 1f
|
||||
asrs result, #8
|
||||
lsls divisor, divisor, #8
|
||||
beq LSYM(Ldivbyzero_waypoint)
|
||||
|
||||
1: lsrs curbit, dividend, #12
|
||||
cmp curbit, divisor
|
||||
bcc LSYM(Lthumb1_div12)
|
||||
b LSYM(Lthumb1_div16)
|
||||
LSYM(Lthumb1_div_loop):
|
||||
lsrs divisor, divisor, #8
|
||||
LSYM(Lthumb1_div16):
|
||||
Dodiv #15
|
||||
Dodiv #14
|
||||
Dodiv #13
|
||||
Dodiv #12
|
||||
LSYM(Lthumb1_div12):
|
||||
Dodiv #11
|
||||
Dodiv #10
|
||||
Dodiv #9
|
||||
Dodiv #8
|
||||
bcs LSYM(Lthumb1_div_loop)
|
||||
LSYM(Lthumb1_div8):
|
||||
Dodiv #7
|
||||
Dodiv #6
|
||||
Dodiv #5
|
||||
LSYM(Lthumb1_div5):
|
||||
Dodiv #4
|
||||
LSYM(Lthumb1_div4):
|
||||
Dodiv #3
|
||||
LSYM(Lthumb1_div3):
|
||||
Dodiv #2
|
||||
LSYM(Lthumb1_div2):
|
||||
Dodiv #1
|
||||
LSYM(Lthumb1_div1):
|
||||
subs divisor, dividend, divisor
|
||||
bcs 1f
|
||||
mov divisor, dividend
|
||||
|
||||
1: adcs result, result
|
||||
mov dividend, result
|
||||
bx lr
|
||||
|
||||
LSYM(Ldivbyzero_waypoint):
|
||||
b LSYM(Ldiv0)
|
||||
.endm
|
||||
|
||||
.macro THUMB1_Div_Negative
|
||||
lsrs result, divisor, #31
|
||||
beq 1f
|
||||
rsbs divisor, divisor, #0
|
||||
|
||||
1: asrs curbit, dividend, #32
|
||||
bcc 2f
|
||||
rsbs dividend, dividend, #0
|
||||
|
||||
2: eors curbit, result
|
||||
movs result, #0
|
||||
mov ip, curbit
|
||||
BranchToDiv #4, LSYM(Lthumb1_div_negative4)
|
||||
BranchToDiv #8, LSYM(Lthumb1_div_negative8)
|
||||
LSYM(Lthumb1_div_large):
|
||||
movs result, #0xfc
|
||||
lsls divisor, divisor, #6
|
||||
rev result, result
|
||||
lsrs curbit, dividend, #8
|
||||
cmp curbit, divisor
|
||||
bcc LSYM(Lthumb1_div_negative8)
|
||||
|
||||
lsls divisor, divisor, #6
|
||||
asrs result, result, #6
|
||||
cmp curbit, divisor
|
||||
bcc LSYM(Lthumb1_div_negative8)
|
||||
|
||||
lsls divisor, divisor, #6
|
||||
asrs result, result, #6
|
||||
cmp curbit, divisor
|
||||
bcc LSYM(Lthumb1_div_negative8)
|
||||
|
||||
lsls divisor, divisor, #6
|
||||
beq LSYM(Ldivbyzero_negative)
|
||||
asrs result, result, #6
|
||||
b LSYM(Lthumb1_div_negative8)
|
||||
LSYM(Lthumb1_div_negative_loop):
|
||||
lsrs divisor, divisor, #6
|
||||
LSYM(Lthumb1_div_negative8):
|
||||
DoDiv #7
|
||||
DoDiv #6
|
||||
DoDiv #5
|
||||
DoDiv #4
|
||||
LSYM(Lthumb1_div_negative4):
|
||||
DoDiv #3
|
||||
DoDiv #2
|
||||
bcs LSYM(Lthumb1_div_negative_loop)
|
||||
DoDiv #1
|
||||
subs divisor, dividend, divisor
|
||||
bcs 1f
|
||||
mov divisor, dividend
|
||||
|
||||
1: mov curbit, ip
|
||||
adcs result, result
|
||||
asrs curbit, curbit, #1
|
||||
mov dividend, result
|
||||
bcc 2f
|
||||
rsbs dividend, dividend, #0
|
||||
cmp curbit, #0
|
||||
|
||||
2: bpl 3f
|
||||
rsbs divisor, divisor, #0
|
||||
|
||||
3: bx lr
|
||||
|
||||
LSYM(Ldivbyzero_negative):
|
||||
mov curbit, ip
|
||||
asrs curbit, curbit, #1
|
||||
bcc LSYM(Ldiv0)
|
||||
rsbs dividend, dividend, #0
|
||||
.endm
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
/* Start of the Real Functions */
|
||||
/* ------------------------------------------------------------------------ */
|
||||
|
||||
/* __aeabi_idiv0: division-by-zero hook defined here as a bare return
   to the caller; no registers are modified.  */
FUNC_START aeabi_idiv0
|
||||
bx lr
|
||||
FUNC_END aeabi_idiv0
|
||||
|
||||
/* __divsi3 (also exported as __aeabi_idiv): division of dividend (r0)
   by divisor (r1).  The two operands are ORed together; if the sign bit
   of the OR is set the THUMB1_Div_Negative body is used, otherwise
   execution falls into THUMB1_Div_Positive.  */
FUNC_START divsi3
|
||||
FUNC_ALIAS aeabi_idiv divsi3
|
||||
|
||||
LSYM(divsi3_skip_div0_test):
|
||||
mov curbit, dividend
|
||||
orrs curbit, divisor /* N flag set if either operand has its sign bit set. */
|
||||
bmi LSYM(Lthumb1_div_negative)
|
||||
|
||||
LSYM(Lthumb1_div_positive):
|
||||
THUMB1_Div_Positive
|
||||
|
||||
LSYM(Lthumb1_div_negative):
|
||||
THUMB1_Div_Negative
|
||||
|
||||
DIV_FUNC_END divsi3 signed
|
||||
|
||||
/* __aeabi_idivmod: divides r0 by r1 and returns the quotient in r0 and
   the remainder in r1.  The remainder is computed as
   dividend - divisor * quotient from the operands saved on the stack.
   A zero divisor branches to the shared Ldiv0 handler.  */
FUNC_START aeabi_idivmod
|
||||
|
||||
cmp r1, #0
|
||||
beq LSYM(Ldiv0)
|
||||
push {r0, r1, lr} /* Keep dividend, divisor and return address. */
|
||||
bl LSYM(divsi3_skip_div0_test) /* Quotient comes back in r0. */
|
||||
POP {r1, r2, r3} /* r1 = dividend, r2 = divisor, r3 = return address. */
|
||||
mul r2, r0 /* r2 = divisor * quotient. */
|
||||
sub r1, r1, r2 /* r1 = remainder. */
|
||||
bx r3
|
||||
|
||||
FUNC_END aeabi_idivmod
|
||||
/* ------------------------------------------------------------------------ */
|
||||
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
* Copyright (c) 2014 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Implementation of strcmp for ARMv6m. This version is only used in
|
||||
ARMv6-M when we want an efficient implementation. Otherwise if the
|
||||
code size is preferred, strcmp-armv4t.S will be used. */
|
||||
|
||||
.thumb_func
|
||||
.syntax unified
|
||||
.arch armv6-m
|
||||
|
||||
/* DoSub n, label
   Subtracts r1 from r0 (result left in r0), then shifts r4 by \n
   (right on big-endian, left on little-endian) into r1 and ORs the
   difference into it.  Branches to \label when the combined value is
   nonzero, i.e. when r0 != r1 or the shifted r4 has any bit set.
   r4 is expected to be set up by the caller; r1 is overwritten.  */
.macro DoSub n, label
|
||||
subs r0, r0, r1
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
lsrs r1, r4, \n
|
||||
#else
|
||||
lsls r1, r4, \n
|
||||
#endif
|
||||
orrs r1, r0
|
||||
bne \label
|
||||
.endm
|
||||
|
||||
/* Byte_Test n, label
   Shifts r2 and r3 right by \n into r0 and r1 respectively, then expands
   DoSub with the same \n and \label to compare them.  r0 and r1 are
   overwritten.  */
.macro Byte_Test n, label
|
||||
lsrs r0, r2, \n
|
||||
lsrs r1, r3, \n
|
||||
DoSub \n, \label
|
||||
.endm
|
||||
|
||||
.text
|
||||
.p2align 0
|
||||
.global strcmp
|
||||
.type strcmp, %function
|
||||
/* strcmp: r0 and r1 hold the two string pointers on entry; the result is
   returned in r0.  When both pointers have their low two bits clear the
   strings are compared a word at a time, otherwise a byte at a time.
   r4-r6 and lr are saved on entry and every exit pops them (lr into pc).  */
strcmp:
|
||||
.cfi_startproc
|
||||
mov r2, r0
|
||||
push {r4, r5, r6, lr}
|
||||
/* (r0 | r1) << 30 keeps only the low two bits of both pointers; nonzero
   means at least one pointer is not 4-byte aligned.  */
orrs r2, r1
|
||||
lsls r2, r2, #30
|
||||
bne 6f
|
||||
ldr r5, =0x01010101
|
||||
/* r6 = 0x01010101 << 7 = 0x80808080.  */
lsls r6, r5, #7
|
||||
/* Word loop: load one word from each string, post-incrementing r0/r1.  */
1:
|
||||
ldmia r0!, {r2}
|
||||
ldmia r1!, {r3}
|
||||
/* r4 = (r2 - 0x01010101) & ~r2 & 0x80808080, which is nonzero when some
   byte of r2 is zero.  */
subs r4, r2, r5
|
||||
bics r4, r2
|
||||
ands r4, r6
|
||||
/* No zero byte in this word: compare the whole words at label 3.  */
beq 3f
|
||||
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
Byte_Test #24, 4f
|
||||
Byte_Test #16, 4f
|
||||
Byte_Test #8, 4f
|
||||
|
||||
b 7f
|
||||
3:
|
||||
cmp r2, r3
|
||||
/* Words equal: continue with the next word.  */
beq 1b
|
||||
cmp r2, r3
|
||||
#else
|
||||
/* Compare the low byte of each word.  */
uxtb r0, r2
|
||||
uxtb r1, r3
|
||||
DoSub #24, 2f
|
||||
|
||||
/* Compare the low halfword of each word.  */
uxth r0, r2
|
||||
uxth r1, r3
|
||||
DoSub #16, 2f
|
||||
|
||||
/* Compare the low 24 bits of each word (top byte cleared by the
   shift-left / shift-right pair).  */
lsls r0, r2, #8
|
||||
lsls r1, r3, #8
|
||||
lsrs r0, r0, #8
|
||||
lsrs r1, r1, #8
|
||||
DoSub #8, 2f
|
||||
|
||||
/* Compare the top byte of each word; the difference is the result.  */
lsrs r0, r2, #24
|
||||
lsrs r1, r3, #24
|
||||
subs r0, r0, r1
|
||||
/* Return with the subtraction result in r0.  */
2:
|
||||
pop {r4, r5, r6, pc}
|
||||
|
||||
3:
|
||||
cmp r2, r3
|
||||
/* Words equal: continue with the next word.  */
beq 1b
|
||||
/* Byte-reverse both words before the unsigned compare below.  */
rev r0, r2
|
||||
rev r1, r3
|
||||
cmp r0, r1
|
||||
#endif
|
||||
|
||||
/* Unsigned lower-or-same goes to label 5 (returns -1); otherwise
   return 1.  */
bls 5f
|
||||
movs r0, #1
|
||||
4:
|
||||
pop {r4, r5, r6, pc}
|
||||
5:
|
||||
/* r0 = ~0, i.e. -1.  */
movs r0, #0
|
||||
mvns r0, r0
|
||||
pop {r4, r5, r6, pc}
|
||||
/* Byte loop (two bytes per iteration): stop at a zero byte in the first
   string or at the first mismatch.  */
6:
|
||||
ldrb r2, [r0, #0]
|
||||
ldrb r3, [r1, #0]
|
||||
adds r0, #1
|
||||
adds r1, #1
|
||||
cmp r2, #0
|
||||
beq 7f
|
||||
cmp r2, r3
|
||||
bne 7f
|
||||
ldrb r2, [r0, #0]
|
||||
ldrb r3, [r1, #0]
|
||||
adds r0, #1
|
||||
adds r1, #1
|
||||
cmp r2, #0
|
||||
beq 7f
|
||||
cmp r2, r3
|
||||
beq 6b
|
||||
/* Return r2 - r3.  */
7:
|
||||
subs r0, r2, r3
|
||||
pop {r4, r5, r6, pc}
|
||||
.cfi_endproc
|
||||
.size strcmp, . - strcmp
|
||||
@@ -234,6 +234,21 @@
|
||||
#define DW_AT_GNU_all_call_sites 0x2117
|
||||
#define DW_AT_GNU_all_source_call_sites 0x2118
|
||||
|
||||
/* Apple extensions. */
|
||||
#define DW_AT_APPLE_optimized 0x3fe1
|
||||
#define DW_AT_APPLE_flags 0x3fe2
|
||||
#define DW_AT_APPLE_isa 0x3fe3
|
||||
#define DW_AT_APPLE_block 0x3fe4
|
||||
#define DW_AT_APPLE_major_runtime_vers 0x3fe5
|
||||
#define DW_AT_APPLE_runtime_class 0x3fe6
|
||||
#define DW_AT_APPLE_omit_frame_ptr 0x3fe7
|
||||
#define DW_AT_APPLE_property_name 0x3fe8
|
||||
#define DW_AT_APPLE_property_getter 0x3fe9
|
||||
#define DW_AT_APPLE_property_setter 0x3fea
|
||||
#define DW_AT_APPLE_property_attribute 0x3feb
|
||||
#define DW_AT_APPLE_objc_complete_type 0x3fec
|
||||
#define DW_AT_APPLE_property 0x3fed
|
||||
|
||||
#define DW_FORM_addr 0x01
|
||||
#define DW_FORM_block2 0x03
|
||||
#define DW_FORM_block4 0x04
|
||||
|
||||
@@ -298,6 +298,32 @@ dwarf_get_AT_name(unsigned attr, const char **s)
|
||||
*s = "DW_AT_GNU_all_call_sites"; break;
|
||||
case DW_AT_GNU_all_source_call_sites:
|
||||
*s = "DW_AT_GNU_all_source_call_sites"; break;
|
||||
case DW_AT_APPLE_optimized:
|
||||
*s = "DW_AT_APPLE_optimized"; break;
|
||||
case DW_AT_APPLE_flags:
|
||||
*s = "DW_AT_APPLE_flags"; break;
|
||||
case DW_AT_APPLE_isa:
|
||||
*s = "DW_AT_APPLE_isa"; break;
|
||||
case DW_AT_APPLE_block:
|
||||
*s = "DW_AT_APPLE_block"; break;
|
||||
case DW_AT_APPLE_major_runtime_vers:
|
||||
*s = "DW_AT_APPLE_major_runtime_vers"; break;
|
||||
case DW_AT_APPLE_runtime_class:
|
||||
*s = "DW_AT_APPLE_runtime_class"; break;
|
||||
case DW_AT_APPLE_omit_frame_ptr:
|
||||
*s = "DW_AT_APPLE_omit_frame_ptr"; break;
|
||||
case DW_AT_APPLE_property_name:
|
||||
*s = "DW_AT_APPLE_property_name"; break;
|
||||
case DW_AT_APPLE_property_getter:
|
||||
*s = "DW_AT_APPLE_property_getter"; break;
|
||||
case DW_AT_APPLE_property_setter:
|
||||
*s = "DW_AT_APPLE_property_setter"; break;
|
||||
case DW_AT_APPLE_property_attribute:
|
||||
*s = "DW_AT_APPLE_property_attribute"; break;
|
||||
case DW_AT_APPLE_objc_complete_type:
|
||||
*s = "DW_AT_APPLE_objc_complete_type"; break;
|
||||
case DW_AT_APPLE_property:
|
||||
*s = "DW_AT_APPLE_property"; break;
|
||||
default:
|
||||
return (DW_DLV_NO_ENTRY);
|
||||
}
|
||||
|
||||
@@ -45,6 +45,15 @@ require_fs() {
|
||||
|
||||
# if we have autoloadable modules, just assume the file system
|
||||
atf_require_prog sysctl
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
if kldstat -m ${name}; then
|
||||
found=yes
|
||||
else
|
||||
found=no
|
||||
fi
|
||||
else
|
||||
# End FreeBSD
|
||||
autoload=$(sysctl -n kern.module.autoload)
|
||||
[ "${autoload}" = "1" ] && return 0
|
||||
|
||||
@@ -57,6 +66,9 @@ require_fs() {
|
||||
fi
|
||||
shift
|
||||
done
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
[ ${found} = yes ] || \
|
||||
atf_skip "The kernel does not include support the " \
|
||||
"\`${name}' file system"
|
||||
|
||||
@@ -106,7 +106,15 @@ pipe_body() {
|
||||
test_mount
|
||||
umask 022
|
||||
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
atf_check -s eq:0 -o empty -e empty mkfifo pipe
|
||||
else
|
||||
# End FreeBSD
|
||||
atf_check -s eq:0 -o empty -e empty mknod pipe p
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
eval $(stat -s pipe)
|
||||
[ ${st_mode} = 010644 ] || atf_fail "Invalid mode"
|
||||
|
||||
@@ -124,7 +132,15 @@ pipe_kqueue_body() {
|
||||
umask 022
|
||||
|
||||
atf_check -s eq:0 -o empty -e empty mkdir dir
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
echo 'mkfifo dir/pipe' | kqueue_monitor 1 dir
|
||||
else
|
||||
# End FreeBSD
|
||||
echo 'mknod dir/pipe p' | kqueue_monitor 1 dir
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
kqueue_check dir NOTE_WRITE
|
||||
|
||||
test_unmount
|
||||
|
||||
@@ -59,7 +59,15 @@ types_body() {
|
||||
atf_check -s eq:0 -o empty -e empty ln -s reg lnk
|
||||
atf_check -s eq:0 -o empty -e empty mknod blk b 0 0
|
||||
atf_check -s eq:0 -o empty -e empty mknod chr c 0 0
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
atf_check -s eq:0 -o empty -e empty mkfifo fifo
|
||||
else
|
||||
# End FreeBSD
|
||||
atf_check -s eq:0 -o empty -e empty mknod fifo p
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
atf_check -s eq:0 -o empty -e empty \
|
||||
$(atf_get_srcdir)/h_tools sockets sock
|
||||
|
||||
|
||||
@@ -54,7 +54,15 @@ big_head() {
|
||||
big_body() {
|
||||
test_mount -o -s10M
|
||||
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
pagesize=$(sysctl -n hw.pagesize)
|
||||
else
|
||||
# End FreeBSD
|
||||
pagesize=$(sysctl hw.pagesize | cut -d ' ' -f 3)
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
eval $($(atf_get_srcdir)/h_tools statvfs . | sed -e 's|^f_|cf_|')
|
||||
cf_bused=$((${cf_blocks} - ${cf_bfree}))
|
||||
|
||||
|
||||
@@ -38,7 +38,15 @@ values_head() {
|
||||
values_body() {
|
||||
test_mount -o -s10M
|
||||
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
pagesize=$(sysctl -n hw.pagesize)
|
||||
else
|
||||
# End FreeBSD
|
||||
pagesize=$(sysctl hw.pagesize | cut -d ' ' -f 3)
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
eval $($(atf_get_srcdir)/h_tools statvfs .)
|
||||
[ ${pagesize} -eq ${f_bsize} ] || \
|
||||
atf_fail "Invalid bsize"
|
||||
|
||||
@@ -38,12 +38,21 @@ basic_body() {
|
||||
|
||||
atf_check -s eq:0 -o ignore -e ignore \
|
||||
dd if=/dev/zero of=disk.img bs=1m count=10
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
atf_check -s eq:0 -o empty -e empty mkdir mnt
|
||||
atf_check -s eq:0 -o empty -e empty mdmfs -F disk.img md3 mnt
|
||||
else
|
||||
# End FreeBSD
|
||||
atf_check -s eq:0 -o empty -e empty vndconfig /dev/vnd3 disk.img
|
||||
|
||||
atf_check -s eq:0 -o ignore -e ignore newfs /dev/rvnd3a
|
||||
|
||||
atf_check -s eq:0 -o empty -e empty mkdir mnt
|
||||
atf_check -s eq:0 -o empty -e empty mount /dev/vnd3a mnt
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
|
||||
echo "Creating test files"
|
||||
for f in $(jot -w %u 100 | uniq); do
|
||||
@@ -58,7 +67,15 @@ basic_body() {
|
||||
done
|
||||
|
||||
atf_check -s eq:0 -o empty -e empty umount mnt
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
atf_check -s eq:0 -o empty -e empty mdconfig -d -u 3
|
||||
else
|
||||
# End FreeBSD
|
||||
atf_check -s eq:0 -o empty -e empty vndconfig -u /dev/vnd3
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
|
||||
test_unmount
|
||||
touch done
|
||||
@@ -66,7 +83,15 @@ basic_body() {
|
||||
basic_cleanup() {
|
||||
if [ ! -f done ]; then
|
||||
umount mnt 2>/dev/null 1>&2
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
atf_check -s eq:0 -o empty -e empty mdconfig -d -u 3
|
||||
else
|
||||
# End FreeBSD
|
||||
vndconfig -u /dev/vnd3 2>/dev/null 1>&2
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,15 @@ main_head() {
|
||||
}
|
||||
main_body() {
|
||||
echo "Lowering kern.maxvnodes to 2000"
|
||||
# Begin FreeBSD
|
||||
if true; then
|
||||
sysctl -n kern.maxvnodes > oldvnodes
|
||||
else
|
||||
# End FreeBSD
|
||||
sysctl kern.maxvnodes | awk '{ print $3; }' >oldvnodes
|
||||
# Begin FreeBSD
|
||||
fi
|
||||
# End FreeBSD
|
||||
atf_check -s eq:0 -o ignore -e empty sysctl -w kern.maxvnodes=2000
|
||||
|
||||
test_mount -o -s$(((4000 + 2) * 4096))
|
||||
|
||||
@@ -32,9 +32,6 @@ basic_head()
|
||||
}
|
||||
basic_body()
|
||||
{
|
||||
# Begin FreeBSD
|
||||
atf_expect_fail "dirname //usr//bin doesn't return //usr like it used to; bug # 212193"
|
||||
# End FreeBSD
|
||||
atf_check -o inline:"/\n" dirname /
|
||||
atf_check -o inline:"/\n" dirname //
|
||||
atf_check -o inline:"/usr\n" dirname /usr/bin/
|
||||
|
||||
@@ -22,4 +22,4 @@ CFLAGS+= -I${OFEDSYS}/include
|
||||
|
||||
# Remove .[ly] since the checked-in version is preferred.
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .o .po .So .c .ln
|
||||
.SUFFIXES: .o .po .pico .c .ln
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
#include <sys/jail.h>
|
||||
#endif
|
||||
#ifdef HAVE_CAP_ENTER
|
||||
#include <sys/capability.h>
|
||||
#include <sys/capsicum.h>
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
@@ -92,7 +92,6 @@ extern int SIZE_BUF;
|
||||
#include <libcasper.h>
|
||||
#include <casper/cap_dns.h>
|
||||
#include <sys/nv.h>
|
||||
#include <sys/capability.h>
|
||||
#include <sys/ioccom.h>
|
||||
#include <net/bpf.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
@@ -2,6 +2,166 @@
|
||||
OpenSSL CHANGES
|
||||
_______________
|
||||
|
||||
Changes between 1.0.2h and 1.0.2i [22 Sep 2016]
|
||||
|
||||
*) OCSP Status Request extension unbounded memory growth
|
||||
|
||||
A malicious client can send an excessively large OCSP Status Request
|
||||
extension. If that client continually requests renegotiation, sending a
|
||||
large OCSP Status Request extension each time, then there will be unbounded
|
||||
memory growth on the server. This will eventually lead to a Denial Of
|
||||
Service attack through memory exhaustion. Servers with a default
|
||||
configuration are vulnerable even if they do not support OCSP. Builds using
|
||||
the "no-ocsp" build time option are not affected.
|
||||
|
||||
This issue was reported to OpenSSL by Shi Lei (Gear Team, Qihoo 360 Inc.)
|
||||
(CVE-2016-6304)
|
||||
[Matt Caswell]
|
||||
|
||||
*) In order to mitigate the SWEET32 attack, the DES ciphers were moved from
|
||||
HIGH to MEDIUM.
|
||||
|
||||
This issue was reported to OpenSSL by Karthikeyan Bhargavan and Gaetan
|
||||
Leurent (INRIA)
|
||||
(CVE-2016-2183)
|
||||
[Rich Salz]
|
||||
|
||||
*) OOB write in MDC2_Update()
|
||||
|
||||
An overflow can occur in MDC2_Update() either if called directly or
|
||||
through the EVP_DigestUpdate() function using MDC2. If an attacker
|
||||
is able to supply very large amounts of input data after a previous
|
||||
call to EVP_EncryptUpdate() with a partial block then a length check
|
||||
can overflow resulting in a heap corruption.
|
||||
|
||||
The amount of data needed is comparable to SIZE_MAX which is impractical
|
||||
on most platforms.
|
||||
|
||||
This issue was reported to OpenSSL by Shi Lei (Gear Team, Qihoo 360 Inc.)
|
||||
(CVE-2016-6303)
|
||||
[Stephen Henson]
|
||||
|
||||
*) Malformed SHA512 ticket DoS
|
||||
|
||||
If a server uses SHA512 for TLS session ticket HMAC it is vulnerable to a
|
||||
DoS attack where a malformed ticket will result in an OOB read which will
|
||||
ultimately crash.
|
||||
|
||||
The use of SHA512 in TLS session tickets is comparatively rare as it requires
|
||||
a custom server callback and ticket lookup mechanism.
|
||||
|
||||
This issue was reported to OpenSSL by Shi Lei (Gear Team, Qihoo 360 Inc.)
|
||||
(CVE-2016-6302)
|
||||
[Stephen Henson]
|
||||
|
||||
*) OOB write in BN_bn2dec()
|
||||
|
||||
The function BN_bn2dec() does not check the return value of BN_div_word().
|
||||
This can cause an OOB write if an application uses this function with an
|
||||
overly large BIGNUM. This could be a problem if an overly large certificate
|
||||
or CRL is printed out from an untrusted source. TLS is not affected because
|
||||
record limits will reject an oversized certificate before it is parsed.
|
||||
|
||||
This issue was reported to OpenSSL by Shi Lei (Gear Team, Qihoo 360 Inc.)
|
||||
(CVE-2016-2182)
|
||||
[Stephen Henson]
|
||||
|
||||
*) OOB read in TS_OBJ_print_bio()
|
||||
|
||||
The function TS_OBJ_print_bio() misuses OBJ_obj2txt(): the return value is
|
||||
the total length the OID text representation would use and not the amount
|
||||
of data written. This will result in OOB reads when large OIDs are
|
||||
presented.
|
||||
|
||||
This issue was reported to OpenSSL by Shi Lei (Gear Team, Qihoo 360 Inc.)
|
||||
(CVE-2016-2180)
|
||||
[Stephen Henson]
|
||||
|
||||
*) Pointer arithmetic undefined behaviour
|
||||
|
||||
Avoid some undefined pointer arithmetic
|
||||
|
||||
A common idiom in the codebase is to check limits in the following manner:
|
||||
"p + len > limit"
|
||||
|
||||
Where "p" points to some malloc'd data of SIZE bytes and
|
||||
limit == p + SIZE
|
||||
|
||||
"len" here could be from some externally supplied data (e.g. from a TLS
|
||||
message).
|
||||
|
||||
The rules of C pointer arithmetic are such that "p + len" is only well
|
||||
defined where len <= SIZE. Therefore the above idiom is actually
|
||||
undefined behaviour.
|
||||
|
||||
For example this could cause problems if some malloc implementation
|
||||
provides an address for "p" such that "p + len" actually overflows for
|
||||
values of len that are too big and therefore p + len < limit.
|
||||
|
||||
This issue was reported to OpenSSL by Guido Vranken
|
||||
(CVE-2016-2177)
|
||||
[Matt Caswell]
|
||||
|
||||
*) Constant time flag not preserved in DSA signing
|
||||
|
||||
Operations in the DSA signing algorithm should run in constant time in
|
||||
order to avoid side channel attacks. A flaw in the OpenSSL DSA
|
||||
implementation means that a non-constant time codepath is followed for
|
||||
certain operations. This has been demonstrated through a cache-timing
|
||||
attack to be sufficient for an attacker to recover the private DSA key.
|
||||
|
||||
This issue was reported by César Pereida (Aalto University), Billy Brumley
|
||||
(Tampere University of Technology), and Yuval Yarom (The University of
|
||||
Adelaide and NICTA).
|
||||
(CVE-2016-2178)
|
||||
[César Pereida]
|
||||
|
||||
*) DTLS buffered message DoS
|
||||
|
||||
In a DTLS connection where handshake messages are delivered out-of-order
|
||||
those messages that OpenSSL is not yet ready to process will be buffered
|
||||
for later use. Under certain circumstances, a flaw in the logic means that
|
||||
those messages do not get removed from the buffer even though the handshake
|
||||
has been completed. An attacker could force up to approx. 15 messages to
|
||||
remain in the buffer when they are no longer required. These messages will
|
||||
be cleared when the DTLS connection is closed. The default maximum size for
|
||||
a message is 100k. Therefore the attacker could force an additional 1500k
|
||||
to be consumed per connection. By opening many simultaneous connections an
|
||||
attacker could cause a DoS attack through memory exhaustion.
|
||||
|
||||
This issue was reported to OpenSSL by Quan Luo.
|
||||
(CVE-2016-2179)
|
||||
[Matt Caswell]
|
||||
|
||||
*) DTLS replay protection DoS
|
||||
|
||||
A flaw in the DTLS replay attack protection mechanism means that records
|
||||
that arrive for future epochs update the replay protection "window" before
|
||||
the MAC for the record has been validated. This could be exploited by an
|
||||
attacker by sending a record for the next epoch (which does not have to
|
||||
decrypt or have a valid MAC), with a very large sequence number. This means
|
||||
that all subsequent legitimate packets are dropped causing a denial of
|
||||
service for a specific DTLS connection.
|
||||
|
||||
This issue was reported to OpenSSL by the OCAP audit team.
|
||||
(CVE-2016-2181)
|
||||
[Matt Caswell]
|
||||
|
||||
*) Certificate message OOB reads
|
||||
|
||||
In OpenSSL 1.0.2 and earlier some missing message length checks can result
|
||||
in OOB reads of up to 2 bytes beyond an allocated buffer. There is a
|
||||
theoretical DoS risk but this has not been observed in practice on common
|
||||
platforms.
|
||||
|
||||
The messages affected are client certificate, client certificate request
|
||||
and server certificate. As a result the attack can only be performed
|
||||
against a client or a server which enables client authentication.
|
||||
|
||||
This issue was reported to OpenSSL by Shi Lei (Gear Team, Qihoo 360 Inc.)
|
||||
(CVE-2016-6306)
|
||||
[Stephen Henson]
|
||||
|
||||
Changes between 1.0.2g and 1.0.2h [3 May 2016]
|
||||
|
||||
*) Prevent padding oracle in AES-NI CBC MAC check
|
||||
|
||||
+65
-28
@@ -1,38 +1,75 @@
|
||||
HOW TO CONTRIBUTE TO OpenSSL
|
||||
----------------------------
|
||||
HOW TO CONTRIBUTE PATCHES TO OpenSSL
|
||||
------------------------------------
|
||||
|
||||
Development is coordinated on the openssl-dev mailing list (see
|
||||
http://www.openssl.org for information on subscribing). If you
|
||||
would like to submit a patch, send it to rt@openssl.org with
|
||||
the string "[PATCH]" in the subject. Please be sure to include a
|
||||
textual explanation of what your patch does.
|
||||
|
||||
You can also make GitHub pull requests. If you do this, please also send
|
||||
mail to rt@openssl.org with a brief description and a link to the PR so
|
||||
that we can more easily keep track of it.
|
||||
(Please visit https://www.openssl.org/community/getting-started.html for
|
||||
other ideas about how to contribute.)
|
||||
|
||||
Development is coordinated on the openssl-dev mailing list (see the
|
||||
above link or https://mta.openssl.org for information on subscribing).
|
||||
If you are unsure as to whether a feature will be useful for the general
|
||||
OpenSSL community please discuss it on the openssl-dev mailing list first.
|
||||
Someone may be already working on the same thing or there may be a good
|
||||
reason as to why that feature isn't implemented.
|
||||
OpenSSL community you might want to discuss it on the openssl-dev mailing
|
||||
list first. Someone may be already working on the same thing or there
|
||||
may be a good reason as to why that feature isn't implemented.
|
||||
|
||||
Patches should be as up to date as possible, preferably relative to the
|
||||
current Git or the last snapshot. They should follow our coding style
|
||||
(see https://www.openssl.org/policies/codingstyle.html) and compile without
|
||||
warnings using the --strict-warnings flag. OpenSSL compiles on many varied
|
||||
platforms: try to ensure you only use portable features.
|
||||
The best way to submit a patch is to make a pull request on GitHub.
|
||||
(It is not necessary to send mail to rt@openssl.org to open a ticket!)
|
||||
If you think the patch could use feedback from the community, please
|
||||
start a thread on openssl-dev.
|
||||
|
||||
Our preferred format for patch files is "git format-patch" output. For example
|
||||
to provide a patch file containing the last commit in your local git repository
|
||||
use the following command:
|
||||
You can also submit patches by sending them as mail to rt@openssl.org.
|
||||
Please include the word "PATCH" and an explanation of what the patch
|
||||
does in the subject line. If you do this, our preferred format is "git
|
||||
format-patch" output. For example to provide a patch file containing the
|
||||
last commit in your local git repository use the following command:
|
||||
|
||||
# git format-patch --stdout HEAD^ >mydiffs.patch
|
||||
% git format-patch --stdout HEAD^ >mydiffs.patch
|
||||
|
||||
Another method of creating an acceptable patch file without using git is as
|
||||
follows:
|
||||
|
||||
# cd openssl-work
|
||||
# [your changes]
|
||||
# ./Configure dist; make clean
|
||||
# cd ..
|
||||
# diff -ur openssl-orig openssl-work > mydiffs.patch
|
||||
% cd openssl-work
|
||||
...make your changes...
|
||||
% ./Configure dist; make clean
|
||||
% cd ..
|
||||
% diff -ur openssl-orig openssl-work >mydiffs.patch
|
||||
|
||||
Note that pull requests are generally easier for the team, and community, to
|
||||
work with. Pull requests benefit from all of the standard GitHub features,
|
||||
including code review tools, simpler integration, and CI build support.
|
||||
|
||||
No matter how a patch is submitted, the following items will help make
|
||||
the acceptance and review process faster:
|
||||
|
||||
1. Anything other than trivial contributions will require a contributor
|
||||
licensing agreement, giving us permission to use your code. See
|
||||
https://www.openssl.org/policies/cla.html for details.
|
||||
|
||||
2. All source files should start with the following text (with
|
||||
appropriate comment characters at the start of each line and the
|
||||
year(s) updated):
|
||||
|
||||
Copyright 20xx-20yy The OpenSSL Project Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the OpenSSL license (the "License"). You may not use
|
||||
this file except in compliance with the License. You can obtain a copy
|
||||
in the file LICENSE in the source distribution or at
|
||||
https://www.openssl.org/source/license.html
|
||||
|
||||
3. Patches should be as current as possible. When using GitHub, please
|
||||
expect to have to rebase and update often. Note that we do not accept merge
|
||||
commits. You will be asked to remove them before a patch is considered
|
||||
acceptable.
|
||||
|
||||
4. Patches should follow our coding style (see
|
||||
https://www.openssl.org/policies/codingstyle.html) and compile without
|
||||
warnings. Where gcc or clang is available you should use the
|
||||
--strict-warnings Configure option. OpenSSL compiles on many varied
|
||||
platforms: try to ensure you only use portable features.
|
||||
|
||||
5. When at all possible, patches should include tests. These can either be
|
||||
added to an existing test, or completely new. Please see test/README
|
||||
for information on the test framework.
|
||||
|
||||
6. New features or changed functionality must include documentation. Please
|
||||
look at the "pod" files in doc/apps, doc/crypto and doc/ssl for examples of
|
||||
our style.
|
||||
|
||||
@@ -799,7 +799,7 @@ my @experimental = ();
|
||||
|
||||
# This is what $depflags will look like with the above defaults
|
||||
# (we need this to see if we should advise the user to run "make depend"):
|
||||
my $default_depflags = " -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST";
|
||||
my $default_depflags = " -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_SSL2 -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST -DOPENSSL_NO_WEAK_SSL_CIPHERS";
|
||||
|
||||
# Explicit "no-..." options will be collected in %disabled along with the defaults.
|
||||
# To remove something from %disabled, use "enable-foo" (unless it's experimental).
|
||||
@@ -1082,11 +1082,6 @@ if (defined($disabled{"md5"}) || defined($disabled{"sha"})
|
||||
$disabled{"tls1"} = "forced";
|
||||
}
|
||||
|
||||
if (defined($disabled{"tls1"}))
|
||||
{
|
||||
$disabled{"tlsext"} = "forced";
|
||||
}
|
||||
|
||||
if (defined($disabled{"ec"}) || defined($disabled{"dsa"})
|
||||
|| defined($disabled{"dh"}))
|
||||
{
|
||||
@@ -1254,6 +1249,7 @@ my $shared_extension = $fields[$idx_shared_extension];
|
||||
my $ranlib = $ENV{'RANLIB'} || $fields[$idx_ranlib];
|
||||
my $ar = $ENV{'AR'} || "ar";
|
||||
my $arflags = $fields[$idx_arflags];
|
||||
my $windres = $ENV{'RC'} || $ENV{'WINDRES'} || "windres";
|
||||
my $multilib = $fields[$idx_multilib];
|
||||
|
||||
# if $prefix/lib$multilib is not an existing directory, then
|
||||
@@ -1562,8 +1558,15 @@ $cpuid_obj="mem_clr.o" unless ($cpuid_obj =~ /\.o$/);
|
||||
$des_obj=$des_enc unless ($des_obj =~ /\.o$/);
|
||||
$bf_obj=$bf_enc unless ($bf_obj =~ /\.o$/);
|
||||
$cast_obj=$cast_enc unless ($cast_obj =~ /\.o$/);
|
||||
$rc4_obj=$rc4_enc unless ($rc4_obj =~ /\.o$/);
|
||||
$rc5_obj=$rc5_enc unless ($rc5_obj =~ /\.o$/);
|
||||
if ($rc4_obj =~ /\.o$/)
|
||||
{
|
||||
$cflags.=" -DRC4_ASM";
|
||||
}
|
||||
else
|
||||
{
|
||||
$rc4_obj=$rc4_enc;
|
||||
}
|
||||
if ($sha1_obj =~ /\.o$/)
|
||||
{
|
||||
# $sha1_obj=$sha1_enc;
|
||||
@@ -1717,12 +1720,14 @@ while (<IN>)
|
||||
s/^AR=\s*/AR= \$\(CROSS_COMPILE\)/;
|
||||
s/^NM=\s*/NM= \$\(CROSS_COMPILE\)/;
|
||||
s/^RANLIB=\s*/RANLIB= \$\(CROSS_COMPILE\)/;
|
||||
s/^RC=\s*/RC= \$\(CROSS_COMPILE\)/;
|
||||
s/^MAKEDEPPROG=.*$/MAKEDEPPROG= \$\(CROSS_COMPILE\)$cc/ if $cc eq "gcc";
|
||||
}
|
||||
else {
|
||||
s/^CC=.*$/CC= $cc/;
|
||||
s/^AR=\s*ar/AR= $ar/;
|
||||
s/^RANLIB=.*/RANLIB= $ranlib/;
|
||||
s/^RC=.*/RC= $windres/;
|
||||
s/^MAKEDEPPROG=.*$/MAKEDEPPROG= $cc/ if $cc eq "gcc";
|
||||
s/^MAKEDEPPROG=.*$/MAKEDEPPROG= $cc/ if $ecc eq "gcc" || $ecc eq "clang";
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
## Makefile for OpenSSL
|
||||
##
|
||||
|
||||
VERSION=1.0.2h
|
||||
VERSION=1.0.2i
|
||||
MAJOR=1
|
||||
MINOR=0.2
|
||||
SHLIB_VERSION_NUMBER=1.0.0
|
||||
@@ -68,6 +68,7 @@ EXE_EXT=
|
||||
ARFLAGS=
|
||||
AR= ar $(ARFLAGS) r
|
||||
RANLIB= /usr/bin/ranlib
|
||||
RC= windres
|
||||
NM= nm
|
||||
PERL= /usr/bin/perl
|
||||
TAR= tar
|
||||
@@ -210,6 +211,7 @@ BUILDENV= LC_ALL=C PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)'\
|
||||
CC='$(CC)' CFLAG='$(CFLAG)' \
|
||||
AS='$(CC)' ASFLAG='$(CFLAG) -c' \
|
||||
AR='$(AR)' NM='$(NM)' RANLIB='$(RANLIB)' \
|
||||
RC='$(RC)' \
|
||||
CROSS_COMPILE='$(CROSS_COMPILE)' \
|
||||
PERL='$(PERL)' ENGDIRS='$(ENGDIRS)' \
|
||||
SDIRS='$(SDIRS)' LIBRPATH='$(INSTALLTOP)/$(LIBDIR)' \
|
||||
@@ -368,6 +370,7 @@ libcrypto.pc: Makefile
|
||||
echo 'exec_prefix=$${prefix}'; \
|
||||
echo 'libdir=$${exec_prefix}/$(LIBDIR)'; \
|
||||
echo 'includedir=$${prefix}/include'; \
|
||||
echo 'enginesdir=$${libdir}/engines'; \
|
||||
echo ''; \
|
||||
echo 'Name: OpenSSL-libcrypto'; \
|
||||
echo 'Description: OpenSSL cryptography library'; \
|
||||
|
||||
@@ -66,6 +66,7 @@ EXE_EXT=
|
||||
ARFLAGS?= r
|
||||
AR=ar $(ARFLAGS)
|
||||
RANLIB= ranlib
|
||||
RC= windres
|
||||
NM= nm
|
||||
PERL= perl
|
||||
TAR= tar
|
||||
@@ -208,6 +209,7 @@ BUILDENV= LC_ALL=C PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)'\
|
||||
CC='$(CC)' CFLAG='$(CFLAG)' \
|
||||
AS='$(CC)' ASFLAG='$(CFLAG) -c' \
|
||||
AR='$(AR)' NM='$(NM)' RANLIB='$(RANLIB)' \
|
||||
RC='$(RC)' \
|
||||
CROSS_COMPILE='$(CROSS_COMPILE)' \
|
||||
PERL='$(PERL)' ENGDIRS='$(ENGDIRS)' \
|
||||
SDIRS='$(SDIRS)' LIBRPATH='$(INSTALLTOP)/$(LIBDIR)' \
|
||||
@@ -366,6 +368,7 @@ libcrypto.pc: Makefile
|
||||
echo 'exec_prefix=$${prefix}'; \
|
||||
echo 'libdir=$${exec_prefix}/$(LIBDIR)'; \
|
||||
echo 'includedir=$${prefix}/include'; \
|
||||
echo 'enginesdir=$${libdir}/engines'; \
|
||||
echo ''; \
|
||||
echo 'Name: OpenSSL-libcrypto'; \
|
||||
echo 'Description: OpenSSL cryptography library'; \
|
||||
|
||||
@@ -293,7 +293,7 @@ link_a.cygwin:
|
||||
fi; \
|
||||
dll_name=$$SHLIB$$SHLIB_SOVER$$SHLIB_SUFFIX; \
|
||||
$(PERL) util/mkrc.pl $$dll_name | \
|
||||
$(CROSS_COMPILE)windres -o rc.o; \
|
||||
$(RC) -o rc.o; \
|
||||
extras="$$extras rc.o"; \
|
||||
ALLSYMSFLAGS='-Wl,--whole-archive'; \
|
||||
NOALLSYMSFLAGS='-Wl,--no-whole-archive'; \
|
||||
|
||||
@@ -5,6 +5,20 @@
|
||||
This file gives a brief overview of the major changes between each OpenSSL
|
||||
release. For more details please read the CHANGES file.
|
||||
|
||||
Major changes between OpenSSL 1.0.2h and OpenSSL 1.0.2i [22 Sep 2016]
|
||||
|
||||
o OCSP Status Request extension unbounded memory growth (CVE-2016-6304)
|
||||
o SWEET32 Mitigation (CVE-2016-2183)
|
||||
o OOB write in MDC2_Update() (CVE-2016-6303)
|
||||
o Malformed SHA512 ticket DoS (CVE-2016-6302)
|
||||
o OOB write in BN_bn2dec() (CVE-2016-2182)
|
||||
o OOB read in TS_OBJ_print_bio() (CVE-2016-2180)
|
||||
o Pointer arithmetic undefined behaviour (CVE-2016-2177)
|
||||
o Constant time flag not preserved in DSA signing (CVE-2016-2178)
|
||||
o DTLS buffered message DoS (CVE-2016-2179)
|
||||
o DTLS replay protection DoS (CVE-2016-2181)
|
||||
o Certificate message OOB reads (CVE-2016-6306)
|
||||
|
||||
Major changes between OpenSSL 1.0.2g and OpenSSL 1.0.2h [3 May 2016]
|
||||
|
||||
o Prevent padding oracle in AES-NI CBC MAC check (CVE-2016-2107)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
OpenSSL 1.0.2h 3 May 2016
|
||||
OpenSSL 1.0.2i 22 Sep 2016
|
||||
|
||||
Copyright (c) 1998-2015 The OpenSSL Project
|
||||
Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson
|
||||
|
||||
@@ -64,7 +64,7 @@ $RET = 0;
|
||||
|
||||
foreach (@ARGV) {
|
||||
if ( /^(-\?|-h|-help)$/ ) {
|
||||
print STDERR "usage: CA -newcert|-newreq|-newreq-nodes|-newca|-sign|-verify\n";
|
||||
print STDERR "usage: CA -newcert|-newreq|-newreq-nodes|-newca|-sign|-signcert|-verify\n";
|
||||
exit 0;
|
||||
} elsif (/^-newcert$/) {
|
||||
# create a certificate
|
||||
@@ -186,4 +186,3 @@ while (<IN>) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -64,7 +64,7 @@ $RET = 0;
|
||||
|
||||
foreach (@ARGV) {
|
||||
if ( /^(-\?|-h|-help)$/ ) {
|
||||
print STDERR "usage: CA -newcert|-newreq|-newreq-nodes|-newca|-sign|-verify\n";
|
||||
print STDERR "usage: CA -newcert|-newreq|-newreq-nodes|-newca|-sign|-signcert|-verify\n";
|
||||
exit 0;
|
||||
} elsif (/^-newcert$/) {
|
||||
# create a certificate
|
||||
@@ -186,4 +186,3 @@ while (<IN>) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -215,7 +215,8 @@ int args_from_file(char *file, int *argc, char **argv[])
|
||||
if (arg != NULL)
|
||||
OPENSSL_free(arg);
|
||||
arg = (char **)OPENSSL_malloc(sizeof(char *) * (i * 2));
|
||||
|
||||
if (arg == NULL)
|
||||
return 0;
|
||||
*argv = arg;
|
||||
num = 0;
|
||||
p = buf;
|
||||
@@ -2374,6 +2375,8 @@ int args_verify(char ***pargs, int *pargc,
|
||||
flags |= X509_V_FLAG_PARTIAL_CHAIN;
|
||||
else if (!strcmp(arg, "-no_alt_chains"))
|
||||
flags |= X509_V_FLAG_NO_ALT_CHAINS;
|
||||
else if (!strcmp(arg, "-allow_proxy_certs"))
|
||||
flags |= X509_V_FLAG_ALLOW_PROXY_CERTS;
|
||||
else
|
||||
return 0;
|
||||
|
||||
@@ -3195,6 +3198,36 @@ int app_isdir(const char *name)
|
||||
#endif
|
||||
|
||||
/* raw_read|write section */
|
||||
#if defined(__VMS)
|
||||
# include "vms_term_sock.h"
|
||||
static int stdin_sock = -1;
|
||||
|
||||
static void close_stdin_sock(void)
|
||||
{
|
||||
TerminalSocket (TERM_SOCK_DELETE, &stdin_sock);
|
||||
}
|
||||
|
||||
int fileno_stdin(void)
|
||||
{
|
||||
if (stdin_sock == -1) {
|
||||
TerminalSocket(TERM_SOCK_CREATE, &stdin_sock);
|
||||
atexit(close_stdin_sock);
|
||||
}
|
||||
|
||||
return stdin_sock;
|
||||
}
|
||||
#else
|
||||
int fileno_stdin(void)
|
||||
{
|
||||
return fileno(stdin);
|
||||
}
|
||||
#endif
|
||||
|
||||
int fileno_stdout(void)
|
||||
{
|
||||
return fileno(stdout);
|
||||
}
|
||||
|
||||
#if defined(_WIN32) && defined(STD_INPUT_HANDLE)
|
||||
int raw_read_stdin(void *buf, int siz)
|
||||
{
|
||||
@@ -3204,10 +3237,17 @@ int raw_read_stdin(void *buf, int siz)
|
||||
else
|
||||
return (-1);
|
||||
}
|
||||
#elif defined(__VMS)
|
||||
#include <sys/socket.h>
|
||||
|
||||
int raw_read_stdin(void *buf, int siz)
|
||||
{
|
||||
return recv(fileno_stdin(), buf, siz, 0);
|
||||
}
|
||||
#else
|
||||
int raw_read_stdin(void *buf, int siz)
|
||||
{
|
||||
return read(fileno(stdin), buf, siz);
|
||||
return read(fileno_stdin(), buf, siz);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -3223,6 +3263,6 @@ int raw_write_stdout(const void *buf, int siz)
|
||||
#else
|
||||
int raw_write_stdout(const void *buf, int siz)
|
||||
{
|
||||
return write(fileno(stdout), buf, siz);
|
||||
return write(fileno_stdout(), buf, siz);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -375,6 +375,8 @@ void store_setup_crl_download(X509_STORE *st);
|
||||
# define SERIAL_RAND_BITS 64
|
||||
|
||||
int app_isdir(const char *);
|
||||
int fileno_stdin(void);
|
||||
int fileno_stdout(void);
|
||||
int raw_read_stdin(void *, int);
|
||||
int raw_write_stdout(const void *, int);
|
||||
|
||||
|
||||
+11
-12
@@ -2103,25 +2103,23 @@ static int do_body(X509 **xret, EVP_PKEY *pkey, X509 *x509,
|
||||
goto err;
|
||||
|
||||
/* We now just add it to the database */
|
||||
row[DB_type] = (char *)OPENSSL_malloc(2);
|
||||
|
||||
tm = X509_get_notAfter(ret);
|
||||
row[DB_exp_date] = (char *)OPENSSL_malloc(tm->length + 1);
|
||||
memcpy(row[DB_exp_date], tm->data, tm->length);
|
||||
row[DB_exp_date][tm->length] = '\0';
|
||||
|
||||
row[DB_rev_date] = NULL;
|
||||
|
||||
/* row[DB_serial] done already */
|
||||
row[DB_file] = (char *)OPENSSL_malloc(8);
|
||||
row[DB_type] = OPENSSL_malloc(2);
|
||||
row[DB_exp_date] = OPENSSL_malloc(tm->length + 1);
|
||||
row[DB_rev_date] = OPENSSL_malloc(1);
|
||||
row[DB_file] = OPENSSL_malloc(8);
|
||||
row[DB_name] = X509_NAME_oneline(X509_get_subject_name(ret), NULL, 0);
|
||||
|
||||
if ((row[DB_type] == NULL) || (row[DB_exp_date] == NULL) ||
|
||||
(row[DB_rev_date] == NULL) ||
|
||||
(row[DB_file] == NULL) || (row[DB_name] == NULL)) {
|
||||
BIO_printf(bio_err, "Memory allocation failure\n");
|
||||
goto err;
|
||||
}
|
||||
BUF_strlcpy(row[DB_file], "unknown", 8);
|
||||
|
||||
memcpy(row[DB_exp_date], tm->data, tm->length);
|
||||
row[DB_exp_date][tm->length] = '\0';
|
||||
row[DB_rev_date][0] = '\0';
|
||||
strcpy(row[DB_file], "unknown");
|
||||
row[DB_type][0] = 'V';
|
||||
row[DB_type][1] = '\0';
|
||||
|
||||
@@ -2307,6 +2305,7 @@ static int certify_spkac(X509 **xret, char *infile, EVP_PKEY *pkey,
|
||||
|
||||
j = NETSCAPE_SPKI_verify(spki, pktmp);
|
||||
if (j <= 0) {
|
||||
EVP_PKEY_free(pktmp);
|
||||
BIO_printf(bio_err,
|
||||
"signature verification failed on SPKAC public key\n");
|
||||
goto err;
|
||||
|
||||
@@ -243,6 +243,11 @@ int MAIN(int argc, char **argv)
|
||||
argv++;
|
||||
}
|
||||
|
||||
if (keyfile != NULL && argc > 1) {
|
||||
BIO_printf(bio_err, "Can only sign or verify one file\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (do_verify && !sigfile) {
|
||||
BIO_printf(bio_err,
|
||||
"No signature to verify: use the -signature option\n");
|
||||
|
||||
@@ -509,7 +509,7 @@ int MAIN(int argc, char **argv)
|
||||
BIO_printf(bio_err, "invalid hex salt value\n");
|
||||
goto end;
|
||||
}
|
||||
} else if (RAND_pseudo_bytes(salt, sizeof salt) < 0)
|
||||
} else if (RAND_bytes(salt, sizeof salt) <= 0)
|
||||
goto end;
|
||||
/*
|
||||
* If -P option then don't bother writing
|
||||
|
||||
@@ -416,7 +416,7 @@ static int do_passwd(int passed_salt, char **salt_p, char **salt_malloc_p,
|
||||
if (*salt_malloc_p == NULL)
|
||||
goto err;
|
||||
}
|
||||
if (RAND_pseudo_bytes((unsigned char *)*salt_p, 2) < 0)
|
||||
if (RAND_bytes((unsigned char *)*salt_p, 2) <= 0)
|
||||
goto err;
|
||||
(*salt_p)[0] = cov_2char[(*salt_p)[0] & 0x3f]; /* 6 bits */
|
||||
(*salt_p)[1] = cov_2char[(*salt_p)[1] & 0x3f]; /* 6 bits */
|
||||
@@ -437,7 +437,7 @@ static int do_passwd(int passed_salt, char **salt_p, char **salt_malloc_p,
|
||||
if (*salt_malloc_p == NULL)
|
||||
goto err;
|
||||
}
|
||||
if (RAND_pseudo_bytes((unsigned char *)*salt_p, 8) < 0)
|
||||
if (RAND_bytes((unsigned char *)*salt_p, 8) <= 0)
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
|
||||
@@ -832,6 +832,7 @@ int dump_certs_pkeys_bag(BIO *out, PKCS12_SAFEBAG *bag, char *pass,
|
||||
EVP_PKEY *pkey;
|
||||
PKCS8_PRIV_KEY_INFO *p8;
|
||||
X509 *x509;
|
||||
int ret = 0;
|
||||
|
||||
switch (M_PKCS12_bag_type(bag)) {
|
||||
case NID_keyBag:
|
||||
@@ -844,7 +845,7 @@ int dump_certs_pkeys_bag(BIO *out, PKCS12_SAFEBAG *bag, char *pass,
|
||||
if (!(pkey = EVP_PKCS82PKEY(p8)))
|
||||
return 0;
|
||||
print_attribs(out, p8->attributes, "Key Attributes");
|
||||
PEM_write_bio_PrivateKey(out, pkey, enc, NULL, 0, NULL, pempass);
|
||||
ret = PEM_write_bio_PrivateKey(out, pkey, enc, NULL, 0, NULL, pempass);
|
||||
EVP_PKEY_free(pkey);
|
||||
break;
|
||||
|
||||
@@ -864,7 +865,7 @@ int dump_certs_pkeys_bag(BIO *out, PKCS12_SAFEBAG *bag, char *pass,
|
||||
}
|
||||
print_attribs(out, p8->attributes, "Key Attributes");
|
||||
PKCS8_PRIV_KEY_INFO_free(p8);
|
||||
PEM_write_bio_PrivateKey(out, pkey, enc, NULL, 0, NULL, pempass);
|
||||
ret = PEM_write_bio_PrivateKey(out, pkey, enc, NULL, 0, NULL, pempass);
|
||||
EVP_PKEY_free(pkey);
|
||||
break;
|
||||
|
||||
@@ -884,7 +885,7 @@ int dump_certs_pkeys_bag(BIO *out, PKCS12_SAFEBAG *bag, char *pass,
|
||||
if (!(x509 = PKCS12_certbag2x509(bag)))
|
||||
return 0;
|
||||
dump_cert_text(out, x509);
|
||||
PEM_write_bio_X509(out, x509);
|
||||
ret = PEM_write_bio_X509(out, x509);
|
||||
X509_free(x509);
|
||||
break;
|
||||
|
||||
@@ -902,7 +903,7 @@ int dump_certs_pkeys_bag(BIO *out, PKCS12_SAFEBAG *bag, char *pass,
|
||||
return 1;
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Given a single certificate return a verified chain or NULL if error */
|
||||
@@ -931,16 +932,70 @@ static int get_cert_chain(X509 *cert, X509_STORE *store,
|
||||
|
||||
int alg_print(BIO *x, X509_ALGOR *alg)
|
||||
{
|
||||
PBEPARAM *pbe;
|
||||
const unsigned char *p;
|
||||
p = alg->parameter->value.sequence->data;
|
||||
pbe = d2i_PBEPARAM(NULL, &p, alg->parameter->value.sequence->length);
|
||||
if (!pbe)
|
||||
return 1;
|
||||
BIO_printf(bio_err, "%s, Iteration %ld\n",
|
||||
OBJ_nid2ln(OBJ_obj2nid(alg->algorithm)),
|
||||
ASN1_INTEGER_get(pbe->iter));
|
||||
PBEPARAM_free(pbe);
|
||||
int pbenid, aparamtype;
|
||||
ASN1_OBJECT *aoid;
|
||||
void *aparam;
|
||||
PBEPARAM *pbe = NULL;
|
||||
|
||||
X509_ALGOR_get0(&aoid, &aparamtype, &aparam, alg);
|
||||
|
||||
pbenid = OBJ_obj2nid(aoid);
|
||||
|
||||
BIO_printf(x, "%s", OBJ_nid2ln(pbenid));
|
||||
|
||||
/*
|
||||
* If PBE algorithm is PBES2 decode algorithm parameters
|
||||
* for additional details.
|
||||
*/
|
||||
if (pbenid == NID_pbes2) {
|
||||
PBE2PARAM *pbe2 = NULL;
|
||||
int encnid;
|
||||
if (aparamtype == V_ASN1_SEQUENCE)
|
||||
pbe2 = ASN1_item_unpack(aparam, ASN1_ITEM_rptr(PBE2PARAM));
|
||||
if (pbe2 == NULL) {
|
||||
BIO_puts(x, "<unsupported parameters>");
|
||||
goto done;
|
||||
}
|
||||
X509_ALGOR_get0(&aoid, &aparamtype, &aparam, pbe2->keyfunc);
|
||||
pbenid = OBJ_obj2nid(aoid);
|
||||
X509_ALGOR_get0(&aoid, NULL, NULL, pbe2->encryption);
|
||||
encnid = OBJ_obj2nid(aoid);
|
||||
BIO_printf(x, ", %s, %s", OBJ_nid2ln(pbenid),
|
||||
OBJ_nid2sn(encnid));
|
||||
/* If KDF is PBKDF2 decode parameters */
|
||||
if (pbenid == NID_id_pbkdf2) {
|
||||
PBKDF2PARAM *kdf = NULL;
|
||||
int prfnid;
|
||||
if (aparamtype == V_ASN1_SEQUENCE)
|
||||
kdf = ASN1_item_unpack(aparam, ASN1_ITEM_rptr(PBKDF2PARAM));
|
||||
if (kdf == NULL) {
|
||||
BIO_puts(x, "<unsupported parameters>");
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (kdf->prf == NULL) {
|
||||
prfnid = NID_hmacWithSHA1;
|
||||
} else {
|
||||
X509_ALGOR_get0(&aoid, NULL, NULL, kdf->prf);
|
||||
prfnid = OBJ_obj2nid(aoid);
|
||||
}
|
||||
BIO_printf(x, ", Iteration %ld, PRF %s",
|
||||
ASN1_INTEGER_get(kdf->iter), OBJ_nid2sn(prfnid));
|
||||
PBKDF2PARAM_free(kdf);
|
||||
}
|
||||
PBE2PARAM_free(pbe2);
|
||||
} else {
|
||||
if (aparamtype == V_ASN1_SEQUENCE)
|
||||
pbe = ASN1_item_unpack(aparam, ASN1_ITEM_rptr(PBEPARAM));
|
||||
if (pbe == NULL) {
|
||||
BIO_puts(x, "<unsupported parameters>");
|
||||
goto done;
|
||||
}
|
||||
BIO_printf(x, ", Iteration %ld", ASN1_INTEGER_get(pbe->iter));
|
||||
PBEPARAM_free(pbe);
|
||||
}
|
||||
done:
|
||||
BIO_puts(x, "\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -332,9 +332,10 @@ int MAIN(int argc, char **argv)
|
||||
subject = 1;
|
||||
else if (strcmp(*argv, "-text") == 0)
|
||||
text = 1;
|
||||
else if (strcmp(*argv, "-x509") == 0)
|
||||
else if (strcmp(*argv, "-x509") == 0) {
|
||||
newreq = 1;
|
||||
x509 = 1;
|
||||
else if (strcmp(*argv, "-asn1-kludge") == 0)
|
||||
} else if (strcmp(*argv, "-asn1-kludge") == 0)
|
||||
kludge = 1;
|
||||
else if (strcmp(*argv, "-no-asn1-kludge") == 0)
|
||||
kludge = 0;
|
||||
@@ -756,7 +757,7 @@ int MAIN(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
if (newreq || x509) {
|
||||
if (newreq) {
|
||||
if (pkey == NULL) {
|
||||
BIO_printf(bio_err, "you need to specify a private key\n");
|
||||
goto end;
|
||||
@@ -1331,12 +1332,11 @@ static int auto_info(X509_REQ *req, STACK_OF(CONF_VALUE) *dn_sk,
|
||||
break;
|
||||
}
|
||||
#ifndef CHARSET_EBCDIC
|
||||
if (*p == '+')
|
||||
if (*type == '+') {
|
||||
#else
|
||||
if (*p == os_toascii['+'])
|
||||
if (*type == os_toascii['+']) {
|
||||
#endif
|
||||
{
|
||||
p++;
|
||||
type++;
|
||||
mval = -1;
|
||||
} else
|
||||
mval = 0;
|
||||
|
||||
@@ -199,7 +199,8 @@ int load_excert(SSL_EXCERT **pexc, BIO *err);
|
||||
void print_ssl_summary(BIO *bio, SSL *s);
|
||||
#ifdef HEADER_SSL_H
|
||||
int args_ssl(char ***pargs, int *pargc, SSL_CONF_CTX *cctx,
|
||||
int *badarg, BIO *err, STACK_OF(OPENSSL_STRING) **pstr);
|
||||
int *badarg, BIO *err, STACK_OF(OPENSSL_STRING) **pstr,
|
||||
int *no_prot_opt);
|
||||
int args_ssl_call(SSL_CTX *ctx, BIO *err, SSL_CONF_CTX *cctx,
|
||||
STACK_OF(OPENSSL_STRING) *str, int no_ecdhe, int no_jpake);
|
||||
int ssl_ctx_add_crls(SSL_CTX *ctx, STACK_OF(X509_CRL) *crls,
|
||||
|
||||
@@ -1507,11 +1507,18 @@ void print_ssl_summary(BIO *bio, SSL *s)
|
||||
}
|
||||
|
||||
int args_ssl(char ***pargs, int *pargc, SSL_CONF_CTX *cctx,
|
||||
int *badarg, BIO *err, STACK_OF(OPENSSL_STRING) **pstr)
|
||||
int *badarg, BIO *err, STACK_OF(OPENSSL_STRING) **pstr,
|
||||
int *no_prot_opt)
|
||||
{
|
||||
char *arg = **pargs, *argn = (*pargs)[1];
|
||||
int rv;
|
||||
|
||||
if (strcmp(arg, "-no_ssl2") == 0 || strcmp(arg, "-no_ssl3") == 0
|
||||
|| strcmp(arg, "-no_tls1") == 0 || strcmp(arg, "-no_tls1_1") == 0
|
||||
|| strcmp(arg, "-no_tls1_2") == 0) {
|
||||
*no_prot_opt = 1;
|
||||
}
|
||||
|
||||
/* Attempt to run SSL configuration command */
|
||||
rv = SSL_CONF_cmd_argv(cctx, pargc, pargs);
|
||||
/* If parameter not recognised just return */
|
||||
|
||||
@@ -242,9 +242,9 @@ static unsigned int psk_client_cb(SSL *ssl, const char *hint, char *identity,
|
||||
unsigned char *psk,
|
||||
unsigned int max_psk_len)
|
||||
{
|
||||
unsigned int psk_len = 0;
|
||||
int ret;
|
||||
BIGNUM *bn = NULL;
|
||||
long key_len;
|
||||
unsigned char *key;
|
||||
|
||||
if (c_debug)
|
||||
BIO_printf(bio_c_out, "psk_client_cb\n");
|
||||
@@ -265,32 +265,29 @@ static unsigned int psk_client_cb(SSL *ssl, const char *hint, char *identity,
|
||||
if (c_debug)
|
||||
BIO_printf(bio_c_out, "created identity '%s' len=%d\n", identity,
|
||||
ret);
|
||||
ret = BN_hex2bn(&bn, psk_key);
|
||||
if (!ret) {
|
||||
BIO_printf(bio_err, "Could not convert PSK key '%s' to BIGNUM\n",
|
||||
|
||||
/* convert the PSK key to binary */
|
||||
key = string_to_hex(psk_key, &key_len);
|
||||
if (key == NULL) {
|
||||
BIO_printf(bio_err, "Could not convert PSK key '%s' to buffer\n",
|
||||
psk_key);
|
||||
if (bn)
|
||||
BN_free(bn);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((unsigned int)BN_num_bytes(bn) > max_psk_len) {
|
||||
if ((unsigned long)key_len > (unsigned long)max_psk_len) {
|
||||
BIO_printf(bio_err,
|
||||
"psk buffer of callback is too small (%d) for key (%d)\n",
|
||||
max_psk_len, BN_num_bytes(bn));
|
||||
BN_free(bn);
|
||||
"psk buffer of callback is too small (%d) for key (%ld)\n",
|
||||
max_psk_len, key_len);
|
||||
OPENSSL_free(key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
psk_len = BN_bn2bin(bn, psk);
|
||||
BN_free(bn);
|
||||
if (psk_len == 0)
|
||||
goto out_err;
|
||||
memcpy(psk, key, key_len);
|
||||
OPENSSL_free(key);
|
||||
|
||||
if (c_debug)
|
||||
BIO_printf(bio_c_out, "created PSK len=%d\n", psk_len);
|
||||
BIO_printf(bio_c_out, "created PSK len=%ld\n", key_len);
|
||||
|
||||
return psk_len;
|
||||
return key_len;
|
||||
out_err:
|
||||
if (c_debug)
|
||||
BIO_printf(bio_err, "Error in PSK client callback\n");
|
||||
@@ -747,6 +744,7 @@ int MAIN(int argc, char **argv)
|
||||
int crl_format = FORMAT_PEM;
|
||||
int crl_download = 0;
|
||||
STACK_OF(X509_CRL) *crls = NULL;
|
||||
int prot_opt = 0, no_prot_opt = 0;
|
||||
|
||||
meth = SSLv23_client_method();
|
||||
|
||||
@@ -850,7 +848,8 @@ int MAIN(int argc, char **argv)
|
||||
if (badarg)
|
||||
goto bad;
|
||||
continue;
|
||||
} else if (args_ssl(&argv, &argc, cctx, &badarg, bio_err, &ssl_args)) {
|
||||
} else if (args_ssl(&argv, &argc, cctx, &badarg, bio_err, &ssl_args,
|
||||
&no_prot_opt)) {
|
||||
if (badarg)
|
||||
goto bad;
|
||||
continue;
|
||||
@@ -942,31 +941,42 @@ int MAIN(int argc, char **argv)
|
||||
}
|
||||
#endif
|
||||
#ifndef OPENSSL_NO_SSL2
|
||||
else if (strcmp(*argv, "-ssl2") == 0)
|
||||
else if (strcmp(*argv, "-ssl2") == 0) {
|
||||
meth = SSLv2_client_method();
|
||||
prot_opt++;
|
||||
}
|
||||
#endif
|
||||
#ifndef OPENSSL_NO_SSL3_METHOD
|
||||
else if (strcmp(*argv, "-ssl3") == 0)
|
||||
else if (strcmp(*argv, "-ssl3") == 0) {
|
||||
meth = SSLv3_client_method();
|
||||
prot_opt++;
|
||||
}
|
||||
#endif
|
||||
#ifndef OPENSSL_NO_TLS1
|
||||
else if (strcmp(*argv, "-tls1_2") == 0)
|
||||
else if (strcmp(*argv, "-tls1_2") == 0) {
|
||||
meth = TLSv1_2_client_method();
|
||||
else if (strcmp(*argv, "-tls1_1") == 0)
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-tls1_1") == 0) {
|
||||
meth = TLSv1_1_client_method();
|
||||
else if (strcmp(*argv, "-tls1") == 0)
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-tls1") == 0) {
|
||||
meth = TLSv1_client_method();
|
||||
prot_opt++;
|
||||
}
|
||||
#endif
|
||||
#ifndef OPENSSL_NO_DTLS1
|
||||
else if (strcmp(*argv, "-dtls") == 0) {
|
||||
meth = DTLS_client_method();
|
||||
socket_type = SOCK_DGRAM;
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-dtls1") == 0) {
|
||||
meth = DTLSv1_client_method();
|
||||
socket_type = SOCK_DGRAM;
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-dtls1_2") == 0) {
|
||||
meth = DTLSv1_2_client_method();
|
||||
socket_type = SOCK_DGRAM;
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-timeout") == 0)
|
||||
enable_timeouts = 1;
|
||||
else if (strcmp(*argv, "-mtu") == 0) {
|
||||
@@ -1149,6 +1159,17 @@ int MAIN(int argc, char **argv)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (prot_opt > 1) {
|
||||
BIO_printf(bio_err, "Cannot supply multiple protocol flags\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (prot_opt == 1 && no_prot_opt) {
|
||||
BIO_printf(bio_err, "Cannot supply both a protocol flag and "
|
||||
"\"-no_<prot>\"\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
OpenSSL_add_ssl_algorithms();
|
||||
SSL_load_error_strings();
|
||||
|
||||
@@ -1540,7 +1561,10 @@ int MAIN(int argc, char **argv)
|
||||
SSL_set_connect_state(con);
|
||||
|
||||
/* ok, lets connect */
|
||||
width = SSL_get_fd(con) + 1;
|
||||
if (fileno_stdin() > SSL_get_fd(con))
|
||||
width = fileno_stdin() + 1;
|
||||
else
|
||||
width = SSL_get_fd(con) + 1;
|
||||
|
||||
read_tty = 1;
|
||||
write_tty = 0;
|
||||
@@ -1723,9 +1747,11 @@ int MAIN(int argc, char **argv)
|
||||
#if !defined(OPENSSL_SYS_WINDOWS) && !defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_NETWARE) && !defined (OPENSSL_SYS_BEOS_R5)
|
||||
if (tty_on) {
|
||||
if (read_tty)
|
||||
openssl_fdset(fileno(stdin), &readfds);
|
||||
openssl_fdset(fileno_stdin(), &readfds);
|
||||
#if !defined(OPENSSL_SYS_VMS)
|
||||
if (write_tty)
|
||||
openssl_fdset(fileno(stdout), &writefds);
|
||||
openssl_fdset(fileno_stdout(), &writefds);
|
||||
#endif
|
||||
}
|
||||
if (read_ssl)
|
||||
openssl_fdset(SSL_get_fd(con), &readfds);
|
||||
@@ -1795,14 +1821,14 @@ int MAIN(int argc, char **argv)
|
||||
/* Under BeOS-R5 the situation is similar to DOS */
|
||||
i = 0;
|
||||
stdin_set = 0;
|
||||
(void)fcntl(fileno(stdin), F_SETFL, O_NONBLOCK);
|
||||
(void)fcntl(fileno_stdin(), F_SETFL, O_NONBLOCK);
|
||||
if (!write_tty) {
|
||||
if (read_tty) {
|
||||
tv.tv_sec = 1;
|
||||
tv.tv_usec = 0;
|
||||
i = select(width, (void *)&readfds, (void *)&writefds,
|
||||
NULL, &tv);
|
||||
if (read(fileno(stdin), sbuf, 0) >= 0)
|
||||
if (read(fileno_stdin(), sbuf, 0) >= 0)
|
||||
stdin_set = 1;
|
||||
if (!i && (stdin_set != 1 || !read_tty))
|
||||
continue;
|
||||
@@ -1810,7 +1836,7 @@ int MAIN(int argc, char **argv)
|
||||
i = select(width, (void *)&readfds, (void *)&writefds,
|
||||
NULL, timeoutp);
|
||||
}
|
||||
(void)fcntl(fileno(stdin), F_SETFL, 0);
|
||||
(void)fcntl(fileno_stdin(), F_SETFL, 0);
|
||||
#else
|
||||
i = select(width, (void *)&readfds, (void *)&writefds,
|
||||
NULL, timeoutp);
|
||||
@@ -1886,11 +1912,11 @@ int MAIN(int argc, char **argv)
|
||||
goto shut;
|
||||
}
|
||||
}
|
||||
#if defined(OPENSSL_SYS_WINDOWS) || defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_NETWARE) || defined(OPENSSL_SYS_BEOS_R5)
|
||||
#if defined(OPENSSL_SYS_WINDOWS) || defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_NETWARE) || defined(OPENSSL_SYS_BEOS_R5) || defined(OPENSSL_SYS_VMS)
|
||||
/* Assume Windows/DOS/BeOS can always write */
|
||||
else if (!ssl_pending && write_tty)
|
||||
#else
|
||||
else if (!ssl_pending && FD_ISSET(fileno(stdout), &writefds))
|
||||
else if (!ssl_pending && FD_ISSET(fileno_stdout(), &writefds))
|
||||
#endif
|
||||
{
|
||||
#ifdef CHARSET_EBCDIC
|
||||
@@ -1988,7 +2014,7 @@ int MAIN(int argc, char **argv)
|
||||
#elif defined(OPENSSL_SYS_BEOS_R5)
|
||||
else if (stdin_set)
|
||||
#else
|
||||
else if (FD_ISSET(fileno(stdin), &readfds))
|
||||
else if (FD_ISSET(fileno_stdin(), &readfds))
|
||||
#endif
|
||||
{
|
||||
if (crlf) {
|
||||
|
||||
@@ -353,9 +353,8 @@ static unsigned int psk_server_cb(SSL *ssl, const char *identity,
|
||||
unsigned char *psk,
|
||||
unsigned int max_psk_len)
|
||||
{
|
||||
unsigned int psk_len = 0;
|
||||
int ret;
|
||||
BIGNUM *bn = NULL;
|
||||
long key_len = 0;
|
||||
unsigned char *key;
|
||||
|
||||
if (s_debug)
|
||||
BIO_printf(bio_s_out, "psk_server_cb\n");
|
||||
@@ -377,32 +376,26 @@ static unsigned int psk_server_cb(SSL *ssl, const char *identity,
|
||||
BIO_printf(bio_s_out, "PSK client identity found\n");
|
||||
|
||||
/* convert the PSK key to binary */
|
||||
ret = BN_hex2bn(&bn, psk_key);
|
||||
if (!ret) {
|
||||
BIO_printf(bio_err, "Could not convert PSK key '%s' to BIGNUM\n",
|
||||
key = string_to_hex(psk_key, &key_len);
|
||||
if (key == NULL) {
|
||||
BIO_printf(bio_err, "Could not convert PSK key '%s' to buffer\n",
|
||||
psk_key);
|
||||
if (bn)
|
||||
BN_free(bn);
|
||||
return 0;
|
||||
}
|
||||
if (BN_num_bytes(bn) > (int)max_psk_len) {
|
||||
if (key_len > (int)max_psk_len) {
|
||||
BIO_printf(bio_err,
|
||||
"psk buffer of callback is too small (%d) for key (%d)\n",
|
||||
max_psk_len, BN_num_bytes(bn));
|
||||
BN_free(bn);
|
||||
"psk buffer of callback is too small (%d) for key (%ld)\n",
|
||||
max_psk_len, key_len);
|
||||
OPENSSL_free(key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = BN_bn2bin(bn, psk);
|
||||
BN_free(bn);
|
||||
|
||||
if (ret < 0)
|
||||
goto out_err;
|
||||
psk_len = (unsigned int)ret;
|
||||
memcpy(psk, key, key_len);
|
||||
OPENSSL_free(key);
|
||||
|
||||
if (s_debug)
|
||||
BIO_printf(bio_s_out, "fetched PSK len=%d\n", psk_len);
|
||||
return psk_len;
|
||||
BIO_printf(bio_s_out, "fetched PSK len=%ld\n", key_len);
|
||||
return key_len;
|
||||
out_err:
|
||||
if (s_debug)
|
||||
BIO_printf(bio_err, "Error in PSK server callback\n");
|
||||
@@ -1144,6 +1137,7 @@ int MAIN(int argc, char *argv[])
|
||||
int crl_format = FORMAT_PEM;
|
||||
int crl_download = 0;
|
||||
STACK_OF(X509_CRL) *crls = NULL;
|
||||
int prot_opt = 0, no_prot_opt = 0;
|
||||
|
||||
meth = SSLv23_server_method();
|
||||
|
||||
@@ -1307,7 +1301,8 @@ int MAIN(int argc, char *argv[])
|
||||
if (badarg)
|
||||
goto bad;
|
||||
continue;
|
||||
} else if (args_ssl(&argv, &argc, cctx, &badarg, bio_err, &ssl_args)) {
|
||||
} else if (args_ssl(&argv, &argc, cctx, &badarg, bio_err, &ssl_args,
|
||||
&no_prot_opt)) {
|
||||
if (badarg)
|
||||
goto bad;
|
||||
continue;
|
||||
@@ -1451,32 +1446,40 @@ int MAIN(int argc, char *argv[])
|
||||
else if (strcmp(*argv, "-ssl2") == 0) {
|
||||
no_ecdhe = 1;
|
||||
meth = SSLv2_server_method();
|
||||
prot_opt++;
|
||||
}
|
||||
#endif
|
||||
#ifndef OPENSSL_NO_SSL3_METHOD
|
||||
else if (strcmp(*argv, "-ssl3") == 0) {
|
||||
meth = SSLv3_server_method();
|
||||
prot_opt++;
|
||||
}
|
||||
#endif
|
||||
#ifndef OPENSSL_NO_TLS1
|
||||
else if (strcmp(*argv, "-tls1") == 0) {
|
||||
meth = TLSv1_server_method();
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-tls1_1") == 0) {
|
||||
meth = TLSv1_1_server_method();
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-tls1_2") == 0) {
|
||||
meth = TLSv1_2_server_method();
|
||||
prot_opt++;
|
||||
}
|
||||
#endif
|
||||
#ifndef OPENSSL_NO_DTLS1
|
||||
else if (strcmp(*argv, "-dtls") == 0) {
|
||||
meth = DTLS_server_method();
|
||||
socket_type = SOCK_DGRAM;
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-dtls1") == 0) {
|
||||
meth = DTLSv1_server_method();
|
||||
socket_type = SOCK_DGRAM;
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-dtls1_2") == 0) {
|
||||
meth = DTLSv1_2_server_method();
|
||||
socket_type = SOCK_DGRAM;
|
||||
prot_opt++;
|
||||
} else if (strcmp(*argv, "-timeout") == 0)
|
||||
enable_timeouts = 1;
|
||||
else if (strcmp(*argv, "-mtu") == 0) {
|
||||
@@ -1586,6 +1589,17 @@ int MAIN(int argc, char *argv[])
|
||||
}
|
||||
#endif
|
||||
|
||||
if (prot_opt > 1) {
|
||||
BIO_printf(bio_err, "Cannot supply multiple protocol flags\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (prot_opt == 1 && no_prot_opt) {
|
||||
BIO_printf(bio_err, "Cannot supply both a protocol flag and "
|
||||
"\"-no_<prot>\"\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
SSL_load_error_strings();
|
||||
OpenSSL_add_ssl_algorithms();
|
||||
|
||||
@@ -2293,7 +2307,10 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context)
|
||||
}
|
||||
#endif
|
||||
|
||||
width = s + 1;
|
||||
if (fileno_stdin() > s)
|
||||
width = fileno_stdin() + 1;
|
||||
else
|
||||
width = s + 1;
|
||||
for (;;) {
|
||||
int read_from_terminal;
|
||||
int read_from_sslcon;
|
||||
@@ -2304,7 +2321,7 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context)
|
||||
if (!read_from_sslcon) {
|
||||
FD_ZERO(&readfds);
|
||||
#if !defined(OPENSSL_SYS_WINDOWS) && !defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_NETWARE) && !defined(OPENSSL_SYS_BEOS_R5)
|
||||
openssl_fdset(fileno(stdin), &readfds);
|
||||
openssl_fdset(fileno_stdin(), &readfds);
|
||||
#endif
|
||||
openssl_fdset(s, &readfds);
|
||||
/*
|
||||
@@ -2332,13 +2349,13 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context)
|
||||
/* Under BeOS-R5 the situation is similar to DOS */
|
||||
tv.tv_sec = 1;
|
||||
tv.tv_usec = 0;
|
||||
(void)fcntl(fileno(stdin), F_SETFL, O_NONBLOCK);
|
||||
(void)fcntl(fileno_stdin(), F_SETFL, O_NONBLOCK);
|
||||
i = select(width, (void *)&readfds, NULL, NULL, &tv);
|
||||
if ((i < 0) || (!i && read(fileno(stdin), buf, 0) < 0))
|
||||
if ((i < 0) || (!i && read(fileno_stdin(), buf, 0) < 0))
|
||||
continue;
|
||||
if (read(fileno(stdin), buf, 0) >= 0)
|
||||
if (read(fileno_stdin(), buf, 0) >= 0)
|
||||
read_from_terminal = 1;
|
||||
(void)fcntl(fileno(stdin), F_SETFL, 0);
|
||||
(void)fcntl(fileno_stdin(), F_SETFL, 0);
|
||||
#else
|
||||
if ((SSL_version(con) == DTLS1_VERSION) &&
|
||||
DTLSv1_get_timeout(con, &timeout))
|
||||
@@ -2355,7 +2372,7 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context)
|
||||
|
||||
if (i <= 0)
|
||||
continue;
|
||||
if (FD_ISSET(fileno(stdin), &readfds))
|
||||
if (FD_ISSET(fileno_stdin(), &readfds))
|
||||
read_from_terminal = 1;
|
||||
#endif
|
||||
if (FD_ISSET(s, &readfds))
|
||||
@@ -2382,6 +2399,7 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context)
|
||||
assert(lf_num == 0);
|
||||
} else
|
||||
i = raw_read_stdin(buf, bufsize);
|
||||
|
||||
if (!s_quiet && !s_brief) {
|
||||
if ((i <= 0) || (buf[0] == 'Q')) {
|
||||
BIO_printf(bio_s_out, "DONE\n");
|
||||
@@ -3371,7 +3389,7 @@ static int generate_session_id(const SSL *ssl, unsigned char *id,
|
||||
{
|
||||
unsigned int count = 0;
|
||||
do {
|
||||
if (RAND_pseudo_bytes(id, *id_len) < 0)
|
||||
if (RAND_bytes(id, *id_len) <= 0)
|
||||
return 0;
|
||||
/*
|
||||
* Prefix the session_id with the required prefix. NB: If our prefix
|
||||
|
||||
@@ -2614,6 +2614,10 @@ static int do_multi(int multi)
|
||||
static char sep[] = ":";
|
||||
|
||||
fds = malloc(multi * sizeof *fds);
|
||||
if (fds == NULL) {
|
||||
fprintf(stderr, "Out of memory in speed (do_multi)\n");
|
||||
exit(1);
|
||||
}
|
||||
for (n = 0; n < multi; ++n) {
|
||||
if (pipe(fd) == -1) {
|
||||
fprintf(stderr, "pipe failure\n");
|
||||
|
||||
@@ -765,4 +765,6 @@ int MAIN(int argc, char **argv)
|
||||
OPENSSL_EXIT(ret);
|
||||
}
|
||||
|
||||
#else
|
||||
static void *dummy = &dummy;
|
||||
#endif
|
||||
|
||||
@@ -115,43 +115,43 @@ int MAIN(int argc, char **argv)
|
||||
if (argc >= 1) {
|
||||
if (strcmp(*argv, "-CApath") == 0) {
|
||||
if (argc-- < 1)
|
||||
goto end;
|
||||
goto usage;
|
||||
CApath = *(++argv);
|
||||
} else if (strcmp(*argv, "-CAfile") == 0) {
|
||||
if (argc-- < 1)
|
||||
goto end;
|
||||
goto usage;
|
||||
CAfile = *(++argv);
|
||||
} else if (args_verify(&argv, &argc, &badarg, bio_err, &vpm)) {
|
||||
if (badarg)
|
||||
goto end;
|
||||
goto usage;
|
||||
continue;
|
||||
} else if (strcmp(*argv, "-untrusted") == 0) {
|
||||
if (argc-- < 1)
|
||||
goto end;
|
||||
goto usage;
|
||||
untfile = *(++argv);
|
||||
} else if (strcmp(*argv, "-trusted") == 0) {
|
||||
if (argc-- < 1)
|
||||
goto end;
|
||||
goto usage;
|
||||
trustfile = *(++argv);
|
||||
} else if (strcmp(*argv, "-CRLfile") == 0) {
|
||||
if (argc-- < 1)
|
||||
goto end;
|
||||
goto usage;
|
||||
crlfile = *(++argv);
|
||||
} else if (strcmp(*argv, "-crl_download") == 0)
|
||||
crl_download = 1;
|
||||
#ifndef OPENSSL_NO_ENGINE
|
||||
else if (strcmp(*argv, "-engine") == 0) {
|
||||
if (--argc < 1)
|
||||
goto end;
|
||||
goto usage;
|
||||
engine = *(++argv);
|
||||
}
|
||||
#endif
|
||||
else if (strcmp(*argv, "-help") == 0)
|
||||
goto end;
|
||||
goto usage;
|
||||
else if (strcmp(*argv, "-verbose") == 0)
|
||||
v_verbose = 1;
|
||||
else if (argv[0][0] == '-')
|
||||
goto end;
|
||||
goto usage;
|
||||
else
|
||||
break;
|
||||
argc--;
|
||||
@@ -228,7 +228,7 @@ int MAIN(int argc, char **argv)
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
end:
|
||||
usage:
|
||||
if (ret == 1) {
|
||||
BIO_printf(bio_err,
|
||||
"usage: verify [-verbose] [-CApath path] [-CAfile file] [-purpose purpose] [-crl_check]");
|
||||
@@ -247,6 +247,7 @@ int MAIN(int argc, char **argv)
|
||||
X509_PURPOSE_get0_name(ptmp));
|
||||
}
|
||||
}
|
||||
end:
|
||||
if (vpm)
|
||||
X509_VERIFY_PARAM_free(vpm);
|
||||
if (cert_ctx != NULL)
|
||||
|
||||
@@ -1105,6 +1105,10 @@ static int x509_certify(X509_STORE *ctx, char *CAfile, const EVP_MD *digest,
|
||||
EVP_PKEY *upkey;
|
||||
|
||||
upkey = X509_get_pubkey(xca);
|
||||
if (upkey == NULL) {
|
||||
BIO_printf(bio_err, "Error obtaining CA X509 public key\n");
|
||||
goto end;
|
||||
}
|
||||
EVP_PKEY_copy_parameters(upkey, pkey);
|
||||
EVP_PKEY_free(upkey);
|
||||
|
||||
@@ -1217,6 +1221,8 @@ static int sign(X509 *x, EVP_PKEY *pkey, int days, int clrext,
|
||||
EVP_PKEY *pktmp;
|
||||
|
||||
pktmp = X509_get_pubkey(x);
|
||||
if (pktmp == NULL)
|
||||
goto err;
|
||||
EVP_PKEY_copy_parameters(pktmp, pkey);
|
||||
EVP_PKEY_save_parameters(pktmp, 1);
|
||||
EVP_PKEY_free(pktmp);
|
||||
|
||||
@@ -1,7 +1,3 @@
|
||||
/*
|
||||
* $LP: LPlib/source/LPdir_unix.c,v 1.11 2004/09/23 22:07:22 _cvs_levitte Exp
|
||||
* $
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2004, Richard Levitte <richard@levitte.org>
|
||||
* All rights reserved.
|
||||
|
||||
@@ -1797,8 +1797,6 @@ $code.=<<___;
|
||||
b .Lxts_enc_done
|
||||
.align 4
|
||||
.Lxts_enc_6:
|
||||
vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak
|
||||
|
||||
veor @XMM[4], @XMM[4], @XMM[12]
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
add r4, sp, #0x90 @ pass key schedule
|
||||
@@ -1834,8 +1832,6 @@ $code.=<<___;
|
||||
|
||||
.align 5
|
||||
.Lxts_enc_5:
|
||||
vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
|
||||
|
||||
veor @XMM[3], @XMM[3], @XMM[11]
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
add r4, sp, #0x90 @ pass key schedule
|
||||
@@ -1864,8 +1860,6 @@ $code.=<<___;
|
||||
b .Lxts_enc_done
|
||||
.align 4
|
||||
.Lxts_enc_4:
|
||||
vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
|
||||
|
||||
veor @XMM[2], @XMM[2], @XMM[10]
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
add r4, sp, #0x90 @ pass key schedule
|
||||
@@ -1891,8 +1885,6 @@ $code.=<<___;
|
||||
b .Lxts_enc_done
|
||||
.align 4
|
||||
.Lxts_enc_3:
|
||||
vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
|
||||
|
||||
veor @XMM[1], @XMM[1], @XMM[9]
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
add r4, sp, #0x90 @ pass key schedule
|
||||
@@ -1917,8 +1909,6 @@ $code.=<<___;
|
||||
b .Lxts_enc_done
|
||||
.align 4
|
||||
.Lxts_enc_2:
|
||||
vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
|
||||
|
||||
veor @XMM[0], @XMM[0], @XMM[8]
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
add r4, sp, #0x90 @ pass key schedule
|
||||
@@ -1941,7 +1931,7 @@ $code.=<<___;
|
||||
.align 4
|
||||
.Lxts_enc_1:
|
||||
mov r0, sp
|
||||
veor @XMM[0], @XMM[8]
|
||||
veor @XMM[0], @XMM[0], @XMM[8]
|
||||
mov r1, sp
|
||||
vst1.8 {@XMM[0]}, [sp,:128]
|
||||
mov r2, $key
|
||||
@@ -2251,8 +2241,6 @@ $code.=<<___;
|
||||
b .Lxts_dec_done
|
||||
.align 4
|
||||
.Lxts_dec_5:
|
||||
vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
|
||||
|
||||
veor @XMM[3], @XMM[3], @XMM[11]
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
add r4, sp, #0x90 @ pass key schedule
|
||||
@@ -2281,8 +2269,6 @@ $code.=<<___;
|
||||
b .Lxts_dec_done
|
||||
.align 4
|
||||
.Lxts_dec_4:
|
||||
vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
|
||||
|
||||
veor @XMM[2], @XMM[2], @XMM[10]
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
add r4, sp, #0x90 @ pass key schedule
|
||||
@@ -2308,8 +2294,6 @@ $code.=<<___;
|
||||
b .Lxts_dec_done
|
||||
.align 4
|
||||
.Lxts_dec_3:
|
||||
vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
|
||||
|
||||
veor @XMM[1], @XMM[1], @XMM[9]
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
add r4, sp, #0x90 @ pass key schedule
|
||||
@@ -2334,8 +2318,6 @@ $code.=<<___;
|
||||
b .Lxts_dec_done
|
||||
.align 4
|
||||
.Lxts_dec_2:
|
||||
vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
|
||||
|
||||
veor @XMM[0], @XMM[0], @XMM[8]
|
||||
#ifndef BSAES_ASM_EXTENDED_KEY
|
||||
add r4, sp, #0x90 @ pass key schedule
|
||||
@@ -2358,12 +2340,12 @@ $code.=<<___;
|
||||
.align 4
|
||||
.Lxts_dec_1:
|
||||
mov r0, sp
|
||||
veor @XMM[0], @XMM[8]
|
||||
veor @XMM[0], @XMM[0], @XMM[8]
|
||||
mov r1, sp
|
||||
vst1.8 {@XMM[0]}, [sp,:128]
|
||||
mov r5, $magic @ preserve magic
|
||||
mov r2, $key
|
||||
mov r4, $fp @ preserve fp
|
||||
mov r5, $magic @ preserve magic
|
||||
|
||||
bl AES_decrypt
|
||||
|
||||
|
||||
@@ -60,7 +60,12 @@
|
||||
#include "cryptlib.h"
|
||||
#include <openssl/asn1.h>
|
||||
|
||||
static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c);
|
||||
static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c,
|
||||
int depth);
|
||||
static ASN1_STRING *int_d2i_ASN1_bytes(ASN1_STRING **a,
|
||||
const unsigned char **pp, long length,
|
||||
int Ptag, int Pclass, int depth,
|
||||
int *perr);
|
||||
/*
|
||||
* type is a 'bitmap' of acceptable string types.
|
||||
*/
|
||||
@@ -99,7 +104,7 @@ ASN1_STRING *d2i_ASN1_type_bytes(ASN1_STRING **a, const unsigned char **pp,
|
||||
ret = (*a);
|
||||
|
||||
if (len != 0) {
|
||||
s = (unsigned char *)OPENSSL_malloc((int)len + 1);
|
||||
s = OPENSSL_malloc((int)len + 1);
|
||||
if (s == NULL) {
|
||||
i = ERR_R_MALLOC_FAILURE;
|
||||
goto err;
|
||||
@@ -154,15 +159,38 @@ int i2d_ASN1_bytes(ASN1_STRING *a, unsigned char **pp, int tag, int xclass)
|
||||
return (r);
|
||||
}
|
||||
|
||||
/*
|
||||
* Maximum recursion depth of d2i_ASN1_bytes(): much more than should be
|
||||
* encountered in practice.
|
||||
*/
|
||||
|
||||
#define ASN1_BYTES_MAXDEPTH 20
|
||||
|
||||
ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp,
|
||||
long length, int Ptag, int Pclass)
|
||||
{
|
||||
int err = 0;
|
||||
ASN1_STRING *s = int_d2i_ASN1_bytes(a, pp, length, Ptag, Pclass, 0, &err);
|
||||
if (err != 0)
|
||||
ASN1err(ASN1_F_D2I_ASN1_BYTES, err);
|
||||
return s;
|
||||
}
|
||||
|
||||
static ASN1_STRING *int_d2i_ASN1_bytes(ASN1_STRING **a,
|
||||
const unsigned char **pp, long length,
|
||||
int Ptag, int Pclass,
|
||||
int depth, int *perr)
|
||||
{
|
||||
ASN1_STRING *ret = NULL;
|
||||
const unsigned char *p;
|
||||
unsigned char *s;
|
||||
long len;
|
||||
int inf, tag, xclass;
|
||||
int i = 0;
|
||||
|
||||
if (depth > ASN1_BYTES_MAXDEPTH) {
|
||||
*perr = ASN1_R_NESTED_ASN1_STRING;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((a == NULL) || ((*a) == NULL)) {
|
||||
if ((ret = ASN1_STRING_new()) == NULL)
|
||||
@@ -173,18 +201,19 @@ ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp,
|
||||
p = *pp;
|
||||
inf = ASN1_get_object(&p, &len, &tag, &xclass, length);
|
||||
if (inf & 0x80) {
|
||||
i = ASN1_R_BAD_OBJECT_HEADER;
|
||||
*perr = ASN1_R_BAD_OBJECT_HEADER;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (tag != Ptag) {
|
||||
i = ASN1_R_WRONG_TAG;
|
||||
*perr = ASN1_R_WRONG_TAG;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (inf & V_ASN1_CONSTRUCTED) {
|
||||
ASN1_const_CTX c;
|
||||
|
||||
c.error = 0;
|
||||
c.pp = pp;
|
||||
c.p = p;
|
||||
c.inf = inf;
|
||||
@@ -192,17 +221,18 @@ ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp,
|
||||
c.tag = Ptag;
|
||||
c.xclass = Pclass;
|
||||
c.max = (length == 0) ? 0 : (p + length);
|
||||
if (!asn1_collate_primitive(ret, &c))
|
||||
if (!asn1_collate_primitive(ret, &c, depth)) {
|
||||
*perr = c.error;
|
||||
goto err;
|
||||
else {
|
||||
} else {
|
||||
p = c.p;
|
||||
}
|
||||
} else {
|
||||
if (len != 0) {
|
||||
if ((ret->length < len) || (ret->data == NULL)) {
|
||||
s = (unsigned char *)OPENSSL_malloc((int)len + 1);
|
||||
s = OPENSSL_malloc((int)len + 1);
|
||||
if (s == NULL) {
|
||||
i = ERR_R_MALLOC_FAILURE;
|
||||
*perr = ERR_R_MALLOC_FAILURE;
|
||||
goto err;
|
||||
}
|
||||
if (ret->data != NULL)
|
||||
@@ -230,7 +260,6 @@ ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp,
|
||||
err:
|
||||
if ((ret != NULL) && ((a == NULL) || (*a != ret)))
|
||||
ASN1_STRING_free(ret);
|
||||
ASN1err(ASN1_F_D2I_ASN1_BYTES, i);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
@@ -242,7 +271,8 @@ ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp,
|
||||
* There have been a few bug fixes for this function from Paul Keogh
|
||||
* <paul.keogh@sse.ie>, many thanks to him
|
||||
*/
|
||||
static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c)
|
||||
static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c,
|
||||
int depth)
|
||||
{
|
||||
ASN1_STRING *os = NULL;
|
||||
BUF_MEM b;
|
||||
@@ -270,9 +300,8 @@ static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c)
|
||||
}
|
||||
|
||||
c->q = c->p;
|
||||
if (d2i_ASN1_bytes(&os, &c->p, c->max - c->p, c->tag, c->xclass)
|
||||
== NULL) {
|
||||
c->error = ERR_R_ASN1_LIB;
|
||||
if (int_d2i_ASN1_bytes(&os, &c->p, c->max - c->p, c->tag, c->xclass,
|
||||
depth + 1, &c->error) == NULL) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
@@ -297,7 +326,6 @@ static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c)
|
||||
ASN1_STRING_free(os);
|
||||
return (1);
|
||||
err:
|
||||
ASN1err(ASN1_F_ASN1_COLLATE_PRIMITIVE, c->error);
|
||||
if (os != NULL)
|
||||
ASN1_STRING_free(os);
|
||||
if (b.data != NULL)
|
||||
|
||||
@@ -73,7 +73,7 @@ int i2d_ASN1_OBJECT(ASN1_OBJECT *a, unsigned char **pp)
|
||||
return (0);
|
||||
|
||||
objsize = ASN1_object_size(0, a->length, V_ASN1_OBJECT);
|
||||
if (pp == NULL)
|
||||
if (pp == NULL || objsize == -1)
|
||||
return objsize;
|
||||
|
||||
p = *pp;
|
||||
@@ -174,8 +174,12 @@ int a2d_ASN1_OBJECT(unsigned char *out, int olen, const char *buf, int num)
|
||||
if (!tmp)
|
||||
goto err;
|
||||
}
|
||||
while (blsize--)
|
||||
tmp[i++] = (unsigned char)BN_div_word(bl, 0x80L);
|
||||
while (blsize--) {
|
||||
BN_ULONG t = BN_div_word(bl, 0x80L);
|
||||
if (t == (BN_ULONG)-1)
|
||||
goto err;
|
||||
tmp[i++] = (unsigned char)t;
|
||||
}
|
||||
} else {
|
||||
|
||||
for (;;) {
|
||||
|
||||
@@ -57,6 +57,7 @@
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <limits.h>
|
||||
#include "cryptlib.h"
|
||||
#include <openssl/asn1_mac.h>
|
||||
|
||||
@@ -98,10 +99,14 @@ int i2d_ASN1_SET(STACK_OF(OPENSSL_BLOCK) *a, unsigned char **pp,
|
||||
|
||||
if (a == NULL)
|
||||
return (0);
|
||||
for (i = sk_OPENSSL_BLOCK_num(a) - 1; i >= 0; i--)
|
||||
for (i = sk_OPENSSL_BLOCK_num(a) - 1; i >= 0; i--) {
|
||||
int tmplen = i2d(sk_OPENSSL_BLOCK_value(a, i), NULL);
|
||||
if (tmplen > INT_MAX - ret)
|
||||
return -1;
|
||||
ret += i2d(sk_OPENSSL_BLOCK_value(a, i), NULL);
|
||||
}
|
||||
r = ASN1_object_size(1, ret, ex_tag);
|
||||
if (pp == NULL)
|
||||
if (pp == NULL || r == -1)
|
||||
return (r);
|
||||
|
||||
p = *pp;
|
||||
|
||||
@@ -337,7 +337,7 @@ static const signed char tag2nbyte[] = {
|
||||
-1, -1, -1, -1, -1, /* 5-9 */
|
||||
-1, -1, 0, -1, /* 10-13 */
|
||||
-1, -1, -1, -1, /* 14-17 */
|
||||
-1, 1, 1, /* 18-20 */
|
||||
1, 1, 1, /* 18-20 */
|
||||
-1, 1, 1, 1, /* 21-24 */
|
||||
-1, 1, -1, /* 25-27 */
|
||||
4, -1, 2 /* 28-30 */
|
||||
|
||||
@@ -250,6 +250,7 @@ int ASN1_STRING_TABLE_add(int nid,
|
||||
}
|
||||
tmp->flags = flags | STABLE_FLAGS_MALLOC;
|
||||
tmp->nid = nid;
|
||||
tmp->minsize = tmp->maxsize = -1;
|
||||
new_nid = 1;
|
||||
} else
|
||||
tmp->flags = (tmp->flags & STABLE_FLAGS_MALLOC) | flags;
|
||||
|
||||
@@ -93,7 +93,9 @@ static const EVP_PKEY_ASN1_METHOD *standard_methods[] = {
|
||||
&eckey_asn1_meth,
|
||||
#endif
|
||||
&hmac_asn1_meth,
|
||||
#ifndef OPENSSL_NO_CMAC
|
||||
&cmac_asn1_meth,
|
||||
#endif
|
||||
#ifndef OPENSSL_NO_DH
|
||||
&dhx_asn1_meth
|
||||
#endif
|
||||
|
||||
@@ -256,26 +256,30 @@ static void asn1_put_length(unsigned char **pp, int length)
|
||||
|
||||
int ASN1_object_size(int constructed, int length, int tag)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = length;
|
||||
ret++;
|
||||
int ret = 1;
|
||||
if (length < 0)
|
||||
return -1;
|
||||
if (tag >= 31) {
|
||||
while (tag > 0) {
|
||||
tag >>= 7;
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
if (constructed == 2)
|
||||
return ret + 3;
|
||||
ret++;
|
||||
if (length > 127) {
|
||||
while (length > 0) {
|
||||
length >>= 8;
|
||||
ret++;
|
||||
if (constructed == 2) {
|
||||
ret += 3;
|
||||
} else {
|
||||
ret++;
|
||||
if (length > 127) {
|
||||
int tmplen = length;
|
||||
while (tmplen > 0) {
|
||||
tmplen >>= 8;
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return (ret);
|
||||
if (ret >= INT_MAX - length)
|
||||
return -1;
|
||||
return ret + length;
|
||||
}
|
||||
|
||||
static int _asn1_Finish(ASN1_const_CTX *c)
|
||||
@@ -324,7 +328,7 @@ int asn1_GetSequence(ASN1_const_CTX *c, long *length)
|
||||
return (0);
|
||||
}
|
||||
if (c->inf == (1 | V_ASN1_CONSTRUCTED))
|
||||
c->slen = *length + *(c->pp) - c->p;
|
||||
c->slen = *length;
|
||||
c->eos = 0;
|
||||
return (1);
|
||||
}
|
||||
@@ -366,7 +370,7 @@ int ASN1_STRING_set(ASN1_STRING *str, const void *_data, int len)
|
||||
else
|
||||
len = strlen(data);
|
||||
}
|
||||
if ((str->length < len) || (str->data == NULL)) {
|
||||
if ((str->length <= len) || (str->data == NULL)) {
|
||||
c = str->data;
|
||||
if (c == NULL)
|
||||
str->data = OPENSSL_malloc(len + 1);
|
||||
|
||||
@@ -289,7 +289,7 @@ int SMIME_write_ASN1(BIO *bio, ASN1_VALUE *val, BIO *data, int flags,
|
||||
if ((flags & SMIME_DETACHED) && data) {
|
||||
/* We want multipart/signed */
|
||||
/* Generate a random boundary */
|
||||
if (RAND_pseudo_bytes((unsigned char *)bound, 32) < 0)
|
||||
if (RAND_bytes((unsigned char *)bound, 32) <= 0)
|
||||
return 0;
|
||||
for (i = 0; i < 32; i++) {
|
||||
c = bound[i] & 0xf;
|
||||
@@ -623,6 +623,8 @@ static int multi_split(BIO *bio, char *bound, STACK_OF(BIO) **ret)
|
||||
if (bpart)
|
||||
sk_BIO_push(parts, bpart);
|
||||
bpart = BIO_new(BIO_s_mem());
|
||||
if (bpart == NULL)
|
||||
return 1;
|
||||
BIO_set_mem_eof_return(bpart, 0);
|
||||
} else if (eol)
|
||||
BIO_write(bpart, "\r\n", 2);
|
||||
|
||||
@@ -170,10 +170,12 @@ static int asn1_bio_init(BIO_ASN1_BUF_CTX *ctx, int size)
|
||||
ctx->copylen = 0;
|
||||
ctx->asn1_class = V_ASN1_UNIVERSAL;
|
||||
ctx->asn1_tag = V_ASN1_OCTET_STRING;
|
||||
ctx->ex_buf = 0;
|
||||
ctx->ex_pos = 0;
|
||||
ctx->ex_buf = NULL;
|
||||
ctx->ex_len = 0;
|
||||
ctx->ex_pos = 0;
|
||||
ctx->state = ASN1_STATE_START;
|
||||
ctx->prefix = ctx->prefix_free = ctx->suffix = ctx->suffix_free = NULL;
|
||||
ctx->ex_arg = NULL;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -136,6 +136,7 @@ BIO *BIO_new_NDEF(BIO *out, ASN1_VALUE *val, const ASN1_ITEM *it)
|
||||
ndef_aux->ndef_bio = sarg.ndef_bio;
|
||||
ndef_aux->boundary = sarg.boundary;
|
||||
ndef_aux->out = out;
|
||||
ndef_aux->derbuf = NULL;
|
||||
|
||||
BIO_ctrl(asn_bio, BIO_C_SET_EX_ARG, 0, ndef_aux);
|
||||
|
||||
|
||||
@@ -67,17 +67,19 @@ $arr[ord("?")] |= $PSTRING_CHAR;
|
||||
# Now generate the C code
|
||||
|
||||
print <<EOF;
|
||||
/* Auto generated with chartype.pl script.
|
||||
* Mask of various character properties
|
||||
/*
|
||||
* Auto generated with chartype.pl script. Mask of various character
|
||||
* properties
|
||||
*/
|
||||
|
||||
static unsigned char char_type[] = {
|
||||
static const unsigned char char_type[] = {
|
||||
EOF
|
||||
|
||||
print " ";
|
||||
for($i = 0; $i < 128; $i++) {
|
||||
print("\n") if($i && (($i % 16) == 0));
|
||||
printf("%2d", $arr[$i]);
|
||||
print("\n ") if($i && (($i % 16) == 0));
|
||||
printf(" %d", $arr[$i]);
|
||||
print(",") if ($i != 127);
|
||||
}
|
||||
print("\n};\n\n");
|
||||
print("\n};\n");
|
||||
|
||||
|
||||
@@ -97,15 +97,17 @@ EVP_PKEY *d2i_PrivateKey(int type, EVP_PKEY **a, const unsigned char **pp,
|
||||
if (!ret->ameth->old_priv_decode ||
|
||||
!ret->ameth->old_priv_decode(ret, &p, length)) {
|
||||
if (ret->ameth->priv_decode) {
|
||||
EVP_PKEY *tmp;
|
||||
PKCS8_PRIV_KEY_INFO *p8 = NULL;
|
||||
p8 = d2i_PKCS8_PRIV_KEY_INFO(NULL, &p, length);
|
||||
if (!p8)
|
||||
goto err;
|
||||
EVP_PKEY_free(ret);
|
||||
ret = EVP_PKCS82PKEY(p8);
|
||||
tmp = EVP_PKCS82PKEY(p8);
|
||||
PKCS8_PRIV_KEY_INFO_free(p8);
|
||||
if (ret == NULL)
|
||||
if (tmp == NULL)
|
||||
goto err;
|
||||
EVP_PKEY_free(ret);
|
||||
ret = tmp;
|
||||
} else {
|
||||
ASN1err(ASN1_F_D2I_PRIVATEKEY, ERR_R_ASN1_LIB);
|
||||
goto err;
|
||||
|
||||
@@ -160,8 +160,6 @@ int a2i_ASN1_ENUMERATED(BIO *bp, ASN1_ENUMERATED *bs, char *buf, int size)
|
||||
i * 2);
|
||||
if (sp == NULL) {
|
||||
ASN1err(ASN1_F_A2I_ASN1_ENUMERATED, ERR_R_MALLOC_FAILURE);
|
||||
if (s != NULL)
|
||||
OPENSSL_free(s);
|
||||
goto err;
|
||||
}
|
||||
s = sp;
|
||||
@@ -199,5 +197,7 @@ int a2i_ASN1_ENUMERATED(BIO *bp, ASN1_ENUMERATED *bs, char *buf, int size)
|
||||
err_sl:
|
||||
ASN1err(ASN1_F_A2I_ASN1_ENUMERATED, ASN1_R_SHORT_LINE);
|
||||
}
|
||||
if (ret != 1)
|
||||
OPENSSL_free(s);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -172,8 +172,6 @@ int a2i_ASN1_INTEGER(BIO *bp, ASN1_INTEGER *bs, char *buf, int size)
|
||||
sp = OPENSSL_realloc_clean(s, slen, num + i * 2);
|
||||
if (sp == NULL) {
|
||||
ASN1err(ASN1_F_A2I_ASN1_INTEGER, ERR_R_MALLOC_FAILURE);
|
||||
if (s != NULL)
|
||||
OPENSSL_free(s);
|
||||
goto err;
|
||||
}
|
||||
s = sp;
|
||||
@@ -211,5 +209,7 @@ int a2i_ASN1_INTEGER(BIO *bp, ASN1_INTEGER *bs, char *buf, int size)
|
||||
err_sl:
|
||||
ASN1err(ASN1_F_A2I_ASN1_INTEGER, ASN1_R_SHORT_LINE);
|
||||
}
|
||||
if (ret != 1)
|
||||
OPENSSL_free(s);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -166,8 +166,6 @@ int a2i_ASN1_STRING(BIO *bp, ASN1_STRING *bs, char *buf, int size)
|
||||
i * 2);
|
||||
if (sp == NULL) {
|
||||
ASN1err(ASN1_F_A2I_ASN1_STRING, ERR_R_MALLOC_FAILURE);
|
||||
if (s != NULL)
|
||||
OPENSSL_free(s);
|
||||
goto err;
|
||||
}
|
||||
s = sp;
|
||||
@@ -205,5 +203,7 @@ int a2i_ASN1_STRING(BIO *bp, ASN1_STRING *bs, char *buf, int size)
|
||||
err_sl:
|
||||
ASN1err(ASN1_F_A2I_ASN1_STRING, ASN1_R_SHORT_LINE);
|
||||
}
|
||||
if (ret != 1)
|
||||
OPENSSL_free(s);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -69,10 +69,13 @@ int i2d_PrivateKey(EVP_PKEY *a, unsigned char **pp)
|
||||
}
|
||||
if (a->ameth && a->ameth->priv_encode) {
|
||||
PKCS8_PRIV_KEY_INFO *p8 = EVP_PKEY2PKCS8(a);
|
||||
int ret = i2d_PKCS8_PRIV_KEY_INFO(p8, pp);
|
||||
PKCS8_PRIV_KEY_INFO_free(p8);
|
||||
int ret = 0;
|
||||
if (p8 != NULL) {
|
||||
ret = i2d_PKCS8_PRIV_KEY_INFO(p8, pp);
|
||||
PKCS8_PRIV_KEY_INFO_free(p8);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
ASN1err(ASN1_F_I2D_PRIVATEKEY, ASN1_R_UNSUPPORTED_PUBLIC_KEY_TYPE);
|
||||
return (-1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -101,7 +101,7 @@ int PKCS5_pbe_set0_algor(X509_ALGOR *algor, int alg, int iter,
|
||||
sstr = ASN1_STRING_data(pbe->salt);
|
||||
if (salt)
|
||||
memcpy(sstr, salt, saltlen);
|
||||
else if (RAND_pseudo_bytes(sstr, saltlen) < 0)
|
||||
else if (RAND_bytes(sstr, saltlen) <= 0)
|
||||
goto err;
|
||||
|
||||
if (!ASN1_item_pack(pbe, ASN1_ITEM_rptr(PBEPARAM), &pbe_str)) {
|
||||
|
||||
@@ -120,7 +120,7 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter,
|
||||
if (EVP_CIPHER_iv_length(cipher)) {
|
||||
if (aiv)
|
||||
memcpy(iv, aiv, EVP_CIPHER_iv_length(cipher));
|
||||
else if (RAND_pseudo_bytes(iv, EVP_CIPHER_iv_length(cipher)) < 0)
|
||||
else if (RAND_bytes(iv, EVP_CIPHER_iv_length(cipher)) <= 0)
|
||||
goto err;
|
||||
}
|
||||
|
||||
@@ -225,7 +225,7 @@ X509_ALGOR *PKCS5_pbkdf2_set(int iter, unsigned char *salt, int saltlen,
|
||||
|
||||
if (salt)
|
||||
memcpy(osalt->data, salt, saltlen);
|
||||
else if (RAND_pseudo_bytes(osalt->data, saltlen) < 0)
|
||||
else if (RAND_bytes(osalt->data, saltlen) <= 0)
|
||||
goto merr;
|
||||
|
||||
if (iter <= 0)
|
||||
|
||||
@@ -196,6 +196,7 @@ int X509_REQ_print_ex(BIO *bp, X509_REQ *x, unsigned long nmflags,
|
||||
if (BIO_puts(bp, ":") <= 0)
|
||||
goto err;
|
||||
if ((type == V_ASN1_PRINTABLESTRING) ||
|
||||
(type == V_ASN1_UTF8STRING) ||
|
||||
(type == V_ASN1_T61STRING) ||
|
||||
(type == V_ASN1_IA5STRING)) {
|
||||
if (BIO_write(bp, (char *)bs->data, bs->length)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user