Mirror of git://gcc.gnu.org/git/gcc.git (synced 2024-12-13 15:30:58 +08:00)
runtime: copy memory hash code from Go 1.7
Rewrite the AES hashing code from gc assembler to C code using intrinsics.  The resulting code generates the same hash code for the same input as the gc code--that doesn't matter as such, but testing it ensures that the C code does something useful.

Also change mips64pe32le to mips64p32le in configure script--noticed during CL review.

Reviewed-on: https://go-review.googlesource.com/34022
From-SVN: r243445
parent b2264b0964
commit 453060a906
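The patch routes runtime.memhash either to the new C aeshashbody or to the portable hash in hash32.go/hash64.go, depending on CPU support detected at startup. The sketch below is a standalone illustration of that dispatch shape only; it is not code from the patch, and both hash bodies here are stand-ins (the mixing constant and the XOR tweak are arbitrary illustrative values).

package main

import (
	"fmt"
	"math/bits"
)

// useAeshash stands in for the flag alginit sets after checking CPUID bits.
var useAeshash bool

// portableHash stands in for the hash64.go fallback path.
func portableHash(p []byte, seed uint64) uint64 {
	h := seed
	for _, b := range p {
		h = bits.RotateLeft64((h^uint64(b))*0x9e3779b97f4a7c15, 31)
	}
	return h
}

// aesHash stands in for the C aeshashbody; the real one uses AESENC intrinsics.
func aesHash(p []byte, seed uint64) uint64 {
	return portableHash(p, seed^0xa5a5a5a5)
}

// memhash mirrors the dispatch: take the AES path only when the CPU supports it.
func memhash(p []byte, seed uint64) uint64 {
	if useAeshash {
		return aesHash(p, seed)
	}
	return portableHash(p, seed)
}

func main() {
	fmt.Printf("%#x\n", memhash([]byte("hello"), 1))
}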
@@ -1,4 +1,4 @@
2442fca7be8a4f51ddc91070fa69ef66e24593ac
78e3527fcaf4ffd33b22e39a56e5d076844302be

The first line of this file holds the git revision number of the last
merge done from the gofrontend repository.

@@ -1648,7 +1648,7 @@ Type::type_functions(Gogo* gogo, Named_type* name, Function_type* hash_fntype,
  const char* equal_fnname;
  if (this->compare_is_identity(gogo))
    {
      hash_fnname = "__go_type_hash_identity";
      hash_fnname = "runtime.memhash";
      equal_fnname = "__go_type_equal_identity";
    }
  else

@@ -422,6 +422,7 @@ endif
endif

runtime_files = \
	runtime/aeshash.c \
	runtime/go-assert.c \
	runtime/go-breakpoint.c \
	runtime/go-caller.c \

@@ -189,7 +189,7 @@ libgo_llgo_la_DEPENDENCIES = $(am__DEPENDENCIES_4)
@LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@am__objects_4 = \
@LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@	getncpu-bsd.lo
@LIBGO_IS_LINUX_TRUE@am__objects_4 = getncpu-linux.lo
am__objects_5 = go-assert.lo go-breakpoint.lo go-caller.lo \
am__objects_5 = aeshash.lo go-assert.lo go-breakpoint.lo go-caller.lo \
	go-callers.lo go-cdiv.lo go-cgo.lo go-construct-map.lo \
	go-ffi.lo go-fieldtrack.lo go-matherr.lo go-memclr.lo \
	go-memcmp.lo go-memequal.lo go-memmove.lo go-nanotime.lo \

@@ -767,6 +767,7 @@ toolexeclibgounicode_DATA = \
@LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@runtime_getncpu_file = runtime/getncpu-bsd.c
@LIBGO_IS_LINUX_TRUE@runtime_getncpu_file = runtime/getncpu-linux.c
runtime_files = \
	runtime/aeshash.c \
	runtime/go-assert.c \
	runtime/go-breakpoint.c \
	runtime/go-caller.c \

@@ -1446,6 +1447,7 @@ mostlyclean-compile:
distclean-compile:
	-rm -f *.tab.c

@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aeshash.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env_posix.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-bsd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-irix.Plo@am__quote@

@@ -1573,6 +1575,13 @@ libgolibbegin_a-go-libmain.obj: runtime/go-libmain.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgolibbegin_a_CFLAGS) $(CFLAGS) -c -o libgolibbegin_a-go-libmain.obj `if test -f 'runtime/go-libmain.c'; then $(CYGPATH_W) 'runtime/go-libmain.c'; else $(CYGPATH_W) '$(srcdir)/runtime/go-libmain.c'; fi`

aeshash.lo: runtime/aeshash.c
@am__fastdepCC_TRUE@	$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT aeshash.lo -MD -MP -MF $(DEPDIR)/aeshash.Tpo -c -o aeshash.lo `test -f 'runtime/aeshash.c' || echo '$(srcdir)/'`runtime/aeshash.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/aeshash.Tpo $(DEPDIR)/aeshash.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='runtime/aeshash.c' object='aeshash.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o aeshash.lo `test -f 'runtime/aeshash.c' || echo '$(srcdir)/'`runtime/aeshash.c

go-assert.lo: runtime/go-assert.c
@am__fastdepCC_TRUE@	$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-assert.lo -MD -MP -MF $(DEPDIR)/go-assert.Tpo -c -o go-assert.lo `test -f 'runtime/go-assert.c' || echo '$(srcdir)/'`runtime/go-assert.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-assert.Tpo $(DEPDIR)/go-assert.Plo

libgo/configure (vendored, 2 changed lines)
@@ -13624,7 +13624,7 @@ esac
# supported by the gofrontend and all architectures supported by the
# gc toolchain.
# N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch).
ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64pe32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64p32le ppc ppc64 ppc64le s390 s390x sparc sparc64"

# All known GOARCH_FAMILY values.
ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64"

@@ -197,7 +197,7 @@ AC_SUBST(USE_DEJAGNU)
# supported by the gofrontend and all architectures supported by the
# gc toolchain.
# N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch).
ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64pe32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64p32le ppc ppc64 ppc64le s390 s390x sparc sparc64"

# All known GOARCH_FAMILY values.
ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64"

@@ -23,12 +23,29 @@ import (
//go:linkname efacevaleq runtime.efacevaleq
//go:linkname eqstring runtime.eqstring
//go:linkname cmpstring runtime.cmpstring
//
// Temporary to be called from C code.
//go:linkname alginit runtime.alginit

const (
	c0 = uintptr((8-sys.PtrSize)/4*2860486313 + (sys.PtrSize-4)/4*33054211828000289)
	c1 = uintptr((8-sys.PtrSize)/4*3267000013 + (sys.PtrSize-4)/4*23344194077549503)
)

var useAeshash bool

// in C code
func aeshashbody(p unsafe.Pointer, h, s uintptr, sched []byte) uintptr

func aeshash(p unsafe.Pointer, h, s uintptr) uintptr {
	return aeshashbody(p, h, s, aeskeysched[:])
}

func aeshashstr(p unsafe.Pointer, h uintptr) uintptr {
	ps := (*stringStruct)(p)
	return aeshashbody(unsafe.Pointer(ps.str), h, uintptr(ps.len), aeskeysched[:])
}

func interhash(p unsafe.Pointer, h uintptr, size uintptr) uintptr {
	a := (*iface)(p)
	tab := a.tab

@@ -198,7 +215,35 @@ func cmpstring(x, y string) int {

// Force the creation of function descriptors for equality and hash
// functions. These will be referenced directly by the compiler.
var _ = memhash
var _ = interhash
var _ = interequal
var _ = nilinterhash
var _ = nilinterequal

const hashRandomBytes = sys.PtrSize / 4 * 64

// used in asm_{386,amd64}.s to seed the hash function
var aeskeysched [hashRandomBytes]byte

// used in hash{32,64}.go to seed the hash function
var hashkey [4]uintptr

func alginit() {
	// Install aes hash algorithm if we have the instructions we need
	if (GOARCH == "386" || GOARCH == "amd64") &&
		GOOS != "nacl" &&
		cpuid_ecx&(1<<25) != 0 && // aes (aesenc)
		cpuid_ecx&(1<<9) != 0 && // sse3 (pshufb)
		cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q})
		useAeshash = true
		// Initialize with random data so hash collisions will be hard to engineer.
		getRandomData(aeskeysched[:])
		return
	}
	getRandomData((*[len(hashkey) * sys.PtrSize]byte)(unsafe.Pointer(&hashkey))[:])
	hashkey[0] |= 1 // make sure these numbers are odd
	hashkey[1] |= 1
	hashkey[2] |= 1
	hashkey[3] |= 1
}

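alginit above forces every hashkey entry to be odd before hash32.go/hash64.go use it as a multiplier. A standalone sketch of why (not part of the patch): an odd multiplier is invertible modulo 2^64, so multiplying the seed by it never merges two distinct seeds, while an even multiplier can.

package main

import "fmt"

func main() {
	// Two seeds that differ only in the top bit.
	a, b := uint64(1), uint64(1)|1<<63

	const even, odd = 2, 3
	fmt.Println(a*even == b*even) // true: the even multiplier collapses them
	fmt.Println(a*odd == b*odd)   // false: the odd multiplier keeps them apart
}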
libgo/go/runtime/hash32.go (new file, 94 lines added)
@@ -0,0 +1,94 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Hashing algorithm inspired by
// xxhash: https://code.google.com/p/xxhash/
// cityhash: https://code.google.com/p/cityhash/

// +build 386 arm armbe m68k mipso32 mipsn32 mips mipsle ppc s390 sparc

package runtime

import "unsafe"

// For gccgo, use go:linkname to rename compiler-called functions to
// themselves, so that the compiler will export them.
//
//go:linkname memhash runtime.memhash

const (
	// Constants for multiplication: four random odd 32-bit numbers.
	m1 = 3168982561
	m2 = 3339683297
	m3 = 832293441
	m4 = 2336365089
)

func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
	if GOARCH == "386" && GOOS != "nacl" && useAeshash {
		return aeshash(p, seed, s)
	}
	h := uint32(seed + s*hashkey[0])
tail:
	switch {
	case s == 0:
	case s < 4:
		h ^= uint32(*(*byte)(p))
		h ^= uint32(*(*byte)(add(p, s>>1))) << 8
		h ^= uint32(*(*byte)(add(p, s-1))) << 16
		h = rotl_15(h*m1) * m2
	case s == 4:
		h ^= readUnaligned32(p)
		h = rotl_15(h*m1) * m2
	case s <= 8:
		h ^= readUnaligned32(p)
		h = rotl_15(h*m1) * m2
		h ^= readUnaligned32(add(p, s-4))
		h = rotl_15(h*m1) * m2
	case s <= 16:
		h ^= readUnaligned32(p)
		h = rotl_15(h*m1) * m2
		h ^= readUnaligned32(add(p, 4))
		h = rotl_15(h*m1) * m2
		h ^= readUnaligned32(add(p, s-8))
		h = rotl_15(h*m1) * m2
		h ^= readUnaligned32(add(p, s-4))
		h = rotl_15(h*m1) * m2
	default:
		v1 := h
		v2 := uint32(seed * hashkey[1])
		v3 := uint32(seed * hashkey[2])
		v4 := uint32(seed * hashkey[3])
		for s >= 16 {
			v1 ^= readUnaligned32(p)
			v1 = rotl_15(v1*m1) * m2
			p = add(p, 4)
			v2 ^= readUnaligned32(p)
			v2 = rotl_15(v2*m2) * m3
			p = add(p, 4)
			v3 ^= readUnaligned32(p)
			v3 = rotl_15(v3*m3) * m4
			p = add(p, 4)
			v4 ^= readUnaligned32(p)
			v4 = rotl_15(v4*m4) * m1
			p = add(p, 4)
			s -= 16
		}
		h = v1 ^ v2 ^ v3 ^ v4
		goto tail
	}
	h ^= h >> 17
	h *= m3
	h ^= h >> 13
	h *= m4
	h ^= h >> 16
	return uintptr(h)
}

// Note: in order to get the compiler to issue rotl instructions, we
// need to constant fold the shift amount by hand.
// TODO: convince the compiler to issue rotl instructions after inlining.
func rotl_15(x uint32) uint32 {
	return (x << 15) | (x >> (32 - 15))
}

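The note above explains that the rotate is spelled with a hand-folded shift constant so the compiler emits a rotl instruction. A quick standalone check (assumption: uses math/bits, which postdates this patch) that the folded form really is a 15-bit left rotate:

package main

import (
	"fmt"
	"math/bits"
)

// rotl_15 is a copy of the helper above, for a self-contained check.
func rotl_15(x uint32) uint32 { return (x << 15) | (x >> (32 - 15)) }

func main() {
	for _, x := range []uint32{0, 1, 0xdeadbeef, 1 << 31} {
		fmt.Println(rotl_15(x) == bits.RotateLeft32(x, 15)) // always true
	}
}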
libgo/go/runtime/hash64.go (new file, 94 lines added)
@@ -0,0 +1,94 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Hashing algorithm inspired by
// xxhash: https://code.google.com/p/xxhash/
// cityhash: https://code.google.com/p/cityhash/

// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x alpha arm64be ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64

package runtime

import "unsafe"

// For gccgo, use go:linkname to rename compiler-called functions to
// themselves, so that the compiler will export them.
//
//go:linkname memhash runtime.memhash

const (
	// Constants for multiplication: four random odd 64-bit numbers.
	m1 = 16877499708836156737
	m2 = 2820277070424839065
	m3 = 9497967016996688599
	m4 = 15839092249703872147
)

func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
	if GOARCH == "amd64" && GOOS != "nacl" && useAeshash {
		return aeshash(p, seed, s)
	}
	h := uint64(seed + s*hashkey[0])
tail:
	switch {
	case s == 0:
	case s < 4:
		h ^= uint64(*(*byte)(p))
		h ^= uint64(*(*byte)(add(p, s>>1))) << 8
		h ^= uint64(*(*byte)(add(p, s-1))) << 16
		h = rotl_31(h*m1) * m2
	case s <= 8:
		h ^= uint64(readUnaligned32(p))
		h ^= uint64(readUnaligned32(add(p, s-4))) << 32
		h = rotl_31(h*m1) * m2
	case s <= 16:
		h ^= readUnaligned64(p)
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-8))
		h = rotl_31(h*m1) * m2
	case s <= 32:
		h ^= readUnaligned64(p)
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, 8))
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-16))
		h = rotl_31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-8))
		h = rotl_31(h*m1) * m2
	default:
		v1 := h
		v2 := uint64(seed * hashkey[1])
		v3 := uint64(seed * hashkey[2])
		v4 := uint64(seed * hashkey[3])
		for s >= 32 {
			v1 ^= readUnaligned64(p)
			v1 = rotl_31(v1*m1) * m2
			p = add(p, 8)
			v2 ^= readUnaligned64(p)
			v2 = rotl_31(v2*m2) * m3
			p = add(p, 8)
			v3 ^= readUnaligned64(p)
			v3 = rotl_31(v3*m3) * m4
			p = add(p, 8)
			v4 ^= readUnaligned64(p)
			v4 = rotl_31(v4*m4) * m1
			p = add(p, 8)
			s -= 32
		}
		h = v1 ^ v2 ^ v3 ^ v4
		goto tail
	}

	h ^= h >> 29
	h *= m3
	h ^= h >> 32
	return uintptr(h)
}

// Note: in order to get the compiler to issue rotl instructions, we
// need to constant fold the shift amount by hand.
// TODO: convince the compiler to issue rotl instructions after inlining.
func rotl_31(x uint64) uint64 {
	return (x << 31) | (x >> (64 - 31))
}

libgo/go/runtime/os_gccgo.go (new file, 23 lines added)
@@ -0,0 +1,23 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"unsafe"
)

var urandom_dev = []byte("/dev/urandom\x00")

func getRandomData(r []byte) {
	if startupRandomData != nil {
		n := copy(r, startupRandomData)
		extendRandom(r, n)
		return
	}
	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
	closefd(fd)
	extendRandom(r, int(n))
}

@@ -5,6 +5,7 @@
package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)

@@ -668,7 +669,6 @@ type forcegcstate struct {
// the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.go or os_linux_386.go).
var startupRandomData []byte

/*
// extendRandom extends the random numbers in r[:n] to the whole slice r.
// Treats n<0 as n==0.
func extendRandom(r []byte, n int) {

@@ -689,7 +689,6 @@ func extendRandom(r []byte, n int) {
		}
	}
}
*/

// deferred subroutine calls
// This is the gccgo version.

@@ -770,11 +769,12 @@ var (

	sched schedt

	// newprocs int32
	// newprocs int32

	// Information about what cpu features are available.
	// Set on startup.
	cpuid_ecx uint32

	// Information about what cpu features are available.
	// Set on startup in asm_{x86,amd64}.s.
	// cpuid_ecx uint32
	// cpuid_edx uint32
	// cpuid_ebx7 uint32
	// lfenceBeforeRdtsc bool

@@ -248,6 +248,12 @@ func funcPC(f interface{}) uintptr {
	return **(**uintptr)(i.data)
}

// For gccgo, to communicate from the C code to the Go code.
//go:linkname setCpuidECX runtime.setCpuidECX
func setCpuidECX(v uint32) {
	cpuid_ecx = v
}

// typedmemmove copies a typed value.
// For gccgo for now.
//go:nosplit

libgo/go/runtime/unaligned1.go (new file, 17 lines added)
@@ -0,0 +1,17 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x ppc s390 arm64be

package runtime

import "unsafe"

func readUnaligned32(p unsafe.Pointer) uint32 {
	return *(*uint32)(p)
}

func readUnaligned64(p unsafe.Pointer) uint64 {
	return *(*uint64)(p)
}

libgo/go/runtime/unaligned2.go (new file, 20 lines added)
@@ -0,0 +1,20 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build arm mips64 mips64le armbe m68k mipso32 mipsn32 mips mipsle sparc alpha ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64

package runtime

import "unsafe"

// Note: These routines perform the read with an unspecified endianness.
func readUnaligned32(p unsafe.Pointer) uint32 {
	q := (*[4]byte)(p)
	return uint32(q[0]) + uint32(q[1])<<8 + uint32(q[2])<<16 + uint32(q[3])<<24
}

func readUnaligned64(p unsafe.Pointer) uint64 {
	q := (*[8]byte)(p)
	return uint64(q[0]) + uint64(q[1])<<8 + uint64(q[2])<<16 + uint64(q[3])<<24 + uint64(q[4])<<32 + uint64(q[5])<<40 + uint64(q[6])<<48 + uint64(q[7])<<56
}

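readUnaligned32 above assembles the word one byte at a time, with q[0] landing in the least significant byte of the result. A standalone worked example of that composition (not from the patch), using a plain function on a copied array rather than an unsafe.Pointer:

package main

import "fmt"

// compose32 repeats the byte-by-byte composition used by readUnaligned32.
func compose32(q [4]byte) uint32 {
	return uint32(q[0]) + uint32(q[1])<<8 + uint32(q[2])<<16 + uint32(q[3])<<24
}

func main() {
	// q[0]=0x01 becomes the low byte, so the result is 0x04030201.
	fmt.Printf("%#x\n", compose32([4]byte{0x01, 0x02, 0x03, 0x04}))
}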
libgo/runtime/aeshash.c (new file, 583 lines added)
@@ -0,0 +1,583 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Hash code using AES intrinsics.

#include "runtime.h"

uintptr aeshashbody(void*, uintptr, uintptr, Slice)
	__asm__(GOSYM_PREFIX "runtime.aeshashbody");

uintptr aeshashbody(void*, uintptr, uintptr, Slice)
	__attribute__((no_split_stack));

#if defined(__i386__) || defined(__x86_64__)

#include <emmintrin.h>
#include <tmmintrin.h>
#include <wmmintrin.h>

// Force appropriate CPU level. We won't call here unless the CPU
// supports it.

#pragma GCC target("ssse3", "aes")

#ifdef __x86_64__

// aeshashbody implements a hash function using AES instructions
// available in recent x86 processors. Note this is not encryption,
// just hashing.
//
// This is written to produce exactly the same results as the gc
// implementation, not because that matters, but just to ensure that
// this does something reasonable.
uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
	__m128i mseed, mseed2, mseed3, mseed4, mseed5, mseed6, mseed7, mseed8;
	__m128i mval, mval2, mval3, mval4, mval5, mval6, mval7, mval8;

	// Start with hash seed.
	mseed = _mm_cvtsi64_si128(seed);
	// Get 16 bits of length.
	mseed = _mm_insert_epi16(mseed, size, 4);
	// Repeat length 4 times total.
	mseed = _mm_shufflehi_epi16(mseed, 0);
	// Save unscrambled seed.
	mseed2 = mseed;
	// XOR in per-process seed.
	mseed ^= _mm_loadu_si128(aeskeysched.__values);
	// Scramble seed.
	mseed = _mm_aesenc_si128(mseed, mseed);

	if (size <= 16) {
		if (size == 0) {
			// Return scrambled input seed.
			return _mm_cvtsi128_si64(_mm_aesenc_si128(mseed, mseed));
		} else if (size < 16) {
			if ((((uintptr)(p) + 16) & 0xff0) != 0) {
				static const uint64 masks[32]
					__attribute__ ((aligned(16))) =
					{
						0x0000000000000000, 0x0000000000000000,
						0x00000000000000ff, 0x0000000000000000,
						0x000000000000ffff, 0x0000000000000000,
						0x0000000000ffffff, 0x0000000000000000,
						0x00000000ffffffff, 0x0000000000000000,
						0x000000ffffffffff, 0x0000000000000000,
						0x0000ffffffffffff, 0x0000000000000000,
						0x00ffffffffffffff, 0x0000000000000000,
						0xffffffffffffffff, 0x0000000000000000,
						0xffffffffffffffff, 0x00000000000000ff,
						0xffffffffffffffff, 0x000000000000ffff,
						0xffffffffffffffff, 0x0000000000ffffff,
						0xffffffffffffffff, 0x00000000ffffffff,
						0xffffffffffffffff, 0x000000ffffffffff,
						0xffffffffffffffff, 0x0000ffffffffffff,
						0xffffffffffffffff, 0x00ffffffffffffff
					};

				// 16 bytes loaded at p won't cross a page
				// boundary, so we can load directly.
				mval = _mm_loadu_si128(p);
				mval &= *(const __m128i*)(&masks[size*2]);
			} else {
				static const uint64 shifts[32]
					__attribute__ ((aligned(16))) =
					{
						0x0000000000000000, 0x0000000000000000,
						0xffffffffffffff0f, 0xffffffffffffffff,
						0xffffffffffff0f0e, 0xffffffffffffffff,
						0xffffffffff0f0e0d, 0xffffffffffffffff,
						0xffffffff0f0e0d0c, 0xffffffffffffffff,
						0xffffff0f0e0d0c0b, 0xffffffffffffffff,
						0xffff0f0e0d0c0b0a, 0xffffffffffffffff,
						0xff0f0e0d0c0b0a09, 0xffffffffffffffff,
						0x0f0e0d0c0b0a0908, 0xffffffffffffffff,
						0x0e0d0c0b0a090807, 0xffffffffffffff0f,
						0x0d0c0b0a09080706, 0xffffffffffff0f0e,
						0x0c0b0a0908070605, 0xffffffffff0f0e0d,
						0x0b0a090807060504, 0xffffffff0f0e0d0c,
						0x0a09080706050403, 0xffffff0f0e0d0c0b,
						0x0908070605040302, 0xffff0f0e0d0c0b0a,
						0x0807060504030201, 0xff0f0e0d0c0b0a09,
					};

				// address ends in 1111xxxx. Might be
				// up against a page boundary, so load
				// ending at last byte. Then shift
				// bytes down using pshufb.
				mval = _mm_loadu_si128((void*)((char*)p - 16 + size));
				mval = _mm_shuffle_epi8(mval, *(const __m128i*)(&shifts[size*2]));
			}
		} else {
			mval = _mm_loadu_si128(p);
		}

		// XOR data with seed.
		mval ^= mseed;
		// Scramble combo 3 times.
		mval = _mm_aesenc_si128(mval, mval);
		mval = _mm_aesenc_si128(mval, mval);
		mval = _mm_aesenc_si128(mval, mval);
		return _mm_cvtsi128_si64(mval);
	} else if (size <= 32) {
		// Make second starting seed.
		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
		// Load data to be hashed.
		mval = _mm_loadu_si128(p);
		mval2 = _mm_loadu_si128((void*)((char*)p + size - 16));
		// XOR with seed.
		mval ^= mseed;
		mval2 ^= mseed2;
		// Scramble 3 times.
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		// Combine results.
		mval ^= mval2;
		return _mm_cvtsi128_si64(mval);
	} else if (size <= 64) {
		// Make 3 more starting seeds.
		mseed3 = mseed2;
		mseed4 = mseed2;
		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
		mseed4 = _mm_aesenc_si128(mseed4, mseed4);

		mval = _mm_loadu_si128(p);
		mval2 = _mm_loadu_si128((void*)((char*)p + 16));
		mval3 = _mm_loadu_si128((void*)((char*)p + size - 32));
		mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));

		mval ^= mseed;
		mval2 ^= mseed2;
		mval3 ^= mseed3;
		mval4 ^= mseed4;

		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);

		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);

		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);

		mval ^= mval3;
		mval2 ^= mval4;
		mval ^= mval2;
		return _mm_cvtsi128_si64(mval);
	} else if (size <= 128) {
		// Make 7 more starting seeds.
		mseed3 = mseed2;
		mseed4 = mseed2;
		mseed5 = mseed2;
		mseed6 = mseed2;
		mseed7 = mseed2;
		mseed8 = mseed2;
		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
		mseed5 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 64));
		mseed6 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 80));
		mseed7 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 96));
		mseed8 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 112));
		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
		mseed4 = _mm_aesenc_si128(mseed4, mseed4);
		mseed5 = _mm_aesenc_si128(mseed5, mseed5);
		mseed6 = _mm_aesenc_si128(mseed6, mseed6);
		mseed7 = _mm_aesenc_si128(mseed7, mseed7);
		mseed8 = _mm_aesenc_si128(mseed8, mseed8);

		// Load data.
		mval = _mm_loadu_si128(p);
		mval2 = _mm_loadu_si128((void*)((char*)p + 16));
		mval3 = _mm_loadu_si128((void*)((char*)p + 32));
		mval4 = _mm_loadu_si128((void*)((char*)p + 48));
		mval5 = _mm_loadu_si128((void*)((char*)p + size - 64));
		mval6 = _mm_loadu_si128((void*)((char*)p + size - 48));
		mval7 = _mm_loadu_si128((void*)((char*)p + size - 32));
		mval8 = _mm_loadu_si128((void*)((char*)p + size - 16));

		// XOR with seed.
		mval ^= mseed;
		mval2 ^= mseed2;
		mval3 ^= mseed3;
		mval4 ^= mseed4;
		mval5 ^= mseed5;
		mval6 ^= mseed6;
		mval7 ^= mseed7;
		mval8 ^= mseed8;

		// Scramble 3 times.
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);
		mval5 = _mm_aesenc_si128(mval5, mval5);
		mval6 = _mm_aesenc_si128(mval6, mval6);
		mval7 = _mm_aesenc_si128(mval7, mval7);
		mval8 = _mm_aesenc_si128(mval8, mval8);

		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);
		mval5 = _mm_aesenc_si128(mval5, mval5);
		mval6 = _mm_aesenc_si128(mval6, mval6);
		mval7 = _mm_aesenc_si128(mval7, mval7);
		mval8 = _mm_aesenc_si128(mval8, mval8);

		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);
		mval5 = _mm_aesenc_si128(mval5, mval5);
		mval6 = _mm_aesenc_si128(mval6, mval6);
		mval7 = _mm_aesenc_si128(mval7, mval7);
		mval8 = _mm_aesenc_si128(mval8, mval8);

		// Combine results.
		mval ^= mval5;
		mval2 ^= mval6;
		mval3 ^= mval7;
		mval4 ^= mval8;
		mval ^= mval3;
		mval2 ^= mval4;
		mval ^= mval2;
		return _mm_cvtsi128_si64(mval);
	} else {
		// Make 7 more starting seeds.
		mseed3 = mseed2;
		mseed4 = mseed2;
		mseed5 = mseed2;
		mseed6 = mseed2;
		mseed7 = mseed2;
		mseed8 = mseed2;
		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
		mseed5 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 64));
		mseed6 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 80));
		mseed7 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 96));
		mseed8 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 112));
		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
		mseed4 = _mm_aesenc_si128(mseed4, mseed4);
		mseed5 = _mm_aesenc_si128(mseed5, mseed5);
		mseed6 = _mm_aesenc_si128(mseed6, mseed6);
		mseed7 = _mm_aesenc_si128(mseed7, mseed7);
		mseed8 = _mm_aesenc_si128(mseed8, mseed8);

		// Start with last (possibly overlapping) block.
		mval = _mm_loadu_si128((void*)((char*)p + size - 128));
		mval2 = _mm_loadu_si128((void*)((char*)p + size - 112));
		mval3 = _mm_loadu_si128((void*)((char*)p + size - 96));
		mval4 = _mm_loadu_si128((void*)((char*)p + size - 80));
		mval5 = _mm_loadu_si128((void*)((char*)p + size - 64));
		mval6 = _mm_loadu_si128((void*)((char*)p + size - 48));
		mval7 = _mm_loadu_si128((void*)((char*)p + size - 32));
		mval8 = _mm_loadu_si128((void*)((char*)p + size - 16));

		// XOR in seed.
		mval ^= mseed;
		mval2 ^= mseed2;
		mval3 ^= mseed3;
		mval4 ^= mseed4;
		mval5 ^= mseed5;
		mval6 ^= mseed6;
		mval7 ^= mseed7;
		mval8 ^= mseed8;

		// Compute number of remaining 128-byte blocks.
		size--;
		size >>= 7;
		do {
			// Scramble state.
			mval = _mm_aesenc_si128(mval, mval);
			mval2 = _mm_aesenc_si128(mval2, mval2);
			mval3 = _mm_aesenc_si128(mval3, mval3);
			mval4 = _mm_aesenc_si128(mval4, mval4);
			mval5 = _mm_aesenc_si128(mval5, mval5);
			mval6 = _mm_aesenc_si128(mval6, mval6);
			mval7 = _mm_aesenc_si128(mval7, mval7);
			mval8 = _mm_aesenc_si128(mval8, mval8);

			// Scramble state, XOR in a block.
			mval = _mm_aesenc_si128(mval, _mm_loadu_si128(p));
			mval2 = _mm_aesenc_si128(mval2, _mm_loadu_si128((void*)((char*)p + 16)));
			mval3 = _mm_aesenc_si128(mval3, _mm_loadu_si128((void*)((char*)p + 32)));
			mval4 = _mm_aesenc_si128(mval4, _mm_loadu_si128((void*)((char*)p + 48)));
			mval5 = _mm_aesenc_si128(mval5, _mm_loadu_si128((void*)((char*)p + 64)));
			mval6 = _mm_aesenc_si128(mval6, _mm_loadu_si128((void*)((char*)p + 80)));
			mval7 = _mm_aesenc_si128(mval7, _mm_loadu_si128((void*)((char*)p + 96)));
			mval8 = _mm_aesenc_si128(mval8, _mm_loadu_si128((void*)((char*)p + 112)));

			p = (void*)((char*)p + 128);
		} while (--size > 0);

		// 3 more scrambles to finish.
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);
		mval5 = _mm_aesenc_si128(mval5, mval5);
		mval6 = _mm_aesenc_si128(mval6, mval6);
		mval7 = _mm_aesenc_si128(mval7, mval7);
		mval8 = _mm_aesenc_si128(mval8, mval8);
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);
		mval5 = _mm_aesenc_si128(mval5, mval5);
		mval6 = _mm_aesenc_si128(mval6, mval6);
		mval7 = _mm_aesenc_si128(mval7, mval7);
		mval8 = _mm_aesenc_si128(mval8, mval8);
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);
		mval5 = _mm_aesenc_si128(mval5, mval5);
		mval6 = _mm_aesenc_si128(mval6, mval6);
		mval7 = _mm_aesenc_si128(mval7, mval7);
		mval8 = _mm_aesenc_si128(mval8, mval8);

		mval ^= mval5;
		mval2 ^= mval6;
		mval3 ^= mval7;
		mval4 ^= mval8;
		mval ^= mval3;
		mval2 ^= mval4;
		mval ^= mval2;
		return _mm_cvtsi128_si64(mval);
	}
}

#else // !defined(__x86_64__)

// The 32-bit version of aeshashbody.

uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
	__m128i mseed, mseed2, mseed3, mseed4;
	__m128i mval, mval2, mval3, mval4;

	// Start with hash seed.
	mseed = _mm_cvtsi32_si128(seed);
	// Get 16 bits of length.
	mseed = _mm_insert_epi16(mseed, size, 4);
	// Replace size with its low 2 bytes repeated 4 times.
	mseed = _mm_shufflehi_epi16(mseed, 0);
	// Save unscrambled seed.
	mseed2 = mseed;
	// XOR in per-process seed.
	mseed ^= _mm_loadu_si128(aeskeysched.__values);
	// Scramble seed.
	mseed = _mm_aesenc_si128(mseed, mseed);

	if (size <= 16) {
		if (size == 0) {
			// Return scrambled input seed.
			return _mm_cvtsi128_si32(_mm_aesenc_si128(mseed, mseed));
		} else if (size < 16) {
			if ((((uintptr)(p) + 16) & 0xff0) != 0) {
				static const uint64 masks[32]
					__attribute__ ((aligned(16))) =
					{
						0x0000000000000000, 0x0000000000000000,
						0x00000000000000ff, 0x0000000000000000,
						0x000000000000ffff, 0x0000000000000000,
						0x0000000000ffffff, 0x0000000000000000,
						0x00000000ffffffff, 0x0000000000000000,
						0x000000ffffffffff, 0x0000000000000000,
						0x0000ffffffffffff, 0x0000000000000000,
						0x00ffffffffffffff, 0x0000000000000000,
						0xffffffffffffffff, 0x0000000000000000,
						0xffffffffffffffff, 0x00000000000000ff,
						0xffffffffffffffff, 0x000000000000ffff,
						0xffffffffffffffff, 0x0000000000ffffff,
						0xffffffffffffffff, 0x00000000ffffffff,
						0xffffffffffffffff, 0x000000ffffffffff,
						0xffffffffffffffff, 0x0000ffffffffffff,
						0xffffffffffffffff, 0x00ffffffffffffff
					};

				// 16 bytes loaded at p won't cross a page
				// boundary, so we can load it directly.
				mval = _mm_loadu_si128(p);
				mval &= *(const __m128i*)(&masks[size*2]);
			} else {
				static const uint64 shifts[32]
					__attribute__ ((aligned(16))) =
					{
						0x0000000000000000, 0x0000000000000000,
						0xffffffffffffff0f, 0xffffffffffffffff,
						0xffffffffffff0f0e, 0xffffffffffffffff,
						0xffffffffff0f0e0d, 0xffffffffffffffff,
						0xffffffff0f0e0d0c, 0xffffffffffffffff,
						0xffffff0f0e0d0c0b, 0xffffffffffffffff,
						0xffff0f0e0d0c0b0a, 0xffffffffffffffff,
						0xff0f0e0d0c0b0a09, 0xffffffffffffffff,
						0x0f0e0d0c0b0a0908, 0xffffffffffffffff,
						0x0e0d0c0b0a090807, 0xffffffffffffff0f,
						0x0d0c0b0a09080706, 0xffffffffffff0f0e,
						0x0c0b0a0908070605, 0xffffffffff0f0e0d,
						0x0b0a090807060504, 0xffffffff0f0e0d0c,
						0x0a09080706050403, 0xffffff0f0e0d0c0b,
						0x0908070605040302, 0xffff0f0e0d0c0b0a,
						0x0807060504030201, 0xff0f0e0d0c0b0a09,
					};

				// address ends in 1111xxxx. Might be
				// up against a page boundary, so load
				// ending at last byte. Then shift
				// bytes down using pshufb.
				mval = _mm_loadu_si128((void*)((char*)p - 16 + size));
				mval = _mm_shuffle_epi8(mval, *(const __m128i*)(&shifts[size*2]));
			}
		} else {
			mval = _mm_loadu_si128(p);
		}

		// Scramble input, XOR in seed.
		mval = _mm_aesenc_si128(mval, mseed);
		mval = _mm_aesenc_si128(mval, mval);
		mval = _mm_aesenc_si128(mval, mval);
		return _mm_cvtsi128_si32(mval);
	} else if (size <= 32) {
		// Make second starting seed.
		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
		// Load data to be hashed.
		mval = _mm_loadu_si128(p);
		mval2 = _mm_loadu_si128((void*)((char*)p + size - 16));

		// Scramble 3 times.
		mval = _mm_aesenc_si128(mval, mseed);
		mval2 = _mm_aesenc_si128(mval2, mseed2);
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);

		// Combine results.
		mval ^= mval2;
		return _mm_cvtsi128_si32(mval);
	} else if (size <= 64) {
		// Make 3 more starting seeds.
		mseed3 = mseed2;
		mseed4 = mseed2;
		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
		mseed4 = _mm_aesenc_si128(mseed4, mseed4);

		mval = _mm_loadu_si128(p);
		mval2 = _mm_loadu_si128((void*)((char*)p + 16));
		mval3 = _mm_loadu_si128((void*)((char*)p + size - 32));
		mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));

		mval = _mm_aesenc_si128(mval, mseed);
		mval2 = _mm_aesenc_si128(mval2, mseed2);
		mval3 = _mm_aesenc_si128(mval3, mseed3);
		mval4 = _mm_aesenc_si128(mval4, mseed4);

		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);

		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);

		mval ^= mval3;
		mval2 ^= mval4;
		mval ^= mval2;
		return _mm_cvtsi128_si32(mval);
	} else {
		// Make 3 more starting seeds.
		mseed3 = mseed2;
		mseed4 = mseed2;
		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
		mseed4 = _mm_aesenc_si128(mseed4, mseed4);

		// Start with last (possibly overlapping) block.
		mval = _mm_loadu_si128((void*)((char*)p + size - 64));
		mval2 = _mm_loadu_si128((void*)((char*)p + size - 48));
		mval3 = _mm_loadu_si128((void*)((char*)p + size - 32));
		mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));

		// Scramble state once.
		mval = _mm_aesenc_si128(mval, mseed);
		mval2 = _mm_aesenc_si128(mval2, mseed2);
		mval3 = _mm_aesenc_si128(mval3, mseed3);
		mval4 = _mm_aesenc_si128(mval4, mseed4);

		// Compute number of remaining 64-byte blocks.
		size--;
		size >>= 6;
		do {
			// Scramble state, XOR in a block.
			mval = _mm_aesenc_si128(mval, _mm_loadu_si128(p));
			mval2 = _mm_aesenc_si128(mval2, _mm_loadu_si128((void*)((char*)p + 16)));
			mval3 = _mm_aesenc_si128(mval3, _mm_loadu_si128((void*)((char*)p + 32)));
			mval4 = _mm_aesenc_si128(mval4, _mm_loadu_si128((void*)((char*)p + 48)));

			// Scramble state.
			mval = _mm_aesenc_si128(mval, mval);
			mval2 = _mm_aesenc_si128(mval2, mval2);
			mval3 = _mm_aesenc_si128(mval3, mval3);
			mval4 = _mm_aesenc_si128(mval4, mval4);

			p = (void*)((char*)p + 64);
		} while (--size > 0);

		// 2 more scrambles to finish.
		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);

		mval = _mm_aesenc_si128(mval, mval);
		mval2 = _mm_aesenc_si128(mval2, mval2);
		mval3 = _mm_aesenc_si128(mval3, mval3);
		mval4 = _mm_aesenc_si128(mval4, mval4);

		mval ^= mval3;
		mval2 ^= mval4;
		mval ^= mval2;
		return _mm_cvtsi128_si32(mval);
	}
}

#endif // !defined(__x86_64__)

#else // !defined(__i386__) && !defined(__x86_64__)

uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
	// We should never get here on a non-x86 system.
	runtime_throw("impossible call to aeshashbody");
}

#endif // !defined(__i386__) && !defined(__x86_64__)

@@ -61,6 +61,7 @@ initfn (int argc, char **argv, char** env __attribute__ ((unused)))

  runtime_isarchive = true;

  runtime_cpuinit ();
  runtime_initsig(true);

  a = (struct args *) malloc (sizeof *a);

@@ -47,6 +47,7 @@ main (int argc, char **argv)
  runtime_isstarted = true;

  __go_end = (uintptr)_end;
  runtime_cpuinit ();
  runtime_check ();
  runtime_args (argc, (byte **) argv);
  runtime_osinit ();

@@ -9,44 +9,14 @@
#include "runtime.h"
#include "go-type.h"

/* An identity hash function for a type. This is used for types where
   we can simply use the type value itself as a hash code. This is
   true of, e.g., integers and pointers. */
/* The hash functions for types that can compare as identity is
   written in Go. */

uintptr_t
__go_type_hash_identity (const void *key, uintptr_t seed, uintptr_t key_size)
{
  uintptr_t ret;
  uintptr_t i;
  const unsigned char *p;

  if (key_size <= 8)
    {
      union
      {
	uint64 v;
	unsigned char a[8];
      } u;
      u.v = 0;
#ifdef WORDS_BIGENDIAN
      __builtin_memcpy (&u.a[8 - key_size], key, key_size);
#else
      __builtin_memcpy (&u.a[0], key, key_size);
#endif
      if (sizeof (uintptr_t) >= 8)
	return (uintptr_t) u.v ^ seed;
      else
	return (uintptr_t) ((u.v >> 32) ^ (u.v & 0xffffffff)) ^ seed;
    }

  ret = seed;
  for (i = 0, p = (const unsigned char *) key; i < key_size; i++, p++)
    ret = ret * 33 + *p;
  return ret;
}
extern uintptr runtime_memhash(void *, uintptr, uintptr)
  __asm__ (GOSYM_PREFIX "runtime.memhash");

const FuncVal __go_type_hash_identity_descriptor =
  { (void *) __go_type_hash_identity };
  { (void *) runtime_memhash };

/* An identity equality function for a type. This is used for types
   where we can check for equality by checking that the values have

@@ -362,7 +362,6 @@ extern _Bool
__go_type_descriptors_equal(const struct __go_type_descriptor*,
			    const struct __go_type_descriptor*);

extern uintptr_t __go_type_hash_identity (const void *, uintptr_t, uintptr_t);
extern const FuncVal __go_type_hash_identity_descriptor;
extern _Bool __go_type_equal_identity (const void *, const void *, uintptr_t);
extern const FuncVal __go_type_equal_identity_descriptor;

@@ -455,7 +455,8 @@ runtime_schedinit(void)
	// runtime_symtabinit();
	runtime_mallocinit();
	mcommoninit(m);

	runtime_alginit(); // maps must not be used before this call

	// Initialize the itable value for newErrorCString,
	// so that the next time it gets called, possibly
	// in a fault during a garbage collection, it will not

@@ -265,6 +265,8 @@ struct __go_func_type;
void runtime_args(int32, byte**)
	__asm__ (GOSYM_PREFIX "runtime.args");
void runtime_osinit();
void runtime_alginit(void)
	__asm__ (GOSYM_PREFIX "runtime.alginit");
void runtime_goargs(void)
	__asm__ (GOSYM_PREFIX "runtime.goargs");
void runtime_goenvs(void);

@@ -592,3 +594,7 @@ extern void *getitab(const struct __go_type_descriptor *,
		     const struct __go_type_descriptor *,
		     _Bool)
  __asm__ (GOSYM_PREFIX "runtime.getitab");

extern void runtime_cpuinit(void);
extern void setCpuidECX(uint32)
  __asm__ (GOSYM_PREFIX "runtime.setCpuidECX");

@@ -6,6 +6,10 @@
#include <signal.h>
#include <unistd.h>

#if defined(__i386__) || defined(__x86_64__)
#include <cpuid.h>
#endif

#include "config.h"

#include "runtime.h"

@@ -204,3 +208,18 @@ go_errno()
{
  return (intgo)errno;
}

// CPU-specific initialization.
// Fetch CPUID info on x86.

void
runtime_cpuinit()
{
#if defined(__i386__) || defined(__x86_64__)
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		setCpuidECX(ecx);
	}
#endif
}