x86: Remove SSSE3 instruction for broadcast in memset.S (SSE2 Only)

commit b62ace2740
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date:   Sun Feb 6 00:54:18 2022 -0600

    x86: Improve vec generation in memset-vec-unaligned-erms.S

Revert usage of 'pshufb' in broadcast logic as it is an SSSE3
instruction and memset.S is restricted to only SSE2 instructions.
This commit is contained in:
Noah Goldstein 2022-02-07 00:32:23 -06:00
parent 03c9c4fce4
commit 1b0c60f95b

View File

@@ -30,9 +30,10 @@
# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
movd d, %xmm0; \
-pxor %xmm1, %xmm1; \
-pshufb %xmm1, %xmm0; \
-movq r, %rax
+movq r, %rax; \
+punpcklbw %xmm0, %xmm0; \
+punpcklwd %xmm0, %xmm0; \
+pshufd $0, %xmm0, %xmm0
# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
movd d, %xmm0; \