mirror of
https://sourceware.org/git/binutils-gdb.git
synced 2025-01-06 12:09:26 +08:00
102 lines
2.8 KiB
ArmAsm
102 lines
2.8 KiB
ArmAsm
# mach: bfin

// GENERIC CONVOLUTIONAL ENCODER
// This is a generic rate 1/n convolutional encoder. It computes n output
// bits for each input bit, based on n generic polynomials.
// It uses the set of BXOR_CC instructions to compute bit XOR
// reduction from a state masked by a polynomial. For an alternate
// solution based on assembling several partial words, as in
// the BDT benchmark, see file conv_enc.c. The solution presented
// here is slower than conv_enc.c, but more generic.
//
// Forward Shift Register
// -----------------------
// This solution implements the XOR function by shifting the state
// left by one, applying a mask to the state, and reducing
// the result with a bit XOR reduction function.
//     ----- XOR------------>  G0
//     |       |  |   |
//   +------------------------------+
//   | b0 b1 b2 b3          b14 b15 | <- in
//   +------------------------------+
//     |  |    |  |   |
//     ----- XOR------------>  G1
// Instruction BXOR computes the bit G0 or G1 and stores it into CC
// and also into a destination reg half. Here, we take CC and rotate it
// into an output register.
// However, one can also store the output bit directly by storing
// the register half where this bit is placed. This would result
// in an output structure similar to the one in the original function
// Convolutional_Encode(), where an entire half word holds a bit.
// The resulting execution speed would be roughly twice as fast,
// since there is no need to rotate the output bit via CC.

.include "testutils.inc"
	start

	// Register roles for the rest of this test:
	//   P0 - read pointer into the `input` words
	//   P1 - write pointer into the `output` words
	//   R1 - output shift register; encoded bits are rotated in via CC
	//   R2 - mask for polynomial 0 (upper half; lower half zero)
	//   R3 - mask for polynomial 1 (upper half; lower half zero)
	//   A0 - encoder state in the upper half, input bits in the lower half
	// loadsym comes from testutils.inc; presumably it loads the address
	// of the named symbol into the given register.
	loadsym P0, input;
	loadsym P1, output;

	R1 = 0; R2 = 0;R3 = 0;

	R2.L = 0;
	R2.H = 0xa01d;		// polynom 0
	R3.L = 0;
	R3.H = 0x12f4;		// polynom 1

	// Load CurrentState (all zero for this test) into the upper half
	// of A0: write it to A0.w, then shift it up by 16 so that the lower
	// half is free to receive the input bits.
	A1 = A0 = 0;
	R0 = 0x0000;
	A0.w = R0;
	A0 = A0 << 16;

// l-loop counter is in P4: one pass per 16-bit input word (two words here).
// Each pass runs the m-loop twice, and each m-loop pass runs the i-loop
// eight times, emitting two bits (G0, G1) per i-loop pass. That gives one
// 16-bit output word per m-loop pass and two output words per input word.
	P4 = 2(Z);
	// **** START l-LOOP *****
l$0:

	// insert 16 bits of input into lower half of A0
	// and advance input pointer
	R0 = W [ P0 ++ ] (Z);
	A0.L = R0.L;

	// The *end labels sit on the last instruction of each hardware loop.
	P5 = 2 (Z);
	LSETUP ( m$0 , m$0end ) LC0 = P5; // **** BEGIN m-LOOP *****
m$0:

	P5 = 8 (Z);
	LSETUP ( i$1 , i$1end ) LC1 = P5; // **** BEGIN i-LOOP *****
i$1:
	// Only the first reduction uses the SHIFT form, so the state in A0
	// advances by one bit per i-loop pass; the second reduction applies
	// polynomial 1 to that same, already shifted state.
	// Each ROT must directly follow its BXOR: it consumes the CC just set.
	R4.L = CC = BXORSHIFT( A0 , R2 );	// polynom0 -> CC
	R1 = ROT R1 BY 1;			// CC -> R1
	R4.L = CC = BXOR( A0 , R3 );		// polynom1 -> CC
i$1end:
	R1 = ROT R1 BY 1;			// CC -> R1

	// store 16 bits of output data (R1.L) and advance output pointer
m$0end:
	W [ P1 ++ ] = R1;

	P4 += -1;
	CC = P4 == 0;
	IF !CC JUMP l$0;	// **** END l-LOOP *****

// Check results
// Read the four output words back through I2 and compare each against
// its expected encoding. DBGA is expected to flag a mismatch to the
// simulator; `pass` (from testutils.inc) presumably reports success.
	loadsym I2, output;
	R0.L = W [ I2 ++ ];	DBGA ( R0.L , 0x8c62 );
	R0.L = W [ I2 ++ ];	DBGA ( R0.L , 0x262e );
	R0.L = W [ I2 ++ ];	DBGA ( R0.L , 0x5b4d );
	R0.L = W [ I2 ++ ];	DBGA ( R0.L , 0x834f );
	pass

// Test vectors for the encoder.
	.data
// Two 16-bit input words, read by the encoder through P0.
input:
	.dw 0x999f
	.dw 0x1999

// Four 16-bit result words, zero-initialised and written through P1.
output:
	.dw 0x0000
	.dw 0x0000
	.dw 0x0000
	.dw 0x0000