binutils-gdb/sim/testsuite/bfin/conv_enc_gen.s

# mach: bfin

// GENERIC CONVOLUTIONAL ENCODER
// This is a generic rate 1/n convolutional encoder. It computes n output
// bits for each input bit, based on n generic polynomials.
// It uses the set of BXOR_CC instructions to compute bit XOR
// reduction from a state masked by a polynomial.  For an alternate
// solution based on assembling several partial words, as in
// the BDT benchmark, see file conv_enc.c. The solution presented
// here is slower than conv_enc.c, but more generic.
//
// Forward Shift Register
// -----------------------
// This solution implements the XOR function by shifting the state
// left by one, applying a mask to the state, and reducing
// the result with a bit XOR reduction function.
//    	             ----- XOR------------> G0
// 	             |     |     |  |
//        +------------------------------+
//        | b0 b1 b2 b3          b14 b15 | <- in
//        +------------------------------+
//                   | 	|  |  |	    |
//    	             ----- XOR------------> G1
// Instruction BXOR computes the bit G0 or G1 and stores it into CC
// and also into a destination reg half. Here, we take CC and rotate it
// into an output register.
// However, one can also store the output bit directly by storing
// the register half where this bit is placed. This would result
// in an output structure similar to the one in the original function
// Convolutional_Encode(), where an entire half word holds a bit.
// The resulting code would execute roughly twice as fast,
// since there is no need to rotate the output bit via CC.

.include "testutils.inc"
	start

	// P0 walks the input words, P1 walks the output words.
	loadsym P0, input;
	loadsym P1, output;

	// R1 collects the encoder output bits; begin with it empty.
	R1 = 0;

	// Generator polynomial 0: mask in the upper half of R2, lower half zero.
	R2 = 0;
	R2.H = 0xa01d;

	// Generator polynomial 1: mask in the upper half of R3, lower half zero.
	R3 = 0;
	R3.H = 0x12f4;

	// Clear both accumulators, then place the initial encoder state
	// (all zeros, supplied through R0) in the upper half of A0.
	R0 = 0x0000;
	A1 = A0 = 0;
	A0.w = R0;
	A0 = A0 << 16;

	// Encoder main loop.  Each l-loop pass consumes one 16-bit input word
	// and produces two 16-bit output words: the m-loop runs twice, and each
	// m-loop pass runs the i-loop 8 times, shifting 2 output bits into R1
	// per i-loop pass.
	// The l-loop is a software loop counted down in P4 (2 input words).
	P4 = 2(Z);
	// **** START l-LOOP *****
l$0:

	// Insert the next 16 input bits into the lower half of A0
	// and advance the input pointer P0.
	R0 = W [ P0 ++ ] (Z);
	A0.L = R0.L;

	// m-loop: hardware loop 0, 2 passes over m$0 .. m$0end.
	P5 = 2 (Z);
	LSETUP ( m$0 , m$0end ) LC0 = P5;	// **** BEGIN m-LOOP *****
m$0:

	// i-loop: hardware loop 1, 8 passes over i$1 .. i$1end.
	// P5 is reused for the count; LC0 was already set from it above.
	P5 = 8 (Z);
	LSETUP ( i$1 , i$1end ) LC1 = P5;	// **** BEGIN i-LOOP *****
i$1:
	R4.L = CC = BXORSHIFT( A0 , R2 );	// shift A0 left by one, then polynom0 -> CC
	R1 = ROT R1 BY 1;			// CC -> R1
	R4.L = CC = BXOR( A0 , R3 );		// polynom1 -> CC (R4.L is not read afterwards)
i$1end:
	R1 = ROT R1 BY 1;			// CC -> R1 (last instruction of the i-loop)

	// Store the low 16 bits of R1 (the 16 output bits just collected)
	// and advance the output pointer P1.  This is the last instruction
	// of the m-loop.
m$0end:
	W [ P1 ++ ] = R1;

	// Count down the l-loop and repeat until both input words are consumed.
	P4 += -1;
	CC = P4 == 0;
	IF !CC JUMP l$0;	// **** END l-LOOP *****

				// Check results
	// Read the four encoded words back through I2, in the order they were
	// stored, and have DBGA compare each one with its expected value.
	loadsym I2, output;

	R0.L = W [ I2 ++ ];		// output word 0
	DBGA ( R0.L , 0x8c62 );

	R0.L = W [ I2 ++ ];		// output word 1
	DBGA ( R0.L , 0x262e );

	R0.L = W [ I2 ++ ];		// output word 2
	DBGA ( R0.L , 0x5b4d );

	R0.L = W [ I2 ++ ];		// output word 3
	DBGA ( R0.L , 0x834f );

	// All four comparisons were executed: report success.
	pass

	// Test data: the encoder input stream and the buffer for its output.
	.data
	// Two input words; the program reads them in order through P0,
	// one per pass of its outer loop.
input:
	.dw 0x999f
	.dw 0x1999

	// Four output words, initially zero.  The program fills them through P1
	// and then reads them back through I2 to compare against the expected
	// encoder output.
output:
	.dw 0x0000
	.dw 0x0000
	.dw 0x0000
	.dw 0x0000
sim: bfin: import testsuite Now that the common sim testsuite code supports .S and .c files, we can import the Blackfin testsuite. There are about ~800 tests here, so I'm only attaching a compressed patch of them. Other than adding files to sim/testsuite/sim/bfin/, the sim/configure.tgt file was updated to mark Blackfin as having a testsuite, and sim/configure regenerated. Signed-off-by: Mike Frysinger <vapier@gentoo.org> 2011-06-05 01:44:22 +08:00			`# mach: bfin`

			`// GENERIC CONVOLUTIONAL ENCODER`
			`// This a generic rate 1/n convolutional encoder. It computes n output`
			`// bits for each input bit, based on n generic polynomials.`
			`// It uses the set of BXOR_CC instructions to compute bit XOR`
			`// reduction from a state masked by a polynomial. For an alternate`
			`// solution based on assembling several partial words, as in`
			`// the BDT benchmark, see file conv_enc.c. The solution presented`
			`// here is slower than conv_enc.c, but more generic.`
			`//`
			`// Forward Shift Register`
			`// -----------------------`
			`// This solution implements the XOR function by shifting the state`
			`// left by one, applying a mask to the state, and reducing`
			`// the result with a bit XOR reduction function.`
			`// ----- XOR------------> G0`
			`// \| \| \| \|`
			`// +------------------------------+`
			`// \| b0 b1 b2 b3 b14 b15 \| <- in`
			`// +------------------------------+`
			`// \| \| \| \| \|`
			`// ----- XOR------------> G1`
			`// Instruction BXOR computes the bit G0 or G1 and stores it into CC`
			`// and also into a destination reg half. Here, we take CC and rotate it`
			`// into an output register.`
			`// However, one can also store the output bit directly by storing`
			`// the register half where this bit is placed. This would result`
			`// in an output structure similar to the one in the original function`
			`// Convolutional_Encode(), where an entire half word holds a bit.`
			`// The resulting execution speed would be roughly twice as fast,`
			`// since there is no need to rotate output bit via CC.`

			`.include "testutils.inc"`
			`start`

			`loadsym P0, input;`
			`loadsym P1, output;`

			`R1 = 0; R2 = 0;R3 = 0;`

			`R2.L = 0;`
			`R2.H = 0xa01d; // polynom 0`
			`R3.L = 0;`
			`R3.H = 0x12f4; // polynom 1`

			`// load and CurrentState to upper half of A0`
			`A1 = A0 = 0;`
			`R0 = 0x0000;`
			`A0.w = R0;`
			`A0 = A0 << 16;`

			`// l-loop counter is in P4`
			`P4 = 2(Z);`
			`// ** START l-LOOP ***`
			`l$0:`

			`// insert 16 bits of input into lower half of A0`
			`// and advance input pointer`
			`R0 = W [ P0 ++ ] (Z);`
			`A0.L = R0.L;`

			`P5 = 2 (Z);`
			`LSETUP ( m$0 , m$0end ) LC0 = P5; // ** BEGIN m-LOOP ***`
			`m$0:`

			`P5 = 8 (Z);`
			`LSETUP ( i$1 , i$1end ) LC1 = P5; // ** BEGIN i-LOOP ***`
			`i$1:`
			`R4.L = CC = BXORSHIFT( A0 , R2 ); // polynom0 -> CC`
			`R1 = ROT R1 BY 1; // CC -> R1`
			`R4.L = CC = BXOR( A0 , R3 ); // polynom1 -> CC`
			`i$1end:`
			`R1 = ROT R1 BY 1; // CC -> R1`

			`// store 16 bits of outdata RL1`
			`m$0end:`
			`W [ P1 ++ ] = R1;`

			`P4 += -1;`
			`CC = P4 == 0;`
			`IF !CC JUMP l$0; // ** END l-LOOP ***`

			`// Check results`
			`loadsym I2, output;`
			`R0.L = W [ I2 ++ ]; DBGA ( R0.L , 0x8c62 );`
			`R0.L = W [ I2 ++ ]; DBGA ( R0.L , 0x262e );`
			`R0.L = W [ I2 ++ ]; DBGA ( R0.L , 0x5b4d );`
			`R0.L = W [ I2 ++ ]; DBGA ( R0.L , 0x834f );`
			`pass`

			`.data`
			`input:`
			`.dw 0x999f`
			`.dw 0x1999`

			`output:`
			`.dw 0x0000`
			`.dw 0x0000`
			`.dw 0x0000`
			`.dw 0x0000`