// ---------------------------------------------------------------------
// Symbolic names for the PowerPC general purpose registers used by the
// routines in this file.  The preprocessor substitutes them textually,
// so keep comments on their own lines (never after a #define).
//
// First group (r3-r12): volatile registers, free for scratch use.
//   PSOURCE   - base register for the word loads in the lighting bodies
//   PROWDEST  - base register for the word stores in the lighting bodies
//   MAP       - base register for the lbzx table lookups
//   TMP_A0/A1, TMP_B0/B1 - two interleaved pairs of temporaries
//   LIGHT, LIGHTSTEP, DELTA_STEP - running light value and its steps
// ---------------------------------------------------------------------
#define PSOURCE      r3
#define PROWDEST     r4
#define MAP          r5
#define TMP_A0	     r6
#define TMP_A1	     r7
#define TMP_B0	     r8
#define TMP_B1	     r9
#define LIGHT        r10
#define LIGHTSTEP    r11
#define DELTA_STEP   r12

// Second group (r15-r31): these are non-volatile (callee-saved) under the
// PowerPC ABIs, so a routine that writes any of them must save and
// restore it.  Several of these names are not referenced by the code
// visible in this file; NOTE(review): presumably they mirror the register
// map of a larger surface/lighting routine - confirm before pruning.
#define R_LIGHTPTR   r31
#define R_STEPBACK   r30
#define R_SOURCEMAX  r29
#define R_LIGHTWIDTH r28
#define R_NUMVBLOCKS r27
#define SURFROWBYTES r26
#define SOURCETSTEP  r25
#define NEXTLIGHT_L  r24
#define NEXTLIGHT_R  r23
#define LIGHT_L      r22
#define LIGHT_R      r21
#define LIGHT_DELTA  r20
#define LIGHT_L_STEP r19
#define LIGHT_R_STEP r18
#define V_COUNT      r17
// LOAD_0 holds the word most recently loaded from PSOURCE; STORE_0 is the
// word being assembled byte by byte before it is stored to PROWDEST.
#define LOAD_0	     r16
#define STORE_0	     r15


// ---------------------------------------------------------------------
// _testFunc2 - timing loop made of add instructions, unrolled four times.
//
// In:   r3 = requested iteration count, treated as signed.  Each pass of
//            the loop does four iterations, so count/4 passes are run and
//            a remainder of 1-3 iterations is dropped.
// Out:  r3 = 0
// Uses: r0, r4, r5, r6, ctr, cr0, XER[CA] (all volatile).  Leaf routine,
//       no stack frame.
//
// A count below 4 (zero and negative values included) runs no passes.
// Without that check CTR would be loaded with 0 (or a negative value)
// and bdnz, which decrements before testing, would spin for up to 2^32
// passes.
// ---------------------------------------------------------------------
.globl _testFunc2
_testFunc2:
	srawi.	r0,r3,2		// r0 = count / 4 (we unroll by four); cr0 = result vs 0

	xor	r3,r3,r3	// r3 = 0 (also the value returned)
	xor	r4,r4,r4	// r4 = 0
	addis	r5,0,0x8000	// r5 = 0x80000000 (rA = 0 means literal zero)

	blelr			// fewer than four iterations requested: nothing to time
	mtctr	r0		// CTR = number of passes

L_LoopTop2:
	// Each pair is one add with no dependency on the previous pair
	// (r6 = r5 + r5) and one add chained through r3 (r3 = r3 + r3).
	add	r6,r5,r5
	add	r3,r3,r3

	add	r6,r5,r5
	add	r3,r3,r3

	add	r6,r5,r5
	add	r3,r3,r3

	add	r6,r5,r5
	add	r3,r3,r3

	// Decrement CTR and loop while it is non-zero (hinted taken)
	bdnz+   L_LoopTop2
	blr


// ---------------------------------------------------------------------
// _testFunc - harness that runs one candidate loop body CTR times.
//
// In:   r3 = number of passes, treated as unsigned.  Zero returns at once;
//            without that check bdnz (which decrements before testing)
//            would spin for 2^32 passes.
//       r4 = pointer to scratch memory (16 bytes, per the set-up comment
//            below); every pointer register is aimed at it.
// Out:  no defined result.
// Uses: r3-r12, ctr, cr0 (volatile); r15-r31 are saved and restored.
//
// The candidate bodies are all wrapped in "#if 0".  NOTE(review):
// presumably one is switched to "#if 1" at a time for measurement.
// r15 and r16 are included in the save area because the lighting bodies
// write STORE_0 (r15) and LOAD_0 (r16), which are non-volatile.
//
// NOTE(review): several disabled bodies use a rotate count of 32 in
// rlwinm.  The SH field is 5 bits, so this is equivalent to 0 and may be
// rejected by assemblers that range-check it - confirm before enabling.
// ---------------------------------------------------------------------
.globl _testFunc
_testFunc:
	// Nothing to do for a zero count (must precede the register save).
	cmplwi	r3,0
	beqlr

	// prolog.  since we are leaf, don't store lr
	// Saves r15-r31 (17 words) at -100..-33 below the stack pointer without
	// moving r1.  NOTE(review): this relies on the ABI letting a leaf
	// routine use memory below r1 (red zone) - confirm for the target.
	stmw	r15,-100(r1)

	mtctr	r3    	// Set the count register

	// set up the registers so that we don't reference invalid memory.
	//r4 points to 16 bytes of memory.
	or	PSOURCE,r4,r4
	or	PROWDEST,r4,r4
	or	MAP,r4,r4
	or	R_LIGHTPTR,r4,r4
	// All steps and light values are zeroed, so pointers never advance and
	// every table index stays within the first 256 bytes past MAP.
	xor	SURFROWBYTES, SURFROWBYTES, SURFROWBYTES
	xor	SOURCETSTEP, SOURCETSTEP, SOURCETSTEP
	xor	LIGHT_DELTA, LIGHT_DELTA, LIGHT_DELTA
	xor	LIGHT,LIGHT,LIGHT
	xor	LIGHT_R, LIGHT_R, LIGHT_R
	xor	LIGHT_R_STEP, LIGHT_R_STEP, LIGHT_R_STEP
	xor	DELTA_STEP, DELTA_STEP, DELTA_STEP

L_LoopTop:

// Disabled body: one byte load and one byte store padded with
// "or rN,rN,rN" instructions, which leave their register unchanged.
#if 0
	lbz	r5,0(r4)
	or	r0,r0,r0

	or	r3,r3,r3
	or	r6,r6,r6

	or	r10,r10,r10
	or	r11,r11,r11

	stb	r5,0(r4)
	or	r7,r7,r7

	or	r8,r8,r8
	or	r9,r9,r9

	or	r12,r12,r12
#endif

// Disabled body: byte loads into r3, each followed by an xor that reads r3.
#if 0
	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
	lbz	r3,0(r4)

	xor	r5,r3,r3
#endif

// Disabled body: set r0 to all ones, then insert one byte of r3 into each
// byte lane of r0 with rlwimi.
#if 0
	xor	r0,r0,r0
	nor	r0,r0,r0
	rlwimi	r0,r3,0,24,31	// insert the LSB
	rlwimi	r0,r3,8,16,23	// 
	rlwimi	r0,r3,16,8,15	// 
	rlwimi	r0,r3,24,0,7	// insert the MSB
#endif

// Disabled body: extract each byte of r3 into the low byte of r0 with rlwinm.
#if 0
	rlwinm	r0,r3,32,24,31	// extract the LSB
	rlwinm	r0,r3,24,24,31	// 
	rlwinm	r0,r3,16,24,31	// 
	rlwinm	r0,r3,8,24,31	// extract the MSB
#endif

#if 0
	// mixing word stores and loads
	lwz	r3,0(r4)
	stw	r5,0(r4)
	lwz	r6,0(r4)
	stw	r7,0(r4)

	lwz	r3,0(r4)
	stw	r5,0(r4)
	lwz	r6,0(r4)
	stw	r7,0(r4)

	lwz	r3,0(r4)
	stw	r5,0(r4)
	lwz	r6,0(r4)
	stw	r7,0(r4)

	lwz	r3,0(r4)
	stw	r5,0(r4)
	lwz	r6,0(r4)
	stw	r7,0(r4)
#endif

#if 0
	// Unaligned writes
	// (alignment depends on the r4 value supplied by the caller)
	stw	r3,0(r4)
	stw	r3,0(r4)
	stw	r3,0(r4)
	stw	r3,0(r4)
	stw	r3,0(r4)
	stw	r3,0(r4)
	stw	r3,0(r4)
	stw	r3,0(r4)
#endif

#if 0
	// Unaligned reads
	// (alignment depends on the r4 value supplied by the caller)
	lwz	r3,0(r4)
	lwz	r3,0(r4)
	lwz	r3,0(r4)
	lwz	r3,0(r4)
	lwz	r3,0(r4)
	lwz	r3,0(r4)
	lwz	r3,0(r4)
	lwz	r3,0(r4)
#endif

// Disabled body: lighting loop, first schedule.  Loads four words from
// PSOURCE (offsets 12, 8, 4, 0); for each source byte it adds bits 16-23
// of the running LIGHT value, looks the sum up in MAP with lbzx, packs
// the result bytes into STORE_0 and stores four words to PROWDEST.
// LIGHT advances by LIGHTSTEP after each sample.
#if 0
	srawi	LIGHTSTEP,LIGHT_DELTA,4
	add	LIGHT_DELTA,LIGHT_DELTA,DELTA_STEP

	or	LIGHT,LIGHT_R,LIGHT_R
	lwz	LOAD_0,12(PSOURCE)  

	add	LIGHT_R,LIGHT_R,LIGHT_R_STEP
	rlwinm	TMP_A1,LIGHT,0,16,23

	add	LIGHT,LIGHT,LIGHTSTEP
	rlwinm	TMP_B0,LOAD_0,24,24,31

	rlwinm  TMP_A0,LOAD_0,32,24,31
	rlwinm	TMP_B1,LIGHT,0,16,23

	add	LIGHT,LIGHT,LIGHTSTEP
	add	TMP_A0,TMP_A0,TMP_A1

	add	TMP_B0,TMP_B0,TMP_B1
	lbzx	TMP_A1,MAP,TMP_A0

	rlwinm	TMP_A0,LOAD_0,16,24,31
	lbzx	TMP_B1,MAP,TMP_B0

	rlwinm	TMP_B0,LOAD_0,8,24,31
	rlwimi	STORE_0,TMP_A1,0,24,31

	rlwinm	TMP_A1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	rlwimi	STORE_0,TMP_B1,8,16,23
	rlwinm	TMP_B1,LIGHT,0,16,23

	add	TMP_A0,TMP_A0,TMP_A1
	lwz	LOAD_0,8(PSOURCE)

	add	TMP_B0,TMP_B0,TMP_B1
	lbzx	TMP_A1,MAP,TMP_A0

	add	LIGHT,LIGHT,LIGHTSTEP
	rlwinm  TMP_A0,LOAD_0,32,24,31

	lbzx	TMP_B1,MAP,TMP_B0
	rlwinm  TMP_B0,LOAD_0,24,24,31

	rlwimi	STORE_0,TMP_A1,16,8,15
	rlwinm	TMP_A1,LIGHT,0,16,23

	add	LIGHT,LIGHT,LIGHTSTEP
	add	TMP_A0,TMP_A0,TMP_A1

	rlwimi	STORE_0,TMP_B1,24,0,7
	rlwinm	TMP_B1,LIGHT,0,16,23

	add	LIGHT,LIGHT,LIGHTSTEP
	stw	STORE_0,12(PROWDEST)

	lbzx	TMP_A1,MAP,TMP_A0
	rlwinm  TMP_A0,LOAD_0,16,24,31

	add	TMP_B0,TMP_B0,TMP_B1
	lbzx	TMP_B1,MAP,TMP_B0

	rlwinm  TMP_B0,LOAD_0,8,24,31
	rlwimi	STORE_0,TMP_A1,0,24,31

	rlwinm	TMP_A1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	rlwimi	STORE_0,TMP_B1,8,16,23
	rlwinm	TMP_B1,LIGHT,0,16,23

	add	TMP_A0,TMP_A0,TMP_A1
	lwz	LOAD_0,4(PSOURCE)

	add	TMP_B0,TMP_B0,TMP_B1
	lbzx	TMP_A1,MAP,TMP_A0

	add	LIGHT,LIGHT,LIGHTSTEP
	rlwinm  TMP_A0,LOAD_0,32,24,31

	lbzx	TMP_B1,MAP,TMP_B0
	rlwinm  TMP_B0,LOAD_0,24,24,31

	rlwimi	STORE_0,TMP_A1,16,8,15
	rlwinm	TMP_A1,LIGHT,0,16,23

	add	LIGHT,LIGHT,LIGHTSTEP
	add	TMP_A0,TMP_A0,TMP_A1

	rlwimi	STORE_0,TMP_B1,24,0,7
	rlwinm	TMP_B1,LIGHT,0,16,23

	add	LIGHT,LIGHT,LIGHTSTEP
	stw	STORE_0,8(PROWDEST)

	lbzx	TMP_A1,MAP,TMP_A0
	rlwinm  TMP_A0,LOAD_0,16,24,31

	add	TMP_B0,TMP_B0,TMP_B1
	lbzx	TMP_B1,MAP,TMP_B0

	rlwinm  TMP_B0,LOAD_0,8,24,31
	rlwimi	STORE_0,TMP_A1,0,24,31

	rlwinm	TMP_A1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	rlwimi	STORE_0,TMP_B1,8,16,23
	add	TMP_A0,TMP_A0,TMP_A1

	rlwinm	TMP_B1,LIGHT,0,16,23
	lbzx	TMP_A1,MAP,TMP_A0

	add	LIGHT,LIGHT,LIGHTSTEP
	add	TMP_B0,TMP_B0,TMP_B1

	lwz	LOAD_0,0(PSOURCE)
	add	PSOURCE,PSOURCE,SOURCETSTEP

	rlwimi	STORE_0,TMP_A1,16,8,15
	rlwinm	TMP_A1,LIGHT,0,16,23

	add	LIGHT,LIGHT,LIGHTSTEP
	rlwinm  TMP_A0,LOAD_0,32,24,31

	lbzx	TMP_B1,MAP,TMP_B0
	add	TMP_A0,TMP_A0,TMP_A1

	rlwinm  TMP_B0,LOAD_0,24,24,31
	lbzx	TMP_A1,MAP,TMP_A0

	rlwinm  TMP_A0,LOAD_0,16,24,31
	rlwimi	STORE_0,TMP_B1,24,0,7

	rlwinm	TMP_B1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	stw	STORE_0,4(PROWDEST)
	add	TMP_B0,TMP_B0,TMP_B1

	rlwimi	STORE_0,TMP_A1,0,24,31
	rlwinm	TMP_A1,LIGHT,0,16,23

	lbzx	TMP_B1,MAP,TMP_B0
	add	TMP_A0,TMP_A0,TMP_A1

	rlwinm  TMP_B0,LOAD_0,8,24,31
	lbzx	TMP_A1,MAP,TMP_A0

	add	LIGHT,LIGHT,LIGHTSTEP
	rlwimi	STORE_0,TMP_B1,8,16,23

	rlwinm	TMP_B1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	rlwimi	STORE_0,TMP_A1,16,8,15
	// Empty

	add	TMP_B0,TMP_B0,TMP_B1
	lbzx	TMP_B1,MAP,TMP_B0
	rlwimi	STORE_0,TMP_B1,24,0,7
	stw	STORE_0,0(PROWDEST)
	add	PROWDEST,PROWDEST,SURFROWBYTES
#endif

// Disabled body: lighting loop, second schedule.  NOTE(review): appears
// to be the same computation as the body above with the instructions
// ordered differently and padded with "or rN,rN,rN" at the end - confirm
// before treating the two as interchangeable.
#if 0
	srawi	LIGHTSTEP,LIGHT_DELTA,4
	add	LIGHT_DELTA,LIGHT_DELTA,DELTA_STEP

	or	LIGHT,LIGHT_R,LIGHT_R
	add	LIGHT_R,LIGHT_R,LIGHT_R_STEP

	rlwinm	TMP_A1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	rlwinm	TMP_B1,LIGHT,0,16,23
	lwz	LOAD_0,12(PSOURCE)

	add	LIGHT,LIGHT,LIGHTSTEP
	rlwinm	TMP_B0,LOAD_0,24,24,31

	add	TMP_B0,TMP_B0,TMP_B1
	rlwinm  TMP_A0,LOAD_0,32,24,31

	add	TMP_A0,TMP_A0,TMP_A1
	lbzx	TMP_B1,MAP,TMP_B0

	rlwinm	TMP_B0,LOAD_0,8,24,31
	lbzx	TMP_A1,MAP,TMP_A0

	rlwinm	TMP_A0,LOAD_0,16,24,31
	rlwimi	STORE_0,TMP_A1,0,24,31

	rlwinm	TMP_A1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	add	TMP_A0,TMP_A0,TMP_A1
	rlwimi	STORE_0,TMP_B1,8,16,23

	rlwinm	TMP_B1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	add	TMP_B0,TMP_B0,TMP_B1
	lwz	LOAD_0,8(PSOURCE)

	lbzx	TMP_A1,MAP,TMP_A0
	rlwinm  TMP_A0,LOAD_0,32,24,31

	rlwimi	STORE_0,TMP_A1,16,8,15
	rlwinm	TMP_A1,LIGHT,0,16,23

	add	TMP_A0,TMP_A0,TMP_A1
	lbzx	TMP_B1,MAP,TMP_B0

	rlwinm  TMP_B0,LOAD_0,24,24,31
	lbzx	TMP_A1,MAP,TMP_A0

	rlwinm  TMP_A0,LOAD_0,16,24,31
	add	LIGHT,LIGHT,LIGHTSTEP

	rlwimi	STORE_0,TMP_B1,24,0,7
	rlwinm	TMP_B1,LIGHT,0,16,23

	add	TMP_B0,TMP_B0,TMP_B1
	stw	STORE_0,12(PROWDEST)

	add	LIGHT,LIGHT,LIGHTSTEP
	rlwimi	STORE_0,TMP_A1,0,24,31

	rlwinm	TMP_A1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	add	TMP_A0,TMP_A0,TMP_A1
	lbzx	TMP_B1,MAP,TMP_B0

	rlwinm  TMP_B0,LOAD_0,8,24,31
	rlwimi	STORE_0,TMP_B1,8,16,23

	rlwinm	TMP_B1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	add	TMP_B0,TMP_B0,TMP_B1
	lwz	LOAD_0,4(PSOURCE)

	lbzx	TMP_A1,MAP,TMP_A0
	rlwinm  TMP_A0,LOAD_0,32,24,31

	rlwimi	STORE_0,TMP_A1,16,8,15
	rlwinm	TMP_A1,LIGHT,0,16,23

	add	TMP_A0,TMP_A0,TMP_A1
	lbzx	TMP_B1,MAP,TMP_B0

	rlwinm  TMP_B0,LOAD_0,24,24,31
	lbzx	TMP_A1,MAP,TMP_A0

	rlwinm  TMP_A0,LOAD_0,16,24,31
	add	LIGHT,LIGHT,LIGHTSTEP

	rlwimi	STORE_0,TMP_B1,24,0,7
	rlwinm	TMP_B1,LIGHT,0,16,23

	add	TMP_B0,TMP_B0,TMP_B1
	stw	STORE_0,8(PROWDEST)

	add	LIGHT,LIGHT,LIGHTSTEP
	rlwimi	STORE_0,TMP_A1,0,24,31

	rlwinm	TMP_A1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	add	TMP_A0,TMP_A0,TMP_A1
	lbzx	TMP_B1,MAP,TMP_B0

	rlwinm  TMP_B0,LOAD_0,8,24,31
	rlwimi	STORE_0,TMP_B1,8,16,23

	rlwinm	TMP_B1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	add	TMP_B0,TMP_B0,TMP_B1
	lwz	LOAD_0,0(PSOURCE)

	add	PSOURCE,PSOURCE,SOURCETSTEP
	lbzx	TMP_B1,MAP,TMP_B0

	rlwinm  TMP_B0,LOAD_0,24,24,31
	lbzx	TMP_A1,MAP,TMP_A0

	rlwinm  TMP_A0,LOAD_0,32,24,31
	rlwimi	STORE_0,TMP_A1,16,8,15

	rlwinm	TMP_A1,LIGHT,0,16,23
	rlwimi	STORE_0,TMP_B1,24,0,7

	add	LIGHT,LIGHT,LIGHTSTEP
	stw	STORE_0,4(PROWDEST)

	rlwinm	TMP_B1,LIGHT,0,16,23
	add	LIGHT,LIGHT,LIGHTSTEP

	add	TMP_B0,TMP_B0,TMP_B1
	add	TMP_A0,TMP_A0,TMP_A1

	lbzx	TMP_B1,MAP,TMP_B0
	rlwinm  TMP_B0,LOAD_0,8,24,31

	lbzx	TMP_A1,MAP,TMP_A0
	rlwinm  TMP_A0,LOAD_0,16,24,31

	rlwimi	STORE_0,TMP_A1,0,24,31
	rlwinm	TMP_A1,LIGHT,0,16,23

	add	TMP_A0,TMP_A0,TMP_A1
	add	LIGHT,LIGHT,LIGHTSTEP

	rlwimi	STORE_0,TMP_B1,8,16,23
	rlwinm	TMP_B1,LIGHT,0,16,23

	add	TMP_B0,TMP_B0,TMP_B1
	lbzx	TMP_A1,MAP,TMP_A0

	add	LIGHT,LIGHT,LIGHTSTEP
	rlwimi	STORE_0,TMP_A1,16,8,15

	lbzx	TMP_B1,MAP,TMP_B0
	or	r3,r3,r3

	rlwimi	STORE_0,TMP_B1,24,0,7
	or	r5,r5,r5

	or	r0,r0,r0
	or	r4,r4,r4

	stw	STORE_0,0(PROWDEST)
	add	PROWDEST,PROWDEST,SURFROWBYTES

	or	r0,r0,r0
	or	r4,r4,r4

	or	r5,r5,r5
#endif

	// Decrement count and branch if not zero (hint that we will branch)
	bdnz+   L_LoopTop

	// Restore non-volatile registers (must match the stmw in the prolog)
	lmw	r15,-100(r1)
	// Return!
	blr




	
	
	
