By: anon (anon.delete@this.anon.com), November 16, 2012 4:07 am
Room: Moderated Discussions
Felid (Felid.delete@this.mailinator.com) on November 15, 2012 3:19 pm wrote:
> Try replacing MOVAPS #2 with «xmm1, xmm2» (one-way dependence), and then with «xmm2,
> xmm3» (no dependence). To rule out a possible port bottleneck, it is also worth testing
> with GPRs, but not 8- or 16-bit ones :) This will give more information on how the logic works.
Some measurements:
; Test: two MOVAPS forming an open chain xmm0 -> xmm1 -> xmm2
; (Intel operand order: destination first). xmm0 is never written inside
; the loop, so no XMM value is carried from one iteration to the next.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; reads the xmm1 written by the previous instruction
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 2 clk/loop
; Test: two independent MOVAPS (Intel operand order: destination first).
; Neither move reads a register that the other one writes, and no source
; register (xmm0, xmm2) is written inside the loop.
loop:
movaps xmm1, xmm0
movaps xmm3, xmm2 ; unrelated register pair - no dependence on the move above
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 2.1-2.2 clk/loop - unstable
; Test: three MOVAPS forming a closed ring xmm0 -> xmm1 -> xmm2 -> xmm0
; (Intel operand order: destination first). The last move rewrites xmm0,
; which the first move of the next iteration reads, so the chain is
; carried across iterations.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; reads the xmm1 written just above
movaps xmm0, xmm2 ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3 clk/loop
; Test: the two-move chain xmm0 -> xmm1 -> xmm2 issued twice per iteration
; (four MOVAPS in the body, Intel operand order: destination first).
; xmm0 is never written inside the loop.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; reads the xmm1 written just above
movaps xmm1, xmm0 ; second copy of the pair: overwrites xmm1 with xmm0 again
movaps xmm2, xmm1 ; reads the xmm1 written just above
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3 clk/loop
; Test: four MOVAPS forming a closed ring
; xmm0 -> xmm1 -> xmm2 -> xmm3 -> xmm0 (Intel operand order: destination
; first). The last move rewrites xmm0, which the first move of the next
; iteration reads, so the chain is carried across iterations.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; each move reads the register written by the one before it
movaps xmm3, xmm2
movaps xmm0, xmm3 ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3.33 clk/loop
; Test: four MOVAPS forming an open chain
; xmm0 -> xmm1 -> xmm2 -> xmm3 -> xmm4 (Intel operand order: destination
; first). xmm0 is never written inside the loop, so no XMM value is
; carried from one iteration to the next.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; each move reads the register written by the one before it
movaps xmm3, xmm2
movaps xmm4, xmm3 ; end of the chain: xmm4 is not read anywhere in the loop
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3.5 clk/loop
; Test: four mutually independent MOVAPS (Intel operand order: destination
; first). Every move copies an even-numbered register into the next
; odd-numbered one; no move reads a register written inside the loop.
loop:
movaps xmm1, xmm0
movaps xmm3, xmm2
movaps xmm5, xmm4
movaps xmm7, xmm6
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3.67 clk/loop
; Test: general-purpose-register variant using 32-bit MOVs in a two-move
; ring eax -> edx -> eax (Intel operand order: destination first). The
; second move rewrites eax, which the first move of the next iteration reads.
loop:
mov edx, eax
mov eax, edx ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 1 clk/loop
; Test: general-purpose-register variant using 32-bit MOVs in a three-move
; ring eax -> edx -> edi -> eax (Intel operand order: destination first).
; The last move rewrites eax, which the first move of the next iteration reads.
loop:
mov edx, eax
mov edi, edx ; reads the edx written just above
mov eax, edi ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 2 clk/loop
; Test: general-purpose-register variant using 32-bit MOVs in a four-move
; ring eax -> edx -> edi -> esi -> eax (Intel operand order: destination
; first). The last move rewrites eax, which the first move of the next
; iteration reads.
loop:
mov edx, eax
mov edi, edx ; each move reads the register written by the one before it
mov esi, edi
mov eax, esi ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 2.33 clk/loop
> Try replacing MOVAPS #2 with «xmm1, xmm2» (one-way dependence), and then with «xmm2,
> xmm3» (no dependence). To rule out a possible port bottleneck, it is also worth testing
> with GPRs, but not 8- or 16-bit ones :) This will give more information on how the logic works.
Some measurements:
; Test: two MOVAPS forming an open chain xmm0 -> xmm1 -> xmm2
; (Intel operand order: destination first). xmm0 is never written inside
; the loop, so no XMM value is carried from one iteration to the next.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; reads the xmm1 written by the previous instruction
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 2 clk/loop
; Test: two independent MOVAPS (Intel operand order: destination first).
; Neither move reads a register that the other one writes, and no source
; register (xmm0, xmm2) is written inside the loop.
loop:
movaps xmm1, xmm0
movaps xmm3, xmm2 ; unrelated register pair - no dependence on the move above
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 2.1-2.2 clk/loop - unstable
; Test: three MOVAPS forming a closed ring xmm0 -> xmm1 -> xmm2 -> xmm0
; (Intel operand order: destination first). The last move rewrites xmm0,
; which the first move of the next iteration reads, so the chain is
; carried across iterations.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; reads the xmm1 written just above
movaps xmm0, xmm2 ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3 clk/loop
; Test: the two-move chain xmm0 -> xmm1 -> xmm2 issued twice per iteration
; (four MOVAPS in the body, Intel operand order: destination first).
; xmm0 is never written inside the loop.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; reads the xmm1 written just above
movaps xmm1, xmm0 ; second copy of the pair: overwrites xmm1 with xmm0 again
movaps xmm2, xmm1 ; reads the xmm1 written just above
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3 clk/loop
; Test: four MOVAPS forming a closed ring
; xmm0 -> xmm1 -> xmm2 -> xmm3 -> xmm0 (Intel operand order: destination
; first). The last move rewrites xmm0, which the first move of the next
; iteration reads, so the chain is carried across iterations.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; each move reads the register written by the one before it
movaps xmm3, xmm2
movaps xmm0, xmm3 ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3.33 clk/loop
; Test: four MOVAPS forming an open chain
; xmm0 -> xmm1 -> xmm2 -> xmm3 -> xmm4 (Intel operand order: destination
; first). xmm0 is never written inside the loop, so no XMM value is
; carried from one iteration to the next.
loop:
movaps xmm1, xmm0
movaps xmm2, xmm1 ; each move reads the register written by the one before it
movaps xmm3, xmm2
movaps xmm4, xmm3 ; end of the chain: xmm4 is not read anywhere in the loop
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3.5 clk/loop
; Test: four mutually independent MOVAPS (Intel operand order: destination
; first). Every move copies an even-numbered register into the next
; odd-numbered one; no move reads a register written inside the loop.
loop:
movaps xmm1, xmm0
movaps xmm3, xmm2
movaps xmm5, xmm4
movaps xmm7, xmm6
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 3.67 clk/loop
; Test: general-purpose-register variant using 32-bit MOVs in a two-move
; ring eax -> edx -> eax (Intel operand order: destination first). The
; second move rewrites eax, which the first move of the next iteration reads.
loop:
mov edx, eax
mov eax, edx ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 1 clk/loop
; Test: general-purpose-register variant using 32-bit MOVs in a three-move
; ring eax -> edx -> edi -> eax (Intel operand order: destination first).
; The last move rewrites eax, which the first move of the next iteration reads.
loop:
mov edx, eax
mov edi, edx ; reads the edx written just above
mov eax, edi ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 2 clk/loop
; Test: general-purpose-register variant using 32-bit MOVs in a four-move
; ring eax -> edx -> edi -> esi -> eax (Intel operand order: destination
; first). The last move rewrites eax, which the first move of the next
; iteration reads.
loop:
mov edx, eax
mov edi, edx ; each move reads the register written by the one before it
mov esi, edi
mov eax, esi ; closes the ring: feeds the first move of the next iteration
dec ecx ; ecx = remaining iterations (initial value not shown here)
jnz loop ; 2.33 clk/loop



