Article: Knights Landing Details
By: Eric Bron (eric.bron.delete@this.zvisuel.privatefortest.com), January 6, 2014 11:37 am
Room: Moderated Discussions
> That's exactly what I was guessing.
in order to be complete, here is a simplistic example with full source code showing well the full compiler behavior
source:
#define INLINE _forceinline
class V8Double
{
__m512d m7_0;
INLINE V8Double (const __m512d &vm512d) : m7_0(vm512d) {}
public:
INLINE explicit V8Double (double val) : m7_0( _mm512_set1_pd(val)) {}
INLINE explicit V8Double (const double *v) : m7_0(_mm512_load_pd(v)) {}
INLINE friend V8Double operator + (const V8Double &a, const V8Double &b) {return _mm512_add_pd(a.m7_0,b.m7_0);}
INLINE friend V8Double operator * (const V8Double &a, const V8Double &b) {return _mm512_mul_pd(a.m7_0,b.m7_0);}
INLINE friend void Store (void *v, const V8Double &a) {_mm512_store_pd((float *)v,a.m7_0);}
};
class V8Poly3
{
const V8Double a3,a2,a1,a0; // a3*x^3 + a2*x^2 + a1*x + a0
public:
INLINE V8Poly3 (REAL c3, REAL c2, REAL c1, REAL c0) : a3(c3), a2(c2), a1(c1), a0(c0) {}
INLINE V8Double val (const V8Double &x) const {return ((a3*x+a2)*x+a1)*x+a0;}
};
void Demo (const double *src, double *dst, int size)
{
const V8Poly3 poly(1.2,3.4,5.6,7.8);
for (int i=0; i<size; i+=8) Store(dst+i,poly.val(V8Double(src+i)));
}
ASM:
xor eax, eax ;71.13
movsxd r8, r8d ;69.1
vbroadcastsd zmm3, QWORD PTR [_2il0floatpacket.260] ;70.34
vbroadcastsd zmm2, QWORD PTR [_2il0floatpacket.261] ;70.34
vbroadcastsd zmm1, QWORD PTR [_2il0floatpacket.262] ;70.34
vbroadcastsd zmm0, QWORD PTR [_2il0floatpacket.263] ;70.34
test r8, r8 ;71.19
jle .B2.5 ; Prob 10% ;71.19
vmovups zmm4, ZMMWORD PTR [rcx+rax*8] ;71.61
vfmadd213pd zmm4, zmm3, zmm2 ;71.48
vfmadd132pd zmm4, zmm1, ZMMWORD PTR [rcx+rax*8] ;71.48
vfmadd132pd zmm4, zmm0, ZMMWORD PTR [rcx+rax*8] ;71.48
vmovups ZMMWORD PTR [rdx+rax*8], zmm4 ;71.31
add rax, 8 ;71.25
cmp rax, r8 ;71.19
jl .B2.3 ; Prob 82% ;71.19
ret
[rcx+rax*8] is used redundantly, even more with higher order polynomials
in order to be complete, here is a simplistic example with full source code showing well the full compiler behavior
source:
#define INLINE _forceinline
class V8Double
{
__m512d m7_0;
INLINE V8Double (const __m512d &vm512d) : m7_0(vm512d) {}
public:
INLINE explicit V8Double (double val) : m7_0( _mm512_set1_pd(val)) {}
INLINE explicit V8Double (const double *v) : m7_0(_mm512_load_pd(v)) {}
INLINE friend V8Double operator + (const V8Double &a, const V8Double &b) {return _mm512_add_pd(a.m7_0,b.m7_0);}
INLINE friend V8Double operator * (const V8Double &a, const V8Double &b) {return _mm512_mul_pd(a.m7_0,b.m7_0);}
INLINE friend void Store (void *v, const V8Double &a) {_mm512_store_pd((float *)v,a.m7_0);}
};
class V8Poly3
{
const V8Double a3,a2,a1,a0; // a3*x^3 + a2*x^2 + a1*x + a0
public:
INLINE V8Poly3 (REAL c3, REAL c2, REAL c1, REAL c0) : a3(c3), a2(c2), a1(c1), a0(c0) {}
INLINE V8Double val (const V8Double &x) const {return ((a3*x+a2)*x+a1)*x+a0;}
};
void Demo (const double *src, double *dst, int size)
{
const V8Poly3 poly(1.2,3.4,5.6,7.8);
for (int i=0; i<size; i+=8) Store(dst+i,poly.val(V8Double(src+i)));
}
ASM:
xor eax, eax ;71.13
movsxd r8, r8d ;69.1
vbroadcastsd zmm3, QWORD PTR [_2il0floatpacket.260] ;70.34
vbroadcastsd zmm2, QWORD PTR [_2il0floatpacket.261] ;70.34
vbroadcastsd zmm1, QWORD PTR [_2il0floatpacket.262] ;70.34
vbroadcastsd zmm0, QWORD PTR [_2il0floatpacket.263] ;70.34
test r8, r8 ;71.19
jle .B2.5 ; Prob 10% ;71.19
vmovups zmm4, ZMMWORD PTR [rcx+rax*8] ;71.61
vfmadd213pd zmm4, zmm3, zmm2 ;71.48
vfmadd132pd zmm4, zmm1, ZMMWORD PTR [rcx+rax*8] ;71.48
vfmadd132pd zmm4, zmm0, ZMMWORD PTR [rcx+rax*8] ;71.48
vmovups ZMMWORD PTR [rdx+rax*8], zmm4 ;71.31
add rax, 8 ;71.25
cmp rax, r8 ;71.19
jl .B2.3 ; Prob 82% ;71.19
ret
[rcx+rax*8] is used redundantly, even more with higher order polynomials
Topic | Posted By | Date |
---|---|---|
Knights Landing details (new article) | David Kanter | 2014/01/03 12:58 AM |
eDRAM as cache | iz | 2014/01/03 04:39 AM |
eDRAM options | Eric Bron | 2014/01/09 03:45 AM |
Knights Landing details (new article) | Emil Briggs | 2014/01/03 06:06 AM |
Knights Landing details (new article) | Michael S | 2014/01/03 07:05 AM |
PCI-E and QPI | David Kanter | 2014/01/03 12:11 PM |
eDRAM still seems too expensive ... | Mark Roulo | 2014/01/03 10:48 AM |
Nevermind ... I see that you addressed this :-) | Mark Roulo | 2014/01/03 10:51 AM |
eDRAM still seems too expensive ... | Eric Bron | 2014/01/03 01:42 PM |
eDRAM or stacked DRAM? | Patrick Chase | 2014/01/03 11:21 AM |
eDRAM or stacked DRAM? | Wes Felter | 2014/01/03 03:00 PM |
eDRAM or stacked DRAM? | Patrick Chase | 2014/01/03 07:26 PM |
eDRAM or stacked DRAM? | tarlinian | 2014/06/23 09:59 PM |
eDRAM or stacked DRAM? | Maynard Handley | 2014/06/24 01:47 AM |
eDRAM or stacked DRAM? | Michael S | 2014/06/24 03:13 AM |
eDRAM or stacked DRAM? | David Kanter | 2014/06/24 12:09 PM |
eDRAM or stacked DRAM? | anon | 2014/06/24 07:50 PM |
eDRAM or stacked DRAM? | Eric Bron | 2014/06/24 10:02 PM |
eDRAM or stacked DRAM? | anon | 2014/06/24 10:39 PM |
eDRAM or stacked DRAM? | Michael S | 2014/06/25 01:46 AM |
eDRAM or stacked DRAM? | Michael S | 2014/06/25 01:29 AM |
eDRAM or stacked DRAM? | Eric Bron | 2014/06/24 05:37 AM |
eDRAM or stacked DRAM? | tarlinian | 2014/06/24 08:53 AM |
eDRAM or stacked DRAM? | Eric Bron | 2014/06/24 09:09 AM |
eDRAM or stacked DRAM? | tarlinian | 2014/06/24 09:40 AM |
eDRAM or stacked DRAM? | Eric Bron | 2014/06/24 10:10 AM |
eDRAM or stacked DRAM? | Eric Bron | 2014/06/24 10:12 AM |
eDRAM or stacked DRAM? | Wes Felter | 2014/06/24 10:09 PM |
eDRAM or stacked DRAM? | Michael S | 2014/06/25 02:02 AM |
Why not tag-inclusive L3? | Paul A. Clayton | 2014/01/03 04:28 PM |
Why not tag-inclusive L3? | Eric Bron | 2014/01/04 03:22 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/04 05:43 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/04 06:20 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/04 02:55 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/04 03:27 PM |
Knights Landing L/S bandwidth | hobold | 2014/01/04 04:23 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/04 05:20 PM |
Knights Landing L/S bandwidth | Michael S | 2014/01/05 03:42 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/05 03:49 AM |
Knights Landing L/S bandwidth | Patrick Chase | 2014/01/11 08:13 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/13 08:39 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/05 03:18 PM |
Knights Landing L/S bandwidth | Michael S | 2014/01/06 04:09 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 05:11 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/06 05:40 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 05:54 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/08 09:00 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/07 03:31 PM |
Knights Landing L/S bandwidth | Michael S | 2014/01/07 04:17 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/07 09:55 PM |
Knights Landing L/S bandwidth | Michael S | 2014/01/08 01:42 AM |
Knights Landing L/S bandwidth | Gabriele Svelto | 2014/01/08 08:30 AM |
Occam's razor | Nicolas Capens | 2014/01/08 02:33 PM |
Occam's razor | Gabriele Svelto | 2014/01/08 02:51 PM |
Occam's razor | Eric Bron | 2014/01/08 03:28 PM |
Occam's razor | bakaneko | 2014/01/09 04:45 AM |
Occam's razor | anon | 2014/01/09 05:02 AM |
Occam's razor | bakaneko | 2014/01/09 06:24 AM |
Occam's razor | bakaneko | 2014/01/09 06:51 AM |
Occam's razor | anon | 2014/01/09 07:18 AM |
Occam's razor | anon | 2014/01/09 07:16 AM |
Occam's razor | bakaneko | 2014/01/09 08:43 AM |
Occam's razor | anon | 2014/01/09 09:17 AM |
Occam's razor | bakaneko | 2014/01/09 11:12 AM |
Occam's razor | Eric Bron | 2014/01/09 11:18 AM |
Occam's razor | bakaneko | 2014/01/09 11:58 AM |
Occam's razor | anon | 2014/01/09 12:35 PM |
Occam's razor | bakaneko | 2014/01/12 10:48 AM |
99.9% not a new extension | Nicolas Capens | 2014/01/10 11:39 AM |
Compiler complexity | Gabriele Svelto | 2014/01/11 03:58 AM |
Compiler complexity | Nicolas Capens | 2014/01/11 01:20 PM |
Compiler complexity | Gabriele Svelto | 2014/01/11 03:17 PM |
Patent pending | Nicolas Capens | 2014/01/14 07:21 PM |
99.9% not a new extension | bakaneko | 2014/01/12 11:08 AM |
L0 data cache | Eric Bron | 2014/01/08 04:52 PM |
Occam's razor | David Kanter | 2014/01/08 04:53 PM |
Occam's razor | Nicolas Capens | 2014/01/09 03:07 AM |
Occam's razor | Ricardo B | 2014/01/09 05:21 AM |
Virtually indexed, untagged | Nicolas Capens | 2014/01/10 11:27 AM |
Virtually indexed, untagged | Gabriele Svelto | 2014/01/11 04:08 AM |
Virtually indexed, untagged | Nicolas Capens | 2014/01/11 09:45 PM |
Virtually indexed, untagged | David Kanter | 2014/01/12 02:13 AM |
Virtually indexed, untagged | anon | 2014/01/12 04:02 AM |
Virtually indexed, untagged | Nicolas Capens | 2014/01/16 09:55 AM |
Virtually indexed, untagged | Michael S | 2014/01/12 04:09 AM |
Virtually indexed, untagged | Nicolas Capens | 2014/01/16 10:47 AM |
Occam's razor | David Kanter | 2014/01/09 06:42 PM |
Occam's razor | Nicolas Capens | 2014/01/10 02:22 PM |
Occam's razor | David Kanter | 2014/01/10 04:06 PM |
MEM : ALU ratio | Nicolas Capens | 2014/01/11 12:24 AM |
MEM : ALU ratio | Gabriele Svelto | 2014/01/11 03:47 AM |
MEM : ALU ratio | Eric Bron | 2014/01/11 04:41 AM |
MEM : ALU ratio | Eric Bron | 2014/01/11 05:06 AM |
MEM : ALU ratio | David Kanter | 2014/01/11 08:28 PM |
MEM : ALU ratio | Eric Bron nli | 2014/01/12 02:54 AM |
MEM : ALU ratio | Gabriele Svelto | 2014/01/11 10:15 AM |
MEM : ALU ratio | Nicolas Capens | 2014/01/14 06:56 PM |
Etiquette in linking to papers | Paul A. Clayton | 2014/01/14 07:44 PM |
MEM : ALU ratio | anon | 2014/01/14 08:32 PM |
L0 power cost | Nicolas Capens | 2014/01/16 02:05 PM |
L0 power cost | anon | 2014/01/16 10:01 PM |
L0 power cost | Nicolas Capens | 2014/01/19 12:30 AM |
Links revealed | Paul A. Clayton | 2014/01/19 04:47 PM |
L0 power cost | anon | 2014/01/20 01:19 AM |
L0 power cost | Nicolas Capens | 2014/01/20 02:49 PM |
L0 power cost | anon | 2014/01/21 01:18 AM |
Q.E.D. | Nicolas Capens | 2014/01/21 08:44 PM |
Q.E.D. | anon | 2014/01/21 09:24 PM |
Straw man | Nicolas Capens | 2014/01/23 11:56 PM |
Straw man | anon | 2014/01/25 06:46 AM |
Still waiting for an explanation | Nicolas Capens | 2014/01/26 12:19 AM |
Still waiting for an explanation | Exophase | 2014/01/26 01:13 PM |
Still waiting for an explanation | bakaneko | 2014/01/26 11:52 PM |
Q.E.D. | Ricardo B | 2014/01/22 06:58 PM |
Q.E.D. | Michael S | 2014/01/23 04:59 AM |
L0 entry count | Nicolas Capens | 2014/01/24 01:11 AM |
L0 entry count | Eric Bron | 2014/01/24 02:08 AM |
L0 entry count | Michael S | 2014/01/24 06:18 AM |
L0 entry count | Eric Bron | 2014/01/24 07:15 AM |
L0 entry count | Michael S | 2014/01/24 08:10 AM |
L0 entry count | Eric Bron | 2014/01/24 08:20 AM |
L0 entry count | Nicolas Capens | 2014/01/24 02:33 PM |
L0 entry count | Eric Bron | 2014/01/24 03:20 PM |
L0 entry count and L1 read port orthogonality | Nicolas Capens | 2014/01/26 01:14 AM |
L0 entry count and L1 read port orthogonality | Eric Bron | 2014/01/26 03:49 AM |
L0 hit rate | Nicolas Capens | 2014/01/24 12:49 AM |
L0 hit rate | Ricardo B | 2014/01/24 06:42 AM |
L0 hit rate | Exophase | 2014/01/24 01:37 PM |
L0 hit rate | Eric Bron | 2014/01/24 02:12 PM |
L0 vs RF power | Nicolas Capens | 2014/01/24 02:43 PM |
MEM : ALU ratio | David Kanter | 2014/01/11 01:47 PM |
MEM : ALU ratio | Nicolas Capens | 2014/01/16 09:23 AM |
MEM : ALU ratio | Stubabe | 2014/01/17 12:58 PM |
MEM : ALU ratio | Stubabe | 2014/01/17 01:42 PM |
MEM : ALU ratio | Michael S | 2014/01/18 04:57 PM |
MEM : ALU ratio | bakaneko | 2014/01/19 12:47 AM |
MEM : ALU ratio | Nicolas Capens | 2014/01/20 03:48 PM |
It's called "tunnel vision" (NT) | iz | 2014/01/20 04:36 PM |
MEM : ALU ratio | Michael S | 2014/01/20 04:37 PM |
MEM : ALU ratio | Stubabe | 2014/01/21 04:54 PM |
MEM : ALU ratio | Nicolas Capens | 2014/01/21 10:07 PM |
MEM : ALU ratio | Michael S | 2014/01/22 08:17 AM |
MEM : ALU ratio | Nicolas Capens | 2014/01/24 03:33 PM |
MEM : ALU ratio | Stubabe | 2014/01/21 04:32 PM |
MEM : ALU ratio | Michael S | 2014/01/22 08:56 AM |
MEM : ALU ratio | Stubabe | 2014/01/23 09:06 AM |
MEM : ALU ratio | Eric Bron | 2014/01/23 09:45 AM |
edit | Eric Bron | 2014/01/23 09:49 AM |
MEM : ALU ratio | Michael S | 2014/01/23 09:58 AM |
MEM : ALU ratio | Eric Bron | 2014/01/23 10:29 AM |
MEM : ALU ratio | Michael S | 2014/01/23 10:33 AM |
MEM : ALU ratio | Stubabe | 2014/01/24 04:50 AM |
MEM : ALU ratio | bakaneko | 2014/01/23 10:36 AM |
MEM : ALU ratio | NoSpammer | 2014/01/11 03:39 PM |
L1 vs L0 access cost | Nicolas Capens | 2014/01/16 03:17 PM |
L1 vs L0 access cost | NoSpammer | 2014/01/19 01:48 PM |
L1 vs L0 access cost | dmcq | 2014/01/22 05:45 AM |
L1 vs L0 access cost | Gabriele Svelto | 2014/01/22 07:29 AM |
L1 vs L0 access cost | dmcq | 2014/01/22 01:33 PM |
L1 vs L0 access cost | Gabriele Svelto | 2014/01/22 04:33 PM |
L1 vs L0 access cost | dmcq | 2014/01/24 04:19 AM |
L1 vs L0 access cost | Nicolas Capens | 2014/01/24 02:16 AM |
Occam's razor | Patrick Chase | 2014/01/13 11:19 AM |
Occam's razor | Nicolas Capens | 2014/01/09 12:40 AM |
Occam's razor | Gabriele Svelto | 2014/01/09 02:41 AM |
Occam's razor | Eric Bron | 2014/01/09 02:54 AM |
Occam's razor | Gabriele Svelto | 2014/01/09 06:35 AM |
Occam's razor | Eric Bron | 2014/01/09 07:14 AM |
avoiding redundant loads | Eric Bron | 2014/01/09 07:18 AM |
AVX2 version | Eric Bron | 2014/01/09 07:32 AM |
Occam's razor | Amiba Gelos | 2014/01/09 03:01 AM |
Occam's razor | Eric Bron | 2014/01/09 03:06 AM |
Occam's razor | Amiba Gelos | 2014/01/09 03:43 AM |
Occam's razor | Eric Bron | 2014/01/09 04:02 AM |
L0 access latency | Nicolas Capens | 2014/01/09 04:27 AM |
L0 access latency | Amiba Gelos | 2014/01/09 05:16 AM |
compared to L0$ i would say banking is far more likely (NT) | Amiba Gelos | 2014/01/09 05:20 AM |
L0 access latency | Nicolas Capens | 2014/01/10 03:20 PM |
Occam's razor | Nicolas Capens | 2014/01/09 04:19 AM |
Occam's razor | NoSpammer | 2014/01/09 12:55 PM |
Occam's razor | Nicolas Capens | 2014/01/10 03:40 PM |
Occam's razor | Michael S | 2014/01/11 10:21 AM |
Occam's razor | Michael S | 2014/01/12 03:21 PM |
KNC compiler output | Nicolas Capens | 2014/01/16 06:39 PM |
KNC compiler output | Michael S | 2014/01/18 05:13 PM |
L0 cache coherency | David Kanter | 2014/01/11 08:39 PM |
Occam's razor | anon | 2014/01/09 05:12 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/08 10:46 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/08 11:23 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/08 02:02 PM |
Knights Landing L/S bandwidth | Michael S | 2014/01/08 02:29 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/08 02:54 PM |
Knights Landing L/S bandwidth | Michael S | 2014/01/08 03:00 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/08 03:13 PM |
Knights Landing L/S bandwidth | Michael S | 2014/01/08 03:28 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/08 03:32 PM |
Knights Landing L/S bandwidth | Michael S | 2014/01/08 03:40 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/08 03:51 PM |
Knights Landing L/S bandwidth | Michael S | 2014/01/09 12:18 PM |
Knights Landing L/S bandwidth | Patrick Chase | 2014/01/12 10:03 PM |
Also page/line splits? | David Kanter | 2014/01/12 10:50 PM |
Also page/line splits? | anon | 2014/01/13 01:44 AM |
Also page/line splits? | none | 2014/01/13 03:09 AM |
Also page/line splits? | anon | 2014/01/13 04:19 AM |
Knights Landing L/S bandwidth | Exophase | 2014/01/13 12:15 AM |
Knights Landing L/S bandwidth | anon | 2014/01/13 01:41 AM |
Knights Landing L/S bandwidth | Patrick Chase | 2014/01/13 11:14 AM |
Aliased writes | Nicolas Capens | 2014/01/14 09:46 PM |
Knights Landing L/S bandwidth | Ricardo B | 2014/01/07 04:27 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/07 10:28 PM |
Knights Landing L/S bandwidth | Ricardo B | 2014/01/08 02:13 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/08 11:10 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/08 03:31 PM |
Knights Landing L/S bandwidth | Ricardo B | 2014/01/08 03:58 PM |
Knights Landing L/S bandwidth | G. Gouvine | 2014/01/09 09:10 AM |
Knights Landing L/S bandwidth | Ricardo B | 2014/01/09 11:19 AM |
Efficient load queue vs. efficient L0 cache | Nicolas Capens | 2014/01/11 12:28 PM |
Efficient load queue vs. efficient L0 cache | G. Gouvine | 2014/01/13 02:11 AM |
Efficient load queue vs. efficient L0 cache | Michael S | 2014/01/13 03:43 AM |
Register file read port requirements | Nicolas Capens | 2014/01/11 12:55 AM |
Register file read port requirements | Ricardo B | 2014/01/11 05:24 AM |
Register file read port requirements | Eric Bron | 2014/01/11 05:32 AM |
Register file read port requirements | Michael S | 2014/01/11 09:57 AM |
Register file read port requirements | Eric Bron | 2014/01/11 11:16 AM |
Register file read port requirements | Michael S | 2014/01/11 11:46 AM |
Register file read port requirements | Eric Bron | 2014/01/11 12:12 PM |
Register file read port requirements | Michael S | 2014/01/11 12:36 PM |
Register file read port requirements | Eric Bron | 2014/01/11 12:51 PM |
Register file read port requirements | Patrick Chase | 2014/01/13 02:27 PM |
Register file read port requirements | Eric Bron | 2014/01/13 04:24 PM |
Register file read port requirements | Patrick Chase | 2014/01/13 06:02 PM |
Register file read port requirements | Eric Bron | 2014/01/14 04:50 AM |
Register file read port requirements | Michael S | 2014/01/14 11:36 AM |
Register file read port requirements | Eric Bron nli | 2014/01/14 01:04 PM |
Register file read port requirements | Patrick Chase | 2014/01/13 02:17 PM |
Register file read port requirements | Michael S | 2014/01/15 04:27 AM |
Register file read port requirements | Eric Bron | 2014/01/11 11:28 AM |
Register file read port requirements | Michael S | 2014/01/11 12:07 PM |
Register file read port requirements | Patrick Chase | 2014/01/13 02:40 PM |
Register file read port requirements | Patrick Chase | 2014/01/13 02:34 PM |
Register file read port requirements | Ricardo B | 2014/01/11 12:55 PM |
Register file read port requirements | Eric Bron | 2014/01/11 01:17 PM |
Register file read port requirements | Ricardo B | 2014/01/11 02:36 PM |
Register file read port requirements | Eric Bron | 2014/01/11 02:42 PM |
Register file read port requirements | Ricardo B | 2014/01/11 03:20 PM |
Register file read port requirements | Eric Bron | 2014/01/11 03:26 PM |
Register file read port requirements | Michael S | 2014/01/11 04:07 PM |
Register file read port requirements | Ricardo B | 2014/01/11 04:38 PM |
Register file read port requirements | Michael S | 2014/01/11 04:49 PM |
Register file read port requirements | Eric Bron | 2014/01/11 03:39 PM |
Register file read port requirements | Eric Bron | 2014/01/11 03:41 PM |
Register file read port requirements | Ricardo B | 2014/01/11 04:30 PM |
Register file read port requirements | Nicolas Capens | 2014/01/11 12:09 PM |
Knights Landing L/S bandwidth | anon | 2014/01/05 06:55 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/05 07:30 AM |
Knights Landing L/S bandwidth | anon | 2014/01/06 01:07 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 02:38 AM |
Knights Landing L/S bandwidth | anon | 2014/01/06 04:01 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 04:44 AM |
Knights Landing L/S bandwidth | anon | 2014/01/06 05:39 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 06:00 AM |
Knights Landing L/S bandwidth | anon | 2014/01/06 06:44 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/06 08:54 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 10:11 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/06 10:14 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 11:37 AM |
Knights Landing L/S bandwidth | Ricardo B | 2014/01/08 06:25 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/08 08:36 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/08 08:41 AM |
KNC code generator with EVEX back-end? | Michael S | 2014/01/08 09:43 AM |
KNC code generator with EVEX back-end? | Exophase | 2014/01/08 10:00 AM |
KNC code generator with EVEX back-end? | Ricardo B | 2014/01/08 11:39 AM |
KNC code generator with EVEX back-end? | Eric Bron | 2014/01/08 12:15 PM |
KNC code generator with EVEX back-end? | Exophase | 2014/01/08 01:17 PM |
KNC code generator with EVEX back-end? | Ricardo B | 2014/01/08 02:06 PM |
KNC code generator with EVEX back-end? | Exophase | 2014/01/08 02:24 PM |
KNC code generator with EVEX back-end? | Eric Bron | 2014/01/08 02:38 PM |
KNC code generator with EVEX back-end? | Michael S | 2014/01/08 01:54 PM |
KNC code generator with EVEX back-end? | Eric Bron | 2014/01/08 10:25 AM |
KNC code generator with EVEX back-end? | Eric Bron | 2014/01/08 10:35 AM |
KNC code generator with EVEX back-end? | Michael S | 2014/01/08 11:07 AM |
KNC code generator with EVEX back-end? | Eric Bron | 2014/01/08 11:24 AM |
KNC code generator with EVEX back-end? | Michael S | 2014/01/08 11:43 AM |
KNC code generator with EVEX back-end? | Eric Bron | 2014/01/08 01:23 PM |
KNC code generator with EVEX back-end? | Eric Bron | 2014/01/08 10:43 AM |
AVX2 code much different than AVX-512 | Eric Bron | 2014/01/08 08:52 AM |
evil question | hobold | 2014/01/08 10:22 AM |
evil question | Eric Bron | 2014/01/08 10:27 AM |
evil question | hobold | 2014/01/08 02:33 PM |
evil question | Michael S | 2014/01/08 02:37 PM |
stupid question (was: evil question) | hobold | 2014/01/09 05:41 AM |
stupid question (was: evil question) | Eric Bron | 2014/01/09 05:52 AM |
stupid question (was: evil question) | Michael S | 2014/01/09 08:00 AM |
stupid question (was: evil question) | Michael S | 2014/01/09 08:12 AM |
stupid question (was: evil question) | Eric Bron | 2014/01/09 10:47 AM |
stupid question (was: evil question) | Michael S | 2014/01/09 11:48 AM |
more decisive (hopefully) test case | Michael S | 2014/01/09 12:01 PM |
more decisive (hopefully) test case | Eric Bron | 2014/01/09 12:08 PM |
more decisive (hopefully) test case | Michael S | 2014/01/09 12:24 PM |
more decisive (hopefully) test case | Eric Bron | 2014/01/09 12:27 PM |
more decisive (hopefully) test case | Michael S | 2014/01/09 12:33 PM |
AVX2 | Eric Bron | 2014/01/09 12:14 PM |
AVX2 | Michael S | 2014/01/09 12:30 PM |
AVX2 | Eric Bron | 2014/01/09 12:40 PM |
another try | Michael S | 2014/01/09 03:02 PM |
another try | Eric Bron | 2014/01/09 03:33 PM |
another try | Michael S | 2014/01/09 04:20 PM |
another try - ignore misformated mess above | Michael S | 2014/01/09 04:24 PM |
another try - ignore misformated mess above | Gabriele Svelto | 2014/01/10 01:01 AM |
another try - ignore misformated mess above | Eric Bron | 2014/01/10 03:05 AM |
another try - ignore misformated mess above | Michael S | 2014/01/11 10:23 AM |
another try - ignore misformated mess above | Eric Bron | 2014/01/11 11:08 AM |
another try - ignore misformated mess above | Michael S | 2014/01/11 12:09 PM |
another try - ignore misformated mess above | Michael S | 2014/01/11 12:12 PM |
another try - ignore misformated mess above | Eric Bron | 2014/01/11 12:24 PM |
another try - ignore misformated mess above | Michael S | 2014/01/11 01:24 PM |
another try - ignore misformated mess above | Eric Bron | 2014/01/11 02:11 PM |
another try - ignore misformated mess above | Michael S | 2014/01/11 02:18 PM |
another try - ignore misformated mess above | Eric Bron | 2014/01/11 02:27 PM |
another try - ignore misformated mess above | Michael S | 2014/01/11 02:29 PM |
another try - ignore misformated mess above | Eric Bron | 2014/01/11 02:46 PM |
another try - ignore misformated mess above | Eric Bron | 2014/01/11 02:46 PM |
another try - ignore misformated mess above | Michael S | 2014/01/11 03:28 PM |
another try - ignore misformated mess above | Eric Bron | 2014/01/11 02:17 PM |
another try - ignore misformated mess above | Michael S | 2014/01/11 02:24 PM |
KNC version | Michael S | 2014/01/11 05:19 PM |
KNC version | Eric Bron nli | 2014/01/12 02:59 AM |
KNC version | Gabriele Svelto | 2014/01/12 09:06 AM |
evil question | Eric Bron | 2014/01/08 02:41 PM |
Knights Landing L/S bandwidth | Patrick Chase | 2014/01/05 11:20 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 02:45 AM |
Knights Landing L/S bandwidth | anon | 2014/01/06 04:12 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/06 04:17 AM |
Knights Landing L/S bandwidth | anon | 2014/01/06 05:20 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/04 05:34 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/04 05:44 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/05 12:25 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/05 01:50 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/05 03:34 PM |
Might even help with gather | Nicolas Capens | 2014/01/05 03:40 PM |
What is an L0 cache? | David Kanter | 2014/01/05 10:44 PM |
What is an L0 cache? | anon | 2014/01/06 05:57 AM |
What is an L0 cache? | Nicolas Capens | 2014/01/06 12:57 PM |
What is an L0 cache? | anon | 2014/01/06 02:18 PM |
Knights Landing L/S bandwidth | David Kanter | 2014/01/04 10:58 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/04 04:24 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/04 04:46 PM |
Knights Landing L/S bandwidth | Konrad Schwarz | 2014/01/08 12:48 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/08 02:45 AM |
Knights Landing L/S bandwidth | David Kanter | 2014/01/05 01:44 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/05 03:55 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/05 12:18 PM |
Knights Landing L/S bandwidth | Maynard Handley | 2014/01/05 11:33 PM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 04:02 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/06 04:23 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 04:35 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/06 05:20 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/06 05:32 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 05:36 AM |
Knights Landing L/S bandwidth | Michael S | 2014/01/06 06:00 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 06:07 AM |
Knights Landing L/S bandwidth | Eric Bron | 2014/01/06 06:14 AM |
edits | Eric Bron | 2014/01/06 06:22 AM |
optimized version | Eric Bron | 2014/01/06 06:35 AM |
yet more optimized version | Eric Bron | 2014/01/06 06:42 AM |
latest version for today | Eric Bron | 2014/01/06 06:51 AM |
Probably just L2 bandwith limited | Nicolas Capens | 2014/01/06 11:48 AM |
yet more optimized version | Maynard Handley | 2014/01/06 06:54 PM |
optimized version | Maynard Handley | 2014/01/06 06:52 PM |
optimized version | Michael S | 2014/01/07 10:42 AM |
optimized version | Nicolas Capens | 2014/01/07 12:36 PM |
optimized version | Michael S | 2014/01/07 03:41 PM |
optimized version | Nicolas Capens | 2014/01/07 10:52 PM |
optimized version | Michael S | 2014/01/08 02:10 AM |
optimized version | Eric Bron | 2014/01/07 02:34 PM |
optimized version | Michael S | 2014/01/07 03:18 PM |
optimized version | Eric Bron | 2014/01/07 03:30 PM |
optimized version | Eric Bron | 2014/01/07 03:33 PM |
optimized version | Michael S | 2014/01/07 03:57 PM |
optimized version | Maynard Handley | 2014/01/07 06:50 PM |
optimized version | Michael S | 2014/01/08 02:39 AM |
Knights Landing L/S bandwidth | Maynard Handley | 2014/01/06 06:47 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/06 09:18 AM |
Knights Landing L/S bandwidth | Maynard Handley | 2014/01/06 06:56 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/07 12:18 PM |
Knights Landing L/S bandwidth | NoSpammer | 2014/01/05 01:15 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/05 03:06 PM |
Knights Landing L/S bandwidth | NoSpammer | 2014/01/06 04:20 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/06 11:54 AM |
Knights Landing L/S bandwidth | NoSpammer | 2014/01/06 01:24 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/06 09:15 PM |
Knights Landing L/S bandwidth | NoSpammer | 2014/01/07 03:58 AM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/07 03:18 PM |
Knights Landing L/S bandwidth | NoSpammer | 2014/01/08 01:38 PM |
Knights Landing L/S bandwidth | Nicolas Capens | 2014/01/08 11:14 PM |
AVX512F question | Michael S | 2014/01/06 10:18 AM |
AVX512F question | Nicolas Capens | 2014/01/06 12:01 PM |
Knights Landing - time for obituary? | Michael S | 2018/07/31 03:00 PM |
Knights Landing - time for obituary? | Adrian | 2018/07/31 09:24 PM |
Knights Landing - time for obituary? | SoftwareEngineer | 2018/08/01 02:15 AM |
auto-vectorization is a dead end | Michael S | 2018/08/01 03:48 AM |
Auto-vectorization of random C is a dead end | Mark Roulo | 2018/08/01 11:07 AM |
Auto-vectorization of random C is a dead end | Passing Through | 2018/08/01 01:35 PM |
Auto-vectorization of random C is a dead end | David Kanter | 2018/08/01 10:44 PM |
Auto-vectorization of random C is a dead end | Passing Through | 2018/08/02 01:51 AM |
Auto-vectorization of random C is a dead end | SoftwareEngineer | 2018/08/02 01:19 AM |
Auto-vectorization of random C is a dead end | Mark Roulo | 2018/08/02 09:50 AM |
Auto-vectorization of random C is a dead end | Michael S | 2018/08/02 12:11 PM |
Auto-vectorization of random C is a dead end | j | 2018/08/02 11:37 PM |
Auto-vectorization of random C is a dead end | Michael S | 2018/08/03 03:50 AM |
Auto-vectorization of random C is a dead end | rwessel | 2018/08/03 11:06 PM |
Auto-vectorization of random C is a dead end | Ricardo B | 2018/08/03 04:20 AM |
Auto-vectorization of random C is a dead end | Michael S | 2018/08/03 05:37 AM |
Auto-vectorization of random C is a dead end | Ricardo B | 2018/08/03 11:22 AM |
Auto-vectorization of random C is a dead end | Travis | 2018/08/03 07:58 PM |
Potential way to autovectorization in the future. | Jouni Osmala | 2018/08/03 10:22 PM |
Potential way to autovectorization in the future. | Jukka Larja | 2018/08/04 04:03 AM |
Potential way to autovectorization in the future. | Passing Through | 2018/08/04 06:47 AM |
Potential way to autovectorization in the future. | Travis | 2018/08/04 01:50 PM |
Potential way to autovectorization in the future. | Michael S | 2018/08/04 02:33 PM |
Potential way to autovectorization in the future. | Travis | 2018/08/04 02:48 PM |
Potential way to autovectorization in the future. | Passing Through | 2018/08/04 02:58 PM |
Skylake server/client AVX PRF speculation | Jeff S. | 2018/08/04 05:42 PM |
Skylake server/client AVX PRF speculation | anonymou5 | 2018/08/04 06:21 PM |
Skylake server/client AVX PRF speculation | Jeff S. | 2018/08/04 06:38 PM |
Skylake server/client AVX PRF speculation | anonymou5 | 2018/08/04 07:45 PM |
Skylake server/client AVX PRF speculation | Jeff S. | 2018/08/04 08:08 PM |
Skylake server/client AVX PRF speculation | anonymou5 | 2018/08/04 08:18 PM |
Skylake server/client AVX PRF speculation | Nomad | 2018/08/05 11:10 PM |
Skylake server/client AVX PRF speculation | anonymou5 | 2018/08/06 12:14 PM |
Skylake server/client AVX PRF speculation | Travis | 2018/08/06 08:43 PM |
Skylake server/client AVX PRF speculation | Travis | 2018/08/06 08:39 PM |
Auto-vectorization of random C is a dead end | Brett | 2018/08/04 01:55 PM |
Auto-vectorization of random C is a dead end | Travis | 2018/08/04 02:38 PM |
Auto-vectorization of random C is a dead end | Passing Through | 2018/08/04 03:00 PM |
New record for shortest post by Ireland - AI crashed? (NT) | Travis | 2018/08/04 03:34 PM |
New record for shortest post by Ireland - AI crashed? | Passing Through | 2018/08/04 04:12 PM |
New record for shortest post by Ireland - AI crashed? | anonymou5 | 2018/08/04 06:00 PM |
New record for shortest post by Ireland - AI crashed? | Brett | 2018/08/04 06:40 PM |
New record for shortest post by Ireland - AI crashed? | anonymou5 | 2018/08/04 07:38 PM |
Auto-vectorization of random C is a dead end | noko | 2018/08/04 09:46 PM |
The story of ispc (a 12 entry blog series) | Simon Farnsworth | 2018/08/01 03:50 AM |
the 1st link is empty (NT) | Michael S | 2018/08/01 04:05 AM |
the 1st link is empty | Simon Farnsworth | 2018/08/01 06:42 AM |
Interesting read, thanks! (NT) | SoftwareEngineer | 2018/08/01 06:57 AM |
Amazing read | Laurent | 2018/08/01 09:00 AM |
Amazing read | Passing Through | 2018/08/01 01:13 PM |
Amazing read | Doug S | 2018/08/01 02:30 PM |
Amazing read | Passing Through | 2018/08/01 02:49 PM |
ISPC vs OpenCL? | j | 2018/08/02 11:41 PM |
ISPC vs OpenCL? | coppcie | 2018/08/03 03:55 AM |
ISPC vs OpenCL? | Passing Through | 2018/08/03 04:07 AM |
Go away | Forum Reader | 2018/08/03 08:11 AM |
ISPC vs OpenCL? | Gian-Carlo Pascutto | 2018/09/11 06:50 AM |
ISPC vs OpenCL? | SoftwareEngineer | 2018/08/03 04:18 AM |
Knights Landing - time for obituary? | Kevin G | 2018/08/01 07:14 AM |
Knights Landing - time for obituary? | SoftwareEngineer | 2018/08/01 07:29 AM |
Knights Landing - time for obituary? | Passing Through | 2018/08/01 07:38 AM |
Knights Landing - time for obituary? | Eric Bron | 2018/08/02 06:57 AM |
Knights Landing - time for obituary? | Passing Through | 2018/08/02 12:29 PM |
Knights Landing - time for obituary? | Eric Bron | 2018/08/02 01:49 PM |
Knights Landing - time for obituary? | Passing Through | 2018/08/02 02:17 PM |
chess algorithms vs, low level optimizations | Eric Bron | 2018/08/02 07:15 AM |
AlphaZero vs Stockfish | Michael S | 2018/08/02 07:55 AM |
AlphaZero vs Stockfish | Eric Bron | 2018/08/02 08:24 AM |
AlphaZero vs Stockfish | Michael S | 2018/08/02 09:01 AM |
AlphaZero vs Stockfish | Eric Bron | 2018/08/02 09:11 AM |
Leela 4th vs all others | Eric Bron nli | 2018/09/11 03:40 AM |
AlphaZero vs Stockfish | Gian-Carlo Pascutto | 2018/09/11 06:31 AM |
AlphaZero vs Stockfish | Eric Bron | 2018/09/11 09:26 AM |
AlphaZero vs Stockfish | Eric Bron | 2018/09/11 09:58 AM |
AlphaZero vs Stockfish | Per Hesselgren | 2018/12/31 10:04 AM |
Leela Chess Zero | Per Hesselgren | 2018/12/31 12:00 PM |
AlphaZero vs Stockfish (on Xeon) | Per Hesselgren | 2018/12/31 09:59 AM |
C/C++ and vector/parallel/distributed | RichardC | 2018/08/02 05:50 AM |
Knights Landing - time for obituary? | Passing Through | 2018/08/01 07:52 AM |
Knights Landing - time for obituary? | Kevin G | 2018/08/01 02:03 PM |
Knights Landing - time for obituary? | Passing Through | 2018/08/01 02:33 PM |
Knights Landing - time for obituary? | Kevin G | 2018/08/01 08:26 AM |
Knights Landing - time for obituary? | Kevin G | 2018/08/01 08:26 AM |
Knights Landing - time for obituary? | juanrga | 2018/08/01 02:26 PM |
Knights Landing - time for obituary? | hobel | 2018/08/02 05:46 AM |
Knights Landing - time for obituary? | juanrga | 2018/07/31 11:25 PM |
Right, time for obituary for whole LRB lineage | AM | 2018/08/02 11:46 AM |
Right, time for obituary for whole LRB lineage | Adrian | 2018/08/02 11:46 PM |
LRBNI, AVX512, etc... | Michael S | 2018/08/03 05:23 AM |
Right, time for obituary for whole LRB lineage | juanrga | 2018/08/03 04:11 AM |