│ 82d: vmovdqu (%r8,%r12,1),%xmm3
0.98 │ vmovdqu (%r8,%rcx,1),%xmm5
2.15 │ vmulps %xmm5,%xmm3,%xmm3
23.66 │ vaddps %xmm3,%xmm0,%xmm0
20.84 │ mov -0xc0(%rbp),%r14
9.07 │ add $0xffffffff,%r14d
│ ↑ jne 7c0
│ vmovshdup %xmm0,%xmm3
2.46 │ vmovaps %xmm0,%xmm5
│ vaddss %xmm5,%xmm3,%xmm3
0.37 │ vpshufd $0x2,%xmm0,%xmm5
│ vaddss %xmm5,%xmm3,%xmm3
0.35 │ vshufps $0x3,%xmm0,%xmm0,%xmm0
│ vaddss %xmm0,%xmm3,%xmm0