changeset 9519:c82806023d05 draft

asm: avx assembly code for 10bpp satd[12x32] - 24x [i422] satd[12x32] 24.20x 3364.92 81445.69
author Sumalatha Polureddy<sumalatha@multicorewareinc.com>
date Mon, 16 Feb 2015 16:55:54 +0530
parents 4030e4d97e99
children 3481b16114d7
files source/common/x86/asm-primitives.cpp source/common/x86/pixel-a.asm
diffstat 2 files changed, 104 insertions(+-), 1 deletions(-) [+]
line wrap: on
line diff
--- a/source/common/x86/asm-primitives.cpp	Mon Feb 16 16:25:54 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon Feb 16 16:55:54 2015 +0530
@@ -950,6 +950,7 @@ void setupAssemblyPrimitives(EncoderPrim
         p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].satd = x265_pixel_satd_24x64_avx;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].satd = x265_pixel_satd_8x64_avx;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].satd = x265_pixel_satd_8x12_avx;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = x265_pixel_satd_12x32_avx;
 
         ALL_LUMA_PU(satd, pixel_satd, avx);
         ASSIGN_SA8D(avx);
--- a/source/common/x86/pixel-a.asm	Mon Feb 16 16:25:54 2015 +0530
+++ b/source/common/x86/pixel-a.asm	Mon Feb 16 16:55:54 2015 +0530
@@ -1521,6 +1521,106 @@ cglobal pixel_satd_8x12, 4,7,8,0-gprsize
     movd   eax, m6
     RET
 %endif
+
+%if HIGH_BIT_DEPTH
+%if WIN64
+cglobal pixel_satd_12x32, 4,8,8   ;if WIN64 && cpuflag(avx); 12x32 is walked as 3 columns (4 px wide) x 4 blocks (4x8) = 12 SATD_4x8_SSE calls
+    SATD_START_MMX                          ; setup macro defined outside this hunk
+    mov r6, r0                              ; r6 = original r0, reused below to start the +4 and +8 pixel columns
+    mov r7, r2                              ; r7 = original r2, same purpose
+    pxor m7, m7                             ; zero m7; m7 is the register reduced into eax at the end
+    SATD_4x8_SSE vertical, 0, 4, 5          ; column 0, block 1/4; only call with 2nd arg 0 (presumably "start sum" vs "accumulate" - confirm in macro)
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]        ; step r0 by r1*2*SIZEOF_PIXEL towards the next block (assumes the macro already moved r0 part-way - TODO confirm)
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]        ; same step for r2 using r3
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 0, block 2/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 0, block 3/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 0, block 4/4
+    lea r0, [r6 + 4*SIZEOF_PIXEL]           ; restart from the saved base, 4 pixels further along the row
+    lea r2, [r7 + 4*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 1, block 1/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 1, block 2/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 1, block 3/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 1, block 4/4
+    lea r0, [r6 + 8*SIZEOF_PIXEL]           ; restart from the saved base, 8 pixels further along the row
+    lea r2, [r7 + 8*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 2, block 1/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 2, block 2/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 2, block 3/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 2, block 4/4
+    pxor    m1, m1                          ; clear m1 so the half movhlps does not write is zero
+    movhlps m1, m7                          ; m1 low qword = m7 high qword
+    paddd   m7, m1                          ; dwords 0,1 of m7 now hold d0+d2, d1+d3
+    pshufd  m1, m7, 1                       ; bring dword 1 of m7 into lane 0 of m1
+    paddd   m7, m1                          ; lane 0 of m7 = sum of all four dword lanes
+    movd   eax, m7                          ; return the low dword
+    RET
+%else
+cglobal pixel_satd_12x32, 4,7,8,0-gprsize ; non-WIN64 variant: the r2 base is kept in a stack slot at [rsp] instead of a register
+    SATD_START_MMX                          ; setup macro defined outside this hunk
+    mov r6, r0                              ; r6 = original r0, reused below to start the +4 and +8 pixel columns
+    mov [rsp], r2                           ; spill original r2; reloaded at the start of each later column
+    pxor m7, m7                             ; zero m7; m7 is the register reduced into eax at the end
+    SATD_4x8_SSE vertical, 0, 4, 5          ; column 0, block 1/4; only call with 2nd arg 0 (presumably "start sum" vs "accumulate" - confirm in macro)
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]        ; step r0 by r1*2*SIZEOF_PIXEL towards the next block (assumes the macro already moved r0 part-way - TODO confirm)
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]        ; same step for r2 using r3
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 0, block 2/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 0, block 3/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 0, block 4/4
+    lea r0, [r6 + 4*SIZEOF_PIXEL]           ; restart from the saved base, 4 pixels further along the row
+    mov r2, [rsp]                           ; reload the spilled r2 base ...
+    add r2, 4*SIZEOF_PIXEL                  ; ... and apply the same 4-pixel offset
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 1, block 1/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 1, block 2/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 1, block 3/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 1, block 4/4
+    lea r0, [r6 + 8*SIZEOF_PIXEL]           ; restart from the saved base, 8 pixels further along the row
+    mov r2, [rsp]                           ; reload the spilled r2 base ...
+    add r2, 8*SIZEOF_PIXEL                  ; ... and apply the same 8-pixel offset
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 2, block 1/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 2, block 2/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 2, block 3/4
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5          ; column 2, block 4/4
+    pxor    m1, m1                          ; clear m1 so the half movhlps does not write is zero
+    movhlps m1, m7                          ; m1 low qword = m7 high qword
+    paddd   m7, m1                          ; dwords 0,1 of m7 now hold d0+d2, d1+d3
+    pshufd  m1, m7, 1                       ; bring dword 1 of m7 into lane 0 of m1
+    paddd   m7, m1                          ; lane 0 of m7 = sum of all four dword lanes
+    movd   eax, m7                          ; return the low dword
+    RET
+%endif
+%else ;HIGH_BIT_DEPTH
 %if WIN64
 cglobal pixel_satd_12x32, 4,8,8   ;if WIN64 && cpuflag(avx)
     SATD_START_MMX
@@ -1614,7 +1714,9 @@ cglobal pixel_satd_12x32, 4,7,8,0-gprsiz
     movd eax, m7
     RET
 %endif
- %if WIN64
+%endif
+
+%if WIN64
 cglobal pixel_satd_4x32, 4,8,8   ;if WIN64 && cpuflag(avx)
     SATD_START_MMX
     mov r6, r0