changeset 9524:ae80a972b770 draft

asm: avx assembly code for 10bpp satd[4x32] - 26x speedup (benchmark: [i422] satd[ 4x32] 26.25x 1046.54 27472.36)
author Sumalatha Polureddy<sumalatha@multicorewareinc.com>
date Tue, 17 Feb 2015 11:59:10 +0530
parents 74f975134412
children d2fdee36c259
files source/common/x86/asm-primitives.cpp source/common/x86/pixel-a.asm
diffstat 2 files changed, 51 insertions(+-), 0 deletions(-) [+]
line wrap: on
line diff
--- a/source/common/x86/asm-primitives.cpp	Tue Feb 17 11:48:16 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Feb 17 11:59:10 2015 +0530
@@ -951,6 +951,7 @@ void setupAssemblyPrimitives(EncoderPrim
         p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].satd = x265_pixel_satd_8x64_avx;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].satd = x265_pixel_satd_8x12_avx;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = x265_pixel_satd_12x32_avx;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd = x265_pixel_satd_4x32_avx;
 
         ALL_LUMA_PU(satd, pixel_satd, avx);
         ASSIGN_SA8D(avx);
--- a/source/common/x86/pixel-a.asm	Tue Feb 17 11:48:16 2015 +0530
+++ b/source/common/x86/pixel-a.asm	Tue Feb 17 11:59:10 2015 +0530
@@ -1716,6 +1716,55 @@ cglobal pixel_satd_12x32, 4,7,8,0-gprsiz
 %endif
 %endif
 
+%if HIGH_BIT_DEPTH
+%if WIN64
+cglobal pixel_satd_4x32, 4,8,8   ;if WIN64 && cpuflag(avx) -- HIGH_BIT_DEPTH 4x32 SATD, WIN64 variant; cglobal operands (x86inc convention): 4 args, 8 GPRs, 8 vector regs
+    SATD_START_MMX                      ; macro defined outside this hunk; presumably prepares strides / row offsets for the SATD macros -- TODO confirm
+    mov r6, r0                          ; keep a copy of the incoming r0; NOTE(review): r6 is never read again in the visible body -- confirm the macros need it
+    mov r7, r2                          ; keep a copy of the incoming r2; NOTE(review): r7 is likewise never read again here
+    pxor m7, m7                         ; clear m7, the register that is reduced into the return value below
+    SATD_4x8_SSE vertical, 0, 4, 5      ; first of four SATD_4x8_SSE invocations; the only one passing 0 as its second operand
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]    ; step r0 by r1*2*SIZEOF_PIXEL before the next invocation
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]    ; step r2 by r3*2*SIZEOF_PIXEL likewise
+    SATD_4x8_SSE vertical, 1, 4, 5      ; second invocation; second operand is 1 (presumably "accumulate" rather than "initialize" -- TODO confirm against the macro)
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]    ; same pointer step as above
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5      ; third invocation
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]    ; same pointer step as above
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5      ; fourth and last invocation
+    pxor    m1, m1                      ; zero m1 so its upper qword stays 0 after movhlps
+    movhlps m1, m7                      ; low qword of m1 = high qword of m7
+    paddd   m7, m1                      ; dwords 0,1 of m7 += dwords 2,3 of m7
+    pshufd  m1, m7, 1                   ; dword 0 of m1 = dword 1 of m7
+    paddd   m7, m1                      ; dword 0 of m7 = sum of the four dwords m7 held before the reduction
+    movd   eax, m7                      ; return that 32-bit sum in eax
+    RET
+%else
+cglobal pixel_satd_4x32, 4,7,8,0-gprsize ; HIGH_BIT_DEPTH 4x32 SATD, non-WIN64 variant; cglobal operands (x86inc convention): 4 args, 7 GPRs, 8 vector regs, plus a stack request of one GPR-sized slot (negative-size form -- see x86inc for exact semantics)
+    SATD_START_MMX                      ; macro defined outside this hunk; presumably prepares strides / row offsets for the SATD macros -- TODO confirm
+    mov r6, r0                          ; keep a copy of the incoming r0; NOTE(review): r6 is never read again in the visible body -- confirm the macros need it
+    mov [rsp], r2                       ; spill the incoming r2 to the stack slot (the WIN64 variant keeps it in r7 instead); NOTE(review): never reloaded in the visible body
+    pxor m7, m7                         ; clear m7, the register that is reduced into the return value below
+    SATD_4x8_SSE vertical, 0, 4, 5      ; first of four SATD_4x8_SSE invocations; the only one passing 0 as its second operand
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]    ; step r0 by r1*2*SIZEOF_PIXEL before the next invocation
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]    ; step r2 by r3*2*SIZEOF_PIXEL likewise
+    SATD_4x8_SSE vertical, 1, 4, 5      ; second invocation; second operand is 1 (presumably "accumulate" rather than "initialize" -- TODO confirm against the macro)
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]    ; same pointer step as above
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5      ; third invocation
+    lea r0, [r0 + r1*2*SIZEOF_PIXEL]    ; same pointer step as above
+    lea r2, [r2 + r3*2*SIZEOF_PIXEL]
+    SATD_4x8_SSE vertical, 1, 4, 5      ; fourth and last invocation
+    pxor    m1, m1                      ; zero m1 so its upper qword stays 0 after movhlps
+    movhlps m1, m7                      ; low qword of m1 = high qword of m7
+    paddd   m7, m1                      ; dwords 0,1 of m7 += dwords 2,3 of m7
+    pshufd  m1, m7, 1                   ; dword 0 of m1 = dword 1 of m7
+    paddd   m7, m1                      ; dword 0 of m7 = sum of the four dwords m7 held before the reduction
+    movd   eax, m7                      ; return that 32-bit sum in eax
+    RET
+%endif
+%else
 %if WIN64
 cglobal pixel_satd_4x32, 4,8,8   ;if WIN64 && cpuflag(avx)
     SATD_START_MMX
@@ -1759,6 +1808,7 @@ cglobal pixel_satd_4x32, 4,7,8,0-gprsize
     movd eax, m7
     RET
 %endif
+%endif
 
 %if WIN64
 cglobal pixel_satd_32x8, 4,8,14    ;if WIN64 && cpuflag(avx)