Mercurial > x265
changeset 9524:ae80a972b770 draft
asm: avx assembly code for 10bpp satd[4x32] - 26x
[i422] satd[ 4x32] 26.25x 1046.54 27472.36
author | Sumalatha Polureddy <sumalatha@multicorewareinc.com> |
---|---|
date | Tue, 17 Feb 2015 11:59:10 +0530 |
parents | 74f975134412 |
children | d2fdee36c259 |
files | source/common/x86/asm-primitives.cpp source/common/x86/pixel-a.asm |
diffstat | 2 files changed, 51 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/source/common/x86/asm-primitives.cpp Tue Feb 17 11:48:16 2015 +0530 +++ b/source/common/x86/asm-primitives.cpp Tue Feb 17 11:59:10 2015 +0530 @@ -951,6 +951,7 @@ void setupAssemblyPrimitives(EncoderPrim p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].satd = x265_pixel_satd_8x64_avx; p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].satd = x265_pixel_satd_8x12_avx; p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = x265_pixel_satd_12x32_avx; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd = x265_pixel_satd_4x32_avx; ALL_LUMA_PU(satd, pixel_satd, avx); ASSIGN_SA8D(avx);
--- a/source/common/x86/pixel-a.asm Tue Feb 17 11:48:16 2015 +0530 +++ b/source/common/x86/pixel-a.asm Tue Feb 17 11:59:10 2015 +0530 @@ -1716,6 +1716,55 @@ cglobal pixel_satd_12x32, 4,7,8,0-gprsiz %endif %endif +%if HIGH_BIT_DEPTH +%if WIN64 +cglobal pixel_satd_4x32, 4,8,8 ;if WIN64 && cpuflag(avx) + SATD_START_MMX + mov r6, r0 + mov r7, r2 + pxor m7, m7 + SATD_4x8_SSE vertical, 0, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + pxor m1, m1 + movhlps m1, m7 + paddd m7, m1 + pshufd m1, m7, 1 + paddd m7, m1 + movd eax, m7 + RET +%else +cglobal pixel_satd_4x32, 4,7,8,0-gprsize + SATD_START_MMX + mov r6, r0 + mov [rsp], r2 + pxor m7, m7 + SATD_4x8_SSE vertical, 0, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + pxor m1, m1 + movhlps m1, m7 + paddd m7, m1 + pshufd m1, m7, 1 + paddd m7, m1 + movd eax, m7 + RET +%endif +%else %if WIN64 cglobal pixel_satd_4x32, 4,8,8 ;if WIN64 && cpuflag(avx) SATD_START_MMX @@ -1759,6 +1808,7 @@ cglobal pixel_satd_4x32, 4,7,8,0-gprsize movd eax, m7 RET %endif +%endif %if WIN64 cglobal pixel_satd_32x8, 4,8,14 ;if WIN64 && cpuflag(avx)