Mercurial > x265
changeset 9519:c82806023d05 draft
asm: avx assembly code for 10bpp satd[12x32] - 24x
[i422] satd[12x32] 24.20x 3364.92 81445.69
author | Sumalatha Polureddy <sumalatha@multicorewareinc.com> |
---|---|
date | Mon, 16 Feb 2015 16:55:54 +0530 |
parents | 4030e4d97e99 |
children | 3481b16114d7 |
files | source/common/x86/asm-primitives.cpp source/common/x86/pixel-a.asm |
diffstat | 2 files changed, 104 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/source/common/x86/asm-primitives.cpp Mon Feb 16 16:25:54 2015 +0530 +++ b/source/common/x86/asm-primitives.cpp Mon Feb 16 16:55:54 2015 +0530 @@ -950,6 +950,7 @@ void setupAssemblyPrimitives(EncoderPrim p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].satd = x265_pixel_satd_24x64_avx; p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].satd = x265_pixel_satd_8x64_avx; p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].satd = x265_pixel_satd_8x12_avx; + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = x265_pixel_satd_12x32_avx; ALL_LUMA_PU(satd, pixel_satd, avx); ASSIGN_SA8D(avx);
--- a/source/common/x86/pixel-a.asm Mon Feb 16 16:25:54 2015 +0530 +++ b/source/common/x86/pixel-a.asm Mon Feb 16 16:55:54 2015 +0530 @@ -1521,6 +1521,106 @@ cglobal pixel_satd_8x12, 4,7,8,0-gprsize movd eax, m6 RET %endif + +%if HIGH_BIT_DEPTH +%if WIN64 +cglobal pixel_satd_12x32, 4,8,8 ;if WIN64 && cpuflag(avx) + SATD_START_MMX + mov r6, r0 + mov r7, r2 + pxor m7, m7 + SATD_4x8_SSE vertical, 0, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r6 + 4*SIZEOF_PIXEL] + lea r2, [r7 + 4*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r6 + 8*SIZEOF_PIXEL] + lea r2, [r7 + 8*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + pxor m1, m1 + movhlps m1, m7 + paddd m7, m1 + pshufd m1, m7, 1 + paddd m7, m1 + movd eax, m7 + RET +%else +cglobal pixel_satd_12x32, 4,7,8,0-gprsize + SATD_START_MMX + mov r6, r0 + mov [rsp], r2 + pxor m7, m7 + SATD_4x8_SSE vertical, 0, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5
+ lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r6 + 4*SIZEOF_PIXEL] + mov r2, [rsp] + add r2, 4*SIZEOF_PIXEL + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r6 + 8*SIZEOF_PIXEL] + mov r2, [rsp] + add r2, 8*SIZEOF_PIXEL + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + lea r0, [r0 + r1*2*SIZEOF_PIXEL] + lea r2, [r2 + r3*2*SIZEOF_PIXEL] + SATD_4x8_SSE vertical, 1, 4, 5 + pxor m1, m1 + movhlps m1, m7 + paddd m7, m1 + pshufd m1, m7, 1 + paddd m7, m1 + movd eax, m7 + RET +%endif +%else ;HIGH_BIT_DEPTH %if WIN64 cglobal pixel_satd_12x32, 4,8,8 ;if WIN64 && cpuflag(avx) SATD_START_MMX @@ -1614,7 +1714,9 @@ cglobal pixel_satd_12x32, 4,7,8,0-gprsiz movd eax, m7 RET %endif - %if WIN64 +%endif + +%if WIN64 cglobal pixel_satd_4x32, 4,8,8 ;if WIN64 && cpuflag(avx) SATD_START_MMX mov r6, r0