Mercurial > x265
changeset 9530:8fba2e8eafb8 draft
asm: avx2 assembly code for 8bpp avg_pixel_pp[32xN]
avg_pp[32x16] 90.96x 465.07 42302.72
avg_pp[ 32x8] 67.83x 304.57 20659.12
avg_pp[32x24] 98.05x 628.94 61669.68
avg_pp[32x64] 111.05x 1470.99 163356.11
author | Sumalatha Polureddy <sumalatha@multicorewareinc.com> |
---|---|
date | Tue, 17 Feb 2015 14:15:43 +0530 |
parents | 4eaf41c1b2fb |
children | f5260d12d98b |
files | source/common/x86/asm-primitives.cpp source/common/x86/mc-a.asm source/common/x86/pixel.h |
diffstat | 3 files changed, 12 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/source/common/x86/asm-primitives.cpp Tue Feb 17 13:59:34 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Feb 17 14:15:43 2015 +0530
@@ -1429,7 +1429,11 @@ void setupAssemblyPrimitives(EncoderPrim
 }
 if (cpuMask & X265_CPU_AVX2)
 {
+p.pu[LUMA_32x64].pixelavg_pp = x265_pixel_avg_32x64_avx2;
 p.pu[LUMA_32x32].pixelavg_pp = x265_pixel_avg_32x32_avx2;
+p.pu[LUMA_32x24].pixelavg_pp = x265_pixel_avg_32x24_avx2;
+p.pu[LUMA_32x16].pixelavg_pp = x265_pixel_avg_32x16_avx2;
+p.pu[LUMA_32x8].pixelavg_pp = x265_pixel_avg_32x8_avx2;
 p.pu[LUMA_16x16].satd = x265_pixel_satd_16x16_avx2;
 p.pu[LUMA_16x8].satd = x265_pixel_satd_16x8_avx2;
```
```diff
--- a/source/common/x86/mc-a.asm Tue Feb 17 13:59:34 2015 +0530
+++ b/source/common/x86/mc-a.asm Tue Feb 17 14:15:43 2015 +0530
@@ -3004,11 +3004,11 @@ INIT_XMM avx2
 ;AVGH 64, 16
 AVG_FUNC 32, movdqu, movdqa
-;AVGH 32, 64
+AVGH 32, 64
 AVGH 32, 32
-;AVGH 32, 24
-;AVGH 32, 16
-;AVGH 32, 8
+AVGH 32, 24
+AVGH 32, 16
+AVGH 32, 8
 AVG_FUNC 16, movdqu, movdqa
 AVGH 16, 16
 AVGH 16, 8
```
```diff
--- a/source/common/x86/pixel.h Tue Feb 17 13:59:34 2015 +0530
+++ b/source/common/x86/pixel.h Tue Feb 17 14:15:43 2015 +0530
@@ -235,7 +235,11 @@ int x265_psyCost_ss_8x8_sse4(const int16
 int x265_psyCost_ss_16x16_sse4(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride);
 int x265_psyCost_ss_32x32_sse4(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride);
 int x265_psyCost_ss_64x64_sse4(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride);
+void x265_pixel_avg_32x64_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
 void x265_pixel_avg_32x32_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_32x24_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_32x16_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_32x8_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
 #undef DECL_PIXELS
 #undef DECL_HEVC_SSD
```