changeset 9530:8fba2e8eafb8 draft

asm: avx2 assembly code for 8bpp avg_pixel_pp[32xN]

    avg_pp[32x16]   90.96x    465.07    42302.72
    avg_pp[ 32x8]   67.83x    304.57    20659.12
    avg_pp[32x24]   98.05x    628.94    61669.68
    avg_pp[32x64]  111.05x   1470.99   163356.11
author Sumalatha Polureddy <sumalatha@multicorewareinc.com>
date Tue, 17 Feb 2015 14:15:43 +0530
parents 4eaf41c1b2fb
children f5260d12d98b
files source/common/x86/asm-primitives.cpp source/common/x86/mc-a.asm source/common/x86/pixel.h
diffstat 3 files changed, 12 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/source/common/x86/asm-primitives.cpp	Tue Feb 17 13:59:34 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Feb 17 14:15:43 2015 +0530
@@ -1429,7 +1429,11 @@ void setupAssemblyPrimitives(EncoderPrim
     }
     if (cpuMask & X265_CPU_AVX2)
     {
+        p.pu[LUMA_32x64].pixelavg_pp = x265_pixel_avg_32x64_avx2;
         p.pu[LUMA_32x32].pixelavg_pp = x265_pixel_avg_32x32_avx2;
+        p.pu[LUMA_32x24].pixelavg_pp = x265_pixel_avg_32x24_avx2;
+        p.pu[LUMA_32x16].pixelavg_pp = x265_pixel_avg_32x16_avx2;
+        p.pu[LUMA_32x8].pixelavg_pp = x265_pixel_avg_32x8_avx2;
 
         p.pu[LUMA_16x16].satd = x265_pixel_satd_16x16_avx2;
         p.pu[LUMA_16x8].satd  = x265_pixel_satd_16x8_avx2;
--- a/source/common/x86/mc-a.asm	Tue Feb 17 13:59:34 2015 +0530
+++ b/source/common/x86/mc-a.asm	Tue Feb 17 14:15:43 2015 +0530
@@ -3004,11 +3004,11 @@ INIT_XMM avx2
 ;AVGH 64, 16
 
 AVG_FUNC 32, movdqu, movdqa
-;AVGH 32, 64
+AVGH 32, 64
 AVGH 32, 32
-;AVGH 32, 24
-;AVGH 32, 16
-;AVGH 32, 8
+AVGH 32, 24
+AVGH 32, 16
+AVGH 32, 8
 AVG_FUNC 16, movdqu, movdqa
 AVGH 16, 16
 AVGH 16,  8
--- a/source/common/x86/pixel.h	Tue Feb 17 13:59:34 2015 +0530
+++ b/source/common/x86/pixel.h	Tue Feb 17 14:15:43 2015 +0530
@@ -235,7 +235,11 @@ int x265_psyCost_ss_8x8_sse4(const int16
 int x265_psyCost_ss_16x16_sse4(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride);
 int x265_psyCost_ss_32x32_sse4(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride);
 int x265_psyCost_ss_64x64_sse4(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride);
+void x265_pixel_avg_32x64_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
 void x265_pixel_avg_32x32_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_32x24_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_32x16_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_32x8_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
 
 #undef DECL_PIXELS
 #undef DECL_HEVC_SSD