changeset 9535:73d2a949ddd5 draft

asm: avx2 assembly code for 8bpp avg_pixel_pp[64xN] avg_pp[64x64] 94.38x 3378.15 318814.41 avg_pp[64x48] 72.19x 3269.78 236047.20 avg_pp[64x32] 92.97x 1708.96 158879.88 avg_pp[64x16] 95.67x 888.72 85022.89
author Sumalatha Polureddy<sumalatha@multicorewareinc.com>
date Tue, 17 Feb 2015 15:45:07 +0530
parents fb9a01bb8b3d
children cb3cb8ba7fa1
files source/common/x86/asm-primitives.cpp source/common/x86/mc-a.asm source/common/x86/pixel.h
diffstat 3 files changed, 38 insertions(+-), 4 deletions(-) [+]
line wrap: on
line diff
--- a/source/common/x86/asm-primitives.cpp	Tue Feb 17 15:34:48 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Feb 17 15:45:07 2015 +0530
@@ -1438,6 +1438,11 @@ void setupAssemblyPrimitives(EncoderPrim
         p.pu[LUMA_32x16].pixelavg_pp = x265_pixel_avg_32x16_avx2;
         p.pu[LUMA_32x8].pixelavg_pp = x265_pixel_avg_32x8_avx2;
 
+        p.pu[LUMA_64x64].pixelavg_pp = x265_pixel_avg_64x64_avx2;
+        p.pu[LUMA_64x48].pixelavg_pp = x265_pixel_avg_64x48_avx2;
+        p.pu[LUMA_64x32].pixelavg_pp = x265_pixel_avg_64x32_avx2;
+        p.pu[LUMA_64x16].pixelavg_pp = x265_pixel_avg_64x16_avx2;
+
         p.pu[LUMA_16x16].satd = x265_pixel_satd_16x16_avx2;
         p.pu[LUMA_16x8].satd  = x265_pixel_satd_16x8_avx2;
         p.pu[LUMA_8x16].satd  = x265_pixel_satd_8x16_avx2;
--- a/source/common/x86/mc-a.asm	Tue Feb 17 15:34:48 2015 +0530
+++ b/source/common/x86/mc-a.asm	Tue Feb 17 15:45:07 2015 +0530
@@ -2393,6 +2393,30 @@ cglobal pixel_avg_weight_w32
     mova    [t0], m0
     AVG_END
 
+cglobal pixel_avg_weight_w64 ; weighted average of two sources into [t0], 64 bytes per row as two halves at offsets 0 and 32 (assumes 32-byte m-registers, i.e. YMM mode - TODO confirm)
+    BIWEIGHT_START ; macro defined elsewhere; presumably prepares the constants read below as m3 and m4 - confirm
+    AVG_START 5 ; macro defined elsewhere; NOTE(review): confirm what it sets up (t0/t2/t4 are used below without visible initialisation)
+.height_loop: ; one iteration handles one 64-byte row; the loop branch is not visible here (presumably inside AVG_END)
+    movu     m0, [t2] ; unaligned load: first half of the row from the source at t2
+    movu     m1, [t4] ; unaligned load: first half of the row from the source at t4
+    SBUTTERFLY bw, 0, 1, 2 ; macro defined elsewhere; presumably interleaves the bytes of m0 and m1 (m2 as scratch) so each byte pair holds one pixel from each source - confirm
+    pmaddubsw m0, m3 ; unsigned bytes of m0 times signed bytes of m3, adjacent products summed into 16-bit words
+    pmaddubsw m1, m3 ; same for the other interleaved register
+    pmulhrsw  m0, m4 ; rounded high multiply by m4: scales the 16-bit sums back down
+    pmulhrsw  m1, m4 ; same for the other register
+    packuswb  m0, m1 ; narrow both word registers to bytes with unsigned saturation, merged into m0
+    mova    [t0], m0 ; store first half of the output row; NOTE(review): mova is presumably the aligned move, so [t0] would need register-width alignment - confirm
+    movu     m0, [t2 + 32] ; second half of the row: same sequence at byte offset 32
+    movu     m1, [t4 + 32] ; second half from the t4 source
+    SBUTTERFLY bw, 0, 1, 2 ; same interleave step as for the first half
+    pmaddubsw m0, m3 ; weighted pair sums, as above
+    pmaddubsw m1, m3 ; weighted pair sums, as above
+    pmulhrsw  m0, m4 ; rounded scale, as above
+    pmulhrsw  m1, m4 ; rounded scale, as above
+    packuswb  m0, m1 ; saturate to unsigned bytes, as above
+    mova    [t0 + 32], m0 ; store second half of the output row (same alignment caveat as the first store)
+    AVG_END ; macro defined elsewhere; presumably advances the row pointers, loops to .height_loop and returns - confirm
+
 %endif ;HIGH_BIT_DEPTH
 
 ;=============================================================================
@@ -2998,10 +3022,11 @@ INIT_XMM avx2
 ;AVG_FUNC 24, movdqu, movdqa
 ;AVGH 24, 32
 
-;AVG_FUNC 64, movdqu, movdqa
-;AVGH 64, 64
-;AVGH 64, 48
-;AVGH 64, 16
+AVG_FUNC 64, movdqu, movdqa ; instantiate the width-64 averaging routine; args 2 and 3 are an unaligned and an aligned move mnemonic (how the macro uses each is defined elsewhere)
+AVGH 64, 64 ; presumably emits the 64x64 entry point (cf. x265_pixel_avg_64x64_avx2 declared in pixel.h) - confirm against the AVGH macro
+AVGH 64, 48 ; presumably the 64x48 entry point (cf. x265_pixel_avg_64x48_avx2) - confirm
+AVGH 64, 32 ; presumably the 64x32 entry point (cf. x265_pixel_avg_64x32_avx2) - confirm
+AVGH 64, 16 ; presumably the 64x16 entry point (cf. x265_pixel_avg_64x16_avx2) - confirm
 
 AVG_FUNC 32, movdqu, movdqa
 AVGH 32, 64
--- a/source/common/x86/pixel.h	Tue Feb 17 15:34:48 2015 +0530
+++ b/source/common/x86/pixel.h	Tue Feb 17 15:45:07 2015 +0530
@@ -240,6 +240,10 @@ void x265_pixel_avg_32x32_avx2(pixel* ds
 void x265_pixel_avg_32x24_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
 void x265_pixel_avg_32x16_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
 void x265_pixel_avg_32x8_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_64x64_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_64x48_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_64x32_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_64x16_avx2(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
 
 #undef DECL_PIXELS
 #undef DECL_HEVC_SSD