Mercurial > x265
changeset 239:a340e53f1b09 draft
Merged multicoreware/xhevc into default
author | Deepthi Devaki Akkoorath <deepthidevaki@multicorewareinc.com> |
---|---|
date | Wed, 03 Apr 2013 14:34:39 +0530 |
parents | a30ff90c19ba (current diff) 76773eabb15b (diff) |
children | 7b66ab6a4275 |
files | source/encoder/TComRdCost_SSE.cpp |
diffstat | 6 files changed, 140 insertions(+), 92 deletions(-) [+] |
line wrap: on
line diff
--- a/source/Lib/TLibCommon/TComRdCost.cpp Wed Apr 03 10:57:28 2013 +0530 +++ b/source/Lib/TLibCommon/TComRdCost.cpp Wed Apr 03 14:34:39 2013 +0530 @@ -39,6 +39,7 @@ #include <assert.h> #include "TComRom.h" #include "TComRdCost.h" +#include "primitives.h" //! \ingroup TLibCommon //! \{ @@ -446,7 +447,6 @@ UInt TComRdCost::calcHAD(Int bitDepth, P } return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8); - } #if WEIGHTED_CHROMA_DISTORTION @@ -488,8 +488,18 @@ UInt TComRdCost::getDistPart(Int bitDept #if RATE_CONTROL_LAMBDA_DOMAIN UInt TComRdCost::getSADPart ( Int bitDepth, Pel* pelCur, Int curStride, Pel* pelOrg, Int orgStride, UInt width, UInt height ) { + Int shift = DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8); + +#if ENABLE_PRIMITIVES + if (shift == 0) + { + int part = x265::PartitionFromSizes(width, height); + if (part >= 0) + return x265::primitives.sad[part]((pixel*)pelCur, curStride, (pixel*)pelOrg, orgStride); + } +#endif + UInt SAD = 0; - Int shift = DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8); for ( Int i=0; i<height; i++ ) { for( Int j=0; j<width; j++ ) @@ -524,6 +534,12 @@ UInt TComRdCost::xGetSAD( DistParam* pcD Int iStrideCur = pcDtParam->iStrideCur; Int iStrideOrg = pcDtParam->iStrideOrg; +#if ENABLE_PRIMITIVES + int part = x265::PartitionFromSizes(iCols, iRows); + if (part >= 0) + return x265::primitives.sad[part]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg) >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); +#endif + UInt uiSum = 0; for( ; iRows != 0; iRows-- ) @@ -552,6 +568,15 @@ UInt TComRdCost::xGetSAD4( DistParam* pc Int iSubStep = ( 1 << iSubShift ); Int iStrideCur = pcDtParam->iStrideCur*iSubStep; Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; + +#if ENABLE_PRIMITIVES + if (iSubStep == 1) + { + int part = x265::PartitionFromSizes(4, iRows); + if (part >= 0) + return x265::primitives.sad[part]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg) >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); + } +#endif UInt 
uiSum = 0; @@ -570,7 +595,6 @@ UInt TComRdCost::xGetSAD4( DistParam* pc return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); } -#ifndef ENABLE_VECTOR UInt TComRdCost::xGetSAD8( DistParam* pcDtParam ) { if ( pcDtParam->bApplyWeight ) @@ -585,6 +609,15 @@ UInt TComRdCost::xGetSAD8( DistParam* pc Int iStrideCur = pcDtParam->iStrideCur*iSubStep; Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; +#if ENABLE_PRIMITIVES + if (iSubStep == 1) + { + int part = x265::PartitionFromSizes(8, iRows); + if (part >= 0) + return x265::primitives.sad[part]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg) >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); + } +#endif + UInt uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) @@ -605,9 +638,7 @@ UInt TComRdCost::xGetSAD8( DistParam* pc uiSum <<= iSubShift; return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); } -#endif -#ifndef ENABLE_VECTOR UInt TComRdCost::xGetSAD16( DistParam* pcDtParam ) { if ( pcDtParam->bApplyWeight ) @@ -622,6 +653,15 @@ UInt TComRdCost::xGetSAD16( DistParam* p Int iStrideCur = pcDtParam->iStrideCur*iSubStep; Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; +#if ENABLE_PRIMITIVES + if (iSubStep == 1) + { + int part = x265::PartitionFromSizes(16, iRows); + if (part >= 0) + return x265::primitives.sad[part]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg) >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); + } +#endif + UInt uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) @@ -650,10 +690,8 @@ UInt TComRdCost::xGetSAD16( DistParam* p uiSum <<= iSubShift; return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); } -#endif #if AMP_SAD -#ifndef ENABLE_VECTOR UInt TComRdCost::xGetSAD12( DistParam* pcDtParam ) { if ( pcDtParam->bApplyWeight ) @@ -693,7 +731,6 @@ UInt TComRdCost::xGetSAD12( DistParam* p return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); } #endif -#endif UInt TComRdCost::xGetSAD16N( DistParam* pcDtParam ) { @@ 
-707,6 +744,15 @@ UInt TComRdCost::xGetSAD16N( DistParam* Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; UInt uiSum = 0; + +#if ENABLE_PRIMITIVES + if (iSubStep == 1) + { + int part = x265::PartitionFromSizes(iCols, iRows); + if (part >= 0) + return x265::primitives.sad[part]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg) >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); + } +#endif for( ; iRows != 0; iRows-=iSubStep ) { @@ -737,7 +783,6 @@ UInt TComRdCost::xGetSAD16N( DistParam* return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); } -#ifndef ENABLE_VECTOR UInt TComRdCost::xGetSAD32( DistParam* pcDtParam ) { if ( pcDtParam->bApplyWeight ) @@ -752,6 +797,15 @@ UInt TComRdCost::xGetSAD32( DistParam* p Int iStrideCur = pcDtParam->iStrideCur*iSubStep; Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; +#if ENABLE_PRIMITIVES + if (iSubStep == 1) + { + int part = x265::PartitionFromSizes(32, iRows); + if (part >= 0) + return x265::primitives.sad[part]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg) >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); + } +#endif + UInt uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) @@ -796,10 +850,8 @@ UInt TComRdCost::xGetSAD32( DistParam* p uiSum <<= iSubShift; return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); } -#endif #if AMP_SAD -#ifndef ENABLE_VECTOR UInt TComRdCost::xGetSAD24( DistParam* pcDtParam ) { if ( pcDtParam->bApplyWeight ) @@ -851,9 +903,7 @@ UInt TComRdCost::xGetSAD24( DistParam* p return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); } #endif -#endif -#ifndef ENABLE_VECTOR UInt TComRdCost::xGetSAD64( DistParam* pcDtParam ) { if ( pcDtParam->bApplyWeight ) @@ -944,7 +994,6 @@ UInt TComRdCost::xGetSAD64( DistParam* p uiSum <<= iSubShift; return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); } -#endif #if AMP_SAD UInt TComRdCost::xGetSAD48( DistParam* pcDtParam ) @@ -1021,7 +1070,6 @@ UInt TComRdCost::xGetSAD48( 
DistParam* p uiSum <<= iSubShift; return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); } -#endif // -------------------------------------------------------------------------------------------------------------------- // SSE @@ -1396,12 +1444,15 @@ UInt TComRdCost::xCalcHADs2x2( Pel *piOr return satd; } -#ifndef ENABLE_VECTOR UInt TComRdCost::xCalcHADs4x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) { + assert( iStep == 1 ); + +#if ENABLE_PRIMITIVES + return x265::primitives.satd[x265::PARTITION_4x4]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg); +#else Int k, satd = 0, diff[16], m[16], d[16]; - assert( iStep == 1 ); for( k = 0; k < 16; k+=4 ) { diff[k+0] = piOrg[0] - piCur[0]; @@ -1489,15 +1540,18 @@ UInt TComRdCost::xCalcHADs4x4( Pel *piOr satd = ((satd+1)>>1); return satd; +#endif } #endif -#ifndef ENABLE_VECTOR UInt TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) { + assert( iStep == 1 ); +#if ENABLE_PRIMITIVES + return x265::primitives.satd[x265::PARTITION_8x8]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg); +#else Int k, i, j, jj, sad=0; Int diff[64], m1[8][8], m2[8][8], m3[8][8]; - assert( iStep == 1 ); for( k = 0; k < 64; k += 8 ) { diff[k+0] = piOrg[0] - piCur[0]; @@ -1587,15 +1641,18 @@ UInt TComRdCost::xCalcHADs8x8( Pel *piOr sad=((sad+2)>>2); return sad; +#endif } -#endif #if NS_HAD UInt TComRdCost::xCalcHADs16x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) { + assert( iStep == 1 ); +#if ENABLE_PRIMITIVES + return x265::primitives.satd[x265::PARTITION_16x4]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg); +#endif Int k, i, j, jj, sad=0; Int diff[64], m1[4][16], m2[4][16]; - assert( iStep == 1 ); for( k = 0; k < 64; k += 16 ) { diff[k+0] = piOrg[0] - piCur[0]; @@ -1723,9 +1780,12 @@ UInt TComRdCost::xCalcHADs16x4( Pel *piO UInt TComRdCost::xCalcHADs4x16( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) { 
+ assert( iStep == 1 ); +#if ENABLE_PRIMITIVES + return x265::primitives.satd[x265::PARTITION_4x16]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg); +#endif Int k, i, j, jj, sad=0; Int diff[64], m1[16][4], m2[16][4], m3[16][4]; - assert( iStep == 1 ); for( k = 0; k < 64; k += 4 ) { diff[k+0] = piOrg[0] - piCur[0]; @@ -1854,6 +1914,15 @@ UInt TComRdCost::xGetHADs4( DistParam* p Int iOffsetOrg = iStrideOrg<<2; Int iOffsetCur = iStrideCur<<2; +#if ENABLE_PRIMITIVES + if (iStep == 0) + { + int part = x265::PartitionFromSizes(4, iRows); + if (part >= 0) + return x265::primitives.satd[part]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg) >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); + } +#endif + UInt uiSum = 0; for ( y=0; y<iRows; y+= 4 ) @@ -1880,6 +1949,15 @@ UInt TComRdCost::xGetHADs8( DistParam* p Int iStep = pcDtParam->iStep; Int y; +#if ENABLE_PRIMITIVES + if (iStep == 0) + { + int part = x265::PartitionFromSizes(8, iRows); + if (part >= 0) + return x265::primitives.satd[part]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg) >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); + } +#endif + UInt uiSum = 0; if ( iRows == 4 ) @@ -1916,6 +1994,15 @@ UInt TComRdCost::xGetHADs( DistParam* pc Int iStrideOrg = pcDtParam->iStrideOrg; Int iStep = pcDtParam->iStep; +#if ENABLE_PRIMITIVES + if (iStep == 0) + { + int part = x265::PartitionFromSizes(iCols, iRows); + if (part >= 0) + return x265::primitives.satd[part]((pixel*)piCur, iStrideCur, (pixel*)piOrg, iStrideOrg) >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8); + } +#endif + Int x, y; UInt uiSum = 0;
--- a/source/Lib/TLibCommon/TypeDef.h Wed Apr 03 10:57:28 2013 +0530 +++ b/source/Lib/TLibCommon/TypeDef.h Wed Apr 03 14:34:39 2013 +0530 @@ -204,8 +204,6 @@ #define AMP_MRG 1 ///< encoder only force merge for AMP partition (no motion search for AMP) #endif -#define ENABLE_VECTOR - #define SCALING_LIST_OUTPUT_RESULT 0 //JCTVC-G880/JCTVC-G1016 quantization matrices #define CABAC_INIT_PRESENT_FLAG 1
--- a/source/encoder/TComRdCost_SSE.cpp Wed Apr 03 10:57:28 2013 +0530 +++ b/source/encoder/TComRdCost_SSE.cpp Wed Apr 03 14:34:39 2013 +0530 @@ -28,7 +28,7 @@ #include <assert.h> -#ifdef ENABLE_VECTOR +#if 0 UInt TComRdCost::xGetSAD8(DistParam *pcDtParam) { if (pcDtParam->bApplyWeight)
--- a/source/encoder/pixel.cpp Wed Apr 03 10:57:28 2013 +0530 +++ b/source/encoder/pixel.cpp Wed Apr 03 14:34:39 2013 +0530 @@ -185,19 +185,12 @@ int CDECL satd32( pixel *pix1, intptr_t } // end anonymous namespace -namespace x265 -{ - -extern EncoderPrimitives primitives_c; - +namespace x265 { +// x265 private namespace -/* Setup() will be called before main(). It should initialize - * primitive_c entries for pixel functions defined in this file. - */ -static int Setup() +/* It should initialize entries for pixel functions defined in this file. */ +void Setup_C_Primitives(EncoderPrimitives &p) { - EncoderPrimitives &p = primitives_c; - p.sad[PARTITION_4x4] = sad<4,4>; p.sad[PARTITION_8x4] = sad<8,4>; p.sad[PARTITION_4x8] = sad<4,8>; @@ -231,10 +224,6 @@ static int Setup() p.satd[PARTITION_16x32] = satd32<16,32>; p.satd[PARTITION_32x16] = satd32<32,16>; p.satd[PARTITION_32x32] = satd32<32,32>; - - return 1; } -static int forceinit = Setup(); - };
--- a/source/encoder/primitives.cpp Wed Apr 03 10:57:28 2013 +0530 +++ b/source/encoder/primitives.cpp Wed Apr 03 14:34:39 2013 +0530 @@ -53,29 +53,28 @@ int PartitionFromSizes(int Width, int He return -1; if (Width > 32 || Height > 32) return -1; - return (int) psize[Width >> 2][Height >> 2]; + return (int) psize[(Width>>2)-1][(Height>>2)-1]; } -/* C (reference) versions of each primitive, implemented by various - * C++ files (pixel.cpp, etc) */ -EncoderPrimitives primitives_c; + +void Setup_C_Primitives(EncoderPrimitives &p); -/* These function tables are defined by C++ files in encoder/vec - * Depending on your compiler, some of them may be undefined. - * The #if logic here must match the file lists in vec/CMakeLists.txt */ +/* These functions are defined by C++ files in encoder/vec. Depending on your + * compiler, some of them may be undefined. The #if logic here must match the + * file lists in vec/CMakeLists.txt */ #if defined (__GNUC__) || defined(_MSC_VER) -extern EncoderPrimitives primitives_vectorized_sse42; -extern EncoderPrimitives primitives_vectorized_sse41; -extern EncoderPrimitives primitives_vectorized_ssse3; -extern EncoderPrimitives primitives_vectorized_sse3; -extern EncoderPrimitives primitives_vectorized_sse2; +extern void Setup_Vec_Primitives_sse42(EncoderPrimitives&); +extern void Setup_Vec_Primitives_sse41(EncoderPrimitives&); +extern void Setup_Vec_Primitives_ssse3(EncoderPrimitives&); +extern void Setup_Vec_Primitives_sse3(EncoderPrimitives&); +extern void Setup_Vec_Primitives_sse2(EncoderPrimitives&); #endif #if defined(_MSC_VER) && _MSC_VER >= 1600 -extern EncoderPrimitives primitives_vectorized_avx; +extern void Setup_Vec_Primitives_avx(EncoderPrimitives&); #endif #if defined(_MSC_VER) && _MSC_VER >= 1700 -extern EncoderPrimitives primitives_vectorized_avx2; +extern void Setup_Vec_Primitives_avx2(EncoderPrimitives&); #endif /* the "authoritative" set of encoder primitives */ @@ -83,21 +82,6 @@ extern EncoderPrimitives primitives_vect 
EncoderPrimitives primitives; #endif -/* Take all primitive functions from p which are non-NULL */ -static void MergeFunctions(const EncoderPrimitives &p) -{ - /* too bad this isn't an introspective language, but we can use macros */ - -#define TAKE_IF_NOT_NULL(FOO) \ - primitives.FOO = p.FOO ? p.FOO : primitives.FOO -#define TAKE_EACH_IF_NOT_NULL(FOO, COUNT) \ - for (int i = 0; i < COUNT; i++) \ - primitives.FOO[i] = p.FOO[i] ? p.FOO[i] : primitives.FOO[i] - - TAKE_EACH_IF_NOT_NULL(sad, NUM_PARTITIONS); - TAKE_EACH_IF_NOT_NULL(satd, NUM_PARTITIONS); -} - /* cpuid == 0 - auto-detect CPU type, else * cpuid != 0 - force CPU type */ void SetupPrimitives(int cpuid) @@ -108,23 +92,23 @@ void SetupPrimitives(int cpuid) } #if ENABLE_PRIMITIVES - memcpy((void *)&primitives, (void *)&primitives_c, sizeof(primitives)); + Setup_C_Primitives(primitives); /* Pick best vector architecture to use as a baseline. */ #if defined (__GNUC__) || defined(_MSC_VER) - if (cpuid > 1) MergeFunctions(primitives_vectorized_sse2); - if (cpuid > 2) MergeFunctions(primitives_vectorized_sse3); - if (cpuid > 3) MergeFunctions(primitives_vectorized_ssse3); - if (cpuid > 4) MergeFunctions(primitives_vectorized_sse41); - if (cpuid > 5) MergeFunctions(primitives_vectorized_sse42); + if (cpuid > 1) Setup_Vec_Primitives_sse2(primitives); + if (cpuid > 2) Setup_Vec_Primitives_sse3(primitives); + if (cpuid > 3) Setup_Vec_Primitives_ssse3(primitives); + if (cpuid > 4) Setup_Vec_Primitives_sse41(primitives); + if (cpuid > 5) Setup_Vec_Primitives_sse42(primitives); +#endif #if defined(_MSC_VER) && _MSC_VER >= 1600 - if (cpuid > 6) MergeFunctions(primitives_vectorized_avx); + if (cpuid > 6) Setup_Vec_Primitives_avx(primitives); #endif #if defined(_MSC_VER) && _MSC_VER >= 1700 - if (cpuid > 7) MergeFunctions(primitives_vectorized_avx2); + if (cpuid > 7) Setup_Vec_Primitives_avx2(primitives); #endif -#endif - + /* .. upgrade functions with available assembly code. */ #endif }
--- a/source/encoder/vec/vecprimitives.inc Wed Apr 03 10:57:28 2013 +0530 +++ b/source/encoder/vec/vecprimitives.inc Wed Apr 03 14:34:39 2013 +0530 @@ -171,27 +171,17 @@ int CDECL NAME(sad_8x8)(pixel *piOrg, in } // end anonymous namespace namespace x265 { -// Instantiate an EncoderPrimitives instance for this vector architecture - -EncoderPrimitives NAME(primitives_vectorized); +// private x265 namespace -/* Setup() will be called before main(). It should initialize the - * function table for this vector architecture. - */ -static int Setup() +/* It should initialize the function table for this vector architecture. */ +void NAME(Setup_Vec_Primitives)(EncoderPrimitives& p) { - EncoderPrimitives &p = NAME(primitives_vectorized); - #if HIGH_BIT_DEPTH /* 16-bit pixels */ p.sad[PARTITION_8x8] = NAME(sad_8x8); p.satd[PARTITION_4x4] = NAME(satd_4x4); #else p.sad[PARTITION_8x8] = NAME(sad_8x8); #endif - - return 1; } -static int forceinit = Setup(); - };