Mercurial > x265
changeset 783:07bd1df8ced7 draft
Merged multicoreware/xhevc into default
author | Deepthi Devaki Akkoorath <deepthidevaki@multicorewareinc.com> |
---|---|
date | Wed, 24 Apr 2013 12:00:28 +0530 |
parents | 189e1c23ebdb (current diff) d9d313f7abe1 (diff) |
children | 5bcab5c66709 |
files | source/Lib/TLibEncoder/TEncSearch.cpp |
diffstat | 29 files changed, 468 insertions(+), 410 deletions(-) [+] |
line wrap: on
line diff
--- a/doc/uncrustify/codingstyle.cfg Wed Apr 24 11:58:14 2013 +0530 +++ b/doc/uncrustify/codingstyle.cfg Wed Apr 24 12:00:28 2013 +0530 @@ -84,8 +84,8 @@ mod_sort_include=false mod_sort_using=false newlines=lf nl_after_access_spec=2 -nl_after_brace_close=ignore -nl_after_brace_open=ignore +#nl_after_brace_close=ignore +#nl_after_brace_open=ignore nl_after_brace_open_cmt=true nl_after_case=false nl_after_class=2
--- a/source/Lib/TLibCommon/CommonDef.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/Lib/TLibCommon/CommonDef.h Wed Apr 24 12:00:28 2013 +0530 @@ -55,7 +55,7 @@ // Version information // ==================================================================================================================== -#define NV_VERSION "10.0" ///< Current software version +#define NV_VERSION "10.1rc" ///< Current software version // ==================================================================================================================== // Platform information
--- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -198,7 +198,7 @@ Void TComSampleAdaptiveOffset::create(UI m_iUpBuff1++; m_iUpBuff2++; m_iUpBufft++; - Pel i; + Short i; UInt uiMaxY = (1 << g_bitDepthY) - 1; UInt uiMinY = 0;
--- a/source/Lib/TLibEncoder/TEncGOP.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/Lib/TLibEncoder/TEncGOP.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -674,6 +674,10 @@ Void TEncGOP::compressGOP(Int iPOCLast, pcSlice->setCheckLDC(bLowDelay); } + else + { + pcSlice->setCheckLDC(true); + } uiColDir = 1 - uiColDir;
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -1154,7 +1154,7 @@ Void TEncSearch::xIntraCodingLumaBlk(TCo { for (UInt uiX = 0; uiX < uiWidth; uiX++) { - pReco[uiX] = ClipY(pPred[uiX] + pResi[uiX]); + pReco[uiX] = ClipY(static_cast<Short>(pPred[uiX]) + pResi[uiX]); pRecQt[uiX] = pReco[uiX]; pRecIPred[uiX] = pReco[uiX]; } @@ -1353,7 +1353,7 @@ Void TEncSearch::xIntraCodingChromaBlk(T { for (UInt uiX = 0; uiX < uiWidth; uiX++) { - pReco[uiX] = ClipC(pPred[uiX] + pResi[uiX]); + pReco[uiX] = ClipC(static_cast<Short> (pPred[uiX]) + pResi[uiX]); pRecQt[uiX] = pReco[uiX]; pRecIPred[uiX] = pReco[uiX]; }
--- a/source/PPA/ppaCPUEvents.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/PPA/ppaCPUEvents.h Wed Apr 24 12:00:28 2013 +0530 @@ -1,4 +1,5 @@ PPA_REGISTER_CPU_EVENT(encode_block) PPA_REGISTER_CPU_EVENT(read_yuv) +PPA_REGISTER_CPU_EVENT(write_yuv) PPA_REGISTER_CPU_EVENT(encode_frame) PPA_REGISTER_CPU_EVENT(bitstream_write)
--- a/source/VectorClass/vectori128.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/VectorClass/vectori128.h Wed Apr 24 12:00:28 2013 +0530 @@ -302,6 +302,10 @@ public: Vec16c(int i) { xmm = _mm_set1_epi8(i); } + // MCW Added - assign lowest 4 byte values from uint32_t + void fromUint32(uint32_t i) { + xmm = _mm_cvtsi32_si128(i); + } // Constructor to build from all elements: Vec16c(int8_t i0, int8_t i1, int8_t i2, int8_t i3, int8_t i4, int8_t i5, int8_t i6, int8_t i7, int8_t i8, int8_t i9, int8_t i10, int8_t i11, int8_t i12, int8_t i13, int8_t i14, int8_t i15) { @@ -320,6 +324,10 @@ public: operator __m128i() const { return xmm; } + // MCW Added - PSADBW + Vec16c sad(__m128i const & x) { + return _mm_sad_epu8(xmm, x); + } // Member function to load from array (unaligned) Vec16c & load(void const * p) { xmm = _mm_loadu_si128((__m128i const*)p); @@ -3698,7 +3706,7 @@ static inline Vec16uc blend16uc(Vec16uc } #if _MSC_VER -#pragma warning(disable: 4700) +#pragma warning(disable: 4700) #endif template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
--- a/source/encoder/md5.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/encoder/md5.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -44,6 +44,8 @@ static void byteReverse(uint8_t_t *buf, } #endif +void MD5Transform(uint32_t *buf, uint32_t *in); + /* * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious * initialization constants. @@ -152,7 +154,7 @@ void MD5Final(MD5Context *ctx, uint8_t * byteReverse((uint8_t *) ctx->buf, 4); memcpy(digest, ctx->buf, 16); - memset(ctx, 0, sizeof(ctx)); /* In case it's sensitive */ + memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */ } /* The four core functions - F1 is optimized somewhat */
--- a/source/encoder/md5.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/encoder/md5.h Wed Apr 24 12:00:28 2013 +0530 @@ -39,7 +39,6 @@ typedef struct MD5Context { void MD5Init(MD5Context *context); void MD5Update(MD5Context *context, unsigned char *buf, uint32_t len); void MD5Final(MD5Context *ctx, uint8_t *digest); -void MD5Transform(uint32_t *buf, uint32_t *in); class MD5 {
--- a/source/encoder/pixel.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/encoder/pixel.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -30,7 +30,6 @@ namespace { template<int lx, int ly> int CDECL sad(pixel *pix1, intptr_t stride_pix1, pixel *pix2, intptr_t stride_pix2) { - // TODO: we could use SWAR here fairly easily. Would it help? int sum = 0; for (int y = 0; y < ly; y++)
--- a/source/encoder/threadpool.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/encoder/threadpool.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -57,12 +57,12 @@ #define CLZ64(x) __lzcnt_2x32(x) inline int __lzcnt_2x32(uint64_t x64) { - int val = __lzcnt((uint32_t)(x64 >> 32)); - - if (val) - return val + 32; - - return __lzcnt((uint32_t)x64); + uint32_t high32 = (uint32_t)(x64 >> 32); + uint32_t low32 = (uint32_t)x64; + if (high32) + return __lzcnt(high32); + else + return __lzcnt(low32) + 32; } #endif // if _WIN64
--- a/source/encoder/vec/pixel.inc Wed Apr 24 11:58:14 2013 +0530 +++ b/source/encoder/vec/pixel.inc Wed Apr 24 12:00:28 2013 +0530 @@ -23,20 +23,20 @@ // Vector class versions of pixel comparison performance primitives -template<int lx, int ly> +#if HIGH_BIT_DEPTH + +/* intrinsics for when pixel type is short */ + +template<int ly> int CDECL sad_4(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) { Vec8s sum(0); - for (int row = 0; row < ly; row++) { - for (int col = 0; col < lx; col += 4) - { - Vec8s m1, n1; - m1.load(piOrg + col); - n1.load(piCur + col); - sum += abs(m1 - n1); - } + Vec8s m1, n1; + m1.load(piOrg); + n1.load(piCur); + sum += abs(m1 - n1); piOrg += strideOrg; piCur += strideCur; @@ -45,42 +45,42 @@ int CDECL sad_4(pixel * piOrg, intptr_t return horizontal_add(extend_low(sum)); } -template<int lx, int ly> +template<int ly> int CDECL sad_8(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) { Vec8s sum(0); - for (int row = 0; row < ly; row++) { - for (int col = 0; col < lx; col += 8) - { - Vec8s m1, n1; - m1.load_a(piOrg + col); - n1.load(piCur + col); - sum += abs(m1 - n1); - } + Vec8s m1, n1; + m1.load_a(piOrg); + n1.load(piCur); + sum += abs(m1 - n1); piOrg += strideOrg; piCur += strideCur; } - return horizontal_add_x(sum); } -template<int lx, int ly> +template<int ly> +int CDECL sad_8x16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) +{ + int sum = 0; + for (int row = 0; row < ly; row += 16) + sum += sad_8<16>(piOrg + row * strideOrg, strideOrg, piCur + row * strideCur, strideCur); + return sum; +} + +template<int ly> int CDECL sad_16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) { Vec16s sum(0); - for (int row = 0; row < ly; row++) { - for (int col = 0; col < lx; col += 16) - { - Vec16s m1, n1; - m1.load_a(piOrg + col); - n1.load(piCur + col); - sum += abs(m1 - n1); - } + Vec16s m1, n1; + m1.load_a(piOrg); + n1.load(piCur); + sum += abs(m1 - n1); piOrg += 
strideOrg; piCur += strideCur; @@ -91,20 +91,29 @@ int CDECL sad_16(pixel * piOrg, intptr_t } template<int lx, int ly> -int CDECL sad_16x16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) +int CDECL sad_32(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) { - int sum = 0; - - for (int row = 0; row < ly; row += 16) + Vec16s sum(0); + for (int row = 0; row < ly; row++) { - for (int col = 0; col < lx; col += 16) + for (int col = 0; col < lx; col += 32) { - sum += sad_16<16, 16>(piOrg + row * strideOrg + col, strideOrg, - piCur + row * strideCur + col, strideCur); + Vec16s m1, n1; + m1.load(piOrg + col); + n1.load(piCur + col); + sum += abs(m1 - n1); + Vec16s m2, n2; + m2.load(piOrg + col + 16); + n2.load(piCur + col + 16); + sum += abs(m2 - n2); } - } - return sum; + piOrg += strideOrg; + piCur += strideCur; + } + return horizontal_add_x(extend_low(sum)) + + horizontal_add_x(extend_high(sum)); + } int CDECL satd_4x4(pixel * piOrg, intptr_t iStrideOrg, pixel * piCur, intptr_t iStrideCur) @@ -175,23 +184,6 @@ int CDECL satd_4x4(pixel * piOrg, intptr return satd; } -template<int lx, int ly> -int CDECL satd(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) -{ - int uiSum = 0; - - for (int row = 0; row < ly; row += 4) - { - for (int col = 0; col < lx; col += 4) - { - uiSum += satd_4x4(piOrg + strideOrg * row + col, strideOrg, - piCur + strideCur * row + col, strideCur); - } - } - - return uiSum; -} - int CDECL sa8d_8x8(pixel * piOrg, intptr_t iStrideOrg, pixel * piCur, intptr_t iStrideCur) { ALIGN_VAR_16(short, m2[8][8]); @@ -317,8 +309,6 @@ int CDECL sa8d_8x8(pixel * piOrg, intptr v6 = abs(t1); v7 = abs(t2); -#if HIGH_BIT_DEPTH - Vec4i s0, s1, s2, s3, s4, s5, s6, s7, s8; s0 = extend_low(v0); s1 = extend_high(v0); @@ -355,56 +345,173 @@ int CDECL sa8d_8x8(pixel * piOrg, intptr s0 = (s0 + s1) + (s2 + s3) + (s4 + s5) + (s6 + s7); satd = horizontal_add_x(s0); -#else /* if HIGH_BIT_DEPTH */ - v0 = v0 + v1; - v2 = v2 
+ v3; - v0 = v0 + v2; - - v4 = v4 + v5; - v6 = v6 + v7; - v4 = v4 + v6; - - v0 = v0 + v4; - - satd = horizontal_add_x(v0); -#endif /* if HIGH_BIT_DEPTH */ } return (satd + 2) >> 2; } +#else + +/* intrinsics for when pixel type is uint8_t */ + +template<int ly> +int CDECL sad_4(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) +{ + Vec8s sum(0); + + for (int row = 0; row < ly; row++) + { + Vec16uc m1, n1; + m1.fromUint32(*(uint32_t*)piOrg); + n1.fromUint32(*(uint32_t*)piCur); + sum += Vec8s(m1.sad(n1)); + + piOrg += strideOrg; + piCur += strideCur; + } + + return horizontal_add(sum); +} + +template<int ly> +int CDECL sad_8(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) +{ + Vec8s sum(0); + for (int row = 0; row < ly; row++) + { + Vec16uc m1, n1; + m1.load(piOrg); + n1.load(piCur); + sum += Vec8s(m1.sad(n1)); + + piOrg += strideOrg; + piCur += strideCur; + } + return sum[0]; +} + +template<int ly> +int CDECL sad_8x16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) +{ + /* groups of 8x16 blocks, upcasting sum from short to int often enough to avoid overflow */ + int sum = 0; + for (int row = 0; row < ly; row += 16) + sum += sad_8<16>(piOrg + row * strideOrg, strideOrg, piCur + row * strideCur, strideCur); + return sum; +} + +template<int ly> +int CDECL sad_16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) +{ + Vec8s sum(0); + for (int row = 0; row < ly; row++) + { + Vec16uc m1, n1; + m1.load_a(piOrg); + n1.load(piCur); + sum += Vec8s(m1.sad(n1)); + + piOrg += strideOrg; + piCur += strideCur; + } + return horizontal_add_x(sum); +} + +template<int lx, int ly> +int CDECL sad_32(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) +{ + // TODO: AVX2 + int sum = 0; + for (int row = 0; row < ly; row++) + { + Vec8s sad(0); + for (int col = 0; col < lx; col += 32) + { + Vec16uc m1, n1; + m1.load_a(piOrg + col); + n1.load(piCur + col); + sad += Vec8s(m1.sad(n1)); + Vec16uc 
m2, n2; + m2.load_a(piOrg + col + 16); + n2.load(piCur + col + 16); + sad += Vec8s(m2.sad(n2)); + } + + piOrg += strideOrg; + piCur += strideCur; + sum += horizontal_add_x(sad); + } + return sum; +} + +#endif + +template<int lx, int ly> +int CDECL sad_16x16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) +{ + int sum = 0; + for (int row = 0; row < ly; row += 16) + { + for (int col = 0; col < lx; col += 16) + { + sum += sad_16<16>(piOrg + row * strideOrg + col, strideOrg, + piCur + row * strideCur + col, strideCur); + } + } + return sum; +} + +template<int lx, int ly> +int CDECL satd(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur) +{ + int uiSum = 0; + + for (int row = 0; row < ly; row += 4) + { + for (int col = 0; col < lx; col += 4) + { + uiSum += satd_4x4(piOrg + strideOrg * row + col, strideOrg, + piCur + strideCur * row + col, strideCur); + } + } + + return uiSum; +} + void Setup_Vec_PixelPrimitives(EncoderPrimitives &p) { - p.sad[PARTITION_4x4] = sad_4<4, 4>; - p.sad[PARTITION_4x8] = sad_4<4, 8>; - p.sad[PARTITION_8x4] = sad_8<8, 4>; - p.sad[PARTITION_8x8] = sad_8<8, 8>; - p.sad[PARTITION_16x4] = sad_16<16, 4>; - p.sad[PARTITION_4x16] = sad_4<4, 16>; - p.sad[PARTITION_16x8] = sad_16<16, 8>; - p.sad[PARTITION_8x16] = sad_8<8, 16>; - p.sad[PARTITION_16x16] = sad_16<16, 16>; - p.sad[PARTITION_4x32] = sad_4<4, 32>; - p.sad[PARTITION_32x4] = sad_16<32, 4>; - p.sad[PARTITION_8x32] = sad_8<8, 32>; - p.sad[PARTITION_32x8] = sad_16<32, 8>; - p.sad[PARTITION_16x32] = sad_16<16, 32>; - p.sad[PARTITION_32x16] = sad_16<32, 16>; + p.sad[PARTITION_4x4] = sad_4<4>; + p.sad[PARTITION_4x8] = sad_4<8>; + p.sad[PARTITION_8x4] = sad_8<4>; + p.sad[PARTITION_8x8] = sad_8<8>; + p.sad[PARTITION_16x4] = sad_16<4>; + p.sad[PARTITION_4x16] = sad_4<16>; + p.sad[PARTITION_16x8] = sad_16<8>; + p.sad[PARTITION_8x16] = sad_8<16>; + p.sad[PARTITION_16x16] = sad_16<16>; + p.sad[PARTITION_4x32] = sad_4<32>; + p.sad[PARTITION_32x4] = sad_32<32, 4>; + 
p.sad[PARTITION_8x32] = sad_8x16<32>; + p.sad[PARTITION_32x8] = sad_32<32, 8>; + p.sad[PARTITION_16x32] = sad_16x16<16, 32>; + p.sad[PARTITION_32x16] = sad_16x16<32, 16>; p.sad[PARTITION_32x32] = sad_16x16<32, 32>; - p.sad[PARTITION_4x64] = sad_4<4, 64>; - p.sad[PARTITION_64x4] = sad_16<64, 4>; - p.sad[PARTITION_64x8] = sad_16<64, 8>; - p.sad[PARTITION_8x64] = sad_8<8, 64>; + p.sad[PARTITION_4x64] = sad_4<64>; + p.sad[PARTITION_64x4] = sad_32<64, 4>; + p.sad[PARTITION_64x8] = sad_32<64, 8>; + p.sad[PARTITION_8x64] = sad_8x16<64>; p.sad[PARTITION_16x64] = sad_16x16<16, 64>; p.sad[PARTITION_64x16] = sad_16x16<64, 16>; p.sad[PARTITION_32x64] = sad_16x16<32, 64>; p.sad[PARTITION_64x32] = sad_16x16<64, 32>; p.sad[PARTITION_64x64] = sad_16x16<64, 64>; +#if HIGH_BIT_DEPTH p.satd[PARTITION_4x4] = satd_4x4; p.satd[PARTITION_4x8] = satd<4, 8>; p.satd[PARTITION_16x4] = satd<16, 4>; // p.satd[PARTITION_8x4] = satd<8, 4>; // slower than SWAR C version - p.sa8d_8x8 = sa8d_8x8; +#else +#endif }
--- a/source/encoder/x86/asm-primitives.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/encoder/x86/asm-primitives.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -77,7 +77,9 @@ void Setup_Assembly_Primitives(EncoderPr { if (cpuid >= 1) { -// INIT7( sad, _mmx2 ); +#if 0 + INIT7( sad, _mmx2 ); +#endif INIT8( satd, _mmx2 ); // Intra predictions max out at 32x32 (but subpel refine can use larger blocks) @@ -85,17 +87,19 @@ void Setup_Assembly_Primitives(EncoderPr p.satd[PARTITION_32x8] = cmp<32, 8, 16, 8, x264_pixel_satd_16x8_mmx2>; p.satd[PARTITION_32x32] = cmp<32, 32, 16, 16, x264_pixel_satd_16x16_mmx2>; +#if 0 // For large CU motion search -// p.sad[PARTITION_32x32] = cmp<32, 32, 16, 16, x264_pixel_sad_16x16_mmx2>; -// p.sad[PARTITION_64x32] = cmp<64, 32, 16, 16, x264_pixel_sad_16x16_mmx2>; -// p.sad[PARTITION_32x64] = cmp<32, 64, 16, 16, x264_pixel_sad_16x16_mmx2>; -// p.sad[PARTITION_64x64] = cmp<64, 64, 16, 16, x264_pixel_sad_16x16_mmx2>; + p.sad[PARTITION_32x32] = cmp<32, 32, 16, 16, x264_pixel_sad_16x16_mmx2>; + p.sad[PARTITION_64x32] = cmp<64, 32, 16, 16, x264_pixel_sad_16x16_mmx2>; + p.sad[PARTITION_32x64] = cmp<32, 64, 16, 16, x264_pixel_sad_16x16_mmx2>; + p.sad[PARTITION_64x64] = cmp<64, 64, 16, 16, x264_pixel_sad_16x16_mmx2>; +#endif } #if 0 if (cpuid >= 2) { p.satd[PARTITION_4x16] = x264_pixel_satd_4x16_sse2; - //p.sa8d_8x8 = x264_pixel_sa8d_8x8_sse2; + p.sa8d_8x8 = x264_pixel_sa8d_8x8_sse2; p.sa8d_16x16 = x264_pixel_sa8d_16x16_sse2; p.sad[PARTITION_16x16] = x264_pixel_sad_16x16_sse2; p.sad[PARTITION_16x8] = x264_pixel_sad_16x8_sse2; @@ -109,13 +113,13 @@ void Setup_Assembly_Primitives(EncoderPr } if (cpuid >= 3) { - //p.sa8d_8x8 = x264_pixel_sa8d_8x8_ssse3; + p.sa8d_8x8 = x264_pixel_sa8d_8x8_ssse3; p.sa8d_16x16 = x264_pixel_sa8d_16x16_ssse3; } if (cpuid >= 4) { p.satd[PARTITION_4x16] = x264_pixel_satd_4x16_sse4; - //p.sa8d_8x8 = x264_pixel_sa8d_8x8_sse4; + p.sa8d_8x8 = x264_pixel_sa8d_8x8_sse4; p.sa8d_16x16 = x264_pixel_sa8d_16x16_sse4; } if (cpuid == 7)
--- a/source/input/input.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/input/input.h Wed Apr 24 12:00:28 2013 +0530 @@ -25,8 +25,6 @@ #define _INPUT_H_ #include "x265.h" -#include <stdint.h> -#include <iostream> namespace x265 { // private x265 namespace @@ -63,7 +61,7 @@ public: virtual bool isFail() const = 0; - virtual int guessFrameCount() const = 0; + virtual int guessFrameCount() = 0; }; }
--- a/source/input/y4m.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/input/y4m.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -27,148 +27,134 @@ #include <string.h> using namespace x265; - -#define Y4M_FRAME_MAGIC 5 // "FRAME" +using namespace std; Y4MInput::Y4MInput(const char *filename) { - fp = fopen(filename, "rb"); - if (fp) + ifs.open(filename, ios::binary | ios::in); + if (!ifs.fail()) parseHeader(); - buf = new uint8_t[3 * width * height / 2]; + buf = new char[3 * width * height / 2]; } Y4MInput::~Y4MInput() { - if (fp) fclose(fp); + ifs.close(); if (buf) delete[] buf; } -#if _MSC_VER -#pragma warning(disable: 4127) -#endif void Y4MInput::parseHeader() { - char source[5]; int t_width = 0; int t_height = 0; int t_rateNumerator = 0; int t_rateDenominator = 0; - while (1) + while (ifs) { - source[0] = 0x0; + // Skip Y4MPEG string + char byte = ifs.get(); + while (!ifs.eof() && (byte != ' ') && (byte != '\n')) + byte = ifs.get(); - while ((source[0] != 0x20) && (source[0] != 0x0a)) - { - if (fread(&source[0], 1, 1, fp) == 0) - { - break; - } - } - - if (source[0] == 0x00) - { - break; - } - - while (source[0] == 0x20) + while (byte == ' ' && ifs) { // read parameter identifier - fread(&source[1], 1, 1, fp); - if (source[1] == 'W') + switch (ifs.get()) { + case 'W': t_width = 0; - while (true) + while (ifs) { - fread(&source[0], 1, 1, fp); + byte = ifs.get(); - if (source[0] == 0x20 || source[0] == 0x0a) + if (byte == ' ' || byte == '\n') { break; } else { - t_width = t_width * 10 + (source[0] - '0'); + t_width = t_width * 10 + (byte - '0'); } } - - continue; - } + break; - if (source[1] == 'H') - { + case 'H': t_height = 0; - while (true) + while (ifs) { - fread(&source[0], 1, 1, fp); - if (source[0] == 0x20 || source[0] == 0x0a) + byte = ifs.get(); + if (byte == ' ' || byte == '\n') { break; } else { - t_height = t_height * 10 + (source[0] - '0'); + t_height = t_height * 10 + (byte - '0'); } } + break; - continue; - } - - if (source[1] == 'F') - { + case 'F': 
t_rateNumerator = 0; t_rateDenominator = 0; - while (true) + while (ifs) { - fread(&source[0], 1, 1, fp); - if (source[0] == '.') + byte = ifs.get(); + if (byte == '.') { t_rateDenominator = 1; - while (true) + while (ifs) { - fread(&source[0], 1, 1, fp); - if (source[0] == 0x20 || source[0] == 0x10) + byte = ifs.get(); + if (byte == ' ' || byte == '\n') { break; } else { - t_rateNumerator = t_rateNumerator * 10 + (source[0] - '0'); + t_rateNumerator = t_rateNumerator * 10 + (byte - '0'); t_rateDenominator = t_rateDenominator * 10; } } break; } - else if (source[0] == ':') + else if (byte == ':') { - while (true) + while (ifs) { - fread(&source[0], 1, 1, fp); - if (source[0] == 0x20 || source[0] == 0x0a) + byte = ifs.get(); + if (byte == ' ' || byte == '\n') { break; } else - t_rateDenominator = t_rateDenominator * 10 + (source[0] - '0'); + t_rateDenominator = t_rateDenominator * 10 + (byte - '0'); } break; } else { - t_rateNumerator = t_rateNumerator * 10 + (source[0] - '0'); + t_rateNumerator = t_rateNumerator * 10 + (byte - '0'); } } + break; - continue; + default: + while (ifs) + { + // consume this unsupported configuration word + byte = ifs.get(); + if (byte == ' ' || byte == '\n') + break; + } + break; } - - break; } - if (source[0] == 0x0a) + if (byte == '\n') { break; } @@ -182,10 +168,16 @@ void Y4MInput::parseHeader() rateDenom = t_rateDenominator; } -int Y4MInput::guessFrameCount() const +static const char header[] = "FRAME"; + +int Y4MInput::guessFrameCount() { - /* TODO: Get file size, subtract file header, divide by (framesize+frameheader) */ - return 0; + long cur = ifs.tellg(); + ifs.seekg (0, ios::end); + long size = ifs.tellg(); + ifs.seekg (cur, ios::beg); + + return (int) ((size - cur) / ((width * height * 3 / 2) + strlen(header) + 1)); } void Y4MInput::skipFrames(int numFrames) @@ -203,42 +195,33 @@ bool Y4MInput::readPicture(x265_picture& PPAStartCpuEventFunc(read_yuv); /* strip off the FRAME header */ - char header[Y4M_FRAME_MAGIC]; - - if 
(fread(&header, 1, sizeof(header), fp) < sizeof(header)) - return false; - if (!strncmp(header, "FRAME", Y4M_FRAME_MAGIC)) + char hbuf[sizeof(header)]; + ifs.read(hbuf, strlen(header)); + if (!ifs || strncmp(hbuf, header, strlen(header))) { fprintf(stderr, "Y4M frame header missing\n"); return false; } /* consume bytes up to line feed */ - char byte; - do - { - if (fread(&byte, 1, 1, fp) == 0) - { - fprintf(stderr, "Y4M frame header incomplete\n"); - return false; - } - } - while (byte != '\n'); + char byte = ifs.get(); + while (byte != '\n' && !ifs) + byte = ifs.get(); const size_t count = width * height * 3 / 2; pic.planes[0] = buf; - pic.planes[1] = buf + (width * height); + pic.planes[1] = buf + width * height; - pic.planes[2] = buf + ((width * height) + ((width >> 1) * (height >> 1))); + pic.planes[2] = buf + width * height + ((width * height) >> 2); pic.stride[0] = width; pic.stride[1] = pic.stride[2] = pic.stride[0] >> 1; - size_t bytes = fread(buf, 1, count, fp); + ifs.read(buf, count); PPAStopCpuEventFunc(read_yuv); - return bytes == count; + return ifs.good(); }
--- a/source/input/y4m.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/input/y4m.h Wed Apr 24 12:00:28 2013 +0530 @@ -25,8 +25,7 @@ #define _Y4M_H_ #include "input.h" -#include <stdio.h> -#include <stdint.h> +#include <fstream> namespace x265 { // x265 private namespace @@ -43,11 +42,9 @@ protected: int height; - uint8_t* buf; + char* buf; - FILE* fp; - - bool eof; + std::ifstream ifs; void parseHeader(); @@ -67,13 +64,13 @@ public: int getHeight() const { return height; } - bool isEof() const { return !!feof(fp); } + bool isEof() const { return ifs.eof(); } - bool isFail() const { return !fp; } + bool isFail() const { return !ifs.is_open(); } void release() { delete this; } - int guessFrameCount() const; + int guessFrameCount(); void skipFrames(int numFrames);
--- a/source/input/yuv.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/input/yuv.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -27,10 +27,11 @@ #include <string.h> using namespace x265; +using namespace std; YUVInput::YUVInput(const char *filename) { - fp = fopen(filename, "rb"); + ifs.open(filename, ios::binary | ios::in); width = height = 0; depth = 8; buf = NULL; @@ -38,14 +39,19 @@ YUVInput::YUVInput(const char *filename) YUVInput::~YUVInput() { - if (fp) fclose(fp); + ifs.close(); if (buf) delete[] buf; } -int YUVInput::guessFrameCount() const +int YUVInput::guessFrameCount() { - /* TODO: Get file size, divide by bufsize */ - return 0; + long cur = ifs.tellg(); + ifs.seekg (0, ios::end); + long size = ifs.tellg(); + ifs.seekg (cur, ios::beg); + int pixelbytes = depth > 8 ? 2 : 1; + + return (size - cur) / (width * height * pixelbytes * 3 / 2); } void YUVInput::skipFrames(int numFrames) @@ -54,7 +60,7 @@ void YUVInput::skipFrames(int numFrames) int framesize = (width * height * 3 / 2) * pixelbytes; - fseek(fp, framesize * numFrames, SEEK_CUR); + ifs.seekg(framesize * numFrames, ios::cur); } // TODO: only supports 4:2:0 chroma sampling @@ -68,7 +74,7 @@ bool YUVInput::readPicture(x265_picture& if (!buf) { - buf = new uint8_t[bufsize]; + buf = new char[bufsize]; } pic.planes[0] = buf; @@ -83,8 +89,8 @@ bool YUVInput::readPicture(x265_picture& pic.stride[1] = pic.stride[2] = pic.stride[0] >> 1; - size_t bytes = fread(buf, 1, bufsize, fp); + ifs.read(buf, bufsize); PPAStopCpuEventFunc(read_yuv); - return bytes == (size_t)bufsize; + return ifs.good(); }
--- a/source/input/yuv.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/input/yuv.h Wed Apr 24 12:00:28 2013 +0530 @@ -25,8 +25,7 @@ #define _YUV_H_ #include "input.h" -#include <stdio.h> -#include <stdint.h> +#include <fstream> namespace x265 { // private x265 namespace @@ -41,11 +40,9 @@ protected: int depth; - uint8_t* buf; + char* buf; - FILE *fp; - - bool eof; + std::ifstream ifs; public: @@ -63,19 +60,13 @@ public: int getHeight() const { return height; } - bool isEof() const { return !!feof(fp); } + bool isEof() const { return ifs.eof(); } - bool isFail() const { return !fp; } + bool isFail() const { return !ifs.is_open(); } - void release() - { - if (fp) - fclose(fp); + void release() { delete this; } - delete this; - } - - int guessFrameCount() const; + int guessFrameCount(); void skipFrames(int numFrames);
--- a/source/output/output.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/output/output.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -28,11 +28,11 @@ using namespace x265; -Output* Output::Open(const char *fname, int width, int height, int bitdepth) +Output* Output::Open(const char *fname, int width, int height, int bitdepth, int rate) { const char * s = strrchr(fname, '.'); if (s && !strcmp(s, ".y4m")) - return new Y4MOutput(fname, width, height, bitdepth); + return new Y4MOutput(fname, width, height, rate); else return new YUVOutput(fname, width, height, bitdepth); }
--- a/source/output/output.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/output/output.h Wed Apr 24 12:00:28 2013 +0530 @@ -25,7 +25,6 @@ #define _OUTPUT_H_ #include "x265.h" -#include <stdint.h> namespace x265 { // private x265 namespace @@ -41,7 +40,9 @@ public: Output() {} - static Output* Open(const char *fname, int width, int height, int bitdepth); + static Output* Open(const char *fname, int width, int height, int bitdepth, int rate); + + virtual bool isFail() const = 0; virtual void release() = 0;
--- a/source/output/y4m.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/output/y4m.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -21,37 +21,35 @@ * For more information, contact us at licensing@multicorewareinc.com. *****************************************************************************/ +#include "PPA/ppa.h" #include "output.h" #include "y4m.h" -#include <stdio.h> -#include <assert.h> using namespace x265; +using namespace std; -Y4MOutput::Y4MOutput(const char *filename, int t_width, int t_height, int bitdepth) +Y4MOutput::Y4MOutput(const char *filename, int w, int h, int rate) + : width(w) + , height(h) { - fp = fopen(filename, "wb"); - width = t_width; - height = t_height; - assert(bitdepth == 8); + ofs.open(filename, ios::binary | ios::out); buf = new char[width]; - if (fp) + if (ofs) { - // TODO: need to get frame rate - fprintf(fp, "YUV4MPEG2 W%d H%d F30:1 Ip C420\n", width, height); + ofs << "YUV4MPEG2 W" << width << " H" << height << " F" << rate << ":1 Ip C420\n"; } } Y4MOutput::~Y4MOutput() { - if (fp) fclose(fp); + ofs.close(); if (buf) delete [] buf; } - bool Y4MOutput::writePicture(const x265_picture& pic) { - fprintf(fp, "FRAME\n"); + PPAStartCpuEventFunc(write_yuv); + ofs << "FRAME\n"; if (pic.bitDepth > 8) { @@ -61,7 +59,7 @@ bool Y4MOutput::writePicture(const x265_ { for (int j = 0; j < width; j++) buf[j] = (char) Y[j]; - fwrite(buf, sizeof(char), width, fp); + ofs.write(buf, width); Y += pic.stride[0]; } short *U = (short*)pic.planes[1]; @@ -69,7 +67,7 @@ bool Y4MOutput::writePicture(const x265_ { for (int j = 0; j < width >> 1; j++) buf[j] = (char) U[j]; - fwrite(buf, sizeof(char), width >> 1, fp); + ofs.write(buf, width >> 1); U += pic.stride[1]; } short *V = (short*)pic.planes[2]; @@ -77,7 +75,7 @@ bool Y4MOutput::writePicture(const x265_ { for (int j = 0; j < width >> 1; j++) buf[j] = (char) V[j]; - fwrite(buf, sizeof(char), width >> 1, fp); + ofs.write(buf, width >> 1); V += pic.stride[2]; } } @@ -86,22 +84,23 @@ bool 
Y4MOutput::writePicture(const x265_ char *Y = (char*)pic.planes[0]; for (int i = 0; i < height; i++) { - fwrite(Y, sizeof(char), width, fp); + ofs.write(Y, width); Y += pic.stride[0]; } char *U = (char*)pic.planes[1]; for (int i = 0; i < height >> 1; i++) { - fwrite(U, sizeof(char), width >> 1, fp); + ofs.write(U, width >> 1); U += pic.stride[1]; } char *V = (char*)pic.planes[2]; for (int i = 0; i < height >> 1; i++) { - fwrite(V, sizeof(char), width >> 1, fp); + ofs.write(V, width >> 1); V += pic.stride[2]; } } + PPAStopCpuEventFunc(write_yuv); return true; }
--- a/source/output/y4m.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/output/y4m.h Wed Apr 24 12:00:28 2013 +0530 @@ -25,7 +25,7 @@ #define _Y4M_H_ #include "output.h" -#include <stdio.h> +#include <fstream> namespace x265 { // private x265 namespace @@ -38,7 +38,7 @@ protected: int height; - FILE* fp; + std::ofstream ofs; char *buf; @@ -46,10 +46,12 @@ protected: public: - Y4MOutput(const char *filename, int width, int height, int bitdepth); + Y4MOutput(const char *filename, int width, int height, int rate); virtual ~Y4MOutput(); + bool isFail() const { return ofs.fail(); } + void release() { delete this; } bool writePicture(const x265_picture& pic);
--- a/source/output/yuv.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/output/yuv.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -21,28 +21,31 @@ * For more information, contact us at licensing@multicorewareinc.com. *****************************************************************************/ +#include "PPA/ppa.h" #include "output.h" #include "yuv.h" using namespace x265; +using namespace std; -YUVOutput::YUVOutput(const char *filename, int t_width, int t_height, int t_bitdepth) +YUVOutput::YUVOutput(const char *filename, int w, int h, int d) + : width(w) + , height(h) + , depth(d) { - fp = fopen(filename, "wb"); - width = t_width; - height = t_height; - depth = t_bitdepth; + ofs.open(filename, ios::binary | ios::out); buf = new char[width]; } YUVOutput::~YUVOutput() { - if (fp) fclose(fp); + ofs.close(); if (buf) delete [] buf; } bool YUVOutput::writePicture(const x265_picture& pic) { + PPAStartCpuEventFunc(write_yuv); int pixelbytes = (depth > 8) ? 2 : 1; if (pic.bitDepth > 8 && depth == 8) @@ -53,7 +56,7 @@ bool YUVOutput::writePicture(const x265_ { for (int j = 0; j < width; j++) buf[j] = (char) Y[j]; - fwrite(buf, sizeof(char), width, fp); + ofs.write(buf, width); Y += pic.stride[0]; } short *U = (short*)pic.planes[1]; @@ -61,7 +64,7 @@ bool YUVOutput::writePicture(const x265_ { for (int j = 0; j < width >> 1; j++) buf[j] = (char) U[j]; - fwrite(buf, sizeof(char), width >> 1, fp); + ofs.write(buf, width >> 1); U += pic.stride[1]; } short *V = (short*)pic.planes[2]; @@ -69,31 +72,33 @@ bool YUVOutput::writePicture(const x265_ { for (int j = 0; j < width >> 1; j++) buf[j] = (char) V[j]; - fwrite(buf, sizeof(char), width >> 1, fp); + ofs.write(buf, width >> 1); V += pic.stride[2]; } } else { - // encoder gave us byte pixels, write them directly + // encoder pixels same size as output pixels, write them directly char *Y = (char*)pic.planes[0]; for (int i = 0; i < height; i++) { - fwrite(Y, sizeof(char), width * pixelbytes, fp); + ofs.write(Y, width * pixelbytes); Y += 
pic.stride[0] * pixelbytes; } char *U = (char*)pic.planes[1]; for (int i = 0; i < height >> 1; i++) { - fwrite(U, sizeof(char), (width>>1) * pixelbytes, fp); + ofs.write(U, (width >> 1) * pixelbytes); U += pic.stride[1] * pixelbytes; } char *V = (char*)pic.planes[2]; for (int i = 0; i < height >> 1; i++) { - fwrite(V, sizeof(char), (width>>1) * pixelbytes, fp); + ofs.write(V, (width >> 1) * pixelbytes); V += pic.stride[2] * pixelbytes; } } + + PPAStopCpuEventFunc(write_yuv); return true; }
--- a/source/output/yuv.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/output/yuv.h Wed Apr 24 12:00:28 2013 +0530 @@ -25,6 +25,7 @@ #define _YUV_H_ #include "output.h" +#include <fstream> #include <stdio.h> namespace x265 { @@ -42,7 +43,7 @@ protected: char *buf; - FILE *fp; + std::ofstream ofs; public: @@ -50,6 +51,8 @@ public: virtual ~YUVOutput(); + bool isFail() const { return ofs.fail(); } + void release() { delete this; } bool writePicture(const x265_picture& pic);
--- a/source/test/testbench.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/test/testbench.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -49,7 +49,8 @@ int main(int argc, char *argv[]) } int seed = (int)time(NULL); - printf("Using random seed %X\n", seed); + const char *bpp[] = { "8bpp", "16bpp" }; + printf("Using random seed %X %s\n", seed, bpp[HIGH_BIT_DEPTH]); srand(seed); PixelHarness HPixel;
--- a/source/test/testpool.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/test/testpool.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -135,7 +135,7 @@ void MD5Frame::ProcessRow(int rownum) // Called by worker thread RowData &curRow = this->row[rownum]; - assert(rownum < this->numrows); + assert(rownum < this->numrows && rownum >= 0); assert(curRow.curCol < this->numcols); while (curRow.curCol < this->numcols)
--- a/source/x265cfg.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/x265cfg.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -297,24 +297,17 @@ Bool TAppEncCfg::parseCfg(Int argc, Char ("ReconFile,o", cfg_ReconFile, string(""), "Reconstructed YUV output file name") ("SourceWidth,-wdt", m_iSourceWidth, 0, "Source picture width") ("SourceHeight,-hgt", m_iSourceHeight, 0, "Source picture height") - ("InputBitDepth", m_inputBitDepthY, 8, "Bit-depth of input file") - ("OutputBitDepth", m_outputBitDepthY, 0, "Bit-depth of output file (default:InternalBitDepth)") - ("InternalBitDepth", m_internalBitDepthY, 0, "Bit-depth the codec operates at. (default:InputBitDepth)" - "If different to InputBitDepth, source data will be converted") - ("InputBitDepthC", m_inputBitDepthC, 0, "As per InputBitDepth but for chroma component. (default:InputBitDepth)") - ("OutputBitDepthC", m_outputBitDepthC, 0, "As per OutputBitDepth but for chroma component. (default:InternalBitDepthC)") - ("InternalBitDepthC", m_internalBitDepthC, 0, "As per InternalBitDepth but for chroma component. 
(default:IntrenalBitDepth)") - ("ConformanceMode", m_conformanceMode, 0, "Window conformance mode (0: no window, 1:automatic padding, 2:padding, 3:conformance") - ("HorizontalPadding,-pdx", m_aiPad[0], 0, "Horizontal source padding for conformance window mode 2") - ("VerticalPadding,-pdy", m_aiPad[1], 0, "Vertical source padding for conformance window mode 2") - ("ConfLeft", m_confLeft, 0, "Left offset for window conformance mode 3") - ("ConfRight", m_confRight, 0, "Right offset for window conformance mode 3") - ("ConfTop", m_confTop, 0, "Top offset for window conformance mode 3") - ("ConfBottom", m_confBottom, 0, "Bottom offset for window conformance mode 3") ("FrameRate,-fr", m_iFrameRate, 0, "Frame rate") ("FrameSkip,-fs", m_FrameSkip, 0u, "Number of frames to skip at start of input YUV") ("FramesToBeEncoded,f", m_framesToBeEncoded, 0, "Number of frames to be encoded (default=all)") +#if HIGH_BIT_DEPTH + ("InputBitDepth", m_inputBitDepth, 8, "Bit-depth of input file") + ("OutputBitDepth", m_outputBitDepth, 0, "Bit-depth of output file (default:InternalBitDepth)") + ("InternalBitDepth", m_internalBitDepth, 0, "Bit-depth the codec operates at. 
(default:InputBitDepth)" + "If different to InputBitDepth, source data will be converted") +#endif + // Profile and level ("Profile", m_profile, Profile::NONE, "Profile to be used when encoding (Incomplete)") ("Level", m_level, Level::NONE, "Level limit to be used, eg 5.1 (Incomplete)") @@ -632,36 +625,57 @@ Bool TAppEncCfg::parseCfg(Int argc, Char printf("Unable to open source file\n"); return 1; } - printf("Input File : %s\n", cfg_InputFile.c_str()); if (m_input->getWidth()) { m_iSourceWidth = m_input->getWidth(); m_iSourceHeight = m_input->getHeight(); m_iFrameRate = (int)m_input->getRate(); - m_inputBitDepthC = m_inputBitDepthY = 8; +#if HIGH_BIT_DEPTH + m_inputBitDepth = 8; +#endif } else { m_input->setDimensions(m_iSourceWidth, m_iSourceHeight); - m_input->setBitDepth(m_inputBitDepthY); +#if HIGH_BIT_DEPTH + m_input->setBitDepth(m_inputBitDepth); +#else + m_input->setBitDepth(8); +#endif } +#if HIGH_BIT_DEPTH /* rules for input, output and internal bitdepths as per help text */ - if (!m_internalBitDepthY) { m_internalBitDepthY = m_inputBitDepthY; } - if (!m_internalBitDepthC) { m_internalBitDepthC = m_internalBitDepthY; } - if (!m_inputBitDepthC) { m_inputBitDepthC = m_inputBitDepthY; } - if (!m_outputBitDepthY) { m_outputBitDepthY = m_internalBitDepthY; } - if (!m_outputBitDepthC) { m_outputBitDepthC = m_internalBitDepthC; } + if (!m_internalBitDepth) { m_internalBitDepth = m_inputBitDepth; } + if (!m_outputBitDepth) { m_outputBitDepth = m_internalBitDepth; } +#endif if (m_FrameSkip && m_input) { m_input->skipFrames(m_FrameSkip); } + + int numRemainingFrames = m_input->guessFrameCount(); + + m_framesToBeEncoded = m_framesToBeEncoded ? 
min(m_framesToBeEncoded, numRemainingFrames) : numRemainingFrames; + + printf("Input File : %s (%d frames)\n", cfg_InputFile.c_str(), numRemainingFrames); + if (!cfg_ReconFile.empty()) { printf("Reconstruction File : %s\n", cfg_ReconFile.c_str()); - m_recon = x265::Output::Open(cfg_ReconFile.c_str(), m_iSourceWidth, m_iSourceHeight, m_outputBitDepthY); +#if HIGH_BIT_DEPTH + m_recon = x265::Output::Open(cfg_ReconFile.c_str(), m_iSourceWidth, m_iSourceHeight, m_outputBitDepth, m_iFrameRate); +#else + m_recon = x265::Output::Open(cfg_ReconFile.c_str(), m_iSourceWidth, m_iSourceHeight, 8, m_iFrameRate); +#endif + if (m_recon->isFail()) + { + printf("Unable to write reconstruction file\n"); + m_recon->release(); + m_recon = 0; + } } Char *pColumnWidth = cfg_ColumnWidth.empty() ? NULL : strdup(cfg_ColumnWidth.c_str()); @@ -733,67 +747,6 @@ Bool TAppEncCfg::parseCfg(Int argc, Char #endif m_scalingListFile = cfg_ScalingListFile.empty() ? NULL : strdup(cfg_ScalingListFile.c_str()); - // TODO:ChromaFmt assumes 4:2:0 below - switch (m_conformanceMode) - { - case 0: - { - // no conformance or padding - m_confLeft = m_confRight = m_confTop = m_confBottom = 0; - m_aiPad[1] = m_aiPad[0] = 0; - break; - } - case 1: - { - // automatic padding to minimum CU size - Int minCuSize = m_uiMaxCUHeight >> (m_uiMaxCUDepth - 1); - if (m_iSourceWidth % minCuSize) - { - m_aiPad[0] = m_confRight = ((m_iSourceWidth / minCuSize) + 1) * minCuSize - m_iSourceWidth; - m_iSourceWidth += m_confRight; - } - if (m_iSourceHeight % minCuSize) - { - m_aiPad[1] = m_confBottom = ((m_iSourceHeight / minCuSize) + 1) * minCuSize - m_iSourceHeight; - m_iSourceHeight += m_confBottom; - } - if (m_aiPad[0] % TComSPS::getWinUnitX(CHROMA_420) != 0) - { - fprintf(stderr, "Error: picture width is not an integer multiple of the specified chroma subsampling\n"); - exit(EXIT_FAILURE); - } - if (m_aiPad[1] % TComSPS::getWinUnitY(CHROMA_420) != 0) - { - fprintf(stderr, "Error: picture height is not an integer multiple of 
the specified chroma subsampling\n"); - exit(EXIT_FAILURE); - } - break; - } - case 2: - { - //padding - m_iSourceWidth += m_aiPad[0]; - m_iSourceHeight += m_aiPad[1]; - m_confRight = m_aiPad[0]; - m_confBottom = m_aiPad[1]; - break; - } - case 3: - { - // conformance - if ((m_confLeft == 0) && (m_confRight == 0) && (m_confTop == 0) && (m_confBottom == 0)) - { - fprintf(stderr, "Warning: Conformance window enabled, but all conformance window parameters set to zero\n"); - } - if ((m_aiPad[1] != 0) || (m_aiPad[0] != 0)) - { - fprintf(stderr, "Warning: Conformance window enabled, padding parameters will be ignored\n"); - } - m_aiPad[1] = m_aiPad[0] = 0; - break; - } - } - // allocate slice-based dQP values m_aidQP = new Int[m_framesToBeEncoded + m_iGOPSize + 1]; ::memset(m_aidQP, 0, sizeof(Int) * (m_framesToBeEncoded + m_iGOPSize + 1)); @@ -990,15 +943,20 @@ Void TAppEncCfg::xCheckParameter() Bool check_failed = false; /* abort if there is a fatal configuration problem */ #define xConfirmPara(a, b) check_failed |= confirmPara(a, b) // check range of parameters - xConfirmPara(m_inputBitDepthY < 8, "InputBitDepth must be at least 8"); - xConfirmPara(m_inputBitDepthC < 8, "InputBitDepthC must be at least 8"); +#if HIGH_BIT_DEPTH + xConfirmPara(m_inputBitDepth < 8, "InputBitDepth must be at least 8"); + xConfirmPara(m_inputBitDepth < 8, "InputBitDepth must be at least 8"); + xConfirmPara(m_outputBitDepth > m_internalBitDepth, "OutputBitDepth must be less than or equal to InternalBitDepth"); + xConfirmPara(m_iQP < -6 * (m_internalBitDepth - 8) || m_iQP > 51, "QP exceeds supported range (-QpBDOffsety to 51)"); +#else + xConfirmPara(m_iQP < 0 || m_iQP > 51, "QP exceeds supported range (-QpBDOffsety to 51)"); +#endif xConfirmPara(m_iFrameRate <= 0, "Frame rate must be more than 1"); xConfirmPara(m_framesToBeEncoded <= 0, "Total Number Of Frames encoded must be more than 0"); xConfirmPara(m_iGOPSize < 1, "GOP Size must be greater or equal to 1"); xConfirmPara(m_iGOPSize > 1 && 
m_iGOPSize % 2, "GOP Size must be a multiple of 2, if GOP Size is greater than 1"); xConfirmPara((m_iIntraPeriod > 0 && m_iIntraPeriod < m_iGOPSize) || m_iIntraPeriod == 0, "Intra period must be more than GOP size, or -1 , not 0"); xConfirmPara(m_iDecodingRefreshType < 0 || m_iDecodingRefreshType > 2, "Decoding Refresh Type must be equal to 0, 1 or 2"); - xConfirmPara(m_iQP < -6 * (m_internalBitDepthY - 8) || m_iQP > 51, "QP exceeds supported range (-QpBDOffsety to 51)"); xConfirmPara(m_loopFilterBetaOffsetDiv2 < -13 || m_loopFilterBetaOffsetDiv2 > 13, "Loop Filter Beta Offset div. 2 exceeds supported range (-13 to 13)"); xConfirmPara(m_loopFilterTcOffsetDiv2 < -13 || m_loopFilterTcOffsetDiv2 > 13, "Loop Filter Tc Offset div. 2 exceeds supported range (-13 to 13)"); xConfirmPara(m_iFastSearch < 0 || m_iFastSearch > 2, "Fast Search Mode is not supported value (0:Full search 1:Diamond 2:PMVFAST)"); @@ -1072,14 +1030,6 @@ Void TAppEncCfg::xCheckParameter() xConfirmPara(m_iSourceWidth % TComSPS::getWinUnitX(CHROMA_420) != 0, "Picture width must be an integer multiple of the specified chroma subsampling"); xConfirmPara(m_iSourceHeight % TComSPS::getWinUnitY(CHROMA_420) != 0, "Picture height must be an integer multiple of the specified chroma subsampling"); - xConfirmPara(m_aiPad[0] % TComSPS::getWinUnitX(CHROMA_420) != 0, "Horizontal padding must be an integer multiple of the specified chroma subsampling"); - xConfirmPara(m_aiPad[1] % TComSPS::getWinUnitY(CHROMA_420) != 0, "Vertical padding must be an integer multiple of the specified chroma subsampling"); - - xConfirmPara(m_confLeft % TComSPS::getWinUnitX(CHROMA_420) != 0, "Left conformance window offset must be an integer multiple of the specified chroma subsampling"); - xConfirmPara(m_confRight % TComSPS::getWinUnitX(CHROMA_420) != 0, "Right conformance window offset must be an integer multiple of the specified chroma subsampling"); - xConfirmPara(m_confTop % TComSPS::getWinUnitY(CHROMA_420) != 0, "Top conformance 
window offset must be an integer multiple of the specified chroma subsampling"); - xConfirmPara(m_confBottom % TComSPS::getWinUnitY(CHROMA_420) != 0, "Bottom conformance window offset must be an integer multiple of the specified chroma subsampling"); - // max CU width and height should be power of 2 UInt ui = m_uiMaxCUWidth; while (ui) @@ -1608,19 +1558,23 @@ Void TAppEncCfg::xSetGlobal() g_uiMaxCUDepth = m_uiMaxCUDepth; // set internal bit-depth and constants - g_bitDepthY = m_internalBitDepthY; - g_bitDepthC = m_internalBitDepthC; +#if HIGH_BIT_DEPTH + g_bitDepthY = m_internalBitDepth; + g_bitDepthC = m_internalBitDepth; - g_uiPCMBitDepthLuma = m_bPCMInputBitDepthFlag ? m_inputBitDepthY : m_internalBitDepthY; - g_uiPCMBitDepthChroma = m_bPCMInputBitDepthFlag ? m_inputBitDepthC : m_internalBitDepthC; + g_uiPCMBitDepthLuma = m_bPCMInputBitDepthFlag ? m_inputBitDepth : m_internalBitDepth; + g_uiPCMBitDepthChroma = m_bPCMInputBitDepthFlag ? m_inputBitDepth : m_internalBitDepth; +#else + g_bitDepthY = g_bitDepthC = 8; + g_uiPCMBitDepthLuma = g_uiPCMBitDepthChroma = 8; +#endif } Void TAppEncCfg::xPrintParameter() { printf("\n"); printf("Bitstream File : %s\n", m_pchBitstreamFile); - printf("Real Format : %dx%d %dHz\n", m_iSourceWidth - m_confLeft - m_confRight, m_iSourceHeight - m_confTop - m_confBottom, m_iFrameRate); - printf("Internal Format : %dx%d %dHz\n", m_iSourceWidth, m_iSourceHeight, m_iFrameRate); + printf("Format : %dx%d %dHz\n", m_iSourceWidth, m_iSourceHeight, m_iFrameRate); printf("Frame index : %u - %d (%d frames)\n", m_FrameSkip, m_FrameSkip + m_framesToBeEncoded - 1, m_framesToBeEncoded); printf("CU size / depth : %d / %d\n", m_uiMaxCUWidth, m_uiMaxCUDepth); printf("RQT trans. size (min / max) : %d / %d\n", 1 << m_uiQuadtreeTULog2MinSize, 1 << m_uiQuadtreeTULog2MaxSize); @@ -1638,7 +1592,9 @@ Void TAppEncCfg::xPrintParameter() printf("QP adaptation : %d (range=%d)\n", m_bUseAdaptiveQP, (m_bUseAdaptiveQP ? 
m_iQPAdaptationRange : 0)); printf("GOP size : %d\n", m_iGOPSize); - printf("Internal bit depth : (Y:%d, C:%d)\n", m_internalBitDepthY, m_internalBitDepthC); +#if HIGH_BIT_DEPTH + printf("Internal bit depth : %d\n", m_internalBitDepth); +#endif printf("PCM sample bit depth : (Y:%d, C:%d)\n", g_uiPCMBitDepthLuma, g_uiPCMBitDepthChroma); #if RATE_CONTROL_LAMBDA_DOMAIN printf("RateControl : %d\n", m_RCEnableRateControl); @@ -1663,7 +1619,9 @@ Void TAppEncCfg::xPrintParameter() printf("\n"); printf("TOOL CFG: "); - printf("IBD:%d ", g_bitDepthY > m_inputBitDepthY || g_bitDepthC > m_inputBitDepthC); +#if HIGH_BIT_DEPTH + printf("IBD:%d ", g_bitDepthY > m_inputBitDepth || g_bitDepthC > m_inputBitDepth); +#endif printf("HAD:%d ", m_bUseHADME); printf("SRD:%d ", m_bUseSBACRD); printf("RDQ:%d ", m_useRDOQ);
--- a/source/x265cfg.h Wed Apr 24 11:58:14 2013 +0530 +++ b/source/x265cfg.h Wed Apr 24 12:00:28 2013 +0530 @@ -66,18 +66,13 @@ protected: UInt m_FrameSkip; ///< number of skipped frames from the beginning Int m_iSourceWidth; ///< source width in pixel Int m_iSourceHeight; ///< source height in pixel - Int m_conformanceMode; - Int m_confLeft; - Int m_confRight; - Int m_confTop; - Int m_confBottom; Int m_framesToBeEncoded; ///< number of encoded frames - Int m_aiPad[2]; ///< number of padded pixels for width and height // profile/level Profile::Name m_profile; Level::Tier m_levelTier; Level::Name m_level; + #if L0046_CONSTRAINT_FLAGS Bool m_progressiveSourceFlag; Bool m_interlacedSourceFlag; @@ -122,7 +117,7 @@ protected: Bool m_bUseAdaptiveQP; ///< Flag for enabling QP adaptation based on a psycho-visual model Int m_iQPAdaptationRange; ///< dQP range by QP adaptation - Int m_maxTempLayer; ///< Max temporal layer + Int m_maxTempLayer; ///< Max temporal layer // coding unit (CU) definition UInt m_uiMaxCUWidth; ///< max. 
CU width in pixel @@ -136,13 +131,12 @@ protected: UInt m_uiQuadtreeTUMaxDepthInter; UInt m_uiQuadtreeTUMaxDepthIntra; +#if HIGH_BIT_DEPTH // coding tools (bit-depth) - Int m_inputBitDepthY; ///< bit-depth of input file (luma component) - Int m_inputBitDepthC; ///< bit-depth of input file (chroma component) - Int m_outputBitDepthY; ///< bit-depth of output file (luma component) - Int m_outputBitDepthC; ///< bit-depth of output file (chroma component) - Int m_internalBitDepthY; ///< bit-depth codec operates at in luma (input/output files will be converted) - Int m_internalBitDepthC; ///< bit-depth codec operates at in chroma (input/output files will be converted) + Int m_inputBitDepth; ///< bit-depth of input file (luma component) + Int m_outputBitDepth; ///< bit-depth of output file (luma component) + Int m_internalBitDepth; ///< bit-depth codec operates at in luma (input/output files will be converted) +#endif // coding tools (PCM bit-depth) Bool m_bPCMInputBitDepthFlag; ///< 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth. @@ -155,9 +149,9 @@ protected: Bool m_saoLcuBasedOptimization; ///< SAO LCU-based optimization // coding tools (loop filter) Bool m_bLoopFilterDisable; ///< flag for using deblocking filter - Bool m_loopFilterOffsetInPPS; ///< offset for deblocking filter in 0 = slice header, 1 = PPS - Int m_loopFilterBetaOffsetDiv2; ///< beta offset for deblocking filter - Int m_loopFilterTcOffsetDiv2; ///< tc offset for deblocking filter + Bool m_loopFilterOffsetInPPS; ///< offset for deblocking filter in 0 = slice header, 1 = PPS + Int m_loopFilterBetaOffsetDiv2; ///< beta offset for deblocking filter + Int m_loopFilterTcOffsetDiv2; ///< tc offset for deblocking filter Bool m_DeblockingFilterControlPresent; ///< deblocking filter control present flag in PPS #if L0386_DB_METRIC Bool m_DeblockingFilterMetric; ///< blockiness metric in encoder
--- a/source/x265enc.cpp Wed Apr 24 11:58:14 2013 +0530 +++ b/source/x265enc.cpp Wed Apr 24 12:00:28 2013 +0530 @@ -97,8 +97,10 @@ Void TAppEncTop::xInitLibCfg() m_cTEncTop.setFrameSkip(m_FrameSkip); m_cTEncTop.setSourceWidth(m_iSourceWidth); m_cTEncTop.setSourceHeight(m_iSourceHeight); - m_cTEncTop.setConformanceWindow(m_confLeft, m_confRight, m_confTop, m_confBottom); + m_cTEncTop.setConformanceWindow(0, 0, 0, 0); m_cTEncTop.setFramesToBeEncoded(m_framesToBeEncoded); + int nullpad[2] = { 0, 0 }; + m_cTEncTop.setPad(nullpad); //====== Coding Structure ======== m_cTEncTop.setIntraPeriod(m_iIntraPeriod); @@ -119,8 +121,6 @@ Void TAppEncTop::xInitLibCfg() m_cTEncTop.setQP(m_iQP); - m_cTEncTop.setPad(m_aiPad); - m_cTEncTop.setMaxTempLayer(m_maxTempLayer); m_cTEncTop.setUseAMP(m_enableAMP); @@ -450,20 +450,16 @@ Void TAppEncTop::encode() // read input YUV file x265_picture pic; - m_input->readPicture(pic); - - // increase number of received frames - m_iFrameRcvd++; - - bEos = (m_iFrameRcvd == m_framesToBeEncoded); - - Bool flush = 0; - // if end of file (which is only detected on a read failure) flush the encoder of any queued pictures - if (m_input->isEof()) + Bool flush = false; + if (m_input->readPicture(pic)) + { + m_iFrameRcvd++; + bEos = (m_iFrameRcvd == m_framesToBeEncoded); + } + else { flush = true; bEos = true; - m_iFrameRcvd--; m_cTEncTop.setFramesToBeEncoded(m_iFrameRcvd); }