Mercurial > x265
changeset 12537:c141a18ea0af draft
motion: Implement 3-level Hierarchical Motion Estimation
This patch does the following:
1) Create HME-level 0 planes
2) Add options "--hme" and "--hme-search" to enable HME
and to select the search method for levels 0, 1 and 2
author | Pooja Venkatesan <pooja@multicorewareinc.com> |
---|---|
date | Fri, 05 Jul 2019 11:17:26 +0530 |
parents | 41765d76ec03 |
children | 147fb92c5ed5 |
files | doc/reST/cli.rst source/CMakeLists.txt source/common/lowres.cpp source/common/lowres.h source/common/param.cpp source/common/pixel.cpp source/common/primitives.h source/common/x86/asm-primitives.cpp source/encoder/encoder.cpp source/test/regression-tests.txt source/x265.h source/x265cli.h |
diffstat | 12 files changed, 133 insertions(+-), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/doc/reST/cli.rst Thu Jul 11 14:43:02 2019 +0530 +++ b/doc/reST/cli.rst Fri Jul 05 11:17:26 2019 +0530 @@ -1261,6 +1261,18 @@ Temporal / motion search options Enable motion estimation with source frame pixels, in this mode, motion estimation can be computed independently. Default disabled. +.. option:: --hme, --no-hme + + Enable 3-level Hierarchical motion estimation at One-Sixteenth, + Quarter and Full resolution. Default disabled. + +.. option:: --hme-search <integer|string>,<integer|string>,<integer|string> + + Motion search method for HME Level 0, 1 and 2. Refer to :option:`--me` for values. + Specify search method for each level. Alternatively, specify a single value + which will apply to all levels. Default is hex,umh,umh for + levels 0,1,2 respectively. + Spatial/intra options =====================
--- a/source/CMakeLists.txt Thu Jul 11 14:43:02 2019 +0530 +++ b/source/CMakeLists.txt Fri Jul 05 11:17:26 2019 +0530 @@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CP option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 176) +set(X265_BUILD 177) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
--- a/source/common/lowres.cpp Thu Jul 11 14:43:02 2019 +0530 +++ b/source/common/lowres.cpp Fri Jul 05 11:17:26 2019 +0530 @@ -55,6 +55,7 @@ bool Lowres::create(x265_param* param, P heightFullRes = origPic->m_picHeight; width = origPic->m_picWidth / 2; lines = origPic->m_picHeight / 2; + bEnableHME = param->bEnableHME ? 1 : 0; lumaStride = width + 2 * origPic->m_lumaMarginX; if (lumaStride & 31) lumaStride += 32 - (lumaStride & 31); @@ -137,6 +138,26 @@ bool Lowres::create(x265_param* param, P lowresPlane[2] = buffer[2] + padoffset; lowresPlane[3] = buffer[3] + padoffset; + if (bEnableHME) + { + intptr_t lumaStrideHalf = lumaStride / 2; + if (lumaStrideHalf & 31) + lumaStrideHalf += 32 - (lumaStrideHalf & 31); + size_t planesizeHalf = planesize / 2; + size_t padoffsetHalf = padoffset / 2; + /* allocate lower-res buffers */ + CHECKED_MALLOC_ZERO(lowerResBuffer[0], pixel, 4 * planesizeHalf); + + lowerResBuffer[1] = lowerResBuffer[0] + planesizeHalf; + lowerResBuffer[2] = lowerResBuffer[1] + planesizeHalf; + lowerResBuffer[3] = lowerResBuffer[2] + planesizeHalf; + + lowerResPlane[0] = lowerResBuffer[0] + padoffsetHalf; + lowerResPlane[1] = lowerResBuffer[1] + padoffsetHalf; + lowerResPlane[2] = lowerResBuffer[2] + padoffsetHalf; + lowerResPlane[3] = lowerResBuffer[3] + padoffsetHalf; + } + CHECKED_MALLOC(intraCost, int32_t, cuCount); CHECKED_MALLOC(intraMode, uint8_t, cuCount); @@ -166,6 +187,8 @@ fail: void Lowres::destroy() { X265_FREE(buffer[0]); + if(bEnableHME) + X265_FREE(lowerResBuffer[0]); X265_FREE(intraCost); X265_FREE(intraMode); @@ -253,5 +276,18 @@ void Lowres::init(PicYuv *origPic, int p extendPicBorder(lowresPlane[1], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY); extendPicBorder(lowresPlane[2], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY); extendPicBorder(lowresPlane[3], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY); + + if (origPic->m_param->bEnableHME) + { + 
primitives.frameInitLowerRes(lowresPlane[0], + lowerResPlane[0], lowerResPlane[1], lowerResPlane[2], lowerResPlane[3], + lumaStride, lumaStride/2, (width / 2), (lines / 2)); + extendPicBorder(lowerResPlane[0], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); + extendPicBorder(lowerResPlane[1], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); + extendPicBorder(lowerResPlane[2], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); + extendPicBorder(lowerResPlane[3], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); + fpelLowerResPlane[0] = lowerResPlane[0]; + } + fpelPlane[0] = lowresPlane[0]; }
--- a/source/common/lowres.h Thu Jul 11 14:43:02 2019 +0530 +++ b/source/common/lowres.h Fri Jul 05 11:17:26 2019 +0530 @@ -40,6 +40,10 @@ struct ReferencePlanes pixel* lowresPlane[4]; PicYuv* reconPic; + /* 1/16th resolution : Level-0 HME planes */ + pixel* fpelLowerResPlane[3]; + pixel* lowerResPlane[4]; + bool isWeighted; bool isLowres; @@ -150,6 +154,7 @@ struct PicQPAdaptationLayer struct Lowres : public ReferencePlanes { pixel *buffer[4]; + pixel *lowerResBuffer[4]; // Level-0 buffer int frameNum; // Presentation frame number int sliceType; // Slice type decided by lookahead @@ -181,6 +186,9 @@ struct Lowres : public ReferencePlanes uint32_t maxBlocksInRowFullRes; uint32_t maxBlocksInColFullRes; + /* Hierarchical Motion Estimation */ + bool bEnableHME; + /* used for vbvLookahead */ int plannedType[X265_LOOKAHEAD_MAX + 1]; int64_t plannedSatd[X265_LOOKAHEAD_MAX + 1];
--- a/source/common/param.cpp Thu Jul 11 14:43:02 2019 +0530 +++ b/source/common/param.cpp Fri Jul 05 11:17:26 2019 +0530 @@ -201,6 +201,9 @@ void x265_param_default(x265_param* para param->bEnableTSkipFast = 0; param->maxNumReferences = 3; param->bEnableTemporalMvp = 1; + param->bEnableHME = 0; + param->hmeSearchMethod[0] = X265_HEX_SEARCH; + param->hmeSearchMethod[1] = param->hmeSearchMethod[2] = X265_UMH_SEARCH; param->bSourceReferenceEstimation = 0; param->limitTU = 0; param->dynamicRd = 0; @@ -1282,6 +1285,27 @@ int x265_param_parse(x265_param* p, cons OPT("fades") p->bEnableFades = atobool(value); OPT("field") p->bField = atobool( value ); OPT("cll") p->bEmitCLL = atobool(value); + OPT("hme") p->bEnableHME = atobool(value); + OPT("hme-search") + { + char search[3][5]; + memset(search, '\0', 15 * sizeof(char)); + if(3 == sscanf(value, "%d,%d,%d", &p->hmeSearchMethod[0], &p->hmeSearchMethod[1], &p->hmeSearchMethod[2]) || + 3 == sscanf(value, "%4[^,],%4[^,],%4[^,]", search[0], search[1], search[2])) + { + if(search[0][0]) + for(int level = 0; level < 3; level++) + p->hmeSearchMethod[level] = parseName(search[level], x265_motion_est_names, bError); + } + else if (sscanf(value, "%d", &p->hmeSearchMethod[0]) || sscanf(value, "%s", search[0])) + { + if (search[0][0]) { + p->hmeSearchMethod[0] = parseName(search[0], x265_motion_est_names, bError); + p->hmeSearchMethod[1] = p->hmeSearchMethod[2] = p->hmeSearchMethod[0]; + } + } + p->bEnableHME = true; + } else return X265_PARAM_BAD_NAME; } @@ -1732,8 +1756,13 @@ void x265_print_params(x265_param* param x265_log(param, X265_LOG_INFO, "Residual QT: max TU size, max depth : %d / %d inter / %d intra\n", param->maxTUSize, param->tuQTMaxInterDepth, param->tuQTMaxIntraDepth); - x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge : %s / %d / %d / %d\n", - x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand); + if (param->bEnableHME) + x265_log(param, 
X265_LOG_INFO, "HME L0,1,2 / range / subpel / merge : %s, %s, %s / %d / %d / %d\n", + x265_motion_est_names[param->hmeSearchMethod[0]], x265_motion_est_names[param->hmeSearchMethod[1]], x265_motion_est_names[param->hmeSearchMethod[2]], param->searchRange, param->subpelRefine, param->maxNumMergeCand); + else + x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge : %s / %d / %d / %d\n", + x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand); + if (param->keyframeMax != INT_MAX || param->scenecutThreshold) x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax, param->scenecutThreshold, param->scenecutBias * 100); else @@ -1928,6 +1957,9 @@ char *x265_param2string(x265_param* p, i s += sprintf(s, " subme=%d", p->subpelRefine); s += sprintf(s, " merange=%d", p->searchRange); BOOL(p->bEnableTemporalMvp, "temporal-mvp"); + BOOL(p->bEnableHME, "hme"); + if (p->bEnableHME) + s += sprintf(s, " Level 0,1,2=%d,%d,%d", p->hmeSearchMethod[0], p->hmeSearchMethod[1], p->hmeSearchMethod[2]); BOOL(p->bEnableWeightedPred, "weightp"); BOOL(p->bEnableWeightedBiPred, "weightb"); BOOL(p->bSourceReferenceEstimation, "analyze-src-pics"); @@ -2215,6 +2247,12 @@ void x265_copy_params(x265_param* dst, x dst->subpelRefine = src->subpelRefine; dst->searchRange = src->searchRange; dst->bEnableTemporalMvp = src->bEnableTemporalMvp; + dst->bEnableHME = src->bEnableHME; + if (src->bEnableHME) + { + for (int level = 0; level < 3; level++) + dst->hmeSearchMethod[level] = src->hmeSearchMethod[level]; + } dst->bEnableWeightedBiPred = src->bEnableWeightedBiPred; dst->bEnableWeightedPred = src->bEnableWeightedPred; dst->bSourceReferenceEstimation = src->bSourceReferenceEstimation;
--- a/source/common/pixel.cpp Thu Jul 11 14:43:02 2019 +0530 +++ b/source/common/pixel.cpp Fri Jul 05 11:17:26 2019 +0530 @@ -1309,6 +1309,7 @@ void setupPixelPrimitives_c(EncoderPrimi p.scale1D_128to64[NONALIGNED] = p.scale1D_128to64[ALIGNED] = scale1D_128to64; p.scale2D_64to32 = scale2D_64to32; p.frameInitLowres = frame_init_lowres_core; + p.frameInitLowerRes = frame_init_lowres_core; p.ssim_4x4x2_core = ssim_4x4x2_core; p.ssim_end_4 = ssim_end_4;
--- a/source/common/primitives.h Thu Jul 11 14:43:02 2019 +0530 +++ b/source/common/primitives.h Fri Jul 05 11:17:26 2019 +0530 @@ -349,6 +349,7 @@ struct EncoderPrimitives saoCuStatsE3_t saoCuStatsE3; downscale_t frameInitLowres; + downscale_t frameInitLowerRes; cutree_propagate_cost propagateCost; cutree_fix8_unpack fix8Unpack; cutree_fix8_pack fix8Pack;
--- a/source/common/x86/asm-primitives.cpp Thu Jul 11 14:43:02 2019 +0530 +++ b/source/common/x86/asm-primitives.cpp Fri Jul 05 11:17:26 2019 +0530 @@ -1090,6 +1090,7 @@ void setupAssemblyPrimitives(EncoderPrim LUMA_VSS_FILTERS(sse2); p.frameInitLowres = PFX(frame_init_lowres_core_sse2); + p.frameInitLowerRes = PFX(frame_init_lowres_core_sse2); // TODO: the planecopy_sp is really planecopy_SC now, must be fix it //p.planecopy_sp = PFX(downShift_16_sse2); p.planecopy_sp_shl = PFX(upShift_16_sse2); @@ -1132,6 +1133,7 @@ void setupAssemblyPrimitives(EncoderPrim p.cu[BLOCK_8x8].idct = PFX(idct8_ssse3); p.frameInitLowres = PFX(frame_init_lowres_core_ssse3); + p.frameInitLowerRes = PFX(frame_init_lowres_core_ssse3); ALL_LUMA_PU(convert_p2s[ALIGNED], filterPixelToShort, ssse3); ALL_LUMA_PU(convert_p2s[NONALIGNED], filterPixelToShort, ssse3); @@ -1453,6 +1455,7 @@ void setupAssemblyPrimitives(EncoderPrim p.cu[BLOCK_64x64].copy_sp = (copy_sp_t)PFX(blockcopy_ss_64x64_avx); p.frameInitLowres = PFX(frame_init_lowres_core_avx); + p.frameInitLowerRes = PFX(frame_init_lowres_core_avx); p.pu[LUMA_64x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x16_avx); p.pu[LUMA_64x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x32_avx); @@ -1469,6 +1472,7 @@ void setupAssemblyPrimitives(EncoderPrim #endif LUMA_VAR(xop); p.frameInitLowres = PFX(frame_init_lowres_core_xop); + p.frameInitLowerRes = PFX(frame_init_lowres_core_xop); } if (cpuMask & X265_CPU_AVX2) { @@ -2296,6 +2300,7 @@ void setupAssemblyPrimitives(EncoderPrim p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vsp = PFX(interp_4tap_vert_sp_64x64_avx2); p.frameInitLowres = PFX(frame_init_lowres_core_avx2); + p.frameInitLowerRes = PFX(frame_init_lowres_core_avx2); p.propagateCost = PFX(mbtree_propagate_cost_avx2); p.fix8Unpack = PFX(cutree_fix8_unpack_avx2); p.fix8Pack = PFX(cutree_fix8_pack_avx2); @@ -3294,6 +3299,7 @@ void setupAssemblyPrimitives(EncoderPrim //p.frameInitLowres = PFX(frame_init_lowres_core_mmx2); p.frameInitLowres = 
PFX(frame_init_lowres_core_sse2); + p.frameInitLowerRes = PFX(frame_init_lowres_core_sse2); ALL_LUMA_TU(blockfill_s[NONALIGNED], blockfill_s, sse2); ALL_LUMA_TU(blockfill_s[ALIGNED], blockfill_s, sse2); @@ -3414,6 +3420,7 @@ void setupAssemblyPrimitives(EncoderPrim p.pu[LUMA_8x8].luma_hvpp = PFX(interp_8tap_hv_pp_8x8_ssse3); p.frameInitLowres = PFX(frame_init_lowres_core_ssse3); + p.frameInitLowerRes = PFX(frame_init_lowres_core_ssse3); ASSIGN2(p.scale1D_128to64, scale1D_128to64_ssse3); p.scale2D_64to32 = PFX(scale2D_64to32_ssse3); @@ -3682,6 +3689,7 @@ void setupAssemblyPrimitives(EncoderPrim p.pu[LUMA_48x64].copy_pp = PFX(blockcopy_pp_48x64_avx); p.frameInitLowres = PFX(frame_init_lowres_core_avx); + p.frameInitLowerRes = PFX(frame_init_lowres_core_avx); p.propagateCost = PFX(mbtree_propagate_cost_avx); } if (cpuMask & X265_CPU_XOP) @@ -3693,6 +3701,8 @@ void setupAssemblyPrimitives(EncoderPrim p.cu[BLOCK_8x8].sse_pp = PFX(pixel_ssd_8x8_xop); p.cu[BLOCK_16x16].sse_pp = PFX(pixel_ssd_16x16_xop); p.frameInitLowres = PFX(frame_init_lowres_core_xop); + p.frameInitLowerRes = PFX(frame_init_lowres_core_xop); + } #if X86_64 if (cpuMask & X265_CPU_AVX2) @@ -4667,6 +4677,7 @@ void setupAssemblyPrimitives(EncoderPrim p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vpp = PFX(interp_4tap_vert_pp_64x16_avx2); p.frameInitLowres = PFX(frame_init_lowres_core_avx2); + p.frameInitLowerRes = PFX(frame_init_lowres_core_avx2); p.propagateCost = PFX(mbtree_propagate_cost_avx2); p.saoCuStatsE0 = PFX(saoCuStatsE0_avx2); p.saoCuStatsE1 = PFX(saoCuStatsE1_avx2);
--- a/source/encoder/encoder.cpp Thu Jul 11 14:43:02 2019 +0530 +++ b/source/encoder/encoder.cpp Fri Jul 05 11:17:26 2019 +0530 @@ -3379,6 +3379,15 @@ void Encoder::configure(x265_param *p) p->bRepeatHeaders = 1; x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for zone encoding\n"); } + + if (m_param->bEnableHME) + { + if (m_param->sourceHeight < 540) + { + x265_log(p, X265_LOG_WARNING, "Source height < 540p is too low for HME. Disabling HME.\n"); + p->bEnableHME = 0; + } + } } void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x265_picture* picIn, int paramBytes)
--- a/source/test/regression-tests.txt Thu Jul 11 14:43:02 2019 +0530 +++ b/source/test/regression-tests.txt Fri Jul 05 11:17:26 2019 +0530 @@ -153,6 +153,7 @@ Kimono1_1920x1080_24_400.yuv,--preset pl big_buck_bunny_360p24.y4m, --keyint 60 --min-keyint 40 --gop-lookahead 14 BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop --keyint 50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000 big_buck_bunny_360p24.y4m, --bitrate 500 --fades +720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme # Main12 intraCost overflow bug test 720p50_parkrun_ter.y4m,--preset medium
--- a/source/x265.h Thu Jul 11 14:43:02 2019 +0530 +++ b/source/x265.h Fri Jul 05 11:17:26 2019 +0530 @@ -1172,6 +1172,14 @@ typedef struct x265_param /* Enable availability of temporal motion vector for AMVP, default is enabled */ int bEnableTemporalMvp; + /* Enable 3-level Hierarchical motion estimation at One-Sixteenth, Quarter and Full resolution. + * Default is disabled */ + int bEnableHME; + + /* Enable HME search method (DIA, HEX, UMH, STAR, SEA, FULL) for level 0, 1 and 2. + * Default is hex, umh, umh for L0, L1 and L2 respectively. */ + int hmeSearchMethod[3]; + /* Enable weighted prediction in P slices. This enables weighting analysis * in the lookahead, which influences slice decisions, and enables weighting * analysis in the main encoder which allows P reference samples to have a
--- a/source/x265cli.h Thu Jul 11 14:43:02 2019 +0530 +++ b/source/x265cli.h Fri Jul 05 11:17:26 2019 +0530 @@ -95,6 +95,9 @@ static const struct option long_options[ { "max-merge", required_argument, NULL, 0 }, { "no-temporal-mvp", no_argument, NULL, 0 }, { "temporal-mvp", no_argument, NULL, 0 }, + { "hme", no_argument, NULL, 0 }, + { "no-hme", no_argument, NULL, 0 }, + { "hme-search", required_argument, NULL, 0 }, { "rdpenalty", required_argument, NULL, 0 }, { "no-rect", no_argument, NULL, 0 }, { "rect", no_argument, NULL, 0 }, @@ -464,6 +467,8 @@ static void showHelp(x265_param *param) H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes); H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp)); + H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME)); + H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]); H0("\nSpatial / intra options:\n"); H0(" --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing)); H0(" --[no-]constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra));