changeset 12537:c141a18ea0af draft

motion: Implement 3-level Hierarchical Motion Estimation This patch does the following: 1) Create HME-level 0 planes 2) Add option "--hme" and "--hme-search" to enable HME and to select search method for levels 0, 1 and 2
author Pooja Venkatesan <pooja@multicorewareinc.com>
date Fri, 05 Jul 2019 11:17:26 +0530
parents 41765d76ec03
children 147fb92c5ed5
files doc/reST/cli.rst source/CMakeLists.txt source/common/lowres.cpp source/common/lowres.h source/common/param.cpp source/common/pixel.cpp source/common/primitives.h source/common/x86/asm-primitives.cpp source/encoder/encoder.cpp source/test/regression-tests.txt source/x265.h source/x265cli.h
diffstat 12 files changed, 133 insertions(+-), 3 deletions(-) [+]
line wrap: on
line diff
--- a/doc/reST/cli.rst	Thu Jul 11 14:43:02 2019 +0530
+++ b/doc/reST/cli.rst	Fri Jul 05 11:17:26 2019 +0530
@@ -1261,6 +1261,18 @@ Temporal / motion search options
 	Enable motion estimation with source frame pixels, in this mode, 
 	motion estimation can be computed independently. Default disabled.
 
+.. option:: --hme, --no-hme
+
+       Enable 3-level Hierarchical motion estimation at One-Sixteenth, 
+       Quarter and Full resolution. Default disabled.
+
+.. option:: --hme-search <integer|string>,<integer|string>,<integer|string>
+
+       Motion search method for HME Level 0, 1 and 2. Refer to :option:`--me` for values.
+       Specify search method for each level. Alternatively, specify a single value
+       which will apply to all levels. Default is hex,umh,umh for 
+       levels 0,1,2 respectively.
+
 Spatial/intra options
 =====================
 
--- a/source/CMakeLists.txt	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/CMakeLists.txt	Fri Jul 05 11:17:26 2019 +0530
@@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CP
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 176)
+set(X265_BUILD 177)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
--- a/source/common/lowres.cpp	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/common/lowres.cpp	Fri Jul 05 11:17:26 2019 +0530
@@ -55,6 +55,7 @@ bool Lowres::create(x265_param* param, P
     heightFullRes = origPic->m_picHeight;
     width = origPic->m_picWidth / 2;
     lines = origPic->m_picHeight / 2;
+    bEnableHME = param->bEnableHME ? 1 : 0;
     lumaStride = width + 2 * origPic->m_lumaMarginX;
     if (lumaStride & 31)
         lumaStride += 32 - (lumaStride & 31);
@@ -137,6 +138,26 @@ bool Lowres::create(x265_param* param, P
     lowresPlane[2] = buffer[2] + padoffset;
     lowresPlane[3] = buffer[3] + padoffset;
 
+    if (bEnableHME)
+    {
+        intptr_t lumaStrideHalf = lumaStride / 2;
+        if (lumaStrideHalf & 31)
+            lumaStrideHalf += 32 - (lumaStrideHalf & 31);
+        size_t planesizeHalf = planesize / 2;
+        size_t padoffsetHalf = padoffset / 2;
+        /* allocate lower-res buffers */
+        CHECKED_MALLOC_ZERO(lowerResBuffer[0], pixel, 4 * planesizeHalf);
+
+        lowerResBuffer[1] = lowerResBuffer[0] + planesizeHalf;
+        lowerResBuffer[2] = lowerResBuffer[1] + planesizeHalf;
+        lowerResBuffer[3] = lowerResBuffer[2] + planesizeHalf;
+
+        lowerResPlane[0] = lowerResBuffer[0] + padoffsetHalf;
+        lowerResPlane[1] = lowerResBuffer[1] + padoffsetHalf;
+        lowerResPlane[2] = lowerResBuffer[2] + padoffsetHalf;
+        lowerResPlane[3] = lowerResBuffer[3] + padoffsetHalf;
+    }
+
     CHECKED_MALLOC(intraCost, int32_t, cuCount);
     CHECKED_MALLOC(intraMode, uint8_t, cuCount);
 
@@ -166,6 +187,8 @@ fail:
 void Lowres::destroy()
 {
     X265_FREE(buffer[0]);
+    if(bEnableHME)
+        X265_FREE(lowerResBuffer[0]);
     X265_FREE(intraCost);
     X265_FREE(intraMode);
 
@@ -253,5 +276,18 @@ void Lowres::init(PicYuv *origPic, int p
     extendPicBorder(lowresPlane[1], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
     extendPicBorder(lowresPlane[2], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
     extendPicBorder(lowresPlane[3], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
+    
+    if (origPic->m_param->bEnableHME)
+    {
+        primitives.frameInitLowerRes(lowresPlane[0],
+            lowerResPlane[0], lowerResPlane[1], lowerResPlane[2], lowerResPlane[3],
+            lumaStride, lumaStride/2, (width / 2), (lines / 2));
+        extendPicBorder(lowerResPlane[0], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
+        extendPicBorder(lowerResPlane[1], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
+        extendPicBorder(lowerResPlane[2], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
+        extendPicBorder(lowerResPlane[3], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
+        fpelLowerResPlane[0] = lowerResPlane[0];
+    }
+
     fpelPlane[0] = lowresPlane[0];
 }
--- a/source/common/lowres.h	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/common/lowres.h	Fri Jul 05 11:17:26 2019 +0530
@@ -40,6 +40,10 @@ struct ReferencePlanes
     pixel*   lowresPlane[4];
     PicYuv*  reconPic;
 
+    /* 1/16th resolution : Level-0 HME planes */
+    pixel*   fpelLowerResPlane[3];
+    pixel*   lowerResPlane[4];
+
     bool     isWeighted;
     bool     isLowres;
 
@@ -150,6 +154,7 @@ struct PicQPAdaptationLayer
 struct Lowres : public ReferencePlanes
 {
     pixel *buffer[4];
+    pixel *lowerResBuffer[4]; // Level-0 buffer
 
     int    frameNum;         // Presentation frame number
     int    sliceType;        // Slice type decided by lookahead
@@ -181,6 +186,9 @@ struct Lowres : public ReferencePlanes
     uint32_t  maxBlocksInRowFullRes;
     uint32_t  maxBlocksInColFullRes;
 
+    /* Hierarchical Motion Estimation */
+    bool      bEnableHME;
+
     /* used for vbvLookahead */
     int       plannedType[X265_LOOKAHEAD_MAX + 1];
     int64_t   plannedSatd[X265_LOOKAHEAD_MAX + 1];
--- a/source/common/param.cpp	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/common/param.cpp	Fri Jul 05 11:17:26 2019 +0530
@@ -201,6 +201,9 @@ void x265_param_default(x265_param* para
     param->bEnableTSkipFast = 0;
     param->maxNumReferences = 3;
     param->bEnableTemporalMvp = 1;
+    param->bEnableHME = 0;
+    param->hmeSearchMethod[0] = X265_HEX_SEARCH;
+    param->hmeSearchMethod[1] = param->hmeSearchMethod[2] = X265_UMH_SEARCH;
     param->bSourceReferenceEstimation = 0;
     param->limitTU = 0;
     param->dynamicRd = 0;
@@ -1282,6 +1285,27 @@ int x265_param_parse(x265_param* p, cons
         OPT("fades") p->bEnableFades = atobool(value);
         OPT("field") p->bField = atobool( value );
         OPT("cll") p->bEmitCLL = atobool(value);
+        OPT("hme") p->bEnableHME = atobool(value);
+        OPT("hme-search")
+        {
+            /* Accepts "a,b,c" (HME levels 0,1,2) or one value for all levels; each is a number or a name */
+            char search[3][5] = { "", "", "" };
+            int method[3] = { 0, 0, 0 };
+            if (3 == sscanf(value, "%d,%d,%d", &method[0], &method[1], &method[2]))
+            { /* three numeric methods, nothing left to resolve */ }
+            else if (3 == sscanf(value, "%4[^,],%4[^,],%4[^,]", search[0], search[1], search[2]))
+                for (int level = 0; level < 3; level++)
+                    method[level] = parseName(search[level], x265_motion_est_names, bError);
+            else if (1 == sscanf(value, "%d", &method[0]))
+                method[1] = method[2] = method[0]; /* single number: previously only level 0 was set */
+            else if (1 == sscanf(value, "%4s", search[0])) /* width bound: unbounded %s overflowed search[] */
+                method[0] = method[1] = method[2] = parseName(search[0], x265_motion_est_names, bError);
+            else
+                bError = true; /* empty or unparsable value */
+            for (int level = 0; level < 3 && !bError; level++)
+                p->hmeSearchMethod[level] = method[level]; /* commit only a fully parsed value */
+            p->bEnableHME = true;
+        }
         else
             return X265_PARAM_BAD_NAME;
     }
@@ -1732,8 +1756,13 @@ void x265_print_params(x265_param* param
     x265_log(param, X265_LOG_INFO, "Residual QT: max TU size, max depth : %d / %d inter / %d intra\n",
              param->maxTUSize, param->tuQTMaxInterDepth, param->tuQTMaxIntraDepth);
 
-    x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge         : %s / %d / %d / %d\n",
-             x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand);
+    if (param->bEnableHME)
+        x265_log(param, X265_LOG_INFO, "HME L0,1,2 / range / subpel / merge : %s, %s, %s / %d / %d / %d\n",
+            x265_motion_est_names[param->hmeSearchMethod[0]], x265_motion_est_names[param->hmeSearchMethod[1]], x265_motion_est_names[param->hmeSearchMethod[2]], param->searchRange, param->subpelRefine, param->maxNumMergeCand);
+    else
+        x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge         : %s / %d / %d / %d\n",
+            x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand);
+
     if (param->keyframeMax != INT_MAX || param->scenecutThreshold)
         x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax, param->scenecutThreshold, param->scenecutBias * 100);
     else
@@ -1928,6 +1957,9 @@ char *x265_param2string(x265_param* p, i
     s += sprintf(s, " subme=%d", p->subpelRefine);
     s += sprintf(s, " merange=%d", p->searchRange);
     BOOL(p->bEnableTemporalMvp, "temporal-mvp");
+    BOOL(p->bEnableHME, "hme");
+    if (p->bEnableHME)
+        s += sprintf(s, " Level 0,1,2=%d,%d,%d", p->hmeSearchMethod[0], p->hmeSearchMethod[1], p->hmeSearchMethod[2]);
     BOOL(p->bEnableWeightedPred, "weightp");
     BOOL(p->bEnableWeightedBiPred, "weightb");
     BOOL(p->bSourceReferenceEstimation, "analyze-src-pics");
@@ -2215,6 +2247,12 @@ void x265_copy_params(x265_param* dst, x
     dst->subpelRefine = src->subpelRefine;
     dst->searchRange = src->searchRange;
     dst->bEnableTemporalMvp = src->bEnableTemporalMvp;
+    dst->bEnableHME = src->bEnableHME;
+    if (src->bEnableHME)
+    {
+        for (int level = 0; level < 3; level++)
+            dst->hmeSearchMethod[level] = src->hmeSearchMethod[level];
+    }
     dst->bEnableWeightedBiPred = src->bEnableWeightedBiPred;
     dst->bEnableWeightedPred = src->bEnableWeightedPred;
     dst->bSourceReferenceEstimation = src->bSourceReferenceEstimation;
--- a/source/common/pixel.cpp	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/common/pixel.cpp	Fri Jul 05 11:17:26 2019 +0530
@@ -1309,6 +1309,7 @@ void setupPixelPrimitives_c(EncoderPrimi
     p.scale1D_128to64[NONALIGNED] = p.scale1D_128to64[ALIGNED] = scale1D_128to64;
     p.scale2D_64to32 = scale2D_64to32;
     p.frameInitLowres = frame_init_lowres_core;
+    p.frameInitLowerRes = frame_init_lowres_core;
     p.ssim_4x4x2_core = ssim_4x4x2_core;
     p.ssim_end_4 = ssim_end_4;
 
--- a/source/common/primitives.h	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/common/primitives.h	Fri Jul 05 11:17:26 2019 +0530
@@ -349,6 +349,7 @@ struct EncoderPrimitives
     saoCuStatsE3_t        saoCuStatsE3;
 
     downscale_t           frameInitLowres;
+    downscale_t           frameInitLowerRes;
     cutree_propagate_cost propagateCost;
     cutree_fix8_unpack    fix8Unpack;
     cutree_fix8_pack      fix8Pack;
--- a/source/common/x86/asm-primitives.cpp	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Jul 05 11:17:26 2019 +0530
@@ -1090,6 +1090,7 @@ void setupAssemblyPrimitives(EncoderPrim
         LUMA_VSS_FILTERS(sse2);
 
         p.frameInitLowres = PFX(frame_init_lowres_core_sse2);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_sse2);
         // TODO: the planecopy_sp is really planecopy_SC now, must be fix it 
         //p.planecopy_sp = PFX(downShift_16_sse2);
         p.planecopy_sp_shl = PFX(upShift_16_sse2);
@@ -1132,6 +1133,7 @@ void setupAssemblyPrimitives(EncoderPrim
         p.cu[BLOCK_8x8].idct = PFX(idct8_ssse3);
 
         p.frameInitLowres = PFX(frame_init_lowres_core_ssse3);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_ssse3);
 
         ALL_LUMA_PU(convert_p2s[ALIGNED], filterPixelToShort, ssse3);
         ALL_LUMA_PU(convert_p2s[NONALIGNED], filterPixelToShort, ssse3);
@@ -1453,6 +1455,7 @@ void setupAssemblyPrimitives(EncoderPrim
         p.cu[BLOCK_64x64].copy_sp = (copy_sp_t)PFX(blockcopy_ss_64x64_avx);
 
         p.frameInitLowres = PFX(frame_init_lowres_core_avx);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_avx);
 
         p.pu[LUMA_64x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x16_avx);
         p.pu[LUMA_64x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x32_avx);
@@ -1469,6 +1472,7 @@ void setupAssemblyPrimitives(EncoderPrim
 #endif
         LUMA_VAR(xop);
         p.frameInitLowres = PFX(frame_init_lowres_core_xop);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_xop);
     }
     if (cpuMask & X265_CPU_AVX2)
     {
@@ -2296,6 +2300,7 @@ void setupAssemblyPrimitives(EncoderPrim
         p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vsp = PFX(interp_4tap_vert_sp_64x64_avx2);
 
         p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_avx2);
         p.propagateCost = PFX(mbtree_propagate_cost_avx2);
         p.fix8Unpack = PFX(cutree_fix8_unpack_avx2);
         p.fix8Pack = PFX(cutree_fix8_pack_avx2);
@@ -3294,6 +3299,7 @@ void setupAssemblyPrimitives(EncoderPrim
 
         //p.frameInitLowres = PFX(frame_init_lowres_core_mmx2);
         p.frameInitLowres = PFX(frame_init_lowres_core_sse2);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_sse2);
 
         ALL_LUMA_TU(blockfill_s[NONALIGNED], blockfill_s, sse2);
         ALL_LUMA_TU(blockfill_s[ALIGNED], blockfill_s, sse2);
@@ -3414,6 +3420,7 @@ void setupAssemblyPrimitives(EncoderPrim
         p.pu[LUMA_8x8].luma_hvpp = PFX(interp_8tap_hv_pp_8x8_ssse3);
 
         p.frameInitLowres = PFX(frame_init_lowres_core_ssse3);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_ssse3);
         ASSIGN2(p.scale1D_128to64, scale1D_128to64_ssse3);
         p.scale2D_64to32 = PFX(scale2D_64to32_ssse3);
 
@@ -3682,6 +3689,7 @@ void setupAssemblyPrimitives(EncoderPrim
         p.pu[LUMA_48x64].copy_pp = PFX(blockcopy_pp_48x64_avx);
 
         p.frameInitLowres = PFX(frame_init_lowres_core_avx);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_avx);
         p.propagateCost = PFX(mbtree_propagate_cost_avx);
     }
     if (cpuMask & X265_CPU_XOP)
@@ -3693,6 +3701,8 @@ void setupAssemblyPrimitives(EncoderPrim
         p.cu[BLOCK_8x8].sse_pp = PFX(pixel_ssd_8x8_xop);
         p.cu[BLOCK_16x16].sse_pp = PFX(pixel_ssd_16x16_xop);
         p.frameInitLowres = PFX(frame_init_lowres_core_xop);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_xop);
+
     }
 #if X86_64
     if (cpuMask & X265_CPU_AVX2)
@@ -4667,6 +4677,7 @@ void setupAssemblyPrimitives(EncoderPrim
         p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vpp = PFX(interp_4tap_vert_pp_64x16_avx2);
 
         p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
+        p.frameInitLowerRes = PFX(frame_init_lowres_core_avx2);
         p.propagateCost = PFX(mbtree_propagate_cost_avx2);
         p.saoCuStatsE0 = PFX(saoCuStatsE0_avx2);
         p.saoCuStatsE1 = PFX(saoCuStatsE1_avx2);
--- a/source/encoder/encoder.cpp	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/encoder/encoder.cpp	Fri Jul 05 11:17:26 2019 +0530
@@ -3379,6 +3379,15 @@ void Encoder::configure(x265_param *p)
         p->bRepeatHeaders = 1;
         x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for zone encoding\n");
     }
+
+    if (m_param->bEnableHME)
+    {
+        if (m_param->sourceHeight < 540)
+        {
+            x265_log(p, X265_LOG_WARNING, "Source height < 540p is too low for HME. Disabling HME.\n");
+            p->bEnableHME = 0;
+        }
+    }
 }
 
 void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x265_picture* picIn, int paramBytes)
--- a/source/test/regression-tests.txt	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/test/regression-tests.txt	Fri Jul 05 11:17:26 2019 +0530
@@ -153,6 +153,7 @@ Kimono1_1920x1080_24_400.yuv,--preset pl
 big_buck_bunny_360p24.y4m, --keyint 60 --min-keyint 40 --gop-lookahead 14
 BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop --keyint 50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000
 big_buck_bunny_360p24.y4m, --bitrate 500 --fades
+720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme
 
 # Main12 intraCost overflow bug test
 720p50_parkrun_ter.y4m,--preset medium
--- a/source/x265.h	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/x265.h	Fri Jul 05 11:17:26 2019 +0530
@@ -1172,6 +1172,14 @@ typedef struct x265_param
     /* Enable availability of temporal motion vector for AMVP, default is enabled */
     int       bEnableTemporalMvp;
 
+    /* Enable 3-level Hierarchical motion estimation at One-Sixteenth, Quarter and Full resolution.
+     * Default is disabled */
+    int       bEnableHME;
+
+    /* Motion search method (DIA, HEX, UMH, STAR, SEA, FULL) used at HME levels 0, 1 and 2.
+     * Default is hex, umh, umh for L0, L1 and L2 respectively. */
+    int       hmeSearchMethod[3];
+
     /* Enable weighted prediction in P slices.  This enables weighting analysis
      * in the lookahead, which influences slice decisions, and enables weighting
      * analysis in the main encoder which allows P reference samples to have a
--- a/source/x265cli.h	Thu Jul 11 14:43:02 2019 +0530
+++ b/source/x265cli.h	Fri Jul 05 11:17:26 2019 +0530
@@ -95,6 +95,9 @@ static const struct option long_options[
     { "max-merge",      required_argument, NULL, 0 },
     { "no-temporal-mvp",      no_argument, NULL, 0 },
     { "temporal-mvp",         no_argument, NULL, 0 },
+    { "hme",                  no_argument, NULL, 0 },
+    { "no-hme",               no_argument, NULL, 0 },
+    { "hme-search",     required_argument, NULL, 0 },
     { "rdpenalty",      required_argument, NULL, 0 },
     { "no-rect",              no_argument, NULL, 0 },
     { "rect",                 no_argument, NULL, 0 },
@@ -464,6 +467,8 @@ static void showHelp(x265_param *param)
     H0("   --[no-]amp                    Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP));
     H0("   --[no-]limit-modes            Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes);
     H1("   --[no-]temporal-mvp           Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp));
+    H1("   --[no-]hme                    Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME));
+    H1("   --hme-search <string>         Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]);
     H0("\nSpatial / intra options:\n");
     H0("   --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing));
     H0("   --[no-]constrained-intra      Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra));