changeset 12670:4996092aa8ab

Improvements to hist-based scenecut algorithm. This patch does the following: 1. Strengthens scenecut detection using threshold intervals and spatial and temporal properties. 2. Changes the default value of hist-threshold from 0.01 to 0.03.
author Pooja Venkatesan <pooja@multicorewareinc.com>
date Mon, 29 Jun 2020 14:43:24 +0530
parents d45b4b0430f8
children ac951f9fef8a
files doc/reST/cli.rst source/common/lowres.cpp source/common/lowres.h source/common/param.cpp source/encoder/encoder.cpp source/encoder/encoder.h source/encoder/slicetype.cpp source/encoder/slicetype.h source/x265.h
diffstat 9 files changed, 107 insertions(+-), 32 deletions(-) [+]
line wrap: on
line diff
--- a/doc/reST/cli.rst	Thu Jun 25 13:41:59 2020 +0530
+++ b/doc/reST/cli.rst	Mon Jun 29 14:43:24 2020 +0530
@@ -1468,9 +1468,10 @@ Slice decision options
 .. option:: --hist-threshold <0.0..1.0>
 
 	This value represents the threshold for normalized SAD of edge histograms used in scenecut detection.
-	This requires :option:`--hist-scenecut` to be enabled. For example, a value of 0.2 indicates that a frame with normalized SAD value
-	greater than 0.2 against the previous frame as scenecut.
-	Default 0.01.
+	This requires :option:`--hist-scenecut` to be enabled. For example, a value of 0.2 indicates that a frame with normalized SAD value
+	greater than 0.2 against the previous frame is classified as a scenecut.
+	Increasing the threshold reduces the number of scenecuts detected.
+	Default 0.03.
 	
 .. option:: --radl <integer>
 	
--- a/source/common/lowres.cpp	Thu Jun 25 13:41:59 2020 +0530
+++ b/source/common/lowres.cpp	Mon Jun 29 14:43:24 2020 +0530
@@ -266,6 +266,9 @@ void Lowres::init(PicYuv *origPic, int p
     indB = 0;
     memset(costEst, -1, sizeof(costEst));
     memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
+    interPCostPercDiff = 0.0;
+    intraCostPercDiff = 0.0;
+    m_bIsMaxThres = false;
 
     if (qpAqOffset && invQscaleFactor)
         memset(costEstAq, -1, sizeof(costEstAq));
--- a/source/common/lowres.h	Thu Jun 25 13:41:59 2020 +0530
+++ b/source/common/lowres.h	Mon Jun 29 14:43:24 2020 +0530
@@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes
     uint16_t* propagateCost;
     double    weightedCostDelta[X265_BFRAME_MAX + 2];
     ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
+    /* For hist-based scenecut */
+    bool   m_bIsMaxThres;
+    double interPCostPercDiff;
+    double intraCostPercDiff;
+
     bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
     void destroy();
     void init(PicYuv *origPic, int poc);
--- a/source/common/param.cpp	Thu Jun 25 13:41:59 2020 +0530
+++ b/source/common/param.cpp	Mon Jun 29 14:43:24 2020 +0530
@@ -168,7 +168,7 @@ void x265_param_default(x265_param* para
     param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
     param->bBPyramid = 1;
     param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
-    param->edgeTransitionThreshold = 0.01;
+    param->edgeTransitionThreshold = 0.03;
     param->bHistBasedSceneCut = 0;
     param->lookaheadSlices = 8;
     param->lookaheadThreads = 0;
--- a/source/encoder/encoder.cpp	Thu Jun 25 13:41:59 2020 +0530
+++ b/source/encoder/encoder.cpp	Mon Jun 29 14:43:24 2020 +0530
@@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t v
     return (double)(value - minValue) * (rangeEnd - rangeStart) / (maxValue - minValue) + rangeStart;
 }
 
-void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double maxUVSad, double edgeSad)
+void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double maxUVSad, double edgeSad, bool& isMaxThres)
 {
+    double minEdgeT = m_edgeHistThreshold * MIN_EDGE_FACTOR;
+    double minChromaT = minEdgeT * SCENECUT_CHROMA_FACTOR;
+    double maxEdgeT = m_edgeHistThreshold * MAX_EDGE_FACTOR;
+    double maxChromaT = maxEdgeT * SCENECUT_CHROMA_FACTOR;
     pic->frameData.bScenecut = false;
 
     if (pic->poc == 0)
@@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture
         {
             bDup = true;
         }
-        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >= m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >= m_chromaHistThreshold))
+        else if (edgeSad < minEdgeT && maxUVSad < minChromaT)
+        {
+            pic->frameData.bScenecut = false;
+        }
+        else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT)
+        {
+            pic->frameData.bScenecut = true;
+            isMaxThres = true;
+        }
+        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >= m_scaledChromaThreshold
+                 || (edgeSad > m_edgeHistThreshold && maxUVSad >= m_chromaHistThreshold))
         {
             pic->frameData.bScenecut = true;
             bDup = false;
-            x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", pic->poc);
         }
     }
 }
@@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* 
     bool dontRead = false;
     bool bdropFrame = false;
     bool dropflag = false;
+    bool isMaxThres = false;
 
     if (m_exportedPic)
     {
@@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* 
             {
                 double maxUVSad = 0.0, edgeSad = 0.0;
                 computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);
-                findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
+                findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad, isMaxThres);
             }
         }
 
@@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* 
         if (m_param->bHistBasedSceneCut)
         {
             inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut == 1) ? true : false;
+            inFrame->m_lowres.m_bIsMaxThres = isMaxThres;
         }
         if (m_param->bHistBasedSceneCut && m_param->analysisSave)
         {
@@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p)
 
    if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)
    {
-       p->edgeTransitionThreshold = 0.01;
+       p->edgeTransitionThreshold = 0.03;
        x265_log(p, X265_LOG_WARNING, "using  default threshold %.2lf for scene cut detection\n", p->edgeTransitionThreshold);
    }
 
--- a/source/encoder/encoder.h	Thu Jun 25 13:41:59 2020 +0530
+++ b/source/encoder/encoder.h	Mon Jun 29 14:43:24 2020 +0530
@@ -165,6 +165,9 @@ class FrameData;
 
 #define MAX_SCENECUT_THRESHOLD 1.0
 #define SCENECUT_STRENGTH_FACTOR 2.0
+#define MIN_EDGE_FACTOR 0.5
+#define MAX_EDGE_FACTOR 1.5
+#define SCENECUT_CHROMA_FACTOR 10.0
 
 class Encoder : public x265_encoder
 {
@@ -373,7 +376,7 @@ public:
     bool computeHistograms(x265_picture *pic);
     void computeHistogramSAD(double *maxUVNormalizedSAD, double *edgeNormalizedSAD, int curPoc);
     double normalizeRange(int32_t value, int32_t minValue, int32_t maxValue, double rangeStart, double rangeEnd);
-    void findSceneCuts(x265_picture *pic, bool& bDup, double m_maxUVSADVal, double m_edgeSADVal);
+    void findSceneCuts(x265_picture *pic, bool& bDup, double m_maxUVSADVal, double m_edgeSADVal, bool& isMaxThres);
 
     void initRefIdx();
     void analyseRefIdx(int *numRefIdx);
--- a/source/encoder/slicetype.cpp	Thu Jun 25 13:41:59 2020 +0530
+++ b/source/encoder/slicetype.cpp	Mon Jun 29 14:43:24 2020 +0530
@@ -2001,10 +2001,41 @@ void Lookahead::slicetypeAnalyse(Lowres 
     int numAnalyzed = numFrames;
     bool isScenecut = false;
 
-    /* When scenecut threshold is set, use scenecut detection for I frame placements */
+    /* Temporal computations for scenecut detection */
     if (m_param->bHistBasedSceneCut)
-        isScenecut = frames[1]->bScenecut;
-    else
+    {
+        for (int i = numFrames - 1; i > 0; i--)
+        {
+            if (frames[i]->interPCostPercDiff > 0.0)
+                continue;
+            int64_t interCost = frames[i]->costEst[1][0];
+            int64_t intraCost = frames[i]->costEst[0][0];
+            if (interCost < 0 || intraCost < 0)
+                continue;
+            int times = 0;
+            double averagePcost = 0.0, averageIcost = 0.0;
+            for (int j = i - 1; j >= 0 && times < 5; j--, times++)
+            {
+                if (frames[j]->costEst[0][0] > 0 && frames[j]->costEst[1][0] > 0)
+                {
+                    averageIcost += frames[j]->costEst[0][0];
+                    averagePcost += frames[j]->costEst[1][0];
+                }
+                else
+                    times--;
+            }
+            if (times)
+            {
+                averageIcost = averageIcost / times;
+                averagePcost = averagePcost / times;
+                frames[i]->interPCostPercDiff = abs(interCost - averagePcost) / X265_MIN(interCost, averagePcost) * 100;
+                frames[i]->intraCostPercDiff = abs(intraCost - averageIcost) / X265_MIN(intraCost, averageIcost) * 100;
+            }
+        }
+    }
+
+    /* When scenecut threshold is set, use scenecut detection for I frame placements */
+    if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut && frames[1]->bScenecut))
         isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
 
     if (isScenecut && (m_param->bHistBasedSceneCut || m_param->scenecutThreshold))
@@ -2018,17 +2049,16 @@ void Lookahead::slicetypeAnalyse(Lowres 
         m_extendGopBoundary = false;
         for (int i = m_param->bframes + 1; i < origNumFrames; i += m_param->bframes + 1)
         {
-            if (!m_param->bHistBasedSceneCut)
+            if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut))
                 scenecut(frames, i, i + 1, true, origNumFrames);
 
             for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1, origNumFrames); j++)
             {
-                if ((!m_param->bHistBasedSceneCut && frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true)) || 
-                    (m_param->bHistBasedSceneCut && frames[j]->bScenecut))
-                    {
-                        m_extendGopBoundary = true;
-                        break;
-                    }
+                if (frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true))
+                {
+                    m_extendGopBoundary = true;
+                    break;
+                }
             }
             if (m_extendGopBoundary)
                 break;
@@ -2133,14 +2163,15 @@ void Lookahead::slicetypeAnalyse(Lowres 
         {
             for (int j = 1; j < numBFrames + 1; j++)
             {
-                if ((!m_param->bHistBasedSceneCut && scenecut(frames, j, j + 1, false, origNumFrames)) ||
-                    (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut) ||
-                    (bForceRADL && (frames[j]->frameNum == preRADL)))
-                    {
-                        frames[j]->sliceType = X265_TYPE_P;
-                        numAnalyzed = j;
-                        break;
-                    }
+                bool isNextScenecut = false;
+                if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut))
+                    isNextScenecut = scenecut(frames, j, j + 1, false, origNumFrames);
+                if (isNextScenecut || (bForceRADL && frames[j]->frameNum == preRADL))
+                {
+                    frames[j]->sliceType = X265_TYPE_P;
+                    numAnalyzed = j;
+                    break;
+                }
             }
         }
         resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed + 1);
@@ -2203,7 +2234,7 @@ bool Lookahead::scenecut(Lowres **frames
          * and not considered a scenecut. */
         for (int cp1 = p1; cp1 <= maxp1; cp1++)
         {
-            if (!scenecutInternal(frames, p0, cp1, false))
+            if (!m_param->bHistBasedSceneCut && !scenecutInternal(frames, p0, cp1, false))
             {
                 /* Any frame in between p0 and cur_p1 cannot be a real scenecut. */
                 for (int i = cp1; i > p0; i--)
@@ -2212,7 +2243,7 @@ bool Lookahead::scenecut(Lowres **frames
                     noScenecuts = false;
                 }
             }
-            else if (scenecutInternal(frames, cp1 - 1, cp1, false))
+            else if ((m_param->bHistBasedSceneCut && frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1, false))
             {
                 /* If current frame is a Scenecut from p0 frame as well as Scenecut from
                  * preceeding frame, mark it as a Scenecut */
@@ -2273,6 +2304,10 @@ bool Lookahead::scenecut(Lowres **frames
 
     if (!frames[p1]->bScenecut)
         return false;
+    /* Frames flagged as exceeding the max histogram threshold skip the temporal check in scenecutInternal() */
+    if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres)
+        return frames[p1]->bScenecut;
+
     return scenecutInternal(frames, p0, p1, bRealScenecut);
 }
 
@@ -2289,7 +2324,19 @@ bool Lookahead::scenecutInternal(Lowres 
     /* magic numbers pulled out of thin air */
     float threshMin = (float)(threshMax * 0.25);
     double bias = m_param->scenecutBias;
-    if (bRealScenecut)
+    if (m_param->bHistBasedSceneCut)
+    {
+        double minT = TEMPORAL_SCENECUT_THRESHOLD * (1 + m_param->edgeTransitionThreshold);
+        if (frame->interPCostPercDiff > minT || frame->intraCostPercDiff > minT)
+        {
+            if (bRealScenecut && frame->bScenecut)
+                x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", frame->frameNum);
+            return frame->bScenecut;
+        }
+        else
+            return false;
+    }
+    else if (bRealScenecut)
     {
         if (m_param->keyframeMin == m_param->keyframeMax)
             threshMin = threshMax;
--- a/source/encoder/slicetype.h	Thu Jun 25 13:41:59 2020 +0530
+++ b/source/encoder/slicetype.h	Mon Jun 29 14:43:24 2020 +0530
@@ -42,6 +42,7 @@ class Lookahead;
 #define LOWRES_COST_SHIFT 14
 #define AQ_EDGE_BIAS 0.5
 #define EDGE_INCLINATION 45
+#define TEMPORAL_SCENECUT_THRESHOLD 50
 
 #if HIGH_BIT_DEPTH
 #define EDGE_THRESHOLD 1023.0
--- a/source/x265.h	Thu Jun 25 13:41:59 2020 +0530
+++ b/source/x265.h	Mon Jun 29 14:43:24 2020 +0530
@@ -1860,7 +1860,7 @@ typedef struct x265_param
     /* A genuine threshold used for histogram based scene cut detection.
      * This threshold determines whether a frame is a scenecut or not
      * when compared against the edge and chroma histogram sad values.
-     * Default 0.01. Range: Real number in the interval (0,2). */
+     * Default 0.03. Range: Real number in the interval (0,1). */
     double    edgeTransitionThreshold;
 
     /* Enables histogram based scenecut detection algorithm to detect scenecuts. Default disabled */