changeset 12624:c2769ac5fa9d

Merge with stable
author Aruna Matheswaran <aruna@multicorewareinc.com>
date Mon, 17 Feb 2020 20:46:53 +0530
parents fdbd4e4a2aff (current diff) dd2464df2f40 (diff)
children 30eb4de83092
files
diffstat 6 files changed, 154 insertions(+-), 25 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Sat Jan 25 18:08:03 2020 +0530
+++ b/.hgtags	Mon Feb 17 20:46:53 2020 +0530
@@ -37,3 +37,6 @@ 113518629fa54ffb491dd479e15c1f00dd39d376
 b4e38ce16d7c4b37a6482dc7ae61fd31071b6ff1 3.1_RC2
 20c9994e8bfbeb9443851b2b3a050cd98c8b147b 3.2_RC1
 353572437201d551381002aebf20d244bd49ef17 3.2
+5ee3593ebd82b4d8957909bbc1b68b99b59ba773 3.3_RC1
+96a10df63c0b778b480330bdf3be8da7db8a5fb1 3.3_RC2
+057215961bc4b51b6260a584ff3d506e6d65cfd6 3.3
--- a/doc/reST/releasenotes.rst	Sat Jan 25 18:08:03 2020 +0530
+++ b/doc/reST/releasenotes.rst	Mon Feb 17 20:46:53 2020 +0530
@@ -2,6 +2,41 @@
 Release Notes
 *************
 
+Version 3.3
+===========
+
+Release date - 17th February, 2020.
+
+New features
+------------
+1. **Adaptive frame duplication** to identify and skip encoding of near-identical frames and signal the duplication info to the decoder via pic_struct SEI. :option:`--frame-dup` to enable frame duplication and :option:`--dup-threshold` to set the threshold for frame similarity (optional).
+2. **Boundary aware quantization** to cut off bits from frames following a scene-cut. This leverages the inability of HVS to perceive fine details during scene changes and saves bits. :option:`--scenecut-aware-qp`, :option:`--scenecut-window` and :option:`--max-qp-delta` to enable boundary aware frame quantization, to set window size (optional) and to set QP offset (optional), respectively.
+3. **Improved scene-cut detection** using edge and chroma histograms. :option:`--hist-scenecut` to enable the feature and :option:`--hist-threshold` (optional) to provide threshold for determining scene cuts.
+
+Enhancements to existing features
+---------------------------------
+1. :option:`--hme-range` to modify search range for HME levels L0, L1, and L2.
+2. Improved performance of AQ mode 4 by reducing memory footprint.
+3. Introduced :option:`--analysis-save-reuse-level` and :option:`--analysis-load-reuse-level` to de-couple reuse levels of :option:`--analysis-save` and :option:`--analysis-load`. Turnaround time of ABR encoding can be reduced by properly leveraging these options.
+  
+Encoder enhancements
+--------------------
+1. Improved VBV lookahead to eliminate blocky artifacts in Intra frames occurring towards the end of the title.
+
+API changes
+-----------
+1. New API function **x265_encoder_reconfig_zone()** to invoke zone reconfiguration dynamically.  
+2. Renamed :option:`--hdr` to :option:`--hdr10`. :option:`--hdr` will be deprecated in the upcoming major release. 
+3. Renamed :option:`--hdr-opt` to :option:`--hdr10-opt`. :option:`--hdr-opt` will be deprecated in the upcoming major release.
+4. Additions to **x265_param** structure to support the newly added features and encoder enhancements.
+
+Bug fixes
+---------
+1. Output change in :option:`--analysis-load` at inter-refine levels 2 and 3.
+2. Encoder crash with zones.
+3. Integration issues with SVT v1.4.1.
+4. Fixed bug in :option:`--limit-tu` 3 and 4 while loading co-located CU's TU depth.
+
 Version 3.2
 ===========
 
--- a/source/common/common.h	Sat Jan 25 18:08:03 2020 +0530
+++ b/source/common/common.h	Mon Feb 17 20:46:53 2020 +0530
@@ -130,7 +130,6 @@ typedef uint64_t sum2_t;
 typedef uint64_t pixel4;
 typedef int64_t  ssum2_t;
 #define HISTOGRAM_BINS 1024
-#define SHIFT 1
 #else
 typedef uint8_t  pixel;
 typedef uint16_t sum_t;
@@ -138,7 +137,6 @@ typedef uint32_t sum2_t;
 typedef uint32_t pixel4;
 typedef int32_t  ssum2_t; // Signed sum
 #define HISTOGRAM_BINS 256
-#define SHIFT 0
 #endif // if HIGH_BIT_DEPTH
 
 #if X265_DEPTH < 10
--- a/source/encoder/encoder.cpp	Sat Jan 25 18:08:03 2020 +0530
+++ b/source/encoder/encoder.cpp	Mon Feb 17 20:46:53 2020 +0530
@@ -220,9 +220,9 @@ void Encoder::create()
     {
         for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; i++)
         {
-            m_planeSizes[i] = m_param->sourceWidth * m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i];
-        }
-        uint32_t pixelbytes = m_param->sourceBitDepth > 8 ? 2 : 1;
+            m_planeSizes[i] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[i]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]);
+        }
+        uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1;
         m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes);
         m_edgeHistThreshold = m_param->edgeTransitionThreshold;
         m_chromaHistThreshold = m_edgeHistThreshold * 10.0;
@@ -231,6 +231,23 @@ void Encoder::create()
         m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold, MAX_SCENECUT_THRESHOLD);
         m_scaledChromaThreshold = m_chromaHistThreshold * SCENECUT_STRENGTH_FACTOR;
         m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold, MAX_SCENECUT_THRESHOLD);
+        if (m_param->sourceBitDepth != m_param->internalBitDepth)
+        {
+            int size = m_param->sourceWidth * m_param->sourceHeight;
+            int hshift = CHROMA_H_SHIFT(m_param->internalCsp);
+            int vshift = CHROMA_V_SHIFT(m_param->internalCsp);
+            int widthC = m_param->sourceWidth >> hshift;
+            int heightC = m_param->sourceHeight >> vshift;
+
+            m_inputPic[0] = X265_MALLOC(pixel, size);
+            if (m_param->internalCsp != X265_CSP_I400)
+            {
+                for (int j = 1; j < 3; j++)
+                {
+                    m_inputPic[j] = X265_MALLOC(pixel, widthC * heightC);
+                }
+            }
+        }
     }
 
     // Do not allow WPP if only one row or fewer than 3 columns, it is pointless and unstable
@@ -874,6 +891,18 @@ void Encoder::destroy()
         {
             X265_FREE_ZERO(m_edgePic);
         }
+
+        if (m_param->sourceBitDepth != m_param->internalBitDepth)
+        {
+            X265_FREE_ZERO(m_inputPic[0]);
+            if (m_param->internalCsp != X265_CSP_I400)
+            {
+                for (int i = 1; i < 3; i++)
+                {
+                    X265_FREE_ZERO(m_inputPic[i]);
+                }
+            }
+        }
     }
 
     for (int i = 0; i < m_param->frameNumThreads; i++)
@@ -1337,11 +1366,82 @@ void Encoder::copyPicture(x265_picture *
 
 bool Encoder::computeHistograms(x265_picture *pic)
 {
-    pixel *src = (pixel *) pic->planes[0];
+    pixel *src = NULL, *planeV = NULL, *planeU = NULL;
+    uint32_t widthC, heightC;
+    int hshift, vshift;
+
+    hshift = CHROMA_H_SHIFT(pic->colorSpace);
+    vshift = CHROMA_V_SHIFT(pic->colorSpace);
+    widthC = pic->width >> hshift;
+    heightC = pic->height >> vshift;
+
+    if (pic->bitDepth == X265_DEPTH)
+    {
+        src = (pixel*)pic->planes[0];
+        if (m_param->internalCsp != X265_CSP_I400)
+        {
+            planeU = (pixel*)pic->planes[1];
+            planeV = (pixel*)pic->planes[2];
+        }
+    }
+    else if (pic->bitDepth == 8 && X265_DEPTH > 8)
+    {
+        int shift = (X265_DEPTH - 8);
+        uint8_t *yChar, *uChar, *vChar;
+
+        yChar = (uint8_t*)pic->planes[0];
+        primitives.planecopy_cp(yChar, pic->stride[0] / sizeof(*yChar), m_inputPic[0], pic->stride[0] / sizeof(*yChar), pic->width, pic->height, shift);
+        src = m_inputPic[0];
+        if (m_param->internalCsp != X265_CSP_I400)
+        {
+            uChar = (uint8_t*)pic->planes[1];
+            vChar = (uint8_t*)pic->planes[2];
+            primitives.planecopy_cp(uChar, pic->stride[1] / sizeof(*uChar), m_inputPic[1], pic->stride[1] / sizeof(*uChar), widthC, heightC, shift);
+            primitives.planecopy_cp(vChar, pic->stride[2] / sizeof(*vChar), m_inputPic[2], pic->stride[2] / sizeof(*vChar), widthC, heightC, shift);
+            planeU = m_inputPic[1];
+            planeV = m_inputPic[2];
+        }
+    }
+    else
+    {
+        uint16_t *yShort, *uShort, *vShort;
+        /* mask off bits that are supposed to be zero */
+        uint16_t mask = (1 << X265_DEPTH) - 1;
+        int shift = abs(pic->bitDepth - X265_DEPTH);
+
+        yShort = (uint16_t*)pic->planes[0];
+        uShort = (uint16_t*)pic->planes[1];
+        vShort = (uint16_t*)pic->planes[2];
+
+        if (pic->bitDepth > X265_DEPTH)
+        {
+            /* shift right and mask pixels to final size */
+            primitives.planecopy_sp(yShort, pic->stride[0] / sizeof(*yShort), m_inputPic[0], pic->stride[0] / sizeof(*yShort), pic->width, pic->height, shift, mask);
+            if (m_param->internalCsp != X265_CSP_I400)
+            {
+                primitives.planecopy_sp(uShort, pic->stride[1] / sizeof(*uShort), m_inputPic[1], pic->stride[1] / sizeof(*uShort), widthC, heightC, shift, mask);
+                primitives.planecopy_sp(vShort, pic->stride[2] / sizeof(*vShort), m_inputPic[2], pic->stride[2] / sizeof(*vShort), widthC, heightC, shift, mask);
+            }
+        }
+        else /* Case for (pic.bitDepth < X265_DEPTH) */
+        {
+            /* shift left and mask pixels to final size */
+            primitives.planecopy_sp_shl(yShort, pic->stride[0] / sizeof(*yShort), m_inputPic[0], pic->stride[0] / sizeof(*yShort), pic->width, pic->height, shift, mask);
+            if (m_param->internalCsp != X265_CSP_I400)
+            {
+                primitives.planecopy_sp_shl(uShort, pic->stride[1] / sizeof(*uShort), m_inputPic[1], pic->stride[1] / sizeof(*uShort), widthC, heightC, shift, mask);
+                primitives.planecopy_sp_shl(vShort, pic->stride[2] / sizeof(*vShort), m_inputPic[2], pic->stride[2] / sizeof(*vShort), widthC, heightC, shift, mask);
+            }
+        }
+
+        src = m_inputPic[0];
+        planeU = m_inputPic[1];
+        planeV = m_inputPic[2];
+    }
+
     size_t bufSize = sizeof(pixel) * m_planeSizes[0];
     int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes;
-    int32_t numBytes = m_param->sourceBitDepth > 8 ? 2 : 1;
-    memset(m_edgePic, 0, bufSize * numBytes);
+    memset(m_edgePic, 0, bufSize);
 
     if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false))
     {
@@ -1350,10 +1450,9 @@ bool Encoder::computeHistograms(x265_pic
     }
 
     pixel pixelVal;
-    int64_t size = pic->height * (pic->stride[0] >> SHIFT);
     int32_t *edgeHist = m_curEdgeHist;
     memset(edgeHist, 0, 2 * sizeof(int32_t));
-    for (int64_t i = 0; i < size; i++)
+    for (int64_t i = 0; i < m_planeSizes[0]; i++)
     {
         if (!m_edgePic[i])
            edgeHist[0]++;
@@ -1364,16 +1463,12 @@ bool Encoder::computeHistograms(x265_pic
     if (pic->colorSpace != X265_CSP_I400)
     {
         /* U Histogram Calculation */
-        int32_t HeightL = (pic->height >> x265_cli_csps[pic->colorSpace].height[1]);
-        size = HeightL * (pic->stride[1] >> SHIFT);
         int32_t *uHist = m_curUVHist[0];
-        pixel *chromaPlane = (pixel *) pic->planes[1];
-
         memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
 
-        for (int64_t i = 0; i < size; i++)
-        {
-            pixelVal = chromaPlane[i];
+        for (int64_t i = 0; i < m_planeSizes[1]; i++)
+        {
+            pixelVal = planeU[i];
             uHist[pixelVal]++;
         }
 
@@ -1381,15 +1476,12 @@ bool Encoder::computeHistograms(x265_pic
         if (planeCount == 3)
         {
             pixelVal = 0;
-            int32_t heightV = (pic->height >> x265_cli_csps[pic->colorSpace].height[2]);
-            size = heightV * (pic->stride[2] >> SHIFT);
             int32_t *vHist = m_curUVHist[1];
-            chromaPlane = (pixel *) pic->planes[2];
-
             memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
-            for (int64_t i = 0; i < size; i++)
+
+            for (int64_t i = 0; i < m_planeSizes[2]; i++)
             {
-                pixelVal = chromaPlane[i];
+                pixelVal = planeV[i];
                 vHist[pixelVal]++;
             }
             for (int i = 0; i < HISTOGRAM_BINS; i++)
--- a/source/encoder/encoder.h	Sat Jan 25 18:08:03 2020 +0530
+++ b/source/encoder/encoder.h	Mon Feb 17 20:46:53 2020 +0530
@@ -255,6 +255,7 @@ public:
 
     /* For histogram based scene-cut detection */
     pixel*             m_edgePic;
+    pixel*             m_inputPic[3];
     int32_t            m_curUVHist[2][HISTOGRAM_BINS];
     int32_t            m_curMaxUVHist[HISTOGRAM_BINS];
     int32_t            m_prevMaxUVHist[HISTOGRAM_BINS];
--- a/source/test/regression-tests.txt	Sat Jan 25 18:08:03 2020 +0530
+++ b/source/test/regression-tests.txt	Mon Feb 17 20:46:53 2020 +0530
@@ -23,7 +23,7 @@ BasketballDrive_1920x1080_50.y4m,--prese
 BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc 3 --subme 0 --limit-tu 4
 BasketballDrive_1920x1080_50.y4m,--preset slower --no-cutree --analysis-save x265_analysis.dat --analysis-save-reuse-level 10 --bitrate 7000 --limit-tu 0::--preset slower --no-cutree --analysis-load x265_analysis.dat --analysis-load-reuse-level 10 --bitrate 7000 --limit-tu 0
 BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless --pmode --limit-refs 1 --aq-mode 3 --limit-tu 3
-BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree --analysis-save x265_analysis.dat --analysis-save-reuse-level 5 --crf 18 --tskip-fast --limit-tu 2::--preset veryslow --no-cutree --analysis-load x265_analysis.dat  --analysis-save-reuse-level 5 --crf 18 --tskip-fast --limit-tu 2
+BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree --analysis-save x265_analysis.dat --analysis-save-reuse-level 5 --crf 18 --tskip-fast --limit-tu 2::--preset veryslow --no-cutree --analysis-load x265_analysis.dat  --analysis-load-reuse-level 5 --crf 18 --tskip-fast --limit-tu 2
 BasketballDrive_1920x1080_50.y4m,--preset veryslow --recon-y4m-exec "ffplay -i pipe:0 -autoexit"
 Coastguard-4k.y4m,--preset ultrafast --recon-y4m-exec "ffplay -i pipe:0 -autoexit"
 Coastguard-4k.y4m,--preset superfast --tune grain --overscan=crop
@@ -182,7 +182,7 @@ crowd_run_1080p50.y4m,--preset ultrafast
 crowd_run_1080p50.y4m,--preset superfast --no-cutree --analysis-save x265_analysis.dat  --analysis-save-reuse-level 2 --scale-factor 2 --crf 22 --vbv-maxrate 5000 --vbv-bufsize 5000::crowd_run_2160p50.y4m, --preset superfast --no-cutree --analysis-load x265_analysis.dat  --analysis-load-reuse-level 2 --scale-factor 2 --crf 22 --vbv-maxrate 10000 --vbv-bufsize 10000 
 crowd_run_1080p50.y4m,--preset fast --no-cutree --analysis-save x265_analysis.dat  --analysis-save-reuse-level 5 --scale-factor 2 --qp 18::crowd_run_2160p50.y4m, --preset fast --no-cutree --analysis-load x265_analysis.dat  --analysis-load-reuse-level 5 --scale-factor 2 --qp 18
 crowd_run_1080p50.y4m,--preset medium --no-cutree --analysis-save x265_analysis.dat  --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000  --vbv-maxrate 5000 --vbv-bufsize 5000 --early-skip --tu-inter-depth 3::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-load x265_analysis.dat  --analysis-load-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-maxrate 10000 --vbv-bufsize 10000 --early-skip --tu-inter-depth 3 --refine-intra 4 --dynamic-refine::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-load x265_analysis.dat  --analysis-load-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-maxrate 10000 --vbv-bufsize 10000 --early-skip --tu-inter-depth 3 --refine-intra 3 --refine-inter 3
-RaceHorses_416x240_30.y4m,--preset slow --no-cutree --ctu 16 --analysis-save x265_analysis.dat --analysis-save-reuse-level 10 --scale-factor 2 --crf 22  --vbv-maxrate 1000 --vbv-bufsize 1000::RaceHorses_832x480_30.y4m, --preset slow --no-cutree --ctu 32 --analysis-load x265_analysis.dat  --analysis-save x265_analysis_2.dat --analysis-load-reuse-level 10 --analysis-save-reuse-level 0 --scale-factor 2 --crf 16 --vbv-maxrate 4000 --vbv-bufsize 4000 --refine-intra 0 --refine-inter 1::RaceHorses_1664x960_30.y4m,--preset slow --no-cutree --ctu 64 --analysis-load x265_analysis_2.dat  --analysis-load-reuse-level 10 --scale-factor 2 --crf 12 --vbv-maxrate 7000 --vbv-bufsize 7000 --refine-intra 2 --refine-inter 2
+RaceHorses_416x240_30.y4m,--preset slow --no-cutree --ctu 16 --analysis-save x265_analysis.dat --analysis-save-reuse-level 10 --scale-factor 2 --crf 22  --vbv-maxrate 1000 --vbv-bufsize 1000::RaceHorses_832x480_30.y4m, --preset slow --no-cutree --ctu 32 --analysis-load x265_analysis.dat  --analysis-save x265_analysis_2.dat --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --crf 16 --vbv-maxrate 4000 --vbv-bufsize 4000 --refine-intra 0 --refine-inter 1::RaceHorses_1664x960_30.y4m,--preset slow --no-cutree --ctu 64 --analysis-load x265_analysis_2.dat  --analysis-load-reuse-level 10 --scale-factor 2 --crf 12 --vbv-maxrate 7000 --vbv-bufsize 7000 --refine-intra 2 --refine-inter 2
 ElFunete_960x540_60.yuv,--colorprim bt709 --transfer bt709 --chromaloc 2 --aud --repeat-headers --no-opt-qp-pps --no-opt-ref-list-length-pps --wpp --no-interlace --sar 1:1 --min-keyint 60 --no-open-gop --rc-lookahead 180 --bframes 5 --b-intra --ref 4 --cbqpoffs -2 --crqpoffs -2 --lookahead-threads 0 --weightb --qg-size 8 --me star --preset veryslow --frame-threads 1 --b-adapt 2 --aq-mode 3 --rd 6 --pools 15 --colormatrix bt709 --keyint 120 --high-tier --ctu 64 --tune psnr --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500 --analysis-save-reuse-level 10 --analysis-save elfuente_960x540.dat --scale-factor 2::ElFunete_1920x1080_60.yuv,--colorprim bt709 --transfer bt709 --chromaloc 2 --aud --repeat-headers --no-opt-qp-pps --no-opt-ref-list-length-pps --wpp --no-interlace --sar 1:1 --min-keyint 60 --no-open-gop --rc-lookahead 180 --bframes 5 --b-intra --ref 4 --cbqpoffs -2 --crqpoffs -2 --lookahead-threads 0 --weightb --qg-size 8 --me star --preset veryslow --frame-threads 1 --b-adapt 2 --aq-mode 3 --rd 6 --pools 15 --colormatrix bt709 --keyint 120 --high-tier --ctu 64 --tune psnr --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500 --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --analysis-save elfuente_1920x1080.dat --limit-tu 0 --scale-factor 2 --analysis-load elfuente_960x540.dat --refine-intra 4 --refine-inter 2::ElFuente_3840x2160_60.yuv,--colorprim bt709 --transfer bt709 --chromaloc 2 --aud --repeat-headers --no-opt-qp-pps --no-opt-ref-list-length-pps --wpp --no-interlace --sar 1:1 --min-keyint 60 --no-open-gop --rc-lookahead 180 --bframes 5 --b-intra --ref 4 --cbqpoffs -2 --crqpoffs -2 --lookahead-threads 0 --weightb --qg-size 8 --me star --preset veryslow --frame-threads 1 --b-adapt 2 --aq-mode 3 --rd 6 --pools 15 --colormatrix bt709 --keyint 120 --high-tier --ctu 64 --tune=psnr --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000 --analysis-load-reuse-level 10 --limit-tu 0 --scale-factor 2 --analysis-load elfuente_1920x1080.dat 
--refine-intra 4 --refine-inter 2
 #save/load with ctu distortion refinement
 CrowdRun_1920x1080_50_10bit_422.yuv,--no-cutree --analysis-save x265_analysis.dat --analysis-save-reuse-level 5 --refine-ctu-distortion 1 --bitrate 7000::--no-cutree --analysis-load x265_analysis.dat --refine-ctu-distortion 1 --bitrate 7000 --analysis-load-reuse-level 5