changeset 9521:c452279cb1c9 draft

Merge with public
author Praveen Tiwari <praveen@multicorewareinc.com>
date Tue, 17 Feb 2015 10:08:56 +0530
parents 3481b16114d7 (current diff) cbec71924b09 (diff)
children 0c005021f270
files source/common/x86/pixel-util8.asm source/encoder/entropy.cpp source/test/mbdstharness.cpp
diffstat 5 files changed, 53 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/source/common/param.cpp	Mon Feb 16 16:59:08 2015 +0530
+++ b/source/common/param.cpp	Tue Feb 17 10:08:56 2015 +0530
@@ -1012,8 +1012,8 @@ int x265_check_params(x265_param *param)
           "QuadtreeTUMaxDepthIntra must be greater 0 and less than 5");
     CHECK(maxLog2CUSize < tuQTMinLog2Size + param->tuQTMaxIntraDepth - 1,
           "QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
-    CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && param->maxTUSize != 8 && param->maxTUSize != 4) || param->maxTUSize > param->maxCUSize,
-          "max TU size must be 4, 8, 16, or 32 and should be less than max CU size");
+    CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && param->maxTUSize != 8 && param->maxTUSize != 4),
+          "max TU size must be 4, 8, 16, or 32");
     CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater.");
     CHECK(param->maxNumMergeCand > 5, "MaxNumMergeCand must be 5 or smaller.");
 
--- a/source/common/x86/pixel-util8.asm	Mon Feb 16 16:59:08 2015 +0530
+++ b/source/common/x86/pixel-util8.asm	Tue Feb 17 10:08:56 2015 +0530
@@ -492,11 +492,11 @@ cglobal quant, 5,6,8
     dec         r4d
     jnz        .loop
 
-    pxor        m0, m0
-    psadbw      m7, m0
-    movhlps     m0, m7
-    paddd       m7, m0
-    movd        eax, m7
+    pshufd      m0, m7, 00001110b
+    paddd       m0, m7
+    pshufd      m1, m0, 00000001b
+    paddd       m0, m1
+    movd        eax, m0
     RET
 
 
--- a/source/encoder/encoder.cpp	Mon Feb 16 16:59:08 2015 +0530
+++ b/source/encoder/encoder.cpp	Tue Feb 17 10:08:56 2015 +0530
@@ -1624,6 +1624,12 @@ void Encoder::configure(x265_param *p)
         p->rc.cuTree = 0;
     }
 
+    if (p->maxTUSize > p->maxCUSize)
+    {
+        x265_log(p, X265_LOG_WARNING, "Max TU size should be less than or equal to max CU size, setting max TU size = %d\n", p->maxCUSize);
+        p->maxTUSize = p->maxCUSize;
+    }
+
     if (p->rc.aqStrength == 0 && p->rc.cuTree == 0)
         p->rc.aqMode = X265_AQ_NONE;
 
--- a/source/encoder/entropy.cpp	Mon Feb 16 16:59:08 2015 +0530
+++ b/source/encoder/entropy.cpp	Tue Feb 17 10:08:56 2015 +0530
@@ -557,7 +557,6 @@ void Entropy::encodeCU(const CUData& ctu
         return;
     }
 
-    // We need to split, so don't try these modes.
     if (cuSplitFlag) 
         codeSplitFlag(ctu, absPartIdx, depth);
 
@@ -684,78 +683,77 @@ void Entropy::finishCU(const CUData& ctu
     }
 }
 
-void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, uint32_t log2TrSize,
+void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
                               bool& bCodeDQP, const uint32_t depthRange[2])
 {
-    const bool subdiv = cu.m_tuDepth[absPartIdx] > tuDepth;
+    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
 
     /* in each of these conditions, the subdiv flag is implied and not signaled,
      * so we have checks to make sure the implied value matches our intentions */
-    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth)
+    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
     {
         X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
     }
-    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth &&
-             cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
+    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
     {
-        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2TrSize %d, depthRange[0] %d\n", log2TrSize, depthRange[0]);
+        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
     }
-    else if (log2TrSize > depthRange[1])
+    else if (log2CurSize > depthRange[1])
     {
         X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
     }
-    else if (log2TrSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2TrSize == depthRange[0])
+    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
     {
         X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
     }
     else
     {
-        X265_CHECK(log2TrSize > depthRange[0], "transform size failure\n");
-        codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
+        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
+        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
     }
 
     uint32_t hChromaShift = cu.m_hChromaShift;
     uint32_t vChromaShift = cu.m_vChromaShift;
-    bool bSmallChroma = (log2TrSize - hChromaShift < 2);
-    if (!tuDepth || !bSmallChroma)
+    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
+    if (!curDepth || !bSmallChroma)
     {
-        if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
-            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !subdiv);
-        if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
-            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !subdiv);
+        if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1))
+            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
+        if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1))
+            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
     }
     else
     {
-        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1), "chroma xform size match failure\n");
-        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1), "chroma xform size match failure\n");
+        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size match failure\n");
+        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size match failure\n");
     }
 
     if (subdiv)
     {
-        --log2TrSize;
-        ++tuDepth;
+        --log2CurSize;
+        ++curDepth;
 
-        uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
+        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
 
-        encodeTransform(cu, absPartIdx + 0 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
-        encodeTransform(cu, absPartIdx + 1 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
-        encodeTransform(cu, absPartIdx + 2 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
-        encodeTransform(cu, absPartIdx + 3 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
+        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
+        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
+        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
         return;
     }
 
     uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
 
-    if (cu.isInter(absPartIdxC) && !tuDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
+    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
     {
         X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
     }
     else
-        codeQtCbfLuma(cu, absPartIdx, tuDepth);
+        codeQtCbfLuma(cu, absPartIdx, curDepth);
 
-    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth);
-    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, tuDepth);
-    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, tuDepth);
+    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
+    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
+    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
     if (!(cbfY || cbfU || cbfV))
         return;
 
@@ -771,7 +769,7 @@ void Entropy::encodeTransform(const CUDa
     if (cbfY)
     {
         uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
-        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2TrSize, TEXT_LUMA);
+        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
         if (!(cbfU || cbfV))
             return;
     }
@@ -781,7 +779,7 @@ void Entropy::encodeTransform(const CUDa
         if ((absPartIdx & 3) != 3)
             return;
 
-        const uint32_t log2TrSizeC = 2;
+        const uint32_t log2CurSizeC = 2;
         const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
         const uint32_t curPartNum = 4;
         uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
@@ -791,10 +789,10 @@ void Entropy::encodeTransform(const CUDa
             const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
             do
             {
-                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
+                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
                 {
-                    uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
-                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
+                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
+                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
                 }
             }
             while (tuIterator.isNextSection());
@@ -802,9 +800,9 @@ void Entropy::encodeTransform(const CUDa
     }
     else
     {
-        uint32_t log2TrSizeC = log2TrSize - hChromaShift;
+        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
         const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
-        uint32_t curPartNum = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
+        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
         uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
         for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
         {
@@ -812,10 +810,10 @@ void Entropy::encodeTransform(const CUDa
             const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
             do
             {
-                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
+                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
                 {
-                    uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
-                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
+                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
+                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
                 }
             }
             while (tuIterator.isNextSection());
--- a/source/test/mbdstharness.cpp	Mon Feb 16 16:59:08 2015 +0530
+++ b/source/test/mbdstharness.cpp	Tue Feb 17 10:08:56 2015 +0530
@@ -209,7 +209,7 @@ bool MBDstHarness::check_quant_primitive
 
     for (int i = 0; i < ITERS; i++)
     {
-        int width = (rand() % 4 + 1) * 4;
+        int width = 1 << (rand() % 4 + 2);
         int height = width;
 
         uint32_t optReturnValue = 0;