changeset 1503:33bb25b0bf5f

Merged multicoreware/xhevc into default
author Deepthi Devaki Akkoorath <deepthidevaki@multicorewareinc.com>
date Fri, 17 May 2013 09:55:13 +0530
parents 6f0ad2ce8510 (current diff) 2c3c0dfb5f83 (diff)
children 393536c14d6f
files
diffstat 12 files changed, 599 insertions(+-), 37 deletions(-) [+]
line wrap: on
line diff
--- a/build/linux/batch.py	Thu May 16 17:58:42 2013 +0530
+++ b/build/linux/batch.py	Fri May 17 09:55:13 2013 +0530
@@ -2,10 +2,12 @@
 import os
 import sys
 import subprocess
+import hashlib
 
 tsfolder = os.getenv('HEVC_TESTSEQ_FOLDER', '/testsequences')
 limit = os.getenv('HEVC_TESTSEQ_COUNT', '16')
 saverecon = os.getenv('HEVC_SAVE_RECON', '0')
+outfolder = os.getenv('HEVC_OUTPUT_FOLDER', 'runoutputs')
 
 seq = ['BQMall_832x480_60.y4m',
 'BQSquare_416x240_60.y4m',
@@ -50,12 +52,16 @@ for f in fullpath:
 else:
     print 'All test sequences found'
 
+if not os.path.isdir(outfolder):
+    os.mkdir(outfolder)
+
 print 'Running...'
 procs = []
 for i, path in enumerate(fullpath):
     base, ext = os.path.splitext(seq[i])
+    bitstream = os.path.join(outfolder, base + '.hevc')
     cmdline = ['./x265-cli', '-c', '../../cfg/encoder_I_15P.cfg',
-               '-i', path, '-b', base + '.hevc']
+               '-i', path, '-b', bitstream]
     if saverecon in ('1', 'Y'):
         cmdline.append('-o')
         cmdline.append(base + '_recon.y4m')
@@ -63,17 +69,20 @@ for i, path in enumerate(fullpath):
         cmdline.append('-o')
         cmdline.append('')
     cmdline += sys.argv[1:]
-    procs.append(subprocess.Popen(cmdline, shell=False,
-                                  stdout=subprocess.PIPE,
-                                  stderr=subprocess.PIPE))
+    procs.append([bitstream, subprocess.Popen(cmdline, shell=False,
+                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)])
 
-fp = open('batchrun-log.txt', 'w')
+logfname = os.path.join(outfolder, 'log.txt')
+fp = open(logfname, 'w')
 fp.write('# generated by ' + ' '.join(sys.argv) + '\n\n')
-for proc in procs:
+for bitstream, proc in procs:
     out, err = proc.communicate()
-    fp.write('# stdout\n')
     fp.write(out)
-    fp.write('# stderr\n')
-    fp.write(err)
-fp.close();
-print 'Run completed, see batchrun-log.txt'
+    if err:
+        fp.write('# stderr start\n')
+        fp.write(err)
+        fp.write('# stderr end\n')
+    fp.write('MD5 hash of encoded bitstream:\n')
+    fp.write(hashlib.md5(open(bitstream, 'rb').read()).hexdigest() + '\n\n')
+fp.close()
+print 'Run completed, see', logfname
--- a/cfg/encoder_I_15P.cfg	Thu May 16 17:58:42 2013 +0530
+++ b/cfg/encoder_I_15P.cfg	Fri May 17 09:55:13 2013 +0530
@@ -1,7 +1,3 @@
-#======== File I/O =====================
-BitstreamFile                 : str.bin
-ReconFile                     : rec.yuv
-
 #======== Unit definition ================
 MaxCUWidth                    : 64          # Maximum coding unit width in pixel
 MaxCUHeight                   : 64          # Maximum coding unit height in pixel
--- a/cfg/encoder_all_I.cfg	Thu May 16 17:58:42 2013 +0530
+++ b/cfg/encoder_all_I.cfg	Fri May 17 09:55:13 2013 +0530
@@ -1,7 +1,3 @@
-#======== File I/O =====================
-BitstreamFile                 : str.bin
-ReconFile                     : rec.yuv
-
 #======== Unit definition ================
 MaxCUWidth                    : 64          # Maximum coding unit width in pixel
 MaxCUHeight                   : 64          # Maximum coding unit height in pixel
--- a/source/CMakeLists.txt	Thu May 16 17:58:42 2013 +0530
+++ b/source/CMakeLists.txt	Fri May 17 09:55:13 2013 +0530
@@ -112,6 +112,15 @@ if(ENABLE_CLI)
 
     file(GLOB APPCOMMON  Lib/TAppCommon/*.h Lib/TAppCommon/*.cpp)
 
+    # Visual leak detector
+    find_package(VLD)
+    if(VLD_FOUND)
+        add_definitions(-DHAVE_VLD)
+        include_directories(${VLD_INCLUDE_DIRS})
+        set(PLATFORM_LIBS ${PLATFORM_LIBS} ${VLD_LIBRARIES})
+        link_directories(${VLD_LIBRARY_DIRS})
+    endif()
+
     add_executable(x265-cli
         x265main.cpp ${EXTRAS} ${APPCOMMON}
         x265cfg.cpp x265cfg.h
--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Thu May 16 17:58:42 2013 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Fri May 17 09:55:13 2013 +0530
@@ -243,19 +243,24 @@ Void  TComPicYuv::copyToPicCr(TComPicYuv
 
 Void TComPicYuv::extendPicBorder()
 {
-    if (m_bIsBorderExtended) return;
+    if (m_bIsBorderExtended)
+        return;
+
     PPAScopeEvent(TComPicYUV_extendPicBorder);
     xExtendPicCompBorder(getLumaAddr(), getStride(), getWidth(),     getHeight(),      m_iLumaMarginX,   m_iLumaMarginY);
     xExtendPicCompBorder(getCbAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_iChromaMarginX, m_iChromaMarginY);
     xExtendPicCompBorder(getCrAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_iChromaMarginX, m_iChromaMarginY);
 
     /* Create buffers for Hpel/Qpel Planes */
-    for (int i = 0; i < 4; i++)
+    if (m_filteredBlockBufY[0][0] == NULL)
     {
-        for (int j = 0; j < 4; j++)
+        for (int i = 0; i < 4; i++)
         {
-            m_filteredBlockBufY[i][j]      = (Pel*)xMalloc(Pel, (m_iPicWidth       + (m_iLumaMarginX << 1)) * (m_iPicHeight       + (m_iLumaMarginY << 1)));
-            m_filteredBlockOrgY[i][j]      = m_filteredBlockBufY[i][j] + m_iLumaMarginY   * getStride()  + m_iLumaMarginX;
+            for (int j = 0; j < 4; j++)
+            {
+                m_filteredBlockBufY[i][j] = (Pel*)xMalloc(Pel, (m_iPicWidth + (m_iLumaMarginX << 1)) * (m_iPicHeight + (m_iLumaMarginY << 1)));
+                m_filteredBlockOrgY[i][j] = m_filteredBlockBufY[i][j] + m_iLumaMarginY * getStride() + m_iLumaMarginX;
+            }
         }
     }
 
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Thu May 16 17:58:42 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Fri May 17 09:55:13 2013 +0530
@@ -362,7 +362,7 @@ Void TEncCu::deriveTestModeAMP(TComDataC
  *
  *- for loop of QP value to compress the current CU with all possible QP
 */
-
+#if EARLY_PARTITION_DECISION
 Void TEncCu::xCompressCU(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, TComDataCU* rpcTempCUNxN, UInt uiDepth, PartSize eParentPartSize)
 {
     m_abortFlag = false;
@@ -801,6 +801,406 @@ Void TEncCu::xCompressCU(TComDataCU*& rp
     assert(rpcBestCU->getTotalCost() != MAX_DOUBLE);
 }
 
+#else
+Void TEncCu::xCompressCU(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, TComDataCU* rpcParentBestCU, UInt uiDepth, PartSize eParentPartSize)
+{
+    m_abortFlag = false;
+    TComPic* pcPic = rpcBestCU->getPic();
+
+    PPAScopeEvent(TEncCu_xCompressCU);
+
+    // get Original YUV data from picture
+    m_ppcOrigYuv[uiDepth]->copyFromPicYuv(pcPic->getPicYuvOrg(), rpcBestCU->getAddr(), rpcBestCU->getZorderIdxInCU());
+
+    // variables for fast encoder decision
+    Bool    bTrySplit    = true;
+    
+    // variable for Early CU determination
+    Bool    bSubBranch = true;
+
+    // variable for Cbf fast mode PU decision
+    Bool doNotBlockPu = true;
+    Bool earlyDetectionSkipMode = false;
+
+    Bool bTrySplitDQP  = true;
+
+    static  Double  afCost[MAX_CU_DEPTH];
+    static  Int      aiNum[MAX_CU_DEPTH];
+
+    if (rpcBestCU->getAddr() == 0)
+    {
+        ::memset(afCost, 0, sizeof(afCost));
+        ::memset(aiNum,  0, sizeof(aiNum));
+    }
+
+    Bool bBoundary = false;
+    UInt uiLPelX   = rpcBestCU->getCUPelX();
+    UInt uiRPelX   = uiLPelX + rpcBestCU->getWidth(0)  - 1;
+    UInt uiTPelY   = rpcBestCU->getCUPelY();
+    UInt uiBPelY   = uiTPelY + rpcBestCU->getHeight(0) - 1;
+
+    Int iQP = m_pcEncCfg->getUseRateCtrl() ? m_pcRateCtrl->getRCQP() : rpcTempCU->getQP(0);
+    Double _2Nx2NCost = 0, _NxNCost = 0, cost = 0.0;
+
+    // If slice start or slice end is within this cu...
+    TComSlice * pcSlice = rpcTempCU->getPic()->getSlice(rpcTempCU->getPic()->getCurrSliceIdx());
+    Bool bSliceEnd = (pcSlice->getSliceCurEndCUAddr() > rpcTempCU->getSCUAddr() && pcSlice->getSliceCurEndCUAddr() < rpcTempCU->getSCUAddr() + rpcTempCU->getTotalNumPart());
+    Bool bInsidePicture = (uiRPelX < rpcBestCU->getSlice()->getSPS()->getPicWidthInLumaSamples()) && (uiBPelY < rpcBestCU->getSlice()->getSPS()->getPicHeightInLumaSamples());
+    // Only try these modes when the CU is inside the picture and no slice end falls within it; otherwise it has to be split.
+    if (!bSliceEnd && bInsidePicture)
+    {
+        // variables for fast encoder decision
+        bTrySplit    = true;
+        
+        rpcTempCU->initEstData(uiDepth, iQP);
+
+        // do inter modes, SKIP and 2Nx2N
+        if (rpcBestCU->getSlice()->getSliceType() != I_SLICE)
+        {
+            // 2Nx2N
+            if (m_pcEncCfg->getUseEarlySkipDetection())
+            {
+                xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_2Nx2N, _2Nx2NCost);
+                rpcTempCU->initEstData(uiDepth, iQP);                              //by Competition for inter_2Nx2N
+            }
+            // SKIP
+            xCheckRDCostMerge2Nx2N(rpcBestCU, rpcTempCU, &earlyDetectionSkipMode); //by Merge for inter_2Nx2N
+            rpcTempCU->initEstData(uiDepth, iQP);
+
+            if (!m_pcEncCfg->getUseEarlySkipDetection())
+            {
+                // 2Nx2N
+                xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_2Nx2N, _2Nx2NCost);
+                rpcTempCU->initEstData(uiDepth, iQP);
+                if (m_pcEncCfg->getUseCbfFastMode())
+                {
+                    doNotBlockPu = rpcBestCU->getQtRootCbf(0) != 0;
+                }
+            }
+        }
+
+        bTrySplitDQP = bTrySplit;
+
+        if (uiDepth <= m_addSADDepth)
+        {
+            m_LCUPredictionSAD += m_temporalSAD;
+            m_addSADDepth = uiDepth;
+        }
+
+        if (!earlyDetectionSkipMode)
+        {
+            rpcTempCU->initEstData(uiDepth, iQP);
+
+            // do inter modes, NxN, 2NxN, and Nx2N
+            if (rpcBestCU->getSlice()->getSliceType() != I_SLICE)
+            {
+                // NxN (not tried for 8x8 CUs)
+                if (!((rpcBestCU->getWidth(0) == 8) && (rpcBestCU->getHeight(0) == 8)))
+                {
+                    if (uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth && doNotBlockPu)
+                    {
+                        xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_NxN, _NxNCost);
+                        rpcTempCU->initEstData(uiDepth, iQP);
+                    }
+                }
+            
+                if (pcPic->getSlice(0)->getSPS()->getAMPRefineAcc(uiDepth))
+                {
+                    // 2NxN, Nx2N
+                    if (doNotBlockPu)
+                    {
+                        xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_Nx2N, cost);
+                        rpcTempCU->initEstData(uiDepth, iQP);
+                        if (m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_Nx2N)
+                        {
+                            doNotBlockPu = rpcBestCU->getQtRootCbf(0) != 0;
+                        }
+                    }
+                    if (doNotBlockPu)
+                    {
+                        xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_2NxN, cost);
+                        rpcTempCU->initEstData(uiDepth, iQP);
+                        if (m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxN)
+                        {
+                            doNotBlockPu = rpcBestCU->getQtRootCbf(0) != 0;
+                        }
+                    }
+                }
+
+                //! Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
+                if (pcPic->getSlice(0)->getSPS()->getAMPAcc(uiDepth))
+                {
+                    Bool bTestAMP_Hor = false, bTestAMP_Ver = false;
+                    Bool bTestMergeAMP_Hor = false, bTestMergeAMP_Ver = false;
+
+                    deriveTestModeAMP(rpcBestCU, eParentPartSize, bTestAMP_Hor, bTestAMP_Ver, bTestMergeAMP_Hor, bTestMergeAMP_Ver);
+
+                    //! Do horizontal AMP
+                    if (bTestAMP_Hor)
+                    {
+                        if (doNotBlockPu)
+                        {
+                            xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_2NxnU, cost);
+                            rpcTempCU->initEstData(uiDepth, iQP);
+                            if (m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU)
+                            {
+                                doNotBlockPu = rpcBestCU->getQtRootCbf(0) != 0;
+                            }
+                        }
+                        if (doNotBlockPu)
+                        {
+                            xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_2NxnD, cost);
+                            rpcTempCU->initEstData(uiDepth, iQP);
+                            if (m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD)
+                            {
+                                doNotBlockPu = rpcBestCU->getQtRootCbf(0) != 0;
+                            }
+                        }
+                    }
+                    else if (bTestMergeAMP_Hor)
+                    {
+                        if (doNotBlockPu)
+                        {
+                            xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_2NxnU, cost, true);
+                            rpcTempCU->initEstData(uiDepth, iQP);
+                            if (m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU)
+                            {
+                                doNotBlockPu = rpcBestCU->getQtRootCbf(0) != 0;
+                            }
+                        }
+                        if (doNotBlockPu)
+                        {
+                            xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_2NxnD, cost, true);
+                            rpcTempCU->initEstData(uiDepth, iQP);
+                            if (m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD)
+                            {
+                                doNotBlockPu = rpcBestCU->getQtRootCbf(0) != 0;
+                            }
+                        }
+                    }
+
+                    //! Do vertical AMP
+                    if (bTestAMP_Ver)
+                    {
+                        if (doNotBlockPu)
+                        {
+                            xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_nLx2N, cost);
+                            rpcTempCU->initEstData(uiDepth, iQP);
+                            if (m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N)
+                            {
+                                doNotBlockPu = rpcBestCU->getQtRootCbf(0) != 0;
+                            }
+                        }
+                        if (doNotBlockPu)
+                        {
+                            xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_nRx2N, cost);
+                            rpcTempCU->initEstData(uiDepth, iQP);
+                        }
+                    }
+                    else if (bTestMergeAMP_Ver)
+                    {
+                        if (doNotBlockPu)
+                        {
+                            xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_nLx2N, cost, true);
+                            rpcTempCU->initEstData(uiDepth, iQP);
+                            if (m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N)
+                            {
+                                doNotBlockPu = rpcBestCU->getQtRootCbf(0) != 0;
+                            }
+                        }
+                        if (doNotBlockPu)
+                        {
+                            xCheckRDCostInter(rpcBestCU, rpcTempCU, SIZE_nRx2N, cost, true);
+                            rpcTempCU->initEstData(uiDepth, iQP);
+                        }
+                    }
+                }
+            }
+
+            // do normal intra modes
+            // speedup for inter frames
+            if (rpcBestCU->getSlice()->getSliceType() == I_SLICE ||
+                rpcBestCU->getCbf(0, TEXT_LUMA) != 0   ||
+                rpcBestCU->getCbf(0, TEXT_CHROMA_U) != 0   ||
+                rpcBestCU->getCbf(0, TEXT_CHROMA_V) != 0) // avoid very complex intra if it is unlikely
+            {
+                xCheckRDCostIntra(rpcBestCU, rpcTempCU, SIZE_2Nx2N, _2Nx2NCost);
+                rpcTempCU->initEstData(uiDepth, iQP);
+
+                if (uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth)
+                {
+                    if (rpcTempCU->getWidth(0) > (1 << rpcTempCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize()))
+                    {
+                        xCheckRDCostIntra(rpcBestCU, rpcTempCU, SIZE_NxN, _NxNCost);
+                        rpcTempCU->initEstData(uiDepth, iQP);
+                    }
+                }
+            }
+            // test PCM
+            if (pcPic->getSlice(0)->getSPS()->getUsePCM()
+                && rpcTempCU->getWidth(0) <= (1 << pcPic->getSlice(0)->getSPS()->getPCMLog2MaxSize())
+                && rpcTempCU->getWidth(0) >= (1 << pcPic->getSlice(0)->getSPS()->getPCMLog2MinSize()))
+            {
+                UInt uiRawBits = (2 * g_bitDepthY + g_bitDepthC) * rpcBestCU->getWidth(0) * rpcBestCU->getHeight(0) / 2;
+                UInt uiBestBits = rpcBestCU->getTotalBits();
+                if ((uiBestBits > uiRawBits) || (rpcBestCU->getTotalCost() > CALCRDCOST(uiRawBits, 0, m_pcRdCost->m_dLambda)))
+                {
+                    xCheckIntraPCM(rpcBestCU, rpcTempCU);
+                    rpcTempCU->initEstData(uiDepth, iQP);
+                }
+            }
+        }
+
+        m_pcEntropyCoder->resetBits();
+        m_pcEntropyCoder->encodeSplitFlag(rpcBestCU, 0, uiDepth, true);
+        rpcBestCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // split bits
+        rpcBestCU->getTotalBins() += ((TEncBinCABAC*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
+        rpcBestCU->getTotalCost()  = CALCRDCOST(rpcBestCU->getTotalBits(), rpcBestCU->getTotalDistortion(), m_pcRdCost->m_dLambda);
+
+        // accumulate statistics for early skip
+        if (rpcBestCU->isSkipped(0))
+        {
+            Int iIdx = g_aucConvertToBit[rpcBestCU->getWidth(0)];
+            afCost[iIdx] += rpcBestCU->getTotalCost();
+            aiNum[iIdx]++;
+        }
+
+        // Early CU determination
+        if (rpcBestCU->isSkipped(0))
+        {
+            bSubBranch = false;
+        }
+        else
+        {
+            bSubBranch = true;
+        }
+    }
+    else if (!(bSliceEnd && bInsidePicture))
+    {
+        bBoundary = true;
+        m_addSADDepth++;
+    }
+
+    // copy original YUV samples to PCM buffer
+    if (rpcBestCU->isLosslessCoded(0) && (rpcBestCU->getIPCMFlag(0) == false))
+    {
+        xFillPCMBuffer(rpcBestCU, m_ppcOrigYuv[uiDepth]);
+    }
+
+    rpcTempCU->initEstData(uiDepth, iQP);
+
+    // further split
+    if (bSubBranch && bTrySplitDQP && uiDepth < g_uiMaxCUDepth - g_uiAddCUDepth)
+    {
+        UChar       uhNextDepth         = uiDepth + 1;
+        TComDataCU* pcSubBestPartCU     = m_ppcBestCU[uhNextDepth];
+        TComDataCU* pcSubTempPartCU     = m_ppcTempCU[uhNextDepth];
+        UInt uiPartUnitIdx = 0;
+        for (; uiPartUnitIdx < 4; uiPartUnitIdx++)
+        {
+            pcSubBestPartCU->initSubCU(rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP);     // clear sub partition datas or init.
+            pcSubTempPartCU->initSubCU(rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP);     // clear sub partition datas or init.
+
+            Bool bInSlice = pcSubBestPartCU->getSCUAddr() < pcSlice->getSliceCurEndCUAddr();
+            if (bInSlice && (pcSubBestPartCU->getCUPelX() < pcSlice->getSPS()->getPicWidthInLumaSamples()) && (pcSubBestPartCU->getCUPelY() < pcSlice->getSPS()->getPicHeightInLumaSamples()))
+            {
+                if (0 == uiPartUnitIdx) //initialize RD with previous depth buffer
+                {
+                    m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
+                }
+                else
+                {
+                    m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]);
+                }
+
+                // The following if condition has to be commented out in case the early Abort based on comparison of parentCu cost, childCU cost is not required.
+                if (rpcBestCU->isIntra(0))
+                {
+                    xCompressCU(pcSubBestPartCU, pcSubTempPartCU, rpcBestCU, uhNextDepth, SIZE_NONE);
+                }
+                else
+                {
+                    xCompressCU(pcSubBestPartCU, pcSubTempPartCU, rpcBestCU, uhNextDepth, rpcBestCU->getPartitionSize(0));
+                }
+                {
+                    rpcTempCU->copyPartFrom(pcSubBestPartCU, uiPartUnitIdx, uhNextDepth); // Keep best part data to current temporary data.
+                    xCopyYuv2Tmp(pcSubBestPartCU->getTotalNumPart() * uiPartUnitIdx, uhNextDepth);
+                }
+            }
+            else if (bInSlice)
+            {
+                pcSubBestPartCU->copyToPic(uhNextDepth);
+                rpcTempCU->copyPartFrom(pcSubBestPartCU, uiPartUnitIdx, uhNextDepth);
+            }
+        }
+
+    if (!bBoundary)
+    {
+        m_pcEntropyCoder->resetBits();
+        m_pcEntropyCoder->encodeSplitFlag(rpcTempCU, 0, uiDepth, true);
+
+        rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits();         // split bits
+        rpcTempCU->getTotalBins() += ((TEncBinCABAC*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
+    }
+    rpcTempCU->getTotalCost()  = CALCRDCOST(rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion(), m_pcRdCost->m_dLambda);
+
+    if ((g_uiMaxCUWidth >> uiDepth) == rpcTempCU->getSlice()->getPPS()->getMinCuDQPSize() && rpcTempCU->getSlice()->getPPS()->getUseDQP())
+    {
+        Bool hasResidual = false;
+        for (UInt uiBlkIdx = 0; uiBlkIdx < rpcTempCU->getTotalNumPart(); uiBlkIdx++)
+        {
+            if (rpcTempCU->getCbf(uiBlkIdx, TEXT_LUMA) || rpcTempCU->getCbf(uiBlkIdx, TEXT_CHROMA_U) || rpcTempCU->getCbf(uiBlkIdx, TEXT_CHROMA_V))
+            {
+                hasResidual = true;
+                break;
+            }
+        }
+
+        UInt uiTargetPartIdx;
+        uiTargetPartIdx = 0;
+        if (hasResidual)
+        {
+#if !RDO_WITHOUT_DQP_BITS
+            m_pcEntropyCoder->resetBits();
+            m_pcEntropyCoder->encodeQP(rpcTempCU, uiTargetPartIdx, false);
+            rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits();         // dQP bits
+            rpcTempCU->getTotalBins() += ((TEncBinCABAC*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
+            rpcTempCU->getTotalCost()  = CALCRDCOST(rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion(), m_pcRdCost->m_dLambda);
+#endif
+
+            Bool foundNonZeroCbf = false;
+            rpcTempCU->setQPSubCUs(rpcTempCU->getRefQP(uiTargetPartIdx), rpcTempCU, 0, uiDepth, foundNonZeroCbf);
+            assert(foundNonZeroCbf);
+        }
+        else
+        {
+            rpcTempCU->setQPSubParts(rpcTempCU->getRefQP(uiTargetPartIdx), 0, uiDepth);         // set QP to default QP
+        }
+    }
+
+    m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
+    xCheckBestMode(rpcBestCU, rpcTempCU, uiDepth);                                     // RD compare current larger prediction
+                                                                                // with sub partitioned prediction.
+    }
+    
+    rpcBestCU->copyToPic(uiDepth);                                                   // Copy Best data to Picture for next partition prediction.
+    xCopyYuv2Pic(rpcBestCU->getPic(), rpcBestCU->getAddr(), rpcBestCU->getZorderIdxInCU(), uiDepth, uiDepth, rpcBestCU, uiLPelX, uiTPelY);   // Copy Yuv data to picture Yuv
+    
+    if (bBoundary || (bSliceEnd && bInsidePicture))
+    {
+        return;
+    }
+
+    // Assert if Best prediction mode is NONE
+    // Selected mode's RD-cost must be not MAX_DOUBLE.
+    assert(rpcBestCU->getPartitionSize(0) != SIZE_NONE);
+    assert(rpcBestCU->getPredictionMode(0) != MODE_NONE);
+    assert(rpcBestCU->getTotalCost() != MAX_DOUBLE);
+}
+
+#endif
+
+
 /** finish encoding a cu and handle end-of-slice conditions
  * \param pcCU
  * \param uiAbsPartIdx
--- a/source/Lib/TLibEncoder/TEncGOP.cpp	Thu May 16 17:58:42 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncGOP.cpp	Fri May 17 09:55:13 2013 +0530
@@ -680,7 +680,7 @@ Void TEncGOP::compressGOP(Int iPOCLast, 
             Bool bNoBinBitConstraintViolated = (!pcSlice->isNextSlice());
 
             startCUAddrSlice = pcSlice->getSliceCurEndCUAddr();
-            assert(startCUAddrSlice >= uiRealEndAddress);
+            assert(startCUAddrSlice >= uiRealEndAddress);
 
             nextCUAddr = startCUAddrSlice;
         }
@@ -1881,9 +1881,12 @@ Void TEncGOP::xCalculateAddPSNR(TComPic*
            uibits);
 
     printf(" [Y %6.4lf dB    U %6.4lf dB    V %6.4lf dB]", dYPSNR, dUPSNR, dVPSNR);
-    printf(" [ET %5.0f ]", dEncTime);
+    //printf(" [ET %5.0f ]", dEncTime);
 
-    for (Int iRefList = 0; iRefList < 2; iRefList++)
+    if (pcSlice->isIntra())
+        return;
+    Int numLists = pcSlice->isInterP() ? 1 : 2;
+    for (Int iRefList = 0; iRefList < numLists; iRefList++)
     {
         printf(" [L%d ", iRefList);
         for (Int iRefIndex = 0; iRefIndex < pcSlice->getNumRefIdx(RefPicList(iRefList)); iRefIndex++)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/cmake/FindVLD.cmake	Fri May 17 09:55:13 2013 +0530
@@ -0,0 +1,123 @@
+# Module for locating Visual Leak Detector.
+#
+# Customizable variables:
+#   VLD_ROOT_DIR
+#     This variable points to the Visual Leak Detector root directory. By
+#     default, the module looks for the installation directory by examining the
+#     Program Files/Program Files (x86) folders and the VLDROOT environment
+#     variable.
+#
+# Read-only variables:
+#   VLD_FOUND
+#     Indicates that the library has been found.
+#
+#   VLD_INCLUDE_DIRS
+#     Points to the Visual Leak Detector include directory.
+#
+#   VLD_LIBRARY_DIRS
+#     Points to the Visual Leak Detector directory that contains the libraries.
+#     The content of this variable can be passed to link_directories.
+#
+#   VLD_LIBRARIES
+#     Points to the Visual Leak Detector libraries that can be passed to
+#     target_link_libraries.
+#
+#
+# Copyright (c) 2012 Sergiu Dotenco
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+INCLUDE (FindPackageHandleStandardArgs)
+
+SET (_VLD_POSSIBLE_LIB_SUFFIXES lib)
+
+# Version 2.0 uses vld_x86 and vld_x64 instead of simply vld as library names
+IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+  LIST (APPEND _VLD_POSSIBLE_LIB_SUFFIXES lib/Win32)
+ELSEIF (CMAKE_SIZEOF_VOID_P EQUAL 8)
+  LIST (APPEND _VLD_POSSIBLE_LIB_SUFFIXES lib/Win64)
+ENDIF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+
+FIND_PATH (VLD_ROOT_DIR
+  NAMES include/vld.h
+  PATHS ENV VLDROOT
+        "$ENV{PROGRAMFILES}/Visual Leak Detector"
+        "$ENV{PROGRAMFILES(X86)}/Visual Leak Detector"
+        "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Visual Leak Detector;InstallLocation]"
+        "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Visual Leak Detector;InstallLocation]"
+  DOC "VLD root directory")
+
+FIND_PATH (VLD_INCLUDE_DIR
+  NAMES vld.h
+  HINTS ${VLD_ROOT_DIR}
+  PATH_SUFFIXES include
+  DOC "VLD include directory")
+
+FIND_LIBRARY (VLD_LIBRARY_DEBUG
+  NAMES vld
+  HINTS ${VLD_ROOT_DIR}
+  PATH_SUFFIXES ${_VLD_POSSIBLE_LIB_SUFFIXES}
+  DOC "VLD debug library")
+
+IF (VLD_ROOT_DIR)
+  SET (_VLD_VERSION_FILE ${VLD_ROOT_DIR}/CHANGES.txt)
+
+  IF (EXISTS ${_VLD_VERSION_FILE})
+    SET (_VLD_VERSION_REGEX
+      "Visual Leak Detector \\(VLD\\) Version (([0-9]+)\\.([0-9]+)([a-z]|(.([0-9]+)))?)")
+    FILE (STRINGS ${_VLD_VERSION_FILE} _VLD_VERSION_TMP REGEX
+      ${_VLD_VERSION_REGEX})
+
+    STRING (REGEX REPLACE ${_VLD_VERSION_REGEX} "\\1" _VLD_VERSION_TMP
+      "${_VLD_VERSION_TMP}")
+
+    STRING (REGEX REPLACE "([0-9]+).([0-9]+).*" "\\1" VLD_VERSION_MAJOR
+      "${_VLD_VERSION_TMP}")
+    STRING (REGEX REPLACE "([0-9]+).([0-9]+).*" "\\2" VLD_VERSION_MINOR
+      "${_VLD_VERSION_TMP}")
+
+    SET (VLD_VERSION ${VLD_VERSION_MAJOR}.${VLD_VERSION_MINOR})
+
+    IF ("${_VLD_VERSION_TMP}" MATCHES "^([0-9]+).([0-9]+).([0-9]+)$")
+      # major.minor.patch version numbering scheme
+      STRING (REGEX REPLACE "([0-9]+).([0-9]+).([0-9]+)" "\\3"
+        VLD_VERSION_PATCH "${_VLD_VERSION_TMP}")
+      SET (VLD_VERSION "${VLD_VERSION}.${VLD_VERSION_PATCH}")
+      SET (VLD_VERSION_COUNT 3)
+    ELSE ("${_VLD_VERSION_TMP}" MATCHES "^([0-9]+).([0-9]+).([0-9]+)$")
+      # major.minor version numbering scheme. The trailing letter is ignored.
+      SET (VLD_VERSION_COUNT 2)
+    ENDIF ("${_VLD_VERSION_TMP}" MATCHES "^([0-9]+).([0-9]+).([0-9]+)$")
+  ENDIF (EXISTS ${_VLD_VERSION_FILE})
+ENDIF (VLD_ROOT_DIR)
+
+IF (VLD_LIBRARY_DEBUG)
+  SET (VLD_LIBRARY debug ${VLD_LIBRARY_DEBUG} CACHE DOC "VLD library")
+  GET_FILENAME_COMPONENT (_VLD_LIBRARY_DIR ${VLD_LIBRARY_DEBUG} PATH)
+  SET (VLD_LIBRARY_DIR ${_VLD_LIBRARY_DIR} CACHE PATH "VLD library directory")
+ENDIF (VLD_LIBRARY_DEBUG)
+
+SET (VLD_INCLUDE_DIRS ${VLD_INCLUDE_DIR})
+SET (VLD_LIBRARY_DIRS ${VLD_LIBRARY_DIR})
+SET (VLD_LIBRARIES ${VLD_LIBRARY})
+
+MARK_AS_ADVANCED (VLD_INCLUDE_DIR VLD_LIBRARY_DIR VLD_LIBRARY_DEBUG VLD_LIBRARY)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS (VLD REQUIRED_VARS VLD_ROOT_DIR
+  VLD_INCLUDE_DIR VLD_LIBRARY VERSION_VAR VLD_VERSION)
--- a/source/tools/CMakeLists.txt	Thu May 16 17:58:42 2013 +0530
+++ b/source/tools/CMakeLists.txt	Fri May 17 09:55:13 2013 +0530
@@ -1,2 +1,1 @@
 add_subdirectory(dr_psnr)
-add_subdirectory(performanceProfiler)
--- a/source/x265cfg.cpp	Thu May 16 17:58:42 2013 +0530
+++ b/source/x265cfg.cpp	Fri May 17 09:55:13 2013 +0530
@@ -65,6 +65,7 @@ TAppEncCfg::TAppEncCfg()
     : m_pchBitstreamFile()
     , m_pchdQPFile()
     , m_scalingListFile()
+    , m_poolHandle(NULL)
 {
     m_aidQP = NULL;
     m_input = NULL;
@@ -85,6 +86,10 @@ TAppEncCfg::~TAppEncCfg()
     {
         delete[] m_aidQP;
     }
+    if (m_poolHandle)
+    {
+        m_poolHandle->Release();
+    }
     free(m_pchBitstreamFile);
     free(m_pchdQPFile);
     free(m_scalingListFile);
@@ -237,7 +242,7 @@ Bool TAppEncCfg::parseCfg(Int argc, Char
 
     string cfg_InputFile;
     string cfg_ReconFile;
-    string cfg_BitstreamFile;
+    string cfg_BitstreamFile("hevc.bin");
     string cfg_dQPFile;
     string cfg_ScalingListFile;
 
@@ -465,7 +470,7 @@ Bool TAppEncCfg::parseCfg(Int argc, Char
     }
 
     x265::SetupPrimitives(cpuid);
-    x265::ThreadPool::AllocThreadPool(threadcount);
+    m_poolHandle = x265::ThreadPool::AllocThreadPool(threadcount);
 
     /*
      * Set any derived parameters
--- a/source/x265cfg.h	Thu May 16 17:58:42 2013 +0530
+++ b/source/x265cfg.h	Fri May 17 09:55:13 2013 +0530
@@ -41,6 +41,7 @@
 #include "TLibCommon/CommonDef.h"
 #include "input/input.h"
 #include "output/output.h"
+#include "threadpool.h"
 #include "x265.h"
 #include <sstream>
 
@@ -57,6 +58,7 @@ class TAppEncCfg : public x265_params
 protected:
     x265::Input*  m_input;
     x265::Output* m_recon;
+    x265::ThreadPool *m_poolHandle;
 
     int       m_inputBitDepth;                  ///< bit-depth of input file
     int       m_outputBitDepth;                 ///< bit-depth of output file
--- a/source/x265main.cpp	Thu May 16 17:58:42 2013 +0530
+++ b/source/x265main.cpp	Fri May 17 09:55:13 2013 +0530
@@ -26,6 +26,11 @@
 #include "x265enc.h"
 #include "PPA/ppa.h"
 
+#if HAVE_VLD
+/* Visual Leak Detector */
+#include <vld.h>
+#endif
+
 using namespace std;
 
 #define XSTR(x) STR(x)
@@ -33,7 +38,9 @@ using namespace std;
 
 int main(int argc, char *argv[])
 {
-    TAppEncTop  cTAppEncTop;
+#if HAVE_VLD
+    VLDSetReportOptions(VLD_OPT_REPORT_TO_DEBUGGER, NULL);
+#endif
 
     PPA_INIT();
 
@@ -49,24 +56,32 @@ int main(int argc, char *argv[])
 #endif
     fprintf(stdout, "\n");
 
-    cTAppEncTop.create();
+    TAppEncTop *app = new TAppEncTop();
 
-    if (!cTAppEncTop.parseCfg(argc, argv))
+    app->create();
+
+    if (!app->parseCfg(argc, argv))
     {
-        cTAppEncTop.destroy();
+        app->destroy();
         return 1;
     }
  
     clock_t lBefore = clock();
 
-    cTAppEncTop.encode();
+    app->encode();
 
     double dResult = (double)(clock() - lBefore) / CLOCKS_PER_SEC;
     printf("\n Total Time: %12.3f sec.\n", dResult);
 
-    cTAppEncTop.destroy();
+    app->destroy();
+
+    delete app;
 
     x265_cleanup();
 
+#if HAVE_VLD
+    assert(VLDReportLeaks() == 0);
+#endif
+
     return 0;
 }