Mercurial > x265
changeset 9568:8d5f9b7b4733 draft
Merge with public
author | Praveen Tiwari <praveen@multicorewareinc.com> |
---|---|
date | Wed, 25 Feb 2015 10:05:40 +0530 |
parents | 8be71cee10f3 (current diff) 87173d41df87 (diff) |
children | 02bac78bde96 |
files | source/encoder/entropy.cpp |
diffstat | 12 files changed, 423 insertions(+-), 439 deletions(-) [+] |
line wrap: on
line diff
--- a/source/common/cudata.cpp Tue Feb 24 17:26:37 2015 +0530 +++ b/source/common/cudata.cpp Wed Feb 25 10:05:40 2015 +0530 @@ -1375,8 +1375,8 @@ bool CUData::hasEqualMotion(uint32_t abs return true; } -/* Construct list of merging candidates */ -uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const +/* Construct list of merging candidates, returns count */ +uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const { uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; const bool isInterB = m_slice->isInterB(); @@ -1385,10 +1385,10 @@ uint32_t CUData::getInterMergeCandidates for (uint32_t i = 0; i < maxNumMergeCand; ++i) { - mvFieldNeighbours[i][0].mv = 0; - mvFieldNeighbours[i][1].mv = 0; - mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID; - mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID; + candMvField[i][0].mv = 0; + candMvField[i][1].mv = 0; + candMvField[i][0].refIdx = REF_NOT_VALID; + candMvField[i][1].refIdx = REF_NOT_VALID; } /* calculate the location of upper-left corner pixel and size of the current PU */ @@ -1420,11 +1420,11 @@ uint32_t CUData::getInterMergeCandidates if (isAvailableA1) { // get Inter Dir - interDirNeighbours[count] = cuLeft->m_interDir[leftPartIdx]; + candDir[count] = cuLeft->m_interDir[leftPartIdx]; // get Mv from Left - cuLeft->getMvField(cuLeft, leftPartIdx, 0, mvFieldNeighbours[count][0]); + cuLeft->getMvField(cuLeft, leftPartIdx, 0, candMvField[count][0]); if (isInterB) - cuLeft->getMvField(cuLeft, leftPartIdx, 1, mvFieldNeighbours[count][1]); + cuLeft->getMvField(cuLeft, leftPartIdx, 1, candMvField[count][1]); count++; @@ -1444,11 +1444,11 @@ uint32_t CUData::getInterMergeCandidates if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx))) { // get Inter Dir - interDirNeighbours[count] = cuAbove->m_interDir[abovePartIdx]; + candDir[count] = cuAbove->m_interDir[abovePartIdx]; // get Mv from Left - cuAbove->getMvField(cuAbove, abovePartIdx, 0, mvFieldNeighbours[count][0]); + cuAbove->getMvField(cuAbove, abovePartIdx, 0, candMvField[count][0]); if (isInterB) - cuAbove->getMvField(cuAbove, abovePartIdx, 1, mvFieldNeighbours[count][1]); + cuAbove->getMvField(cuAbove, abovePartIdx, 1, candMvField[count][1]); count++; @@ -1465,11 +1465,11 @@ uint32_t CUData::getInterMergeCandidates if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx))) { // get Inter Dir - interDirNeighbours[count] = cuAboveRight->m_interDir[aboveRightPartIdx]; + candDir[count] = cuAboveRight->m_interDir[aboveRightPartIdx]; // get Mv from Left - cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, mvFieldNeighbours[count][0]); + cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, candMvField[count][0]); if (isInterB) - cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, mvFieldNeighbours[count][1]); + cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, candMvField[count][1]); count++; @@ -1486,11 +1486,11 @@ uint32_t CUData::getInterMergeCandidates if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx))) { // get Inter Dir - interDirNeighbours[count] = cuLeftBottom->m_interDir[leftBottomPartIdx]; + candDir[count] = cuLeftBottom->m_interDir[leftBottomPartIdx]; // get Mv from Left - cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, mvFieldNeighbours[count][0]); + cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, candMvField[count][0]); if (isInterB) - cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, mvFieldNeighbours[count][1]); + cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, candMvField[count][1]); count++; @@ -1510,11 +1510,11 @@ uint32_t CUData::getInterMergeCandidates && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx))) { // get Inter Dir - interDirNeighbours[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx]; + candDir[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx]; // get Mv from Left - cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, mvFieldNeighbours[count][0]); + cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, candMvField[count][0]); if (isInterB) - cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, mvFieldNeighbours[count][1]); + cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, candMvField[count][1]); count++; @@ -1563,8 +1563,8 @@ uint32_t CUData::getInterMergeCandidates if (bExistMV) { dir |= 1; - mvFieldNeighbours[count][0].mv = colmv; - mvFieldNeighbours[count][0].refIdx = refIdx; + candMvField[count][0].mv = colmv; + candMvField[count][0].refIdx = refIdx; } if (isInterB) @@ -1576,14 +1576,14 @@ uint32_t CUData::getInterMergeCandidates if (bExistMV) { dir |= 2; - mvFieldNeighbours[count][1].mv = colmv; - mvFieldNeighbours[count][1].refIdx = refIdx; + candMvField[count][1].mv = colmv; + candMvField[count][1].refIdx = refIdx; } } if (dir != 0) { - interDirNeighbours[count] = (uint8_t)dir; + candDir[count] = (uint8_t)dir; count++; @@ -1605,20 +1605,20 @@ uint32_t CUData::getInterMergeCandidates priorityList0 >>= 2; priorityList1 >>= 2; - if ((interDirNeighbours[i] & 0x1) && (interDirNeighbours[j] & 0x2)) + if ((candDir[i] & 0x1) && (candDir[j] & 0x2)) { // get Mv from cand[i] and cand[j] - int refIdxL0 = mvFieldNeighbours[i][0].refIdx; - int refIdxL1 = mvFieldNeighbours[j][1].refIdx; + int refIdxL0 = candMvField[i][0].refIdx; + int refIdxL1 = candMvField[j][1].refIdx; int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0]; int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1]; - if (!(refPOCL0 == refPOCL1 && mvFieldNeighbours[i][0].mv == mvFieldNeighbours[j][1].mv)) + if (!(refPOCL0 == refPOCL1 && candMvField[i][0].mv == candMvField[j][1].mv)) { - mvFieldNeighbours[count][0].mv = mvFieldNeighbours[i][0].mv; - mvFieldNeighbours[count][0].refIdx = refIdxL0; - mvFieldNeighbours[count][1].mv = mvFieldNeighbours[j][1].mv; - mvFieldNeighbours[count][1].refIdx = refIdxL1; - interDirNeighbours[count] = 3; + candMvField[count][0].mv = candMvField[i][0].mv; + candMvField[count][0].refIdx = refIdxL0; + candMvField[count][1].mv = candMvField[j][1].mv; + candMvField[count][1].refIdx = refIdxL1; + candDir[count] = 3; count++; @@ -1633,15 +1633,15 @@ uint32_t CUData::getInterMergeCandidates int refcnt = 0; while (count < maxNumMergeCand) { - interDirNeighbours[count] = 1; - mvFieldNeighbours[count][0].mv = 0; - mvFieldNeighbours[count][0].refIdx = r; + candDir[count] = 1; + candMvField[count][0].mv = 0; + candMvField[count][0].refIdx = r; if (isInterB) { - interDirNeighbours[count] = 3; - mvFieldNeighbours[count][1].mv.word = 0; - mvFieldNeighbours[count][1].refIdx = r; + candDir[count] = 3; + candMvField[count][1].mv.word = 0; + candMvField[count][1].refIdx = r; } count++;
--- a/source/common/cudata.h Tue Feb 24 17:26:37 2015 +0530 +++ b/source/common/cudata.h Wed Feb 25 10:05:40 2015 +0530 @@ -195,7 +195,7 @@ public: uint8_t getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; } uint8_t getQtRootCbf(uint32_t absPartIdx) const { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]; } int8_t getRefQP(uint32_t currAbsIdxInCTU) const; - uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const; + uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*candMvField)[2], uint8_t* candDir) const; void clipMv(MV& outMV) const; int fillMvpCand(uint32_t puIdx, uint32_t absPartIdx, int picList, int refIdx, MV* amvpCand, MV* mvc) const; void getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const;
--- a/source/common/lowres.cpp Tue Feb 24 17:26:37 2015 +0530 +++ b/source/common/lowres.cpp Wed Feb 25 10:05:40 2015 +0530 @@ -56,10 +56,7 @@ bool Lowres::create(PicYuv *origPic, int CHECKED_MALLOC(propagateCost, uint16_t, cuCount); /* allocate lowres buffers */ - CHECKED_MALLOC(buffer[0], pixel, 4 * planesize); - - /* initialize the whole buffer to prevent valgrind warnings on right edge */ - memset(buffer[0], 0, 4 * sizeof(pixel) * planesize); + CHECKED_MALLOC_ZERO(buffer[0], pixel, 4 * planesize); buffer[1] = buffer[0] + planesize; buffer[2] = buffer[1] + planesize;
--- a/source/common/predict.cpp Tue Feb 24 17:26:37 2015 +0530 +++ b/source/common/predict.cpp Wed Feb 25 10:05:40 2015 +0530 @@ -34,6 +34,18 @@ using namespace x265; #pragma warning(disable: 4127) // conditional expression is constant #endif +PredictionUnit::PredictionUnit(const CUData& cu, const CUGeom& cuGeom, int puIdx) +{ + /* address of CTU */ + ctuAddr = cu.m_cuAddr; + + /* offset of CU */ + cuAbsPartIdx = cuGeom.absPartIdx; + + /* offset and dimensions of PU */ + cu.getPartIndexAndSize(puIdx, puAbsPartIdx, width, height); +} + namespace { inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset) @@ -112,37 +124,25 @@ void Predict::predIntraChromaAng(uint32_ primitives.cu[sizeIdx].intra_pred[dirMode](dst, stride, srcBuf, dirMode, 0); } -void Predict::initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx) -{ - m_predSlice = cu.m_slice; - cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight); - m_ctuAddr = cu.m_cuAddr; - m_cuAbsPartIdx = cuGeom.absPartIdx; -} - -void Predict::prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx) -{ - initMotionCompensation(cu, cuGeom, partIdx); - m_refIdx0 = cu.m_refIdx[0][m_puAbsPartIdx]; - m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx]; - m_refIdx1 = cu.m_refIdx[1][m_puAbsPartIdx]; - m_clippedMv[1] = cu.m_mv[1][m_puAbsPartIdx]; - cu.clipMv(m_clippedMv[0]); - cu.clipMv(m_clippedMv[1]); -} +void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu, Yuv& predYuv, bool bLuma, bool bChroma) +{ + int refIdx0 = cu.m_refIdx[0][pu.puAbsPartIdx]; + int refIdx1 = cu.m_refIdx[1][pu.puAbsPartIdx]; -void Predict::motionCompensation(Yuv& predYuv, bool bLuma, bool bChroma) -{ - if (m_predSlice->isInterP()) + if (cu.m_slice->isInterP()) { /* P Slice */ WeightValues wv0[3]; - X265_CHECK(m_refIdx0 >= 0, "invalid P refidx\n"); - X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "P refidx out of range\n"); - const WeightParam *wp0 = m_predSlice->m_weightPredTable[0][m_refIdx0]; + + X265_CHECK(refIdx0 >= 0, "invalid P refidx\n"); + X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "P refidx out of range\n"); + const WeightParam *wp0 = cu.m_slice->m_weightPredTable[0][refIdx0]; - if (m_predSlice->m_pps->bUseWeightPred && wp0->bPresentFlag) + MV mv0 = cu.m_mv[0][pu.puAbsPartIdx]; + cu.clipMv(mv0); + + if (cu.m_slice->m_pps->bUseWeightPred && wp0->bPresentFlag) { for (int plane = 0; plane < 3; plane++) { @@ -155,18 +155,18 @@ void Predict::motionCompensation(Yuv& pr ShortYuv& shortYuv = m_predShortYuv[0]; if (bLuma) - predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); + predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); if (bChroma) - predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); + predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); - addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma); + addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma); } else { if (bLuma) - predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); + predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); if (bChroma) - predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); + predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); } } else @@ -176,10 +176,10 @@ void Predict::motionCompensation(Yuv& pr WeightValues wv0[3], wv1[3]; const WeightParam *pwp0, *pwp1; - if (m_predSlice->m_pps->bUseWeightedBiPred) + if (cu.m_slice->m_pps->bUseWeightedBiPred) { - pwp0 = m_refIdx0 >= 0 ? m_predSlice->m_weightPredTable[0][m_refIdx0] : NULL; - pwp1 = m_refIdx1 >= 0 ? m_predSlice->m_weightPredTable[1][m_refIdx1] : NULL; + pwp0 = refIdx0 >= 0 ? cu.m_slice->m_weightPredTable[0][refIdx0] : NULL; + pwp1 = refIdx1 >= 0 ? cu.m_slice->m_weightPredTable[1][refIdx1] : NULL; if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag)) { @@ -200,7 +200,7 @@ void Predict::motionCompensation(Yuv& pr else { /* uniprediction weighting, always outputs to wv0 */ - const WeightParam* pwp = (m_refIdx0 >= 0) ? pwp0 : pwp1; + const WeightParam* pwp = (refIdx0 >= 0) ? pwp0 : pwp1; for (int plane = 0; plane < 3; plane++) { wv0[plane].w = pwp[plane].inputWeight; @@ -213,89 +213,100 @@ void Predict::motionCompensation(Yuv& pr else pwp0 = pwp1 = NULL; - if (m_refIdx0 >= 0 && m_refIdx1 >= 0) + if (refIdx0 >= 0 && refIdx1 >= 0) { + MV mv0 = cu.m_mv[0][pu.puAbsPartIdx]; + MV mv1 = cu.m_mv[1][pu.puAbsPartIdx]; + cu.clipMv(mv0); + cu.clipMv(mv1); + /* Biprediction */ - X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "bidir refidx0 out of range\n"); - X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "bidir refidx1 out of range\n"); + X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "bidir refidx0 out of range\n"); + X265_CHECK(refIdx1 < cu.m_slice->m_numRefIdx[1], "bidir refidx1 out of range\n"); if (bLuma) { - predInterLumaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); - predInterLumaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); + predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); + predInterLumaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1); } if (bChroma) { - predInterChromaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); - predInterChromaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); + predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); + predInterChromaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1); } if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag)) - addWeightBi(predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma); + addWeightBi(pu, predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma); else - predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], m_puAbsPartIdx, m_puWidth, m_puHeight, bLuma, bChroma); + predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], pu.puAbsPartIdx, pu.width, pu.height, bLuma, bChroma); } - else if (m_refIdx0 >= 0) + else if (refIdx0 >= 0) { + MV mv0 = cu.m_mv[0][pu.puAbsPartIdx]; + cu.clipMv(mv0); + /* uniprediction to L0 */ - X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "unidir refidx0 out of range\n"); + X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "unidir refidx0 out of range\n"); if (pwp0 && pwp0->bPresentFlag) { ShortYuv& shortYuv = m_predShortYuv[0]; if (bLuma) - predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); + predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); if (bChroma) - predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); + predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); - addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma); + addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma); } else { if (bLuma) - predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); + predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); if (bChroma) - predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); + predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0); } } else { + MV mv1 = cu.m_mv[1][pu.puAbsPartIdx]; + cu.clipMv(mv1); + /* uniprediction to L1 */ - X265_CHECK(m_refIdx1 >= 0, "refidx1 was not positive\n"); - X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "unidir refidx1 out of range\n"); + X265_CHECK(refIdx1 >= 0, "refidx1 was not positive\n"); + X265_CHECK(refIdx1 < cu.m_slice->m_numRefIdx[1], "unidir refidx1 out of range\n"); if (pwp1 && pwp1->bPresentFlag) { ShortYuv& shortYuv = m_predShortYuv[0]; if (bLuma) - predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); + predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1); if (bChroma) - predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); + predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1); - addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma); + addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma); } else { if (bLuma) - predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); + predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1); if (bChroma) - predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); + predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1); } } } } -void Predict::predInterLumaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const +void Predict::predInterLumaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const { - pixel* dst = dstYuv.getLumaAddr(m_puAbsPartIdx); + pixel* dst = dstYuv.getLumaAddr(pu.puAbsPartIdx); intptr_t dstStride = dstYuv.m_size; intptr_t srcStride = refPic.m_stride; intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; - int partEnum = partitionFromSizes(m_puWidth, m_puHeight); - const pixel* src = refPic.getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset; + int partEnum = partitionFromSizes(pu.width, pu.height); + const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset; int xFrac = mv.x & 0x3; int yFrac = mv.y & 0x3; @@ -310,32 +321,32 @@ void Predict::predInterLumaPixel(Yuv& ds primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac); } -void Predict::predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const +void Predict::predInterLumaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const { - int16_t* dst = dstSYuv.getLumaAddr(m_puAbsPartIdx); + int16_t* dst = dstSYuv.getLumaAddr(pu.puAbsPartIdx); int dstStride = dstSYuv.m_size; intptr_t srcStride = refPic.m_stride; intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; - const pixel* src = refPic.getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset; + const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset; int xFrac = mv.x & 0x3; int yFrac = mv.y & 0x3; - int partEnum = partitionFromSizes(m_puWidth, m_puHeight); + int partEnum = partitionFromSizes(pu.width, pu.height); - X265_CHECK((m_puWidth % 4) + (m_puHeight % 4) == 0, "width or height not divisible by 4\n"); + X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not divisible by 4\n"); X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n"); if (!(yFrac | xFrac)) - primitives.luma_p2s(src, srcStride, dst, m_puWidth, m_puHeight); + primitives.luma_p2s(src, srcStride, dst, pu.width, pu.height); else if (!yFrac) primitives.pu[partEnum].luma_hps(src, srcStride, dst, dstStride, xFrac, 0); else if (!xFrac) primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac); else { - int tmpStride = m_puWidth; + int tmpStride = pu.width; int filterSize = NTAPS_LUMA; int halfFilterSize = (filterSize >> 1); primitives.pu[partEnum].luma_hps(src, srcStride, m_immedVals, tmpStride, xFrac, 1); @@ -343,7 +354,7 @@ void Predict::predInterLumaShort(ShortYu } } -void Predict::predInterChromaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const +void Predict::predInterChromaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const { intptr_t dstStride = dstYuv.m_csize; intptr_t refStride = refPic.m_strideC; @@ -353,16 +364,16 @@ void Predict::predInterChromaPixel(Yuv& intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride; - const pixel* refCb = refPic.getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; - const pixel* refCr = refPic.getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; + const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; + const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; - pixel* dstCb = dstYuv.getCbAddr(m_puAbsPartIdx); - pixel* dstCr = dstYuv.getCrAddr(m_puAbsPartIdx); + pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx); + pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx); int xFrac = mv.x & ((1 << shiftHor) - 1); int yFrac = mv.y & ((1 << shiftVer) - 1); - int partEnum = partitionFromSizes(m_puWidth, m_puHeight); + int partEnum = partitionFromSizes(pu.width, pu.height); if (!(yFrac | xFrac)) { @@ -381,7 +392,7 @@ void Predict::predInterChromaPixel(Yuv& } else { - int extStride = m_puWidth >> m_hChromaShift; + int extStride = pu.width >> m_hChromaShift; int filterSize = NTAPS_CHROMA; int halfFilterSize = (filterSize >> 1); @@ -393,7 +404,7 @@ void Predict::predInterChromaPixel(Yuv& } } -void Predict::predInterChromaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const +void Predict::predInterChromaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const { intptr_t refStride = refPic.m_strideC; intptr_t dstStride = dstSYuv.m_csize; @@ -403,19 +414,19 @@ void Predict::predInterChromaShort(Short intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride; - const pixel* refCb = refPic.getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; - const pixel* refCr = refPic.getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; + const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; + const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; - int16_t* dstCb = dstSYuv.getCbAddr(m_puAbsPartIdx); - int16_t* dstCr = dstSYuv.getCrAddr(m_puAbsPartIdx); + int16_t* dstCb = dstSYuv.getCbAddr(pu.puAbsPartIdx); + int16_t* dstCr = dstSYuv.getCrAddr(pu.puAbsPartIdx); int xFrac = mv.x & ((1 << shiftHor) - 1); int yFrac = mv.y & ((1 << shiftVer) - 1); - int partEnum = partitionFromSizes(m_puWidth, m_puHeight); + int partEnum = partitionFromSizes(pu.width, pu.height); - uint32_t cxWidth = m_puWidth >> m_hChromaShift; - uint32_t cxHeight = m_puHeight >> m_vChromaShift; + uint32_t cxWidth = pu.width >> m_hChromaShift; + uint32_t cxHeight = pu.height >> m_vChromaShift; X265_CHECK(((cxWidth | cxHeight) % 2) == 0, "chroma block size expected to be multiple of 2\n"); @@ -447,7 +458,7 @@ void Predict::predInterChromaShort(Short } /* weighted averaging for bi-pred */ -void Predict::addWeightBi(Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const +void Predict::addWeightBi(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const { int x, y; @@ -456,9 +467,9 @@ void Predict::addWeightBi(Yuv& predYuv, if (bLuma) { - pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx); - const int16_t* srcY0 = srcYuv0.getLumaAddr(m_puAbsPartIdx); - const int16_t* srcY1 = srcYuv1.getLumaAddr(m_puAbsPartIdx); + pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx); + const int16_t* srcY0 = srcYuv0.getLumaAddr(pu.puAbsPartIdx); + const int16_t* srcY1 = srcYuv1.getLumaAddr(pu.puAbsPartIdx); // Luma w0 = wp0[0].w; @@ -473,9 +484,9 @@ void Predict::addWeightBi(Yuv& predYuv, dststride = predYuv.m_size; // TODO: can we use weight_sp here? - for (y = m_puHeight - 1; y >= 0; y--) + for (y = pu.height - 1; y >= 0; y--) { - for (x = m_puWidth - 1; x >= 0; ) + for (x = pu.width - 1; x >= 0; ) { // note: luma min width is 4 dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); @@ -496,12 +507,12 @@ void Predict::addWeightBi(Yuv& predYuv, if (bChroma) { - pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx); - pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx); - const int16_t* srcU0 = srcYuv0.getCbAddr(m_puAbsPartIdx); - const int16_t* srcV0 = srcYuv0.getCrAddr(m_puAbsPartIdx); - const int16_t* srcU1 = srcYuv1.getCbAddr(m_puAbsPartIdx); - const int16_t* srcV1 = srcYuv1.getCrAddr(m_puAbsPartIdx); + pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx); + pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx); + const int16_t* srcU0 = srcYuv0.getCbAddr(pu.puAbsPartIdx); + const int16_t* srcV0 = srcYuv0.getCrAddr(pu.puAbsPartIdx); + const int16_t* srcU1 = srcYuv1.getCbAddr(pu.puAbsPartIdx); + const int16_t* srcV1 = srcYuv1.getCrAddr(pu.puAbsPartIdx); // Chroma U w0 = wp0[1].w; @@ -515,8 +526,8 @@ void Predict::addWeightBi(Yuv& predYuv, src1Stride = srcYuv1.m_csize; dststride = predYuv.m_csize; - uint32_t cwidth = m_puWidth >> srcYuv0.m_hChromaShift; - uint32_t cheight = m_puHeight >> srcYuv0.m_vChromaShift; + uint32_t cwidth = pu.width >> srcYuv0.m_hChromaShift; + uint32_t cheight = pu.height >> srcYuv0.m_vChromaShift; // TODO: can we use weight_sp here? for (y = cheight - 1; y >= 0; y--) @@ -561,15 +572,15 @@ void Predict::addWeightBi(Yuv& predYuv, } /* weighted averaging for uni-pred */ -void Predict::addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const +void Predict::addWeightUni(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const { int w0, offset, shiftNum, shift, round; uint32_t srcStride, dstStride; if (bLuma) { - pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx); - const int16_t* srcY0 = srcYuv.getLumaAddr(m_puAbsPartIdx); + pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx); + const int16_t* srcY0 = srcYuv.getLumaAddr(pu.puAbsPartIdx); // Luma w0 = wp[0].w; @@ -580,15 +591,15 @@ void Predict::addWeightUni(Yuv& predYuv, srcStride = srcYuv.m_size; dstStride = predYuv.m_size; - primitives.weight_sp(srcY0, dstY, srcStride, dstStride, m_puWidth, m_puHeight, w0, round, shift, offset); + primitives.weight_sp(srcY0, dstY, srcStride, dstStride, pu.width, pu.height, w0, round, shift, offset); } if (bChroma) { - pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx); - pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx); - const int16_t* srcU0 = srcYuv.getCbAddr(m_puAbsPartIdx); - const int16_t* srcV0 = srcYuv.getCrAddr(m_puAbsPartIdx); + pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx); + pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx); + const int16_t* srcU0 = srcYuv.getCbAddr(pu.puAbsPartIdx); + const int16_t* srcV0 = srcYuv.getCrAddr(pu.puAbsPartIdx); // Chroma U w0 = wp[1].w; @@ -600,8 +611,8 @@ void Predict::addWeightUni(Yuv& predYuv, srcStride = srcYuv.m_csize; dstStride = predYuv.m_csize; - uint32_t cwidth = m_puWidth >> srcYuv.m_hChromaShift; - uint32_t cheight = m_puHeight >> srcYuv.m_vChromaShift; + uint32_t cwidth = pu.width >> srcYuv.m_hChromaShift; + uint32_t cheight = pu.height >> srcYuv.m_vChromaShift; primitives.weight_sp(srcU0, dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
--- a/source/common/predict.h Tue Feb 24 17:26:37 2015 +0530 +++ b/source/common/predict.h Wed Feb 25 10:05:40 2015 +0530 @@ -36,6 +36,17 @@ class CUData; class Slice; struct CUGeom; +struct PredictionUnit +{ + uint32_t ctuAddr; // raster index of current CTU within its picture + uint32_t cuAbsPartIdx; // z-order offset of current CU within its CTU + uint32_t puAbsPartIdx; // z-order offset of current PU with its CU + int width; + int height; + + PredictionUnit(const CUData& cu, const CUGeom& cuGeom, int puIdx); +}; + class Predict { public: @@ -65,38 +76,34 @@ public: // Unfiltered/filtered neighbours of the current partition. pixel intraNeighbourBuf[2][258]; + /* Slice information */ - const Slice* m_predSlice; int m_csp; int m_hChromaShift; int m_vChromaShift; - /* cached CU information for prediction */ - uint32_t m_ctuAddr; // raster index of current CTU within its picture - uint32_t m_cuAbsPartIdx; // z-order index of current CU within its CTU - uint32_t m_puAbsPartIdx; // z-order index of current PU with its CU - int m_puWidth; - int m_puHeight; - int m_refIdx0; - int m_refIdx1; - - /* TODO: Need to investigate clipping while writing into the TComDataCU fields itself */ - MV m_clippedMv[2]; - Predict(); ~Predict(); bool allocBuffers(int csp); // motion compensation functions - void predInterLumaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const; - void predInterChromaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const; + void predInterLumaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const; + void predInterChromaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const; + + void predInterLumaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const; + void predInterChromaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const; - void predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const; - void predInterChromaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const; + void addWeightBi(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const; + void addWeightUni(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const; + + void motionCompensation(const CUData& cu, const PredictionUnit& pu, Yuv& predYuv, bool bLuma, bool bChroma); - void addWeightBi(Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const; - void addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const; + /* Angular Intra */ + void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSize); + void predIntraChromaAng(uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSizeC, int chFmt); + void initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode); + void initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId); /* Intra prediction helper functions */ static void initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *IntraNeighbors); @@ -111,19 +118,6 @@ public: static int isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits); template<bool cip> static int isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits); - -public: - - /* prepMotionCompensation needs to be called to prepare MC with CU-relevant data */ - void initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx); - void prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx); - void motionCompensation(Yuv& predYuv, bool bLuma, bool bChroma); - - /* Angular Intra */ - void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSize); - void predIntraChromaAng(uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSizeC, int chFmt); - void initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode); - void initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId); }; }
--- a/source/common/threadpool.cpp Tue Feb 24 17:26:37 2015 +0530 +++ b/source/common/threadpool.cpp Wed Feb 25 10:05:40 2015 +0530 @@ -321,7 +321,7 @@ ThreadPool* ThreadPool::allocThreadPools numPools = 0; return NULL; } - if (bNumaSupport) + if (numNumaNodes > 1) x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads on NUMA node %d\n", i, cores, node); else x265_log(p, X265_LOG_INFO, "Thread pool created using %d threads\n", cores);
--- a/source/encoder/analysis.cpp Tue Feb 24 17:26:37 2015 +0530 +++ b/source/encoder/analysis.cpp Wed Feb 25 10:05:40 2015 +0530 @@ -574,8 +574,8 @@ void Analysis::compressInterCU_dist(cons { for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++) { - prepMotionCompensation(bestInter->cu, cuGeom, puIdx); - motionCompensation(bestInter->predYuv, false, true); + PredictionUnit pu(bestInter->cu, cuGeom, puIdx); + motionCompensation(bestInter->cu, pu, bestInter->predYuv, false, true); } } encodeResAndCalcRdInterCU(*bestInter, cuGeom); @@ -610,8 +610,8 @@ void Analysis::compressInterCU_dist(cons /* finally code the best mode selected from SA8D costs */ for (uint32_t puIdx = 0; puIdx < md.bestMode->cu.getNumPartInter(); puIdx++) { - prepMotionCompensation(md.bestMode->cu, cuGeom, puIdx); - motionCompensation(md.bestMode->predYuv, false, true); + PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx); + motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, true); } encodeResAndCalcRdInterCU(*md.bestMode, cuGeom); } @@ -828,8 +828,8 @@ void Analysis::compressInterCU_rd0_4(con { for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++) { - prepMotionCompensation(bestInter->cu, cuGeom, puIdx); - motionCompensation(bestInter->predYuv, false, true); + PredictionUnit pu(bestInter->cu, cuGeom, puIdx); + motionCompensation(bestInter->cu, pu, bestInter->predYuv, false, true); } } encodeResAndCalcRdInterCU(*bestInter, cuGeom); @@ -883,8 +883,8 @@ void Analysis::compressInterCU_rd0_4(con { for (uint32_t puIdx = 0; puIdx < md.bestMode->cu.getNumPartInter(); puIdx++) { - prepMotionCompensation(md.bestMode->cu, cuGeom, puIdx); - motionCompensation(md.bestMode->predYuv, false, true); + PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx); + motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, true); } if (m_param->rdLevel == 2) encodeResAndCalcRdInterCU(*md.bestMode, cuGeom); @@ -1217,32 +1217,32 @@ void Analysis::checkMerge2Nx2N_rd0_4(Mod bestPred->cu.setPredModeSubParts(MODE_INTER); bestPred->cu.m_mergeFlag[0] = true; - MVField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists - uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS]; - uint32_t maxNumMergeCand = tempPred->cu.getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours); + MVField candMvField[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists + uint8_t candDir[MRG_MAX_NUM_CANDS]; + uint32_t numMergeCand = tempPred->cu.getInterMergeCandidates(0, 0, candMvField, candDir); + PredictionUnit pu(merge.cu, cuGeom, 0); bestPred->sa8dCost = MAX_INT64; int bestSadCand = -1; int sizeIdx = cuGeom.log2CUSize - 2; - for (uint32_t i = 0; i < maxNumMergeCand; ++i) + for (uint32_t i = 0; i < numMergeCand; ++i) { if (m_bFrameParallel && - (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 || - mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4)) + (candMvField[i][0].mv.y >= (m_param->searchRange + 1) * 4 || + candMvField[i][1].mv.y >= (m_param->searchRange + 1) * 4)) continue; tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; // merge candidate ID is stored in L0 MVP idx - tempPred->cu.m_interDir[0] = interDirNeighbours[i]; - tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv; - tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx; - tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv; - tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx; + tempPred->cu.m_interDir[0] = candDir[i]; + tempPred->cu.m_mv[0][0] = candMvField[i][0].mv; + tempPred->cu.m_mv[1][0] = candMvField[i][1].mv; + tempPred->cu.m_refIdx[0][0] = (int8_t)candMvField[i][0].refIdx; + tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx; - prepMotionCompensation(tempPred->cu, cuGeom, 0); - motionCompensation(tempPred->predYuv, true, m_bChromaSa8d); + motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, m_bChromaSa8d); - tempPred->sa8dBits = getTUBits(i, maxNumMergeCand); + tempPred->sa8dBits = getTUBits(i, numMergeCand); tempPred->distortion = primitives.cu[sizeIdx].sa8d(fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size); if (m_bChromaSa8d) { @@ -1264,10 +1264,7 @@ void Analysis::checkMerge2Nx2N_rd0_4(Mod /* calculate the motion compensation for chroma for the best mode selected */ if (!m_bChromaSa8d) /* Chroma MC was done above */ - { - prepMotionCompensation(bestPred->cu, cuGeom, 0); - motionCompensation(bestPred->predYuv, false, true); - } + motionCompensation(bestPred->cu, pu, bestPred->predYuv, false, true); if (m_param->rdLevel) { @@ -1278,11 +1275,11 @@ void Analysis::checkMerge2Nx2N_rd0_4(Mod /* Encode with residual */ tempPred->cu.m_mvpIdx[0][0] = (uint8_t)bestSadCand; - tempPred->cu.setPUInterDir(interDirNeighbours[bestSadCand], 0, 0); - tempPred->cu.setPUMv(0, mvFieldNeighbours[bestSadCand][0].mv, 0, 0); - tempPred->cu.setPURefIdx(0, (int8_t)mvFieldNeighbours[bestSadCand][0].refIdx, 0, 0); - tempPred->cu.setPUMv(1, mvFieldNeighbours[bestSadCand][1].mv, 0, 0); - tempPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestSadCand][1].refIdx, 0, 0); + tempPred->cu.setPUInterDir(candDir[bestSadCand], 0, 0); + tempPred->cu.setPUMv(0, candMvField[bestSadCand][0].mv, 0, 0); + tempPred->cu.setPUMv(1, candMvField[bestSadCand][1].mv, 0, 0); + tempPred->cu.setPURefIdx(0, (int8_t)candMvField[bestSadCand][0].refIdx, 0, 0); + tempPred->cu.setPURefIdx(1, (int8_t)candMvField[bestSadCand][1].refIdx, 0, 0); tempPred->sa8dCost = bestPred->sa8dCost; tempPred->predYuv.copyFromYuv(bestPred->predYuv); @@ -1294,11 +1291,11 @@ void Analysis::checkMerge2Nx2N_rd0_4(Mod md.bestMode = bestPred; /* broadcast sets of MV field data */ - bestPred->cu.setPUInterDir(interDirNeighbours[bestSadCand], 0, 0); - bestPred->cu.setPUMv(0, mvFieldNeighbours[bestSadCand][0].mv, 0, 0); - bestPred->cu.setPURefIdx(0, (int8_t)mvFieldNeighbours[bestSadCand][0].refIdx, 0, 0); - bestPred->cu.setPUMv(1, mvFieldNeighbours[bestSadCand][1].mv, 0, 0); - bestPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestSadCand][1].refIdx, 0, 0); + bestPred->cu.setPUInterDir(candDir[bestSadCand], 0, 0); + bestPred->cu.setPUMv(0, candMvField[bestSadCand][0].mv, 0, 0); + bestPred->cu.setPUMv(1, candMvField[bestSadCand][1].mv, 0, 0); + bestPred->cu.setPURefIdx(0, (int8_t)candMvField[bestSadCand][0].refIdx, 0, 0); + bestPred->cu.setPURefIdx(1, (int8_t)candMvField[bestSadCand][1].refIdx, 0, 0); } /* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */ @@ -1319,52 +1316,47 @@ void Analysis::checkMerge2Nx2N_rd5_6(Mod skip.cu.setPartSizeSubParts(SIZE_2Nx2N); skip.cu.m_mergeFlag[0] = true; - MVField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists - uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS]; - uint32_t maxNumMergeCand = merge.cu.getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours); + MVField candMvField[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists + uint8_t candDir[MRG_MAX_NUM_CANDS]; + uint32_t numMergeCand = merge.cu.getInterMergeCandidates(0, 0, candMvField, candDir); + PredictionUnit pu(merge.cu, cuGeom, 0); bool foundCbf0Merge = false; bool triedPZero = false, triedBZero = false; bestPred->rdCost = MAX_INT64; - if (m_param->analysisMode == X265_ANALYSIS_LOAD && isSkipMode) + if (isSkipMode) { uint32_t i = *m_reuseBestMergeCand; - tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */ - tempPred->cu.m_interDir[0] = interDirNeighbours[i]; - tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv; - tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx; - tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv; - tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx; - tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */ + bestPred->cu.m_mvpIdx[0][0] = (uint8_t)i; + bestPred->cu.m_interDir[0] = candDir[i]; + bestPred->cu.m_mv[0][0] = candMvField[i][0].mv; + bestPred->cu.m_mv[1][0] = candMvField[i][1].mv; + bestPred->cu.m_refIdx[0][0] = (int8_t)candMvField[i][0].refIdx; + bestPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx; - prepMotionCompensation(tempPred->cu, cuGeom, 0); - motionCompensation(tempPred->predYuv, true, true); - - encodeResAndCalcRdSkipCU(*tempPred); - - if (tempPred->rdCost < bestPred->rdCost) - std::swap(tempPred, bestPred); + motionCompensation(bestPred->cu, pu, bestPred->predYuv, true, true); + encodeResAndCalcRdSkipCU(*bestPred); } else { - for (uint32_t i = 0; i < maxNumMergeCand; i++) + for (uint32_t i = 0; i < numMergeCand; i++) { if (m_bFrameParallel && - (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 || - mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4)) + (candMvField[i][0].mv.y >= (m_param->searchRange + 1) * 4 || + candMvField[i][1].mv.y >= (m_param->searchRange + 1) * 4)) continue; /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */ - if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx) + if (candDir[i] == 1 && !candMvField[i][0].mv.word && !candMvField[i][0].refIdx) { if (triedPZero) continue; triedPZero = true; } - else if (interDirNeighbours[i] == 3 && - !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx && - !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx) + else if (candDir[i] == 3 && + !candMvField[i][0].mv.word && !candMvField[i][0].refIdx && + !candMvField[i][1].mv.word && !candMvField[i][1].refIdx) { if (triedBZero) continue; @@ -1372,15 +1364,14 @@ void Analysis::checkMerge2Nx2N_rd5_6(Mod } tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */ - tempPred->cu.m_interDir[0] = interDirNeighbours[i]; - tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv; - tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx; - tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv; - tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx; + tempPred->cu.m_interDir[0] = candDir[i]; + tempPred->cu.m_mv[0][0] = candMvField[i][0].mv; + tempPred->cu.m_mv[1][0] = candMvField[i][1].mv; + tempPred->cu.m_refIdx[0][0] = (int8_t)candMvField[i][0].refIdx; + tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx; tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */ - prepMotionCompensation(tempPred->cu, cuGeom, 0); - motionCompensation(tempPred->predYuv, true, true); + motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, true); uint8_t hasCbf = true; bool swapped = false; @@ -1405,11 +1396,11 @@ void Analysis::checkMerge2Nx2N_rd5_6(Mod if (swapped) { tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; - tempPred->cu.m_interDir[0] = interDirNeighbours[i]; - tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv; - tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx; - tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv; - tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx; + tempPred->cu.m_interDir[0] = candDir[i]; + tempPred->cu.m_mv[0][0] = candMvField[i][0].mv; + tempPred->cu.m_mv[1][0] = candMvField[i][1].mv; + tempPred->cu.m_refIdx[0][0] = (int8_t)candMvField[i][0].refIdx; + tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx; tempPred->cu.setPredModeSubParts(MODE_INTER); tempPred->predYuv.copyFromYuv(bestPred->predYuv); } @@ -1428,11 +1419,11 @@ void Analysis::checkMerge2Nx2N_rd5_6(Mod /* broadcast sets of MV field data */ uint32_t bestCand = bestPred->cu.m_mvpIdx[0][0]; - bestPred->cu.setPUInterDir(interDirNeighbours[bestCand], 0, 0); - bestPred->cu.setPUMv(0, mvFieldNeighbours[bestCand][0].mv, 0, 0); - bestPred->cu.setPURefIdx(0, (int8_t)mvFieldNeighbours[bestCand][0].refIdx, 0, 0); - bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0); - bestPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestCand][1].refIdx, 0, 0); + bestPred->cu.setPUInterDir(candDir[bestCand], 0, 0); + bestPred->cu.setPUMv(0, candMvField[bestCand][0].mv, 0, 0); + bestPred->cu.setPUMv(1, candMvField[bestCand][1].mv, 0, 0); + bestPred->cu.setPURefIdx(0, (int8_t)candMvField[bestCand][0].refIdx, 0, 0); + bestPred->cu.setPURefIdx(1, (int8_t)candMvField[bestCand][1].refIdx, 0, 0); } if (m_param->analysisMode) @@ -1572,8 +1563,8 @@ void Analysis::checkBidir2Nx2N(Mode& int cu.setPUMv(1, bestME[1].mv, 0, 0); cu.m_mvd[1][0] = bestME[1].mv - mvp1; - prepMotionCompensation(cu, cuGeom, 0); - motionCompensation(bidir2Nx2N.predYuv, true, m_bChromaSa8d); + PredictionUnit pu(cu, cuGeom, 0); + motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_bChromaSa8d); int sa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size); if (m_bChromaSa8d) @@ -1612,8 +1603,7 @@ void Analysis::checkBidir2Nx2N(Mode& int cu.m_mv[0][0] = mvzero; cu.m_mv[1][0] = mvzero; - prepMotionCompensation(cu, cuGeom, 0); - motionCompensation(tmpPredYuv, true, true); + motionCompensation(cu, pu, tmpPredYuv, true, true); zsa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size); zsa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, tmpPredYuv.m_buf[1], tmpPredYuv.m_csize); @@ -1621,8 +1611,8 @@ void Analysis::checkBidir2Nx2N(Mode& int } else { - pixel *fref0 = m_slice->m_mref[0][ref0].getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx); - pixel *fref1 = m_slice->m_mref[1][ref1].getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx); + pixel *fref0 = m_slice->m_mref[0][ref0].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx); + pixel *fref1 = m_slice->m_mref[1][ref1].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx); intptr_t refStride = m_slice->m_mref[0][0].lumaStride; primitives.pu[partEnum].pixelavg_pp(tmpPredYuv.m_buf[0], tmpPredYuv.m_size, fref0, refStride, fref1, refStride, 32); @@ -1657,10 +1647,7 @@ void Analysis::checkBidir2Nx2N(Mode& int /* real MC was already performed */ bidir2Nx2N.predYuv.copyFromYuv(tmpPredYuv); else - { - prepMotionCompensation(cu, cuGeom, 0); - motionCompensation(bidir2Nx2N.predYuv, true, true); - } + motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, true); } else if (m_bChromaSa8d) {
--- a/source/encoder/entropy.cpp Tue Feb 24 17:26:37 2015 +0530 +++ b/source/encoder/entropy.cpp Wed Feb 25 10:05:40 2015 +0530 @@ -43,6 +43,7 @@ Entropy::Entropy() { markValid(); m_fracBits = 0; + m_pad = 0; X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n"); }
--- a/source/encoder/search.cpp Tue Feb 24 17:26:37 2015 +0530 +++ b/source/encoder/search.cpp Wed Feb 25 10:05:40 2015 +0530 @@ -1806,22 +1806,24 @@ uint32_t Search::estIntraPredChromaQT(Mo return totalDistortion; } -/* estimation of best merge coding of an inter PU (not a merge CU) */ -uint32_t Search::mergeEstimation(CUData& cu, const CUGeom& cuGeom, int puIdx, MergeData& m) +/* estimation of best merge coding of an inter PU (2Nx2N merge PUs are evaluated as their own mode) */ +uint32_t Search::mergeEstimation(CUData& cu, const CUGeom& cuGeom, const PredictionUnit& pu, int puIdx, MergeData& m) { - X265_CHECK(cu.m_partSize[0] != SIZE_2Nx2N, "merge tested on non-2Nx2N partition\n"); - - m.maxNumMergeCand = cu.getInterMergeCandidates(m.absPartIdx, puIdx, m.mvFieldNeighbours, m.interDirNeighbours); + X265_CHECK(cu.m_partSize[0] != SIZE_2Nx2N, "mergeEstimation() called for 2Nx2N\n"); + + MVField candMvField[MRG_MAX_NUM_CANDS][2]; + uint8_t candDir[MRG_MAX_NUM_CANDS]; + uint32_t numMergeCand = cu.getInterMergeCandidates(pu.puAbsPartIdx, puIdx, candMvField, candDir); if (cu.isBipredRestriction()) { - /* in 8x8 CUs do not allow bidir merge candidates if not 2Nx2N */ - for (uint32_t mergeCand = 0; mergeCand < m.maxNumMergeCand; ++mergeCand) + /* do not allow bidir merge candidates if PU is smaller than 8x8, drop L1 reference */ + for (uint32_t mergeCand = 0; mergeCand < numMergeCand; ++mergeCand) { - if (m.interDirNeighbours[mergeCand] == 3) + if (candDir[mergeCand] == 3) { - m.interDirNeighbours[mergeCand] = 1; - m.mvFieldNeighbours[mergeCand][1].refIdx = REF_NOT_VALID; + candDir[mergeCand] = 1; + candMvField[mergeCand][1].refIdx = REF_NOT_VALID; } } } @@ -1829,27 +1831,26 @@ uint32_t Search::mergeEstimation(CUData& Yuv& tempYuv = m_rqt[cuGeom.depth].tmpPredYuv; uint32_t outCost = MAX_UINT; - for (uint32_t mergeCand = 0; mergeCand < m.maxNumMergeCand; ++mergeCand) + for (uint32_t mergeCand = 0; mergeCand < numMergeCand; ++mergeCand) { /* Prevent TMVP candidates from using unavailable reference pixels */ if (m_bFrameParallel && - (m.mvFieldNeighbours[mergeCand][0].mv.y >= (m_param->searchRange + 1) * 4 || - m.mvFieldNeighbours[mergeCand][1].mv.y >= (m_param->searchRange + 1) * 4)) + (candMvField[mergeCand][0].mv.y >= (m_param->searchRange + 1) * 4 || + candMvField[mergeCand][1].mv.y >= (m_param->searchRange + 1) * 4)) continue; - cu.m_mv[0][m.absPartIdx] = m.mvFieldNeighbours[mergeCand][0].mv; - cu.m_refIdx[0][m.absPartIdx] = (int8_t)m.mvFieldNeighbours[mergeCand][0].refIdx; - cu.m_mv[1][m.absPartIdx] = m.mvFieldNeighbours[mergeCand][1].mv; - cu.m_refIdx[1][m.absPartIdx] = (int8_t)m.mvFieldNeighbours[mergeCand][1].refIdx; - - prepMotionCompensation(cu, cuGeom, puIdx); - motionCompensation(tempYuv, true, m_me.bChromaSATD); - - uint32_t costCand = m_me.bufSATD(tempYuv.getLumaAddr(m.absPartIdx), tempYuv.m_size); + cu.m_mv[0][pu.puAbsPartIdx] = candMvField[mergeCand][0].mv; + cu.m_refIdx[0][pu.puAbsPartIdx] = (int8_t)candMvField[mergeCand][0].refIdx; + cu.m_mv[1][pu.puAbsPartIdx] = candMvField[mergeCand][1].mv; + cu.m_refIdx[1][pu.puAbsPartIdx] = (int8_t)candMvField[mergeCand][1].refIdx; + + motionCompensation(cu, pu, tempYuv, true, m_me.bChromaSATD); + + uint32_t costCand = m_me.bufSATD(tempYuv.getLumaAddr(pu.puAbsPartIdx), tempYuv.m_size); if (m_me.bChromaSATD) - costCand += m_me.bufChromaSATD(tempYuv, m.absPartIdx); - - uint32_t bitsCand = getTUBits(mergeCand, m.maxNumMergeCand); + costCand += m_me.bufChromaSATD(tempYuv, pu.puAbsPartIdx); + + uint32_t bitsCand = getTUBits(mergeCand, numMergeCand); costCand = costCand + m_rdCost.getCost(bitsCand); if (costCand < outCost) { @@ -1859,9 +1860,9 @@ uint32_t Search::mergeEstimation(CUData& } } - m.mvField[0] = m.mvFieldNeighbours[m.index][0]; - m.mvField[1] = m.mvFieldNeighbours[m.index][1]; - m.interDir = m.interDirNeighbours[m.index]; + m.mvField[0] = candMvField[m.index][0]; + m.mvField[1] = candMvField[m.index][1]; + m.dir = candDir[m.index]; return outCost; } @@ -1899,17 +1900,17 @@ void Search::processPME(PME& pme, Search slave.setQP(*m_slice, m_rdCost.m_qp); slave.m_slice = m_slice; slave.m_frame = m_frame; - slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.mode.cu.m_cuAddr, pme.cuGeom.absPartIdx, m_puAbsPartIdx, m_puWidth, m_puHeight); - slave.prepMotionCompensation(pme.mode.cu, pme.cuGeom, pme.puIdx); + + slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.pu.ctuAddr, pme.pu.cuAbsPartIdx, pme.pu.puAbsPartIdx, pme.pu.width, pme.pu.height); } /* Perform ME, repeat until no more work is available */ do { if (meId < m_slice->m_numRefIdx[0]) - slave.singleMotionEstimation(*this, pme.mode, pme.cuGeom, pme.puIdx, 0, meId); + slave.singleMotionEstimation(*this, pme.mode, pme.cuGeom, pme.pu, pme.puIdx, 0, meId); else - slave.singleMotionEstimation(*this, pme.mode, pme.cuGeom, pme.puIdx, 1, meId - m_slice->m_numRefIdx[0]); + slave.singleMotionEstimation(*this, pme.mode, pme.cuGeom, pme.pu, pme.puIdx, 1, meId - m_slice->m_numRefIdx[0]); meId = -1; pme.m_lock.acquire(); @@ -1920,16 +1921,14 @@ void Search::processPME(PME& pme, Search while (meId >= 0); } -/* this function assumes the caller has configured its MotionEstimation engine with the - * correct source plane and source PU, and has called prepMotionCompensation() to set - * m_puAbsPartIdx, m_puWidth, and m_puHeight */ -void Search::singleMotionEstimation(Search& master, Mode& interMode, const CUGeom& cuGeom, int part, int list, int ref) +void Search::singleMotionEstimation(Search& master, Mode& interMode, const CUGeom& cuGeom, const PredictionUnit& pu, + int part, int list, int ref) { uint32_t bits = master.m_listSelBits[list] + MVP_IDX_BITS; bits += getTUBits(ref, m_slice->m_numRefIdx[list]); MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1]; - int numMvc = interMode.cu.fillMvpCand(part, m_puAbsPartIdx, list, ref, interMode.amvpCand[list][ref], mvc); + int numMvc = interMode.cu.fillMvpCand(part, pu.puAbsPartIdx, list, ref, interMode.amvpCand[list][ref], mvc); int mvpIdx = 0; int merange = m_param->searchRange; @@ -1949,8 +1948,8 @@ void Search::singleMotionEstimation(Sear interMode.cu.clipMv(mvCand); Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv; - predInterLumaPixel(tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, mvCand); - uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(m_puAbsPartIdx), tmpPredYuv.m_size); + predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, mvCand); + uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size); if (bestCost > cost) { @@ -2008,43 +2007,38 @@ void Search::predInterSearch(Mode& inter Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv; MergeData merge; + uint32_t mrgCost; memset(&merge, 0, sizeof(merge)); for (int puIdx = 0; puIdx < numPart; puIdx++) { MotionData* bestME = interMode.bestME[puIdx]; - - /* sets m_puAbsPartIdx, m_puWidth, m_puHeight */ - initMotionCompensation(cu, cuGeom, puIdx); - - m_me.setSourcePU(*interMode.fencYuv, cu.m_cuAddr, cuGeom.absPartIdx, m_puAbsPartIdx, m_puWidth, m_puHeight); - - uint32_t mrgCost = MAX_UINT; + PredictionUnit pu(cu, cuGeom, puIdx); + + m_me.setSourcePU(*interMode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height); /* find best cost merge candidate. note: 2Nx2N merge and bidir are handled as separate modes */ if (cu.m_partSize[0] != SIZE_2Nx2N) { - merge.absPartIdx = m_puAbsPartIdx; - merge.width = m_puWidth; - merge.height = m_puHeight; - mrgCost = mergeEstimation(cu, cuGeom, puIdx, merge); + mrgCost = mergeEstimation(cu, cuGeom, pu, puIdx, merge); if (bMergeOnly && mrgCost != MAX_UINT) { - cu.m_mergeFlag[m_puAbsPartIdx] = true; - cu.m_mvpIdx[0][m_puAbsPartIdx] = merge.index; // merge candidate ID is stored in L0 MVP idx - cu.setPUInterDir(merge.interDir, m_puAbsPartIdx, puIdx); - cu.setPUMv(0, merge.mvField[0].mv, m_puAbsPartIdx, puIdx); - cu.setPURefIdx(0, merge.mvField[0].refIdx, m_puAbsPartIdx, puIdx); - cu.setPUMv(1, merge.mvField[1].mv, m_puAbsPartIdx, puIdx); - cu.setPURefIdx(1, merge.mvField[1].refIdx, m_puAbsPartIdx, puIdx); + cu.m_mergeFlag[pu.puAbsPartIdx] = true; + cu.m_mvpIdx[0][pu.puAbsPartIdx] = merge.index; // merge candidate ID is stored in L0 MVP idx + cu.setPUInterDir(merge.dir, pu.puAbsPartIdx, puIdx); + cu.setPUMv(0, merge.mvField[0].mv, pu.puAbsPartIdx, puIdx); + cu.setPURefIdx(0, merge.mvField[0].refIdx, pu.puAbsPartIdx, puIdx); + cu.setPUMv(1, merge.mvField[1].mv, pu.puAbsPartIdx, puIdx); + cu.setPURefIdx(1, merge.mvField[1].refIdx, pu.puAbsPartIdx, puIdx); totalmebits += merge.bits; - prepMotionCompensation(cu, cuGeom, puIdx); - motionCompensation(*predYuv, true, bChromaSA8D); + motionCompensation(cu, pu, *predYuv, true, bChromaSA8D); continue; } } + else + mrgCost = MAX_UINT; bestME[0].cost = MAX_UINT; bestME[1].cost = MAX_UINT; @@ -2061,7 +2055,7 @@ void Search::predInterSearch(Mode& inter uint32_t bits = m_listSelBits[l] + MVP_IDX_BITS; bits += getTUBits(ref, numRefIdx[l]); - int numMvc = cu.fillMvpCand(puIdx, m_puAbsPartIdx, l, ref, interMode.amvpCand[l][ref], mvc); + int numMvc = cu.fillMvpCand(puIdx, pu.puAbsPartIdx, l, ref, interMode.amvpCand[l][ref], mvc); // Pick the best possible MVP from AMVP candidates based on least residual int mvpIdx = 0; @@ -2079,8 +2073,8 @@ void Search::predInterSearch(Mode& inter continue; cu.clipMv(mvCand); - predInterLumaPixel(tmpPredYuv, *slice->m_refPicList[l][ref]->m_reconPic, mvCand); - uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(m_puAbsPartIdx), tmpPredYuv.m_size); + predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[l][ref]->m_reconPic, mvCand); + uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size); if (bestCost > cost) { @@ -2116,7 +2110,7 @@ void Search::predInterSearch(Mode& inter } else if (bTryDistributed) { - PME pme(*this, interMode, cuGeom, puIdx); + PME pme(*this, interMode, cuGeom, pu, puIdx); pme.m_jobTotal = numME; pme.m_jobAcquired = 1; /* reserve L0-0 */ @@ -2124,7 +2118,7 @@ void Search::predInterSearch(Mode& inter { processPME(pme, *this); - singleMotionEstimation(*this, interMode, cuGeom, puIdx, 0, 0); /* L0-0 */ + singleMotionEstimation(*this, interMode, cuGeom, pu, puIdx, 0, 0); /* L0-0 */ bDoUnidir = false; @@ -2144,7 +2138,7 @@ void Search::predInterSearch(Mode& inter uint32_t bits = m_listSelBits[l] + MVP_IDX_BITS; bits += getTUBits(ref, numRefIdx[l]); - int numMvc = cu.fillMvpCand(puIdx, m_puAbsPartIdx, l, ref, interMode.amvpCand[l][ref], mvc); + int numMvc = cu.fillMvpCand(puIdx, pu.puAbsPartIdx, l, ref, interMode.amvpCand[l][ref], mvc); // Pick the best possible MVP from AMVP candidates based on least residual int mvpIdx = 0; @@ -2162,8 +2156,8 @@ void Search::predInterSearch(Mode& inter continue; cu.clipMv(mvCand); - predInterLumaPixel(tmpPredYuv, *slice->m_refPicList[l][ref]->m_reconPic, mvCand); - uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(m_puAbsPartIdx), tmpPredYuv.m_size); + predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[l][ref]->m_reconPic, mvCand); + uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size); if (bestCost > cost) { @@ -2204,7 +2198,7 @@ void Search::predInterSearch(Mode& inter int bidirBits = 0; if (slice->isInterB() && !cu.isBipredRestriction() && /* biprediction is possible for this PU */ - cu.m_partSize[m_puAbsPartIdx] != SIZE_2Nx2N && /* 2Nx2N biprediction is handled elsewhere */ + cu.m_partSize[pu.puAbsPartIdx] != SIZE_2Nx2N && /* 2Nx2N biprediction is handled elsewhere */ bestME[0].cost != MAX_UINT && bestME[1].cost != MAX_UINT) { bidir[0] = bestME[0]; @@ -2214,16 +2208,14 @@ void Search::predInterSearch(Mode& inter if (m_me.bChromaSATD) { - cu.m_mv[0][m_puAbsPartIdx] = bidir[0].mv; - cu.m_refIdx[0][m_puAbsPartIdx] = (int8_t)bidir[0].ref; - cu.m_mv[1][m_puAbsPartIdx] = bidir[1].mv; - cu.m_refIdx[1][m_puAbsPartIdx] = (int8_t)bidir[1].ref; - - prepMotionCompensation(cu, cuGeom, puIdx); - motionCompensation(tmpPredYuv, true, true); - - satdCost = m_me.bufSATD(tmpPredYuv.getLumaAddr(m_puAbsPartIdx), tmpPredYuv.m_size) + - m_me.bufChromaSATD(tmpPredYuv, m_puAbsPartIdx); + cu.m_mv[0][pu.puAbsPartIdx] = bidir[0].mv; + cu.m_refIdx[0][pu.puAbsPartIdx] = (int8_t)bidir[0].ref; + cu.m_mv[1][pu.puAbsPartIdx] = bidir[1].mv; + cu.m_refIdx[1][pu.puAbsPartIdx] = (int8_t)bidir[1].ref; + motionCompensation(cu, pu, tmpPredYuv, true, true); + + satdCost = m_me.bufSATD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size) + + m_me.bufChromaSATD(tmpPredYuv, pu.puAbsPartIdx); } else { @@ -2232,11 +2224,11 @@ void Search::predInterSearch(Mode& inter Yuv* bidirYuv = m_rqt[cuGeom.depth].bidirPredYuv; /* Generate reference subpels */ - predInterLumaPixel(bidirYuv[0], *refPic0, bestME[0].mv); - predInterLumaPixel(bidirYuv[1], *refPic1, bestME[1].mv); - - primitives.pu[m_me.partEnum].pixelavg_pp(tmpPredYuv.m_buf[0], tmpPredYuv.m_size, bidirYuv[0].getLumaAddr(m_puAbsPartIdx), bidirYuv[0].m_size, - bidirYuv[1].getLumaAddr(m_puAbsPartIdx), bidirYuv[1].m_size, 32); + predInterLumaPixel(pu, bidirYuv[0], *refPic0, bestME[0].mv); + predInterLumaPixel(pu, bidirYuv[1], *refPic1, bestME[1].mv); + + primitives.pu[m_me.partEnum].pixelavg_pp(tmpPredYuv.m_buf[0], tmpPredYuv.m_size, bidirYuv[0].getLumaAddr(pu.puAbsPartIdx), bidirYuv[0].m_size, + bidirYuv[1].getLumaAddr(pu.puAbsPartIdx), bidirYuv[1].m_size, 32); satdCost = m_me.bufSATD(tmpPredYuv.m_buf[0], tmpPredYuv.m_size); } @@ -2263,21 +2255,19 @@ void Search::predInterSearch(Mode& inter /* coincident blocks of the two reference pictures */ if (m_me.bChromaSATD) { - cu.m_mv[0][m_puAbsPartIdx] = mvzero; - cu.m_refIdx[0][m_puAbsPartIdx] = (int8_t)bidir[0].ref; - cu.m_mv[1][m_puAbsPartIdx] = mvzero; - cu.m_refIdx[1][m_puAbsPartIdx] = (int8_t)bidir[1].ref; - - prepMotionCompensation(cu, cuGeom, puIdx); - motionCompensation(tmpPredYuv, true, true); - - satdCost = m_me.bufSATD(tmpPredYuv.getLumaAddr(m_puAbsPartIdx), tmpPredYuv.m_size) + - m_me.bufChromaSATD(tmpPredYuv, m_puAbsPartIdx); + cu.m_mv[0][pu.puAbsPartIdx] = mvzero; + cu.m_refIdx[0][pu.puAbsPartIdx] = (int8_t)bidir[0].ref; + cu.m_mv[1][pu.puAbsPartIdx] = mvzero; + cu.m_refIdx[1][pu.puAbsPartIdx] = (int8_t)bidir[1].ref; + motionCompensation(cu, pu, tmpPredYuv, true, true); + + satdCost = m_me.bufSATD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size) + + m_me.bufChromaSATD(tmpPredYuv, pu.puAbsPartIdx); } else { - const pixel* ref0 = m_slice->m_mref[0][bestME[0].ref].getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + m_puAbsPartIdx); - const pixel* ref1 = m_slice->m_mref[1][bestME[1].ref].getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + m_puAbsPartIdx); + const pixel* ref0 = m_slice->m_mref[0][bestME[0].ref].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx); + const pixel* ref1 = m_slice->m_mref[1][bestME[1].ref].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx); intptr_t refStride = slice->m_mref[0][0].lumaStride; primitives.pu[m_me.partEnum].pixelavg_pp(tmpPredYuv.m_buf[0], tmpPredYuv.m_size, ref0, refStride, ref1, refStride, 32); @@ -2315,13 +2305,13 @@ void Search::predInterSearch(Mode& inter /* select best option and store into CU */ if (mrgCost < bidirCost && mrgCost < bestME[0].cost && mrgCost < bestME[1].cost) { - cu.m_mergeFlag[m_puAbsPartIdx] = true; - cu.m_mvpIdx[0][m_puAbsPartIdx] = merge.index; // merge candidate ID is stored in L0 MVP idx - cu.setPUInterDir(merge.interDir, m_puAbsPartIdx, puIdx); - cu.setPUMv(0, merge.mvField[0].mv, m_puAbsPartIdx, puIdx); - cu.setPURefIdx(0, merge.mvField[0].refIdx, m_puAbsPartIdx, puIdx); - cu.setPUMv(1, merge.mvField[1].mv, m_puAbsPartIdx, puIdx); - cu.setPURefIdx(1, merge.mvField[1].refIdx, m_puAbsPartIdx, puIdx); + cu.m_mergeFlag[pu.puAbsPartIdx] = true; + cu.m_mvpIdx[0][pu.puAbsPartIdx] = merge.index; /* merge candidate ID is stored in L0 MVP idx */ + cu.setPUInterDir(merge.dir, pu.puAbsPartIdx, puIdx); + cu.setPUMv(0, merge.mvField[0].mv, pu.puAbsPartIdx, puIdx); + cu.setPURefIdx(0, merge.mvField[0].refIdx, pu.puAbsPartIdx, puIdx); + cu.setPUMv(1, merge.mvField[1].mv, pu.puAbsPartIdx, puIdx); + cu.setPURefIdx(1, merge.mvField[1].refIdx, pu.puAbsPartIdx, puIdx); totalmebits += merge.bits; } @@ -2329,17 +2319,17 @@ void Search::predInterSearch(Mode& inter { lastMode = 2; - cu.m_mergeFlag[m_puAbsPartIdx] = false; - cu.setPUInterDir(3, m_puAbsPartIdx, puIdx); - cu.setPUMv(0, bidir[0].mv, m_puAbsPartIdx, puIdx); - cu.setPURefIdx(0, bestME[0].ref, m_puAbsPartIdx, puIdx); - cu.m_mvd[0][m_puAbsPartIdx] = bidir[0].mv - bidir[0].mvp; - cu.m_mvpIdx[0][m_puAbsPartIdx] = bidir[0].mvpIdx; - - cu.setPUMv(1, bidir[1].mv, m_puAbsPartIdx, puIdx); - cu.setPURefIdx(1, bestME[1].ref, m_puAbsPartIdx, puIdx); - cu.m_mvd[1][m_puAbsPartIdx] = bidir[1].mv - bidir[1].mvp; - cu.m_mvpIdx[1][m_puAbsPartIdx] = bidir[1].mvpIdx; + cu.m_mergeFlag[pu.puAbsPartIdx] = false; + cu.setPUInterDir(3, pu.puAbsPartIdx, puIdx); + cu.setPUMv(0, bidir[0].mv, pu.puAbsPartIdx, puIdx); + cu.setPURefIdx(0, bestME[0].ref, pu.puAbsPartIdx, puIdx); + cu.m_mvd[0][pu.puAbsPartIdx] = bidir[0].mv - bidir[0].mvp; + cu.m_mvpIdx[0][pu.puAbsPartIdx] = bidir[0].mvpIdx; + + cu.setPUMv(1, bidir[1].mv, pu.puAbsPartIdx, puIdx); + cu.setPURefIdx(1, bestME[1].ref, pu.puAbsPartIdx, puIdx); + cu.m_mvd[1][pu.puAbsPartIdx] = bidir[1].mv - bidir[1].mvp; + cu.m_mvpIdx[1][pu.puAbsPartIdx] = bidir[1].mvpIdx; totalmebits += bidirBits; } @@ -2347,15 +2337,15 @@ void Search::predInterSearch(Mode& inter { lastMode = 0; - cu.m_mergeFlag[m_puAbsPartIdx] = false; - cu.setPUInterDir(1, m_puAbsPartIdx, puIdx); - cu.setPUMv(0, bestME[0].mv, m_puAbsPartIdx, puIdx); - cu.setPURefIdx(0, bestME[0].ref, m_puAbsPartIdx, puIdx); - cu.m_mvd[0][m_puAbsPartIdx] = bestME[0].mv - bestME[0].mvp; - cu.m_mvpIdx[0][m_puAbsPartIdx] = bestME[0].mvpIdx; - - cu.setPURefIdx(1, REF_NOT_VALID, m_puAbsPartIdx, puIdx); - cu.setPUMv(1, mvzero, m_puAbsPartIdx, puIdx); + cu.m_mergeFlag[pu.puAbsPartIdx] = false; + cu.setPUInterDir(1, pu.puAbsPartIdx, puIdx); + cu.setPUMv(0, bestME[0].mv, pu.puAbsPartIdx, puIdx); + cu.setPURefIdx(0, bestME[0].ref, pu.puAbsPartIdx, puIdx); + cu.m_mvd[0][pu.puAbsPartIdx] = bestME[0].mv - bestME[0].mvp; + cu.m_mvpIdx[0][pu.puAbsPartIdx] = bestME[0].mvpIdx; + + cu.setPURefIdx(1, REF_NOT_VALID, pu.puAbsPartIdx, puIdx); + cu.setPUMv(1, mvzero, pu.puAbsPartIdx, puIdx); totalmebits += bestME[0].bits; } @@ -2363,21 +2353,20 @@ void Search::predInterSearch(Mode& inter { lastMode = 1; - cu.m_mergeFlag[m_puAbsPartIdx] = false; - cu.setPUInterDir(2, m_puAbsPartIdx, puIdx); - cu.setPUMv(1, bestME[1].mv, m_puAbsPartIdx, puIdx); - cu.setPURefIdx(1, bestME[1].ref, m_puAbsPartIdx, puIdx); - cu.m_mvd[1][m_puAbsPartIdx] = bestME[1].mv - bestME[1].mvp; - cu.m_mvpIdx[1][m_puAbsPartIdx] = bestME[1].mvpIdx; - - cu.setPURefIdx(0, REF_NOT_VALID, m_puAbsPartIdx, puIdx); - cu.setPUMv(0, mvzero, m_puAbsPartIdx, puIdx); + cu.m_mergeFlag[pu.puAbsPartIdx] = false; + cu.setPUInterDir(2, pu.puAbsPartIdx, puIdx); + cu.setPUMv(1, bestME[1].mv, pu.puAbsPartIdx, puIdx); + cu.setPURefIdx(1, bestME[1].ref, pu.puAbsPartIdx, puIdx); + cu.m_mvd[1][pu.puAbsPartIdx] = bestME[1].mv - bestME[1].mvp; + cu.m_mvpIdx[1][pu.puAbsPartIdx] = bestME[1].mvpIdx; + + cu.setPURefIdx(0, REF_NOT_VALID, pu.puAbsPartIdx, puIdx); + cu.setPUMv(0, mvzero, pu.puAbsPartIdx, puIdx); totalmebits += bestME[1].bits; } - prepMotionCompensation(cu, cuGeom, puIdx); - motionCompensation(*predYuv, true, bChromaSA8D); + motionCompensation(cu, pu, *predYuv, true, bChromaSA8D); } interMode.sa8dBits += totalmebits;
--- a/source/encoder/search.h Tue Feb 24 17:26:37 2015 +0530 +++ b/source/encoder/search.h Wed Feb 25 10:05:40 2015 +0530 @@ -288,9 +288,10 @@ public: Search& master; Mode& mode; const CUGeom& cuGeom; + const PredictionUnit& pu; int puIdx; - PME(Search& s, Mode& m, const CUGeom& g, int p) : master(s), mode(m), cuGeom(g), puIdx(p) {} + PME(Search& s, Mode& m, const CUGeom& g, const PredictionUnit& u, int p) : master(s), mode(m), cuGeom(g), pu(u), puIdx(p) {} void processTasks(int workerThreadId); @@ -300,7 +301,7 @@ public: }; void processPME(PME& pme, Search& slave); - void singleMotionEstimation(Search& master, Mode& interMode, const CUGeom& cuGeom, int part, int list, int ref); + void singleMotionEstimation(Search& master, Mode& interMode, const CUGeom& cuGeom, const PredictionUnit& pu, int part, int list, int ref); protected: @@ -347,21 +348,11 @@ protected: // reshuffle CBF flags after coding a pair of 4:2:2 chroma blocks void offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t tuDepth, uint32_t absPartIdx); + /* output of mergeEstimation, best merge candidate */ struct MergeData { - /* merge candidate data, cached between calls to mergeEstimation */ - MVField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; - uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS]; - uint32_t maxNumMergeCand; - - /* data updated for each partition */ - uint32_t absPartIdx; - int width; - int height; - - /* outputs */ MVField mvField[2]; - uint32_t interDir; + uint32_t dir; uint32_t index; uint32_t bits; }; @@ -369,8 +360,8 @@ protected: /* inter/ME helper functions */ void checkBestMVP(MV* amvpCand, MV cMv, MV& mvPred, int& mvpIdx, uint32_t& outBits, uint32_t& outCost) const; void setSearchRange(const CUData& cu, MV mvp, int merange, MV& mvmin, MV& mvmax) const; - uint32_t mergeEstimation(CUData& cu, const CUGeom& cuGeom, int partIdx, MergeData& m); - static void getBlkBits(PartSize cuMode, bool bPSlice, int partIdx, uint32_t lastMode, uint32_t blockBit[3]); + uint32_t mergeEstimation(CUData& cu, const CUGeom& cuGeom, const PredictionUnit& pu, int puIdx, MergeData& m); + static void getBlkBits(PartSize cuMode, bool bPSlice, int puIdx, uint32_t lastMode, uint32_t blockBit[3]); /* intra helper functions */ enum { MAX_RD_INTRA_MODES = 16 };
--- a/source/encoder/slicetype.cpp Tue Feb 24 17:26:37 2015 +0530 +++ b/source/encoder/slicetype.cpp Wed Feb 25 10:05:40 2015 +0530 @@ -210,7 +210,7 @@ void LookaheadTLD::calcAdaptiveQuantFram void LookaheadTLD::lowresIntraEstimate(Lowres& fenc) { ALIGN_VAR_32(pixel, prediction[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]); - ALIGN_VAR_32(pixel, fencIntra[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]); + pixel fencIntra[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]; pixel neighbours[2][X265_LOWRES_CU_SIZE * 4 + 1]; const int lookAheadLambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP]; @@ -224,6 +224,9 @@ void LookaheadTLD::lowresIntraEstimate(L pixel *planar = (cuSize >= 8) ? neighbours[1] : neighbours[0]; pixelcmp_t satd = primitives.pu[sizeIdx].satd; + fenc.costEst[0][0] = 0; + fenc.costEstAq[0][0] = 0; + for (int cuY = 0; cuY < heightInCU; cuY++) { fenc.rowSatds[0][0][cuY] = 0; @@ -239,7 +242,7 @@ void LookaheadTLD::lowresIntraEstimate(L memcpy(neighbours[0], pixCur - 1 - fenc.lumaStride, (cuSize + 1) * sizeof(pixel)); for (int i = 1; i < cuSize + 1; i++) - neighbours[0][i + cuSize2] = pixCur[-1 - fenc.lumaStride + i * fenc.lumaStride]; /* todo: fixme */ + neighbours[0][i + cuSize2] = pixCur[-1 - fenc.lumaStride + i * fenc.lumaStride]; /* TODO: gcc warning */ for (int i = 0; i < cuSize; i++) { @@ -264,7 +267,7 @@ void LookaheadTLD::lowresIntraEstimate(L uint32_t ilowmode = 0; /* DC and planar */ - primitives.cu[sizeIdx].intra_pred[DC_IDX](prediction, cuSize, neighbours[0], 0, (cuSize <= 16)); + primitives.cu[sizeIdx].intra_pred[DC_IDX](prediction, cuSize, neighbours[0], 0, cuSize <= 16); cost = satd(fencIntra, cuSize, prediction, cuSize); COPY2_IF_LT(icost, cost, ilowmode, DC_IDX); @@ -306,8 +309,20 @@ void LookaheadTLD::lowresIntraEstimate(L fenc.lowresCosts[0][0][cuXY] = (uint16_t)(X265_MIN(icost, LOWRES_COST_MASK) | (0 << LOWRES_COST_SHIFT)); fenc.intraCost[cuXY] = icost; fenc.intraMode[cuXY] = (uint8_t)ilowmode; - fenc.rowSatds[0][0][cuY] += icost; - fenc.costEst[0][0] += icost; + + /* do not include edge blocks in the frame cost estimates, they are not very accurate */ + const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 && + cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2; + + int icostAq = (bFrameScoreCU && fenc.invQscaleFactor) ? ((icost * fenc.invQscaleFactor[cuXY] + 128) >> 8) : icost; + + if (bFrameScoreCU) + { + fenc.costEst[0][0] += icost; + fenc.costEstAq[0][0] += icostAq; + } + + fenc.rowSatds[0][0][cuY] += icostAq; } } } @@ -500,7 +515,7 @@ Lookahead::Lookahead(x265_param *param, * do much unnecessary work, some frame cost estimates are not needed, so if * the thread pool is small we disable this feature after the initial burst * of work */ - m_bBatchFrameCosts = m_bBatchMotionSearch; + m_bBatchFrameCosts = 0 && m_bBatchMotionSearch; /* temporarily disabled */ if (m_bBatchMotionSearch && m_pool->m_numWorkers > 12) { @@ -1635,10 +1650,10 @@ void Lookahead::estimateCUPropagate(Lowr if (!referenced) memset(frames[b]->propagateCost, 0, m_widthInCU * sizeof(uint16_t)); - int32_t StrideInCU = m_widthInCU; + int32_t strideInCU = m_widthInCU; for (uint16_t blocky = 0; blocky < m_heightInCU; blocky++) { - int cuIndex = blocky * StrideInCU; + int cuIndex = blocky * strideInCU; primitives.propagateCost(m_scratch, propagateCost, frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex, frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_widthInCU); @@ -1676,10 +1691,10 @@ void Lookahead::estimateCUPropagate(Lowr int32_t y = mvs[list][cuIndex].y; int32_t cux = (x >> 5) + blockx; int32_t cuy = (y >> 5) + blocky; - int32_t idx0 = cux + cuy * StrideInCU; + int32_t idx0 = cux + cuy * strideInCU; int32_t idx1 = idx0 + 1; - int32_t idx2 = idx0 + StrideInCU; - int32_t idx3 = idx0 + StrideInCU + 1; + int32_t idx2 = idx0 + strideInCU; + int32_t idx3 = idx0 + strideInCU + 1; x &= 31; y &= 31; int32_t idx0weight = (32 - y) * (32 - x); @@ -1781,7 +1796,7 @@ int64_t CostEstimateGroup::singleCost(in return estimateFrameCost(tld, p0, p1, b, intraPenalty); } -void CostEstimateGroup::add(int p0, int p1, int b, bool intraPenalty) +void CostEstimateGroup::add(int p0, int p1, int b) { X265_CHECK(m_batchMode || !m_jobTotal, "single CostEstimateGroup instance cannot mix batch modes\n"); m_batchMode = true; @@ -1790,7 +1805,6 @@ void CostEstimateGroup::add(int p0, int e.p0 = p0; e.p1 = p1; e.b = b; - e.bIntraPenalty = intraPenalty; if (m_jobTotal == MAX_BATCH_SIZE) finishBatch(); @@ -1828,7 +1842,7 @@ void CostEstimateGroup::processTasks(int ProfileScopeEvent(estCostSingle); Estimate& e = m_estimates[i]; - estimateFrameCost(tld, e.p0, e.p1, e.b, e.bIntraPenalty); + estimateFrameCost(tld, e.p0, e.p1, e.b, false); } else { @@ -1888,6 +1902,7 @@ int64_t CostEstimateGroup::estimateFrame * going to need motion searches or bidir measurements */ memset(&m_slice, 0, sizeof(Slice) * m_lookahead.m_numCoopSlices); + memset(fenc->rowSatds, 0, sizeof(fenc->rowSatds[0]) * m_lookahead.m_heightInCU); m_lock.acquire(); X265_CHECK(!m_batchMode, "single CostEstimateGroup instance cannot mix batch modes\n");
--- a/source/encoder/slicetype.h Tue Feb 24 17:26:37 2015 +0530 +++ b/source/encoder/slicetype.h Wed Feb 25 10:05:40 2015 +0530 @@ -208,10 +208,9 @@ public: struct Estimate { int p0, b, p1; - bool bIntraPenalty; } m_estimates[MAX_BATCH_SIZE]; - void add(int p0, int p1, int b, bool intraPenalty = false); + void add(int p0, int p1, int b); void finishBatch(); protected: