changeset 1231:4dd4c610646f

Merged in deepthidevaki/xhevc_deepthid (pull request #124) Modifications to subpel generation
author nandaku2 <deepthi@multicorewareinc.com>
date Wed, 08 May 2013 14:53:41 +0530
parents bad457c087fd (current diff) f928fa3e7405 (diff)
children 22363860b159 321ae9002b1e
files
diffstat 4 files changed, 41 insertions(+-), 57 deletions(-) [+]
line wrap: on
line diff
--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Wed May 08 14:49:56 2013 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Wed May 08 14:53:41 2013 +0530
@@ -309,23 +309,22 @@ Void TComPicYuv::xExtendPicCompBorder(Pe
 
 Void TComPicYuv::generateLumaHQpel()
 {
-    Int width      = m_iPicWidth; // + (m_iLumaMarginX << 1)-8;
-    Int height     =  m_iPicHeight; //+ (m_iLumaMarginY << 1)-8;
+    Int width      = m_iPicWidth;
+    Int height     =  m_iPicHeight;
     Int srcStride  =  getStride();
 
     int tmpMarginX = 4; //Generate subpels for entire frame with a margin of tmpMargin
     int tmpMarginY = 4;
 
-    TShortYUV filteredBlockTmp[4];
-
-    int offsetToLuma = m_iLumaMarginY   * getStride()  + m_iLumaMarginX;
+    short* filteredBlockTmp[4];
 
     for (int i = 0; i < 4; i++)
     {
-        filteredBlockTmp[i].create((m_iPicWidth + (m_iLumaMarginX << 1)), (m_iPicHeight + (m_iLumaMarginY << 1)));
+        filteredBlockTmp[i] = (short*)xMalloc(short, (m_iPicWidth + (tmpMarginX << 2)) * (m_iPicHeight + (tmpMarginY << 2)));
     }
 
-    Int intStride = filteredBlockTmp[0].getWidth();
+    Int intStride = (m_iPicWidth + (tmpMarginX << 2)); //filteredBlockTmp[0].getWidth();
+    int offsetToLuma = (tmpMarginY << 1)   * intStride  + (tmpMarginX << 1);
     Int dstStride = srcStride;
     Pel *srcPtr;    //Contains raw pixels
     Short *intPtr;  // Intermediate results in short
@@ -337,9 +336,13 @@ Void TComPicYuv::generateLumaHQpel()
     srcPtr = getLumaAddr() - (tmpMarginY + 4) * srcStride - (tmpMarginX + 4);
     dstPtr = m_filteredBlockOrgY[0][0] - (tmpMarginY + 4) * dstStride - (tmpMarginX + 4);
 
+#if ENABLE_PRIMITIVES
+    x265::primitives.cpyblock(width + (tmpMarginX << 1) + 4, height + (tmpMarginY << 1) + 4, (pixel*)dstPtr, dstStride, (pixel*)srcPtr, srcStride);
+#else
     filterCopy(srcPtr, srcStride, dstPtr, dstStride, width + (tmpMarginX << 1) + 4, height + (tmpMarginY << 1) + 4);
+#endif
 
-    intPtr = filteredBlockTmp[0].getLumaAddr() + offsetToLuma - (tmpMarginY + 4) * intStride - (tmpMarginX + 4);
+    intPtr = filteredBlockTmp[0] + offsetToLuma - (tmpMarginY + 4) * intStride - (tmpMarginX + 4);
 #if ENABLE_PRIMITIVES
     primitives.ipfilterConvert_p_s(g_bitDepthY, (pixel*)srcPtr, srcStride, intPtr,
                                    intStride, width + (tmpMarginX << 1) + 4, height + (tmpMarginY << 1) + 4);
@@ -348,7 +351,7 @@ Void TComPicYuv::generateLumaHQpel()
                             intStride, width + (tmpMarginX << 1) + 4, height + (tmpMarginY << 1) + 4);
 #endif
 
-    intPtr = filteredBlockTmp[0].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[0] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[2][0] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_s_p[FILTER_V_S_P_8](g_bitDepthY, intPtr, intStride, (pixel*)dstPtr,
@@ -358,14 +361,14 @@ Void TComPicYuv::generateLumaHQpel()
                                          dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[2]);
 #endif
 
-    intPtr = filteredBlockTmp[2].getLumaAddr() + offsetToLuma - (tmpMarginY + 4) * intStride - (tmpMarginX + 4);
+    intPtr = filteredBlockTmp[2] + offsetToLuma - (tmpMarginY + 4) * intStride - (tmpMarginX + 4);
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_p_s[FILTER_H_P_S_8](g_bitDepthY, (pixel*)srcPtr, srcStride, intPtr, intStride, width + (tmpMarginX << 1) + 4, height + (tmpMarginY << 1) + 4,  m_lumaFilter[2]);
 #else
     filterHorizontal_pel_short<NTAPS_LUMA>(g_bitDepthY, srcPtr, srcStride, intPtr, intStride, width + (tmpMarginX << 1) + 4, height + (tmpMarginY << 1) + 4,  m_lumaFilter[2]);
 #endif
 
-    intPtr = filteredBlockTmp[2].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[2] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[0][2] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipfilterConvert_s_p(g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1));
@@ -373,7 +376,7 @@ Void TComPicYuv::generateLumaHQpel()
     filterConvertShortToPel(g_bitDepthY, intPtr, intStride, dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1));
 #endif
 
-    intPtr = filteredBlockTmp[2].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[2] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[2][2] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_s_p[FILTER_V_S_P_8](g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[2]);
@@ -383,7 +386,7 @@ Void TComPicYuv::generateLumaHQpel()
 
     /* Generate QPels */
     srcPtr = getLumaAddr() - (tmpMarginY + 4) * srcStride - (tmpMarginX + 4);
-    intPtr = filteredBlockTmp[1].getLumaAddr() + offsetToLuma - (tmpMarginY + 4) * intStride - (tmpMarginX + 4);
+    intPtr = filteredBlockTmp[1] + offsetToLuma - (tmpMarginY + 4) * intStride - (tmpMarginX + 4);
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_p_s[FILTER_H_P_S_8](g_bitDepthY, (pixel*)srcPtr, srcStride, intPtr, intStride, width + (tmpMarginX << 1) + 4, height + (tmpMarginY << 1) + 4, m_lumaFilter[1]);
 #else
@@ -391,7 +394,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     srcPtr = getLumaAddr() - (tmpMarginY + 4) * srcStride - (tmpMarginX + 4);
-    intPtr = filteredBlockTmp[3].getLumaAddr() + offsetToLuma - (tmpMarginY + 4) * intStride - (tmpMarginX + 4);
+    intPtr = filteredBlockTmp[3] + offsetToLuma - (tmpMarginY + 4) * intStride - (tmpMarginX + 4);
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_p_s[FILTER_H_P_S_8](g_bitDepthY, (pixel*)srcPtr, srcStride, intPtr, intStride, width + (tmpMarginX << 1) + 4, height + (tmpMarginY << 1) + 4, m_lumaFilter[3]);
 #else
@@ -399,7 +402,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 1,1
-    intPtr = filteredBlockTmp[1].getLumaAddr()  + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[1]  + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[1][1] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_s_p[FILTER_V_S_P_8](g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[1]);
@@ -408,7 +411,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 3,1
-    intPtr = filteredBlockTmp[1].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[1] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[3][1] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_s_p[FILTER_V_S_P_8](g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[3]);
@@ -417,7 +420,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 2,1
-    intPtr = filteredBlockTmp[1].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[1] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[2][1] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_s_p[FILTER_V_S_P_8](g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[2]);
@@ -426,7 +429,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 2,3
-    intPtr = filteredBlockTmp[3].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[3] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[2][3] - tmpMarginY * dstStride - tmpMarginX;
 
 #if ENABLE_PRIMITIVES
@@ -436,7 +439,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 0,1
-    intPtr = filteredBlockTmp[1].getLumaAddr()  + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[1]  + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[0][1]  - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipfilterConvert_s_p(g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1));
@@ -445,7 +448,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 0,3
-    intPtr = filteredBlockTmp[3].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[3] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[0][3] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipfilterConvert_s_p(g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1));
@@ -454,7 +457,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 1,2
-    intPtr = filteredBlockTmp[2].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[2] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[1][2] - tmpMarginY * dstStride - tmpMarginX;
 
 #if ENABLE_PRIMITIVES
@@ -464,7 +467,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 3,2
-    intPtr = filteredBlockTmp[2].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[2] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[3][2] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_s_p[FILTER_V_S_P_8](g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride,  width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[3]);
@@ -472,7 +475,7 @@ Void TComPicYuv::generateLumaHQpel()
     filterVertical_short_pel<NTAPS_LUMA>(g_bitDepthY, intPtr, intStride, dstPtr, dstStride,  width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[3]);
 #endif
     // Generate @ 1,0
-    intPtr = filteredBlockTmp[0].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[0] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[1][0] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_s_p[FILTER_V_S_P_8](g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride,  width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[1]);
@@ -481,7 +484,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 3,0
-    intPtr = filteredBlockTmp[0].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[0] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[3][0] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_s_p[FILTER_V_S_P_8](g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[3]);
@@ -490,7 +493,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
 // Generate @ 1,3
-    intPtr = filteredBlockTmp[3].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[3] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[1][3] - tmpMarginY * dstStride - tmpMarginX;
 
 #if ENABLE_PRIMITIVES
@@ -500,7 +503,7 @@ Void TComPicYuv::generateLumaHQpel()
 #endif
 
     // Generate @ 3,3
-    intPtr = filteredBlockTmp[3].getLumaAddr() + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
+    intPtr = filteredBlockTmp[3] + offsetToLuma - tmpMarginY * intStride - tmpMarginX;
     dstPtr = m_filteredBlockOrgY[3][3] - tmpMarginY * dstStride - tmpMarginX;
 #if ENABLE_PRIMITIVES
     primitives.ipFilter_s_p[FILTER_V_S_P_8](g_bitDepthY, intPtr, intStride, (pixel*)dstPtr, dstStride, width + (tmpMarginX << 1), height + (tmpMarginY << 1), m_lumaFilter[3]);
@@ -510,7 +513,7 @@ Void TComPicYuv::generateLumaHQpel()
 
     for (int i = 0; i < 4; i++)
     {
-        filteredBlockTmp[i].destroy();
+        xFree(filteredBlockTmp[i]);
     }
 }
 
--- a/source/Lib/TLibCommon/TComPicYuv.h	Wed May 08 14:49:56 2013 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Wed May 08 14:53:41 2013 +0530
@@ -97,8 +97,7 @@ protected:
 
     Void  xExtendPicCompBorder(Pel* piTxt, Int iStride, Int iWidth, Int iHeight, Int iMarginX, Int iMarginY);
     Void generateLumaHQpel();
-    Void generateChromaHQpel();
-
+    
 public:
 
     TComPicYuv();
--- a/source/Lib/TLibCommon/TComPrediction.cpp	Wed May 08 14:49:56 2013 +0530
+++ b/source/Lib/TLibCommon/TComPrediction.cpp	Wed May 08 14:53:41 2013 +0530
@@ -597,7 +597,11 @@ Void TComPrediction::xPredInterLumaBlk(T
     Pel* src = refPic->getLumaFilterBlock(yFrac, xFrac, cu->getAddr(), cu->getZorderIdxInCU() + partAddr) + refOffset;
     Int srcStride = refPic->getStride();
 
-    filterCopy(src, refPic->getStride(), dst, dstStride, width, height);
+#if ENABLE_PRIMITIVES
+    x265::primitives.cpyblock(width, height, (pixel*)dst, dstStride, (pixel*)src, srcStride);
+#else
+    filterCopy(src, srcStride, dst, dstStride, width, height);
+#endif
 }
 
 /**
@@ -644,8 +648,8 @@ Void TComPrediction::xPredInterChromaBlk
     {
         if (xFrac == 0)
         {
-            filterCopy(refCb, refStride, dstCb, dstStride, cxWidth, cxHeight);
-            filterCopy(refCr, refStride, dstCr, dstStride, cxWidth, cxHeight);
+            x265::primitives.cpyblock(cxWidth, cxHeight, (pixel*)dstCb, dstStride, (pixel*)refCb, refStride);
+            x265::primitives.cpyblock(cxWidth, cxHeight, (pixel*)dstCr, dstStride, (pixel*)refCr, refStride);
         }
         else
         {
@@ -697,28 +701,6 @@ Void TComPrediction::xPredInterChromaBlk
     free(extY);
 
 #endif // if ENABLE_PRIMITIVES
-
-/* //Original HM code
-    if (yFrac == 0)
-    {
-        m_if.filterHorChroma(refCb, refStride, dstCb,  dstStride, cxWidth, cxHeight, xFrac, !bi);
-        m_if.filterHorChroma(refCr, refStride, dstCr,  dstStride, cxWidth, cxHeight, xFrac, !bi);
-    }
-    else if (xFrac == 0)
-    {
-        m_if.filterVerChroma(refCb, refStride, dstCb, dstStride, cxWidth, cxHeight, yFrac, true, !bi);
-        m_if.filterVerChroma(refCr, refStride, dstCr, dstStride, cxWidth, cxHeight, yFrac, true, !bi);
-    }
-    else
-    {
-        m_if.filterHorChroma(refCb - (halfFilterSize - 1) * refStride, refStride, extY,  extStride, cxWidth, cxHeight + filterSize - 1, xFrac, false);
-        m_if.filterVerChroma(extY  + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, cxWidth, cxHeight, yFrac, false, !bi);
-
-        m_if.filterHorChroma(refCr - (halfFilterSize - 1) * refStride, refStride, extY,  extStride, cxWidth, cxHeight + filterSize - 1, xFrac, false);
-        m_if.filterVerChroma(extY  + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, cxWidth, cxHeight, yFrac, false, !bi);
-    }
-    free(extY);
-*/
 }
 
 Void TComPrediction::xWeightedAverage(TComYuv* pcYuvSrc0, TComYuv* pcYuvSrc1, Int iRefIdx0, Int iRefIdx1, UInt uiPartIdx, Int iWidth, Int iHeight, TComYuv*& rpcYuvDst)
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Wed May 08 14:49:56 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Wed May 08 14:53:41 2013 +0530
@@ -3891,7 +3891,7 @@ Void TEncSearch::xMotionEstimation(TComD
     Pel*        piRefY      = pcCU->getSlice()->getRefPic(eRefPicList, iRefIdxPred)->getPicYuvRec()->getLumaAddr(pcCU->getAddr(), pcCU->getZorderIdxInCU() + uiPartAddr);
     Int         iRefStride  = pcCU->getSlice()->getRefPic(eRefPicList, iRefIdxPred)->getPicYuvRec()->getStride();
 
-    TComPicYuv* refPic = pcCU->getSlice()->getRefPic(eRefPicList, iRefIdxPred)->getPicYuvRec(); //For testing new generateHpel
+    TComPicYuv* refPic = pcCU->getSlice()->getRefPic(eRefPicList, iRefIdxPred)->getPicYuvRec(); //For new xPatternSearchFracDiff
 
     TComMv      cMvPred = *pcMvPred;
 
@@ -5643,7 +5643,7 @@ Void TEncSearch::xExtDIFUpSamplingQ(TCom
 #else
     filterHorizontal_pel_short<NTAPS_LUMA>(g_bitDepthY, srcPtr, srcStride, intPtr, intStride, width, extHeight, m_lumaFilter[3]);
 #endif
-//------
+
     // Generate @ 1,1
     intPtr = filteredBlockTmp[1].getLumaAddr() + (halfFilterSize - 1) * intStride;
     dstPtr = m_filteredBlock[1][1].getLumaAddr();
@@ -5656,7 +5656,7 @@ Void TEncSearch::xExtDIFUpSamplingQ(TCom
 #else
     filterVertical_short_pel<NTAPS_LUMA>(g_bitDepthY, intPtr, intStride, dstPtr, dstStride, width, height, m_lumaFilter[1]);
 #endif
-//-------
+
     // Generate @ 3,1
     intPtr = filteredBlockTmp[1].getLumaAddr() + (halfFilterSize - 1) * intStride;
     dstPtr = m_filteredBlock[3][1].getLumaAddr();