changeset 2078:573411ef9172

Merged in deepthidevaki/xhevc_deepthid (pull request #185) Added IntraAngular modes for chroma
author Steve Borho <steve@borho.org>
date Fri, 07 Jun 2013 11:16:25 -0500
parents 105c01ee73d9 (current diff) ac2aa44b996b (diff)
children 20d3411fb757
files source/common/vec/intrapred.inc
diffstat 3 files changed, 172 insertions(+-), 129 deletions(-) [+]
line wrap: on
line diff
--- a/source/Lib/TLibCommon/TComPrediction.cpp	Fri Jun 07 11:05:24 2013 -0500
+++ b/source/Lib/TLibCommon/TComPrediction.cpp	Fri Jun 07 11:16:25 2013 -0500
@@ -390,8 +390,19 @@ Void TComPrediction::predIntraChromaAng(
     }
     else
     {
-        // Create the prediction
-        xPredIntraAng(g_bitDepthC, ptrSrc + sw + 1, sw, pDst, uiStride, iWidth, iHeight, uiDirMode, bAbove, bLeft, false);
+         //Create the prediction
+        int k;
+        Pel refAbv[3 * MAX_CU_SIZE];
+        Pel refLft[3 * MAX_CU_SIZE];
+        int limit = ( uiDirMode <=25 && uiDirMode >=11 )? (iWidth + 1) : (2*iWidth+1);
+        memcpy(refAbv + iWidth - 1, ptrSrc, (limit) * sizeof(Pel));
+        for (k = 0; k < limit; k++)
+        {
+               refLft[k + iWidth - 1] = ptrSrc[k * sw];
+        }
+
+        primitives.getIPredAng(g_bitDepthC, (pixel*)pDst, uiStride, iWidth, uiDirMode, false, (pixel*)refLft + iWidth -1, (pixel*)refAbv + iWidth -1);
+  
     }
 }
 
--- a/source/common/vec/intrapred.inc	Fri Jun 07 11:05:24 2013 -0500
+++ b/source/common/vec/intrapred.inc	Fri Jun 07 11:16:25 2013 -0500
@@ -1678,7 +1678,7 @@ void xPredIntraAng4x4(int bitDepth, pixe
 }
 
 #else /* if HIGH_BIT_DEPTH */
-void xPredIntraAng4x4(int /*bitDepth*/, pixel* pDst, int dstStride, int width, int dirMode, pixel *refLeft, pixel *refAbove)
+void xPredIntraAng4x4(int /*bitDepth*/, pixel* pDst, int dstStride, int width, int dirMode, pixel *refLeft, pixel *refAbove, bool bFilter = true)
 {
     int blkSize        = width;
 
@@ -1727,32 +1727,40 @@ void xPredIntraAng4x4(int /*bitDepth*/, 
     {
         if (modeHor)
         {
-            Vec16uc v_temp;
-            Vec8s v_side_0; // refSide[0] value in a vector
-            v_temp = load_partial(const_int(8), (void*)refSide);
-            v_side_0 = broadcast(const_int(0), (Vec8s)v_temp);
-            v_side_0 = v_side_0 & 0x00FF;
-
-            //shift v_side by 1 element (1 byte)
-            Vec2uq tmp = reinterpret_i(v_temp);
-            tmp = tmp >> 8;
-            v_temp = reinterpret_i(tmp);
-            Vec8s v_side = extend_low(v_temp);
-
             Vec16uc v_main;
             v_main = load_partial(const_int(4), (void*)(refMain + 1));
 
             Vec16uc tmp16;
             tmp16 = blend16c<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(v_main, v_main);
             tmp16 = blend16c<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(tmp16, tmp16);
-
-            Vec8s row0 = extend_low(tmp16);
-            v_side -= v_side_0;
-            v_side = v_side >> 1;
-            row0 += v_side;
-            row0 = min(max(0, row0), 255);
-            Vec16uc v_res(compress(row0, 0));
-            store_partial(const_int(4), pDst, v_res);
+            Vec2uq tmp;
+
+            if (bFilter)
+            {
+                Vec16uc v_temp;
+                Vec8s v_side_0; // refSide[0] value in a vector
+                v_temp = load_partial(const_int(8), (void*)refSide);
+                v_side_0 = broadcast(const_int(0), (Vec8s)v_temp);
+                v_side_0 = v_side_0 & 0x00FF;
+
+                //shift v_side by 1 element (1 byte)
+                tmp = reinterpret_i(v_temp);
+                tmp = tmp >> 8;
+                v_temp = reinterpret_i(tmp);
+                Vec8s v_side = extend_low(v_temp);
+
+                Vec8s row0 = extend_low(tmp16);
+                v_side -= v_side_0;
+                v_side = v_side >> 1;
+                row0 += v_side;
+                row0 = min(max(0, row0), 255);
+                Vec16uc v_res(compress(row0, 0));
+                store_partial(const_int(4), pDst, v_res);
+            }
+            else
+            {
+                store_partial(const_int(4), pDst, tmp16);
+            }
 
             tmp = (Vec2uq)tmp16;
             tmp >>= 32;
@@ -1772,10 +1780,12 @@ void xPredIntraAng4x4(int /*bitDepth*/, 
             store_partial(const_int(4), pDst + dstStride, v_main);
             store_partial(const_int(4), pDst + (2 * dstStride), v_main);
             store_partial(const_int(4), pDst + (3 * dstStride), v_main);
-
-            for (int k = 0; k < 4; k++)
+            if (bFilter)
             {
-                pDst[k * dstStride] = (pixel)Clip3((short)0, (short)((1 << 8) - 1), static_cast<short>((pDst[k * dstStride]) + ((refSide[k + 1] - refSide[0]) >> 1)));
+                for (int k = 0; k < 4; k++)
+                {
+                    pDst[k * dstStride] = (pixel)Clip3((short)0, (short)((1 << 8) - 1), static_cast<short>((pDst[k * dstStride]) + ((refSide[k + 1] - refSide[0]) >> 1)));
+                }
             }
         }
     }
@@ -2222,7 +2232,7 @@ void xPredIntraAng4x4(int /*bitDepth*/, 
         RES = ((thirty2 - v_deltaFract) * ROW1 + (v_deltaFract * ROW2) + 16) >> 5; \
 }
 
-void xPredIntraAng8x8(int bitDepth, pixel* pDst, int dstStride, int width, int dirMode, pixel *refLeft, pixel *refAbove)
+void xPredIntraAng8x8(int bitDepth, pixel* pDst, int dstStride, int width, int dirMode, pixel *refLeft, pixel *refAbove, bool bFilter = true)
 {
     int k;
     int blkSize        = width;
@@ -2274,27 +2284,35 @@ void xPredIntraAng8x8(int bitDepth, pixe
         if (modeHor)
         {
             Vec16uc v_temp;
-            Vec8s v_side_0(refSide[0]); // refSide[0] value in a vector
-
-            v_temp.load(refSide + 1);
-            Vec8s v_side;
-            v_side = extend_low(v_temp);
+            Vec16uc tmp1;
 
             v_temp.load(refMain + 1);
             Vec8s v_main;
             v_main = extend_low(v_temp);
 
-            Vec8s row0;
-            row0 = permute8s<0, 0, 0, 0, 0, 0, 0, 0>(v_main);
-            v_side -= v_side_0;
-            v_side = v_side >> 1;
-            row0 = row0 + v_side;
-            row0 = min(max(0, row0), (1 << bitDepth) - 1);
-
-            Vec16uc tmp1;
-            tmp1 = compress(row0, row0);
-            store_partial(const_int(8), pDst, tmp1);                //row0
-
+            if (bFilter)
+            {
+                Vec8s v_side_0(refSide[0]); // refSide[0] value in a vector
+                Vec16uc v_temp16;
+                v_temp16.load(refSide + 1);
+                Vec8s v_side;
+                v_side = extend_low(v_temp16);
+
+                Vec8s row0;
+                row0 = permute8s<0, 0, 0, 0, 0, 0, 0, 0>(v_main);
+                v_side -= v_side_0;
+                v_side = v_side >> 1;
+                row0 = row0 + v_side;
+                row0 = min(max(0, row0), (1 << bitDepth) - 1);
+
+                tmp1 = compress(row0, row0);
+                store_partial(const_int(8), pDst, tmp1);            //row0
+            }
+            else
+            {
+                tmp1 = permute16uc<0, 0, 0, 0, 0, 0, 0, 0, -256, -256, -256, -256, -256, -256, -256, -256>(v_temp);
+                store_partial(const_int(8), pDst, tmp1); //row0
+            }
             tmp1 = permute16uc<1, 1, 1, 1, 1, 1, 1, 1, -256, -256, -256, -256, -256, -256, -256, -256>(v_temp);
             store_partial(const_int(8), pDst + (1 * dstStride), tmp1); //row1
 
@@ -2329,29 +2347,32 @@ void xPredIntraAng8x8(int bitDepth, pixe
             store_partial(const_int(8), pDst + (6 * dstStride), v_main);
             store_partial(const_int(8), pDst + (7 * dstStride), v_main);
 
-            Vec16uc v_temp;
-            Vec8s v_side_0(refSide[0]); // refSide[0] value in a vector
-
-            v_temp.load(refSide + 1);
-            Vec8s v_side;
-            v_side = extend_low(v_temp);
-
-            v_temp.load(refMain + 1);
-            Vec8s row0;
-            row0 = permute16uc<0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1>(v_temp);
-            v_side -= v_side_0;
-            v_side = v_side >> 1;
-            row0 = row0 + v_side;
-            row0 = min(max(0, row0), (1 << bitDepth) - 1);
-
-            pDst[0 * dstStride] = row0[0];
-            pDst[1 * dstStride] = row0[1];
-            pDst[2 * dstStride] = row0[2];
-            pDst[3 * dstStride] = row0[3];
-            pDst[4 * dstStride] = row0[4];
-            pDst[5 * dstStride] = row0[5];
-            pDst[6 * dstStride] = row0[6];
-            pDst[7 * dstStride] = row0[7];
+            if (bFilter)
+            {
+                Vec16uc v_temp;
+                Vec8s v_side_0(refSide[0]); // refSide[0] value in a vector
+
+                v_temp.load(refSide + 1);
+                Vec8s v_side;
+                v_side = extend_low(v_temp);
+
+                v_temp.load(refMain + 1);
+                Vec8s row0;
+                row0 = permute16uc<0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1>(v_temp);
+                v_side -= v_side_0;
+                v_side = v_side >> 1;
+                row0 = row0 + v_side;
+                row0 = min(max(0, row0), (1 << bitDepth) - 1);
+
+                pDst[0 * dstStride] = row0[0];
+                pDst[1 * dstStride] = row0[1];
+                pDst[2 * dstStride] = row0[2];
+                pDst[3 * dstStride] = row0[3];
+                pDst[4 * dstStride] = row0[4];
+                pDst[5 * dstStride] = row0[5];
+                pDst[6 * dstStride] = row0[6];
+                pDst[7 * dstStride] = row0[7];
+            }
         }
     }
     else if (intraPredAngle == -32)
@@ -2786,7 +2807,7 @@ void xPredIntraAng8x8(int bitDepth, pixe
         BLND2_4(R4, R8); \
 }
 
-void xPredIntraAng16x16(int bitDepth, pixel* pDst, int dstStride, int width, int dirMode, pixel *refLeft, pixel *refAbove)
+void xPredIntraAng16x16(int bitDepth, pixel* pDst, int dstStride, int width, int dirMode, pixel *refLeft, pixel *refAbove, bool bFilter = true)
 {
     int k;
     int blkSize        = width;
@@ -2839,30 +2860,38 @@ void xPredIntraAng16x16(int bitDepth, pi
         if (modeHor)
         {
             Vec16uc v_temp;
-            Vec8s v_side_0(refSide[0]); // refSide[0] value in a vector
-
-            v_temp.load(refSide + 1);
-            Vec8s v_side;
-            v_side = extend_low(v_temp);
-
-            Vec8s row01, row02, ref(refMain[1]);
-            v_side -= v_side_0;
-            v_side = v_side >> 1;
-            row01 = ref + v_side;
-            row01 = min(max(0, row01), (1 << bitDepth) - 1);
-
-            v_side = extend_high(v_temp);
-            v_side -= v_side_0;
-            v_side = v_side >> 1;
-            row02 = ref + v_side;
-            row02 = min(max(0, row02), (1 << bitDepth) - 1);
-
             Vec16uc tmp1;
-            tmp1 = compress(row01, row02);
-            tmp1.store(pDst);                //row0
-
             v_temp.load(refMain + 1);
 
+            if (bFilter)
+            {
+                Vec8s v_side_0(refSide[0]); // refSide[0] value in a vector
+                Vec16uc v_temp16;
+                v_temp16.load(refSide + 1);
+                Vec8s v_side;
+                v_side = extend_low(v_temp16);
+
+                Vec8s row01, row02, ref(refMain[1]);
+                v_side -= v_side_0;
+                v_side = v_side >> 1;
+                row01 = ref + v_side;
+                row01 = min(max(0, row01), (1 << bitDepth) - 1);
+
+                v_side = extend_high(v_temp16);
+                v_side -= v_side_0;
+                v_side = v_side >> 1;
+                row02 = ref + v_side;
+                row02 = min(max(0, row02), (1 << bitDepth) - 1);
+
+                tmp1 = compress(row01, row02);
+                tmp1.store(pDst);            //row0
+            }
+            else
+            {
+                tmp1 = permute16uc<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>(v_temp);
+                tmp1.store(pDst); //row0
+            }
+
             tmp1 = permute16uc<1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>(v_temp);
             tmp1.store(pDst + (1 * dstStride)); //row1
 
@@ -2931,41 +2960,44 @@ void xPredIntraAng16x16(int bitDepth, pi
             _mm_storeu_si128((__m128i*)(pDst + (14 * dstStride)), v_main);
             _mm_storeu_si128((__m128i*)(pDst + (15 * dstStride)), v_main);
 
-            Vec16uc v_temp;
-            Vec8s v_side_0(refSide[0]); // refSide[0] value in a vector
-
-            v_temp.load(refSide + 1);
-            Vec8s v_side;
-            v_side = extend_low(v_temp);
-
-            Vec8s row0, ref(refMain[1]);
-            v_side -= v_side_0;
-            v_side = v_side >> 1;
-            row0 = ref + v_side;
-            row0 = min(max(0, row0), (1 << bitDepth) - 1);
-
-            pDst[0 * dstStride] = row0[0];
-            pDst[1 * dstStride] = row0[1];
-            pDst[2 * dstStride] = row0[2];
-            pDst[3 * dstStride] = row0[3];
-            pDst[4 * dstStride] = row0[4];
-            pDst[5 * dstStride] = row0[5];
-            pDst[6 * dstStride] = row0[6];
-            pDst[7 * dstStride] = row0[7];
-
-            v_side = extend_high(v_temp);
-            v_side -= v_side_0;
-            v_side = v_side >> 1;
-            row0 = ref + v_side;
-            row0 = min(max(0, row0), (1 << bitDepth) - 1);
-            pDst[8 * dstStride] = row0[0];
-            pDst[9 * dstStride] = row0[1];
-            pDst[10 * dstStride] = row0[2];
-            pDst[11 * dstStride] = row0[3];
-            pDst[12 * dstStride] = row0[4];
-            pDst[13 * dstStride] = row0[5];
-            pDst[14 * dstStride] = row0[6];
-            pDst[15 * dstStride] = row0[7];
+            if (bFilter)
+            {
+                Vec16uc v_temp;
+                Vec8s v_side_0(refSide[0]); // refSide[0] value in a vector
+
+                v_temp.load(refSide + 1);
+                Vec8s v_side;
+                v_side = extend_low(v_temp);
+
+                Vec8s row0, ref(refMain[1]);
+                v_side -= v_side_0;
+                v_side = v_side >> 1;
+                row0 = ref + v_side;
+                row0 = min(max(0, row0), (1 << bitDepth) - 1);
+
+                pDst[0 * dstStride] = row0[0];
+                pDst[1 * dstStride] = row0[1];
+                pDst[2 * dstStride] = row0[2];
+                pDst[3 * dstStride] = row0[3];
+                pDst[4 * dstStride] = row0[4];
+                pDst[5 * dstStride] = row0[5];
+                pDst[6 * dstStride] = row0[6];
+                pDst[7 * dstStride] = row0[7];
+
+                v_side = extend_high(v_temp);
+                v_side -= v_side_0;
+                v_side = v_side >> 1;
+                row0 = ref + v_side;
+                row0 = min(max(0, row0), (1 << bitDepth) - 1);
+                pDst[8 * dstStride] = row0[0];
+                pDst[9 * dstStride] = row0[1];
+                pDst[10 * dstStride] = row0[2];
+                pDst[11 * dstStride] = row0[3];
+                pDst[12 * dstStride] = row0[4];
+                pDst[13 * dstStride] = row0[5];
+                pDst[14 * dstStride] = row0[6];
+                pDst[15 * dstStride] = row0[7];
+            }
         }
     }
     else if (intraPredAngle == -32)
@@ -4374,13 +4406,13 @@ void xPredIntraAngBufRef(int bitDepth, p
     switch (width)
     {
     case 4:
-        xPredIntraAng4x4(bitDepth, pDst, dstStride, width, dirMode, refLeft, refAbove);
+        xPredIntraAng4x4(bitDepth, pDst, dstStride, width, dirMode, refLeft, refAbove, bFilter);
         return;
     case 8:
-        xPredIntraAng8x8(bitDepth, pDst, dstStride, width, dirMode, refLeft, refAbove);
+        xPredIntraAng8x8(bitDepth, pDst, dstStride, width, dirMode, refLeft, refAbove, bFilter);
         return;
     case 16:
-        xPredIntraAng16x16(bitDepth, pDst, dstStride, width, dirMode, refLeft, refAbove);
+        xPredIntraAng16x16(bitDepth, pDst, dstStride, width, dirMode, refLeft, refAbove, bFilter);
         return;
 #if !defined(_MSC_VER) /* disable temporarily, to save MSVC build times */
     case 32:
--- a/source/test/intrapredharness.cpp	Fri Jun 07 11:05:24 2013 -0500
+++ b/source/test/intrapredharness.cpp	Fri Jun 07 11:16:25 2013 -0500
@@ -134,11 +134,11 @@ bool IntraPredHarness::check_getIPredAng
     int pmode;
     Bool bFilter;
 
-    for (int width = 4; width <= 64; width <<= 1)
+    for (int width = 4; width <= 16; width <<= 1)
     {
-        bFilter  = (width <= 16);
         for (int i = 0; i <= 100; i++)
         {
+            bFilter = (width <= 16) && (rand()%2);
             for (int p = 2; p <= 34; p++)
             {
                 pmode = p;
@@ -227,7 +227,7 @@ void IntraPredHarness::measureSpeed(cons
     }
     if (opt.getIPredAng)
     {
-        for (int ii = 4; ii <= 64; ii <<= 1)
+        for (int ii = 4; ii <= 16; ii <<= 1)
         {
             for (int p = 2; p <= 34; p += 1)
             {