changeset 569:66c24df49d97

Merged multicoreware/xhevc into default
author Deepthi Devaki Akkoorath <deepthidevaki@multicorewareinc.com>
date Tue, 16 Apr 2013 10:29:42 +0530
parents f05a5d3879f0 (current diff) bd316d5a9edc (diff)
children e93b9f1e9f10
files
diffstat 16 files changed, 948 insertions(+-), 649 deletions(-) [+]
line wrap: on
line diff
--- a/source/VectorClass/vectori128.h	Mon Apr 15 17:43:29 2013 +0530
+++ b/source/VectorClass/vectori128.h	Tue Apr 16 10:29:42 2013 +0530
@@ -3697,6 +3697,9 @@ static inline Vec16uc blend16uc(Vec16uc 
     return Vec16uc( blend16c<i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12,i13,i14,i15> (a,b));
 }
 
+#if _MSC_VER
+#pragma warning(disable: 4700)
+#endif
 
 template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
 static inline Vec8s blend8s(Vec8s const & a, Vec8s const & b) {
--- a/source/encoder/vec/macroblock.inc	Mon Apr 15 17:43:29 2013 +0530
+++ b/source/encoder/vec/macroblock.inc	Tue Apr 16 10:29:42 2013 +0530
@@ -31,7 +31,7 @@
 #define IF_INTERNAL_PREC 14 ///< Number of bits for internal precision
 #define IF_FILTER_PREC    6 ///< Log2 of sum of filter taps
 #define IF_INTERNAL_OFFS (1 << (IF_INTERNAL_PREC - 1)) ///< Offset used internally
- 
+
 void CDECL inversedst(short *tmp, short *block, int shift)  // input tmp, output block
 {
     int rnd_factor = 1 << (shift - 1);
@@ -58,38 +58,10 @@ void CDECL inversedst(short *tmp, short 
     Vec4i c2_final = (74 * c4 + rnd_factor) >> shift;
     Vec4i c3_final = (55 * c0_total + 29 * c2_total - c3_total + rnd_factor) >> shift;
 
-    Vec4i first_arg(-32768);
-    Vec4i second_arg(32767);
-
-    Vec4i max_number1 = max(first_arg, c0_final);
-    Vec4i mid_number1 = min(max_number1, second_arg);
-    Vec4i max_number2 = max(first_arg, c1_final);
-    Vec4i mid_number2 = min(max_number2, second_arg);
-    Vec8s half = compress(mid_number1, mid_number2);
-
-    block[0] = half[0];
-    block[4] = half[1];
-    block[8] = half[2];
-    block[12] = half[3];
-    block[1] = half[4];
-    block[5] = half[5];
-    block[9] = half[6];
-    block[13] = half[7];
-
-    max_number1 = max(first_arg, c2_final);
-    mid_number1 = min(max_number1, second_arg);
-    max_number2 = max(first_arg, c3_final);
-    mid_number2 = min(max_number2, second_arg);
-    half = compress(mid_number1, mid_number2);
-
-    block[2] = half[0];
-    block[6] = half[1];
-    block[10] = half[2];
-    block[14] = half[3];
-    block[3] = half[4];
-    block[7] = half[5];
-    block[11] = half[6];
-    block[15] = half[7];
+    Vec8s half0 = compress(min(max(-32768, c0_final), 32767), min(max(-32768, c1_final), 32767));
+    Vec8s half1 = compress(min(max(-32768, c2_final), 32767), min(max(-32768, c3_final), 32767));
+    blend8s<0, 4, 8, 12, 1, 5, 9, 13>(half0, half1).store(block);
+    blend8s<2, 6, 10, 14, 3, 7, 11, 15>(half0, half1).store(block+8);
 }
 
 template<bool isFirst, bool isLast>
--- a/source/encoder/vec/pixel.inc	Mon Apr 15 17:43:29 2013 +0530
+++ b/source/encoder/vec/pixel.inc	Tue Apr 16 10:29:42 2013 +0530
@@ -26,75 +26,82 @@
 template<int lx, int ly>
 int CDECL sad_4(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
 {
-    int uiSum = 0;  // This is slower than the C primitve, so I'm not using it
+    Vec8s sum(0);
 
-    for (int Row = 0; Row < ly; Row++)
+    for (int row = 0; row < ly; row++)
     {
         for (int col = 0; col < lx; col += 4)
         {
             Vec8s m1, n1;
-            m1.load_partial(4, piOrg + col);
-            n1.load_partial(4, piCur + col);
-            m1 = m1 - n1;
-            m1 = abs(m1);
-            uiSum += horizontal_add_x(m1);
+            m1.load(piOrg + col);
+            n1.load(piCur + col);
+            sum += abs(m1 - n1);
         }
 
         piOrg += strideOrg;
         piCur += strideCur;
     }
 
-    return uiSum;
+    return horizontal_add(extend_low(sum));
 }
 
 template<int lx, int ly>
 int CDECL sad_8(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
 {
-    int uiSum = 0;
+    Vec8s sum(0);
 
-    for (int Row = 0; Row < ly; Row++)
+    for (int row = 0; row < ly; row++)
     {
         for (int col = 0; col < lx; col += 8)
         {
             Vec8s m1, n1;
-            m1.load(piOrg + col);
+            m1.load_a(piOrg + col);
             n1.load(piCur + col);
-            m1 = m1 - n1;
-            m1 = abs(m1);
-            uiSum += horizontal_add_x(m1);
+            sum += abs(m1 - n1);
         }
 
         piOrg += strideOrg;
         piCur += strideCur;
     }
 
-    return uiSum;
+    return horizontal_add_x(sum);
 }
 
 template<int lx, int ly>
 int CDECL sad_16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
 {
-    int uiSum = 0;
+    Vec16s sum(0);
 
-    for (int Row = 0; Row < ly; Row++)
+    for (int row = 0; row < ly; row++)
     {
         for (int col = 0; col < lx; col += 16)
         {
             Vec16s m1, n1;
-            m1.load(piOrg + col);
+            m1.load_a(piOrg + col);
             n1.load(piCur + col);
-            m1 = m1 - n1;
-            m1 = abs(m1);
-            uiSum += horizontal_add_x(m1);
+            sum += abs(m1 - n1);
         }
 
         piOrg += strideOrg;
         piCur += strideCur;
     }
 
-    return uiSum;
+    return horizontal_add(extend_low(sum)) +
+           horizontal_add(extend_high(sum));
 }
 
+template<int lx, int ly>                       // SAD of an lx-by-ly block, computed as the sum of sad_16<16,16> over 16x16 tiles
+int CDECL sad_16x16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+{
+    int sum = 0;                               // 32-bit running total across all tiles
+
+    for (int row = 0; row < ly; row += 16)     // NOTE(review): presumably tiled so sad_16's 16-bit lane sums stay small - confirm
+        for (int col = 0; col < lx; col += 16)
+            sum += sad_16<16,16>(piOrg + row * strideOrg + col, strideOrg,   // top-left corner of the current tile
+                                 piCur + row * strideCur + col, strideCur);
+
+    return sum;
+}
 
 int CDECL satd_4x4(pixel * piOrg, intptr_t iStrideOrg, pixel * piCur, intptr_t iStrideCur)
 {
@@ -382,16 +389,16 @@ void Setup_Vec_PixelPrimitives(EncoderPr
     p.sad[PARTITION_32x8] = sad_16<32, 8>;
     p.sad[PARTITION_16x32] = sad_16<16, 32>;
     p.sad[PARTITION_32x16] = sad_16<32, 16>;
-    p.sad[PARTITION_32x32] = sad_16<32, 32>;
+    p.sad[PARTITION_32x32] = sad_16x16<32, 32>;
     p.sad[PARTITION_4x64] = sad_4<4, 64>;
     p.sad[PARTITION_64x4] = sad_16<64, 4>;
     p.sad[PARTITION_64x8] = sad_16<64, 8>;
     p.sad[PARTITION_8x64] = sad_8<8, 64>;
-    p.sad[PARTITION_16x64] = sad_16<16, 64>;
-    p.sad[PARTITION_64x16] = sad_16<64, 16>;
-    p.sad[PARTITION_32x64] = sad_16<32, 64>;
-    p.sad[PARTITION_64x32] = sad_16<64, 32>;
-    p.sad[PARTITION_64x64] = sad_16<64, 64>;
+    p.sad[PARTITION_16x64] = sad_16x16<16, 64>;
+    p.sad[PARTITION_64x16] = sad_16x16<64, 16>;
+    p.sad[PARTITION_32x64] = sad_16x16<32, 64>;
+    p.sad[PARTITION_64x32] = sad_16x16<64, 32>;
+    p.sad[PARTITION_64x64] = sad_16x16<64, 64>;
 
     p.satd[PARTITION_4x4] = satd_4x4;
     p.satd[PARTITION_4x8] = satd<4, 8>;
--- a/source/encoder/vec/sse2.cpp	Mon Apr 15 17:43:29 2013 +0530
+++ b/source/encoder/vec/sse2.cpp	Tue Apr 16 10:29:42 2013 +0530
@@ -23,7 +23,7 @@
 
 /* this file instantiates SSE2 versions of the vectorized primitives */
 
-#define INSTRSET 3
+#define INSTRSET 2
 #include "vectorclass.h"
 
 #define ARCH sse2
--- a/source/encoder/vec/sse3.cpp	Mon Apr 15 17:43:29 2013 +0530
+++ b/source/encoder/vec/sse3.cpp	Tue Apr 16 10:29:42 2013 +0530
@@ -23,7 +23,7 @@
 
 /* this file instantiates SSE3 versions of the vectorized primitives */
 
-#define INSTRSET 4
+#define INSTRSET 3
 #include "vectorclass.h"
 
 #define ARCH sse3
--- a/source/encoder/vec/ssse3.cpp	Mon Apr 15 17:43:29 2013 +0530
+++ b/source/encoder/vec/ssse3.cpp	Tue Apr 16 10:29:42 2013 +0530
@@ -23,7 +23,7 @@
 
 /* this file instantiates SSSE3 versions of the vectorized primitives */
 
-#define INSTRSET 5
+#define INSTRSET 4
 #include "vectorclass.h"
 
 #define ARCH ssse3
--- a/source/test/CMakeLists.txt	Mon Apr 15 17:43:29 2013 +0530
+++ b/source/test/CMakeLists.txt	Tue Apr 16 10:29:42 2013 +0530
@@ -21,5 +21,9 @@ if(GCC)
     add_definitions(-Wno-sign-compare)
 endif(GCC)
 
-add_executable(TestBench testbench.cpp unittest.cpp unittest.h)
+add_executable(TestBench testbench.cpp testharness.h timer.cpp
+    pixelharness.cpp pixelharness.h
+    filterharness.cpp filterharness.h
+    mbdstharness.cpp mbdstharness.h)
+
 target_link_libraries(TestBench x265 HM ${PLATFORM_LIBS})
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/test/filterharness.cpp	Tue Apr 16 10:29:42 2013 +0530
@@ -0,0 +1,185 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@multicorewareinc.com.
+ *****************************************************************************/
+
+#include "filterharness.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace x265;
+
+const short m_lumaFilter[4][8] =               // 4 rows of 8 filter taps; the taps of every row sum to 64
+{
+    { 0, 0,   0, 64,  0,   0, 0,  0 },
+    { -1, 4, -10, 58, 17,  -5, 1,  0 },
+    { -1, 4, -11, 40, 40, -11, 4, -1 },
+    { 0, 1,  -5, 17, 58, -10, 4, -1 }
+};
+
+const char *FilterConf_names[] =               // Printable names; looked up with the same index as the filter[] primitive table
+{
+    // Naming convention: <direction>_N=<tap count>_isFirst=<0|1>_isLast=<0|1>
+    "Hor_N=4_isFirst=0_isLast=0",
+    "Hor_N=4_isFirst=0_isLast=1",
+    "Hor_N=4_isFirst=1_isLast=0",
+    "Hor_N=4_isFirst=1_isLast=1",
+
+    "Hor_N=8_isFirst=0_isLast=0",
+    "Hor_N=8_isFirst=0_isLast=1",
+    "Hor_N=8_isFirst=1_isLast=0",
+    "Hor_N=8_isFirst=1_isLast=1",
+
+    "Ver_N=4_isFirst=0_isLast=0",
+    "Ver_N=4_isFirst=0_isLast=1",
+    "Ver_N=4_isFirst=1_isLast=0",
+    "Ver_N=4_isFirst=1_isLast=1",
+
+    "Ver_N=8_isFirst=0_isLast=0",
+    "Ver_N=8_isFirst=0_isLast=1",
+    "Ver_N=8_isFirst=1_isLast=0",
+    "Ver_N=8_isFirst=1_isLast=1"
+};
+
+FilterHarness::FilterHarness()                 // Allocates the input and two output buffers and fills the input with random data
+{
+    ipf_t_size = 200 * 200;                    // Element count shared by all three buffers
+    pixel_buff = (pixel*)malloc(ipf_t_size * sizeof(pixel));     // Input buffer; assumes max_height = max_width = max_srcStride = max_dstStride = 100
+    IPF_vec_output = (short*)malloc(ipf_t_size * sizeof(short)); // Output buffer for the optimized primitive
+    IPF_C_output = (short*)malloc(ipf_t_size * sizeof(short));   // Output buffer for the reference primitive
+
+    if (!pixel_buff || !IPF_vec_output || !IPF_C_output)         // Any failed allocation aborts the whole test run
+    {
+        fprintf(stderr, "init_IPFilter_buffers: malloc failed, unable to initiate tests!\n");
+        exit(-1);
+    }
+
+    for (int i = 0; i < ipf_t_size; i++)                         // Initialize input buffer
+    {
+        int isPositive = rand() & 1;                             // Low bit of rand() picks the sign
+        isPositive = (isPositive) ? 1 : -1;
+        pixel_buff[i] = isPositive * (rand() & PIXEL_MAX);       // NOTE(review): a negative product wraps if pixel is unsigned - confirm intent
+    }
+}
+
+FilterHarness::~FilterHarness()                // Releases the three buffers malloc'ed by the constructor
+{
+    free(IPF_vec_output);
+    free(IPF_C_output);
+    free(pixel_buff);
+}
+
+bool FilterHarness::check_IPFilter_primitive(IPFilter ref, IPFilter opt) // Runs ref and opt on identical random inputs; true if outputs match
+{
+    int rand_height = rand() % 100;                 // Random height in [0, 99]; '%' gives a range, '&' would only mask bits
+    int rand_width = rand() % 100;                  // Random width in [0, 99]
+    short rand_val, rand_srcStride, rand_dstStride;
+
+    for (int i = 0; i <= 100; i++)
+    {
+        memset(IPF_vec_output, 0, ipf_t_size * sizeof(short)); // ipf_t_size counts shorts, so scale to bytes
+        memset(IPF_C_output, 0, ipf_t_size * sizeof(short));   // Both outputs start from the same all-zero state
+
+        rand_val = rand() % 4;                      // Random row of m_lumaFilter, which has exactly 4 rows
+        rand_srcStride = rand() % 100;              // Random srcStride in [0, 99]
+        rand_dstStride = rand() % 100;              // Random dstStride in [0, 99]
+
+        opt((short*)m_lumaFilter[rand_val],         // Index one row; 'm_lumaFilter + n' would advance n whole rows
+            pixel_buff,
+            rand_srcStride,
+            (pixel*)IPF_vec_output,
+            rand_dstStride,
+            rand_height,
+            rand_width,
+            BIT_DEPTH);
+        ref((short*)m_lumaFilter[rand_val],         // Same coefficients and geometry for the reference run
+            pixel_buff,
+            rand_srcStride,
+            (pixel*)IPF_C_output,
+            rand_dstStride,
+            rand_height,
+            rand_width,
+            BIT_DEPTH);
+
+        if (memcmp(IPF_vec_output, IPF_C_output, ipf_t_size * sizeof(short))) // Compare the complete buffers
+            return false;
+    }
+
+    return true;
+}
+
+bool FilterHarness::testCorrectness( const EncoderPrimitives& ref, const EncoderPrimitives& opt ) // true when every checked filter matches its reference
+{
+    for (int value = 4; value < 8; value++)    // Only indices 4..7 are checked: the "Hor_N=8_*" entries of FilterConf_names
+    {
+        if (opt.filter[value])                 // Skip slots with no optimized implementation
+        {
+            if (!check_IPFilter_primitive(ref.filter[value], opt.filter[value]))
+            {
+                printf("\nfilter[%s] failed\n", FilterConf_names[value]);
+                return false;                  // Stop at the first mismatch
+            }
+        }
+    }
+
+    return true;
+}
+
+#define FILTER_ITERATIONS   100000
+
+void FilterHarness::measureSpeed( const EncoderPrimitives& ref, const EncoderPrimitives& opt )
+{
+    Timer *t = Timer::CreateTimer();
+
+    /* Times each optimized filter against its C reference on one fixed random configuration */
+    int rand_height = rand() % 100;             // Randomly generated Height
+    int rand_width = rand() % 100;              // Randomly generated Width
+    short rand_val, rand_srcStride, rand_dstStride;
+
+    rand_val = rand() % 4;                      // Random row of m_lumaFilter, which has exactly 4 rows
+    rand_srcStride = rand() % 100;              // Randomly generated srcStride
+    rand_dstStride = rand() % 100;              // Randomly generated dstStride
+
+    for (int value = 4; value < 8; value++)     // Same index range as testCorrectness
+    {
+        memset(IPF_vec_output, 0, ipf_t_size * sizeof(short));  // ipf_t_size counts shorts, so scale to bytes
+        memset(IPF_C_output, 0, ipf_t_size * sizeof(short));    // Initialize output buffer to zero
+        if (opt.filter[value])
+        {
+            t->Start();
+            for (int j = 0; j < FILTER_ITERATIONS; j++)
+                opt.filter[value]((short*)m_lumaFilter[rand_val], pixel_buff, rand_srcStride, (pixel*)IPF_vec_output,
+                                      rand_dstStride, rand_height, rand_width, BIT_DEPTH);
+            t->Stop();
+            printf("\nfilter[%s]\tVec: (%1.2f ms) ", FilterConf_names[value], t->ElapsedMS());
+
+            t->Start();
+            for (int j = 0; j < FILTER_ITERATIONS; j++)   // Reference run writes to its own buffer, IPF_C_output
+                ref.filter[value]((short*)m_lumaFilter[rand_val], pixel_buff, rand_srcStride, (pixel*)IPF_C_output,
+                                    rand_dstStride, rand_height, rand_width, BIT_DEPTH);
+            t->Stop();
+            printf("\tC: (%1.2f ms) ", t->ElapsedMS());
+        }
+    }
+
+    t->Release();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/test/filterharness.h	Tue Apr 16 10:29:42 2013 +0530
@@ -0,0 +1,53 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@multicorewareinc.com.
+ *****************************************************************************/
+
+#ifndef _FILTERHARNESS_H_1
+#define _FILTERHARNESS_H_1 1
+
+#include "testharness.h"
+#include "primitives.h"
+
+class FilterHarness : public TestHarness       // Test harness for the interpolation filter primitives (filter[] table)
+{
+protected:
+
+    pixel *pixel_buff;                         // Random input samples shared by both implementations
+
+    short *IPF_vec_output, *IPF_C_output;      // Outputs of the optimized and of the reference primitive
+
+    int ipf_t_size;                            // Number of elements in each of the three buffers
+
+    bool check_IPFilter_primitive(x265::IPFilter ref, x265::IPFilter opt); // true if opt reproduces ref's output
+
+public:
+
+    FilterHarness();                           // Allocates and randomizes the buffers
+
+    virtual ~FilterHarness();                  // Frees the buffers
+
+    bool testCorrectness(const x265::EncoderPrimitives& ref, const x265::EncoderPrimitives& opt);
+
+    void measureSpeed(const x265::EncoderPrimitives& ref, const x265::EncoderPrimitives& opt);
+};
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/test/mbdstharness.cpp	Tue Apr 16 10:29:42 2013 +0530
@@ -0,0 +1,113 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@multicorewareinc.com.
+ *****************************************************************************/
+
+#include "mbdstharness.h"
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+using namespace x265;
+
+MBDstHarness::MBDstHarness()                   // Allocates the input and both output blocks and randomizes the input
+{
+    mb_t_size = 32;                            // Byte size of one output block
+
+    mbuf1 = (short*)malloc(0x1e00 * sizeof(short)); // Elements are shorts; sizing by sizeof(pixel) can under-allocate
+    mbuf2 = (short*)malloc(mb_t_size);         // Output of the optimized primitive
+    mbuf3 = (short*)malloc(mb_t_size);         // Output of the reference primitive
+    if (!mbuf1 || !mbuf2 || !mbuf3)
+    {
+        fprintf(stderr, "malloc failed, unable to initiate tests!\n");
+        exit(1);
+    }
+
+    for (int i = 0; i < 64*100; i++)           // 6400 shorts written, within the 0x1e00 (7680) shorts allocated
+        mbuf1[i] = rand() & PIXEL_MAX;
+    memset(mbuf2, 0, mb_t_size);
+    memset(mbuf3, 0, mb_t_size);
+}
+
+MBDstHarness::~MBDstHarness()                  // Releases the three buffers malloc'ed by the constructor
+{
+    free(mbuf1);
+    free(mbuf2);
+    free(mbuf3);
+}
+
+bool MBDstHarness::check_mbdst_primitive(mbdst ref, mbdst opt) // true if opt matches ref on 101 successive input windows
+{
+    int j = 0;                                 // Offset of the current input window inside mbuf1
+
+    for (int i = 0; i <= 100; i++)
+    {
+        opt(mbuf1 + j, mbuf2, 16);             // NOTE(review): third argument looks like inversedst's shift - confirm
+        ref(mbuf1 + j, mbuf3, 16);
+
+        if (memcmp(mbuf2, mbuf3, mb_t_size))   // Byte-wise comparison of the two output blocks
+            return false;
+
+        j += 16;                               // Advance to the next 16-short window
+        memset(mbuf2, 0, mb_t_size);           // Reset both outputs before the next round
+        memset(mbuf3, 0, mb_t_size);
+    }
+
+    return true;
+}
+
+bool MBDstHarness::testCorrectness( const EncoderPrimitives& ref, const EncoderPrimitives& opt ) // true if opt.inversedst is absent or matches ref
+{
+    if (opt.inversedst)                        // Nothing to verify when no optimized version is provided
+    {
+        if (!check_mbdst_primitive(ref.inversedst, opt.inversedst))
+        {
+            printf("Inversedst: Failed!\n");
+            return false;
+        }
+    }
+
+    return true;
+}
+
+#define MBDST_ITERATIONS 4000000
+
+void MBDstHarness::measureSpeed( const EncoderPrimitives& ref, const EncoderPrimitives& opt ) // Prints timings of opt vs ref inversedst
+{
+    Timer *t = Timer::CreateTimer();
+
+    if (opt.inversedst)                        // Nothing is timed unless an optimized version exists
+    {
+        t->Start();
+        for (int j = 0; j < MBDST_ITERATIONS; j++)
+            opt.inversedst(mbuf1, mbuf2, 16);
+        t->Stop();
+        printf("\nInverseDST\tVec: (%1.2f ms) ", t->ElapsedMS());
+
+        t->Start();
+        for (int j = 0; j < MBDST_ITERATIONS; j++)
+            ref.inversedst(mbuf1, mbuf2, 16);  // The reference run reuses mbuf2 as its output as well
+        t->Stop();
+        printf("\tC: (%1.2f ms) ", t->ElapsedMS());
+    }
+
+    t->Release();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/test/mbdstharness.h	Tue Apr 16 10:29:42 2013 +0530
@@ -0,0 +1,50 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@multicorewareinc.com.
+ *****************************************************************************/
+
+#ifndef _MBDSTHARNESS_H_1
+#define _MBDSTHARNESS_H_1 1
+
+#include "testharness.h"
+#include "primitives.h"
+
+class MBDstHarness : public TestHarness        // Test harness for the inversedst primitive
+{
+protected:
+
+    short *mbuf1, *mbuf2, *mbuf3;              // mbuf1: random input; mbuf2 / mbuf3: outputs of opt / ref
+
+    int mb_t_size;                             // Size in bytes of each output buffer
+
+    bool check_mbdst_primitive(x265::mbdst ref, x265::mbdst opt); // true if opt reproduces ref's output
+
+public:
+    MBDstHarness();                            // Allocates and initializes the buffers
+
+    virtual ~MBDstHarness();                   // Frees the buffers
+
+    bool testCorrectness(const x265::EncoderPrimitives& ref, const x265::EncoderPrimitives& opt);
+
+    void measureSpeed(const x265::EncoderPrimitives& ref, const x265::EncoderPrimitives& opt);
+};
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/test/pixelharness.cpp	Tue Apr 16 10:29:42 2013 +0530
@@ -0,0 +1,205 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@multicorewareinc.com.
+ *****************************************************************************/
+
+#include "pixelharness.h"
+#include "primitives.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace x265;
+
+// Initialize the Func Names for all the Pixel Comp
+static const char *FuncNames[NUM_PARTITIONS] = // Printable partition names; presumably in partition-enum order - confirm against primitives.h
+{
+    "4x4", "8x4", "4x8", "8x8", "4x16", "16x4", "8x16", "16x8", "16x16",
+    "4x32", "32x4", "8x32", "32x8", "16x32", "32x16", "32x32", "4x64",
+    "64x4", "8x64", "64x8", "16x64", "64x16", "32x64", "64x32", "64x64"
+};
+
+#if HIGH_BIT_DEPTH
+#define BIT_DEPTH 10
+#else
+#define BIT_DEPTH 8
+#endif
+
+#define PIXEL_MAX ((1 << BIT_DEPTH) - 1) // Largest sample value; also used as the mask for random test data
+
+#define PIXELCMP_ITERATIONS 2000000 // Calls per primitive in measureSpeed
+
+#define INCR 16 // Step applied to the pbuf2 offset between correctness checks
+#define STRIDE 16 // Stride passed for both buffers in every primitive call
+
+PixelHarness::PixelHarness()                   // Allocates both pixel buffers and fills them with random samples
+{
+    pbuf1 = (pixel*)malloc(0x1e00 * sizeof(pixel)); // 0x1e00 = 7680 pixels per buffer
+    pbuf2 = (pixel*)malloc(0x1e00 * sizeof(pixel));
+    if (!pbuf1 || !pbuf2)
+    {
+        fprintf(stderr, "malloc failed, unable to initiate tests!\n");
+        exit(1);
+    }
+
+    for (int i = 0; i < 0x1e00; i++)
+    {
+        // Fill both buffers with random samples masked to [0, PIXEL_MAX]
+        pbuf1[i] = rand() & PIXEL_MAX;
+        pbuf2[i] = rand() & PIXEL_MAX;
+    }
+}
+
+PixelHarness::~PixelHarness()                  // Releases the two buffers malloc'ed by the constructor
+{
+    free(pbuf1);
+    free(pbuf2);
+}
+
+bool PixelHarness::check_pixel_primitive(pixelcmp ref, pixelcmp opt) // true if opt and ref return equal results on all 101 probes
+{
+    int j = 0;                                 // Offset into pbuf2; pbuf1 is always used from its start
+
+    for (int i = 0; i <= 100; i++)
+    {
+        int vres = opt(pbuf1, STRIDE, pbuf2 + j, STRIDE); // Result of the optimized primitive
+        int cres = ref(pbuf1, STRIDE, pbuf2 + j, STRIDE); // Result of the reference primitive
+        if (vres != cres)
+            return false;
+
+        j += INCR;                             // Shift the second block for the next comparison
+    }
+
+    return true;
+}
+
+bool PixelHarness::testCorrectness( const EncoderPrimitives& ref, const EncoderPrimitives& opt ) // true when every provided primitive matches ref
+{
+   for (uint16_t curpar = 0; curpar < NUM_PARTITIONS; curpar++) // satd and sad are checked for each partition index
+   {
+        if (opt.satd[curpar])                  // Slots without an optimized implementation are skipped
+        {
+            if (!check_pixel_primitive(ref.satd[curpar], opt.satd[curpar]))
+            {
+                printf("satd[%s]: failed!\n", FuncNames[curpar]);
+                return false;                  // Stop at the first mismatch
+            }
+        }
+
+        if (opt.sad[curpar])
+        {
+            if (!check_pixel_primitive(ref.sad[curpar], opt.sad[curpar]))
+            {
+                printf("sad[%s]: failed!\n", FuncNames[curpar]);
+                return false;
+            }
+        }
+    }
+
+    if (opt.sa8d_8x8)                          // The two sa8d primitives are single entries, not per-partition tables
+    {
+        if (!check_pixel_primitive(ref.sa8d_8x8, opt.sa8d_8x8))
+        {
+            printf("sa8d_8x8: failed!\n");
+            return false;
+        }
+    }
+
+    if (opt.sa8d_16x16)
+    {
+        if (!check_pixel_primitive(ref.sa8d_16x16, opt.sa8d_16x16))
+        {
+            printf("sa8d_16x16: failed!\n");
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void PixelHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt) // Prints timings of each opt primitive next to its reference
+{
+    Timer *t = Timer::CreateTimer();           // One timer reused for every measurement
+
+    for (int curpar = 0; curpar < NUM_PARTITIONS; curpar++)
+    {
+        if (opt.satd[curpar])                  // Only slots with an optimized implementation are timed
+        {
+            t->Start();
+            for (int j = 0; j < PIXELCMP_ITERATIONS; j++)
+                opt.satd[curpar](pbuf1, STRIDE, pbuf2, STRIDE);
+            t->Stop();
+            printf("\nsatd[%s]\tVec: (%1.2f ms) ", FuncNames[curpar], t->ElapsedMS());
+
+            t->Start();
+            for (int j = 0; j < PIXELCMP_ITERATIONS; j++)
+                ref.satd[curpar](pbuf1, STRIDE, pbuf2, STRIDE);
+            t->Stop();
+            printf("\tC: (%1.2f ms) ", t->ElapsedMS());
+        }
+
+        if (opt.sad[curpar])
+        {
+            t->Start();
+            for (int j = 0; j < PIXELCMP_ITERATIONS; j++)
+                opt.sad[curpar](pbuf1, STRIDE, pbuf2, STRIDE);
+            t->Stop();
+            printf("\nsad[%s]\tVec: (%1.2f ms) ", FuncNames[curpar], t->ElapsedMS());
+
+            t->Start();
+            for (int j = 0; j < PIXELCMP_ITERATIONS; j++)
+                ref.sad[curpar](pbuf1, STRIDE, pbuf2, STRIDE);
+            t->Stop();
+            printf("\tC: (%1.2f ms) ", t->ElapsedMS());
+        }
+    }
+
+    if (opt.sa8d_8x8)                          // The sa8d primitives are timed once each, outside the partition loop
+    {
+        t->Start();
+        for (int j = 0; j < PIXELCMP_ITERATIONS; j++)
+            opt.sa8d_8x8(pbuf1, STRIDE, pbuf2, STRIDE);
+        t->Stop();
+        printf("\nsa8d_8x8\tVec: (%1.2f ms) ", t->ElapsedMS());
+
+        t->Start();
+        for (int j = 0; j < PIXELCMP_ITERATIONS; j++)
+            ref.sa8d_8x8(pbuf1, STRIDE, pbuf2, STRIDE);
+        t->Stop();
+        printf("\tC: (%1.2f ms) ", t->ElapsedMS());
+    }
+
+    if (opt.sa8d_16x16)
+    {
+        t->Start();
+        for (int j = 0; j < PIXELCMP_ITERATIONS; j++)
+            opt.sa8d_16x16(pbuf1, STRIDE, pbuf2, STRIDE);
+        t->Stop();
+        printf("\nsa8d_16x16\tVec: (%1.2f ms) ", t->ElapsedMS());
+
+        t->Start();
+        for (int j = 0; j < PIXELCMP_ITERATIONS; j++)
+            ref.sa8d_16x16(pbuf1, STRIDE, pbuf2, STRIDE);
+        t->Stop();
+        printf("\tC: (%1.2f ms) ", t->ElapsedMS());
+    }
+
+    t->Release();                              // Hand the timer back once all measurements are printed
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/test/pixelharness.h	Tue Apr 16 10:29:42 2013 +0530
@@ -0,0 +1,48 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@multicorewareinc.com.
+ *****************************************************************************/
+
+#ifndef _PIXELHARNESS_H_1
+#define _PIXELHARNESS_H_1 1
+
+#include "testharness.h"
+#include "primitives.h"
+
+class PixelHarness : public TestHarness        // Test harness for the pixel comparison primitives (satd, sad, sa8d)
+{
+protected:
+
+    pixel *pbuf1, *pbuf2;                      // Two random sample buffers compared against each other
+
+    bool check_pixel_primitive(x265::pixelcmp ref, x265::pixelcmp opt); // true if opt returns the same values as ref
+
+public:
+    PixelHarness();                            // Allocates and randomizes the buffers
+
+    virtual ~PixelHarness();                   // Frees the buffers
+
+    bool testCorrectness(const x265::EncoderPrimitives& ref, const x265::EncoderPrimitives& opt);
+
+    void measureSpeed(const x265::EncoderPrimitives& ref, const x265::EncoderPrimitives& opt );
+};
+
+#endif
--- a/source/test/testbench.cpp	Mon Apr 15 17:43:29 2013 +0530
+++ b/source/test/testbench.cpp	Tue Apr 16 10:29:42 2013 +0530
@@ -23,577 +23,20 @@
  * For more information, contact us at licensing@multicorewareinc.com.
  *****************************************************************************/
 
-#include "unittest.h"
 #include "primitives.h"
-#include <time.h>
-#include <iostream>
-#include <ctype.h>
-#include <sys/types.h>
-#include <sys/timeb.h>
-#include <time.h>
+#include "pixelharness.h"
+#include "filterharness.h"
+#include "mbdstharness.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <math.h>
-
-// Code snippet from http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html begins
-// this part is windows implementation of Gettimeoffday() function
-
-#ifndef _TIMEVAL_H
-#define _TIMEVAL_H
-
-#ifdef _WIN32
-
-#define WIN32_LEAN_AND_MEAN
-#include <winsock2.h>
 #include <time.h>
 
-#if defined(_MSC_VER) || defined(__BORLANDC__)
-#define EPOCHFILETIME (116444736000000000i64)
-#else
-#define EPOCHFILETIME (116444736000000000LL)
-#endif
-__inline int gettimeofday(struct timeval *tv,  struct timezone *tz)
-{
-    FILETIME        ft;
-    LARGE_INTEGER   li;
-    __int64         t;
-
-    if (tv)
-    {
-        GetSystemTimeAsFileTime(&ft);
-        li.LowPart  = ft.dwLowDateTime;
-        li.HighPart = ft.dwHighDateTime;
-        t  = li.QuadPart;       /* In 100-nanosecond intervals */
-        t -= EPOCHFILETIME;     /* Offset to the Epoch time */
-        t /= 10;                /* In microseconds */
-        tv->tv_sec  = (long)(t / 1000000);
-        tv->tv_usec = (long)(t % 1000000);
-    }
-
-    return 0;
-}
-
-#else  /* _WIN32 */
-
-#include <sys/time.h>
-
-#endif /* _WIN32 */
-
-#endif /* _TIMEVAL_H */
-
-// Code snippet from http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html ends
-
 using namespace x265;
 
-/* Used for filter */
-#define IF_INTERNAL_PREC 14 ///< Number of bits for internal precision
-#define IF_FILTER_PREC    6 ///< Log2 of sum of filter taps
-#define IF_INTERNAL_OFFS (1 << (IF_INTERNAL_PREC - 1)) ///< Offset used internally
-#define NTAPS_LUMA       8 ///< Number of taps for luma
-const short m_lumaFilter[4][NTAPS_LUMA] =
-{
-{
-    0, 0,   0, 64,  0,   0, 0,  0
-},
-{
-    -1, 4, -10, 58, 17,  -5, 1,  0
-},
-{
-    -1, 4, -11, 40, 40, -11, 4, -1
-},
-{
-    0, 1,  -5, 17, 58, -10, 4, -1
-}
-};
-char FilterConf_names[16][40] =
-{
-    //Naming convention used is - isVertical_N_isFirst_isLast
-    "Hor_N=4_isFirst=0_isLast=0",
-    "Hor_N=4_isFirst=0_isLast=1",
-    "Hor_N=4_isFirst=1_isLast=0",
-    "Hor_N=4_isFirst=1_isLast=1",
-
-    "Hor_N=8_isFirst=0_isLast=0",
-    "Hor_N=8_isFirst=0_isLast=1",
-    "Hor_N=8_isFirst=1_isLast=0",
-    "Hor_N=8_isFirst=1_isLast=1",
-
-    "Ver_N=4_isFirst=0_isLast=0",
-    "Ver_N=4_isFirst=0_isLast=1",
-    "Ver_N=4_isFirst=1_isLast=0",
-    "Ver_N=4_isFirst=1_isLast=1",
-
-    "Ver_N=8_isFirst=0_isLast=0",
-    "Ver_N=8_isFirst=0_isLast=1",
-    "Ver_N=8_isFirst=1_isLast=0",
-    "Ver_N=8_isFirst=1_isLast=1"
-};
-pixel *pixel_buff;
-short *IPF_vec_output, *IPF_C_output;
-int t_size;
-
-/* pbuf1, pbuf2: initialized to random pixel data and shouldn't write into them. */
-pixel *pbuf1, *pbuf2;
-short *mbuf1, *mbuf2, *mbuf3;
-#define BENCH_ALIGNS 16
-
-// Initialize the Func Names for all the Pixel Comp
-static const char *FuncNames[NUM_PARTITIONS] =
-{
-    "4x4", "8x4", "4x8", "8x8", "4x16", "16x4", "8x16", "16x8", "16x16", "4x32", "32x4", "8x32",
-    "32x8", "16x32", "32x16", "32x32", "4x64", "64x4", "8x64", "64x8", "16x64", "64x16", "32x64", "64x32", "64x64"
-};
-
-#if HIGH_BIT_DEPTH
-#define BIT_DEPTH 10
-#else
-#define BIT_DEPTH 8
-#endif
-
-#define PIXEL_MAX ((1 << BIT_DEPTH) - 1)
-
-/* To-do List: Generate the stride values at run time in each run
- *
- */
-
-#define MILSECS_IN_SEC 1000     // Number of milliseconds in a second
-#define NUM_ITERATIONS_CYCLE 10000000    // Number of iterations for cycle count
-#define INCR 16     // Number of bytes the input window shifts across the total buffer
-#define STRIDE 16   // Stride value used while calling primitives
-
-static double timevaldiff(struct timeval *starttime, struct timeval *finishtime)
-{
-    double msec;
-
-    msec =  (finishtime->tv_sec - starttime->tv_sec) * MILSECS_IN_SEC;
-    msec += (double)(finishtime->tv_usec - starttime->tv_usec) / MILSECS_IN_SEC;
-    return msec;
-}
-
-static void check_cycle_count(const EncoderPrimitives& cprim, const EncoderPrimitives& vecprim)
-{
-    struct timeval ts, te;
-
-    for (int curpar = 0; curpar < NUM_PARTITIONS; curpar++)
-    {
-        if (vecprim.satd[curpar])
-        {
-            gettimeofday(&ts, NULL);
-            for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-            {
-                vecprim.satd[curpar](pbuf1, STRIDE, pbuf2, STRIDE);
-            }
-
-            gettimeofday(&te, NULL);
-            printf("\nsatd[%s] vectorized primitive: (%1.4f ms) ", FuncNames[curpar], timevaldiff(&ts, &te));
-
-            gettimeofday(&ts, NULL);
-            for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-            {
-                cprim.satd[curpar](pbuf1, STRIDE, pbuf2, STRIDE);
-            }
-
-            gettimeofday(&te, NULL);
-            printf("\tC primitive: (%1.4f ms) ", timevaldiff(&ts, &te));
-        }
-
-        if (vecprim.sad[curpar])
-        {
-            gettimeofday(&ts, NULL);
-            for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-            {
-                vecprim.sad[curpar](pbuf1, STRIDE, pbuf2, STRIDE);
-            }
-
-            gettimeofday(&te, NULL);
-            printf("\nsad[%s] vectorized primitive: (%1.4f ms) ", FuncNames[curpar], timevaldiff(&ts, &te));
-
-            gettimeofday(&ts, NULL);
-            for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-            {
-                cprim.sad[curpar](pbuf1, STRIDE, pbuf2, STRIDE);
-            }
-
-            gettimeofday(&te, NULL);
-            printf("\tC primitive: (%1.4f ms) ", timevaldiff(&ts, &te));
-        }
-    }
-
-    if (vecprim.sa8d_8x8)
-    {
-        gettimeofday(&ts, NULL);
-        for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-        {
-            vecprim.sa8d_8x8(pbuf1, STRIDE, pbuf2, STRIDE);
-        }
-
-        gettimeofday(&te, NULL);
-        printf("\nsa8d_8x8 vectorized primitive: (%1.4f ms) ", timevaldiff(&ts, &te));
-
-        gettimeofday(&ts, NULL);
-        for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-        {
-            cprim.sa8d_8x8(pbuf1, STRIDE, pbuf2, STRIDE);
-        }
-
-        gettimeofday(&te, NULL);
-        printf("\tC primitive: (%1.4f ms) ", timevaldiff(&ts, &te));
-    }
-
-    if (vecprim.sa8d_16x16)
-    {
-        gettimeofday(&ts, NULL);
-        for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-        {
-            vecprim.sa8d_16x16(pbuf1, STRIDE, pbuf2, STRIDE);
-        }
-
-        gettimeofday(&te, NULL);
-        printf("\nsa8d_16x16 vectorized primitive: (%1.4f ms) ", timevaldiff(&ts, &te));
-
-        gettimeofday(&ts, NULL);
-        for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-        {
-            cprim.sa8d_16x16(pbuf1, STRIDE, pbuf2, STRIDE);
-        }
-
-        gettimeofday(&te, NULL);
-        printf("\tC primitive: (%1.4f ms) ", timevaldiff(&ts, &te));
-    }
-
-    if (vecprim.inversedst)
-    {
-        gettimeofday(&ts, NULL);
-        for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-        {
-            vecprim.inversedst(mbuf1, mbuf2, 16);
-        }
-
-        gettimeofday(&te, NULL);
-        printf("\nInversedst vectorized primitive: (%1.4f ms) ", timevaldiff(&ts, &te));
-
-        gettimeofday(&ts, NULL);
-        for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-        {
-            cprim.inversedst(mbuf1, mbuf2, 16);
-        }
-
-        gettimeofday(&te, NULL);
-        printf("\tC primitive: (%1.4f ms) ", timevaldiff(&ts, &te));
-    }
-
-    /* Add logic here for testing performance of your new primitive*/
-    int rand_height = rand() % 100;                 // Randomly generated Height
-    int rand_width = rand() % 100;                  // Randomly generated Width
-    short rand_val, rand_srcStride, rand_dstStride;
-
-    rand_val = rand() % 24;                     // Random offset in the filter
-    rand_srcStride = rand() % 100;              // Randomly generated srcStride
-    rand_dstStride = rand() % 100;              // Randomly generated dstStride
-
-    for (int value = 4; value < 8; value++)
-    {
-        memset(IPF_vec_output, 0, t_size);      // Initialize output buffer to zero
-        memset(IPF_C_output, 0, t_size);        // Initialize output buffer to zero
-        if (vecprim.filter[value])
-        {
-            gettimeofday(&ts, NULL);
-            for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-            {
-                vecprim.filter[value]((short*)(m_lumaFilter + rand_val), pixel_buff, rand_srcStride, (pixel*)IPF_vec_output,
-                                      rand_dstStride, rand_height, rand_width, BIT_DEPTH);
-            }
-
-            gettimeofday(&te, NULL);
-            printf("\nfilter[%s] vectorized primitive: (%1.4f ms) ", FilterConf_names[value], timevaldiff(&ts, &te));
-
-            gettimeofday(&ts, NULL);
-            for (int j = 0; j < NUM_ITERATIONS_CYCLE; j++)
-            {
-                cprim.filter[value]((short*)(m_lumaFilter + rand_val), pixel_buff, rand_srcStride, (pixel*)IPF_vec_output,
-                                    rand_dstStride, rand_height, rand_width, BIT_DEPTH);
-            }
-
-            gettimeofday(&te, NULL);
-            printf("\tC primitive: (%1.4f ms) ", timevaldiff(&ts, &te));
-        }
-    }
-}
-
-static int check_pixel_primitive(pixelcmp ref, pixelcmp opt)
-{
-    int j = 0;
-
-    for (int i = 0; i <= 100; i++)
-    {
-        int vres = opt(pbuf1, STRIDE, pbuf2 + j, STRIDE);
-        int cres = ref(pbuf1, STRIDE, pbuf2 + j, STRIDE);
-        if (vres != cres)
-            return -1;
-
-        j += INCR;
-    }
-
-    return 0;
-}
-
-//Find the Output Comp and Cycle count
-static int check_mbdst_primitive(mbdst ref, mbdst opt)
-{
-    int j = 0;
-    int t_size = 32;
-
-    for (int i = 0; i <= 100; i++)
-    {
-        opt(mbuf1 + j, mbuf2, 16);
-        ref(mbuf1 + j, mbuf3, 16);
-
-        if (memcmp(mbuf2, mbuf3, 32))
-            return -1;
-
-        j += INCR;
-        memset(mbuf2, 0, t_size);
-        memset(mbuf3, 0, t_size);
-    }
-
-    return 0;
-}
-
-static int check_IPFilter_primitive(IPFilter ref, IPFilter opt)
-{
-    int rand_height = rand() & 100;                 // Randomly generated Height
-    int rand_width = rand() & 100;                  // Randomly generated Width
-    int flag = 0;                                   // Return value
-    short rand_val, rand_srcStride, rand_dstStride;
-
-    for (int i = 0; i <= 100; i++)
-    {
-        memset(IPF_vec_output, 0, t_size);          // Initialize output buffer to zero
-        memset(IPF_C_output, 0, t_size);            // Initialize output buffer to zero
-
-        rand_val = rand() & 24;                     // Random offset in the filter
-        rand_srcStride = rand() & 100;              // Randomly generated srcStride
-        rand_dstStride = rand() & 100;              // Randomly generated dstStride
-
-        opt((short*)(m_lumaFilter + rand_val),
-            pixel_buff,
-            rand_srcStride,
-            (pixel*)IPF_vec_output,
-            rand_dstStride,
-            rand_height,
-            rand_width,
-            BIT_DEPTH);
-        ref((short*)(m_lumaFilter + rand_val),
-            pixel_buff,
-            rand_srcStride,
-            (pixel*)IPF_C_output,
-            rand_dstStride,
-            rand_height,
-            rand_width,
-            BIT_DEPTH);
-
-        if (memcmp(IPF_vec_output, IPF_C_output, t_size))
-        {
-            flag = -1;                                          // Test Failed
-            break;
-        }
-    }
-
-    return flag;
-}
-
-int init_pixelcmp_buffers()
-{
-    pbuf1 = (pixel*)malloc(0x1e00 * sizeof(pixel) + 16 * BENCH_ALIGNS);
-    pbuf2 = (pixel*)malloc(0x1e00 * sizeof(pixel) + 16 * BENCH_ALIGNS);
-    if (!pbuf1 || !pbuf2)
-    {
-        fprintf(stderr, "malloc failed, unable to initiate tests!\n");
-        return -1;
-    }
-
-    for (int i = 0; i < 0x1e00; i++)
-    {
-        //Generate the Random Buffer for Testing
-        pbuf1[i] = rand() & PIXEL_MAX;
-        pbuf2[i] = rand() & PIXEL_MAX;
-    }
-
-    return 0;
-}
-
-int init_IPFilter_buffers()
-{
-    t_size = 200 * 200;
-    pixel_buff = (pixel*)malloc(t_size * sizeof(pixel));     // Assuming max_height = max_width = max_srcStride = max_dstStride = 100
-    IPF_vec_output = (short*)malloc(t_size * sizeof(short));      // Output Buffer1
-    IPF_C_output = (short*)malloc(t_size * sizeof(short));      // Output Buffer2
-
-    if (!pixel_buff || !IPF_vec_output || !IPF_C_output)
-    {
-        fprintf(stderr, "init_IPFilter_buffers: malloc failed, unable to initiate tests!\n");
-        return -1;
-    }
-
-    for (int i = 0; i < t_size; i++)                                    // Initialize input buffer
-    {
-        int isPositive = rand() & 1;                                    // To randomly generate Positive and Negative values
-        isPositive = (isPositive) ? 1 : -1;
-        pixel_buff[i] = isPositive * (rand() & PIXEL_MAX);
-    }
-
-    return 0;
-}
-
-int clean_pixelcmp_buffers()
-{
-    free(pbuf1);
-    free(pbuf2);
-    return 0;
-}
-
-int clean_IPFilter_buffers()
-{
-    free(IPF_vec_output);
-    free(IPF_C_output);
-    free(pixel_buff);
-    return 0;
-}
-
-int init_mbdst_buffers()
-{
-    int t_size = 32;
-
-    mbuf1 = (short*)malloc(0x1e00 * sizeof(pixel) + 16 * BENCH_ALIGNS);
-    mbuf2 = (short*)malloc(t_size);
-    mbuf3 = (short*)malloc(t_size);
-    if (!mbuf1 || !mbuf2 || !mbuf3)
-    {
-        fprintf(stderr, "malloc failed, unable to initiate tests!\n");
-        return -1;
-    }
-
-    memcpy(mbuf1, pbuf1, 64 * 100);
-    memset(mbuf2, 0, t_size);
-    memset(mbuf3, 0, t_size);
-    return 0;
-}
-
-int clean_mbdst_buffers()
-{
-    free(mbuf1);
-    free(mbuf2);
-    free(mbuf3);
-    return 0;
-}
-
-// test all implemented primitives
-static int check_all_primitives(const EncoderPrimitives& cprimitives, const EncoderPrimitives& vectorprimitives)
-{
-    uint16_t curpar = 0;
-
-    /****************** Initialise and run pixelcmp primitives **************************/
-
-    if (init_pixelcmp_buffers() < 0)
-        return -1;
-
-    for (; curpar < NUM_PARTITIONS; curpar++)
-    {
-        if (vectorprimitives.satd[curpar])
-        {
-            if (check_pixel_primitive(cprimitives.satd[curpar], vectorprimitives.satd[curpar]) < 0)
-            {
-                printf("satd[%s]: failed!\n", FuncNames[curpar]);
-                return -1;
-            }
-
-            printf("\nsatd[%s]: passed ", FuncNames[curpar]);
-        }
-
-        if (vectorprimitives.sad[curpar])
-        {
-            if (check_pixel_primitive(cprimitives.sad[curpar], vectorprimitives.sad[curpar]) < 0)
-            {
-                printf("sad[%s]: failed!\n", FuncNames[curpar]);
-                return -1;
-            }
-
-            printf("\nsad[%s]: passed ", FuncNames[curpar]);
-        }
-    }
-
-    if (vectorprimitives.sa8d_8x8)
-    {
-        if (check_pixel_primitive(cprimitives.sa8d_8x8, vectorprimitives.sa8d_8x8) < 0)
-        {
-            printf("sa8d_8x8: failed!\n");
-            return -1;
-        }
-
-        printf("\nsa8d_8x8: passed ");
-    }
-
-    if (vectorprimitives.sa8d_16x16)
-    {
-        if (check_pixel_primitive(cprimitives.sa8d_16x16, vectorprimitives.sa8d_16x16) < 0)
-        {
-            printf("sa8d_16x16: failed!\n");
-            return -1;
-        }
-
-        printf("\nsa8d_16x16: passed ");
-    }
-
-    /********** Run Filter Primitives *******************/
-    if (init_IPFilter_buffers() < 0)
-        return -1;
-
-    for (int value = 4; value < 8; value++)
-    {
-        if (vectorprimitives.filter[value])
-        {
-            if (check_IPFilter_primitive(cprimitives.filter[value], vectorprimitives.filter[value]) < 0)
-            {
-                printf("\nfilter: Failed!\n");
-                return -1;
-            }
-
-            printf("\nFilter[%s]: passed ", FilterConf_names[value]);
-        }
-    }
-
-    /********** Initialise and run mbdst Primitives *******************/
-
-    if (init_mbdst_buffers() < 0)
-        return -1;
-
-    if (vectorprimitives.inversedst)
-    {
-        if (check_mbdst_primitive(cprimitives.inversedst, vectorprimitives.inversedst) < 0)
-        {
-            printf("Inversedst: Failed!\n");
-            return -1;
-        }
-
-        printf("\nInversedst: passed ");
-    }
-
-    /* Initialise and check your primitives here **********/
-
-    /******************* Cycle count for all primitives **********************/
-    check_cycle_count(cprimitives, vectorprimitives);
-
-    /********************* Clean all buffers *****************************/
-    clean_pixelcmp_buffers();
-    clean_mbdst_buffers();
-    clean_IPFilter_buffers();
-    return 0;
-}
-
 int main(int argc, char *argv[])
 {
-    int ret = 0;
     int cpuid = CpuIDDetect();
 
     for (int i = 1; i < argc - 1; i += 2)
@@ -604,39 +47,74 @@ int main(int argc, char *argv[])
         }
     }
 
+    int seed = (int)time(NULL);
+    printf("Using random seed %X\n", seed);
+    srand(seed);
+
+    PixelHarness  HPixel;
+    FilterHarness HFilter;
+    MBDstHarness  HMBDist;
+
+    // To disable classes of tests, simply comment them out in this list
+    TestHarness *harness[] = {
+        &HPixel,
+        &HFilter,
+        &HMBDist
+    };
+
     EncoderPrimitives cprim;
     Setup_C_Primitives(cprim);
 
-    EncoderPrimitives vecprim;
-    memset(&vecprim, 0, sizeof(vecprim));
-
+    for (int i = 1; i <= cpuid; i++)
+    {
 #if ENABLE_VECTOR_PRIMITIVES
-    Setup_Vector_Primitives(vecprim, cpuid);
-    printf("Testing vector class primitives\n");
-    ret = check_all_primitives(cprim, vecprim);
-    if (ret)
-    {
-        fprintf(stderr, "x265: at least one vector primitive has failed. Go and fix that Right Now!\n");
-        return -1;
-    }
-
+        EncoderPrimitives vecprim;
+        memset(&vecprim, 0, sizeof(vecprim));
+        Setup_Vector_Primitives(vecprim, i);
+        printf("Testing vector class primitives: CPUID %d\n", i);
+        for (int h = 0; h < sizeof(harness)/sizeof(TestHarness*); h++)
+        {
+            if (!harness[h]->testCorrectness(cprim, vecprim))
+            {
+                fprintf(stderr, "\nx265: vector primitive has failed. Go and fix that Right Now!\n");
+                return -1;
+            }
+        }
 #endif
 
 #if ENABLE_ASM_PRIMITIVES
-    EncoderPrimitives asmprim;
-    memset(&asmprim, 0, sizeof(asmprim));
-    Setup_Assembly_Primitives(asmprim, cpuid);
-    printf("Testing assembly primitives\n");
-    ret = check_all_primitives(cprim, asmprim);
-    if (ret)
-    {
-        fprintf(stderr, "x265: at least one assembly primitive has failed. Go and fix that Right Now!\n");
-        return -1;
+        EncoderPrimitives asmprim;                      // assembly primitive table for CPU level i
+        memset(&asmprim, 0, sizeof(asmprim));           // start from an all-zero table before setup fills it
+        Setup_Assembly_Primitives(asmprim, i);
+        printf("Testing assembly primitives: CPUID %d\n", i);
+        for (int h = 0; h < sizeof(harness)/sizeof(TestHarness*); h++)
+        {
+            if (!harness[h]->testCorrectness(cprim, asmprim)) // asmprim, not vecprim (vecprim only exists with ENABLE_VECTOR_PRIMITIVES)
+            {
+                fprintf(stderr, "\nx265: ASM primitive has failed. Go and fix that Right Now!\n");
+                return -1;
+            }
+        }
+#endif // if ENABLE_ASM_PRIMITIVES
     }
+    fprintf(stderr, "\nx265: All tests passed Yeah :)\n");
 
-#endif // if ENABLE_ASM_PRIMITIVES
 
-    fprintf(stderr, "x265: All tests passed Yeah :)\n");
+    /******************* Cycle count for all primitives **********************/
+
+    EncoderPrimitives optprim;
+    memset(&optprim, 0, sizeof(optprim));
+#if ENABLE_VECTOR_PRIMITIVES
+    Setup_Vector_Primitives(optprim, cpuid);
+#endif
+#if ENABLE_ASM_PRIMITIVES
+    Setup_Assembly_Primitives(optprim, cpuid);
+#endif
+
+    for (int h = 0; h < sizeof(harness)/sizeof(TestHarness*); h++)
+        harness[h]->measureSpeed(cprim, optprim);
+
+    printf("\n");
 
     return 0;
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/test/testharness.h	Tue Apr 16 10:29:42 2013 +0530
@@ -0,0 +1,66 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@multicorewareinc.com.
+ *****************************************************************************/
+
+#ifndef _TESTHARNESS_H_
+#define _TESTHARNESS_H_ 1
+
+#include "primitives.h"
+
+#if HIGH_BIT_DEPTH
+#define BIT_DEPTH 10
+#else
+#define BIT_DEPTH 8
+#endif
+#define PIXEL_MAX ((1 << BIT_DEPTH) - 1)
+// Abstract base for one class of primitive tests; main() in testbench.cpp drives a list of these.
+class TestHarness
+{
+public:
+    TestHarness() {}
+
+    virtual ~TestHarness() {}
+    // Implementations are expected to compare 'opt' with 'ref'; main() reports failure and exits when this returns false.
+    virtual bool testCorrectness(const x265::EncoderPrimitives& ref, const x265::EncoderPrimitives& opt) = 0;
+    // Implementations are expected to time 'opt' against 'ref'; main() calls this after the correctness loop.
+    virtual void measureSpeed(const x265::EncoderPrimitives& ref, const x265::EncoderPrimitives& opt) = 0;
+};
+// Abstract stopwatch interface; the concrete implementation (TimerImpl) lives in timer.cpp.
+class Timer
+{
+public:
+    Timer() {}
+
+    virtual ~Timer() {}
+    // Factory: returns a new heap-allocated timer; dispose of it with Release().
+    static Timer *CreateTimer();
+    // Marks the beginning of the interval to measure.
+    virtual void Start() = 0;
+    // Marks the end of the interval to measure.
+    virtual void Stop() = 0;
+    // Returns the length of the Start()..Stop() interval in milliseconds.
+    virtual float ElapsedMS() = 0;
+    // Destroys the timer object; the pointer must not be used afterwards.
+    virtual void Release() = 0;
+};
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/test/timer.cpp	Tue Apr 16 10:29:42 2013 +0530
@@ -0,0 +1,115 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Gopu Govindaswamy <gopu@govindaswamy.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@multicorewareinc.com.
+ *****************************************************************************/
+
+#include "testharness.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/timeb.h>
+
+// Code snippet from http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html begins
+// This part is a Windows implementation of the gettimeofday() function
+
+#ifndef _TIMEVAL_H
+#ifdef _WIN32
+
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+
+#if defined(_MSC_VER)
+#define EPOCHFILETIME (116444736000000000i64)
+#else
+#define EPOCHFILETIME (116444736000000000LL)
+#endif
+// Windows stand-in for gettimeofday(): fills *tv with seconds/microseconds; tz is ignored.
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+    // No destination supplied: nothing to compute, still return 0.
+    if (!tv)
+        return 0;
+
+    // Fetch the system time and reassemble its two 32-bit halves into one 64-bit value.
+    FILETIME fileTime;
+    GetSystemTimeAsFileTime(&fileTime);
+    LARGE_INTEGER ticks;
+    ticks.LowPart  = fileTime.dwLowDateTime;
+    ticks.HighPart = fileTime.dwHighDateTime;
+
+    // QuadPart is in 100-nanosecond intervals; remove the Epoch offset, then scale to microseconds.
+    const __int64 usecs = (ticks.QuadPart - EPOCHFILETIME) / 10;
+    tv->tv_sec  = (long)(usecs / 1000000);
+    tv->tv_usec = (long)(usecs % 1000000);
+
+    return 0;
+}
+
+#else  /* _WIN32 */
+
+#include <sys/time.h>
+
+#endif /* _WIN32 */
+#endif /* _TIMEVAL_H */
+
+// Code snippet from http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html ends
+
+// Timer implementation backed by gettimeofday(); instances are created by Timer::CreateTimer().
+class TimerImpl : public Timer
+{
+protected:
+    // Timestamps written by Start() and Stop() respectively.
+    timeval start, finish;
+
+public:
+    // Stores the current time in 'start'.
+    void Start();
+    // Stores the current time in 'finish'.
+    void Stop();
+    // Returns finish - start expressed in milliseconds.
+    float ElapsedMS();
+    // Deletes this object; the pointer must not be used afterwards.
+    void Release()      { delete this; }
+};
+// Factory for Timer: returns a heap-allocated TimerImpl; callers dispose of it via Release().
+Timer *Timer::CreateTimer()
+{
+    return new TimerImpl();
+}
+// Captures the current time of day into 'start' (the beginning of the measured interval).
+void TimerImpl::Start()
+{
+    gettimeofday(&start, NULL);
+}
+// Captures the current time of day into 'finish' (the end of the measured interval).
+void TimerImpl::Stop()
+{
+    gettimeofday(&finish, NULL);
+}
+// Milliseconds between the timestamps captured by Start() and Stop().
+float TimerImpl::ElapsedMS()
+{
+    const float wholeSecondsMs = (finish.tv_sec - start.tv_sec) * 1000;        // integer product, then converted to float
+    const float remainderMs = (float)(finish.tv_usec - start.tv_usec) / 1000;  // microsecond difference; can be negative
+    return wholeSecondsMs + remainderMs;
+}