changeset 130:c4df9c66f793

Merged multicoreware/xhevc into default
author Deepthi Devaki Akkoorath <deepthidevaki@multicorewareinc.com>
date Wed, 27 Mar 2013 10:36:25 +0530
parents 921103b76c69 (current diff) a42d35cf40ea (diff)
children ffc0604b89a9
files source/encoder/TComRdCost_SSE.cpp
diffstat 6 files changed, 172 insertions(+-), 46 deletions(-) [+]
line wrap: on
line diff
--- a/build/README.txt	Tue Mar 26 17:56:10 2013 +0530
+++ b/build/README.txt	Wed Mar 27 10:36:25 2013 +0530
@@ -1,25 +1,9 @@
-To compile x265 you must first install cmake and then invoke
-cmake from here with the location of the root CMakeLists.txt
-file in the source folder.  In other words run:
-
-cmake ..\source
-
-cmake will generate Makefiles or Solution files as required
-by your default compiler.
-
-See the CMAKE documentation on how to override the default
-compiler selection and other configurables.
+Compiling x265 requires cmake to be installed to generate project files
+or Makefiles for your preferred compiler.  See http://www.cmake.org
 
-
-MSYS example
+Most users can simply use the batch files in the compiler-specific
+sub-directories.
 
-To build x265 with GCC on Windows, you must install MinGW and
-select the MSYS developer environment install option.  Open
-an msys shell (MinGW/msys/1.0/msys.bat) and navigate to this
-build folder:
-
-$ cd /c/repos/x265/build
-$ cmake -G "MSYS Makefiles" ../source
-$ make
-
-You should now have an encoder.exe in build/App/TAppEncoder/
+If you wish to use a compiler that does not have a build folder, you can
+simply run: `cmake -G GENERATOR ../source`.  See the CMake documentation
+for the list of supported generators.
--- a/source/encoder/TComRdCost_SSE.cpp	Tue Mar 26 17:56:10 2013 +0530
+++ b/source/encoder/TComRdCost_SSE.cpp	Wed Mar 27 10:36:25 2013 +0530
@@ -487,6 +487,9 @@ UInt TComRdCost::xGetSAD64( DistParam* p
   return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8);
 }
 
+#if _MSC_VER
+#pragma warning(disable: 4100)
+#endif
 UInt TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
 {
   Int  i, j, k, jj, sad=0;
--- a/source/encoder/threadpool.cpp	Tue Mar 26 17:56:10 2013 +0530
+++ b/source/encoder/threadpool.cpp	Wed Mar 27 10:36:25 2013 +0530
@@ -107,6 +107,8 @@ private:
 
 public:
 
+    static ThreadPoolImpl *instance;
+
     JobProvider *m_firstProvider;
     JobProvider *m_lastProvider;
 
@@ -120,7 +122,7 @@ public:
 
     ThreadPoolImpl *AddReference() { m_referenceCount++; return this; }
 
-    void Release() { if (--m_referenceCount == 0) delete this; }
+    void Release();
 
     bool IsValid() const { return m_ok; }
 
@@ -213,19 +215,31 @@ static int get_cpu_count()
 #endif
 }
 
+ThreadPoolImpl *ThreadPoolImpl::instance;
+
 /* static */
+// Returns the shared thread pool, creating it on first use.
+//
+// If ThreadPoolImpl::instance is already set, the existing pool is returned
+// with its reference count incremented and numthreads is ignored. Otherwise a
+// new ThreadPoolImpl is constructed with numthreads and remembered in
+// ThreadPoolImpl::instance.
+//
+// NOTE(review): no locking is visible around the check-then-create sequence;
+// confirm that callers never invoke this concurrently.
 ThreadPool *ThreadPool::AllocThreadPool(int numthreads)
 {
-    static ThreadPoolImpl *_impl;
-    if (_impl)
-        return _impl->AddReference();
-    _impl = new ThreadPoolImpl(numthreads);
-    return _impl;
+    if (ThreadPoolImpl::instance)
+        return ThreadPoolImpl::instance->AddReference();
+    ThreadPoolImpl::instance = new ThreadPoolImpl(numthreads);
+    return ThreadPoolImpl::instance;
+}
+
+// Drops one reference to the pool. When the count reaches zero the pool
+// clears the shared instance pointer and destroys itself.
+void ThreadPoolImpl::Release()
+{
+    // Other holders remain: nothing more to do.
+    if (--m_referenceCount != 0)
+        return;
+
+    // Last reference is gone. Clear the shared pointer before deleting so
+    // that a later AllocThreadPool() call builds a fresh pool instead of
+    // handing out a dangling one.
+    assert(instance == this);
+    instance = NULL;
+    delete this;
 }
 
 ThreadPoolImpl::ThreadPoolImpl(int numThreads)
     : m_ok(false)
     , m_referenceCount(1)
+    , m_numThreads(numThreads)
     , m_firstProvider(NULL)
     , m_lastProvider(NULL)
     , m_idleThreadList(NULL)
@@ -331,7 +345,9 @@ bool QueueFrame::InitJobQueue( int numRo
 
     if (m_pool)
     {
-        m_queuedBitmap = new uint64_t[ (numRows + 63) >> 6 ];
+        m_numWords = (numRows + 63) >> 6;
+        m_queuedBitmap = new uint64_t[ m_numWords ];
+        memset((void*)m_queuedBitmap, 0, sizeof(uint64_t) * m_numWords);
         return m_queuedBitmap != NULL;
     }
 
@@ -357,19 +373,20 @@ void QueueFrame::EnqueueRow( int row )
 
 bool QueueFrame::FindJob()
 { // thread safe
-    for (int w = 0; w < ((m_numRows+63)>>6); w++)
+    for (int w = 0; w < m_numWords; w++)
     {
         while (m_queuedBitmap[w])
         {
             uint64_t word = m_queuedBitmap[w];
             if (word == 0) // race condition
                 break;
-            int bit = 64 - (int) CLZ64(word);
-            uint64_t mask = ~(1LL << bit);
+            int id = 63 - (int) CLZ64(word);
+            uint64_t bit = 1LL << id;
+            uint64_t mask = ~bit;
 
-            if (ATOMIC_AND(&m_queuedBitmap[w], mask) & (1LL << bit))
+            if (ATOMIC_AND(&m_queuedBitmap[w], mask) & bit)
             { // if the bit was actually flipped. process row, else try again
-                ProcessRow( w * 32 + bit );
+                ProcessRow( w * 64 + id );
                 return true;
             }
         }
--- a/source/encoder/threadpool.h	Tue Mar 26 17:56:10 2013 +0530
+++ b/source/encoder/threadpool.h	Wed Mar 27 10:36:25 2013 +0530
@@ -75,6 +75,8 @@ private:
     //< set and clear bits, for thread safety
     uint64_t volatile *m_queuedBitmap;
 
+    int m_numWords;
+
     int m_numRows;
 
     //< QueueFrame's internal implementation. Consults queuedBitmap and calls
--- a/source/test/CMakeLists.txt	Tue Mar 26 17:56:10 2013 +0530
+++ b/source/test/CMakeLists.txt	Wed Mar 27 10:36:25 2013 +0530
@@ -1,2 +1,2 @@
 add_executable(testpool testpool.cpp)
-target_link_libraries(testpool x265)
+target_link_libraries(testpool x265 HM)
--- a/source/test/testpool.cpp	Tue Mar 26 17:56:10 2013 +0530
+++ b/source/test/testpool.cpp	Wed Mar 27 10:36:25 2013 +0530
@@ -22,22 +22,30 @@
  *****************************************************************************/
 
 #include "threadpool.h"
+#include "threading.h"
 #include "libmd5/MD5.h"
+#include "PPA/ppa.h"
+
+#include <stdio.h>
+#include <time.h>
+#include <assert.h>
+#include <string.h>
 
 using namespace x265;
 
-typedef struct
+// Per-CU state for the fake frame: a 16-byte digest buffer that the
+// constructor zero-fills.
+struct CUData
 {
+    CUData() { memset(digest, 0, sizeof(digest)); }
     unsigned char digest[16];
-}
-CUData;
+};
 
-typedef struct
+// Per-row progress state read and written by MD5Frame::ProcessRow.
+//   active - set by the row above just before it enqueues this row; cleared
+//            by this row when it stops because it is blocked.
+//   curCol - index of the next column this row will process.
+// NOTE(review): both fields are touched from several worker threads; volatile
+// alone gives no atomicity or ordering guarantee in standard C++ - confirm
+// this is sufficient on the targeted compilers.
+struct RowData
 {
+    RowData() : active(false), curCol(0) {}
+
     volatile bool active;
     volatile int  curCol;
-}
-RowData;
+};
 
 // Create a fake frame class with manufactured data in each CU block.  We
 // need to create an MD5 hash such that each CU's hash includes the hashes
@@ -53,23 +61,135 @@ private:
     RowData *row;
     int      numrows;
     int      numcols;
+    Event    complete;
 
 public:
 
     MD5Frame(ThreadPool* pool) : QueueFrame(pool), cu(0), row(0) {}
-    ~MD5Frame();
+    ~MD5Frame() { delete [] this->cu; delete [] this->row; }
 
     void Initialize( int cols, int rows );
 
     void Encode();
 
-    // called by worker threads
     void ProcessRow( int row );
 };
 
+// Allocates the CU and row arrays for a frame of cols x rows blocks, records
+// the dimensions, and initializes the base-class job queue for `rows` rows.
+void MD5Frame::Initialize( int cols, int rows )
+{
+    cu  = new CUData[ rows * cols ];
+    row = new RowData[ rows ];
+    numcols = cols;
+    numrows = rows;
+
+    // Qualified call, as in the rest of this file, to make clear that the
+    // QueueFrame implementation is the one being invoked.
+    const bool queueReady = QueueFrame::InitJobQueue( rows );
+    if (!queueReady)
+    {
+        assert(!"Unable to initialize job queue");
+    }
+}
+
+// Processes the whole frame on the thread pool and prints the digest of the
+// last CU (bottom-right block) as hex, followed by the time reported by
+// clock() for the run.
+//
+// The frame registers itself as a job provider, enqueues row 0 to start the
+// wavefront, and blocks on `complete`, which ProcessRow triggers once the
+// last row has finished.
+void MD5Frame::Encode()
+{
+    clock_t start = clock();
+
+    this->JobProvider::Enqueue();
+
+    this->QueueFrame::EnqueueRow(0);
+
+    this->complete.Wait();
+
+    this->JobProvider::Dequeue();
+
+    // NOTE(review): the C standard defines clock() as processor time, so with
+    // several worker threads this figure may differ from wall-clock time on
+    // some platforms.
+    clock_t stop = clock();
+
+    const unsigned char *outdigest = this->cu[this->numrows * this->numcols - 1].digest;
+
+    // Print every digest byte. Passing the pointer itself to "%x" (as was
+    // done previously) is undefined behaviour and never showed the digest.
+    for (size_t i = 0; i < sizeof(this->cu[0].digest); i++)
+        printf("%02x", (unsigned int) outdigest[i]);
+    printf(" %1.7fsec\n", (float) (stop-start) / CLOCKS_PER_SEC);
+}
+
+// Processes CUs of row `rownum` from the row's current column onward until
+// the row is either finished or blocked on the row above.
+//
+// Each CU digest is an MD5 over its own seed bytes plus the digests of its
+// left, upper-left, upper and upper-right neighbours (where they exist), so a
+// CU can only be processed once those neighbours are final.
+//
+// NOTE(review): `active` and `curCol` are read and written here by different
+// worker threads using plain volatile accesses. Confirm there is no window in
+// which this row marks itself inactive after the row above has already made
+// its final "is the next row active?" check, which would leave the row
+// un-enqueued.
+void MD5Frame::ProcessRow( int rownum )
+{   // Called by worker thread
+    RowData &curRow = this->row[ rownum ];
+
+    do
+    {
+        // Row-major index of the CU about to be processed.
+        int id = rownum * this->numcols + curRow.curCol;
+        CUData  &curCTU = this->cu[ id ];
+        MD5 hash;
+
+        // * Fake CTU processing *
+        // The PPA macros presumably bracket the work as a profiling event
+        // (their definition is not in this file).
+        PPAStartCpuEventFunc(encode_block)
+        // Seed the digest buffer; memset stores id converted to unsigned char.
+        memset(curCTU.digest, id, sizeof(curCTU.digest));
+        hash.update(curCTU.digest, sizeof(curCTU.digest));
+        // Left neighbour.
+        if (curRow.curCol > 0)
+            hash.update(this->cu[id-1].digest, sizeof(curCTU.digest));
+        if (rownum > 0)
+        {
+            // Upper-left, upper, and upper-right neighbours.
+            if (curRow.curCol > 0)
+                hash.update(this->cu[id-this->numcols-1].digest, sizeof(curCTU.digest));
+            hash.update(this->cu[id-this->numcols].digest, sizeof(curCTU.digest));
+            if (curRow.curCol < this->numcols-1)
+                hash.update(this->cu[id-this->numcols+1].digest, sizeof(curCTU.digest));
+        }
+        hash.finalize(curCTU.digest);
+        PPAStopCpuEventFunc(encode_block)
+
+        curRow.curCol++;
+        // Once more than two columns of this row are done, the row below may
+        // start (or resume). Because of the "> 2" test, a frame with fewer
+        // than three columns never enqueues a following row from here.
+        if (curRow.curCol > 2 && rownum < this->numrows-1)
+        {
+            if (this->row[rownum+1].active == 0)
+            {   // set active indicator so row is only enqueued once
+                // row stays marked active until blocked or done
+                this->row[rownum+1].active = 1;
+                this->QueueFrame::EnqueueRow(rownum+1);
+            }
+        }
+        // The next CU needs the upper-right neighbour (column curCol+1 of the
+        // row above) to be finished, i.e. the row above must have advanced to
+        // at least curCol+2. The test is skipped when fewer than two columns
+        // remain in this row.
+        if (rownum > 0 &&
+            curRow.curCol < this->numcols-1 &&
+            this->row[rownum-1].curCol < curRow.curCol+2)
+        {   // row is blocked, quit job
+            curRow.active = 0;
+            return;
+        }
+    }
+    while (curRow.curCol < this->numcols);
+
+    // * Row completed *
+
+    // Finishing the last row wakes the thread waiting in Encode().
+    if (rownum == this->numrows-1)
+        this->complete.Trigger();
+}
+
 int main(int, char **)
 {
-    ThreadPool *pool = ThreadPool::AllocThreadPool(); // default size
+    PPA_INIT();
 
-    pool->Release();
+    // Run the same 60x40 frame with pools requested at 1, 2, 4 and 8 threads.
+    // Each pool is released before the next AllocThreadPool() call; provided
+    // nothing else still holds a reference, that destroys the pool so the
+    // next request creates a new one with the new thread count.
+    // NOTE(review): in every scope `frame` is destroyed at the closing brace,
+    // i.e. after pool->Release(); confirm its destructor does not use the pool.
+    {
+        ThreadPool *pool = ThreadPool::AllocThreadPool(1);
+        MD5Frame frame(pool);
+        frame.Initialize( 60, 40 );
+        frame.Encode();
+        pool->Release();
+    }
+    {
+        ThreadPool *pool = ThreadPool::AllocThreadPool(2);
+        MD5Frame frame(pool);
+        frame.Initialize( 60, 40 );
+        frame.Encode();
+        pool->Release();
+    }
+    {
+        ThreadPool *pool = ThreadPool::AllocThreadPool(4);
+        MD5Frame frame(pool);
+        frame.Initialize( 60, 40 );
+        frame.Encode();
+        pool->Release();
+    }
+    {
+        ThreadPool *pool = ThreadPool::AllocThreadPool(8);
+        MD5Frame frame(pool);
+        frame.Initialize( 60, 40 );
+        frame.Encode();
+        pool->Release();
+    }
 }