changeset 11898:7b7355a8e587 draft

Implementation of low-pass subband dct approximation.
author hribeiro <mont3z.claro5@gmail.com>
date Mon, 06 Nov 2017 09:54:56 +0530
parents 6ad93877ffe1
children aa9649a2aa8c
files doc/reST/cli.rst source/CMakeLists.txt source/common/CMakeLists.txt source/common/lowpassdct.cpp source/common/param.cpp source/common/primitives.cpp source/common/primitives.h source/x265.h source/x265cli.h
diffstat 9 files changed, 179 insertions(+-), 4 deletions(-) [+]
line wrap: on
line diff
--- a/doc/reST/cli.rst	Tue Oct 31 16:13:49 2017 +0530
+++ b/doc/reST/cli.rst	Mon Nov 06 09:54:56 2017 +0530
@@ -2142,6 +2142,18 @@ Bitstream options
 
 	Only effective at RD levels 5 and 6
 
+DCT Approximations
+==================
+
+.. option:: --lowpass-dct
+
+    If enabled, x265 will use a low-pass truncated DCT approximation instead of the
+    standard DCT. This approximation is less computationally intensive, but it generates
+    truncated coefficient matrices for the transformed block. Empirical analysis shows
+    this approximation gives good PSNR results for QP>=23.
+
+    This approximation should be considered for platforms with performance and time
+    constraints.
 
 Debugging options
 =================
--- a/source/CMakeLists.txt	Tue Oct 31 16:13:49 2017 +0530
+++ b/source/CMakeLists.txt	Mon Nov 06 09:54:56 2017 +0530
@@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CP
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 138)
+set(X265_BUILD 139)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
--- a/source/common/CMakeLists.txt	Tue Oct 31 16:13:49 2017 +0530
+++ b/source/common/CMakeLists.txt	Mon Nov 06 09:54:56 2017 +0530
@@ -131,7 +131,7 @@ endif(WIN32)
 add_library(common OBJECT
     ${ASM_PRIMITIVES} ${VEC_PRIMITIVES} ${ALTIVEC_PRIMITIVES} ${WINXP}
     primitives.cpp primitives.h
-    pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp
+    pixel.cpp dct.cpp lowpassdct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp
     constants.cpp constants.h
     cpu.cpp cpu.h version.cpp
     threading.cpp threading.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/lowpassdct.cpp	Mon Nov 06 09:54:56 2017 +0530
@@ -0,0 +1,127 @@
+/*****************************************************************************
+ * Copyright (C) 2017 
+ *
+ * Authors: Humberto Ribeiro Filho <mont3z.claro5@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "common.h"
+#include "primitives.h"
+
+using namespace X265_NS;
+
+/* addresses of the standard_dct slots; bound in setupLowPassPrimitives_c, called through by the low-pass transforms */
+static dct_t* s_dct4x4;
+static dct_t* s_dct8x8;
+static dct_t* s_dct16x16;
+
+/* 8x8 low-pass DCT approximation: src is reduced to 4x4 by averaging each 2x2
+ * cell, the 4x4 standard DCT of that block fills the top-left quadrant of dst
+ * (dst row stride 8), every other coefficient is zero, and dst[0] is replaced
+ * by twice the accumulated sum of the input samples, narrowed to int16_t. */
+static void lowPassDct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
+{
+    ALIGN_VAR_32(int16_t, coef[4 * 4]);
+    ALIGN_VAR_32(int16_t, avgBlock[4 * 4]);
+    // 32-bit accumulator, matching lowPassDct16_c/lowPassDct32_c, so the block
+    // sum is not implicitly narrowed to int16_t on every addition
+    int32_t totalSum = 0;
+    int16_t sum = 0;
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+        {
+            // sum of one 2x2 cell; >> 2 turns it into the cell average
+            sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]
+                    + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1];
+            avgBlock[i*4 + j] = sum >> 2;
+            totalSum += sum; // used to derive the DC coefficient
+        }
+    // 4x4 standard DCT of the reduced block, copied row by row into the top-left corner of dst
+    (*s_dct4x4)(avgBlock, coef, 4);
+    memset(dst, 0, 64 * sizeof(int16_t));
+    for (int i = 0; i < 4; i++)
+        memcpy(&dst[i * 8], &coef[i * 4], 4 * sizeof(int16_t));
+    // multiply rather than <<: the sum can be negative and left-shifting a negative value is undefined
+    dst[0] = static_cast<int16_t>(totalSum * 2);
+}
+
+// 16x16 low-pass DCT: 2x2-average src down to 8x8, run the 8x8 standard DCT, keep it as the top-left corner
+static void lowPassDct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
+{
+    ALIGN_VAR_32(int16_t, coef[8 * 8]);
+    ALIGN_VAR_32(int16_t, avgBlock[8 * 8]);
+    int32_t totalSum = 0;
+    for (int row = 0; row < 8; row++)
+    {
+        const int16_t* upper = src + 2 * row * srcStride;
+        const int16_t* lower = upper + srcStride;
+        for (int col = 0; col < 8; col++)
+        {
+            // sum of one 2x2 cell; >> 2 turns it into the cell average
+            int16_t quad = upper[2 * col] + upper[2 * col + 1] + lower[2 * col] + lower[2 * col + 1];
+            avgBlock[row * 8 + col] = quad >> 2;
+            totalSum += quad; // block-wide sum feeds the DC term
+        }
+    }
+    (*s_dct8x8)(avgBlock, coef, 8);
+    memset(dst, 0, 256 * sizeof(int16_t));
+    for (int row = 0; row < 8; row++)
+        memcpy(dst + row * 16, coef + row * 8, 8 * sizeof(int16_t));
+    dst[0] = static_cast<int16_t>(totalSum >> 1); // DC comes from the accumulated sum, not from coef[0]
+}
+
+// 32x32 low-pass DCT: 2x2-average src down to 16x16, run the 16x16 standard DCT, keep it as the top-left corner
+static void lowPassDct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
+{
+    ALIGN_VAR_32(int16_t, coef[16 * 16]);
+    ALIGN_VAR_32(int16_t, avgBlock[16 * 16]);
+    int32_t totalSum = 0;
+    for (int row = 0; row < 16; row++)
+    {
+        const int16_t* upper = src + 2 * row * srcStride;
+        const int16_t* lower = upper + srcStride;
+        for (int col = 0; col < 16; col++)
+        {
+            // four samples of one 2x2 cell; quad >> 2 is the value stored in the reduced block
+            int16_t quad = upper[2 * col] + upper[2 * col + 1] + lower[2 * col] + lower[2 * col + 1];
+            avgBlock[row * 16 + col] = quad >> 2;
+            totalSum += quad;
+        }
+    }
+    (*s_dct16x16)(avgBlock, coef, 16);
+    memset(dst, 0, 1024 * sizeof(int16_t)); // every output coefficient starts at zero
+    for (int row = 0; row < 16; row++)
+        memcpy(dst + row * 32, coef + row * 16, 16 * sizeof(int16_t));
+    dst[0] = static_cast<int16_t>(totalSum >> 3); // overwrite the DC term with the scaled block sum
+}
+
+namespace X265_NS {
+// x265 private namespace
+// Registers the C low-pass DCT approximations in p and binds the standard_dct slots they call through.
+void setupLowPassPrimitives_c(EncoderPrimitives& p)
+{
+    p.cu[BLOCK_32x32].lowpass_dct = lowPassDct32_c;
+    p.cu[BLOCK_16x16].lowpass_dct = lowPassDct16_c;
+    p.cu[BLOCK_8x8].lowpass_dct = lowPassDct8_c;
+    // store the slot addresses, not their current values: the pointers refer into p, so p must outlive their use
+    s_dct16x16 = &p.cu[BLOCK_16x16].standard_dct;
+    s_dct8x8 = &p.cu[BLOCK_8x8].standard_dct;
+    s_dct4x4 = &p.cu[BLOCK_4x4].standard_dct;
+}
+}
--- a/source/common/param.cpp	Tue Oct 31 16:13:49 2017 +0530
+++ b/source/common/param.cpp	Mon Nov 06 09:54:56 2017 +0530
@@ -288,6 +288,9 @@ void x265_param_default(x265_param* para
     param->csvfpt = NULL;
     param->forceFlush = 0;
     param->bDisableLookahead = 0;
+
+    /* DCT Approximations */
+    param->bLowPassDct = 0;
 }
 
 int x265_param_default_preset(x265_param* param, const char* preset, const char* tune)
@@ -978,6 +981,7 @@ int x265_param_parse(x265_param* p, cons
         OPT("refine-mv")p->mvRefine = atobool(value);
         OPT("force-flush")p->forceFlush = atoi(value);
         OPT("splitrd-skip") p->bEnableSplitRdSkip = atobool(value);
+		OPT("lowpass-dct") p->bLowPassDct = atobool(value);
         else
             return X265_PARAM_BAD_NAME;
     }
@@ -1676,6 +1680,7 @@ char *x265_param2string(x265_param* p, i
     s += sprintf(s, " refine-mv=%d", p->mvRefine);
     BOOL(p->bLimitSAO, "limit-sao");
     s += sprintf(s, " ctu-info=%d", p->bCTUInfo);
+    BOOL(p->bLowPassDct, "lowpass-dct");
 #undef BOOL
     return buf;
 }
--- a/source/common/primitives.cpp	Tue Oct 31 16:13:49 2017 +0530
+++ b/source/common/primitives.cpp	Mon Nov 06 09:54:56 2017 +0530
@@ -58,11 +58,13 @@ void setupIntraPrimitives_c(EncoderPrimi
 void setupLoopFilterPrimitives_c(EncoderPrimitives &p);
 void setupSaoPrimitives_c(EncoderPrimitives &p);
 void setupSeaIntegralPrimitives_c(EncoderPrimitives &p);
+void setupLowPassPrimitives_c(EncoderPrimitives& p);
 
 void setupCPrimitives(EncoderPrimitives &p)
 {
     setupPixelPrimitives_c(p);      // pixel.cpp
     setupDCTPrimitives_c(p);        // dct.cpp
+    setupLowPassPrimitives_c(p);    // lowpassdct.cpp
     setupFilterPrimitives_c(p);     // ipfilter.cpp
     setupIntraPrimitives_c(p);      // intrapred.cpp
     setupLoopFilterPrimitives_c(p); // loopfilter.cpp
@@ -70,6 +72,19 @@ void setupCPrimitives(EncoderPrimitives 
     setupSeaIntegralPrimitives_c(p);  // framefilter.cpp
 }
 
+// Snapshots every size's active dct into standard_dct, then routes 16x16 and 32x32 through lowpass_dct
+void enableLowpassDCTPrimitives(EncoderPrimitives &p)
+{
+    p.cu[BLOCK_4x4].standard_dct = p.cu[BLOCK_4x4].dct;
+    p.cu[BLOCK_8x8].standard_dct = p.cu[BLOCK_8x8].dct;
+    // 16x16: save the original before it is overwritten by the approximation
+    p.cu[BLOCK_16x16].standard_dct = p.cu[BLOCK_16x16].dct;
+    p.cu[BLOCK_16x16].dct = p.cu[BLOCK_16x16].lowpass_dct;
+    // 32x32: same save-then-replace order
+    p.cu[BLOCK_32x32].standard_dct = p.cu[BLOCK_32x32].dct;
+    p.cu[BLOCK_32x32].dct = p.cu[BLOCK_32x32].lowpass_dct;
+}
+
 void setupAliasPrimitives(EncoderPrimitives &p)
 {
 #if HIGH_BIT_DEPTH
@@ -256,6 +271,11 @@ void x265_setup_primitives(x265_param *p
 #endif
 
         setupAliasPrimitives(primitives);
+
+        if (param->bLowPassDct && param->rc.qp > 20)
+        {
+            enableLowpassDCTPrimitives(primitives); 
+        }
     }
 
     x265_report_simd(param);
--- a/source/common/primitives.h	Tue Oct 31 16:13:49 2017 +0530
+++ b/source/common/primitives.h	Mon Nov 06 09:54:56 2017 +0530
@@ -259,8 +259,12 @@ struct EncoderPrimitives
      * primitives will leave 64x64 pointers NULL.  Indexed by LumaCU */
     struct CU
     {
-        dct_t           dct;
-        idct_t          idct;
+        dct_t           dct;    // active dct transformation
+        idct_t          idct;   // active idct transformation
+
+        dct_t           standard_dct;   // original dct function, used by lowpass_dct
+        dct_t           lowpass_dct;    // lowpass dct approximation
+
         calcresidual_t  calcresidual;
         pixel_sub_ps_t  sub_ps;
         pixel_add_ps_t  add_ps;
--- a/source/x265.h	Tue Oct 31 16:13:49 2017 +0530
+++ b/source/x265.h	Mon Nov 06 09:54:56 2017 +0530
@@ -1509,6 +1509,11 @@ typedef struct x265_param
 
     /* Disable lookahead */
     int       bDisableLookahead;
+
+    /* Use low-pass truncated DCT approximation.
+     * This DCT approximation is less computationally intensive and gives results close to
+     * the standard DCT for QP >= 23 */
+    int       bLowPassDct;
 } x265_param;
 
 /* x265_param_alloc:
--- a/source/x265cli.h	Tue Oct 31 16:13:49 2017 +0530
+++ b/source/x265cli.h	Mon Nov 06 09:54:56 2017 +0530
@@ -282,6 +282,7 @@ static const struct option long_options[
     { "force-flush",    required_argument, NULL, 0 },
     { "splitrd-skip",         no_argument, NULL, 0 },
     { "no-splitrd-skip",      no_argument, NULL, 0 },
+    { "lowpass-dct",          no_argument, NULL, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
@@ -543,6 +544,7 @@ static void showHelp(x265_param *param)
     H1("-r/--recon <filename>            Reconstructed raw image YUV or Y4M output file name\n");
     H1("   --recon-depth <integer>       Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n");
     H1("   --recon-y4m-exec <string>     pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");
+    H0("   --lowpass-dct                 Use low-pass subband dct approximation. Default %s\n", OPT(param->bLowPassDct));
     H1("\nExecutable return codes:\n");
     H1("    0 - encode successful\n");
     H1("    1 - unable to parse command line\n");