Mercurial > x265
changeset 11898:7b7355a8e587 draft
Implementation of low-pass subband dct approximation.
author | hribeiro <mont3z.claro5@gmail.com> |
---|---|
date | Mon, 06 Nov 2017 09:54:56 +0530 |
parents | 6ad93877ffe1 |
children | aa9649a2aa8c |
files | doc/reST/cli.rst source/CMakeLists.txt source/common/CMakeLists.txt source/common/lowpassdct.cpp source/common/param.cpp source/common/primitives.cpp source/common/primitives.h source/x265.h source/x265cli.h |
diffstat | 9 files changed, 179 insertions(+-), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/doc/reST/cli.rst Tue Oct 31 16:13:49 2017 +0530 +++ b/doc/reST/cli.rst Mon Nov 06 09:54:56 2017 +0530 @@ -2142,6 +2142,18 @@ Bitstream options Only effective at RD levels 5 and 6 +DCT Approximations +================== + +.. option:: --lowpass-dct + + If enabled, x265 will use a low-pass truncated DCT approximation instead of the + standard DCT. This approximation is less computationally intensive, but it generates + truncated coefficient matrices for the transformed block. Empirical analysis shows + this approximation gives good PSNR results for QP>=23. + + This approximation should be considered for platforms with performance and time + constraints. Debugging options =================
--- a/source/CMakeLists.txt Tue Oct 31 16:13:49 2017 +0530 +++ b/source/CMakeLists.txt Mon Nov 06 09:54:56 2017 +0530 @@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CP option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 138) +set(X265_BUILD 139) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
--- a/source/common/CMakeLists.txt Tue Oct 31 16:13:49 2017 +0530 +++ b/source/common/CMakeLists.txt Mon Nov 06 09:54:56 2017 +0530 @@ -131,7 +131,7 @@ endif(WIN32) add_library(common OBJECT ${ASM_PRIMITIVES} ${VEC_PRIMITIVES} ${ALTIVEC_PRIMITIVES} ${WINXP} primitives.cpp primitives.h - pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp + pixel.cpp dct.cpp lowpassdct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp constants.cpp constants.h cpu.cpp cpu.h version.cpp threading.cpp threading.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/source/common/lowpassdct.cpp Mon Nov 06 09:54:56 2017 +0530 @@ -0,0 +1,127 @@ +/***************************************************************************** + * Copyright (C) 2017 + * + * Authors: Humberto Ribeiro Filho <mont3z.claro5@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/
+
+#include "common.h"
+#include "primitives.h"
+
+using namespace X265_NS;
+
+/* Pointers to the standard_dct slots of the primitive table that was handed
+ * to setupLowPassPrimitives_c(). They are dereferenced on every call, so the
+ * slots must hold valid transforms before any lowPassDct*_c function runs. */
+static dct_t* s_dct4x4;
+static dct_t* s_dct8x8;
+static dct_t* s_dct16x16;
+
+/* Downsamples a (2N)x(2N) block to NxN by averaging each 2x2 cell.
+ *   src       - top-left sample of the input block
+ *   srcStride - distance, in samples, between vertically adjacent inputs
+ *   avgBlock  - receives the N*N cell averages, row-major with stride N
+ * Returns the sum of all (2N)*(2N) input samples. Both the per-cell sum and
+ * the running total are kept in 32-bit integers so that neither is narrowed
+ * back to int16_t while accumulating. */
+template<int N>
+static int32_t averageCells2x2(const int16_t* src, intptr_t srcStride, int16_t* avgBlock)
+{
+    int32_t totalSum = 0;
+
+    for (int i = 0; i < N; i++)
+    {
+        const int16_t* top = src + 2 * i * srcStride;
+        const int16_t* bottom = top + srcStride;
+
+        for (int j = 0; j < N; j++)
+        {
+            const int sum = top[2 * j] + top[2 * j + 1] + bottom[2 * j] + bottom[2 * j + 1];
+
+            avgBlock[i * N + j] = static_cast<int16_t>(sum >> 2);
+            totalSum += sum; // used to calculate the total block average
+        }
+    }
+
+    return totalSum;
+}
+
+/* Copies the NxN coefficient block into the top-left corner of the
+ * (2N)x(2N) output block and zeroes every other output coefficient. */
+template<int N>
+static void storeLowBand(const int16_t* coef, int16_t* dst)
+{
+    memset(dst, 0, 4 * N * N * sizeof(int16_t));
+    for (int i = 0; i < N; i++)
+    {
+        memcpy(&dst[i * 2 * N], &coef[i * N], N * sizeof(int16_t));
+    }
+}
+
+/* Low-pass approximations of the 8x8, 16x16 and 32x32 forward DCT.
+ *
+ * Each function averages the input down to half resolution, runs the
+ * standard DCT of the half size on it, stores the result in the top-left
+ * quadrant of dst and zeroes the other three quadrants. dst[0] is then
+ * overwritten with a value derived from the sum of all input samples instead
+ * of the value produced from the averaged block.
+ *
+ *   src       - input block, rows srcStride samples apart
+ *   dst       - output coefficients, (2N)*(2N) contiguous values
+ *   srcStride - input stride in samples
+ *
+ * NOTE(review): the DC scale factors (x2, >>1, >>3) are hard-coded per block
+ * size; confirm they match the scaling of the standard DCT for every
+ * supported bit depth. */
+static void lowPassDct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
+{
+    ALIGN_VAR_32(int16_t, coef[4 * 4]);
+    ALIGN_VAR_32(int16_t, avgBlock[4 * 4]);
+
+    const int32_t totalSum = averageCells2x2<4>(src, srcStride, avgBlock);
+
+    //dct4
+    (*s_dct4x4)(avgBlock, coef, 4);
+    storeLowBand<4>(coef, dst);
+
+    // replace first coef with total block average; multiply instead of
+    // shifting left because totalSum may be negative
+    dst[0] = static_cast<int16_t>(totalSum * 2);
+}
+
+static void lowPassDct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
+{
+    ALIGN_VAR_32(int16_t, coef[8 * 8]);
+    ALIGN_VAR_32(int16_t, avgBlock[8 * 8]);
+
+    const int32_t totalSum = averageCells2x2<8>(src, srcStride, avgBlock);
+
+    (*s_dct8x8)(avgBlock, coef, 8);
+    storeLowBand<8>(coef, dst);
+
+    dst[0] = static_cast<int16_t>(totalSum >> 1);
+}
+
+static void lowPassDct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride)
+{
+    ALIGN_VAR_32(int16_t, coef[16 * 16]);
+    ALIGN_VAR_32(int16_t, avgBlock[16 * 16]);
+
+    const int32_t totalSum = averageCells2x2<16>(src, srcStride, avgBlock);
+
+    (*s_dct16x16)(avgBlock, coef, 16);
+    storeLowBand<16>(coef, dst);
+
+    dst[0] = static_cast<int16_t>(totalSum >> 3);
+}
+
+namespace X265_NS {
+// x265 private namespace
+
+/* Registers the low-pass DCT approximations in p and remembers where p keeps
+ * its standard_dct entries.
+ *
+ * Only the addresses of the 4x4, 8x8 and 16x16 standard_dct slots are
+ * recorded here; nothing is written to those slots by this function, so they
+ * have to be populated elsewhere before a lowpass_dct primitive is called.
+ * The recorded addresses refer to p itself, so p must outlive every use of
+ * the lowpass_dct primitives. */
+void setupLowPassPrimitives_c(EncoderPrimitives& p)
+{
+    s_dct4x4 = &(p.cu[BLOCK_4x4].standard_dct);
+    s_dct8x8 = &(p.cu[BLOCK_8x8].standard_dct);
+    s_dct16x16 = &(p.cu[BLOCK_16x16].standard_dct);
+
+    p.cu[BLOCK_8x8].lowpass_dct = lowPassDct8_c;
+    p.cu[BLOCK_16x16].lowpass_dct = lowPassDct16_c;
+    p.cu[BLOCK_32x32].lowpass_dct = lowPassDct32_c;
+}
+}
--- a/source/common/param.cpp Tue Oct 31 16:13:49 2017 +0530 +++ b/source/common/param.cpp Mon Nov 06 09:54:56 2017 +0530 @@ -288,6 +288,9 @@ void x265_param_default(x265_param* para param->csvfpt = NULL; param->forceFlush = 0; param->bDisableLookahead = 0; + + /* DCT Approximations */ + param->bLowPassDct = 0; } int x265_param_default_preset(x265_param* param, const char* preset, const char* tune) @@ -978,6 +981,7 @@ int x265_param_parse(x265_param* p, cons OPT("refine-mv")p->mvRefine = atobool(value); OPT("force-flush")p->forceFlush = atoi(value); OPT("splitrd-skip") p->bEnableSplitRdSkip = atobool(value); + OPT("lowpass-dct") p->bLowPassDct = atobool(value); else return X265_PARAM_BAD_NAME; } @@ -1676,6 +1680,7 @@ char *x265_param2string(x265_param* p, i s += sprintf(s, " refine-mv=%d", p->mvRefine); BOOL(p->bLimitSAO, "limit-sao"); s += sprintf(s, " ctu-info=%d", p->bCTUInfo); + BOOL(p->bLowPassDct, "lowpass-dct"); #undef BOOL return buf; }
--- a/source/common/primitives.cpp Tue Oct 31 16:13:49 2017 +0530 +++ b/source/common/primitives.cpp Mon Nov 06 09:54:56 2017 +0530 @@ -58,11 +58,13 @@ void setupIntraPrimitives_c(EncoderPrimi void setupLoopFilterPrimitives_c(EncoderPrimitives &p); void setupSaoPrimitives_c(EncoderPrimitives &p); void setupSeaIntegralPrimitives_c(EncoderPrimitives &p); +void setupLowPassPrimitives_c(EncoderPrimitives& p); void setupCPrimitives(EncoderPrimitives &p) { setupPixelPrimitives_c(p); // pixel.cpp setupDCTPrimitives_c(p); // dct.cpp + setupLowPassPrimitives_c(p); // lowpassdct.cpp setupFilterPrimitives_c(p); // ipfilter.cpp setupIntraPrimitives_c(p); // intrapred.cpp setupLoopFilterPrimitives_c(p); // loopfilter.cpp @@ -70,6 +72,19 @@ void setupCPrimitives(EncoderPrimitives setupSeaIntegralPrimitives_c(p); // framefilter.cpp } +/* Copies the currently active dct of the 4x4, 8x8, 16x16 and 32x32 entries into their standard_dct slots, then makes lowpass_dct the active dct for 16x16 and 32x32 only; the 8x8 entry keeps its current dct. */ +void enableLowpassDCTPrimitives(EncoderPrimitives &p) +{ + // update copies of the standard dct transform + p.cu[BLOCK_4x4].standard_dct = p.cu[BLOCK_4x4].dct; + p.cu[BLOCK_8x8].standard_dct = p.cu[BLOCK_8x8].dct; + p.cu[BLOCK_16x16].standard_dct = p.cu[BLOCK_16x16].dct; + p.cu[BLOCK_32x32].standard_dct = p.cu[BLOCK_32x32].dct; + + // replace active dct by lowpass dct for high dct transforms + p.cu[BLOCK_16x16].dct = p.cu[BLOCK_16x16].lowpass_dct; + p.cu[BLOCK_32x32].dct = p.cu[BLOCK_32x32].lowpass_dct; +} + void setupAliasPrimitives(EncoderPrimitives &p) { #if HIGH_BIT_DEPTH @@ -256,6 +271,11 @@ void x265_setup_primitives(x265_param *p #endif setupAliasPrimitives(primitives); + + /* NOTE(review): the gate below is qp > 20, while the option documentation added in this changeset quotes QP >= 23 -- confirm the intended threshold. */ + if (param->bLowPassDct && param->rc.qp > 20) + { + enableLowpassDCTPrimitives(primitives); + } } x265_report_simd(param);
--- a/source/common/primitives.h Tue Oct 31 16:13:49 2017 +0530 +++ b/source/common/primitives.h Mon Nov 06 09:54:56 2017 +0530 @@ -259,8 +259,12 @@ struct EncoderPrimitives * primitives will leave 64x64 pointers NULL. Indexed by LumaCU */ struct CU { - dct_t dct; - idct_t idct; + dct_t dct; // active dct transformation + idct_t idct; // active idct transformation + + dct_t standard_dct; // original dct function, used by lowpass_dct + dct_t lowpass_dct; // lowpass dct approximation + calcresidual_t calcresidual; pixel_sub_ps_t sub_ps; pixel_add_ps_t add_ps;
--- a/source/x265.h Tue Oct 31 16:13:49 2017 +0530 +++ b/source/x265.h Mon Nov 06 09:54:56 2017 +0530 @@ -1509,6 +1509,11 @@ typedef struct x265_param /* Disable lookahead */ int bDisableLookahead; + + /* Use low-pass truncated DCT approximation. + * This DCT approximation is less computationally intensive and gives results close to + * the standard DCT for QP >= 23 */ + int bLowPassDct; } x265_param; /* x265_param_alloc:
--- a/source/x265cli.h Tue Oct 31 16:13:49 2017 +0530 +++ b/source/x265cli.h Mon Nov 06 09:54:56 2017 +0530 @@ -282,6 +282,7 @@ static const struct option long_options[ { "force-flush", required_argument, NULL, 0 }, { "splitrd-skip", no_argument, NULL, 0 }, { "no-splitrd-skip", no_argument, NULL, 0 }, + { "lowpass-dct", no_argument, NULL, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, @@ -543,6 +544,7 @@ static void showHelp(x265_param *param) H1("-r/--recon <filename> Reconstructed raw image YUV or Y4M output file name\n"); H1(" --recon-depth <integer> Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n"); H1(" --recon-y4m-exec <string> pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n"); + H0(" --lowpass-dct Use low-pass subband dct approximation. Default %s\n", OPT(param->bLowPassDct)); H1("\nExecutable return codes:\n"); H1(" 0 - encode successful\n"); H1(" 1 - unable to parse command line\n");