Coverage Report

Created: 2026-05-16 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/libde265/libde265/transform.cc
Line
Count
Source
1
/*
2
 * H.265 video codec.
3
 * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4
 *
5
 * This file is part of libde265.
6
 *
7
 * libde265 is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as
9
 * published by the Free Software Foundation, either version 3 of
10
 * the License, or (at your option) any later version.
11
 *
12
 * libde265 is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "transform.h"
22
#include "util.h"
23
24
#include <assert.h>
25
26
27
// Lookup table with 13 entries (values 29..37).
// NOTE(review): presumably the chroma QP mapping of spec table 8-22 for the
// middle qPi range, read through table8_22() (declared outside this file
// section) -- confirm against the header. The commented-out trailing 37 shows
// that one further entry was deliberately left out of the table.
const int tab8_22[] = { 29,30,31,32,33,33,34,34,35,35,36,36,37 /*,37*/ };
28
29
30
// (8.6.1)
31
// Derives the quantization parameters of the coding unit at (xCUBase,yCUBase).
//
// The luma QP is predicted from the left and top neighbours of the enclosing
// quantization group (falling back to the slice QP or to the last QP of the
// previous quantization group), corrected by tctx->CuQpDelta, and the chroma
// QPs are derived from the resulting luma QP.
//
// Outputs:
//   - tctx->qPYPrime, tctx->qPCbPrime, tctx->qPCrPrime
//   - tctx->currentQPY; additionally tctx->currentQG_x/y and
//     tctx->lastQPYinPreviousQG when a new quantization group is entered
//   - the QPY entry of the CU in the image (img->set_QPY)
//
// xC,yC are only used for trace output.
void decode_quantization_parameters(thread_context* tctx, int xC,int yC,
                                    int xCUBase, int yCUBase)
{
  logtrace(LogTransform,">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> decode_quantization_parameters(int xC,int yC)=(%d,%d)\n", xC,yC);

  const pic_parameter_set& pps = tctx->img->get_pps();
  const seq_parameter_set& sps = tctx->img->get_sps();
  slice_segment_header* shdr = tctx->shdr;

  // Top-left pixel position of the current quantization group: the CU position
  // with its low Log2MinCuQpDeltaSize bits removed.
  const int qgLSBMask = (1<<pps.Log2MinCuQpDeltaSize)-1;
  const int xQG = xCUBase - (xCUBase & qgLSBMask);
  const int yQG = yCUBase - (yCUBase & qgLSBMask);

  logtrace(LogTransform,"QG: %d,%d\n",xQG,yQG);

  // TODO: in principle the QP only has to be set in the first call within a
  // quantization group, but returning early when (xQG,yQG) equals
  // (tctx->currentQG_x, tctx->currentQG_y) does not work with the HoneyBee
  // stream. Check why.

  // On entering a new quantization group, remember the QPY of the last CU of
  // the previous group.
  const bool sameQG = (xQG == tctx->currentQG_x &&
                       yQG == tctx->currentQG_y);
  if (!sameQG) {
    tctx->lastQPYinPreviousQG = tctx->currentQPY;
    tctx->currentQG_x = xQG;
    tctx->currentQG_y = yQG;
  }

  // --- first QG in CTB row ?

  const int ctbLSBMask = ((1<<sps.Log2CtbSizeY)-1);
  const bool firstInCTBRow = (xQG == 0 && ((yQG & ctbLSBMask)==0));

  // --- first QG in slice ?    TODO: a "firstQG" flag in the thread context would be faster

  const int sliceStartCtbRS = shdr->SliceAddrRS;
  const int SliceStartX = (sliceStartCtbRS % sps.PicWidthInCtbsY) * sps.CtbSizeY;
  const int SliceStartY = (sliceStartCtbRS / sps.PicWidthInCtbsY) * sps.CtbSizeY;

  const bool firstQGInSlice = (SliceStartX == xQG && SliceStartY == yQG);

  // --- first QG in tile ?  (only QGs located at a CTB origin can start a tile)

  const bool firstQGInTile =
    pps.tiles_enabled_flag &&
    (xQG & ctbLSBMask) == 0 &&
    (yQG & ctbLSBMask) == 0 &&
    pps.is_tile_start_CTB(xQG >> sps.Log2CtbSizeY,
                          yQG >> sps.Log2CtbSizeY); // TODO: this is slow

  // --- QP predictor used when a neighbour is not usable

  const bool restartAtSliceQP = (firstQGInSlice || firstQGInTile ||
                                 (firstInCTBRow && pps.entropy_coding_sync_enabled_flag));

  int qPY_PRED = restartAtSliceQP ? shdr->SliceQPY : tctx->lastQPYinPreviousQG;

  // --- left neighbour (A): only used when available and inside the current CTB

  int qPYA = qPY_PRED;

  if (tctx->img->available_zscan(xQG,yQG, xQG-1,yQG)) {
    const int xTb = (xQG-1) >> sps.Log2MinTrafoSize;
    const int yTb = (yQG  ) >> sps.Log2MinTrafoSize;
    int minTbAddrA = pps.MinTbAddrZS[xTb + yTb*sps.PicWidthInTbsY];
    uint32_t ctbAddrA = minTbAddrA >> (2 * (sps.Log2CtbSizeY-sps.Log2MinTrafoSize));

    if (ctbAddrA == tctx->CtbAddrInTS) {
      qPYA = tctx->img->get_QPY(xQG-1,yQG);
    }
  }

  // --- top neighbour (B): same rule

  int qPYB = qPY_PRED;

  if (tctx->img->available_zscan(xQG,yQG, xQG,yQG-1)) {
    const int xTb = (xQG  ) >> sps.Log2MinTrafoSize;
    const int yTb = (yQG-1) >> sps.Log2MinTrafoSize;
    uint32_t minTbAddrB = pps.MinTbAddrZS[xTb + yTb*sps.PicWidthInTbsY];
    uint32_t ctbAddrB = minTbAddrB >> (2 * (sps.Log2CtbSizeY-sps.Log2MinTrafoSize));

    if (ctbAddrB == tctx->CtbAddrInTS) {
      qPYB = tctx->img->get_QPY(xQG,yQG-1);
    }
  }

  // rounded average of both neighbours
  qPY_PRED = (qPYA + qPYB + 1)>>1;

  logtrace(LogTransform,"qPY_PRED = %d  (%d, %d)\n",qPY_PRED, qPYA, qPYB);

  // --- luma QP: predictor plus delta, wrapped into [-QpBdOffset_Y, 51]

  const int QPY = ((qPY_PRED + tctx->CuQpDelta + 52+2*sps.QpBdOffset_Y) %
                   (52 + sps.QpBdOffset_Y)) - sps.QpBdOffset_Y;

  assert(QPY >= -sps.QpBdOffset_Y && QPY <= 51);

  tctx->qPYPrime = QPY + sps.QpBdOffset_Y;

  // --- chroma QPs

  const int qPiCb = Clip3(-sps.QpBdOffset_C,57, QPY+pps.pic_cb_qp_offset + shdr->slice_cb_qp_offset + tctx->CuQpOffsetCb);
  const int qPiCr = Clip3(-sps.QpBdOffset_C,57, QPY+pps.pic_cr_qp_offset + shdr->slice_cr_qp_offset + tctx->CuQpOffsetCr);

  logtrace(LogTransform,"qPiCb:%d (%d %d), qPiCr:%d (%d %d)\n",
           qPiCb, pps.pic_cb_qp_offset, shdr->slice_cb_qp_offset,
           qPiCr, pps.pic_cr_qp_offset, shdr->slice_cr_qp_offset);

  // Only 4:2:0 content maps the chroma index through the table; all other
  // chroma formats use the index directly.
  const bool mapThroughTable = (sps.ChromaArrayType == CHROMA_420);
  const int qPCb = mapThroughTable ? table8_22(qPiCb) : qPiCb;
  const int qPCr = mapThroughTable ? table8_22(qPiCr) : qPiCr;

  // The primed chroma QPs are never allowed to become negative.
  tctx->qPCbPrime = qPCb + sps.QpBdOffset_C;
  if (tctx->qPCbPrime<0) {
    tctx->qPCbPrime = 0;
  }

  tctx->qPCrPrime = qPCr + sps.QpBdOffset_C;
  if (tctx->qPCrPrime<0) {
    tctx->qPCrPrime = 0;
  }

  // --- store the luma QP for the whole CU

  int log2CbSize = tctx->img->get_log2CbSize(xCUBase, yCUBase);

  // TODO: On broken input, log2CbSize may be zero (multithreaded only). Not sure yet why.
  // Maybe another decoding thread is overwriting the value set in slice.cc:read_coding_unit.
  // id:000163,sig:06,src:002041,op:havoc,rep:16.bin
  if (log2CbSize<3) {
    log2CbSize=3;
  }

  tctx->img->set_QPY(xCUBase, yCUBase, log2CbSize, QPY);
  tctx->currentQPY = QPY;

  logtrace(LogTransform,"qPY(%d,%d,%d)= %d, qPYPrime=%d\n",
           xCUBase,yCUBase,1<<log2CbSize,QPY,tctx->qPYPrime);
}
210
211
212
213
template <class pixel_t>
214
void transform_coefficients(acceleration_functions* acceleration,
215
                            int16_t* coeff, int coeffStride, int nT, int trType,
216
                            pixel_t* dst, int dstStride, int bit_depth)
217
0
{
218
0
  logtrace(LogTransform,"transform --- trType: %d nT: %d\n",trType,nT);
219
220
221
0
  if (trType==1) {
222
223
0
    acceleration->transform_4x4_dst_add<pixel_t>(dst, coeff, dstStride, bit_depth);
224
225
0
  } else {
226
227
0
    /**/ if (nT==4)  { acceleration->transform_add<pixel_t>(0,dst,coeff,dstStride, bit_depth); }
228
0
    else if (nT==8)  { acceleration->transform_add<pixel_t>(1,dst,coeff,dstStride, bit_depth); }
229
0
    else if (nT==16) { acceleration->transform_add<pixel_t>(2,dst,coeff,dstStride, bit_depth); }
230
0
    else             { acceleration->transform_add<pixel_t>(3,dst,coeff,dstStride, bit_depth); }
231
0
  }
232
233
#if 0
234
  printf("decoded pixels:\n");
235
  for (int y=0;y<nT;y++,printf("\n"))
236
    for (int x=0;x<nT;x++) {
237
      printf("%02x ",dst[y*dstStride+x]);
238
    }
239
#endif
240
0
}
Unexecuted instantiation: void transform_coefficients<unsigned short>(acceleration_functions*, short*, int, int, int, unsigned short*, int, int)
Unexecuted instantiation: void transform_coefficients<unsigned char>(acceleration_functions*, short*, int, int, int, unsigned char*, int, int)
241
242
243
// TODO: make this an accelerated function
244
void cross_comp_pred(const thread_context* tctx, int32_t* residual, int nT)
245
0
{
246
0
  const int BitDepthC = tctx->img->get_sps().BitDepth_C;
247
0
  const int BitDepthY = tctx->img->get_sps().BitDepth_Y;
248
249
0
  for (int y=0;y<nT;y++)
250
0
    for (int x=0;x<nT;x++) {
251
      /* TODO: the most usual case is definitely BitDepthY == BitDepthC, in which case
252
         we could just omit two shifts. The second most common case is probably
253
         BitDepthY>BitDepthC, for which we could also eliminate one shift. The remaining
254
         case is also one shift only.
255
      */
256
257
0
      residual[y*nT+x] += (tctx->ResScaleVal *
258
0
                           static_cast<int32_t>((static_cast<uint32_t>(tctx->residual_luma[y*nT+x]) << BitDepthC ) >> BitDepthY ) ) >> 3;
259
0
    }
260
0
}
261
262
263
template <class pixel_t>
264
void transform_coefficients_explicit(thread_context* tctx,
265
                                     int16_t* coeff, int coeffStride, int nT, int trType,
266
                                     pixel_t* dst, int dstStride, int bit_depth, int cIdx)
267
0
{
268
0
  logtrace(LogTransform,"transform --- trType: %d nT: %d\n",trType,nT);
269
270
0
  const acceleration_functions* acceleration = &tctx->decctx->acceleration;
271
272
0
  int32_t residual_buffer[32*32];
273
0
  int32_t* residual;
274
0
  if (cIdx==0) {
275
0
    residual = tctx->residual_luma;
276
0
  }
277
0
  else {
278
0
    residual = residual_buffer;
279
0
  }
280
281
282
  // TODO
283
0
  int bdShift = 20 - bit_depth;
284
0
  int max_coeff_bits = 15;
285
286
0
  if (trType==1) {
287
288
0
    acceleration->transform_idst_4x4(residual, coeff, bdShift, max_coeff_bits);
289
290
0
  } else {
291
292
0
    /**/ if (nT==4)  { acceleration->transform_idct_4x4(residual,coeff,bdShift,max_coeff_bits); }
293
0
    else if (nT==8)  { acceleration->transform_idct_8x8(residual,coeff,bdShift,max_coeff_bits); }
294
0
    else if (nT==16) { acceleration->transform_idct_16x16(residual,coeff,bdShift,max_coeff_bits); }
295
0
    else             { acceleration->transform_idct_32x32(residual,coeff,bdShift,max_coeff_bits); }
296
0
  }
297
298
299
  //printBlk("prediction",(uint8_t*)dst,nT,dstStride);
300
  //printBlk("residual",residual,nT,nT);
301
302
0
  if (cIdx != 0) {
303
0
    if (tctx->ResScaleVal != 0) {
304
0
      cross_comp_pred(tctx, residual, nT);
305
0
    }
306
307
    //printBlk("cross-comp-pred modified residual",residual,nT,nT);
308
0
  }
309
310
0
  acceleration->add_residual(dst,dstStride, residual,nT, bit_depth);
311
0
}
Unexecuted instantiation: void transform_coefficients_explicit<unsigned short>(thread_context*, short*, int, int, int, unsigned short*, int, int, int)
Unexecuted instantiation: void transform_coefficients_explicit<unsigned char>(thread_context*, short*, int, int, int, unsigned char*, int, int, int)
312
313
314
void inv_transform(acceleration_functions* acceleration,
315
                   uint8_t* dst, int dstStride, int16_t* coeff,
316
                   int log2TbSize, int trType)
317
0
{
318
0
  if (trType==1) {
319
0
    assert(log2TbSize==2);
320
321
0
    acceleration->transform_4x4_dst_add_8(dst, coeff, dstStride);
322
323
0
  } else {
324
0
    acceleration->transform_add_8[log2TbSize-2](dst,coeff,dstStride);
325
0
  }
326
327
328
#if 0
329
  int nT = 1<<log2TbSize;
330
  printf("decoded pixels:\n");
331
  for (int y=0;y<nT;y++,printf("\n"))
332
    for (int x=0;x<nT;x++) {
333
  printf("%02x ",dst[y*dstStride+x]);
334
}
335
#endif
336
0
}
337
338
339
void fwd_transform(acceleration_functions* acceleration,
340
                   int16_t* coeff, int coeffStride, int log2TbSize, int trType,
341
                   const int16_t* src, int srcStride)
342
0
{
343
0
  logtrace(LogTransform,"transform --- trType: %d nT: %d\n",trType,1<<log2TbSize);
344
345
0
  if (trType==1) {
346
    // DST 4x4
347
348
0
    acceleration->fwd_transform_4x4_dst_8(coeff, src, srcStride);
349
0
  } else {
350
    // DCT 4x4, 8x8, 16x16, 32x32
351
352
0
    acceleration->fwd_transform_8[log2TbSize-2](coeff,src,srcStride);
353
0
  }
354
0
}
355
356
357
358
// Dequantization scale factors, indexed by (qP % 6). At every use site in this
// file the selected entry is additionally shifted left by (qP / 6).
static const int levelScale[] = { 40,45,51,57,64,72 };
359
360
// (8.6.2) and (8.6.3)
361
template <class pixel_t>
362
void scale_coefficients_internal(thread_context* tctx,
363
                                 int xT,int yT, // position of TU in frame (chroma adapted)
364
                                 int x0,int y0, // position of CU in frame (chroma adapted)
365
                                 int nT, int cIdx,
366
                                 bool transform_skip_flag, bool intra, int rdpcmMode)
367
0
{
368
0
  const seq_parameter_set& sps = tctx->img->get_sps();
369
0
  const pic_parameter_set& pps = tctx->img->get_pps();
370
371
0
  int qP;
372
0
  switch (cIdx) {
373
0
  case 0: qP = tctx->qPYPrime;  break;
374
0
  case 1: qP = tctx->qPCbPrime; break;
375
0
  case 2: qP = tctx->qPCrPrime; break;
376
0
  default: qP = 0; assert(0); break; // should never happen
377
0
  }
378
379
0
  logtrace(LogTransform,"qP: %d\n",qP);
380
381
382
0
  int16_t* coeff;
383
0
  int      coeffStride;
384
385
0
  coeff = tctx->coeffBuf;
386
0
  coeffStride = nT;
387
388
389
390
391
392
0
  pixel_t* pred;
393
0
  int      stride;
394
0
  pred = tctx->img->get_image_plane_at_pos_NEW<pixel_t>(cIdx, xT,yT);
395
0
  stride = tctx->img->get_image_stride(cIdx);
396
397
  // We explicitly include the case for sizeof(pixel_t)==1 so that the compiler
398
  // can optimize away a lot of code for 8-bit pixels.
399
0
  const int bit_depth = ((sizeof(pixel_t)==1) ? 8 : sps.get_bit_depth(cIdx));
400
401
  //assert(intra == (tctx->img->get_pred_mode(xT,yT)==MODE_INTRA));
402
0
  int cuPredModeIntra = (tctx->img->get_pred_mode(xT,yT)==MODE_INTRA);
403
404
0
  bool rotateCoeffs = (sps.range_extension.transform_skip_rotation_enabled_flag &&
405
0
                       nT == 4 &&
406
0
                       cuPredModeIntra);
407
408
0
  if (tctx->cu_transquant_bypass_flag) {
409
410
0
    int32_t residual_buffer[32*32];
411
412
0
    int32_t* residual;
413
0
    if (cIdx==0) residual = tctx->residual_luma;
414
0
    else         residual = residual_buffer;
415
416
417
    // TODO: we could fold the coefficient rotation into the coefficient expansion here:
418
0
    for (int i=0;i<tctx->nCoeff[cIdx];i++) {
419
0
      int32_t currCoeff = tctx->coeffList[cIdx][i];
420
0
      tctx->coeffBuf[ tctx->coeffPos[cIdx][i] ] = currCoeff;
421
0
    }
422
423
0
    if (rotateCoeffs) {
424
0
      tctx->decctx->acceleration.rotate_coefficients(coeff, nT);
425
0
    }
426
427
0
    if (rdpcmMode) {
428
0
      if (rdpcmMode==2)
429
0
        tctx->decctx->acceleration.transform_bypass_rdpcm_v(residual, coeff, nT);
430
0
      else
431
0
        tctx->decctx->acceleration.transform_bypass_rdpcm_h(residual, coeff, nT);
432
0
    }
433
0
    else {
434
0
      tctx->decctx->acceleration.transform_bypass(residual, coeff, nT);
435
0
    }
436
437
0
    if (cIdx != 0) {
438
0
      if (tctx->ResScaleVal != 0) {
439
0
        cross_comp_pred(tctx, residual, nT);
440
0
      }
441
0
    }
442
443
0
    tctx->decctx->acceleration.add_residual(pred,stride, residual,nT, bit_depth);
444
445
0
    if (rotateCoeffs) {
446
0
      memset(coeff, 0, nT*nT*sizeof(int16_t)); // delete all, because we moved the coeffs around
447
0
    }
448
0
  }
449
0
  else {
450
    // (8.6.3)
451
452
0
    int bdShift = (cIdx==0 ? sps.BitDepth_Y : sps.BitDepth_C) + Log2(nT) - 5;
453
454
0
    logtrace(LogTransform,"bdShift=%d\n",bdShift);
455
456
0
    logtrace(LogTransform,"dequant %d;%d cIdx=%d qp=%d\n",xT*(cIdx?2:1),yT*(cIdx?2:1),cIdx,qP);
457
458
459
    // --- inverse quantization ---
460
461
0
    if (sps.scaling_list_enable_flag==0) {
462
463
      //const int m_x_y = 16;
464
0
      const int m_x_y = 1;
465
0
      bdShift -= 4;  // this is equivalent to having a m_x_y of 16 and we can use 32bit integers
466
467
0
      const int offset = (1<<(bdShift-1));
468
0
      const int fact = m_x_y * levelScale[qP%6] << (qP/6);
469
470
0
      for (int i=0;i<tctx->nCoeff[cIdx];i++) {
471
472
0
        int64_t currCoeff  = tctx->coeffList[cIdx][i];
473
474
        //logtrace(LogTransform,"coefficient[%d] = %d\n",tctx->coeffPos[cIdx][i],
475
        //tctx->coeffList[cIdx][i]);
476
477
0
        currCoeff = Clip3(-32768,32767,
478
0
                          ( (currCoeff * fact + offset ) >> bdShift));
479
480
        //logtrace(LogTransform," -> %d\n",currCoeff);
481
482
0
        tctx->coeffBuf[ tctx->coeffPos[cIdx][i] ] = currCoeff;
483
0
      }
484
0
    }
485
0
    else {
486
0
      const int offset = (1<<(bdShift-1));
487
488
0
      const uint8_t* sclist;
489
0
      int matrixID = cIdx;
490
491
0
      if (nT==32) {
492
0
        matrixID=0;
493
0
      }
494
495
0
      if (!intra) {
496
0
        if (nT<32) { matrixID += 3; }
497
0
        else { matrixID++; }
498
0
      }
499
500
0
      switch (nT) {
501
0
      case  4: sclist = &pps.scaling_list.ScalingFactor_Size0[matrixID][0][0]; break;
502
0
      case  8: sclist = &pps.scaling_list.ScalingFactor_Size1[matrixID][0][0]; break;
503
0
      case 16: sclist = &pps.scaling_list.ScalingFactor_Size2[matrixID][0][0]; break;
504
0
      case 32: sclist = &pps.scaling_list.ScalingFactor_Size3[matrixID][0][0]; break;
505
0
      default: assert(0); sclist = nullptr;
506
0
      }
507
508
0
      for (int i=0;i<tctx->nCoeff[cIdx];i++) {
509
0
        int pos = tctx->coeffPos[cIdx][i];
510
511
0
        const int m_x_y = sclist[pos];
512
0
        const int fact = m_x_y * levelScale[qP%6] << (qP/6);
513
514
0
        int64_t currCoeff  = tctx->coeffList[cIdx][i];
515
516
0
        currCoeff = Clip3(-32768,32767,
517
0
                          ( (currCoeff * fact + offset ) >> bdShift));
518
519
0
        tctx->coeffBuf[ tctx->coeffPos[cIdx][i] ] = currCoeff;
520
0
      }
521
0
    }
522
523
524
    // --- do transform or skip ---
525
526
0
    logtrace(LogTransform,"coefficients OUT:\n");
527
0
    for (int y=0;y<nT;y++) {
528
0
      logtrace(LogTransform,"  ");
529
0
      for (int x=0;x<nT;x++) {
530
0
        logtrace(LogTransform,"*%3d ", coeff[x+y*coeffStride]);
531
0
      }
532
0
      logtrace(LogTransform,"*\n");
533
0
    }
534
535
#ifdef DE265_LOG_TRACE
536
    int bdShift2 = (cIdx==0) ? 20-sps.BitDepth_Y : 20-sps.BitDepth_C;
537
#endif
538
539
0
    logtrace(LogTransform,"bdShift2=%d\n",bdShift2);
540
541
0
    logtrace(LogSlice,"get_transform_skip_flag(%d,%d, cIdx=%d)=%d\n",xT,yT,cIdx,
542
0
             transform_skip_flag);
543
544
0
    if (transform_skip_flag) {
545
546
0
      int extended_precision_processing_flag = 0;
547
0
      int Log2nTbS = Log2(nT);
548
0
      int bdShift = libde265_max( 20 - bit_depth, extended_precision_processing_flag ? 11 : 0 );
549
0
      int tsShift = (extended_precision_processing_flag ? libde265_min( 5, bdShift - 2 ) : 5 )
550
0
        + Log2nTbS;
551
552
0
      if (rotateCoeffs) {
553
0
        tctx->decctx->acceleration.rotate_coefficients(coeff, nT);
554
0
      }
555
556
0
      int32_t residual_buffer[32*32];
557
558
0
      int32_t* residual;
559
0
      if (cIdx==0) residual = tctx->residual_luma;
560
0
      else         residual = residual_buffer;
561
562
0
      if (rdpcmMode) {
563
        /*
564
        if (rdpcmMode==2)
565
          tctx->decctx->acceleration.transform_skip_rdpcm_v(pred,coeff, Log2(nT), stride, bit_depth);
566
        else
567
          tctx->decctx->acceleration.transform_skip_rdpcm_h(pred,coeff, Log2(nT), stride, bit_depth);
568
        */
569
570
0
        if (rdpcmMode==2)
571
0
          tctx->decctx->acceleration.rdpcm_v(residual, coeff,nT, tsShift,bdShift);
572
0
        else
573
0
          tctx->decctx->acceleration.rdpcm_h(residual, coeff,nT, tsShift,bdShift);
574
0
      }
575
0
      else {
576
        //tctx->decctx->acceleration.transform_skip(pred, coeff, stride, bit_depth);
577
578
0
        tctx->decctx->acceleration.transform_skip_residual(residual, coeff, nT, tsShift, bdShift);
579
0
      }
580
581
0
      if (cIdx != 0) {
582
0
        if (tctx->ResScaleVal != 0) {
583
0
          cross_comp_pred(tctx, residual, nT);
584
0
        }
585
0
      }
586
587
0
      tctx->decctx->acceleration.add_residual(pred,stride, residual,nT, bit_depth);
588
589
0
      if (rotateCoeffs) {
590
0
        memset(coeff, 0, nT*nT*sizeof(int16_t)); // delete all, because we moved the coeffs around
591
0
      }
592
0
    }
593
0
    else {
594
0
      int trType;
595
596
      //if (nT==4 && cIdx==0 && tctx->img->get_pred_mode(xT,yT)==MODE_INTRA) {
597
0
      if (nT==4 && cIdx==0 && cuPredModeIntra) {
598
0
        trType=1;
599
0
      }
600
0
      else {
601
0
        trType=0;
602
0
      }
603
604
0
      assert(rdpcmMode==0);
605
606
607
0
      if (tctx->img->get_pps().range_extension.cross_component_prediction_enabled_flag) {
608
        // cross-component-prediction: transform to residual buffer and add in a separate step
609
610
0
        transform_coefficients_explicit(tctx, coeff, coeffStride, nT, trType,
611
0
                                        pred, stride, bit_depth, cIdx);
612
0
      }
613
0
      else {
614
0
        transform_coefficients(&tctx->decctx->acceleration, coeff, coeffStride, nT, trType,
615
0
                               pred, stride, bit_depth);
616
0
      }
617
0
    }
618
0
  }
619
620
621
0
  logtrace(LogTransform,"pixels (cIdx:%d), position %d %d:\n",cIdx, xT,yT);
622
623
0
  for (int y=0;y<nT;y++) {
624
0
    logtrace(LogTransform,"RECO-%3d-%3d-%d ",xT,yT+y,cIdx);
625
626
0
    for (int x=0;x<nT;x++) {
627
0
      logtrace(LogTransform,"*%03x ", pred[x+y*stride]);
628
0
    }
629
630
0
    logtrace(LogTransform,"*\n");
631
0
  }
632
633
  // zero out scrap coefficient buffer again
634
635
0
  for (int i=0;i<tctx->nCoeff[cIdx];i++) {
636
0
    tctx->coeffBuf[ tctx->coeffPos[cIdx][i] ] = 0;
637
0
  }
638
0
}
Unexecuted instantiation: void scale_coefficients_internal<unsigned short>(thread_context*, int, int, int, int, int, int, bool, bool, int)
Unexecuted instantiation: void scale_coefficients_internal<unsigned char>(thread_context*, int, int, int, int, int, int, bool, bool, int)
639
640
641
// Public entry point for block reconstruction: forwards all arguments to
// scale_coefficients_internal<>, instantiated for 16-bit pixels when the image
// reports a high bit depth for component cIdx and for 8-bit pixels otherwise.
void scale_coefficients(thread_context* tctx,
                        int xT,int yT, // position of TU in frame (chroma adapted)
                        int x0,int y0, // position of CU in frame (chroma adapted)
                        int nT, int cIdx,
                        bool transform_skip_flag, bool intra,
                        int rdpcmMode // 0 - off, 1 - Horizontal, 2 - Vertical
                        )
{
  const bool use16BitPixels = tctx->img->high_bit_depth(cIdx);

  if (!use16BitPixels) {
    scale_coefficients_internal<uint8_t>(tctx, xT,yT, x0,y0, nT,cIdx,
                                         transform_skip_flag, intra, rdpcmMode);
    return;
  }

  scale_coefficients_internal<uint16_t>(tctx, xT,yT, x0,y0, nT,cIdx,
                                        transform_skip_flag, intra, rdpcmMode);
}
657
658
659
//#define QUANT_IQUANT_SHIFT    20 // Q(QP%6) * IQ(QP%6) = 2^20
660
0
// Constants of the forward quantizer (quant_coefficients).
#define QUANT_SHIFT           14 // Q(4) = 2^14
//#define SCALE_BITS            15 // Inherited from TMuC, presumably for fractional bit estimates in RDOQ
#define MAX_TR_DYNAMIC_RANGE  15 // Maximum transform dynamic range (excluding sign bit)

// Forward quantization multipliers, indexed by (qp % 6).
// Entry 4 equals 1 << QUANT_SHIFT.
static const uint16_t g_quantScales[6] = {
  26214, 23302, 20560, 18396, 16384, 14564
};
668
669
void quant_coefficients(//encoder_context* ectx,
670
                        int16_t* out_coeff,
671
                        const int16_t* in_coeff,
672
                        int log2TrSize, int qp,
673
                        bool intra)
674
0
{
675
0
  const int qpDiv6 = qp / 6;
676
0
  const int qpMod6 = qp % 6;
677
678
  //int uiLog2TrSize = xLog2( iWidth - 1);
679
680
0
  int uiQ = g_quantScales[qpMod6];
681
0
  int bitDepth = 8;
682
0
  int transformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - log2TrSize;  // Represents scaling through forward transform
683
0
  int qBits = QUANT_SHIFT + qpDiv6 + transformShift;
684
685
  /* TODO: originally, this was checking for intra slices, why not for intra mode ?
686
   */
687
0
  int rnd = (intra ? 171 : 85) << (qBits-9);
688
689
0
  int x, y;
690
691
0
  int nStride = (1<<log2TrSize);
692
693
0
  for (y=0; y < (1<<log2TrSize) ; y++) {
694
0
    for (x=0; x < (1<<log2TrSize) ; x++) {
695
0
      int level;
696
0
      int sign;
697
0
      int blockPos = y * nStride + x;
698
0
      level  = in_coeff[blockPos];
699
      //logtrace(LogTransform,"(%d,%d) %d -> ", x,y,level);
700
0
      sign   = (level < 0 ? -1: 1);
701
702
0
      level = (abs_value(level) * uiQ + rnd ) >> qBits;
703
0
      level *= sign;
704
0
      out_coeff[blockPos] = Clip3(-32768, 32767, level);
705
      //logtrace(LogTransform,"%d\n", out_coeff[blockPos]);
706
0
    }
707
0
  }
708
0
}
709
710
711
void dequant_coefficients(int16_t* out_coeff,
712
                          const int16_t* in_coeff,
713
                          int log2TrSize, int qP)
714
0
{
715
0
  const int m_x_y = 1;
716
0
  int bitDepth = 8;
717
0
  int bdShift = bitDepth + log2TrSize - 5;
718
0
  bdShift -= 4;  // this is equivalent to having a m_x_y of 16 and we can use 32bit integers
719
720
0
  const int offset = (1<<(bdShift-1));
721
0
  const int fact = m_x_y * levelScale[qP%6] << (qP/6);
722
723
  //int blkSize = (1<<log2TrSize);
724
0
  int nCoeff  = (1<<(log2TrSize<<1));
725
726
0
  for (int i=0;i<nCoeff;i++) {
727
728
    // usually, this needs to be 64bit, but because we modify the shift above, we can use 16 bit
729
0
    int32_t currCoeff  = in_coeff[i];
730
731
    //logtrace(LogTransform,"coefficient[%d] = %d\n",i,currCoeff);
732
733
0
    currCoeff = Clip3(-32768,32767,
734
0
                      ( (currCoeff * fact + offset ) >> bdShift));
735
736
    //logtrace(LogTransform," -> %d\n",currCoeff);
737
738
0
    out_coeff[i] = currCoeff;
739
0
  }
740
0
}