Coverage Report

Created: 2025-08-28 07:12

/src/ffmpeg/libavcodec/simple_idct_template.c
Line
Count
Source
1
/*
2
 * Simple IDCT
3
 *
4
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
23
/**
24
 * @file
25
 * simpleidct in C.
26
 */
27
28
/* Based upon some commented-out C code from mpeg2dec (idct_mmx.c
29
 * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>). */
30
31
#include "bit_depth_template.c"
32
33
#undef W1
34
#undef W2
35
#undef W3
36
#undef W4
37
#undef W5
38
#undef W6
39
#undef W7
40
#undef ROW_SHIFT
41
#undef COL_SHIFT
42
#undef DC_SHIFT
43
#undef MUL
44
#undef MAC
45
46
#if BIT_DEPTH == 8
47
48
#define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
49
3.56G
#define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
50
#define W3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
51
3.88G
#define W4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
52
#define W5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
53
3.56G
#define W6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
54
#define W7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
55
56
1.07G
#define ROW_SHIFT 11
57
3.08G
#define COL_SHIFT 20
58
2.83G
#define DC_SHIFT 3
59
60
6.58G
#define MUL(a, b)    MUL16(a, b)
61
7.62G
#define MAC(a, b, c) MAC16(a, b, c)
62
63
#elif BIT_DEPTH == 10 || BIT_DEPTH == 12
64
65
# if BIT_DEPTH == 10
66
#define W1 22725 // 90901
67
40.3M
#define W2 21407 //  85627
68
#define W3 19265 //  77062
69
41.7M
#define W4 16384 //  65535
70
#define W5 12873 //  51491
71
40.3M
#define W6  8867 //  35468
72
#define W7  4520 //  18081
73
74
#   ifdef EXTRA_SHIFT
75
2.15M
#define ROW_SHIFT 13
76
44.0M
#define COL_SHIFT 18
77
13.9M
#define DC_SHIFT  1
78
#   elif IN_IDCT_DEPTH == 32
79
8.23M
#define ROW_SHIFT 13
80
914k
#define COL_SHIFT 21
81
#define DC_SHIFT  2
82
#   else
83
8.69M
#define ROW_SHIFT 12
84
11.0M
#define COL_SHIFT 19
85
20.0M
#define DC_SHIFT  2
86
#   endif
87
88
# else
89
#define W1 45451
90
13.4M
#define W2 42813
91
#define W3 38531
92
14.2M
#define W4 32767
93
#define W5 25746
94
13.4M
#define W6 17734
95
#define W7 9041
96
97
2.86M
#define ROW_SHIFT 16
98
12.9M
#define COL_SHIFT 17
99
16.7M
#define DC_SHIFT -1
100
# endif
101
102
100M
#define MUL(a, b)    ((int)((SUINT)(a) * (b)))
103
117M
#define MAC(a, b, c) ((a) += (SUINT)(b) * (c))
104
105
#else
106
107
#error "Unsupported bitdepth"
108
109
#endif
110
111
#ifdef EXTRA_SHIFT
112
static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
113
#else
114
static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
115
#endif
116
1.55G
{
117
1.55G
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
118
119
// TODO: Add DC-only support for int32_t input
120
#if IN_IDCT_DEPTH == 16
121
#if HAVE_FAST_64BIT
122
1.55G
#define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN)
123
1.55G
    if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
124
1.43G
        uint64_t temp;
125
1.43G
        if (DC_SHIFT - extra_shift >= 0) {
126
1.42G
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
127
1.42G
        } else {
128
10.2M
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
129
10.2M
        }
130
1.43G
        temp += temp * (1 << 16);
131
1.43G
        temp += temp * ((uint64_t) 1 << 32);
132
1.43G
        AV_WN64A(row, temp);
133
1.43G
        AV_WN64A(row + 4, temp);
134
1.43G
        return;
135
1.43G
    }
136
#else
137
    if (!(AV_RN32A(row+2) |
138
          AV_RN32A(row+4) |
139
          AV_RN32A(row+6) |
140
          row[1])) {
141
        uint32_t temp;
142
        if (DC_SHIFT - extra_shift >= 0) {
143
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
144
        } else {
145
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
146
        }
147
        temp += temp * (1 << 16);
148
        AV_WN32A(row, temp);
149
        AV_WN32A(row+2, temp);
150
        AV_WN32A(row+4, temp);
151
        AV_WN32A(row+6, temp);
152
        return;
153
    }
154
#endif
155
#endif
156
157
121M
    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
158
120M
    a1 = a0;
159
120M
    a2 = a0;
160
120M
    a3 = a0;
161
162
121M
    a0 += (SUINT)W2 * row[2];
163
121M
    a1 += (SUINT)W6 * row[2];
164
121M
    a2 -= (SUINT)W6 * row[2];
165
121M
    a3 -= (SUINT)W2 * row[2];
166
167
121M
    b0 = MUL(W1, row[1]);
168
121M
    MAC(b0, W3, row[3]);
169
121M
    b1 = MUL(W3, row[1]);
170
121M
    MAC(b1, -W7, row[3]);
171
121M
    b2 = MUL(W5, row[1]);
172
121M
    MAC(b2, -W1, row[3]);
173
121M
    b3 = MUL(W7, row[1]);
174
121M
    MAC(b3, -W5, row[3]);
175
176
#if IN_IDCT_DEPTH == 32
177
914k
    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
178
#else
179
120M
    if (AV_RN64A(row + 4)) {
180
55.9M
#endif
181
56.0M
        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
182
56.0M
        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
183
56.0M
        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
184
56.0M
        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
185
186
56.0M
        MAC(b0,  W5, row[5]);
187
56.0M
        MAC(b0,  W7, row[7]);
188
189
56.0M
        MAC(b1, -W1, row[5]);
190
56.0M
        MAC(b1, -W5, row[7]);
191
192
56.0M
        MAC(b2,  W7, row[5]);
193
56.0M
        MAC(b2,  W3, row[7]);
194
195
56.0M
        MAC(b3,  W3, row[5]);
196
56.0M
        MAC(b3, -W1, row[7]);
197
55.9M
    }
198
199
121M
    row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
200
121M
    row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
201
121M
    row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
202
121M
    row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
203
121M
    row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
204
121M
    row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
205
121M
    row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
206
121M
    row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
207
120M
}
simple_idct.c:idctRowCondDC_int16_8bit
Line
Count
Source
116
1.53G
{
117
1.53G
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
118
119
// TODO: Add DC-only support for int32_t input
120
1.53G
#if IN_IDCT_DEPTH == 16
121
1.53G
#if HAVE_FAST_64BIT
122
1.53G
#define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN)
123
1.53G
    if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
124
1.41G
        uint64_t temp;
125
1.41G
        if (DC_SHIFT - extra_shift >= 0) {
126
1.41G
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
127
1.41G
        } else {
128
0
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
129
0
        }
130
1.41G
        temp += temp * (1 << 16);
131
1.41G
        temp += temp * ((uint64_t) 1 << 32);
132
1.41G
        AV_WN64A(row, temp);
133
1.41G
        AV_WN64A(row + 4, temp);
134
1.41G
        return;
135
1.41G
    }
136
#else
137
    if (!(AV_RN32A(row+2) |
138
          AV_RN32A(row+4) |
139
          AV_RN32A(row+6) |
140
          row[1])) {
141
        uint32_t temp;
142
        if (DC_SHIFT - extra_shift >= 0) {
143
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
144
        } else {
145
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
146
        }
147
        temp += temp * (1 << 16);
148
        AV_WN32A(row, temp);
149
        AV_WN32A(row+2, temp);
150
        AV_WN32A(row+4, temp);
151
        AV_WN32A(row+6, temp);
152
        return;
153
    }
154
#endif
155
118M
#endif
156
157
118M
    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
158
118M
    a1 = a0;
159
118M
    a2 = a0;
160
118M
    a3 = a0;
161
162
118M
    a0 += (SUINT)W2 * row[2];
163
118M
    a1 += (SUINT)W6 * row[2];
164
118M
    a2 -= (SUINT)W6 * row[2];
165
118M
    a3 -= (SUINT)W2 * row[2];
166
167
118M
    b0 = MUL(W1, row[1]);
168
118M
    MAC(b0, W3, row[3]);
169
118M
    b1 = MUL(W3, row[1]);
170
118M
    MAC(b1, -W7, row[3]);
171
118M
    b2 = MUL(W5, row[1]);
172
118M
    MAC(b2, -W1, row[3]);
173
118M
    b3 = MUL(W7, row[1]);
174
118M
    MAC(b3, -W5, row[3]);
175
176
#if IN_IDCT_DEPTH == 32
177
    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
178
#else
179
118M
    if (AV_RN64A(row + 4)) {
180
55.3M
#endif
181
55.3M
        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
182
55.3M
        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
183
55.3M
        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
184
55.3M
        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
185
186
55.3M
        MAC(b0,  W5, row[5]);
187
55.3M
        MAC(b0,  W7, row[7]);
188
189
55.3M
        MAC(b1, -W1, row[5]);
190
55.3M
        MAC(b1, -W5, row[7]);
191
192
55.3M
        MAC(b2,  W7, row[5]);
193
55.3M
        MAC(b2,  W3, row[7]);
194
195
55.3M
        MAC(b3,  W3, row[5]);
196
55.3M
        MAC(b3, -W1, row[7]);
197
55.3M
    }
198
199
118M
    row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
200
118M
    row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
201
118M
    row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
202
118M
    row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
203
118M
    row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
204
118M
    row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
205
118M
    row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
206
118M
    row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
207
118M
}
simple_idct.c:idctRowCondDC_int16_10bit
Line
Count
Source
116
11.0M
{
117
11.0M
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
118
119
// TODO: Add DC-only support for int32_t input
120
11.0M
#if IN_IDCT_DEPTH == 16
121
11.0M
#if HAVE_FAST_64BIT
122
11.0M
#define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN)
123
11.0M
    if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
124
10.0M
        uint64_t temp;
125
10.0M
        if (DC_SHIFT - extra_shift >= 0) {
126
10.0M
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
127
10.0M
        } else {
128
0
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
129
0
        }
130
10.0M
        temp += temp * (1 << 16);
131
10.0M
        temp += temp * ((uint64_t) 1 << 32);
132
10.0M
        AV_WN64A(row, temp);
133
10.0M
        AV_WN64A(row + 4, temp);
134
10.0M
        return;
135
10.0M
    }
136
#else
137
    if (!(AV_RN32A(row+2) |
138
          AV_RN32A(row+4) |
139
          AV_RN32A(row+6) |
140
          row[1])) {
141
        uint32_t temp;
142
        if (DC_SHIFT - extra_shift >= 0) {
143
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
144
        } else {
145
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
146
        }
147
        temp += temp * (1 << 16);
148
        AV_WN32A(row, temp);
149
        AV_WN32A(row+2, temp);
150
        AV_WN32A(row+4, temp);
151
        AV_WN32A(row+6, temp);
152
        return;
153
    }
154
#endif
155
966k
#endif
156
157
966k
    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
158
966k
    a1 = a0;
159
966k
    a2 = a0;
160
966k
    a3 = a0;
161
162
966k
    a0 += (SUINT)W2 * row[2];
163
966k
    a1 += (SUINT)W6 * row[2];
164
966k
    a2 -= (SUINT)W6 * row[2];
165
966k
    a3 -= (SUINT)W2 * row[2];
166
167
966k
    b0 = MUL(W1, row[1]);
168
966k
    MAC(b0, W3, row[3]);
169
966k
    b1 = MUL(W3, row[1]);
170
966k
    MAC(b1, -W7, row[3]);
171
966k
    b2 = MUL(W5, row[1]);
172
966k
    MAC(b2, -W1, row[3]);
173
966k
    b3 = MUL(W7, row[1]);
174
966k
    MAC(b3, -W5, row[3]);
175
176
#if IN_IDCT_DEPTH == 32
177
    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
178
#else
179
966k
    if (AV_RN64A(row + 4)) {
180
333k
#endif
181
333k
        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
182
333k
        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
183
333k
        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
184
333k
        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
185
186
333k
        MAC(b0,  W5, row[5]);
187
333k
        MAC(b0,  W7, row[7]);
188
189
333k
        MAC(b1, -W1, row[5]);
190
333k
        MAC(b1, -W5, row[7]);
191
192
333k
        MAC(b2,  W7, row[5]);
193
333k
        MAC(b2,  W3, row[7]);
194
195
333k
        MAC(b3,  W3, row[5]);
196
333k
        MAC(b3, -W1, row[7]);
197
333k
    }
198
199
966k
    row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
200
966k
    row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
201
966k
    row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
202
966k
    row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
203
966k
    row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
204
966k
    row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
205
966k
    row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
206
966k
    row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
207
966k
}
simple_idct.c:idctRowCondDC_int16_12bit
Line
Count
Source
116
5.01M
{
117
5.01M
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
118
119
// TODO: Add DC-only support for int32_t input
120
5.01M
#if IN_IDCT_DEPTH == 16
121
5.01M
#if HAVE_FAST_64BIT
122
5.01M
#define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN)
123
5.01M
    if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
124
4.74M
        uint64_t temp;
125
4.74M
        if (DC_SHIFT - extra_shift >= 0) {
126
0
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
127
4.74M
        } else {
128
4.74M
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
129
4.74M
        }
130
4.74M
        temp += temp * (1 << 16);
131
4.74M
        temp += temp * ((uint64_t) 1 << 32);
132
4.74M
        AV_WN64A(row, temp);
133
4.74M
        AV_WN64A(row + 4, temp);
134
4.74M
        return;
135
4.74M
    }
136
#else
137
    if (!(AV_RN32A(row+2) |
138
          AV_RN32A(row+4) |
139
          AV_RN32A(row+6) |
140
          row[1])) {
141
        uint32_t temp;
142
        if (DC_SHIFT - extra_shift >= 0) {
143
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
144
        } else {
145
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
146
        }
147
        temp += temp * (1 << 16);
148
        AV_WN32A(row, temp);
149
        AV_WN32A(row+2, temp);
150
        AV_WN32A(row+4, temp);
151
        AV_WN32A(row+6, temp);
152
        return;
153
    }
154
#endif
155
275k
#endif
156
157
275k
    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
158
275k
    a1 = a0;
159
275k
    a2 = a0;
160
275k
    a3 = a0;
161
162
275k
    a0 += (SUINT)W2 * row[2];
163
275k
    a1 += (SUINT)W6 * row[2];
164
275k
    a2 -= (SUINT)W6 * row[2];
165
275k
    a3 -= (SUINT)W2 * row[2];
166
167
275k
    b0 = MUL(W1, row[1]);
168
275k
    MAC(b0, W3, row[3]);
169
275k
    b1 = MUL(W3, row[1]);
170
275k
    MAC(b1, -W7, row[3]);
171
275k
    b2 = MUL(W5, row[1]);
172
275k
    MAC(b2, -W1, row[3]);
173
275k
    b3 = MUL(W7, row[1]);
174
275k
    MAC(b3, -W5, row[3]);
175
176
#if IN_IDCT_DEPTH == 32
177
    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
178
#else
179
275k
    if (AV_RN64A(row + 4)) {
180
207k
#endif
181
207k
        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
182
207k
        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
183
207k
        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
184
207k
        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
185
186
207k
        MAC(b0,  W5, row[5]);
187
207k
        MAC(b0,  W7, row[7]);
188
189
207k
        MAC(b1, -W1, row[5]);
190
207k
        MAC(b1, -W5, row[7]);
191
192
207k
        MAC(b2,  W7, row[5]);
193
207k
        MAC(b2,  W3, row[7]);
194
195
207k
        MAC(b3,  W3, row[5]);
196
207k
        MAC(b3, -W1, row[7]);
197
207k
    }
198
199
275k
    row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
200
275k
    row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
201
275k
    row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
202
275k
    row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
203
275k
    row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
204
275k
    row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
205
275k
    row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
206
275k
    row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
207
275k
}
simple_idct.c:idctRowCondDC_int32_10bit
Line
Count
Source
116
914k
{
117
914k
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
118
119
// TODO: Add DC-only support for int32_t input
120
#if IN_IDCT_DEPTH == 16
121
#if HAVE_FAST_64BIT
122
#define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN)
123
    if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
124
        uint64_t temp;
125
        if (DC_SHIFT - extra_shift >= 0) {
126
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
127
        } else {
128
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
129
        }
130
        temp += temp * (1 << 16);
131
        temp += temp * ((uint64_t) 1 << 32);
132
        AV_WN64A(row, temp);
133
        AV_WN64A(row + 4, temp);
134
        return;
135
    }
136
#else
137
    if (!(AV_RN32A(row+2) |
138
          AV_RN32A(row+4) |
139
          AV_RN32A(row+6) |
140
          row[1])) {
141
        uint32_t temp;
142
        if (DC_SHIFT - extra_shift >= 0) {
143
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
144
        } else {
145
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
146
        }
147
        temp += temp * (1 << 16);
148
        AV_WN32A(row, temp);
149
        AV_WN32A(row+2, temp);
150
        AV_WN32A(row+4, temp);
151
        AV_WN32A(row+6, temp);
152
        return;
153
    }
154
#endif
155
#endif
156
157
914k
    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
158
914k
    a1 = a0;
159
914k
    a2 = a0;
160
914k
    a3 = a0;
161
162
914k
    a0 += (SUINT)W2 * row[2];
163
914k
    a1 += (SUINT)W6 * row[2];
164
914k
    a2 -= (SUINT)W6 * row[2];
165
914k
    a3 -= (SUINT)W2 * row[2];
166
167
914k
    b0 = MUL(W1, row[1]);
168
914k
    MAC(b0, W3, row[3]);
169
914k
    b1 = MUL(W3, row[1]);
170
914k
    MAC(b1, -W7, row[3]);
171
914k
    b2 = MUL(W5, row[1]);
172
914k
    MAC(b2, -W1, row[3]);
173
914k
    b3 = MUL(W7, row[1]);
174
914k
    MAC(b3, -W5, row[3]);
175
176
914k
#if IN_IDCT_DEPTH == 32
177
914k
    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
178
#else
179
    if (AV_RN64A(row + 4)) {
180
#endif
181
98.3k
        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
182
98.3k
        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
183
98.3k
        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
184
98.3k
        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
185
186
98.3k
        MAC(b0,  W5, row[5]);
187
98.3k
        MAC(b0,  W7, row[7]);
188
189
98.3k
        MAC(b1, -W1, row[5]);
190
98.3k
        MAC(b1, -W5, row[7]);
191
192
98.3k
        MAC(b2,  W7, row[5]);
193
98.3k
        MAC(b2,  W3, row[7]);
194
195
98.3k
        MAC(b3,  W3, row[5]);
196
98.3k
        MAC(b3, -W1, row[7]);
197
98.3k
    }
198
199
914k
    row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
200
914k
    row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
201
914k
    row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
202
914k
    row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
203
914k
    row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
204
914k
    row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
205
914k
    row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
206
914k
    row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
207
914k
}
proresdsp.c:idctRowCondDC_extrashift_10
Line
Count
Source
116
4.89M
{
117
4.89M
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
118
119
// TODO: Add DC-only support for int32_t input
120
4.89M
#if IN_IDCT_DEPTH == 16
121
4.89M
#if HAVE_FAST_64BIT
122
4.89M
#define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN)
123
4.89M
    if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
124
4.65M
        uint64_t temp;
125
4.65M
        if (DC_SHIFT - extra_shift >= 0) {
126
0
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
127
4.65M
        } else {
128
4.65M
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
129
4.65M
        }
130
4.65M
        temp += temp * (1 << 16);
131
4.65M
        temp += temp * ((uint64_t) 1 << 32);
132
4.65M
        AV_WN64A(row, temp);
133
4.65M
        AV_WN64A(row + 4, temp);
134
4.65M
        return;
135
4.65M
    }
136
#else
137
    if (!(AV_RN32A(row+2) |
138
          AV_RN32A(row+4) |
139
          AV_RN32A(row+6) |
140
          row[1])) {
141
        uint32_t temp;
142
        if (DC_SHIFT - extra_shift >= 0) {
143
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
144
        } else {
145
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
146
        }
147
        temp += temp * (1 << 16);
148
        AV_WN32A(row, temp);
149
        AV_WN32A(row+2, temp);
150
        AV_WN32A(row+4, temp);
151
        AV_WN32A(row+6, temp);
152
        return;
153
    }
154
#endif
155
238k
#endif
156
157
238k
    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
158
238k
    a1 = a0;
159
238k
    a2 = a0;
160
238k
    a3 = a0;
161
162
238k
    a0 += (SUINT)W2 * row[2];
163
238k
    a1 += (SUINT)W6 * row[2];
164
238k
    a2 -= (SUINT)W6 * row[2];
165
238k
    a3 -= (SUINT)W2 * row[2];
166
167
238k
    b0 = MUL(W1, row[1]);
168
238k
    MAC(b0, W3, row[3]);
169
238k
    b1 = MUL(W3, row[1]);
170
238k
    MAC(b1, -W7, row[3]);
171
238k
    b2 = MUL(W5, row[1]);
172
238k
    MAC(b2, -W1, row[3]);
173
238k
    b3 = MUL(W7, row[1]);
174
238k
    MAC(b3, -W5, row[3]);
175
176
#if IN_IDCT_DEPTH == 32
177
    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
178
#else
179
238k
    if (AV_RN64A(row + 4)) {
180
97.1k
#endif
181
97.1k
        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
182
97.1k
        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
183
97.1k
        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
184
97.1k
        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
185
186
97.1k
        MAC(b0,  W5, row[5]);
187
97.1k
        MAC(b0,  W7, row[7]);
188
189
97.1k
        MAC(b1, -W1, row[5]);
190
97.1k
        MAC(b1, -W5, row[7]);
191
192
97.1k
        MAC(b2,  W7, row[5]);
193
97.1k
        MAC(b2,  W3, row[7]);
194
195
97.1k
        MAC(b3,  W3, row[5]);
196
97.1k
        MAC(b3, -W1, row[7]);
197
97.1k
    }
198
199
238k
    row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
200
238k
    row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
201
238k
    row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
202
238k
    row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
203
238k
    row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
204
238k
    row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
205
238k
    row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
206
238k
    row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
207
238k
}
proresdsp.c:idctRowCondDC_int16_12bit
Line
Count
Source
116
886k
{
117
886k
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
118
119
// TODO: Add DC-only support for int32_t input
120
886k
#if IN_IDCT_DEPTH == 16
121
886k
#if HAVE_FAST_64BIT
122
886k
#define ROW0_MASK (0xffffULL << 48 * HAVE_BIGENDIAN)
123
886k
    if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
124
842k
        uint64_t temp;
125
842k
        if (DC_SHIFT - extra_shift >= 0) {
126
0
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
127
842k
        } else {
128
842k
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
129
842k
        }
130
842k
        temp += temp * (1 << 16);
131
842k
        temp += temp * ((uint64_t) 1 << 32);
132
842k
        AV_WN64A(row, temp);
133
842k
        AV_WN64A(row + 4, temp);
134
842k
        return;
135
842k
    }
136
#else
137
    if (!(AV_RN32A(row+2) |
138
          AV_RN32A(row+4) |
139
          AV_RN32A(row+6) |
140
          row[1])) {
141
        uint32_t temp;
142
        if (DC_SHIFT - extra_shift >= 0) {
143
            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
144
        } else {
145
            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
146
        }
147
        temp += temp * (1 << 16);
148
        AV_WN32A(row, temp);
149
        AV_WN32A(row+2, temp);
150
        AV_WN32A(row+4, temp);
151
        AV_WN32A(row+6, temp);
152
        return;
153
    }
154
#endif
155
43.3k
#endif
156
157
43.3k
    a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
158
43.3k
    a1 = a0;
159
43.3k
    a2 = a0;
160
43.3k
    a3 = a0;
161
162
43.3k
    a0 += (SUINT)W2 * row[2];
163
43.3k
    a1 += (SUINT)W6 * row[2];
164
43.3k
    a2 -= (SUINT)W6 * row[2];
165
43.3k
    a3 -= (SUINT)W2 * row[2];
166
167
43.3k
    b0 = MUL(W1, row[1]);
168
43.3k
    MAC(b0, W3, row[3]);
169
43.3k
    b1 = MUL(W3, row[1]);
170
43.3k
    MAC(b1, -W7, row[3]);
171
43.3k
    b2 = MUL(W5, row[1]);
172
43.3k
    MAC(b2, -W1, row[3]);
173
43.3k
    b3 = MUL(W7, row[1]);
174
43.3k
    MAC(b3, -W5, row[3]);
175
176
#if IN_IDCT_DEPTH == 32
177
    if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
178
#else
179
43.3k
    if (AV_RN64A(row + 4)) {
180
21.3k
#endif
181
21.3k
        a0 += (SUINT)  W4*row[4] + (SUINT)W6*row[6];
182
21.3k
        a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
183
21.3k
        a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
184
21.3k
        a3 += (SUINT)  W4*row[4] - (SUINT)W6*row[6];
185
186
21.3k
        MAC(b0,  W5, row[5]);
187
21.3k
        MAC(b0,  W7, row[7]);
188
189
21.3k
        MAC(b1, -W1, row[5]);
190
21.3k
        MAC(b1, -W5, row[7]);
191
192
21.3k
        MAC(b2,  W7, row[5]);
193
21.3k
        MAC(b2,  W3, row[7]);
194
195
21.3k
        MAC(b3,  W3, row[5]);
196
21.3k
        MAC(b3, -W1, row[7]);
197
21.3k
    }
198
199
43.3k
    row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
200
43.3k
    row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
201
43.3k
    row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
202
43.3k
    row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
203
43.3k
    row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
204
43.3k
    row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
205
43.3k
    row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
206
43.3k
    row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
207
43.3k
}
208
209
1.54G
#define IDCT_COLS do {                                  \
210
1.54G
        a0 = (SUINT)W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
211
1.54G
        a1 = a0;                                        \
212
1.54G
        a2 = a0;                                        \
213
1.54G
        a3 = a0;                                        \
214
1.54G
                                                        \
215
1.54G
        a0 += (SUINT) W2*col[8*2];                             \
216
1.54G
        a1 += (SUINT) W6*col[8*2];                             \
217
1.54G
        a2 += (SUINT)-W6*col[8*2];                             \
218
1.54G
        a3 += (SUINT)-W2*col[8*2];                             \
219
1.54G
                                                        \
220
1.54G
        b0 = MUL(W1, col[8*1]);                         \
221
1.54G
        b1 = MUL(W3, col[8*1]);                         \
222
1.54G
        b2 = MUL(W5, col[8*1]);                         \
223
1.54G
        b3 = MUL(W7, col[8*1]);                         \
224
1.54G
                                                        \
225
1.54G
        MAC(b0,  W3, col[8*3]);                         \
226
1.54G
        MAC(b1, -W7, col[8*3]);                         \
227
1.54G
        MAC(b2, -W1, col[8*3]);                         \
228
1.54G
        MAC(b3, -W5, col[8*3]);                         \
229
1.54G
                                                        \
230
1.54G
        if (col[8*4]) {                                 \
231
125M
            a0 += (SUINT) W4*col[8*4];                         \
232
125M
            a1 += (SUINT)-W4*col[8*4];                         \
233
125M
            a2 += (SUINT)-W4*col[8*4];                         \
234
125M
            a3 += (SUINT) W4*col[8*4];                         \
235
125M
        }                                               \
236
1.54G
                                                        \
237
1.54G
        if (col[8*5]) {                                 \
238
93.2M
            MAC(b0,  W5, col[8*5]);                     \
239
93.2M
            MAC(b1, -W1, col[8*5]);                     \
240
93.2M
            MAC(b2,  W7, col[8*5]);                     \
241
93.2M
            MAC(b3,  W3, col[8*5]);                     \
242
93.2M
        }                                               \
243
1.54G
                                                        \
244
1.54G
        if (col[8*6]) {                                 \
245
83.0M
            a0 += (SUINT) W6*col[8*6];                         \
246
83.0M
            a1 += (SUINT)-W2*col[8*6];                         \
247
83.0M
            a2 += (SUINT) W2*col[8*6];                         \
248
83.0M
            a3 += (SUINT)-W6*col[8*6];                         \
249
83.0M
        }                                               \
250
1.54G
                                                        \
251
1.54G
        if (col[8*7]) {                                 \
252
60.6M
            MAC(b0,  W7, col[8*7]);                     \
253
60.6M
            MAC(b1, -W5, col[8*7]);                     \
254
60.6M
            MAC(b2,  W3, col[8*7]);                     \
255
60.6M
            MAC(b3, -W1, col[8*7]);                     \
256
60.6M
        }                                               \
257
1.54G
    } while (0)
258
259
/**
 * In-place column pass over one column of an 8x8 coefficient block.
 *
 * The function is compiled under one of two names: with EXTRA_SHIFT defined
 * it is FUNC(idctSparseCol_extrashift) operating on int16_t, otherwise it is
 * FUNC6(idctSparseCol) operating on idctin.
 *
 * @param col pointer to the top entry of the column; the eight entries are
 *            addressed with a stride of 8 (col[0], col[8], ..., col[56]) and
 *            are overwritten with the results.
 */
#ifdef EXTRA_SHIFT
260
static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
261
#else
262
static inline void FUNC6(idctSparseCol)(idctin *col)
263
#endif
264
200M
{
265
200M
    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
266
267
200M
    /* IDCT_COLS (macro defined earlier in this file) accumulates into the
     * eight locals above from col[8*k]; in its visible part a0..a3 take the
     * even-indexed inputs and b0..b3 the odd-indexed ones. Zero inputs are
     * skipped there, hence "sparse". */
    IDCT_COLS;
268
269
200M
    /* Outputs 0..3 are a_k + b_k, outputs 4..7 are a_k - b_k in mirrored
     * order. The unsigned sum is cast to int before the shift by COL_SHIFT
     * (presumably unsigned accumulation is used so wrap-around is well
     * defined -- TODO confirm). */
    col[0 ] = ((int)(a0 + b0) >> COL_SHIFT);
270
200M
    col[8 ] = ((int)(a1 + b1) >> COL_SHIFT);
271
200M
    col[16] = ((int)(a2 + b2) >> COL_SHIFT);
272
200M
    col[24] = ((int)(a3 + b3) >> COL_SHIFT);
273
200M
    col[32] = ((int)(a3 - b3) >> COL_SHIFT);
274
200M
    col[40] = ((int)(a2 - b2) >> COL_SHIFT);
275
200M
    col[48] = ((int)(a1 - b1) >> COL_SHIFT);
276
200M
    col[56] = ((int)(a0 - b0) >> COL_SHIFT);
277
200M
}
simple_idct.c:idctSparseCol_int16_8bit
Line
Count
Source
264
194M
{
265
194M
    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
266
267
194M
    IDCT_COLS;
268
269
194M
    col[0 ] = ((int)(a0 + b0) >> COL_SHIFT);
270
194M
    col[8 ] = ((int)(a1 + b1) >> COL_SHIFT);
271
194M
    col[16] = ((int)(a2 + b2) >> COL_SHIFT);
272
194M
    col[24] = ((int)(a3 + b3) >> COL_SHIFT);
273
194M
    col[32] = ((int)(a3 - b3) >> COL_SHIFT);
274
194M
    col[40] = ((int)(a2 - b2) >> COL_SHIFT);
275
194M
    col[48] = ((int)(a1 - b1) >> COL_SHIFT);
276
194M
    col[56] = ((int)(a0 - b0) >> COL_SHIFT);
277
194M
}
Unexecuted instantiation: simple_idct.c:idctSparseCol_int16_10bit
Unexecuted instantiation: simple_idct.c:idctSparseCol_int16_12bit
Unexecuted instantiation: simple_idct.c:idctSparseCol_int32_10bit
proresdsp.c:idctSparseCol_extrashift_10
Line
Count
Source
264
4.89M
{
265
4.89M
    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
266
267
4.89M
    IDCT_COLS;
268
269
4.89M
    col[0 ] = ((int)(a0 + b0) >> COL_SHIFT);
270
4.89M
    col[8 ] = ((int)(a1 + b1) >> COL_SHIFT);
271
4.89M
    col[16] = ((int)(a2 + b2) >> COL_SHIFT);
272
4.89M
    col[24] = ((int)(a3 + b3) >> COL_SHIFT);
273
4.89M
    col[32] = ((int)(a3 - b3) >> COL_SHIFT);
274
4.89M
    col[40] = ((int)(a2 - b2) >> COL_SHIFT);
275
4.89M
    col[48] = ((int)(a1 - b1) >> COL_SHIFT);
276
4.89M
    col[56] = ((int)(a0 - b0) >> COL_SHIFT);
277
4.89M
}
proresdsp.c:idctSparseCol_int16_12bit
Line
Count
Source
264
886k
{
265
886k
    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
266
267
886k
    IDCT_COLS;
268
269
886k
    col[0 ] = ((int)(a0 + b0) >> COL_SHIFT);
270
886k
    col[8 ] = ((int)(a1 + b1) >> COL_SHIFT);
271
886k
    col[16] = ((int)(a2 + b2) >> COL_SHIFT);
272
886k
    col[24] = ((int)(a3 + b3) >> COL_SHIFT);
273
886k
    col[32] = ((int)(a3 - b3) >> COL_SHIFT);
274
886k
    col[40] = ((int)(a2 - b2) >> COL_SHIFT);
275
886k
    col[48] = ((int)(a1 - b1) >> COL_SHIFT);
276
886k
    col[56] = ((int)(a0 - b0) >> COL_SHIFT);
277
886k
}
278
279
#ifndef PRORES_ONLY
280
#ifndef EXTRA_SHIFT
281
/**
 * Column pass that stores its eight results into a destination picture
 * column instead of back into the coefficient block.
 *
 * @param dest      pointer to the first output sample; one sample is written
 *                  per row and dest is advanced by line_size after each row.
 * @param line_size distance between consecutive rows, in units of pixel
 *                  elements (it is added directly to a pixel pointer).
 * @param col       pointer to the top entry of the input column, read with a
 *                  stride of 8 by IDCT_COLS.
 *
 * NOTE(review): the accumulators are declared SUINT here while
 * idctSparseCol and idctSparseColAdd declare them unsigned; SUINT is
 * defined outside this view -- confirm the difference is intentional.
 */
static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
282
                                          idctin *col)
283
1.25G
{
284
1.25G
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
285
286
1.25G
    /* Accumulates a0..a3 / b0..b3 from col[8*k] (macro defined earlier). */
    IDCT_COLS;
287
288
1.25G
    /* Rows 0..3 use a_k + b_k, rows 4..7 use a_k - b_k in mirrored order.
     * Each value is shifted by COL_SHIFT and passed through av_clip_pixel
     * (presumably a clamp to the valid pixel range -- defined elsewhere). */
    dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT);
289
1.25G
    dest += line_size;
290
1.25G
    dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT);
291
1.25G
    dest += line_size;
292
1.25G
    dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT);
293
1.25G
    dest += line_size;
294
1.25G
    dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT);
295
1.25G
    dest += line_size;
296
1.25G
    dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT);
297
1.25G
    dest += line_size;
298
1.25G
    dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT);
299
1.25G
    dest += line_size;
300
1.25G
    dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT);
301
1.25G
    dest += line_size;
302
1.25G
    dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
303
1.25G
}
simple_idct.c:idctSparseColPut_int16_8bit
Line
Count
Source
283
1.23G
{
284
1.23G
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
285
286
1.23G
    IDCT_COLS;
287
288
1.23G
    dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT);
289
1.23G
    dest += line_size;
290
1.23G
    dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT);
291
1.23G
    dest += line_size;
292
1.23G
    dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT);
293
1.23G
    dest += line_size;
294
1.23G
    dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT);
295
1.23G
    dest += line_size;
296
1.23G
    dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT);
297
1.23G
    dest += line_size;
298
1.23G
    dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT);
299
1.23G
    dest += line_size;
300
1.23G
    dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT);
301
1.23G
    dest += line_size;
302
1.23G
    dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
303
1.23G
}
simple_idct.c:idctSparseColPut_int16_10bit
Line
Count
Source
283
11.0M
{
284
11.0M
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
285
286
11.0M
    IDCT_COLS;
287
288
11.0M
    dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT);
289
11.0M
    dest += line_size;
290
11.0M
    dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT);
291
11.0M
    dest += line_size;
292
11.0M
    dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT);
293
11.0M
    dest += line_size;
294
11.0M
    dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT);
295
11.0M
    dest += line_size;
296
11.0M
    dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT);
297
11.0M
    dest += line_size;
298
11.0M
    dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT);
299
11.0M
    dest += line_size;
300
11.0M
    dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT);
301
11.0M
    dest += line_size;
302
11.0M
    dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
303
11.0M
}
simple_idct.c:idctSparseColPut_int16_12bit
Line
Count
Source
283
5.01M
{
284
5.01M
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
285
286
5.01M
    IDCT_COLS;
287
288
5.01M
    dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT);
289
5.01M
    dest += line_size;
290
5.01M
    dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT);
291
5.01M
    dest += line_size;
292
5.01M
    dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT);
293
5.01M
    dest += line_size;
294
5.01M
    dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT);
295
5.01M
    dest += line_size;
296
5.01M
    dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT);
297
5.01M
    dest += line_size;
298
5.01M
    dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT);
299
5.01M
    dest += line_size;
300
5.01M
    dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT);
301
5.01M
    dest += line_size;
302
5.01M
    dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
303
5.01M
}
simple_idct.c:idctSparseColPut_int32_10bit
Line
Count
Source
283
914k
{
284
914k
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
285
286
914k
    IDCT_COLS;
287
288
914k
    dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT);
289
914k
    dest += line_size;
290
914k
    dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT);
291
914k
    dest += line_size;
292
914k
    dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT);
293
914k
    dest += line_size;
294
914k
    dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT);
295
914k
    dest += line_size;
296
914k
    dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT);
297
914k
    dest += line_size;
298
914k
    dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT);
299
914k
    dest += line_size;
300
914k
    dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT);
301
914k
    dest += line_size;
302
914k
    dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
303
914k
}
304
305
/**
 * Column pass that adds its eight results onto the samples already present
 * in a destination picture column.
 *
 * @param dest      pointer to the first sample to update; one sample is
 *                  read, modified and written per row, and dest is advanced
 *                  by line_size after each row.
 * @param line_size distance between consecutive rows, in units of pixel
 *                  elements (it is added directly to a pixel pointer).
 * @param col       pointer to the top entry of the input column, read with a
 *                  stride of 8 by IDCT_COLS.
 */
static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
306
                                          idctin *col)
307
96.4M
{
308
96.4M
    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
309
310
96.4M
    /* Accumulates a0..a3 / b0..b3 from col[8*k] (macro defined earlier). */
    IDCT_COLS;
311
312
96.4M
    /* The shifted value is added to the existing sample before
     * av_clip_pixel is applied, so the clip covers the sum, not just the
     * residual. Rows 0..3 use a_k + b_k, rows 4..7 use a_k - b_k in
     * mirrored order. */
    dest[0] = av_clip_pixel(dest[0] + ((int)(a0 + b0) >> COL_SHIFT));
313
96.4M
    dest += line_size;
314
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a1 + b1) >> COL_SHIFT));
315
96.4M
    dest += line_size;
316
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a2 + b2) >> COL_SHIFT));
317
96.4M
    dest += line_size;
318
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a3 + b3) >> COL_SHIFT));
319
96.4M
    dest += line_size;
320
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a3 - b3) >> COL_SHIFT));
321
96.4M
    dest += line_size;
322
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a2 - b2) >> COL_SHIFT));
323
96.4M
    dest += line_size;
324
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a1 - b1) >> COL_SHIFT));
325
96.4M
    dest += line_size;
326
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a0 - b0) >> COL_SHIFT));
327
96.4M
}
simple_idct.c:idctSparseColAdd_int16_8bit
Line
Count
Source
307
96.4M
{
308
96.4M
    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
309
310
96.4M
    IDCT_COLS;
311
312
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a0 + b0) >> COL_SHIFT));
313
96.4M
    dest += line_size;
314
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a1 + b1) >> COL_SHIFT));
315
96.4M
    dest += line_size;
316
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a2 + b2) >> COL_SHIFT));
317
96.4M
    dest += line_size;
318
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a3 + b3) >> COL_SHIFT));
319
96.4M
    dest += line_size;
320
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a3 - b3) >> COL_SHIFT));
321
96.4M
    dest += line_size;
322
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a2 - b2) >> COL_SHIFT));
323
96.4M
    dest += line_size;
324
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a1 - b1) >> COL_SHIFT));
325
96.4M
    dest += line_size;
326
96.4M
    dest[0] = av_clip_pixel(dest[0] + ((int)(a0 - b0) >> COL_SHIFT));
327
96.4M
}
Unexecuted instantiation: simple_idct.c:idctSparseColAdd_int16_10bit
Unexecuted instantiation: simple_idct.c:idctSparseColAdd_int16_12bit
Unexecuted instantiation: simple_idct.c:idctSparseColAdd_int32_10bit
328
329
/**
 * Transform an 8x8 coefficient block and store the clipped result into a
 * destination picture: eight row passes over the block followed by eight
 * column passes that write to dest.
 *
 * @param dest_     destination, reinterpreted as a pixel pointer.
 * @param line_size destination row distance; it is divided by sizeof(pixel)
 *                  before use (presumably a byte stride on entry -- TODO
 *                  confirm against callers).
 * @param block_    the 64 coefficients, reinterpreted as idctin; the row
 *                  pass operates on this buffer directly.
 */
void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
330
156M
{
331
156M
    idctin *block = (idctin *)block_;
332
156M
    pixel *dest = (pixel *)dest_;
333
156M
    int i;
334
335
156M
    /* Convert the stride to pixel-element units for the pointer arithmetic
     * done in the column pass. */
    line_size /= sizeof(pixel);
336
337
1.40G
    /* Row pass: row i starts at block + i*8. The meaning of the second
     * argument (0) is defined by idctRowCondDC, outside this view. */
    for (i = 0; i < 8; i++)
338
1.25G
        FUNC6(idctRowCondDC)(block + i*8, 0);
339
340
1.40G
    /* Column pass: column i of the block goes to column i of dest. */
    for (i = 0; i < 8; i++)
341
1.25G
        FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
342
156M
}
ff_simple_idct_put_int16_8bit
Line
Count
Source
330
154M
{
331
154M
    idctin *block = (idctin *)block_;
332
154M
    pixel *dest = (pixel *)dest_;
333
154M
    int i;
334
335
154M
    line_size /= sizeof(pixel);
336
337
1.38G
    for (i = 0; i < 8; i++)
338
1.23G
        FUNC6(idctRowCondDC)(block + i*8, 0);
339
340
1.38G
    for (i = 0; i < 8; i++)
341
1.23G
        FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
342
154M
}
ff_simple_idct_put_int16_10bit
Line
Count
Source
330
1.37M
{
331
1.37M
    idctin *block = (idctin *)block_;
332
1.37M
    pixel *dest = (pixel *)dest_;
333
1.37M
    int i;
334
335
1.37M
    line_size /= sizeof(pixel);
336
337
12.3M
    for (i = 0; i < 8; i++)
338
11.0M
        FUNC6(idctRowCondDC)(block + i*8, 0);
339
340
12.3M
    for (i = 0; i < 8; i++)
341
11.0M
        FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
342
1.37M
}
ff_simple_idct_put_int16_12bit
Line
Count
Source
330
626k
{
331
626k
    idctin *block = (idctin *)block_;
332
626k
    pixel *dest = (pixel *)dest_;
333
626k
    int i;
334
335
626k
    line_size /= sizeof(pixel);
336
337
5.64M
    for (i = 0; i < 8; i++)
338
5.01M
        FUNC6(idctRowCondDC)(block + i*8, 0);
339
340
5.64M
    for (i = 0; i < 8; i++)
341
5.01M
        FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
342
626k
}
ff_simple_idct_put_int32_10bit
Line
Count
Source
330
114k
{
331
114k
    idctin *block = (idctin *)block_;
332
114k
    pixel *dest = (pixel *)dest_;
333
114k
    int i;
334
335
114k
    line_size /= sizeof(pixel);
336
337
1.02M
    for (i = 0; i < 8; i++)
338
914k
        FUNC6(idctRowCondDC)(block + i*8, 0);
339
340
1.02M
    for (i = 0; i < 8; i++)
341
914k
        FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
342
114k
}
343
344
#if IN_IDCT_DEPTH == 16
345
/**
 * Transform an 8x8 coefficient block and add the result onto the samples
 * already in a destination picture: eight row passes over the block
 * followed by eight column passes that update dest.
 *
 * Only compiled when IN_IDCT_DEPTH == 16; block is passed to the helpers
 * without a cast (presumably idctin is int16_t in that configuration --
 * TODO confirm).
 *
 * @param dest_     destination, reinterpreted as a pixel pointer.
 * @param line_size destination row distance; it is divided by sizeof(pixel)
 *                  before use (presumably a byte stride on entry -- TODO
 *                  confirm against callers).
 * @param block     the 64 coefficients; the row pass operates on this
 *                  buffer directly.
 */
void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
346
6.91M
{
347
6.91M
    pixel *dest = (pixel *)dest_;
348
6.91M
    int i;
349
350
6.91M
    /* Convert the stride to pixel-element units for the pointer arithmetic
     * done in the column pass. */
    line_size /= sizeof(pixel);
351
352
62.2M
    /* Row pass: row i starts at block + i*8. */
    for (i = 0; i < 8; i++)
353
55.2M
        FUNC6(idctRowCondDC)(block + i*8, 0);
354
355
62.2M
    /* Column pass: column i of the block is added onto column i of dest. */
    for (i = 0; i < 8; i++)
356
55.2M
        FUNC6(idctSparseColAdd)(dest + i, line_size, block + i);
357
6.91M
}
ff_simple_idct_add_int16_8bit
Line
Count
Source
346
6.91M
{
347
6.91M
    pixel *dest = (pixel *)dest_;
348
6.91M
    int i;
349
350
6.91M
    line_size /= sizeof(pixel);
351
352
62.2M
    for (i = 0; i < 8; i++)
353
55.2M
        FUNC6(idctRowCondDC)(block + i*8, 0);
354
355
62.2M
    for (i = 0; i < 8; i++)
356
55.2M
        FUNC6(idctSparseColAdd)(dest + i, line_size, block + i);
357
6.91M
}
Unexecuted instantiation: ff_simple_idct_add_int16_10bit
Unexecuted instantiation: ff_simple_idct_add_int16_12bit
358
359
/**
 * Transform an 8x8 coefficient block in place: eight row passes followed by
 * eight column passes, both operating on the same buffer.
 *
 * @param block the 64 coefficients, row i starting at block + i*8; the
 *              column pass (idctSparseCol) writes its results back into
 *              this buffer.
 */
void FUNC6(ff_simple_idct)(int16_t *block)
360
24.3M
{
361
24.3M
    int i;
362
363
218M
    /* Row pass over each of the eight rows. */
    for (i = 0; i < 8; i++)
364
194M
        FUNC6(idctRowCondDC)(block + i*8, 0);
365
366
218M
    /* Column pass over each of the eight columns (stride 8 inside). */
    for (i = 0; i < 8; i++)
367
194M
        FUNC6(idctSparseCol)(block + i);
368
24.3M
}
ff_simple_idct_int16_8bit
Line
Count
Source
360
24.3M
{
361
24.3M
    int i;
362
363
218M
    for (i = 0; i < 8; i++)
364
194M
        FUNC6(idctRowCondDC)(block + i*8, 0);
365
366
218M
    for (i = 0; i < 8; i++)
367
194M
        FUNC6(idctSparseCol)(block + i);
368
24.3M
}
Unexecuted instantiation: ff_simple_idct_int16_10bit
Unexecuted instantiation: ff_simple_idct_int16_12bit
369
#endif
370
#endif
371
#endif /* PRORES_ONLY */