Coverage Report

Created: 2025-12-11 06:55

/src/mpv/video/out/gpu/error_diffusion.c
Line  Count  Source
   1      -  /*
   2      -   * This file is part of mpv.
   3      -   *
   4      -   * mpv is free software; you can redistribute it and/or
   5      -   * modify it under the terms of the GNU Lesser General Public
   6      -   * License as published by the Free Software Foundation; either
   7      -   * version 2.1 of the License, or (at your option) any later version.
   8      -   *
   9      -   * mpv is distributed in the hope that it will be useful,
  10      -   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11      -   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12      -   * GNU Lesser General Public License for more details.
  13      -   *
  14      -   * You should have received a copy of the GNU Lesser General Public
  15      -   * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
  16      -   */
  17      -
  18      -  #include <stdlib.h>
  19      -
  20      -  #include "error_diffusion.h"
  21      -
  22      -  #include "common/common.h"
  23      -
  24      0  #define GLSL(...) gl_sc_addf(sc, __VA_ARGS__)
  25      0  #define GLSLH(...) gl_sc_haddf(sc, __VA_ARGS__)
  26      -
  27      -  // After a (y, x) -> (y, x + y * shift) mapping, find the rightmost column
  28      -  // that will be affected by the current column.
  29      -  static int compute_rightmost_shifted_column(const struct error_diffusion_kernel *k)
  30      0  {
  31      0      int ret = 0;
  32      0      for (int y = 0; y <= EF_MAX_DELTA_Y; y++) {
  33      0          for (int x = EF_MIN_DELTA_X; x <= EF_MAX_DELTA_X; x++) {
  34      0              if (k->pattern[y][x - EF_MIN_DELTA_X] != 0) {
  35      0                  int shifted_x = x + y * k->shift;
  36      -
  37      -                  // The shift mapping guarantees that the current column (and
  38      -                  // anything left of it) won't be affected by error diffusion.
  39      0                  mp_assert(shifted_x > 0);
  40      -
  41      0                  ret = MPMAX(ret, shifted_x);
  42      0              }
  43      0          }
  44      0      }
  45      0      return ret;
  46      0  }
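
Note: a worked example helps here. For the floyd-steinberg kernel defined at the
bottom of this file (shift = 2), the function returns 3. The sketch below
reproduces the computation outside of mpv; the loop bounds (x in -2..2, y in
0..2) stand in for EF_MIN_DELTA_X, EF_MAX_DELTA_X and EF_MAX_DELTA_Y, and are
assumptions inferred from the 3x5 pattern arrays, not taken from
error_diffusion.h.

    // Standalone sketch of compute_rightmost_shifted_column() for the
    // floyd-steinberg kernel. The -2..2 / 0..2 bounds are assumed values.
    #include <stdio.h>

    int main(void)
    {
        static const int pattern[3][5] = {{0, 0, 0, 7, 0},
                                          {0, 3, 5, 1, 0},
                                          {0, 0, 0, 0, 0}};
        int shift = 2, ret = 0;
        for (int y = 0; y <= 2; y++) {
            for (int x = -2; x <= 2; x++) {
                if (pattern[y][x + 2] != 0) {
                    int shifted_x = x + y * shift; // (y, x) -> (y, x + y * shift)
                    if (shifted_x > ret)
                        ret = shifted_x;
                }
            }
        }
        // 7/16 maps to column 1, 3/16 to 1, 5/16 to 2, 1/16 to 3.
        printf("%d\n", ret); // prints 3
        return 0;
    }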
  47      -
  48      -  const struct error_diffusion_kernel *mp_find_error_diffusion_kernel(const char *name)
  49  1.54k  {
  50  1.54k      if (!name)
  51      0          return NULL;
  52  1.54k      for (const struct error_diffusion_kernel *k = mp_error_diffusion_kernels;
  53  8.50k           k->name;
  54  8.08k           k++) {
  55  8.08k          if (strcmp(k->name, name) == 0)
  56  1.12k              return k;
  57  8.08k      }
  58    421      return NULL;
  59  1.54k  }
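
Note: the counts above show roughly 1.54k lookups, of which about 1.12k found a
kernel and 421 fell through to the final return NULL. A minimal usage sketch
(hypothetical caller, not mpv code):

    #include <stdio.h>
    #include "error_diffusion.h"

    // Look up a kernel by name; NULL is returned for unknown names
    // (and for a NULL name), exactly as in the function above.
    void print_kernel_info(const char *name)
    {
        const struct error_diffusion_kernel *k = mp_find_error_diffusion_kernel(name);
        if (k)
            printf("%s: shift=%d divisor=%d\n", k->name, k->shift, k->divisor);
        else
            printf("%s: no such kernel\n", name ? name : "(null)");
    }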
  60      -
  61      -  int mp_ef_compute_shared_memory_size(const struct error_diffusion_kernel *k,
  62      -                                       int height)
  63      0  {
  64      -      // We add EF_MAX_DELTA_Y empty lines at the bottom to handle errors
  65      -      // propagated out of the bottom side.
  66      0      int rows = height + EF_MAX_DELTA_Y;
  67      0      int shifted_columns = compute_rightmost_shifted_column(k) + 1;
  68      -
  69      -      // The shared memory is an array of size rows*shifted_columns. Each
  70      -      // element is a single uint for the three RGB components.
  71      0      return rows * shifted_columns * 4;
  72      0  }
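
Note: a quick sanity check with concrete numbers. For the floyd-steinberg
kernel (rightmost shifted column 3, as computed in the sketch above) and a
hypothetical 1080-pixel-high frame, assuming EF_MAX_DELTA_Y is 2 (the pattern
arrays have three rows):

    rows            = 1080 + 2            = 1082
    shifted_columns = 3 + 1               = 4
    size            = 1082 * 4 * 4 bytes  = 17312 bytes of shared memory

The final factor of 4 is the size in bytes of the single packed uint
accumulator stored per cell.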
  73      -
  74      -  void pass_error_diffusion(struct gl_shader_cache *sc,
  75      -                            const struct error_diffusion_kernel *k,
  76      -                            int tex, int width, int height, int depth, int block_size)
  77      0  {
  78      0      mp_assert(block_size <= height);
  79      -
  80      -      // The parallel error diffusion works by applying the shift mapping
  81      -      // first. Take the Floyd-Steinberg algorithm as an example. After the
  82      -      // (y, x) -> (y, x + y * shift) mapping (with shift=2), all errors are
  83      -      // propagated into the next few columns, which makes parallel processing
  84      -      // of the same column possible.
  85      -      //
  86      -      //           X    7/16                X    7/16
  87      -      //    3/16  5/16  1/16   ==>    0     0    3/16  5/16  1/16
  88      -
  89      -      // Figure out the size of the rectangle containing all shifted pixels.
  90      -      // The rectangle height is unchanged.
  91      0      int shifted_width = width + (height - 1) * k->shift;
  92      -
  93      -      // We process all pixels of the shifted rectangle column by column, with
  94      -      // a single global work group of size |block_size|.
  95      -      // Figure out how many blocks are required to process all pixels. We need
  96      -      // this explicitly to make the number of barrier() calls match.
  97      0      int blocks = (height * shifted_width + block_size - 1) / block_size;
  98      -
  99      -      // If we know how many of the next columns will be affected while the
 100      -      // current column is being processed, we can store the errors of only a
 101      -      // few columns in shared memory. Using a ring buffer further saves cost
 102      -      // when iterating to the next column.
 103      0      int ring_buffer_rows = height + EF_MAX_DELTA_Y;
 104      0      int ring_buffer_columns = compute_rightmost_shifted_column(k) + 1;
 105      0      int ring_buffer_size = ring_buffer_rows * ring_buffer_columns;
 106      -
 107      -      // Define the ring buffer in shared memory.
 108      0      GLSLH("shared uint err_rgb8[%d];\n", ring_buffer_size);
 109      -
 110      -      // Initialize the ring buffer.
 111      0      GLSL("for (int i = int(gl_LocalInvocationIndex); i < %d; i += %d) ",
 112      0           ring_buffer_size, block_size);
 113      0      GLSL("err_rgb8[i] = 0u;\n");
 114      -
 115      0      GLSL("for (int block_id = 0; block_id < %d; ++block_id) {\n", blocks);
 116      -
 117      -      // Add a barrier here so that the previous block is fully processed
 118      -      // before processing of the next one starts.
 119      0      GLSL("groupMemoryBarrier();\n");
 120      0      GLSL("barrier();\n");
 121      -
 122      -      // Compute the coordinates of the pixel we are currently processing,
 123      -      // both before and after the shift mapping.
 124      0      GLSL("int id = int(gl_LocalInvocationIndex) + block_id * %d;\n", block_size);
 125      0      GLSL("int y = id %% %d, x_shifted = id / %d;\n", height, height);
 126      0      GLSL("int x = x_shifted - y * %d;\n", k->shift);
 127      -
 128      -      // Proceed only if we are processing a valid pixel.
 129      0      GLSL("if (0 <= x && x < %d) {\n", width);
 130      -
 131      -      // The index the current pixel has in the ring buffer.
 132      0      GLSL("int idx = (x_shifted * %d + y) %% %d;\n", ring_buffer_rows, ring_buffer_size);
 133      -
 134      -      // Fetch the current pixel.
 135      0      GLSL("vec3 pix = texelFetch(texture%d, ivec2(x, y), 0).rgb;\n", tex);
 136      -
 137      -      // The dithering will quantize pixel values into multiples of 1/dither_quant.
 138      0      int dither_quant = (1 << depth) - 1;
 139      -
 140      -      // We encode the errors of the RGB components into a single 32-bit
 141      -      // unsigned integer. The error we propagate from the current pixel is in
 142      -      // the range [-0.5 / dither_quant, 0.5 / dither_quant]. While not quite
 143      -      // obvious, the sum of all errors propagated into a pixel is also in that
 144      -      // range. It's possible to map errors in this range into [-127, 127], and
 145      -      // store each in an unsigned 8-bit integer (using standard two's complement).
 146      -      // The three 8-bit unsigned integers can then be encoded into a single
 147      -      // 32-bit unsigned integer, with two 4-bit pads so that addition overflows
 148      -      // cannot affect the other components. There are at most 12 addition
 149      -      // operations on each pixel, so 4 bits of padding are enough.
 150      -      // The overflow from the R component will be discarded.
 151      -      //
 152      -      // The following figure shows how the encoding looks:
 153      -      //
 154      -      //     +------------------------------------+
 155      -      //     |RRRRRRRR|0000|GGGGGGGG|0000|BBBBBBBB|
 156      -      //     +------------------------------------+
 157      -      //
 158      -
 159      -      // The bit shift positions for the R and G components.
 160      0      int bitshift_r = 24, bitshift_g = 12;
 161      -      // The multiplier we use to map [-0.5, 0.5] to [-127, 127].
 162      0      int uint8_mul = 127 * 2;
 163      -
 164      -      // Add the error previously propagated into the current pixel, and clear
 165      -      // it in the buffer.
 166      0      GLSL("uint err_u32 = err_rgb8[idx] + %uu;\n",
 167      0           (128u << bitshift_r) | (128u << bitshift_g) | 128u);
 168      0      GLSL("pix = pix * %d.0 + vec3("
 169      0           "int((err_u32 >> %d) & 255u) - 128,"
 170      0           "int((err_u32 >> %d) & 255u) - 128,"
 171      0           "int( err_u32        & 255u) - 128"
 172      0           ") / %d.0;\n", dither_quant, bitshift_r, bitshift_g, uint8_mul);
 173      0      GLSL("err_rgb8[idx] = 0u;\n");
 174      -
 175      -      // Write the dithered pixel.
 176      0      GLSL("vec3 dithered = round(pix);\n");
 177      0      GLSL("imageStore(out_image, ivec2(x, y), vec4(dithered / %d.0, 0.0));\n",
 178      0           dither_quant);
 179      -
 180      0      GLSL("vec3 err_divided = (pix - dithered) * %d.0 / %d.0;\n",
 181      0           uint8_mul, k->divisor);
 182      0      GLSL("ivec3 tmp;\n");
 183      -
 184      -      // Group error propagations with the same weight factor together, in
 185      -      // order to reduce the number of error encoding operations.
 186      0      for (int dividend = 1; dividend <= k->divisor; dividend++) {
 187      0          bool err_assigned = false;
 188      -
 189      0          for (int y = 0; y <= EF_MAX_DELTA_Y; y++) {
 190      0              for (int x = EF_MIN_DELTA_X; x <= EF_MAX_DELTA_X; x++) {
 191      0                  if (k->pattern[y][x - EF_MIN_DELTA_X] != dividend)
 192      0                      continue;
 193      -
 194      0                  if (!err_assigned) {
 195      0                      err_assigned = true;
 196      -
 197      0                      GLSL("tmp = ivec3(round(err_divided * %d.0));\n", dividend);
 198      -
 199      0                      GLSL("err_u32 = "
 200      0                           "(uint(tmp.r & 255) << %d)|"
 201      0                           "(uint(tmp.g & 255) << %d)|"
 202      0                           " uint(tmp.b & 255);\n",
 203      0                           bitshift_r, bitshift_g);
 204      0                  }
 205      -
 206      0                  int shifted_x = x + y * k->shift;
 207      -
 208      -                  // Unlike at the right border, errors propagated out of the left
 209      -                  // border would remain in the ring buffer. That would produce
 210      -                  // visible artifacts near the left border, especially for shift=3
 211      -                  // kernels, so skip those propagations.
 212      0                  if (x < 0)
 213      0                      GLSL("if (x >= %d) ", -x);
 214      -
 215      -                  // Calculate the new position in the ring buffer to propagate
 216      -                  // the error into.
 217      0                  int ring_buffer_delta = shifted_x * ring_buffer_rows + y;
 218      0                  GLSL("atomicAdd(err_rgb8[(idx + %d) %% %d], err_u32);\n",
 219      0                       ring_buffer_delta, ring_buffer_size);
 220      0              }
 221      0          }
 222      0      }
 223      -
 224      0      GLSL("}\n"); // if (0 <= x && x < width)
 225      -
 226      0      GLSL("}\n"); // block_id
 227      0  }
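
Note: the dispatch arithmetic in this pass is easy to check by hand. With the
floyd-steinberg kernel (shift = 2) and hypothetical values width = 1920,
height = 1080, block_size = 256 (illustrative numbers, not mpv defaults):

    shifted_width = 1920 + (1080 - 1) * 2          = 4078
    blocks        = (1080 * 4078 + 256 - 1) / 256  = 17205

Each invocation then inverts the mapping: y = id % 1080, x_shifted = id / 1080,
x = x_shifted - 2 * y, and the "0 <= x && x < width" guard discards cells of
the shifted rectangle that have no source pixel.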
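
Note: the packed RGB error arithmetic is the subtle part of this pass, and it
can be exercised on the CPU. A minimal standalone sketch (plain C, no mpv
dependencies) that mirrors the bitshift_r = 24 / bitshift_g = 12 layout and the
biased decode used by the generated GLSL:

    #include <stdint.h>
    #include <stdio.h>

    // Pack three errors in [-127, 127] with the same layout as err_rgb8:
    // |RRRRRRRR|0000|GGGGGGGG|0000|BBBBBBBB|.
    static uint32_t encode(int r, int g, int b)
    {
        return ((uint32_t)(r & 255) << 24) |
               ((uint32_t)(g & 255) << 12) |
                (uint32_t)(b & 255);
    }

    int main(void)
    {
        // Two propagated errors, summed with a plain uint32 addition, just
        // like the atomicAdd() calls in the generated shader. The 4-bit pads
        // absorb per-component carries.
        uint32_t acc = encode(-3, 5, -7) + encode(10, -2, 1);

        // Biased decode, as in the err_rgb8[idx] + (128<<24 | 128<<12 | 128)
        // step above: adding 128 per component turns two's complement into a
        // biased value, so masking 8 bits and subtracting 128 recovers it.
        acc += (128u << 24) | (128u << 12) | 128u;
        int r = (int)((acc >> 24) & 255u) - 128;
        int g = (int)((acc >> 12) & 255u) - 128;
        int b = (int)( acc        & 255u) - 128;
        printf("%d %d %d\n", r, g, b); // prints 7 3 -6
        return 0;
    }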
 228      -
 229      -  // Different kernels for error diffusion.
 230      -  // Patterns are from <https://web.archive.org/web/20181031005427/
 231      -  // http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT>
 232      -  const struct error_diffusion_kernel mp_error_diffusion_kernels[] = {
 233      -      {
 234      -          .name = "simple",
 235      -          .shift = 1,
 236      -          .pattern = {{0, 0, 0, 1, 0},
 237      -                      {0, 0, 1, 0, 0},
 238      -                      {0, 0, 0, 0, 0}},
 239      -          .divisor = 2
 240      -      },
 241      -      {
 242      -          // The "false" Floyd-Steinberg kernel
 243      -          .name = "false-fs",
 244      -          .shift = 1,
 245      -          .pattern = {{0, 0, 0, 3, 0},
 246      -                      {0, 0, 3, 2, 0},
 247      -                      {0, 0, 0, 0, 0}},
 248      -          .divisor = 8
 249      -      },
 250      -      {
 251      -          .name = "sierra-lite",
 252      -          .shift = 2,
 253      -          .pattern = {{0, 0, 0, 2, 0},
 254      -                      {0, 1, 1, 0, 0},
 255      -                      {0, 0, 0, 0, 0}},
 256      -          .divisor = 4
 257      -      },
 258      -      {
 259      -          .name = "floyd-steinberg",
 260      -          .shift = 2,
 261      -          .pattern = {{0, 0, 0, 7, 0},
 262      -                      {0, 3, 5, 1, 0},
 263      -                      {0, 0, 0, 0, 0}},
 264      -          .divisor = 16
 265      -      },
 266      -      {
 267      -          .name = "atkinson",
 268      -          .shift = 2,
 269      -          .pattern = {{0, 0, 0, 1, 1},
 270      -                      {0, 1, 1, 1, 0},
 271      -                      {0, 0, 1, 0, 0}},
 272      -          .divisor = 8
 273      -      },
 274      -      // All kernels below have a shift value of 3, and are probably too heavy
 275      -      // for low-end GPUs.
 276      -      {
 277      -          .name = "jarvis-judice-ninke",
 278      -          .shift = 3,
 279      -          .pattern = {{0, 0, 0, 7, 5},
 280      -                      {3, 5, 7, 5, 3},
 281      -                      {1, 3, 5, 3, 1}},
 282      -          .divisor = 48
 283      -      },
 284      -      {
 285      -          .name = "stucki",
 286      -          .shift = 3,
 287      -          .pattern = {{0, 0, 0, 8, 4},
 288      -                      {2, 4, 8, 4, 2},
 289      -                      {1, 2, 4, 2, 1}},
 290      -          .divisor = 42
 291      -      },
 292      -      {
 293      -          .name = "burkes",
 294      -          .shift = 3,
 295      -          .pattern = {{0, 0, 0, 8, 4},
 296      -                      {2, 4, 8, 4, 2},
 297      -                      {0, 0, 0, 0, 0}},
 298      -          .divisor = 32
 299      -      },
 300      -      {
 301      -          .name = "sierra-3",
 302      -          .shift = 3,
 303      -          .pattern = {{0, 0, 0, 5, 3},
 304      -                      {2, 4, 5, 4, 2},
 305      -                      {0, 2, 3, 2, 0}},
 306      -          .divisor = 32
 307      -      },
 308      -      {
 309      -          .name = "sierra-2",
 310      -          .shift = 3,
 311      -          .pattern = {{0, 0, 0, 4, 3},
 312      -                      {1, 2, 3, 2, 1},
 313      -                      {0, 0, 0, 0, 0}},
 314      -          .divisor = 16
 315      -      },
 316      -      {0}
 317      -  };
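
Note: for contrast with the parallel GPU pass, the classic serial form of error
diffusion fits in a few lines. The sketch below is a single-channel CPU
reference using the floyd-steinberg weights from the table above; it is
illustrative only and not part of mpv.

    #include <math.h>
    #include <stdio.h>

    // Serial single-channel Floyd-Steinberg error diffusion, quantizing to
    // `depth` bits. Weights match the "floyd-steinberg" entry above:
    //        X    7/16
    //  3/16 5/16  1/16      (divisor = 16)
    static void fs_dither(float *img, int w, int h, int depth)
    {
        float q = (float)((1 << depth) - 1); // same as dither_quant above
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++) {
                float old = img[y * w + x];
                float val = roundf(old * q) / q;
                img[y * w + x] = val;
                float err = old - val;
                if (x + 1 < w)
                    img[y * w + x + 1] += err * 7 / 16;
                if (y + 1 < h) {
                    if (x > 0)
                        img[(y + 1) * w + x - 1] += err * 3 / 16;
                    img[(y + 1) * w + x] += err * 5 / 16;
                    if (x + 1 < w)
                        img[(y + 1) * w + x + 1] += err * 1 / 16;
                }
            }
        }
    }

    int main(void)
    {
        // Dither a tiny horizontal gradient down to 1 bit.
        float img[4][8];
        for (int y = 0; y < 4; y++)
            for (int x = 0; x < 8; x++)
                img[y][x] = x / 7.0f;
        fs_dither(&img[0][0], 8, 4, 1);
        for (int y = 0; y < 4; y++, printf("\n"))
            for (int x = 0; x < 8; x++)
                printf("%.0f ", img[y][x]);
        return 0;
    }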