/src/ffmpeg/libswscale/ops_chain.h
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #ifndef SWSCALE_OPS_CHAIN_H |
22 | | #define SWSCALE_OPS_CHAIN_H |
23 | | |
24 | | #include "libavutil/cpu.h" |
25 | | #include "libavutil/mem.h" |
26 | | |
27 | | #include "ops_internal.h" |
28 | | |
/**
 * Helpers for SIMD implementations based on chained kernels, using a
 * continuation-passing style to link them together.
 *
 * The basic idea is to "link" together a series of operation kernels by
 * collecting their addresses into an SwsOpChain. Each kernel loads the
 * address of the next kernel (the "continuation") from this struct and jumps
 * directly into it, using an internal function signature that is an
 * implementation detail of the specific backend.
 */
39 | | |
/*
 * Forward declaration: SwsImplParams refers to SwsOpTable before the full
 * struct definition, which appears later in this header.
 */
typedef struct SwsOpTable SwsOpTable;
41 | | |
42 | | /** |
43 | | * Private data for each kernel. |
44 | | */ |
45 | | typedef union SwsOpPriv { |
46 | | DECLARE_ALIGNED_16(char, data)[16]; |
47 | | |
48 | | /* Common types */ |
49 | | void *ptr; |
50 | | uint8_t u8[16]; |
51 | | int8_t i8[16]; |
52 | | uint16_t u16[8]; |
53 | | int16_t i16[8]; |
54 | | uint32_t u32[4]; |
55 | | int32_t i32[4]; |
56 | | float f32[4]; |
57 | | uint64_t u64[2]; |
58 | | int64_t i64[2]; |
59 | | uintptr_t uptr[2]; |
60 | | intptr_t iptr[2]; |
61 | | } SwsOpPriv; |
62 | | |
63 | | static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch"); |
64 | | |
65 | | /** |
66 | | * Per-kernel execution context. |
67 | | * |
68 | | * Note: This struct is hard-coded in assembly, so do not change the layout. |
69 | | */ |
70 | | typedef void (*SwsFuncPtr)(void); |
71 | | typedef struct SwsOpImpl { |
72 | | SwsFuncPtr cont; /* [offset = 0] Continuation for this operation. */ |
73 | | SwsOpPriv priv; /* [offset = 16] Private data for this operation. */ |
74 | | } SwsOpImpl; |
75 | | |
76 | | static_assert(sizeof(SwsOpImpl) == 32, "SwsOpImpl layout mismatch"); |
77 | | static_assert(offsetof(SwsOpImpl, priv) == 16, "SwsOpImpl layout mismatch"); |
78 | | |
79 | | /** |
80 | | * Compiled "chain" of operations, which can be dispatched efficiently. |
81 | | * Effectively just a list of function pointers, alongside a small amount of |
82 | | * private data for each operation. |
83 | | */ |
84 | | typedef struct SwsOpChain { |
85 | 0 | #define SWS_MAX_OPS 16 |
86 | | SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */ |
87 | | void (*free[SWS_MAX_OPS + 1])(SwsOpPriv *); |
88 | | int num_impl; |
89 | | int cpu_flags; /* set of all used CPU flags */ |
90 | | int over_read; /* chain over-reads input by this many bytes */ |
91 | | int over_write; /* chain over-writes output by this many bytes */ |
92 | | } SwsOpChain; |
93 | | |
94 | | SwsOpChain *ff_sws_op_chain_alloc(void); |
95 | | void ff_sws_op_chain_free_cb(void *chain); |
96 | | static inline void ff_sws_op_chain_free(SwsOpChain *chain) |
97 | 0 | { |
98 | 0 | ff_sws_op_chain_free_cb(chain); |
99 | 0 | } Unexecuted instantiation: ops_backend.c:ff_sws_op_chain_free Unexecuted instantiation: ops_chain.c:ff_sws_op_chain_free Unexecuted instantiation: ops_memcpy.c:ff_sws_op_chain_free |
100 | | |
101 | | /* Returns 0 on success, or a negative error code. */ |
102 | | int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, |
103 | | void (*free)(SwsOpPriv *), const SwsOpPriv *priv); |
104 | | |
105 | | typedef struct SwsImplParams { |
106 | | const SwsOpTable *table; |
107 | | const SwsOp *op; |
108 | | SwsContext *ctx; |
109 | | } SwsImplParams; |
110 | | |
111 | | typedef struct SwsImplResult { |
112 | | SwsFuncPtr func; /* overrides `SwsOpEntry.func` if non-NULL */ |
113 | | SwsOpPriv priv; /* private data for this implementation instance */ |
114 | | void (*free)(SwsOpPriv *priv); /* free function for `priv` */ |
115 | | int over_read; /* implementation over-reads input by this many bytes */ |
116 | | int over_write; /* implementation over-writes output by this many bytes */ |
117 | | } SwsImplResult; |
118 | | |
119 | | typedef struct SwsOpEntry { |
120 | | /* Kernel metadata; reduced size subset of SwsOp */ |
121 | | SwsOpType op; |
122 | | SwsPixelType type; |
123 | | SwsCompMask mask; /* mask of active components (after operation) */ |
124 | | bool flexible; /* if true, only the type and op are matched */ |
125 | | |
126 | | union { /* extra data defining the operation, unless `flexible` is true */ |
127 | | SwsReadWriteOp rw; |
128 | | SwsPackOp pack; |
129 | | SwsSwizzleOp swizzle; |
130 | | SwsConvertOp convert; |
131 | | SwsClearOp clear; |
132 | | uint32_t linear_mask; /* subset of SwsLinearOp */ |
133 | | int dither_size; /* subset of SwsDitherOp */ |
134 | | AVRational scale; /* scale factor for SWS_OP_SCALE */ |
135 | | }; |
136 | | |
137 | | /* Kernel implementation */ |
138 | | SwsFuncPtr func; |
139 | | int (*setup)(const SwsImplParams *params, SwsImplResult *out); /* optional */ |
140 | | bool (*check)(const SwsImplParams *params); /* optional, return true if supported */ |
141 | | } SwsOpEntry; |
142 | | |
143 | | /* Setup helpers for common/trivial operation types */ |
144 | | int ff_sws_setup_shift(const SwsImplParams *params, SwsImplResult *out); |
145 | | int ff_sws_setup_scale(const SwsImplParams *params, SwsImplResult *out); |
146 | | int ff_sws_setup_clamp(const SwsImplParams *params, SwsImplResult *out); |
147 | | int ff_sws_setup_clear(const SwsImplParams *params, SwsImplResult *out); |
148 | | |
149 | | static inline void ff_op_priv_free(SwsOpPriv *priv) |
150 | 0 | { |
151 | 0 | av_freep(&priv->ptr); |
152 | 0 | } Unexecuted instantiation: ops_backend.c:ff_op_priv_free Unexecuted instantiation: ops_chain.c:ff_op_priv_free Unexecuted instantiation: ops_memcpy.c:ff_op_priv_free |
153 | | |
154 | | static inline void ff_op_priv_unref(SwsOpPriv *priv) |
155 | 0 | { |
156 | 0 | av_refstruct_unref(&priv->ptr); |
157 | 0 | } Unexecuted instantiation: ops_backend.c:ff_op_priv_unref Unexecuted instantiation: ops_chain.c:ff_op_priv_unref Unexecuted instantiation: ops_memcpy.c:ff_op_priv_unref |
158 | | |
159 | | struct SwsOpTable { |
160 | | unsigned cpu_flags; /* required CPU flags for this table */ |
161 | | int block_size; /* fixed block size of this table */ |
162 | | const SwsOpEntry *entries[]; /* terminated by NULL */ |
163 | | }; |
164 | | |
165 | | /** |
166 | | * "Compile" a single op by looking it up in a list of fixed size op tables. |
167 | | * See `op_match` in `ops_chain.c` for details on how the matching works. |
168 | | * |
169 | | * Returns 0 or a negative error code. |
170 | | */ |
171 | | int ff_sws_op_compile_tables(SwsContext *ctx, const SwsOpTable *const tables[], |
172 | | int num_tables, SwsOpList *ops, int ops_index, |
173 | | const int block_size, SwsOpChain *chain); |
174 | | |
175 | | #endif |