/src/ffmpeg/libswscale/ops_chain.h
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #ifndef SWSCALE_OPS_CHAIN_H |
22 | | #define SWSCALE_OPS_CHAIN_H |
23 | | |
24 | | #include "libavutil/cpu.h" |
25 | | |
26 | | #include "ops_internal.h" |
27 | | |
28 | | /** |
29 | | * Helpers for SIMD implementations based on chained kernels, using a |
30 | | * continuation passing style to link them together. |
31 | | * |
32 | | * The basic idea here is to "link" together a series of different operation |
33 | | * kernels by constructing a list of kernel addresses into an SwsOpChain. Each |
34 | | * kernel will load the address of the next kernel (the "continuation") from |
35 | | * this struct, and jump directly into it; using an internal function signature |
36 | | * that is an implementation detail of the specific backend. |
37 | | */ |
38 | | |
39 | | /** |
40 | | * Private data for each kernel: one 16-byte blob (size enforced by the static_assert after the union) that may be viewed through any of the typed members. |
41 | | */ |
42 | | typedef union SwsOpPriv { |
43 | | DECLARE_ALIGNED_16(char, data)[16]; /* raw byte view; presumably also forces 16-byte alignment of the union, see DECLARE_ALIGNED_16 */ |
44 | | |
45 | | /* Common types */ |
46 | | void *ptr; /* pointer payload */ |
47 | | uint8_t u8[16]; /* 16 x 8-bit unsigned elements */ |
48 | | uint16_t u16[8]; /* 8 x 16-bit unsigned elements */ |
49 | | uint32_t u32[4]; /* 4 x 32-bit unsigned elements */ |
50 | | float f32[4]; /* 4 x float elements */ |
51 | | } SwsOpPriv; |
52 | | |
53 | | static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch"); /* compile-time guard: the union must stay exactly 16 bytes */ |
54 | | |
55 | | /* Setup helpers. All four share the signature of the `setup` callback in SwsOpEntry. NOTE(review): presumably each derives a kernel's private data from `op` and stores it in `out`, returning 0 or a negative error code; confirm against the definitions. */ |
56 | | int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out); |
57 | | int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out); |
58 | | int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out); |
59 | | int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out); |
60 | | |
61 | | /** |
62 | | * Per-kernel execution context (this describes SwsOpImpl; SwsFuncPtr is the opaque kernel pointer type it stores). |
63 | | * |
64 | | * Note: This struct is hard-coded in assembly, so do not change the layout. |
65 | | */ |
66 | | typedef void (*SwsFuncPtr)(void); /* generic kernel address; the real function signature is a backend implementation detail */ |
67 | | typedef struct SwsOpImpl { |
68 | | SwsFuncPtr cont; /* [offset = 0] Continuation for this operation. */ |
69 | | SwsOpPriv priv; /* [offset = 16] Private data for this operation. */ |
70 | | } SwsOpImpl; |
71 | | |
72 | | static_assert(sizeof(SwsOpImpl) == 32, "SwsOpImpl layout mismatch"); /* total size expected by the hard-coded layout */ |
73 | | static_assert(offsetof(SwsOpImpl, priv) == 16, "SwsOpImpl layout mismatch"); /* priv must sit at the documented offset */ |
74 | | |
75 | | /** |
76 | | * Compiled "chain" of operations, which can be dispatched efficiently. |
77 | | * Effectively just a list of function pointers, alongside a small amount of |
78 | | * private data for each operation. |
79 | | */ |
80 | | typedef struct SwsOpChain { |
81 | 0 | #define SWS_MAX_OPS 16 |
82 | | SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */ |
83 | | void (*free[SWS_MAX_OPS + 1])(void *); /* one callback slot per impl[] entry; presumably releases that entry's private data (TODO confirm in ops_chain.c) */ |
84 | | int num_impl; /* presumably the number of impl[] entries currently in use (TODO confirm) */ |
85 | | int cpu_flags; /* set of all used CPU flags */ |
86 | | } SwsOpChain; |
87 | | |
88 | | SwsOpChain *ff_sws_op_chain_alloc(void); /* creates a new chain; NOTE(review): presumably returns NULL on allocation failure, confirm */ |
89 | | void ff_sws_op_chain_free_cb(void *chain); /* releases a chain passed as void *; presumably shaped this way so it can serve as a generic free callback */ |
90 | | static inline void ff_sws_op_chain_free(SwsOpChain *chain) /* typed convenience wrapper: simply forwards to ff_sws_op_chain_free_cb() */ |
91 | 0 | { |
92 | 0 | ff_sws_op_chain_free_cb(chain); |
93 | 0 | } Unexecuted instantiation: ops_backend.c:ff_sws_op_chain_free Unexecuted instantiation: ops_chain.c:ff_sws_op_chain_free Unexecuted instantiation: ops_memcpy.c:ff_sws_op_chain_free Unexecuted instantiation: ops.c:ff_sws_op_chain_free |
94 | | |
95 | | /* Adds the kernel `func`, together with its `free` callback and private data `priv`, to `chain`. Returns 0 on success, or a negative error code. NOTE(review): presumably *priv is copied into the chain and the call fails once the chain is full; confirm in ops_chain.c. */ |
96 | | int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, |
97 | | void (*free)(void *), const SwsOpPriv *priv); |
98 | | |
99 | | typedef struct SwsOpEntry { /* one kernel: the metadata used to match it against an op, plus its implementation hooks */ |
100 | | /* Kernel metadata; reduced size subset of SwsOp */ |
101 | | SwsOpType op; |
102 | | SwsPixelType type; |
103 | | bool flexible; /* if true, only the type and op are matched */ |
104 | | bool unused[4]; /* for kernels which operate on a subset of components */ |
105 | | |
106 | | union { /* extra data defining the operation, unless `flexible` is true */ |
107 | | SwsReadWriteOp rw; |
108 | | SwsPackOp pack; |
109 | | SwsSwizzleOp swizzle; |
110 | | SwsConvertOp convert; |
111 | | uint32_t linear_mask; /* subset of SwsLinearOp */ |
112 | | int dither_size; /* subset of SwsDitherOp */ |
113 | | int clear_value; /* clear value for integer clears */ |
114 | | }; |
115 | | |
116 | | /* Kernel implementation */ |
117 | | SwsFuncPtr func; /* kernel address, stored as the opaque SwsFuncPtr type */ |
118 | | int (*setup)(const SwsOp *op, SwsOpPriv *out); /* optional */ |
119 | | void (*free)(void *priv); /* NOTE(review): presumably releases private data produced by `setup`; confirm whether it may be NULL */ |
120 | | } SwsOpEntry; |
121 | | |
122 | | typedef struct SwsOpTable { /* a group of kernel entries sharing one set of CPU flag requirements and one block size */ |
123 | | unsigned cpu_flags; /* required CPU flags for this table */ |
124 | | int block_size; /* fixed block size of this table */ |
125 | | const SwsOpEntry *entries[]; /* terminated by NULL */ |
126 | | } SwsOpTable; |
127 | | |
128 | | /** |
129 | | * "Compile" a single op by looking it up in a list of fixed size op tables. |
130 | | * See `op_match` in `ops_chain.c` for details on how the matching works. |
131 | | * |
132 | | * Returns 0, AVERROR(EAGAIN), or a negative error code. |
133 | | */ |
134 | | int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, /* candidate tables and how many of them there are */ |
135 | | SwsOpList *ops, const int block_size, /* NOTE(review): `ops` is a non-const list although the text above says "a single op"; presumably the compiled op is taken from this list, confirm in ops_chain.c */ |
136 | | SwsOpChain *chain); /* chain that receives the compiled kernel */ |
137 | | |
138 | | #endif |