FFmpeg
ops_chain.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/mem.h"
23 #include "libavutil/rational.h"
24 
25 #include "ops_chain.h"
26 
/* Build the AVRational N/1 (numerator N, denominator 1) as a compound literal. */
#define Q(N) ((AVRational) { N, 1 })
28 
30 {
31  return av_mallocz(sizeof(SwsOpChain));
32 }
33 
34 void ff_sws_op_chain_free_cb(void *ptr)
35 {
36  if (!ptr)
37  return;
38 
39  SwsOpChain *chain = ptr;
40  for (int i = 0; i < chain->num_impl + 1; i++) {
41  if (chain->free[i])
42  chain->free[i](chain->impl[i].priv.ptr);
43  }
44 
45  av_free(chain);
46 }
47 
49  void (*free)(void *), const SwsOpPriv *priv)
50 {
51  const int idx = chain->num_impl;
52  if (idx == SWS_MAX_OPS)
53  return AVERROR(EINVAL);
54 
56  chain->impl[idx].cont = func;
57  chain->impl[idx + 1].priv = *priv;
58  chain->free[idx + 1] = free;
59  chain->num_impl++;
60  return 0;
61 }
62 
63 /**
64  * Match an operation against a reference operation. Returns a score for how
65  * well the reference matches the operation, or 0 if there is no match.
66  *
67  * If `ref->comps` has any flags set, they must be set in `op` as well.
68  * Likewise, if `ref->comps` has any components marked as unused, they must be
69  * marked as as unused in `ops` as well.
70  *
71  * For SWS_OP_LINEAR, `ref->linear.mask` must be a strict superset of
72  * `op->linear.mask`, but may not contain any columns explicitly ignored by
73  * `op->comps.unused`.
74  *
75  * For SWS_OP_READ, SWS_OP_WRITE, SWS_OP_SWAP_BYTES and SWS_OP_SWIZZLE, the
76  * exact type is not checked, just the size.
77  *
78  * Components set in `next.unused` are ignored when matching. If `flexible`
79  * is true, the op body is ignored - only the operation, pixel type, and
80  * component masks are checked.
81  */
82 static int op_match(const SwsOp *op, const SwsOpEntry *entry, const SwsComps next)
83 {
84  int score = 10;
85  if (op->op != entry->op)
86  return 0;
87 
88  switch (op->op) {
89  case SWS_OP_READ:
90  case SWS_OP_WRITE:
91  case SWS_OP_SWAP_BYTES:
92  case SWS_OP_SWIZZLE:
93  /* Only the size matters for these operations */
95  return 0;
96  break;
97  default:
98  if (op->type != entry->type)
99  return 0;
100  break;
101  }
102 
103  for (int i = 0; i < 4; i++) {
104  if (entry->unused[i]) {
105  if (op->comps.unused[i])
106  score += 1; /* Operating on fewer components is better .. */
107  else
108  return 0; /* .. but not too few! */
109  }
110  }
111 
112  if (op->op == SWS_OP_CLEAR) {
113  /* Clear pattern must match exactly, regardless of `entry->flexible` */
114  for (int i = 0; i < 4; i++) {
115  if (!next.unused[i] && entry->unused[i] != !!op->c.q4[i].den)
116  return 0;
117  }
118  }
119 
120  /* Flexible variants always match, but lower the score to prioritize more
121  * specific implementations if they exist */
122  if (entry->flexible)
123  return score - 5;
124 
125  switch (op->op) {
126  case SWS_OP_INVALID:
127  return 0;
128  case SWS_OP_READ:
129  case SWS_OP_WRITE:
130  if (op->rw.elems != entry->rw.elems ||
131  op->rw.frac != entry->rw.frac ||
132  (op->rw.elems > 1 && op->rw.packed != entry->rw.packed))
133  return 0;
134  return score;
135  case SWS_OP_SWAP_BYTES:
136  return score;
137  case SWS_OP_PACK:
138  case SWS_OP_UNPACK:
139  for (int i = 0; i < 4 && op->pack.pattern[i]; i++) {
140  if (op->pack.pattern[i] != entry->pack.pattern[i])
141  return 0;
142  }
143  return score;
144  case SWS_OP_CLEAR:
145  for (int i = 0; i < 4; i++) {
146  if (!op->c.q4[i].den)
147  continue;
148  if (av_cmp_q(op->c.q4[i], Q(entry->clear_value)) && !next.unused[i])
149  return 0;
150  }
151  return score;
152  case SWS_OP_LSHIFT:
153  case SWS_OP_RSHIFT:
154  av_assert1(entry->flexible);
155  return score;
156  case SWS_OP_SWIZZLE:
157  for (int i = 0; i < 4; i++) {
158  if (op->swizzle.in[i] != entry->swizzle.in[i] && !next.unused[i])
159  return 0;
160  }
161  return score;
162  case SWS_OP_CONVERT:
163  if (op->convert.to != entry->convert.to ||
164  op->convert.expand != entry->convert.expand)
165  return 0;
166  return score;
167  case SWS_OP_DITHER:
168  return op->dither.size_log2 == entry->dither_size ? score : 0;
169  case SWS_OP_MIN:
170  case SWS_OP_MAX:
171  av_assert1(entry->flexible);
172  return score;
173  case SWS_OP_LINEAR:
174  /* All required elements must be present */
175  if (op->lin.mask & ~entry->linear_mask)
176  return 0;
177  /* To avoid operating on possibly undefined memory, filter out
178  * implementations that operate on more input components */
179  for (int i = 0; i < 4; i++) {
180  if ((entry->linear_mask & SWS_MASK_COL(i)) && op->comps.unused[i])
181  return 0;
182  }
183  /* Prioritize smaller implementations */
184  score += av_popcount(SWS_MASK_ALL ^ entry->linear_mask);
185  return score;
186  case SWS_OP_SCALE:
187  return score;
188  case SWS_OP_TYPE_NB:
189  break;
190  }
191 
192  av_unreachable("Invalid operation type!");
193  return 0;
194 }
195 
196 int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables,
197  SwsOpList *ops, const int block_size,
198  SwsOpChain *chain)
199 {
200  static const SwsOp dummy = { .comps.unused = { true, true, true, true }};
201  const SwsOp *next = ops->num_ops > 1 ? &ops->ops[1] : &dummy;
202  const unsigned cpu_flags = av_get_cpu_flags();
203  const SwsOpEntry *best = NULL;
204  const SwsOp *op = &ops->ops[0];
205  int ret, best_score = 0, best_cpu_flags;
206  SwsOpPriv priv = {0};
207 
208  for (int n = 0; n < num_tables; n++) {
209  const SwsOpTable *table = tables[n];
210  if (table->block_size && table->block_size != block_size ||
211  table->cpu_flags & ~cpu_flags)
212  continue;
213 
214  for (int i = 0; table->entries[i]; i++) {
215  const SwsOpEntry *entry = table->entries[i];
216  int score = op_match(op, entry, next->comps);
217  if (score > best_score) {
218  best_score = score;
219  best_cpu_flags = table->cpu_flags;
220  best = entry;
221  }
222  }
223  }
224 
225  if (!best)
226  return AVERROR(ENOTSUP);
227 
228  if (best->setup) {
229  ret = best->setup(op, &priv);
230  if (ret < 0)
231  return ret;
232  }
233 
234  chain->cpu_flags |= best_cpu_flags;
235  ret = ff_sws_op_chain_append(chain, best->func, best->free, &priv);
236  if (ret < 0) {
237  if (best->free)
238  best->free(priv.ptr);
239  return ret;
240  }
241 
242  ops->ops++;
243  ops->num_ops--;
244  return ops->num_ops ? AVERROR(EAGAIN) : 0;
245 }
246 
/* Convert the rational `q` to pixel type `type`; a zero denominator yields 0.
 * The cast binds to the numerator only, so the division is carried out in
 * floating point when `type` is float, and as an integer division (after the
 * numerator has been converted to `type`) for the integer pixel types. */
#define q2pixel(type, q) ((q).den ? (type) (q).num / (q).den : 0)
248 
250 {
251  out->u8[0] = op->c.u;
252  return 0;
253 }
254 
256 {
257  switch (op->type) {
258  case SWS_PIXEL_U8: out->u8[0] = op->c.u; return 0;
259  case SWS_PIXEL_U16: out->u16[0] = op->c.u; return 0;
260  case SWS_PIXEL_U32: out->u32[0] = op->c.u; return 0;
261  case SWS_PIXEL_F32: out->f32[0] = op->c.u; return 0;
262  default: return AVERROR(EINVAL);
263  }
264 }
265 
267 {
268  switch (op->type) {
269  case SWS_PIXEL_U8: out->u8[0] = q2pixel(uint8_t, op->c.q); return 0;
270  case SWS_PIXEL_U16: out->u16[0] = q2pixel(uint16_t, op->c.q); return 0;
271  case SWS_PIXEL_U32: out->u32[0] = q2pixel(uint32_t, op->c.q); return 0;
272  case SWS_PIXEL_F32: out->f32[0] = q2pixel(float, op->c.q); return 0;
273  default: return AVERROR(EINVAL);
274  }
275 
276  return 0;
277 }
278 
280 {
281  for (int i = 0; i < 4; i++) {
282  switch (op->type) {
283  case SWS_PIXEL_U8: out->u8[i] = q2pixel(uint8_t, op->c.q4[i]); break;
284  case SWS_PIXEL_U16: out->u16[i] = q2pixel(uint16_t, op->c.q4[i]); break;
285  case SWS_PIXEL_U32: out->u32[i] = q2pixel(uint32_t, op->c.q4[i]); break;
286  case SWS_PIXEL_F32: out->f32[i] = q2pixel(float, op->c.q4[i]); break;
287  default: return AVERROR(EINVAL);
288  }
289  }
290 
291  return 0;
292 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:48
SwsOpTable
Definition: ops_chain.h:122
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:33
entry
#define entry
Definition: aom_film_grain_template.c:66
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:58
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:56
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:51
SWS_MAX_OPS
#define SWS_MAX_OPS
Definition: ops_chain.h:81
out
FILE * out
Definition: movenc.c:55
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:55
SwsComps::unused
bool unused[4]
Definition: ops.h:89
rational.h
SwsOpImpl::cont
SwsFuncPtr cont
Definition: ops_chain.h:68
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:60
SWS_MASK_ALL
@ SWS_MASK_ALL
Definition: ops.h:161
ff_sws_setup_u
int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:255
table
static const uint16_t table[]
Definition: prosumer.c:203
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:34
SWS_OP_TYPE_NB
@ SWS_OP_TYPE_NB
Definition: ops.h:68
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
av_popcount
#define av_popcount
Definition: common.h:154
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:64
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:85
SwsOpEntry::setup
int(* setup)(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.h:118
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: ops.h:35
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:66
SwsOpList::num_ops
int num_ops
Definition: ops.h:211
SWS_MASK_COL
#define SWS_MASK_COL(J)
Definition: ops.h:158
dummy
int dummy
Definition: motion.c:66
tables
Writing a table generator This documentation is preliminary Parts of the API are not good and should be changed Basic concepts A table generator consists of two *_tablegen c and *_tablegen h The h file will provide the variable declarations and initialization code for the tables
Definition: tablegen.txt:10
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: ops.h:32
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:64
ops_chain.h
avassert.h
SwsOpEntry::free
void(* free)(void *priv)
Definition: ops_chain.h:119
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
ff_sws_setup_q
int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:266
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:65
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:63
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:82
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:52
SwsOpPriv::ptr
void * ptr
Definition: ops_chain.h:46
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:80
NULL
#define NULL
Definition: coverity.c:32
ff_sws_op_compile_tables
int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, SwsOpList *ops, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
Definition: ops_chain.c:196
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:108
SwsOpEntry::func
SwsFuncPtr func
Definition: ops_chain.h:117
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:57
SWS_OP_INVALID
@ SWS_OP_INVALID
Definition: ops.h:45
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:49
SwsOpChain::num_impl
int num_impl
Definition: ops_chain.h:84
SwsOpEntry
Definition: ops_chain.h:99
SwsOp::comps
SwsComps comps
Definition: ops.h:193
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *ptr)
Definition: ops_chain.c:34
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(void *)
Definition: ops_chain.h:83
SwsOpList::ops
SwsOp * ops
Definition: ops.h:210
ff_sws_setup_q4
int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:279
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:57
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:256
SwsOp
Definition: ops.h:179
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
Q
#define Q(N)
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.c:27
ret
ret
Definition: filter_design.txt:187
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:66
SwsOpImpl::priv
SwsOpPriv priv
Definition: ops_chain.h:69
SwsComps
Definition: ops.h:87
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:50
ff_sws_setup_u8
int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:249
mem.h
ff_sws_op_chain_append
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void(*free)(void *), const SwsOpPriv *priv)
Definition: ops_chain.c:48
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:59
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:209
q2pixel
#define q2pixel(type, q)
Definition: ops_chain.c:247
SwsOpPriv
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:42
op_match
static int op_match(const SwsOp *op, const SwsOpEntry *entry, const SwsComps next)
Match an operation against a reference operation.
Definition: ops_chain.c:82