/src/libheif/libheif/sequences/track_visual.cc
Line | Count | Source |
1 | | /* |
2 | | * HEIF image base codec. |
3 | | * Copyright (c) 2025 Dirk Farin <dirk.farin@gmail.com> |
4 | | * |
5 | | * This file is part of libheif. |
6 | | * |
7 | | * libheif is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libheif is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libheif. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "track_visual.h" |
22 | | #include "codecs/decoder.h" |
23 | | #include "codecs/encoder.h" |
24 | | #include "chunk.h" |
25 | | #include "pixelimage.h" |
26 | | #include "context.h" |
27 | | #include "api_structs.h" |
28 | | #include "codecs/hevc_boxes.h" |
29 | | |
30 | | |
31 | | Track_Visual::Track_Visual(HeifContext* ctx) |
32 | 0 | : Track(ctx) |
33 | 0 | { |
34 | 0 | } |
35 | | |
36 | | Error Track_Visual::load(const std::shared_ptr<Box_trak>& trak) |
37 | 0 | { |
38 | 0 | Error parentLoadError = Track::load(trak); |
39 | 0 | if (parentLoadError) { |
40 | 0 | return parentLoadError; |
41 | 0 | } |
42 | | |
43 | 0 | const std::vector<uint32_t>& chunk_offsets = m_stco->get_offsets(); |
44 | | |
45 | | // Find sequence resolution |
46 | |
|
47 | 0 | if (!chunk_offsets.empty()) { |
48 | 0 | auto* s2c = m_stsc->get_chunk(static_cast<uint32_t>(1)); |
49 | 0 | if (!s2c) { |
50 | 0 | return { |
51 | 0 | heif_error_Invalid_input, |
52 | 0 | heif_suberror_Unspecified, |
53 | 0 | "Visual track has no chunk 1" |
54 | 0 | }; |
55 | 0 | } |
56 | | |
57 | 0 | Box_stsc::SampleToChunk sampleToChunk = *s2c; |
58 | |
|
59 | 0 | auto sample_description = m_stsd->get_sample_entry(sampleToChunk.sample_description_index - 1); |
60 | 0 | if (!sample_description) { |
61 | 0 | return { |
62 | 0 | heif_error_Invalid_input, |
63 | 0 | heif_suberror_Unspecified, |
64 | 0 | "Visual track has sample description" |
65 | 0 | }; |
66 | 0 | } |
67 | | |
68 | 0 | auto visual_sample_description = std::dynamic_pointer_cast<const Box_VisualSampleEntry>(sample_description); |
69 | 0 | if (!visual_sample_description) { |
70 | 0 | return { |
71 | 0 | heif_error_Invalid_input, |
72 | 0 | heif_suberror_Unspecified, |
73 | 0 | "Visual track sample description does not match visual track." |
74 | 0 | }; |
75 | 0 | } |
76 | | |
77 | 0 | m_width = visual_sample_description->get_VisualSampleEntry_const().width; |
78 | 0 | m_height = visual_sample_description->get_VisualSampleEntry_const().height; |
79 | 0 | } |
80 | | |
81 | 0 | return {}; |
82 | 0 | } |
83 | | |
84 | | |
85 | | void Track_Visual::initialize_after_parsing(HeifContext* ctx, const std::vector<std::shared_ptr<Track>>& all_tracks) |
86 | 0 | { |
87 | | // --- check whether there is an auxiliary alpha track assigned to this track |
88 | | |
89 | | // Only assign to image-sequence tracks (TODO: are there also alpha tracks allowed for video tracks 'heif_track_type_video'?) |
90 | |
|
91 | 0 | if (get_handler() == heif_track_type_image_sequence) { |
92 | 0 | for (auto track : all_tracks) { |
93 | | |
94 | | // skip ourselves |
95 | 0 | if (track->get_id() != get_id()) { |
96 | | |
97 | | // Is this an aux alpha track? |
98 | 0 | auto h = fourcc_to_string(track->get_handler()); |
99 | 0 | if (track->get_handler() == heif_track_type_auxiliary && |
100 | 0 | track->get_auxiliary_info_type() == heif_auxiliary_track_info_type_alpha) { |
101 | | |
102 | | // Is it assigned to the current track |
103 | 0 | auto tref = track->get_tref_box(); |
104 | 0 | auto references = tref->get_references(fourcc("auxl")); |
105 | 0 | if (std::any_of(references.begin(), references.end(), [this](uint32_t id) { return id == get_id(); })) { |
106 | | |
107 | | // Assign it |
108 | |
|
109 | 0 | m_aux_alpha_track = std::dynamic_pointer_cast<Track_Visual>(track); |
110 | 0 | } |
111 | 0 | } |
112 | 0 | } |
113 | 0 | } |
114 | 0 | } |
115 | 0 | } |
116 | | |
117 | | |
118 | | Track_Visual::Track_Visual(HeifContext* ctx, uint32_t track_id, uint16_t width, uint16_t height, |
119 | | const TrackOptions* options, uint32_t handler_type) |
120 | 0 | : Track(ctx, track_id, options, handler_type) |
121 | 0 | { |
122 | 0 | m_tkhd->set_resolution(width, height); |
123 | | //m_hdlr->set_handler_type(handler_type); already done in Track() |
124 | |
|
125 | 0 | auto vmhd = std::make_shared<Box_vmhd>(); |
126 | 0 | m_minf->append_child_box(vmhd); |
127 | 0 | } |
128 | | |
129 | | |
// Decode the next image sample of this track and advance the internal
// sample cursor (m_next_sample_to_be_processed).
//
// Also merges the alpha plane from an assigned auxiliary alpha track
// (decoded in lockstep) and attaches per-sample auxiliary data
// (GIMI content ID, TAI timestamp) when the corresponding readers exist.
//
// @param options  decoding options; ignore_sequence_editlist switches between
//                 the edit-list sample count and the raw sample count
// @return the decoded image, or heif_error_End_of_sequence when exhausted.
Result<std::shared_ptr<HeifPixelImage>> Track_Visual::decode_next_image_sample(const heif_decoding_options& options)
{
  // With an edit list, the number of output samples may differ from the number
  // of stored samples (e.g. repetitions); optionally bypass that.
  uint64_t num_output_samples = m_num_output_samples;
  if (options.ignore_sequence_editlist) {
    num_output_samples = m_num_samples;
  }

  if (m_next_sample_to_be_processed >= num_output_samples) {
    return Error{heif_error_End_of_sequence,
                 heif_suberror_Unspecified,
                 "End of sequence"};
  }

  // Map the output position to a stored sample. The modulo wraps the timeline
  // when the edit list repeats the sequence (num_output_samples > timeline size).
  const auto& sampleTiming = m_presentation_timeline[m_next_sample_to_be_processed % m_presentation_timeline.size()];
  uint32_t sample_idx = sampleTiming.sampleIdx;
  uint32_t chunk_idx = sampleTiming.chunkIdx;

  const std::shared_ptr<Chunk>& chunk = m_chunks[chunk_idx];

  auto decoder = chunk->get_decoder();
  assert(decoder);

  // Point the decoder at this sample's byte range within the chunk.
  decoder->set_data_extent(chunk->get_data_extent_for_sample(sample_idx));

  Result<std::shared_ptr<HeifPixelImage>> decodingResult = decoder->decode_single_frame_from_compressed_data(options,
                                                                                                             m_heif_context->get_security_limits());
  if (!decodingResult) {
    // Skip over the broken sample so the caller can try the next one.
    m_next_sample_to_be_processed++;
    return decodingResult.error();
  }

  auto image = *decodingResult;

  if (m_stts) {
    image->set_sample_duration(m_stts->get_sample_duration(sample_idx));
  }

  // --- assign alpha if we have an assigned alpha track

  if (m_aux_alpha_track) {
    // The alpha track keeps its own sample cursor; decoding it here keeps both
    // tracks advancing in lockstep.
    auto alphaResult = m_aux_alpha_track->decode_next_image_sample(options);
    if (!alphaResult) {
      return alphaResult.error();
    }

    auto alphaImage = *alphaResult;
    // The alpha image's luma plane becomes this image's alpha plane.
    image->transfer_plane_from_image_as(alphaImage, heif_channel_Y, heif_channel_Alpha);
  }


  // --- read sample auxiliary data

  if (m_aux_reader_content_ids) {
    auto readResult = m_aux_reader_content_ids->get_sample_info(get_file().get(), sample_idx);
    if (!readResult) {
      return readResult.error();
    }

    Result<std::string> convResult = vector_to_string(*readResult);
    if (!convResult) {
      return convResult.error();
    }

    image->set_gimi_sample_content_id(*convResult);
  }

  if (m_aux_reader_tai_timestamps) {
    auto readResult = m_aux_reader_tai_timestamps->get_sample_info(get_file().get(), sample_idx);
    if (!readResult) {
      return readResult.error();
    }

    auto resultTai = Box_itai::decode_tai_from_vector(*readResult);
    if (!resultTai) {
      return resultTai.error();
    }

    image->set_tai_timestamp(&*resultTai);
  }

  // Only advance the cursor after a fully successful decode (errors above,
  // except the decode failure itself, leave it unchanged).
  m_next_sample_to_be_processed++;

  return image;
}
214 | | |
215 | | |
// Encode one image and append it as a sample to this track.
//
// Starts a new chunk when this is the first sample or when the encoder's
// compression format differs from the current chunk's. For a new chunk, a
// matching SampleDescriptionBox (with 'ccst' coding constraints) is generated
// from the first coded frame.
//
// @param image       input image; also supplies sample duration, optional TAI
//                    timestamp and GIMI content ID metadata
// @param h_encoder   the public-API encoder (plugin) to use
// @param in_options  encoding options; may be overridden with the encoder's
//                    forced output nclx profile
// @param input_class hint about the image's role (primary/alpha/...)
// @return Error::Ok on success.
Error Track_Visual::encode_image(std::shared_ptr<HeifPixelImage> image,
                                 heif_encoder* h_encoder,
                                 const heif_encoding_options& in_options,
                                 heif_image_input_class input_class)
{
  // VisualSampleEntry stores width/height as 16 bit values.
  if (image->get_width() > 0xFFFF ||
      image->get_height() > 0xFFFF) {
    return {heif_error_Invalid_input,
            heif_suberror_Unspecified,
            "Input image resolution too high"};
  }

  // === generate compressed image bitstream

  // generate new chunk for first image or when compression formats don't match

  bool add_sample_description = false;

  if (m_chunks.empty() || m_chunks.back()->get_compression_format() != h_encoder->plugin->compression_format) {
    add_chunk(h_encoder->plugin->compression_format);
    add_sample_description = true;
  }

  // --- check whether we have to convert the image color space

  // The reason for doing the color conversion here is that the input might be an RGBA image and the color conversion
  // will extract the alpha plane anyway. We can reuse that plane below instead of having to do a new conversion.

  heif_encoding_options options = in_options;

  auto encoder = m_chunks.back()->get_encoder();

  // Some encoders only emit a fixed nclx profile; force it into the options.
  // const_cast is needed because heif_encoding_options holds a non-const
  // pointer, but the profile is not modified downstream.
  if (const auto* nclx = encoder->get_forced_output_nclx()) {
    options.output_nclx_profile = const_cast<heif_color_profile_nclx*>(nclx);
  }

  Result<std::shared_ptr<HeifPixelImage>> srcImageResult = encoder->convert_colorspace_for_encoding(image,
                                                                                                    h_encoder,
                                                                                                    options,
                                                                                                    m_heif_context->get_security_limits());
  if (!srcImageResult) {
    return srcImageResult.error();
  }

  std::shared_ptr<HeifPixelImage> colorConvertedImage = *srcImageResult;

  // --- encode image

  Result<Encoder::CodedImageData> encodeResult = encoder->encode(colorConvertedImage, h_encoder, options, input_class);
  if (!encodeResult) {
    return encodeResult.error();
  }

  const Encoder::CodedImageData& data = *encodeResult;


  // --- generate SampleDescriptionBox

  // Only emitted for the first sample of a chunk; the entry's resolution is
  // taken from the (possibly color-converted) encoded image.
  if (add_sample_description) {
    auto sample_description_box = encoder->get_sample_description_box(data);
    VisualSampleEntry& visualSampleEntry = sample_description_box->get_VisualSampleEntry();
    visualSampleEntry.width = static_cast<uint16_t>(colorConvertedImage->get_width());
    visualSampleEntry.height = static_cast<uint16_t>(colorConvertedImage->get_height());

    // Attach the CodingConstraintsBox required for image sequence tracks.
    auto ccst = std::make_shared<Box_ccst>();
    ccst->set_coding_constraints(data.codingConstraints);
    sample_description_box->append_child_box(ccst);

    set_sample_description_box(sample_description_box);
  }

  // Write the bitstream plus per-sample metadata (sync flag, TAI timestamp,
  // optional GIMI content ID) into the track's sample tables.
  Error err = write_sample_data(data.bitstream,
                                colorConvertedImage->get_sample_duration(),
                                data.is_sync_frame,
                                image->get_tai_timestamp(),
                                image->has_gimi_sample_content_id() ? image->get_gimi_sample_content_id() : std::string{});

  if (err) {
    return err;
  }

  return Error::Ok;
}
299 | | |
300 | | |
301 | | heif_brand2 Track_Visual::get_compatible_brand() const |
302 | 0 | { |
303 | 0 | if (m_stsd->get_num_sample_entries() == 0) { |
304 | 0 | return 0; // TODO: error ? Or can we assume at this point that there is at least one sample entry? |
305 | 0 | } |
306 | | |
307 | 0 | auto sampleEntry = m_stsd->get_sample_entry(0); |
308 | |
|
309 | 0 | uint32_t sample_entry_type = sampleEntry->get_short_type(); |
310 | 0 | switch (sample_entry_type) { |
311 | 0 | case fourcc("hvc1"): { |
312 | 0 | auto hvcC = sampleEntry->get_child_box<Box_hvcC>(); |
313 | 0 | if (!hvcC) { return 0; } |
314 | | |
315 | 0 | const auto& config = hvcC->get_configuration(); |
316 | 0 | if (config.is_profile_compatibile(HEVCDecoderConfigurationRecord::Profile_Main) || |
317 | 0 | config.is_profile_compatibile(HEVCDecoderConfigurationRecord::Profile_MainStillPicture)) { |
318 | 0 | return heif_brand2_hevc; |
319 | 0 | } |
320 | 0 | else { |
321 | 0 | return heif_brand2_hevx; |
322 | 0 | } |
323 | 0 | } |
324 | | |
325 | 0 | case fourcc("avc1"): |
326 | 0 | return heif_brand2_avcs; |
327 | | |
328 | 0 | case fourcc("av01"): |
329 | 0 | return heif_brand2_avis; |
330 | | |
331 | 0 | case fourcc("j2ki"): |
332 | 0 | return heif_brand2_j2is; |
333 | | |
334 | 0 | case fourcc("mjpg"): |
335 | 0 | return heif_brand2_jpgs; |
336 | | |
337 | 0 | case fourcc("vvc1"): |
338 | 0 | return heif_brand2_vvis; |
339 | | |
340 | 0 | default: |
341 | 0 | return 0; |
342 | 0 | } |
343 | 0 | } |