/src/ffmpeg/libavformat/hxvs.c
Line | Count | Source |
1 | | /* |
2 | | * HXVS/HXVT IP camera format |
3 | | * |
4 | | * Copyright (c) 2025 Zhao Zhili <quinkblack@foxmail.com> |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | |
23 | | #include "libavutil/intreadwrite.h" |
24 | | |
25 | | #include "avio_internal.h" |
26 | | #include "demux.h" |
27 | | #include "internal.h" |
28 | | |
29 | | /* |
30 | | * Ref |
31 | | * https://code.videolan.org/videolan/vlc/-/blob/master/modules/demux/hx.c |
32 | | * https://github.com/francescovannini/ipcam26Xconvert/tree/main |
33 | | */ |
34 | | |
/*
 * File header, H.264 payload
 *
 * uint32_t tag;
 * uint32_t width;
 * uint32_t height;
 * uint8_t  padding[4];
 */
#define HXVS MKTAG('H', 'X', 'V', 'S')

/*
 * File header, H.265 payload
 *
 * Same layout as HXVS.
 */
#define HXVT MKTAG('H', 'X', 'V', 'T')

/*
 * Video frame
 *
 * uint32_t tag;
 * uint32_t bytes;
 * uint32_t timestamp;
 * uint32_t flags;
 * ------------------
 * uint8_t  data[bytes];
 *
 * Note: each HXVF carries a single NALU or slice, not a whole frame.
 */
#define HXVF MKTAG('H', 'X', 'V', 'F')

/*
 * Audio frame
 *
 * uint32_t tag;
 * uint32_t bytes;
 * uint32_t timestamp;
 * uint32_t flags;
 * ------------------
 * uint8_t  data[bytes];
 *
 * Note: the first four bytes of data are a fake start code and NALU type,
 * which should be skipped.
 */
#define HXAF MKTAG('H', 'X', 'A', 'F')

/*
 * RAP (random access point) frame index
 *
 * uint32_t tag;
 * uint32_t bytes;
 * uint32_t duration;
 * uint32_t flags;
 */
#define HXFI MKTAG('H', 'X', 'F', 'I')
85 | | |
/* Size in bytes of the fixed-length table that follows the 16-byte HXFI header. */
#define HXFI_TABLE_SIZE 200000
/* Number of table entries; each entry is two uint32_t values (offset, time). */
#define HXFI_TABLE_COUNT (HXFI_TABLE_SIZE / 8)
88 | | |
/* Demuxer private data: which AVStream each frame type is delivered on. */
typedef struct HxvsContext {
    int video_index;    /* index of the stream that receives HXVF payloads */
    int audio_index;    /* index of the stream that receives HXAF payloads */
} HxvsContext;
93 | | |
94 | | static int hxvs_probe(const AVProbeData *p) |
95 | 958k | { |
96 | 958k | uint32_t flag = 0; |
97 | 958k | uint32_t bytes; |
98 | | |
99 | 958k | for (size_t i = 0; i < p->buf_size; ) { |
100 | 949k | uint32_t tag = AV_RL32(&p->buf[i]); |
101 | | |
102 | | // first four bytes must begin with HXVS/HXVT |
103 | 949k | if (i == 0) { |
104 | 949k | if (tag != HXVS && tag != HXVT) |
105 | 948k | return 0; |
106 | 184 | flag |= 1; |
107 | 184 | i += 16; |
108 | 184 | continue; |
109 | 949k | } |
110 | | |
111 | | // Got RAP index at the end |
112 | 281 | if (tag == HXFI) { |
113 | 3 | if (flag == 7) |
114 | 0 | return AVPROBE_SCORE_MAX; |
115 | 3 | break; |
116 | 3 | } |
117 | | |
118 | 278 | i += 4; |
119 | 278 | if (tag == HXVF || tag == HXAF) { |
120 | 132 | bytes = AV_RL32(&p->buf[i]); |
121 | 132 | i += 12 + bytes; |
122 | 132 | flag |= (tag == HXVF) ? 2 : 4; |
123 | 132 | continue; |
124 | 132 | } |
125 | | |
126 | 146 | return 0; |
127 | 278 | } |
128 | | |
129 | | // Get audio and video |
130 | 9.28k | if (flag == 7) |
131 | 5 | return AVPROBE_SCORE_EXTENSION + 10; |
132 | | // Get video only |
133 | 9.27k | if (flag == 3) |
134 | 4 | return AVPROBE_SCORE_EXTENSION + 2; |
135 | | |
136 | 9.27k | return 0; |
137 | 9.27k | } |
138 | | |
139 | | static int hxvs_create_video_stream(AVFormatContext *s, enum AVCodecID codec_id) |
140 | 13.0k | { |
141 | 13.0k | HxvsContext *ctx = s->priv_data; |
142 | 13.0k | AVIOContext *pb = s->pb; |
143 | 13.0k | AVStream *vt = avformat_new_stream(s, NULL); |
144 | 13.0k | if (!vt) |
145 | 0 | return AVERROR(ENOMEM); |
146 | | |
147 | 13.0k | vt->id = 0; |
148 | 13.0k | vt->codecpar->codec_type = AVMEDIA_TYPE_VIDEO; |
149 | 13.0k | vt->codecpar->codec_id = codec_id; |
150 | 13.0k | vt->codecpar->width = avio_rl32(pb); |
151 | 13.0k | vt->codecpar->height = avio_rl32(pb); |
152 | 13.0k | avpriv_set_pts_info(vt, 32, 1, 1000); |
153 | 13.0k | ffstream(vt)->need_parsing = AVSTREAM_PARSE_FULL; |
154 | 13.0k | ctx->video_index = vt->index; |
155 | | |
156 | | // skip padding |
157 | 13.0k | avio_skip(pb, 4); |
158 | | |
159 | 13.0k | return 0; |
160 | 13.0k | } |
161 | | |
162 | | static int hxvs_create_audio_stream(AVFormatContext *s) |
163 | 13.0k | { |
164 | 13.0k | HxvsContext *ctx = s->priv_data; |
165 | 13.0k | AVStream *at = avformat_new_stream(s, NULL); |
166 | 13.0k | if (!at) |
167 | 0 | return AVERROR(ENOMEM); |
168 | | |
169 | 13.0k | at->id = 1; |
170 | 13.0k | at->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; |
171 | 13.0k | at->codecpar->codec_id = AV_CODEC_ID_PCM_ALAW; |
172 | 13.0k | at->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO; |
173 | 13.0k | at->codecpar->sample_rate = 8000; |
174 | 13.0k | avpriv_set_pts_info(at, 32, 1, 1000); |
175 | 13.0k | ctx->audio_index = at->index; |
176 | | |
177 | 13.0k | return 0; |
178 | 13.0k | } |
179 | | |
180 | | static int hxvs_build_index(AVFormatContext *s) |
181 | 397 | { |
182 | 397 | HxvsContext *ctx = s->priv_data; |
183 | 397 | AVIOContext *pb = s->pb; |
184 | | |
185 | 397 | int64_t size = avio_size(pb); |
186 | 397 | if (size < 0) |
187 | 0 | return size; |
188 | | // Don't return error when HXFI is missing |
189 | 397 | int64_t pos = avio_seek(pb, size -(HXFI_TABLE_SIZE + 16), SEEK_SET); |
190 | 397 | if (pos < 0) |
191 | 119 | return 0; |
192 | | |
193 | 278 | uint32_t tag = avio_rl32(pb); |
194 | 278 | if (tag != HXFI) |
195 | 253 | return 0; |
196 | 25 | avio_skip(pb, 4); |
197 | 25 | AVStream *st = s->streams[ctx->video_index]; |
198 | 25 | st->duration = avio_rl32(pb); |
199 | 25 | avio_skip(pb, 4); |
200 | | |
201 | 25 | FFStream *const sti = ffstream(st); |
202 | 25 | uint32_t prev_time; |
203 | 9.04k | for (int i = 0; i < HXFI_TABLE_COUNT; i++) { |
204 | 9.04k | uint32_t offset = avio_rl32(pb); |
205 | | // pts = first_frame_pts + time |
206 | 9.04k | uint32_t time = avio_rl32(pb); |
207 | 9.04k | av_log(s, AV_LOG_TRACE, "%s/%d: offset %u, time %u\n", |
208 | 9.04k | av_fourcc2str(HXAF), i, offset, time); |
209 | 9.04k | if (!offset) |
210 | 22 | break; |
211 | | |
212 | 9.02k | if (!i) { |
213 | | // Get first frame timestamp |
214 | 23 | int64_t save_pos = avio_tell(pb); |
215 | 23 | pos = avio_seek(pb, offset, SEEK_SET); |
216 | 23 | if (pos < 0) |
217 | 1 | return pos; |
218 | 22 | tag = avio_rl32(pb); |
219 | 22 | if (tag != HXVF) { |
220 | 2 | av_log(s, AV_LOG_ERROR, "invalid tag %s at pos %u\n", |
221 | 2 | av_fourcc2str(tag), offset); |
222 | 2 | return AVERROR_INVALIDDATA; |
223 | 2 | } |
224 | 20 | avio_skip(pb, 4); |
225 | | // save first frame timestamp to stream start_time |
226 | 20 | st->start_time = avio_rl32(pb); |
227 | 20 | pos = avio_seek(pb, save_pos, SEEK_SET); |
228 | 20 | if (pos < 0) |
229 | 0 | return pos; |
230 | 9.00k | } else if (time == prev_time) { |
231 | | // hxvs put SPS, PPS and slice into separate entries with same timestamp. |
232 | | // Only record the first entry. |
233 | 8.55k | continue; |
234 | 8.55k | } |
235 | 464 | prev_time = time; |
236 | 464 | int ret = ff_add_index_entry(&sti->index_entries, |
237 | 464 | &sti->nb_index_entries, |
238 | 464 | &sti->index_entries_allocated_size, |
239 | 464 | offset, st->start_time + time, |
240 | 464 | 0, 0, AVINDEX_KEYFRAME); |
241 | 464 | if (ret < 0) |
242 | 0 | return ret; |
243 | 464 | } |
244 | | |
245 | 22 | return 0; |
246 | 25 | } |
247 | | |
248 | | static int hxvs_read_header(AVFormatContext *s) |
249 | 13.2k | { |
250 | 13.2k | AVIOContext *pb = s->pb; |
251 | 13.2k | uint32_t tag = avio_rl32(pb); |
252 | 13.2k | enum AVCodecID codec_id; |
253 | | |
254 | 13.2k | if (tag == HXVS) { |
255 | 4.86k | codec_id = AV_CODEC_ID_H264; |
256 | 8.37k | } else if (tag == HXVT) { |
257 | 8.22k | codec_id = AV_CODEC_ID_HEVC; |
258 | 8.22k | } else { |
259 | 146 | av_log(s, AV_LOG_ERROR, "Unknown tag %s\n", av_fourcc2str(tag)); |
260 | 146 | return AVERROR_INVALIDDATA; |
261 | 146 | } |
262 | | |
263 | 13.0k | int ret = hxvs_create_video_stream(s, codec_id); |
264 | 13.0k | if (ret < 0) |
265 | 0 | return ret; |
266 | | |
267 | 13.0k | ret = hxvs_create_audio_stream(s); |
268 | 13.0k | if (ret < 0) |
269 | 0 | return ret; |
270 | | |
271 | 13.0k | if (pb->seekable & AVIO_SEEKABLE_NORMAL) { |
272 | 397 | int64_t pos = avio_tell(pb); |
273 | 397 | if (pos < 0) |
274 | 0 | return pos; |
275 | | |
276 | 397 | ret = hxvs_build_index(s); |
277 | 397 | if (ret < 0) |
278 | 3 | return ret; |
279 | | |
280 | 394 | pos = avio_seek(pb, pos, SEEK_SET); |
281 | 394 | if (pos < 0) |
282 | 0 | return ret; |
283 | 394 | } |
284 | | |
285 | 13.0k | return 0; |
286 | 13.0k | } |
287 | | |
288 | | static int hxvs_read_packet(AVFormatContext *s, AVPacket *pkt) |
289 | 57.2k | { |
290 | 57.2k | HxvsContext *ctx = s->priv_data; |
291 | 57.2k | AVIOContext *pb = s->pb; |
292 | 57.2k | int64_t pos = avio_tell(pb); |
293 | 57.2k | uint32_t tag = avio_rl32(pb); |
294 | 57.2k | uint32_t bytes; |
295 | 57.2k | int ret; |
296 | | |
297 | 57.2k | if (avio_feof(pb) || (tag == HXFI)) |
298 | 35.2k | return AVERROR_EOF; |
299 | | |
300 | 22.0k | if (tag != HXVF && tag != HXAF) |
301 | 2.65k | return AVERROR_INVALIDDATA; |
302 | | |
303 | 19.3k | bytes = avio_rl32(pb); |
304 | 19.3k | if (bytes < 4) |
305 | 8 | return AVERROR_INVALIDDATA; |
306 | | |
307 | 19.3k | uint32_t timestamp = avio_rl32(pb); |
308 | 19.3k | int key_flag = 0; |
309 | 19.3k | int index; |
310 | 19.3k | if (tag == HXVF) { |
311 | 13.3k | if (avio_rl32(pb) == 1) |
312 | 36 | key_flag = AV_PKT_FLAG_KEY; |
313 | 13.3k | index = ctx->video_index; |
314 | 13.3k | } else { |
315 | 6.05k | avio_skip(pb, 8); |
316 | 6.05k | index = ctx->audio_index; |
317 | 6.05k | bytes -= 4; |
318 | 6.05k | } |
319 | | |
320 | 19.3k | ret = av_get_packet(pb, pkt, bytes); |
321 | 19.3k | if (ret < 0) |
322 | 145 | return ret; |
323 | 19.2k | pkt->pts = timestamp; |
324 | 19.2k | pkt->pos = pos; |
325 | 19.2k | pkt->stream_index = index; |
326 | 19.2k | pkt->flags |= key_flag; |
327 | | |
328 | 19.2k | return 0; |
329 | 19.3k | } |
330 | | |
331 | | const FFInputFormat ff_hxvs_demuxer = { |
332 | | .p.name = "hxvs", |
333 | | .p.long_name = NULL_IF_CONFIG_SMALL("HXVF/HXVS IP camera format"), |
334 | | .p.extensions = "264,265", |
335 | | .p.flags = AVFMT_GENERIC_INDEX, |
336 | | .read_probe = hxvs_probe, |
337 | | .read_header = hxvs_read_header, |
338 | | .read_packet = hxvs_read_packet, |
339 | | .priv_data_size = sizeof(HxvsContext), |
340 | | }; |