/src/ffmpeg/libavformat/hxvs.c

Source
/*
 * HXVS/HXVT IP camera format
 *
 * Copyright (c) 2025 Zhao Zhili <quinkblack@foxmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <limits.h>

#include "libavutil/intreadwrite.h"

#include "avio_internal.h"
#include "demux.h"
#include "internal.h"

/*
 * Ref
 * https://code.videolan.org/videolan/vlc/-/blob/master/modules/demux/hx.c
 * https://github.com/francescovannini/ipcam26Xconvert/tree/main
 */

/* H.264 file header (16 bytes, at the start of the file)
 *
 * uint32_t tag;
 * uint32_t width;
 * uint32_t height;
 * uint8_t padding[4];
 */
#define HXVS    MKTAG('H', 'X', 'V', 'S')

/* H.265 file header
 *
 * Same layout as HXVS.
 */
#define HXVT    MKTAG('H', 'X', 'V', 'T')

/* video chunk
 *
 * uint32_t tag;
 * uint32_t bytes;
 * uint32_t timestamp;
 * uint32_t flags;
 * ------------------
 * uint8_t data[bytes];
 *
 * The demuxer uses a 1/1000 time base for the timestamp and marks the packet
 * as a keyframe when flags == 1.
 *
 * Note: each HXVF contains a single NALU or slice, not a frame.
 */
#define HXVF    MKTAG('H', 'X', 'V', 'F')

/* audio chunk
 *
 * uint32_t tag;
 * uint32_t bytes;
 * uint32_t timestamp;
 * uint32_t flags;
 * ------------------
 * uint8_t data[bytes];
 *
 * Note: the first four bytes of data are a fake start code and NALU type,
 * which should be skipped.
 */
#define HXAF    MKTAG('H', 'X', 'A', 'F')

/* RAP frame index, looked up at the end of the file
 *
 * uint32_t tag;
 * uint32_t bytes;
 * uint32_t duration;
 * uint32_t flags;
 * ------------------
 * A table of (uint32_t offset, uint32_t time) entries follows. An entry with
 * offset 0 terminates the table; time is relative to the timestamp of the
 * first indexed frame.
 */
#define HXFI    MKTAG('H', 'X', 'F', 'I')

/* Size in bytes of the index table that follows the 16-byte HXFI header. */
#define HXFI_TABLE_SIZE  200000
/* Number of table entries; each entry is two uint32_t values (8 bytes). */
#define HXFI_TABLE_COUNT (HXFI_TABLE_SIZE / 8)

/* Demuxer private data: indices of the two streams created in read_header. */
typedef struct HxvsContext {
    int video_index;    // stream index assigned to HXVF packets
    int audio_index;    // stream index assigned to HXAF packets
} HxvsContext;

/**
 * Probe for the HXVS/HXVT container.
 *
 * Walks the chunk chain in the probe buffer: a 16-byte HXVS/HXVT file header
 * followed by HXVF (video) and HXAF (audio) chunks, optionally ended by an
 * HXFI index chunk. A chunk whose payload extends past the probe buffer still
 * counts as seen.
 *
 * @param p probe data
 * @return AVPROBE_SCORE_MAX if HXFI is reached after both video and audio
 *         chunks were seen, AVPROBE_SCORE_EXTENSION + 10 if both were seen,
 *         AVPROBE_SCORE_EXTENSION + 2 if only video was seen, 0 otherwise.
 */
static int hxvs_probe(const AVProbeData *p)
{
    // bit 0: file header seen, bit 1: video chunk seen, bit 2: audio chunk seen
    uint32_t flag = 0;
    uint32_t bytes;

    // The position is kept in 64 bits so that advancing by an untrusted
    // 32-bit chunk size can neither wrap around nor stall the loop.
    // NOTE(review): the 4-byte reads below may start within the last bytes
    // of the buffer; this presumably relies on the probe buffer's zeroed
    // padding (AVPROBE_PADDING_SIZE) - confirm.
    for (int64_t i = 0; i < p->buf_size; ) {
        uint32_t tag = AV_RL32(&p->buf[i]);

        // first four bytes must begin with HXVS/HXVT
        if (i == 0) {
            if (tag != HXVS && tag != HXVT)
                return 0;
            flag |= 1;
            i += 16;
            continue;
        }

        // Got RAP index at the end
        if (tag == HXFI) {
            if (flag == 7)
                return AVPROBE_SCORE_MAX;
            break;
        }

        i += 4;
        if (tag == HXVF || tag == HXAF) {
            bytes = AV_RL32(&p->buf[i]);
            // skip bytes/timestamp/flags (12 bytes) plus the payload
            i += 12 + (int64_t)bytes;
            flag |= (tag == HXVF) ? 2 : 4;
            continue;
        }

        // unknown chunk tag
        return 0;
    }

    // Get audio and video
    if (flag == 7)
        return AVPROBE_SCORE_EXTENSION + 10;
    // Get video only
    if (flag == 3)
        return AVPROBE_SCORE_EXTENSION + 2;

    return 0;
}

/**
 * Create the video stream and consume the remainder of the file header
 * (width, height and four padding bytes) that follows the format tag.
 *
 * @param s        demuxer context; its private data receives the stream index
 * @param codec_id codec selected by the caller from the file tag
 * @return 0 on success, AVERROR(ENOMEM) if the stream cannot be allocated
 */
static int hxvs_create_video_stream(AVFormatContext *s, enum AVCodecID codec_id)
{
    AVIOContext *pb = s->pb;
    HxvsContext *ctx = s->priv_data;
    AVStream *stream;
    int width, height;

    stream = avformat_new_stream(s, NULL);
    if (!stream)
        return AVERROR(ENOMEM);

    // Remaining header fields, in file order.
    width  = avio_rl32(pb);
    height = avio_rl32(pb);
    // skip padding
    avio_skip(pb, 4);

    stream->id = 0;
    stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
    stream->codecpar->codec_id   = codec_id;
    stream->codecpar->width      = width;
    stream->codecpar->height     = height;

    // 32-bit timestamps with a 1/1000 time base
    avpriv_set_pts_info(stream, 32, 1, 1000);
    ffstream(stream)->need_parsing = AVSTREAM_PARSE_FULL;

    ctx->video_index = stream->index;
    return 0;
}

/**
 * Create the audio stream with fixed parameters: PCM A-law, mono, 8000 Hz.
 *
 * @param s demuxer context; its private data receives the stream index
 * @return 0 on success, AVERROR(ENOMEM) if the stream cannot be allocated
 */
static int hxvs_create_audio_stream(AVFormatContext *s)
{
    HxvsContext *ctx = s->priv_data;
    AVStream *stream = avformat_new_stream(s, NULL);

    if (stream == NULL)
        return AVERROR(ENOMEM);

    stream->id = 1;
    stream->codecpar->codec_type  = AVMEDIA_TYPE_AUDIO;
    stream->codecpar->codec_id    = AV_CODEC_ID_PCM_ALAW;
    stream->codecpar->sample_rate = 8000;
    stream->codecpar->ch_layout   = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;

    // 32-bit timestamps with a 1/1000 time base
    avpriv_set_pts_info(stream, 32, 1, 1000);

    ctx->audio_index = stream->index;
    return 0;
}

/**
 * Build the video seek index from the HXFI chunk expected at the end of the
 * file.
 *
 * Sets the video stream duration from the HXFI header and, using the first
 * table entry, the stream start_time from the first indexed HXVF chunk. A
 * missing HXFI chunk is not an error. This function moves the read position;
 * the caller is responsible for restoring it.
 *
 * @return 0 on success or when no index is present, a negative error code if
 *         the file size is unavailable, a seek fails, the first table entry
 *         does not point to an HXVF chunk, or an index entry cannot be added.
 */
static int hxvs_build_index(AVFormatContext *s)
{
    HxvsContext *ctx = s->priv_data;
    AVIOContext *pb = s->pb;

    int64_t size = avio_size(pb);
    if (size < 0)
        return size;
    // Don't return error when HXFI is missing
    int64_t pos = avio_seek(pb, size - (HXFI_TABLE_SIZE + 16), SEEK_SET);
    if (pos < 0)
        return 0;

    uint32_t tag = avio_rl32(pb);
    if (tag != HXFI)
        return 0;
    // skip the "bytes" field
    avio_skip(pb, 4);
    AVStream *st = s->streams[ctx->video_index];
    st->duration = avio_rl32(pb);
    // skip the "flags" field
    avio_skip(pb, 4);

    FFStream *const sti = ffstream(st);
    // Only compared for i > 0, after the first entry has set it; initialized
    // anyway so the variable never holds an indeterminate value.
    uint32_t prev_time = 0;
    for (int i = 0; i < HXFI_TABLE_COUNT; i++) {
        uint32_t offset = avio_rl32(pb);
        // pts = first_frame_pts + time
        uint32_t time = avio_rl32(pb);
        av_log(s, AV_LOG_TRACE, "%s/%d: offset %u, time %u\n",
               av_fourcc2str(HXFI), i, offset, time);
        // a zero offset ends the table
        if (!offset)
            break;

        if (!i) {
            // Get first frame timestamp
            int64_t save_pos = avio_tell(pb);
            pos = avio_seek(pb, offset, SEEK_SET);
            if (pos < 0)
                return pos;
            tag = avio_rl32(pb);
            if (tag != HXVF) {
                av_log(s, AV_LOG_ERROR, "invalid tag %s at pos %u\n",
                       av_fourcc2str(tag), offset);
                return AVERROR_INVALIDDATA;
            }
            // skip the "bytes" field of the HXVF header
            avio_skip(pb, 4);
            // save first frame timestamp to stream start_time
            st->start_time = avio_rl32(pb);
            // return to the index table
            pos = avio_seek(pb, save_pos, SEEK_SET);
            if (pos < 0)
                return pos;
        } else if (time == prev_time) {
            // hxvs put SPS, PPS and slice into separate entries with same timestamp.
            // Only record the first entry.
            continue;
        }
        prev_time = time;
        int ret = ff_add_index_entry(&sti->index_entries,
                                     &sti->nb_index_entries,
                                     &sti->index_entries_allocated_size,
                                     offset, st->start_time + time,
                                     0, 0, AVINDEX_KEYFRAME);
        if (ret < 0)
            return ret;
    }

    return 0;
}

/**
 * Parse the file header, create the video and audio streams and, on seekable
 * input, build the seek index from the trailing HXFI chunk.
 *
 * @return 0 on success, AVERROR_INVALIDDATA for an unknown file tag, or a
 *         negative error code from stream creation, index building or seeking
 */
static int hxvs_read_header(AVFormatContext *s)
{
    AVIOContext *pb = s->pb;
    uint32_t tag = avio_rl32(pb);
    enum AVCodecID codec_id;

    // The file tag selects the video codec.
    if (tag == HXVS) {
        codec_id = AV_CODEC_ID_H264;
    } else if (tag == HXVT) {
        codec_id = AV_CODEC_ID_HEVC;
    } else {
        av_log(s, AV_LOG_ERROR, "Unknown tag %s\n", av_fourcc2str(tag));
        return AVERROR_INVALIDDATA;
    }

    int ret = hxvs_create_video_stream(s, codec_id);
    if (ret < 0)
        return ret;

    ret = hxvs_create_audio_stream(s);
    if (ret < 0)
        return ret;

    if (pb->seekable & AVIO_SEEKABLE_NORMAL) {
        // Building the index moves the read position; remember where the
        // first chunk starts so it can be restored afterwards.
        int64_t pos = avio_tell(pb);
        if (pos < 0)
            return pos;

        ret = hxvs_build_index(s);
        if (ret < 0)
            return ret;

        pos = avio_seek(pb, pos, SEEK_SET);
        // Report the seek failure itself: ret is non-negative here, so
        // returning it would signal success with a misplaced read position.
        if (pos < 0)
            return pos;
    }

    return 0;
}

/**
 * Read one HXVF or HXAF chunk and return its payload as a packet.
 *
 * For audio chunks the first four payload bytes (fake start code and NALU
 * type) are dropped. The packet pts is the chunk timestamp and the packet
 * position is the offset of the chunk tag.
 *
 * @return 0 on success, AVERROR_EOF at end of input or when the HXFI chunk is
 *         reached, AVERROR_INVALIDDATA for an unknown tag or an out-of-range
 *         payload size, or the negative error code from av_get_packet()
 */
static int hxvs_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    HxvsContext *ctx = s->priv_data;
    AVIOContext *pb = s->pb;
    int64_t pos = avio_tell(pb);
    uint32_t tag = avio_rl32(pb);
    uint32_t bytes;
    int ret;

    if (avio_feof(pb) || (tag == HXFI))
        return AVERROR_EOF;

    if (tag != HXVF && tag != HXAF)
        return AVERROR_INVALIDDATA;

    bytes = avio_rl32(pb);
    // av_get_packet() takes the size as an int, so a value above INT_MAX
    // would be converted to a negative size; reject it here instead.
    if (bytes < 4 || bytes > INT_MAX)
        return AVERROR_INVALIDDATA;

    uint32_t timestamp = avio_rl32(pb);
    int key_flag = 0;
    int index;
    if (tag == HXVF) {
        // flags == 1 marks a keyframe
        if (avio_rl32(pb) == 1)
            key_flag = AV_PKT_FLAG_KEY;
        index = ctx->video_index;
    } else {
        // skip the 4-byte flags field and the 4-byte fake start code
        avio_skip(pb, 8);
        index = ctx->audio_index;
        bytes -= 4;
    }

    ret = av_get_packet(pb, pkt, bytes);
    if (ret < 0)
        return ret;
    pkt->pts = timestamp;
    pkt->pos = pos;
    pkt->stream_index = index;
    pkt->flags |= key_flag;

    return 0;
}

/* Demuxer registration for the HXVS/HXVT format. */
const FFInputFormat ff_hxvs_demuxer = {
    .p.name         = "hxvs",
    // NOTE(review): the file header comment names the format "HXVS/HXVT",
    // while HXVF is the video chunk tag - confirm the intended wording.
    .p.long_name    = NULL_IF_CONFIG_SMALL("HXVF/HXVS IP camera format"),
    .p.extensions   = "264,265",
    .p.flags        = AVFMT_GENERIC_INDEX,
    .read_probe     = hxvs_probe,
    .read_header    = hxvs_read_header,
    .read_packet    = hxvs_read_packet,
    .priv_data_size = sizeof(HxvsContext),
};

Coverage Report

Created: 2026-01-16 07:48

Line	Count	Source
1		/*
2		* HXVS/HXVT IP camera format
3		*
4		* Copyright (c) 2025 Zhao Zhili <quinkblack@foxmail.com>
5		*
6		* This file is part of FFmpeg.
7		*
8		* FFmpeg is free software; you can redistribute it and/or
9		* modify it under the terms of the GNU Lesser General Public
10		* License as published by the Free Software Foundation; either
11		* version 2.1 of the License, or (at your option) any later version.
12		*
13		* FFmpeg is distributed in the hope that it will be useful,
14		* but WITHOUT ANY WARRANTY; without even the implied warranty of
15		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16		* Lesser General Public License for more details.
17		*
18		* You should have received a copy of the GNU Lesser General Public
19		* License along with FFmpeg; if not, write to the Free Software
20		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21		*/
22
23		#include "libavutil/intreadwrite.h"
24
25		#include "avio_internal.h"
26		#include "demux.h"
27		#include "internal.h"
28
29		/*
30		* Ref
31		* https://code.videolan.org/videolan/vlc/-/blob/master/modules/demux/hx.c
32		* https://github.com/francescovannini/ipcam26Xconvert/tree/main
33		*/
34
35		/* H.264
36		*
37		* uint32_t tag;
38		* uint32_t width;
39		* uint32_t height;
40		* uint8_t padding[4];
41		*/
42	962k	#define HXVS MKTAG('H', 'X', 'V', 'S')
43
44		/* H.265
45		*
46		* Same as HXVS.
47		*/
48	957k	#define HXVT MKTAG('H', 'X', 'V', 'T')
49
50		/* video frame
51		*
52		* uint32_t tag;
53		* uint32_t bytes
54		* uint32_t timestamp;
55		* uint32_t flags;
56		* ------------------
57		* uint8_t data[bytes]
58		*
59		* Note: each HXVF contains a single NALU or slice, not a frame.
60		*/
61	41.8k	#define HXVF MKTAG('H', 'X', 'V', 'F')
62
63		/* audio frame
64		*
65		* uint32_t tag;
66		* uint32_t bytes
67		* uint32_t timestamp;
68		* uint32_t flags;
69		* ------------------
70		* uint8_t data[bytes]
71		*
72		* Note: The first four bytes of data is fake start code and NALU type,
73		* which should be skipped.
74		*/
75	8.92k	#define HXAF MKTAG('H', 'X', 'A', 'F')
76
77		/* RAP frame index
78		*
79		* uint32_t tag;
80		* uint32_t bytes
81		* uint32_t duration;
82		* uint32_t flags;
83		*/
84	22.5k	#define HXFI MKTAG('H', 'X', 'F', 'I')
85
86	397	#define HXFI_TABLE_SIZE 200000
87	9.04k	#define HXFI_TABLE_COUNT (200000 / 8)
88
89		typedef struct HxvsContext {
90		int video_index;
91		int audio_index;
92		} HxvsContext;
93
94		static int hxvs_probe(const AVProbeData *p)
95	958k	{
96	958k	uint32_t flag = 0;
97	958k	uint32_t bytes;
98
99	958k	for (size_t i = 0; i < p->buf_size; ) {
100	949k	uint32_t tag = AV_RL32(&p->buf[i]);
101
102		// first four bytes must begin with HXVS/HXVT
103	949k	if (i == 0) {
104	949k	if (tag != HXVS && tag != HXVT)
105	948k	return 0;
106	184	flag |= 1;
107	184	i += 16;
108	184	continue;
109	949k	}
110
111		// Got RAP index at the end
112	281	if (tag == HXFI) {
113	3	if (flag == 7)
114	0	return AVPROBE_SCORE_MAX;
115	3	break;
116	3	}
117
118	278	i += 4;
119	278	if (tag == HXVF || tag == HXAF) {
120	132	bytes = AV_RL32(&p->buf[i]);
121	132	i += 12 + bytes;
122	132	flag |= (tag == HXVF) ? 2 : 4;
123	132	continue;
124	132	}
125
126	146	return 0;
127	278	}
128
129		// Get audio and video
130	9.28k	if (flag == 7)
131	5	return AVPROBE_SCORE_EXTENSION + 10;
132		// Get video only
133	9.27k	if (flag == 3)
134	4	return AVPROBE_SCORE_EXTENSION + 2;
135
136	9.27k	return 0;
137	9.27k	}
138
139		static int hxvs_create_video_stream(AVFormatContext *s, enum AVCodecID codec_id)
140	13.0k	{
141	13.0k	HxvsContext *ctx = s->priv_data;
142	13.0k	AVIOContext *pb = s->pb;
143	13.0k	AVStream *vt = avformat_new_stream(s, NULL);
144	13.0k	if (!vt)
145	0	return AVERROR(ENOMEM);
146
147	13.0k	vt->id = 0;
148	13.0k	vt->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
149	13.0k	vt->codecpar->codec_id = codec_id;
150	13.0k	vt->codecpar->width = avio_rl32(pb);
151	13.0k	vt->codecpar->height = avio_rl32(pb);
152	13.0k	avpriv_set_pts_info(vt, 32, 1, 1000);
153	13.0k	ffstream(vt)->need_parsing = AVSTREAM_PARSE_FULL;
154	13.0k	ctx->video_index = vt->index;
155
156		// skip padding
157	13.0k	avio_skip(pb, 4);
158
159	13.0k	return 0;
160	13.0k	}
161
162		static int hxvs_create_audio_stream(AVFormatContext *s)
163	13.0k	{
164	13.0k	HxvsContext *ctx = s->priv_data;
165	13.0k	AVStream *at = avformat_new_stream(s, NULL);
166	13.0k	if (!at)
167	0	return AVERROR(ENOMEM);
168
169	13.0k	at->id = 1;
170	13.0k	at->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
171	13.0k	at->codecpar->codec_id = AV_CODEC_ID_PCM_ALAW;
172	13.0k	at->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
173	13.0k	at->codecpar->sample_rate = 8000;
174	13.0k	avpriv_set_pts_info(at, 32, 1, 1000);
175	13.0k	ctx->audio_index = at->index;
176
177	13.0k	return 0;
178	13.0k	}
179
180		static int hxvs_build_index(AVFormatContext *s)
181	397	{
182	397	HxvsContext *ctx = s->priv_data;
183	397	AVIOContext *pb = s->pb;
184
185	397	int64_t size = avio_size(pb);
186	397	if (size < 0)
187	0	return size;
188		// Don't return error when HXFI is missing
189	397	int64_t pos = avio_seek(pb, size -(HXFI_TABLE_SIZE + 16), SEEK_SET);
190	397	if (pos < 0)
191	119	return 0;
192
193	278	uint32_t tag = avio_rl32(pb);
194	278	if (tag != HXFI)
195	253	return 0;
196	25	avio_skip(pb, 4);
197	25	AVStream *st = s->streams[ctx->video_index];
198	25	st->duration = avio_rl32(pb);
199	25	avio_skip(pb, 4);
200
201	25	FFStream *const sti = ffstream(st);
202	25	uint32_t prev_time;
203	9.04k	for (int i = 0; i < HXFI_TABLE_COUNT; i++) {
204	9.04k	uint32_t offset = avio_rl32(pb);
205		// pts = first_frame_pts + time
206	9.04k	uint32_t time = avio_rl32(pb);
207	9.04k	av_log(s, AV_LOG_TRACE, "%s/%d: offset %u, time %u\n",
208	9.04k	av_fourcc2str(HXAF), i, offset, time);
209	9.04k	if (!offset)
210	22	break;
211
212	9.02k	if (!i) {
213		// Get first frame timestamp
214	23	int64_t save_pos = avio_tell(pb);
215	23	pos = avio_seek(pb, offset, SEEK_SET);
216	23	if (pos < 0)
217	1	return pos;
218	22	tag = avio_rl32(pb);
219	22	if (tag != HXVF) {
220	2	av_log(s, AV_LOG_ERROR, "invalid tag %s at pos %u\n",
221	2	av_fourcc2str(tag), offset);
222	2	return AVERROR_INVALIDDATA;
223	2	}
224	20	avio_skip(pb, 4);
225		// save first frame timestamp to stream start_time
226	20	st->start_time = avio_rl32(pb);
227	20	pos = avio_seek(pb, save_pos, SEEK_SET);
228	20	if (pos < 0)
229	0	return pos;
230	9.00k	} else if (time == prev_time) {
231		// hxvs put SPS, PPS and slice into separate entries with same timestamp.
232		// Only record the first entry.
233	8.55k	continue;
234	8.55k	}
235	464	prev_time = time;
236	464	int ret = ff_add_index_entry(&sti->index_entries,
237	464	&sti->nb_index_entries,
238	464	&sti->index_entries_allocated_size,
239	464	offset, st->start_time + time,
240	464	0, 0, AVINDEX_KEYFRAME);
241	464	if (ret < 0)
242	0	return ret;
243	464	}
244
245	22	return 0;
246	25	}
247
248		static int hxvs_read_header(AVFormatContext *s)
249	13.2k	{
250	13.2k	AVIOContext *pb = s->pb;
251	13.2k	uint32_t tag = avio_rl32(pb);
252	13.2k	enum AVCodecID codec_id;
253
254	13.2k	if (tag == HXVS) {
255	4.86k	codec_id = AV_CODEC_ID_H264;
256	8.37k	} else if (tag == HXVT) {
257	8.22k	codec_id = AV_CODEC_ID_HEVC;
258	8.22k	} else {
259	146	av_log(s, AV_LOG_ERROR, "Unknown tag %s\n", av_fourcc2str(tag));
260	146	return AVERROR_INVALIDDATA;
261	146	}
262
263	13.0k	int ret = hxvs_create_video_stream(s, codec_id);
264	13.0k	if (ret < 0)
265	0	return ret;
266
267	13.0k	ret = hxvs_create_audio_stream(s);
268	13.0k	if (ret < 0)
269	0	return ret;
270
271	13.0k	if (pb->seekable & AVIO_SEEKABLE_NORMAL) {
272	397	int64_t pos = avio_tell(pb);
273	397	if (pos < 0)
274	0	return pos;
275
276	397	ret = hxvs_build_index(s);
277	397	if (ret < 0)
278	3	return ret;
279
280	394	pos = avio_seek(pb, pos, SEEK_SET);
281	394	if (pos < 0)
282	0	return ret;
283	394	}
284
285	13.0k	return 0;
286	13.0k	}
287
288		static int hxvs_read_packet(AVFormatContext *s, AVPacket *pkt)
289	57.2k	{
290	57.2k	HxvsContext *ctx = s->priv_data;
291	57.2k	AVIOContext *pb = s->pb;
292	57.2k	int64_t pos = avio_tell(pb);
293	57.2k	uint32_t tag = avio_rl32(pb);
294	57.2k	uint32_t bytes;
295	57.2k	int ret;
296
297	57.2k	if (avio_feof(pb) || (tag == HXFI))
298	35.2k	return AVERROR_EOF;
299
300	22.0k	if (tag != HXVF && tag != HXAF)
301	2.65k	return AVERROR_INVALIDDATA;
302
303	19.3k	bytes = avio_rl32(pb);
304	19.3k	if (bytes < 4)
305	8	return AVERROR_INVALIDDATA;
306
307	19.3k	uint32_t timestamp = avio_rl32(pb);
308	19.3k	int key_flag = 0;
309	19.3k	int index;
310	19.3k	if (tag == HXVF) {
311	13.3k	if (avio_rl32(pb) == 1)
312	36	key_flag = AV_PKT_FLAG_KEY;
313	13.3k	index = ctx->video_index;
314	13.3k	} else {
315	6.05k	avio_skip(pb, 8);
316	6.05k	index = ctx->audio_index;
317	6.05k	bytes -= 4;
318	6.05k	}
319
320	19.3k	ret = av_get_packet(pb, pkt, bytes);
321	19.3k	if (ret < 0)
322	145	return ret;
323	19.2k	pkt->pts = timestamp;
324	19.2k	pkt->pos = pos;
325	19.2k	pkt->stream_index = index;
326	19.2k	pkt->flags |= key_flag;
327
328	19.2k	return 0;
329	19.3k	}
330
331		const FFInputFormat ff_hxvs_demuxer = {
332		.p.name = "hxvs",
333		.p.long_name = NULL_IF_CONFIG_SMALL("HXVF/HXVS IP camera format"),
334		.p.extensions = "264,265",
335		.p.flags = AVFMT_GENERIC_INDEX,
336		.read_probe = hxvs_probe,
337		.read_header = hxvs_read_header,
338		.read_packet = hxvs_read_packet,
339		.priv_data_size = sizeof(HxvsContext),
340		};