Coverage Report

Created: 2025-07-11 07:16

/src/vlc/modules/demux/xiph_metadata.c
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
 * xiph_metadata.c: Vorbis Comment parser
3
 *****************************************************************************
4
 * Copyright © 2008-2013 VLC authors and VideoLAN
5
 *
6
 * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
7
 *          Jean-Baptiste Kempf <jb@videolan.org>
8
 *
9
 * This program is free software; you can redistribute it and/or modify it
10
 * under the terms of the GNU Lesser General Public License as published by
11
 * the Free Software Foundation; either version 2.1 of the License, or
12
 * (at your option) any later version.
13
 *
14
 * This program is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
 * GNU Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public License
20
 * along with this program; if not, write to the Free Software Foundation,
21
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22
 *****************************************************************************/
23
24
#ifdef HAVE_CONFIG_H
25
# include "config.h"
26
#endif
27
28
#include <assert.h>
29
30
#include <vlc_common.h>
31
#include <vlc_arrays.h>
32
#include <vlc_charset.h>
33
#include <vlc_strings.h>
34
#include <vlc_arrays.h>
35
#include <vlc_input.h>
36
#include "xiph_metadata.h"
37
#include "../meta_engine/ID3Pictures.h"
38
39
input_attachment_t* ParseFlacPicture( const uint8_t *p_data, size_t size,
40
    int i_attachments, int *i_cover_score, int *i_cover_idx )
41
0
{
42
0
    uint32_t type, len;
43
44
0
    if( size < 8 )
45
0
        return NULL;
46
0
#define RM(x) \
47
0
    do { \
48
0
        assert(size >= (x)); \
49
0
        size -= (x); \
50
0
        p_data += (x); \
51
0
    } while (0)
52
53
0
    type = GetDWBE( p_data );
54
0
    RM(4);
55
0
    len = GetDWBE( p_data );
56
0
    RM(4);
57
58
0
    if( size < len )
59
0
        return NULL;
60
61
0
    char *mime = strndup( (const char *)p_data, len );
62
0
    if( unlikely(mime == NULL) )
63
0
        return NULL;
64
0
    RM(len);
65
66
0
    if( size < 4 )
67
0
    {
68
0
        free( mime );
69
0
        return NULL;
70
0
    }
71
72
0
    len = GetDWBE( p_data );
73
0
    RM(4);
74
75
0
    if( size < len )
76
0
    {
77
0
        free( mime );
78
0
        return NULL;
79
0
    }
80
81
0
    input_attachment_t *p_attachment = NULL;
82
0
    char *description = strndup( (const char *)p_data, len );
83
0
    if( unlikely(description == NULL) )
84
0
        goto error;
85
0
    RM(len);
86
87
0
    EnsureUTF8( description );
88
89
0
    if( size < 20 )
90
0
        goto error;
91
92
0
    RM(4 * 4); /* skip */
93
94
0
    len = GetDWBE( p_data );
95
0
    RM(4);
96
97
0
    if( size < len )
98
0
        goto error;
99
100
    /* printf( "Picture type=%"PRIu32" mime=%s description='%s' "
101
               "file length=%zu\n", type, mime, description, len ); */
102
103
0
    char name[7 + (sizeof (i_attachments) * 3) + 4 + 1];
104
105
0
    snprintf( name, sizeof (name), "picture%u", i_attachments );
106
107
0
    if( !strcasecmp( mime, "image/jpeg" ) )
108
0
        strcat( name, ".jpg" );
109
0
    else if( !strcasecmp( mime, "image/png" ) )
110
0
        strcat( name, ".png" );
111
112
0
    p_attachment = vlc_input_attachment_New( name, mime, description, p_data,
113
0
                                             size /* XXX: len instead? */ );
114
115
0
    if( type < ARRAY_SIZE(ID3v2_cover_scores) &&
116
0
        *i_cover_score < ID3v2_cover_scores[type] )
117
0
    {
118
0
        *i_cover_idx = i_attachments;
119
0
        *i_cover_score = ID3v2_cover_scores[type];
120
0
    }
121
122
0
error:
123
0
    free( mime );
124
0
    free( description );
125
0
    return p_attachment;
126
0
}
127
128
#undef RM
/* Consumes x bytes from the front of the (p_data, i_data) cursor of the
 * function that uses it. i_data is unsigned there, so callers must verify
 * i_data >= x beforehand; the assertion enforces that invariant in debug
 * builds, like the RM() helper used by ParseFlacPicture(). */
#define RM(x) \
    do { \
        assert(i_data >= (x)); \
        i_data -= (x); \
        p_data += (x); \
    } while (0)
134
135
136
typedef struct chapters_array_t
137
{
138
    unsigned int i_size;
139
    seekpoint_t ** pp_chapters;
140
} chapters_array_t;
141
142
/**
 * Returns the seekpoint stored for chapter i_index, growing the table and
 * creating the seekpoint on first use.
 *
 * Indices above 4096 are refused. The table grows in steps of 50 slots and
 * new slots start out as NULL.
 *
 * @param i_index chapter number
 * @param p_array table to look up and, if needed, extend
 * @return the seekpoint for that chapter, or NULL when the index is refused
 *         or an allocation fails
 */
static seekpoint_t * getChapterEntry( unsigned int i_index, chapters_array_t *p_array )
{
    if ( i_index > 4096 )
        return NULL;

    if ( i_index >= p_array->i_size )
    {
        /* At least one step is always needed here. */
        unsigned int i_wanted = p_array->i_size;
        do
            i_wanted += 50;
        while ( i_wanted <= i_index );

        seekpoint_t **pp_grown = calloc( i_wanted, sizeof( seekpoint_t * ) );
        if ( pp_grown == NULL )
            return NULL;

        if ( p_array->pp_chapters != NULL )
        {
            memcpy( pp_grown, p_array->pp_chapters,
                    p_array->i_size * sizeof( seekpoint_t * ) );
            free( p_array->pp_chapters );
        }
        p_array->pp_chapters = pp_grown;
        p_array->i_size = i_wanted;
    }

    seekpoint_t **pp_slot = &p_array->pp_chapters[i_index];
    if ( *pp_slot == NULL )
        *pp_slot = vlc_seekpoint_New();
    return *pp_slot;
}
168
169
63
/* One bit per metadata field, recording which fields have already been
 * stored during the current parsing pass. The names are pasted together
 * with XIPHMETA_##var by the extraction macros below. */
#define XIPHMETA_Title        0x0001  /* bit 0 */
#define XIPHMETA_Artist       0x0002  /* bit 1 */
#define XIPHMETA_Genre        0x0004  /* bit 2 */
#define XIPHMETA_Copyright    0x0008  /* bit 3 */
#define XIPHMETA_Album        0x0010  /* bit 4 */
#define XIPHMETA_TrackNum     0x0020  /* bit 5 */
#define XIPHMETA_Description  0x0040  /* bit 6 */
#define XIPHMETA_Rating       0x0080  /* bit 7 */
#define XIPHMETA_Date         0x0100  /* bit 8 */
#define XIPHMETA_Language     0x0200  /* bit 9 */
#define XIPHMETA_Publisher    0x0400  /* bit 10 */
#define XIPHMETA_EncodedBy    0x0800  /* bit 11 */
#define XIPHMETA_TrackTotal   0x1000  /* bit 12 */
182
183
/**
 * Extracts the value following a tag at the very start of a cue sheet line.
 *
 * @param psz_line NUL-terminated line to inspect
 * @param psz_tag  expected prefix, compared case-insensitively
 * @param i_tag    length of psz_tag in bytes
 * @param b_quoted false: the rest of the line is returned verbatim.
 *                 true: the value is unquoted. Unescaped double quotes are
 *                 dropped and a backslash escapes only the single character
 *                 right after it (\" gives ", \\ gives \); a backslash in
 *                 front of any other character is dropped.
 * @return a newly allocated string the caller must free(), or NULL when the
 *         line does not start with the tag or memory is exhausted
 */
static char * xiph_ExtractCueSheetMeta( const char *psz_line,
                                        const char *psz_tag, int i_tag,
                                        bool b_quoted )
{
    if( strncasecmp( psz_line, psz_tag, i_tag ) != 0 )
        return NULL;

    const char *psz_in = psz_line + i_tag;
    const size_t i_in = strlen( psz_in );

    /* Unquoting can only shrink the text, so this is always large enough. */
    char *psz_value = malloc( i_in + 1 );
    if( psz_value == NULL )
        return NULL;

    if( !b_quoted )
    {
        memcpy( psz_value, psz_in, i_in + 1 );
        return psz_value;
    }

    /* Unquote string value */
    char *psz_out = psz_value;
    bool b_escaped = false;
    for( ; *psz_in != '\0'; psz_in++ )
    {
        switch( *psz_in )
        {
            case '\\':
                if( b_escaped )
                    *(psz_out++) = *psz_in;
                b_escaped = !b_escaped;
                break;
            case '"':
                if( b_escaped )
                    *(psz_out++) = *psz_in;
                b_escaped = false;
                break;
            default:
                *(psz_out++) = *psz_in;
                /* The escape applies to one character only; without this
                 * reset a later quote would wrongly be kept. */
                b_escaped = false;
                break;
        }
    }
    *psz_out = '\0';
    return psz_value;
}
233
234
/**
 * Interprets a single line of an embedded cue sheet.
 *
 * Top-level TITLE, REM GENRE, REM DATE and PERFORMER lines fill the matching
 * meta field, each at most once (tracked through *pi_flags). A "  TRACK "
 * line closes the track in progress and opens a new one, an
 * "    INDEX 01 mm:ss:ff" line gives the open track its time offset, and an
 * indented TITLE line gives it its name if it has none yet.
 *
 * @param pi_flags      in/out: XIPHMETA_* bits of the fields already stored
 * @param p_meta        meta object receiving the top-level fields
 * @param psz_line      NUL-terminated line, without its line terminator
 * @param pi_seekpoint  in/out: number of entries in *ppp_seekpoint
 * @param ppp_seekpoint in/out: table that completed tracks are appended to
 * @param pp_tmppoint   in/out: track currently being built, or NULL
 * @param pb_valid      in/out: whether the open track has a time offset
 */
static void xiph_ParseCueSheetMeta( unsigned *pi_flags, vlc_meta_t *p_meta,
                                    const char *psz_line,
                                    int *pi_seekpoint, seekpoint_t ***ppp_seekpoint,
                                    seekpoint_t **pp_tmppoint, bool *pb_valid )
{
    static const char psz_track_tag[] = "  TRACK ";
    static const char psz_index_tag[] = "    INDEX 01 ";
    static const char psz_name_tag[]  = "    TITLE \"";

    seekpoint_t *p_current = *pp_tmppoint;
    char *psz_string;

#define TRY_EXTRACT_CUEMETA(var, string, quoted) \
    if( !(*pi_flags & XIPHMETA_##var) &&\
         ( psz_string = xiph_ExtractCueSheetMeta( psz_line, string, sizeof(string) - 1, quoted ) ) )\
    {\
        vlc_meta_Set( p_meta, vlc_meta_##var, psz_string );\
        free( psz_string );\
        *pi_flags |= XIPHMETA_##var;\
    }

    TRY_EXTRACT_CUEMETA(Title, "TITLE \"", true)
    else TRY_EXTRACT_CUEMETA(Genre, "REM GENRE ", false)
    else TRY_EXTRACT_CUEMETA(Date, "REM DATE ", false)
    else TRY_EXTRACT_CUEMETA(Artist, "PERFORMER \"", true)
    else if( strncasecmp( psz_line, psz_track_tag, sizeof(psz_track_tag) - 1 ) == 0 )
    {
        /* Hand over or discard the track that was being built. */
        if( p_current != NULL )
        {
            if( *pb_valid )
                TAB_APPEND( *pi_seekpoint, *ppp_seekpoint, p_current );
            else
                vlc_seekpoint_Delete( p_current );
            *pb_valid = false;
        }
        p_current = vlc_seekpoint_New();
        *pp_tmppoint = p_current;
    }
    else if( p_current == NULL )
    {
        /* The remaining line types only make sense inside a track. */
    }
    else if( strncasecmp( psz_line, psz_index_tag, sizeof(psz_index_tag) - 1 ) == 0 )
    {
        unsigned m, s, f;
        const char *psz_time = psz_line + ( sizeof(psz_index_tag) - 1 );
        if( sscanf( psz_time, "%u:%u:%u", &m, &s, &f ) == 3 )
        {
            /* The third field is converted at 75 units per second. */
            p_current->i_time_offset = vlc_tick_from_sec(m * 60 + s) + vlc_tick_from_samples(f, 75);
            *pb_valid = true;
        }
    }
    else if( p_current->psz_name == NULL )
    {
        p_current->psz_name = xiph_ExtractCueSheetMeta( psz_line, psz_name_tag,
                                                        (int)( sizeof(psz_name_tag) - 1 ),
                                                        true );
    }
}
284
285
/**
 * Splits an embedded cue sheet into lines and feeds each one to
 * xiph_ParseCueSheetMeta().
 *
 * Lines may be terminated by CR, LF or CR LF, and a final line without any
 * terminator is parsed as well. No byte at or beyond p_data + i_data is read.
 *
 * @param pi_flags      in/out: XIPHMETA_* bits of the fields already stored
 * @param p_meta        meta object receiving the top-level fields
 * @param p_data        cue sheet text
 * @param i_data        number of bytes at p_data
 * @param pi_seekpoint  in/out: number of entries in *ppp_seekpoint
 * @param ppp_seekpoint in/out: table that valid tracks are appended to
 */
static void xiph_ParseCueSheet( unsigned *pi_flags, vlc_meta_t *p_meta,
                                const char *p_data, int i_data,
                                int *pi_seekpoint, seekpoint_t ***ppp_seekpoint )
{
    seekpoint_t *p_seekpoint = NULL;
    bool b_valid = false;

    if( i_data < 0 )
        return;

    const char *const p_end = p_data + i_data;
    const char *p_head = p_data;

    while( p_head < p_end )
    {
        /* Find the end of the current line. */
        const char *p_tail = p_head;
        while( p_tail < p_end && *p_tail != 0x0D && *p_tail != 0x0A )
            p_tail++;

        char *psz = strndup( p_head, p_tail - p_head );
        if( psz )
        {
            xiph_ParseCueSheetMeta( pi_flags, p_meta, psz,
                                    pi_seekpoint, ppp_seekpoint,
                                    &p_seekpoint, &b_valid );
            free( psz );
        }

        /* Step over one terminator: CR, LF or CR LF. */
        if( p_tail < p_end && *p_tail == 0x0D )
            p_tail++;
        if( p_tail < p_end && *p_tail == 0x0A )
            p_tail++;
        p_head = p_tail;
    }

    /* The last track is still pending: keep it only if it got a time offset. */
    if( p_seekpoint )
    {
        if( b_valid )
            TAB_APPEND( *pi_seekpoint, *ppp_seekpoint, p_seekpoint );
        else
            vlc_seekpoint_Delete( p_seekpoint );
    }
}
325
326
/**
 * Parses a Vorbis comment block: a little-endian 32-bit vendor length, the
 * vendor string (skipped), a 32-bit comment count, then for each comment a
 * 32-bit length followed by "KEY=value" text.
 *
 * Known keys are mapped to meta fields; values of a repeated key are joined
 * with ','. TRACKNUMBER may carry "track/total". METADATA_BLOCK_PICTURE is
 * base64-decoded and handed to ParseFlacPicture(). CHAPTERxx= and
 * CHAPTERxxNAME= build chapter seekpoints, "cuesheet=" is handed to
 * xiph_ParseCueSheet(), and any other KEY=value pair is stored as an extra
 * meta with an upper-cased key.
 *
 * @param p_fmt         may be NULL; otherwise its psz_language is replaced
 *                      when a LANGUAGE= comment is found
 * @param pp_meta       in/out: meta object; created here when *pp_meta is NULL
 * @param p_data        start of the comment block
 * @param i_data        number of bytes at p_data
 * @param i_attachments in/out: number of entries in *attachments
 * @param attachments   in/out: attachment table; when NULL, pictures are ignored
 * @param i_cover_score in/out: forwarded to ParseFlacPicture()
 * @param i_cover_idx   out: forwarded to ParseFlacPicture()
 * @param i_seekpoint   in/out: number of entries in *ppp_seekpoint
 * @param ppp_seekpoint in/out: seekpoint table receiving chapters and cue
 *                      sheet tracks (dereferenced without a NULL check)
 */
void vorbis_ParseComment( es_format_t *p_fmt, vlc_meta_t **pp_meta,
        const uint8_t *p_data, size_t i_data,
        int *i_attachments, input_attachment_t ***attachments,
        int *i_cover_score, int *i_cover_idx,
        int *i_seekpoint, seekpoint_t ***ppp_seekpoint )
{
    if( i_data < 8 )
        return;

    uint32_t vendor_length = GetDWLE(p_data); RM(4);

    if( vendor_length > i_data )
        return; /* invalid length */

    RM(vendor_length); /* TODO: handle vendor payload */

    if( i_data < 4 )
        return;

    uint32_t i_comment = GetDWLE(p_data); RM(4);

    if( i_comment > i_data || i_comment == 0 )
        return; /* invalid length */

    /* Reuse the caller's meta object or create one on demand */
    vlc_meta_t *p_meta = *pp_meta;
    if( !p_meta )
        *pp_meta = p_meta = vlc_meta_New();

    if( unlikely( !p_meta ) )
        return;

    /* XIPHMETA_* bits of the fields set by this call */
    unsigned hasMetaFlags = 0;

    chapters_array_t chapters_array = { 0, NULL };

/* Stores the value of key `txt` in meta field `var`. When this call already
 * stored a value for that field, the new one is appended after a ','. If
 * building the joined string fails, the previous value is left untouched. */
#define IF_EXTRACT(txt,var) \
    if( !strncasecmp(psz_comment, txt, strlen(txt)) ) \
    { \
        size_t key_length = strlen(txt); \
        EnsureUTF8( psz_comment + key_length ); \
        const char *oldval = vlc_meta_Get( p_meta, vlc_meta_ ## var ); \
        if( oldval && (hasMetaFlags & XIPHMETA_##var)) \
        { \
            char * newval; \
            if( asprintf( &newval, "%s,%s", oldval, &psz_comment[key_length] ) != -1 ) \
            { \
                vlc_meta_Set( p_meta, vlc_meta_ ## var, newval ); \
                free( newval ); \
            } \
        } \
        else \
            vlc_meta_Set( p_meta, vlc_meta_ ## var, &psz_comment[key_length] ); \
        hasMetaFlags |= XIPHMETA_##var; \
    }

/* Stores the value of key `txt` in meta field `var` unless this call already
 * set that field. The value is sanitized like in IF_EXTRACT. */
#define IF_EXTRACT_ONCE(txt,var) \
    if( !strncasecmp(psz_comment, txt, strlen(txt)) && !(hasMetaFlags & XIPHMETA_##var) ) \
    { \
        EnsureUTF8( psz_comment + strlen(txt) ); \
        vlc_meta_Set( p_meta, vlc_meta_ ## var, &psz_comment[strlen(txt)] ); \
        hasMetaFlags |= XIPHMETA_##var; \
    }

    for( ; i_comment > 0 && i_data >= 4; i_comment-- )
    {
        uint32_t comment_size = GetDWLE(p_data); RM(4);

        if( comment_size > i_data )
            break;

        if( comment_size == 0 )
            continue;

        /* Work on a NUL-terminated private copy of the comment */
        char* psz_comment = malloc( comment_size + 1 );

        if( unlikely( !psz_comment ) )
            goto next_comment;

        memcpy( psz_comment, p_data, comment_size );
        psz_comment[comment_size] = '\0';

        IF_EXTRACT("TITLE=", Title )
        else IF_EXTRACT("ARTIST=", Artist )
        else IF_EXTRACT("GENRE=", Genre )
        else IF_EXTRACT("COPYRIGHT=", Copyright )
        else IF_EXTRACT("ALBUM=", Album )
        else if( !(hasMetaFlags & XIPHMETA_TrackNum) && !strncasecmp(psz_comment, "TRACKNUMBER=", strlen("TRACKNUMBER=" ) ) )
        {
            /* Yeah yeah, such a clever idea, let's put xx/xx inside TRACKNUMBER
             * Oh, and let's not use TRACKTOTAL or TOTALTRACKS... */
            short unsigned u_track, u_total;
            int nb_values = sscanf( &psz_comment[strlen("TRACKNUMBER=")], "%hu/%hu", &u_track, &u_total );
            if( nb_values >= 1 )
            {
                char str[6]; /* up to 5 digits + NUL */
                snprintf(str, sizeof(str), "%hu", u_track);
                vlc_meta_Set( p_meta, vlc_meta_TrackNumber, str );
                hasMetaFlags |= XIPHMETA_TrackNum;
                if( nb_values >= 2 )
                {
                    snprintf(str, sizeof(str), "%hu", u_total);
                    vlc_meta_Set( p_meta, vlc_meta_TrackTotal, str );
                    hasMetaFlags |= XIPHMETA_TrackTotal;
                }
            }
        }
        else IF_EXTRACT_ONCE("TRACKTOTAL=", TrackTotal )
        else IF_EXTRACT_ONCE("TOTALTRACKS=", TrackTotal )
        else IF_EXTRACT("DESCRIPTION=", Description )
        else IF_EXTRACT("COMMENT=", Description )
        else IF_EXTRACT("COMMENTS=", Description )
        else IF_EXTRACT("RATING=", Rating )
        else IF_EXTRACT("DATE=", Date )
        else if( !strncasecmp(psz_comment, "LANGUAGE=", strlen("LANGUAGE=") ) )
        {
            IF_EXTRACT("LANGUAGE=",Language)
            if( p_fmt )
            {
                free( p_fmt->psz_language );
                p_fmt->psz_language = strdup(&psz_comment[strlen("LANGUAGE=")]);
            }
        }
        else IF_EXTRACT("ORGANIZATION=", Publisher )
        else IF_EXTRACT("ENCODER=", EncodedBy )
        else if( !strncasecmp( psz_comment, "METADATA_BLOCK_PICTURE=", strlen("METADATA_BLOCK_PICTURE=")))
        {
            if( attachments == NULL )
                goto next_comment;

            uint8_t *p_picture;
            size_t i_size = vlc_b64_decode_binary( &p_picture, &psz_comment[strlen("METADATA_BLOCK_PICTURE=")]);
            input_attachment_t *p_attachment = ParseFlacPicture( p_picture,
                i_size, *i_attachments, i_cover_score, i_cover_idx );
            free( p_picture );
            if( p_attachment )
            {
                TAB_APPEND_CAST( (input_attachment_t**),
                    *i_attachments, *attachments, p_attachment );
            }
        }
        else if( !strncasecmp(psz_comment, "CHAPTER", 7) )
        {
            unsigned int i_chapt;
            seekpoint_t *p_seekpoint = NULL;

            /* Upper-case the key so the sscanf() patterns below match */
            for( int i = 0; psz_comment[i] && psz_comment[i] != '='; i++ )
                if( psz_comment[i] >= 'a' && psz_comment[i] <= 'z' )
                    psz_comment[i] -= 'a' - 'A';

            if( strstr( psz_comment, "NAME=" ) &&
                    sscanf( psz_comment, "CHAPTER%uNAME=", &i_chapt ) == 1 )
            {
                char *p = strchr( psz_comment, '=' );
                p_seekpoint = getChapterEntry( i_chapt, &chapters_array );
                if ( !p || ! p_seekpoint ) goto next_comment;
                EnsureUTF8( ++p );
                if ( ! p_seekpoint->psz_name )
                    p_seekpoint->psz_name = strdup( p );
            }
            else if( sscanf( psz_comment, "CHAPTER%u=", &i_chapt ) == 1 )
            {
                unsigned int h, m, s, ms;
                char *p = strchr( psz_comment, '=' );
                if( p && sscanf( ++p, "%u:%u:%u.%u", &h, &m, &s, &ms ) == 4 )
                {
                    p_seekpoint = getChapterEntry( i_chapt, &chapters_array );
                    if ( ! p_seekpoint ) goto next_comment;
                    p_seekpoint->i_time_offset = vlc_tick_from_sec(h * 3600 + m * 60 + s) + VLC_TICK_FROM_MS(ms);
                }
            }
        }
        else if( !strncasecmp(psz_comment, "cuesheet=", 9) )
        {
            EnsureUTF8( &psz_comment[9] );
            xiph_ParseCueSheet( &hasMetaFlags, p_meta, &psz_comment[9], comment_size - 9,
                                i_seekpoint, ppp_seekpoint );
        }
        else if( strchr( psz_comment, '=' ) )
        {
            /* generic (PERFORMER/LICENSE/ORGANIZATION/LOCATION/CONTACT/ISRC,
             * undocumented tags and replay gain ) */
            char *p = strchr( psz_comment, '=' );
            *p++ = '\0';
            EnsureUTF8( p );

            for( int i = 0; psz_comment[i]; i++ )
                if( psz_comment[i] >= 'a' && psz_comment[i] <= 'z' )
                    psz_comment[i] -= 'a' - 'A';

            vlc_meta_SetExtra( p_meta, psz_comment, p );
        }

next_comment:
        free( psz_comment );
        RM( comment_size );
    }
#undef IF_EXTRACT
#undef IF_EXTRACT_ONCE
#undef RM

    /* Hand the chapters that were created over to the caller, in index order */
    for ( unsigned int i=0; i<chapters_array.i_size; i++ )
    {
        if ( !chapters_array.pp_chapters[i] ) continue;
        TAB_APPEND_CAST( (seekpoint_t**), *i_seekpoint, *ppp_seekpoint,
                         chapters_array.pp_chapters[i] );
    }
    free( chapters_array.pp_chapters );
}
532
533
const char *FindKateCategoryName( const char *psz_tag )
534
0
{
535
0
    for( size_t i = 0; i < sizeof(Katei18nCategories)/sizeof(Katei18nCategories[0]); i++ )
536
0
    {
537
0
        if( !strcmp( psz_tag, Katei18nCategories[i].psz_tag ) )
538
0
            return Katei18nCategories[i].psz_i18n;
539
0
    }
540
0
    return N_("Unknown category");
541
0
}
542