/src/mpg123/src/libmpg123/id3.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset) |
3 | | |
4 | | copyright 2006-2023 by the mpg123 project - free software under the terms of the LGPL 2.1 |
5 | | see COPYING and AUTHORS files in distribution or http://mpg123.org |
6 | | initially written by Thomas Orgis |
7 | | |
8 | | WIP: Handling of multiple ID3 tags in a stream. |
9 | | |
10 | | 1. With update flag: Add non-unique data, replace unique. |
11 | | - Only one TALB, TPE1, etc. |
12 | | - Only one TXXX with a certain description. |
13 | | - Only one COMM with certain language and description. |
14 | | - Only one APIC with certain type and description, generally only one |
15 | | of type 1 and 2 each. |
16 | | 2. Without update flag: wipe whole data and only store new stuff. |
17 | | |
18 | | BIG BAD BUT: How to properly handle seeks in a stream that make |
19 | | the parser encounter the same tags again in random order? Is |
20 | | there even a correct way to handle that without storing an |
21 | | ordered list of all tags? I could simplify the code here and just |
22 | | communicate that a frame should be an update to previous, and |
23 | | at which stream position the frame was encountered. But since |
24 | | libmpg123 is driven by MPEG frames, there could be multiple |
25 | | ID3v2 tags in direct succession treated by the parser without |
26 | | the library user being able to interfere. |
27 | | |
28 | | This is severely fucked. All that complexity also doesn't matter |
29 | | in practice, as streams use ICY and individual files have just one |
30 | | ID3v2 tag (relevant for libmpg123). It's an academic problem. But |
31 | | for seekable files, I could implement some jumping logic to find |
32 | | and parse all ID3v2 for once and then set a flag that only jumps |
33 | | the frames on seeks. That covers all local disk playback. For |
34 | | streams, seeking is no issue (seeking back, at least), so the |
35 | | update/replace logic works. |
36 | | |
37 | | Look at the standard: |
38 | | |
39 | | ------ |
40 | | 5. Tag location |
41 | | |
42 | | The default location of an ID3v2 tag is prepended to the audio so |
43 | | that players can benefit from the information when the data is |
44 | | streamed. It is however possible to append the tag, or make a |
45 | | prepend/append combination. When deciding upon where an unembedded |
46 | | tag should be located, the following order of preference SHOULD be |
47 | | considered. |
48 | | |
49 | | 1. Prepend the tag. |
50 | | |
51 | | 2. Prepend a tag with all vital information and add a second tag at |
52 | | the end of the file, before tags from other tagging systems. The |
53 | | first tag is required to have a SEEK frame. |
54 | | |
55 | | 3. Add a tag at the end of the file, before tags from other tagging |
56 | | systems. |
57 | | |
58 | | In case 2 and 3 the tag can simply be appended if no other known tags |
59 | | are present. The suggested method to find ID3v2 tags are: |
60 | | |
61 | | 1. Look for a prepended tag using the pattern found in section 3.1. |
62 | | |
63 | | 2. If a SEEK frame was found, use its values to guide further |
64 | | searching. |
65 | | |
66 | | 3. Look for a tag footer, scanning from the back of the file. |
67 | | |
68 | | For every new tag that is found, the old tag should be discarded |
69 | | unless the update flag in the extended header (section 3.2) is set. |
70 | | ------ |
71 | | |
72 | | For seekable streams, I simply need to implement explicit ID3v2 search along |
73 | | that recommendation and keep the complete information. Streams that continue |
74 | | growing during playback will not recognize added ID3v2 tags. So be it. |
75 | | For non-seekable streams, a tag is always parsed when encountered, assuming |
76 | | the order of update tags always matches. |
77 | | |
78 | | First step for the 1.26 release shall be the implementation of the update |
79 | | logic and glossing over the theoretical problem of re-parsing update |
80 | | frames in the wrong order by ignoring it. They are not that relevant. |
81 | | |
82 | | TODO: Cave in and add the missing frames from the spec. Not that far to go. |
83 | | But need another data structure to communicate those ... |
84 | | */ |
85 | | |
86 | | #include "mpg123lib_intern.h" |
87 | | #include "id3.h" |
88 | | #include "debug.h" |
89 | | |
90 | | #ifndef NO_ID3V2 /* Only the main parsing routine will always be there. */ |
91 | | |
/*
	Frame IDs that are recognized by name. The non-negative members of
	enum frame_types are listed in the same order as the entries of
	frame_type[]; the negative members stand for plain text frames and
	for anything not recognized.
*/
#define KNOWN_FRAMES 5
static const char frame_type[KNOWN_FRAMES][5] =
{
	"COMM",
	"TXXX",
	"RVA2",
	"USLT",
	"APIC"
};
enum frame_types
{
	unknown = -2,
	text    = -1,
	comment =  0,
	extra   =  1,
	rva2    =  2,
	uslt    =  3,
	picture =  4
};
96 | | |
97 | | /* UTF support definitions */ |
98 | | |
99 | | typedef void (*text_converter)(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); |
100 | | |
101 | | static void convert_latin1 (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); |
102 | | static void convert_utf16bom(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); |
103 | | static void convert_utf8 (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); |
104 | | |
105 | | static const text_converter text_converters[4] = |
106 | | { |
107 | | convert_latin1, |
108 | | /* We always check for (multiple) BOM in 16bit unicode. Without BOM, UTF16 BE is the default. |
109 | | Errors in encoding are detected anyway. */ |
110 | | convert_utf16bom, |
111 | | convert_utf16bom, |
112 | | convert_utf8 |
113 | | }; |
114 | | |
115 | | static const unsigned int encoding_widths[4] = { 1, 2, 2, 1 }; |
116 | | |
117 | | /* the code starts here... */ |
118 | | |
119 | | static void null_id3_links(mpg123_handle *fr) |
120 | 0 | { |
121 | 0 | fr->id3v2.title = NULL; |
122 | 0 | fr->id3v2.artist = NULL; |
123 | 0 | fr->id3v2.album = NULL; |
124 | 0 | fr->id3v2.year = NULL; |
125 | 0 | fr->id3v2.genre = NULL; |
126 | 0 | fr->id3v2.comment = NULL; |
127 | 0 | } |
128 | | |
129 | | void INT123_init_id3(mpg123_handle *fr) |
130 | 0 | { |
131 | 0 | fr->id3v2.version = 0; /* nothing there */ |
132 | 0 | null_id3_links(fr); |
133 | 0 | fr->id3v2.comments = 0; |
134 | 0 | fr->id3v2.comment_list = NULL; |
135 | 0 | fr->id3v2.texts = 0; |
136 | 0 | fr->id3v2.text = NULL; |
137 | 0 | fr->id3v2.extras = 0; |
138 | 0 | fr->id3v2.extra = NULL; |
139 | 0 | fr->id3v2.pictures = 0; |
140 | 0 | fr->id3v2.picture = NULL; |
141 | 0 | } |
142 | | |
143 | | /* Managing of the text, comment and extra lists. */ |
144 | | |
145 | | /* Initialize one element. */ |
146 | | static void init_mpg123_text(mpg123_text *txt) |
147 | 0 | { |
148 | 0 | mpg123_init_string(&txt->text); |
149 | 0 | mpg123_init_string(&txt->description); |
150 | 0 | txt->id[0] = 0; |
151 | 0 | txt->id[1] = 0; |
152 | 0 | txt->id[2] = 0; |
153 | 0 | txt->id[3] = 0; |
154 | 0 | txt->lang[0] = 0; |
155 | 0 | txt->lang[1] = 0; |
156 | 0 | txt->lang[2] = 0; |
157 | 0 | } |
158 | | |
159 | | static void init_mpg123_picture(mpg123_picture *pic) |
160 | 0 | { |
161 | 0 | mpg123_init_string(&pic->mime_type); |
162 | 0 | mpg123_init_string(&pic->description); |
163 | 0 | pic->type = 0; |
164 | 0 | pic->size = 0; |
165 | 0 | pic->data = NULL; |
166 | 0 | } |
167 | | |
168 | | /* Free memory of one element. */ |
169 | | static void free_mpg123_text(mpg123_text *txt) |
170 | 0 | { |
171 | 0 | mpg123_free_string(&txt->text); |
172 | 0 | mpg123_free_string(&txt->description); |
173 | 0 | } |
174 | | |
175 | | static void free_mpg123_picture(mpg123_picture * pic) |
176 | 0 | { |
177 | 0 | mpg123_free_string(&pic->mime_type); |
178 | 0 | mpg123_free_string(&pic->description); |
179 | 0 | if (pic->data != NULL) |
180 | 0 | free(pic->data); |
181 | 0 | } |
182 | | |
183 | | /* Free memory of whole list. */ |
184 | 0 | #define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) |
185 | 0 | #define free_text(mh) free_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) |
186 | 0 | #define free_extra(mh) free_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) |
187 | 0 | #define free_picture(mh) free_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures)) |
188 | | static void free_id3_text(mpg123_text **list, size_t *size) |
189 | 0 | { |
190 | 0 | size_t i; |
191 | 0 | for(i=0; i<*size; ++i) free_mpg123_text(&((*list)[i])); |
192 | |
|
193 | 0 | free(*list); |
194 | 0 | *list = NULL; |
195 | 0 | *size = 0; |
196 | 0 | } |
197 | | static void free_id3_picture(mpg123_picture **list, size_t *size) |
198 | 0 | { |
199 | 0 | size_t i; |
200 | 0 | for(i=0; i<*size; ++i) free_mpg123_picture(&((*list)[i])); |
201 | |
|
202 | 0 | free(*list); |
203 | 0 | *list = NULL; |
204 | 0 | *size = 0; |
205 | 0 | } |
206 | | |
207 | | /* Add items to the list. */ |
208 | | |
209 | 0 | #define add_comment(mh, l, d) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments), NULL, l, d) |
210 | 0 | #define add_text(mh, id) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts), id, NULL, NULL) |
211 | 0 | #define add_uslt(mh, l, d) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts), id, l, d) |
212 | 0 | #define add_extra(mh, d) add_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras), NULL, NULL, d) |
213 | 0 | #define add_picture(mh, t, d) add_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures), t, d) |
214 | | static mpg123_text *add_id3_text( mpg123_text **list, size_t *size |
215 | | , char id[4], char lang[3], mpg123_string *description ) |
216 | 0 | { |
217 | 0 | mdebug( "add_id3_text id=%s lang=%s, desc=%s" |
218 | 0 | , id ? (char[5]) { id[0], id[1], id[2], id[3], 0 } : "(nil)" |
219 | 0 | , lang ? (char[4]) { lang[0], lang[1], lang[2], 0 } : "(nil)" |
220 | 0 | , description ? (description->fill ? description->p : "(empty)") : "(nil)" ); |
221 | 0 | if(lang && !description) |
222 | 0 | return NULL; // no lone language intended |
223 | 0 | if(id || description) |
224 | 0 | { |
225 | | // Look through list of existing texts and return an existing entry |
226 | | // if it should be overwritten. |
227 | 0 | for(size_t i=0; i<*size; ++i) |
228 | 0 | { |
229 | 0 | mpg123_text *entry = *list+i; |
230 | 0 | if(description) |
231 | 0 | { // Overwrite entry with same description and same ID and language. |
232 | 0 | if( (!id || !memcmp(id, entry->id, 4)) |
233 | 0 | && (!lang || !memcmp(entry->lang, lang, 3)) |
234 | 0 | && mpg123_same_string(&(entry->description), description) |
235 | 0 | ) |
236 | 0 | return entry; |
237 | 0 | } else if(id && !memcmp(id, entry->id, 4)) |
238 | 0 | return entry; // Just ovewrite because of same ID. |
239 | 0 | mdebug("add_id3_text: entry %zu was no match", i); |
240 | 0 | } |
241 | 0 | } |
242 | 0 | mdebug("add_id3_text: append to list of %zu", *size); |
243 | | // Nothing found, add new one. |
244 | 0 | mpg123_text *x = INT123_safe_realloc(*list, sizeof(mpg123_text)*(*size+1)); |
245 | 0 | if(x == NULL) return NULL; /* bad */ |
246 | | |
247 | 0 | *list = x; |
248 | 0 | *size += 1; |
249 | 0 | init_mpg123_text(&((*list)[*size-1])); |
250 | |
|
251 | 0 | return &((*list)[*size-1]); /* Return pointer to the added text. */ |
252 | 0 | } |
253 | | |
254 | | |
255 | | static mpg123_picture *add_id3_picture(mpg123_picture **list, size_t *size, char type, mpg123_string *description) |
256 | 0 | { |
257 | 0 | if(!description) |
258 | 0 | return NULL; |
259 | | |
260 | | // Return entry to overwrite, if appropriate. |
261 | 0 | for(size_t i=0; i<*size; ++i) |
262 | 0 | { |
263 | 0 | mpg123_picture *entry = *list+i; |
264 | 0 | if( type == entry->type |
265 | 0 | && ( type == 1 || type == 2 || |
266 | 0 | mpg123_same_string(&entry->description, description) |
267 | 0 | ) |
268 | 0 | ) |
269 | 0 | return entry; |
270 | 0 | } |
271 | | // Append a new one. |
272 | 0 | mpg123_picture *x = INT123_safe_realloc(*list, sizeof(mpg123_picture)*(*size+1)); |
273 | 0 | if(x == NULL) return NULL; /* bad */ |
274 | | |
275 | 0 | *list = x; |
276 | 0 | *size += 1; |
277 | 0 | init_mpg123_picture(&((*list)[*size-1])); |
278 | |
|
279 | 0 | return &((*list)[*size-1]); /* Return pointer to the added picture. */ |
280 | 0 | } |
281 | | |
282 | | /* OK, back to the higher level functions. */ |
283 | | |
284 | | void INT123_exit_id3(mpg123_handle *fr) |
285 | 0 | { |
286 | 0 | free_picture(fr); |
287 | 0 | free_comment(fr); |
288 | 0 | free_extra(fr); |
289 | 0 | free_text(fr); |
290 | 0 | } |
291 | | |
292 | | void INT123_reset_id3(mpg123_handle *fr) |
293 | 0 | { |
294 | 0 | INT123_exit_id3(fr); |
295 | 0 | INT123_init_id3(fr); |
296 | 0 | } |
297 | | |
298 | | /* Set the id3v2.artist id3v2.title ... links to elements of the array. */ |
299 | | void INT123_id3_link(mpg123_handle *fr) |
300 | 0 | { |
301 | 0 | size_t i; |
302 | 0 | mpg123_id3v2 *v2 = &fr->id3v2; |
303 | 0 | debug("linking ID3v2"); |
304 | 0 | null_id3_links(fr); |
305 | 0 | for(i=0; i<v2->texts; ++i) |
306 | 0 | { |
307 | 0 | mpg123_text *entry = &v2->text[i]; |
308 | 0 | if (!strncmp("TIT2", entry->id, 4)) v2->title = &entry->text; |
309 | 0 | else if(!strncmp("TALB", entry->id, 4)) v2->album = &entry->text; |
310 | 0 | else if(!strncmp("TPE1", entry->id, 4)) v2->artist = &entry->text; |
311 | 0 | else if(!strncmp("TYER", entry->id, 4)) v2->year = &entry->text; |
312 | 0 | else if(!strncmp("TCON", entry->id, 4)) v2->genre = &entry->text; |
313 | 0 | } |
314 | 0 | for(i=0; i<v2->comments; ++i) |
315 | 0 | { |
316 | 0 | mpg123_text *entry = &v2->comment_list[i]; |
317 | 0 | if(entry->description.fill == 0 || entry->description.p[0] == 0) |
318 | 0 | v2->comment = &entry->text; |
319 | 0 | } |
320 | | /* When no generic comment found, use the last non-generic one. */ |
321 | 0 | if(v2->comment == NULL && v2->comments > 0) |
322 | 0 | v2->comment = &v2->comment_list[v2->comments-1].text; |
323 | 0 | } |
324 | | |
325 | | /* |
326 | | Store ID3 text data in an mpg123_string; either verbatim copy or |
327 | | everything translated to UTF-8 encoding. |
328 | | Preserve the zero string separator (I don't need strlen for the total size). |
329 | | |
330 | | Since we can overwrite strings with ID3 update frames, don't free |
331 | | memory, just grow strings. |
332 | | */ |
333 | | static void store_id3_text(mpg123_string *sb, unsigned char *source, size_t source_size, const int noquiet, const int notranslate) |
334 | 0 | { |
335 | 0 | unsigned char encoding; |
336 | 0 | if(sb) // Always overwrite, even with nothing. |
337 | 0 | sb->fill = 0; |
338 | 0 | if(!source_size) |
339 | 0 | { |
340 | 0 | debug("Empty id3 data!"); |
341 | 0 | return; |
342 | 0 | } |
343 | | |
344 | | /* We shall just copy the data. Client wants to decode itself. */ |
345 | 0 | if(notranslate) |
346 | 0 | { |
347 | | /* Future: Add a path for ID3 errors. */ |
348 | 0 | if(!mpg123_grow_string(sb, source_size)) |
349 | 0 | { |
350 | 0 | if(noquiet) error("Cannot resize target string, out of memory?"); |
351 | 0 | return; |
352 | 0 | } |
353 | 0 | memcpy(sb->p, source, source_size); |
354 | 0 | sb->fill = source_size; |
355 | 0 | debug1("stored undecoded ID3 text of size %zu", source_size); |
356 | 0 | return; |
357 | 0 | } |
358 | | |
359 | 0 | encoding = source[0]; |
360 | 0 | if(encoding > mpg123_id3_enc_max) |
361 | 0 | { |
362 | 0 | if(noquiet) |
363 | 0 | error1("Unknown text encoding %u, I take no chances, sorry!", encoding); |
364 | 0 | return; |
365 | 0 | } |
366 | 0 | INT123_id3_to_utf8(sb, encoding, source+1, source_size-1, noquiet); |
367 | |
|
368 | 0 | if(sb->fill) debug1("UTF-8 string (the first one): %s", sb->p); |
369 | 0 | else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!"); |
370 | 0 | } |
371 | | |
372 | | /* On error, sb->size is 0. */ |
373 | | /* Also, encoding has been checked already! */ |
374 | | void INT123_id3_to_utf8(mpg123_string *sb, unsigned char encoding, const unsigned char *source, size_t source_size, int noquiet) |
375 | 0 | { |
376 | 0 | unsigned int bwidth; |
377 | 0 | if(sb) |
378 | 0 | sb->fill = 0; |
379 | 0 | debug1("encoding: %u", encoding); |
380 | | /* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16. |
381 | | UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */ |
382 | 0 | bwidth = encoding_widths[encoding]; |
383 | | /* Hack! I've seen a stray zero byte before BOM. Is that supposed to happen? */ |
384 | 0 | if(encoding != mpg123_id3_utf16be) /* UTF16be _can_ beging with a null byte! */ |
385 | 0 | while(source_size > bwidth && source[0] == 0) |
386 | 0 | { |
387 | 0 | --source_size; |
388 | 0 | ++source; |
389 | 0 | debug("skipped leading zero"); |
390 | 0 | } |
391 | 0 | if(source_size % bwidth) |
392 | 0 | { |
393 | | /* When we need two bytes for a character, it's strange to have an uneven bytestream length. */ |
394 | 0 | if(noquiet) warning2("Weird tag size %d for encoding %u - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding); |
395 | 0 | source_size -= source_size % bwidth; |
396 | 0 | } |
397 | 0 | text_converters[encoding](sb, source, source_size, noquiet); |
398 | 0 | } |
399 | | |
400 | | /* You have checked encoding to be in the range already. */ |
401 | | static unsigned char *next_text(unsigned char* prev, unsigned char encoding, size_t limit) |
402 | 0 | { |
403 | 0 | unsigned char *text = prev; |
404 | 0 | size_t width = encoding_widths[encoding]; |
405 | |
|
406 | 0 | if(limit > PTRDIFF_MAX) |
407 | 0 | return NULL; |
408 | | |
409 | | /* So I go lengths to find zero or double zero... |
410 | | Remember bug 2834636: Only check for aligned NULLs! */ |
411 | 0 | while(text-prev < (ptrdiff_t)limit) |
412 | 0 | { |
413 | 0 | if(text[0] == 0) |
414 | 0 | { |
415 | 0 | if(width <= limit-(text-prev)) |
416 | 0 | { |
417 | 0 | size_t i = 1; |
418 | 0 | for(; i<width; ++i) if(text[i] != 0) break; |
419 | |
|
420 | 0 | if(i == width) /* found a null wide enough! */ |
421 | 0 | { |
422 | 0 | text += width; |
423 | 0 | break; |
424 | 0 | } |
425 | 0 | } |
426 | 0 | else return NULL; /* No full character left? This text is broken */ |
427 | 0 | } |
428 | | |
429 | 0 | text += width; |
430 | 0 | } |
431 | 0 | if((size_t)(text-prev) >= limit) text = NULL; |
432 | |
|
433 | 0 | return text; |
434 | 0 | } |
435 | | |
/* Human-readable name for an ID3v2 text encoding byte, "unknown!" for
   anything above 3. */
static const char *enc_name(unsigned char enc)
{
	static const char *const names[] =
	{
		"Latin 1",
		"UTF-16 BOM",
		"UTF-16 BE",
		"UTF-8"
	};

	if(enc < sizeof(names)/sizeof(names[0]))
		return names[enc];
	return "unknown!";
}
447 | | |
448 | | static void process_text(mpg123_handle *fr, unsigned char *realdata, size_t realsize, char *id) |
449 | 0 | { |
450 | | /* Text encoding $xx */ |
451 | | /* The text (encoded) ... */ |
452 | 0 | mpg123_text *t = add_text(fr, id); |
453 | 0 | if(VERBOSE4) fprintf(stderr, "Note: Storing text from %s encoding\n", enc_name(realdata[0])); |
454 | 0 | if(t == NULL) |
455 | 0 | { |
456 | 0 | if(NOQUIET) error("Unable to attach new text!"); |
457 | 0 | return; |
458 | 0 | } |
459 | 0 | mdebug("process_text: (over)writing entry with ID %s", t->id[0] |
460 | 0 | ? (char[5]) { t->id[0], t->id[1], t->id[2], t->id[3], 0 } |
461 | 0 | : "(nil)" ); |
462 | 0 | memcpy(t->id, id, 4); |
463 | 0 | store_id3_text(&t->text, realdata, realsize, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); |
464 | 0 | if(VERBOSE4) // Do not print unsanitized text to terminals! |
465 | 0 | fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame stored\n", id[0], id[1], id[2], id[3]); |
466 | 0 | } |
467 | | |
468 | | static void process_picture(mpg123_handle *fr, unsigned char *realdata, size_t realsize) |
469 | 0 | { |
470 | 0 | unsigned char encoding; |
471 | 0 | mpg123_picture *i = NULL; |
472 | 0 | unsigned char* workpoint = NULL; |
473 | 0 | mpg123_string mime; mpg123_init_string(&mime); |
474 | 0 | unsigned char image_type = 0; |
475 | 0 | mpg123_string description; mpg123_init_string(&description); |
476 | 0 | unsigned char *image_data = NULL; |
477 | 0 | if(realsize < 1) |
478 | 0 | { |
479 | 0 | debug("Empty id3 data!"); |
480 | 0 | return; |
481 | 0 | } |
482 | 0 | encoding = realdata[0]; |
483 | 0 | realdata++; realsize--; |
484 | 0 | if(encoding > mpg123_id3_enc_max) |
485 | 0 | { |
486 | 0 | if(NOQUIET) |
487 | 0 | error1("Unknown text encoding %u, I take no chances, sorry!", encoding); |
488 | 0 | return; |
489 | 0 | } |
490 | 0 | if(VERBOSE4) fprintf(stderr, "Note: Storing picture from APIC frame.\n"); |
491 | | |
492 | | /* get mime type (encoding is always latin-1) */ |
493 | 0 | workpoint = next_text(realdata, 0, realsize); |
494 | 0 | if(!workpoint) |
495 | 0 | { |
496 | 0 | if(NOQUIET) |
497 | 0 | error("Unable to get mime type for picture; skipping picture."); |
498 | 0 | return; |
499 | 0 | } |
500 | 0 | INT123_id3_to_utf8(&mime, 0, realdata, workpoint - realdata, NOQUIET); |
501 | 0 | realsize -= workpoint - realdata; |
502 | 0 | realdata = workpoint; |
503 | | /* get picture type */ |
504 | 0 | image_type = realdata[0]; |
505 | 0 | realdata++; realsize--; |
506 | | /* get description (encoding is encoding) */ |
507 | 0 | workpoint = next_text(realdata, encoding, realsize); |
508 | 0 | if(!workpoint) |
509 | 0 | { |
510 | 0 | if(NOQUIET) |
511 | 0 | error("Unable to get description for picture; skipping picture."); |
512 | 0 | mpg123_free_string(&mime); |
513 | 0 | return; |
514 | 0 | } |
515 | 0 | INT123_id3_to_utf8(&description, encoding, realdata, workpoint - realdata, NOQUIET); |
516 | 0 | realsize -= workpoint - realdata; |
517 | 0 | if(realsize) |
518 | 0 | image_data = (unsigned char*)malloc(realsize); |
519 | 0 | if(!realsize || !image_data) { |
520 | 0 | if(NOQUIET) |
521 | 0 | error("No picture data or malloc failure; skipping picture."); |
522 | 0 | mpg123_free_string(&description); |
523 | 0 | mpg123_free_string(&mime); |
524 | 0 | return; |
525 | 0 | } |
526 | 0 | memcpy(image_data, workpoint, realsize); |
527 | | |
528 | | // All data ready now, append to/replace in list. |
529 | 0 | i = add_picture(fr, image_type, &description); |
530 | 0 | if(!i) |
531 | 0 | { |
532 | 0 | if(NOQUIET) |
533 | 0 | error("Unable to attach new picture!"); |
534 | 0 | free(image_data); |
535 | 0 | mpg123_free_string(&description); |
536 | 0 | mpg123_free_string(&mime); |
537 | 0 | return; |
538 | 0 | } |
539 | | |
540 | | // Either this is a fresh image, or one to be replaced. |
541 | | // We hand over memory, so old storage needs to be freed. |
542 | 0 | free_mpg123_picture(i); |
543 | 0 | i->type = image_type; |
544 | 0 | i->size = realsize; |
545 | 0 | i->data = image_data; |
546 | 0 | mpg123_move_string(&mime, &i->mime_type); |
547 | 0 | mpg123_move_string(&description, &i->description); |
548 | 0 | if(VERBOSE4) |
549 | 0 | fprintf(stderr, "Note: ID3v2 APIC picture frame of type: %d\n", i->type); |
550 | 0 | } |
551 | | |
552 | | /* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one |
553 | | Special gimmik: It also stores USLT to the texts. Stucture is the same as for comments. */ |
554 | | static void process_comment(mpg123_handle *fr, enum frame_types tt, unsigned char *realdata, size_t realsize, int rva_level, char *id) |
555 | 0 | { |
556 | | /* Text encoding $xx */ |
557 | | /* Language $xx xx xx */ |
558 | | /* Short description (encoded!) <text> $00 (00) */ |
559 | | /* Then the comment text (encoded) ... */ |
560 | 0 | unsigned char encoding = realdata[0]; |
561 | 0 | char lang[3]; // realdata + 1 |
562 | 0 | unsigned char *descr = realdata+4; |
563 | 0 | unsigned char *text = NULL; |
564 | 0 | mpg123_text *xcom = NULL; |
565 | 0 | mpg123_text localcom; // UTF-8 variant for local processing, remember to clean up! |
566 | 0 | init_mpg123_text(&localcom); |
567 | |
|
568 | 0 | if(realsize < (size_t)(descr-realdata)) |
569 | 0 | { |
570 | 0 | if(NOQUIET) error1("Invalid frame size of %zu (too small for anything).", realsize); |
571 | 0 | return; |
572 | 0 | } |
573 | 0 | if(encoding > mpg123_id3_enc_max) |
574 | 0 | { |
575 | 0 | if(NOQUIET) |
576 | 0 | error1("Unknown text encoding %u, I take no chances, sorry!", encoding); |
577 | 0 | return; |
578 | 0 | } |
579 | 0 | memcpy(lang, realdata+1, 3); |
580 | | /* Now I can abuse a byte from lang for the encoding. */ |
581 | 0 | descr[-1] = encoding; |
582 | | /* Be careful with finding the end of description, I have to honor encoding here. */ |
583 | 0 | text = next_text(descr, encoding, realsize-(descr-realdata)); |
584 | 0 | if(text == NULL) |
585 | 0 | { |
586 | 0 | if(NOQUIET) |
587 | 0 | error("No comment text / valid description?"); |
588 | 0 | return; |
589 | 0 | } |
590 | 0 | { // just vor variable scope |
591 | 0 | mpg123_string description; |
592 | 0 | mpg123_init_string(&description); |
593 | | // Store the text, with desired encoding, but for comments always a local copy in UTF-8. |
594 | 0 | store_id3_text( &description, descr-1, text-descr+1 |
595 | 0 | , NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT ); |
596 | 0 | if(tt == comment) |
597 | 0 | store_id3_text( &localcom.description, descr-1, text-descr+1 |
598 | 0 | , NOQUIET, 0 ); |
599 | 0 | if(VERBOSE4) |
600 | 0 | fprintf( stderr, "Note: Storing comment from %s encoding\n" |
601 | 0 | , enc_name(realdata[0]) ); |
602 | 0 | xcom = tt == uslt |
603 | 0 | ? add_uslt(fr, lang, &description) |
604 | 0 | : add_comment(fr, lang, &description); |
605 | 0 | if(xcom == NULL) |
606 | 0 | { |
607 | 0 | if(NOQUIET) |
608 | 0 | error("Unable to attach new comment!"); |
609 | 0 | mpg123_free_string(&description); |
610 | 0 | free_mpg123_text(&localcom); |
611 | 0 | return; |
612 | 0 | } |
613 | 0 | memcpy(xcom->id, id, 4); |
614 | 0 | memcpy(xcom->lang, lang, 3); |
615 | | // That takes over the description allocation. |
616 | 0 | mpg123_move_string(&description, &xcom->description); |
617 | 0 | } |
618 | | |
619 | 0 | text[-1] = encoding; /* Byte abusal for encoding... */ |
620 | 0 | store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); |
621 | | /* Remember: I will probably decode the above (again) for rva comment checking. So no messing around, please. */ |
622 | |
|
623 | 0 | if(VERBOSE4) /* Do _not_ print the verbatim text: The encoding might be funny! */ |
624 | 0 | { |
625 | 0 | fprintf(stderr, "Note: ID3 comm/uslt desc of length %zu.\n", xcom->description.fill); |
626 | 0 | fprintf(stderr, "Note: ID3 comm/uslt text of length %zu.\n", xcom->text.fill); |
627 | 0 | } |
628 | | /* Look out for RVA info only when we really deal with a straight comment. */ |
629 | 0 | if(tt == comment && localcom.description.fill > 0) |
630 | 0 | { |
631 | 0 | int rva_mode = -1; /* mix / album */ |
632 | 0 | if( !strcasecmp(localcom.description.p, "rva") |
633 | 0 | || !strcasecmp(localcom.description.p, "rva_mix") |
634 | 0 | || !strcasecmp(localcom.description.p, "rva_track") |
635 | 0 | || !strcasecmp(localcom.description.p, "rva_radio") ) |
636 | 0 | rva_mode = 0; |
637 | 0 | else if( !strcasecmp(localcom.description.p, "rva_album") |
638 | 0 | || !strcasecmp(localcom.description.p, "rva_audiophile") |
639 | 0 | || !strcasecmp(localcom.description.p, "rva_user") ) |
640 | 0 | rva_mode = 1; |
641 | 0 | if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level)) |
642 | 0 | { |
643 | | /* Only translate the contents in here where we really need them. */ |
644 | 0 | store_id3_text(&localcom.text, text-1, realsize+1-(text-realdata), NOQUIET, 0); |
645 | 0 | if(localcom.text.fill > 0) |
646 | 0 | { |
647 | 0 | fr->rva.gain[rva_mode] = (float) atof(localcom.text.p); |
648 | 0 | if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]); |
649 | 0 | fr->rva.peak[rva_mode] = 0; |
650 | 0 | fr->rva.level[rva_mode] = rva_level; |
651 | 0 | } |
652 | 0 | } |
653 | 0 | } |
654 | | /* Make sure to free the local memory... */ |
655 | 0 | free_mpg123_text(&localcom); |
656 | 0 | } |
657 | | |
658 | | static void process_extra(mpg123_handle *fr, unsigned char* realdata, size_t realsize, int rva_level, char *id) |
659 | 0 | { |
660 | | /* Text encoding $xx */ |
661 | | /* Description ... $00 (00) */ |
662 | | /* Text ... */ |
663 | 0 | unsigned char encoding = realdata[0]; |
664 | 0 | unsigned char *descr = realdata+1; /* remember, the encoding is descr[-1] */ |
665 | 0 | unsigned char *text; |
666 | 0 | mpg123_text *xex; |
667 | 0 | mpg123_text localex; |
668 | |
|
669 | 0 | if((int)realsize < descr-realdata) |
670 | 0 | { |
671 | 0 | if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize); |
672 | 0 | return; |
673 | 0 | } |
674 | 0 | if(encoding > mpg123_id3_enc_max) |
675 | 0 | { |
676 | 0 | if(NOQUIET) |
677 | 0 | error1("Unknown text encoding %u, I take no chances, sorry!", encoding); |
678 | 0 | return; |
679 | 0 | } |
680 | 0 | text = next_text(descr, encoding, realsize-(descr-realdata)); |
681 | 0 | if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(realdata[0])); |
682 | 0 | if(text == NULL) |
683 | 0 | { |
684 | 0 | if(NOQUIET) error("No extra frame text / valid description?"); |
685 | 0 | return; |
686 | 0 | } |
687 | 0 | { // just vor variable scope |
688 | 0 | mpg123_string description; |
689 | 0 | mpg123_init_string(&description); |
690 | | /* The outside storage gets reencoded to UTF-8 only if not requested otherwise. |
691 | | Remember that we really need the -1 here to hand in the encoding byte!*/ |
692 | 0 | store_id3_text( &description, descr-1, text-descr+1 |
693 | 0 | , NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT ); |
694 | 0 | xex = add_extra(fr, &description); |
695 | 0 | if(xex) |
696 | 0 | mpg123_move_string(&description, &xex->description); |
697 | 0 | else |
698 | 0 | mpg123_free_string(&description); |
699 | 0 | } |
700 | 0 | if(xex == NULL) |
701 | 0 | { |
702 | 0 | if(NOQUIET) error("Unable to attach new extra text!"); |
703 | 0 | return; |
704 | 0 | } |
705 | 0 | memcpy(xex->id, id, 4); |
706 | 0 | init_mpg123_text(&localex); /* For our local copy. */ |
707 | | |
708 | | /* Our local copy is always stored in UTF-8! */ |
709 | 0 | store_id3_text(&localex.description, descr-1, text-descr+1, NOQUIET, 0); |
710 | | /* At first, only store the outside copy of the payload. We may not need the local copy. */ |
711 | 0 | text[-1] = encoding; |
712 | 0 | store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); |
713 | | |
714 | | /* Now check if we would like to interpret this extra info for RVA. */ |
715 | 0 | if(localex.description.fill > 0) |
716 | 0 | { |
717 | 0 | int is_peak = 0; |
718 | 0 | int rva_mode = -1; /* mix / album */ |
719 | |
|
720 | 0 | if(!strncasecmp(localex.description.p, "replaygain_track_",17)) |
721 | 0 | { |
722 | 0 | if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n"); |
723 | |
|
724 | 0 | rva_mode = 0; |
725 | 0 | if(!strcasecmp(localex.description.p, "replaygain_track_peak")) is_peak = 1; |
726 | 0 | else if(strcasecmp(localex.description.p, "replaygain_track_gain")) rva_mode = -1; |
727 | 0 | } |
728 | 0 | else |
729 | 0 | if(!strncasecmp(localex.description.p, "replaygain_album_",17)) |
730 | 0 | { |
731 | 0 | if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n"); |
732 | |
|
733 | 0 | rva_mode = 1; |
734 | 0 | if(!strcasecmp(localex.description.p, "replaygain_album_peak")) is_peak = 1; |
735 | 0 | else if(strcasecmp(localex.description.p, "replaygain_album_gain")) rva_mode = -1; |
736 | 0 | } |
737 | 0 | if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level)) |
738 | 0 | { |
739 | | /* Now we need the translated copy of the data. */ |
740 | 0 | store_id3_text(&localex.text, text-1, realsize-(text-realdata)+1, NOQUIET, 0); |
741 | 0 | if(localex.text.fill > 0) |
742 | 0 | { |
743 | 0 | if(is_peak) |
744 | 0 | { |
745 | 0 | fr->rva.peak[rva_mode] = (float) atof(localex.text.p); |
746 | 0 | if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]); |
747 | 0 | } |
748 | 0 | else |
749 | 0 | { |
750 | 0 | fr->rva.gain[rva_mode] = (float) atof(localex.text.p); |
751 | 0 | if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]); |
752 | 0 | } |
753 | 0 | fr->rva.level[rva_mode] = rva_level; |
754 | 0 | } |
755 | 0 | } |
756 | 0 | } |
757 | |
|
758 | 0 | free_mpg123_text(&localex); |
759 | 0 | } |
760 | | |
761 | | /* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID |
762 | | Note that not all frames survived to 2.4; the mapping goes to 2.3 . |
763 | | A notable miss is the old RVA frame, which is very unspecific anyway. |
764 | | This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */ |
765 | | static int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */ |
766 | 0 | { |
767 | 0 | size_t i; |
768 | 0 | char *old[] = |
769 | 0 | { |
770 | 0 | "COM", "TAL", "TBP", "TCM", "TCO", "TCR", "TDA", "TDY", "TEN", "TFT", |
771 | 0 | "TIM", "TKE", "TLA", "TLE", "TMT", "TOA", "TOF", "TOL", "TOR", "TOT", |
772 | 0 | "TP1", "TP2", "TP3", "TP4", "TPA", "TPB", "TRC", "TDA", "TRK", "TSI", |
773 | 0 | "TSS", "TT1", "TT2", "TT3", "TXT", "TXX", "TYE" |
774 | 0 | }; |
775 | 0 | char *new[] = |
776 | 0 | { |
777 | 0 | "COMM", "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDAT", "TDLY", "TENC", "TFLT", |
778 | 0 | "TIME", "TKEY", "TLAN", "TLEN", "TMED", "TOPE", "TOFN", "TOLY", "TORY", "TOAL", |
779 | 0 | "TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPUB", "TSRC", "TRDA", "TRCK", "TSIZ", |
780 | 0 | "TSSE", "TIT1", "TIT2", "TIT3", "TEXT", "TXXX", "TYER" |
781 | 0 | }; |
782 | 0 | for(i=0; i<sizeof(old)/sizeof(char*); ++i) |
783 | 0 | { |
784 | 0 | if(!strncmp(id, old[i], 3)) |
785 | 0 | { |
786 | 0 | memcpy(id, new[i], 4); |
787 | 0 | if(VERBOSE3) fprintf(stderr, "Translated ID3v2.2 frame %s to %s\n", old[i], new[i]); |
788 | 0 | return 0; |
789 | 0 | } |
790 | 0 | } |
791 | 0 | if(VERBOSE3) fprintf(stderr, "Ignoring untranslated ID3v2.2 frame %c%c%c\n", id[0], id[1], id[2]); |
792 | 0 | return -1; |
793 | 0 | } |
794 | | |
795 | | #endif /* NO_ID3V2 */ |
796 | | |
797 | | int store_id3v2( mpg123_handle *fr |
798 | | , unsigned long first4bytes, unsigned char buf[6], unsigned long length ) |
799 | 0 | { |
800 | 0 | int ret = 1; |
801 | 0 | int64_t ret2; |
802 | 0 | unsigned long fullen = 10+length; |
803 | 0 | if(fr->id3v2_raw) |
804 | 0 | free(fr->id3v2_raw); |
805 | 0 | fr->id3v2_size = 0; |
806 | | /* Allocate one byte more for a closing zero as safety catch for strlen(). */ |
807 | 0 | fr->id3v2_raw = malloc(fullen+1); |
808 | 0 | if(!fr->id3v2_raw) |
809 | 0 | { |
810 | 0 | fr->err = MPG123_OUT_OF_MEM; |
811 | 0 | if(NOQUIET) |
812 | 0 | error1("ID3v2: Arrg! Unable to allocate %lu bytes" |
813 | 0 | " for ID3v2 data - trying to skip instead.", length+1); |
814 | 0 | if((ret2=fr->rd->skip_bytes(fr,length)) < 0) |
815 | 0 | ret = ret2; |
816 | 0 | else |
817 | 0 | ret = 0; |
818 | 0 | } |
819 | 0 | else |
820 | 0 | { |
821 | 0 | fr->id3v2_raw[0] = (first4bytes>>24) & 0xff; |
822 | 0 | fr->id3v2_raw[1] = (first4bytes>>16) & 0xff; |
823 | 0 | fr->id3v2_raw[2] = (first4bytes>>8) & 0xff; |
824 | 0 | fr->id3v2_raw[3] = first4bytes & 0xff; |
825 | 0 | memcpy(fr->id3v2_raw+4, buf, 6); |
826 | 0 | if((ret2=fr->rd->read_frame_body(fr, fr->id3v2_raw+10, length)) < 0) |
827 | 0 | { |
828 | 0 | ret=ret2; |
829 | 0 | free(fr->id3v2_raw); |
830 | 0 | fr->id3v2_raw = NULL; |
831 | 0 | } |
832 | 0 | else |
833 | 0 | { /* Closing with a zero for paranoia. */ |
834 | 0 | fr->id3v2_raw[fullen] = 0; |
835 | 0 | fr->id3v2_size = fullen; |
836 | 0 | } |
837 | 0 | } |
838 | 0 | return ret; |
839 | 0 | } |
840 | | |
841 | | /* |
842 | | trying to parse ID3v2.3 and ID3v2.4 tags... |
843 | | |
844 | | returns: 0: bad or just unparseable tag |
845 | | 1: good, (possibly) new tag info |
846 | | <0: reader error (may need more data feed, try again) |
847 | | */ |
848 | | int INT123_parse_new_id3(mpg123_handle *fr, unsigned long first4bytes) |
849 | 0 | { |
850 | 0 | #define UNSYNC_FLAG 128 |
851 | 0 | #define EXTHEAD_FLAG 64 /* ID3v2.3+ */ |
852 | 0 | #define COMPRESS_FLAG 64 /* ID3v2.2 */ |
853 | 0 | #define EXP_FLAG 32 |
854 | 0 | #define FOOTER_FLAG 16 |
855 | 0 | #define EXT_UPDATE_FLAG 64 /* ID3v2.4 only: extended header update flag */ |
856 | 0 | #define UNKNOWN_FLAGS 15 /* 00001111*/ |
857 | 0 | unsigned char buf[6]; |
858 | 0 | unsigned long length=0; |
859 | 0 | unsigned char flags = 0; |
860 | 0 | int ret = 1; |
861 | 0 | int64_t ret2; |
862 | 0 | int storetag = 0; |
863 | 0 | unsigned int footlen = 0; |
864 | 0 | #ifndef NO_ID3V2 |
865 | 0 | int skiptag = 0; |
866 | 0 | #endif |
867 | 0 | unsigned char major = first4bytes & 0xff; |
868 | 0 | debug1("ID3v2: major tag version: %i", major); |
869 | |
|
870 | 0 | if(major == 0xff) return 0; /* Invalid... */ |
871 | 0 | if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */ |
872 | 0 | return ret2; |
873 | | |
874 | 0 | if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */ |
875 | | |
876 | 0 | if(fr->p.flags & MPG123_STORE_RAW_ID3) |
877 | 0 | storetag = 1; |
878 | | /* second new byte are some nice flags, if these are invalid skip the whole thing */ |
879 | 0 | flags = buf[1]; |
880 | 0 | debug1("ID3v2: flags 0x%08x", flags); |
881 | | /* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */ |
882 | 0 | #define synchsafe_to_long(buf,res) \ |
883 | 0 | ( \ |
884 | 0 | (((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \ |
885 | 0 | (res = (((unsigned long) (buf)[0]) << 21) \ |
886 | 0 | | (((unsigned long) (buf)[1]) << 14) \ |
887 | 0 | | (((unsigned long) (buf)[2]) << 7) \ |
888 | 0 | | ((unsigned long) (buf)[3]) \ |
889 | 0 | ,1) \ |
890 | 0 | ) |
891 | | /* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */ |
892 | | /* Remember: bytes_to_long() can yield ULONG_MAX on 32 bit platforms! */ |
893 | 0 | #define bytes_to_long(buf,res) \ |
894 | 0 | ( \ |
895 | 0 | major == 3 ? \ |
896 | 0 | (res = (((unsigned long) (buf)[0]) << 24) \ |
897 | 0 | | (((unsigned long) (buf)[1]) << 16) \ |
898 | 0 | | (((unsigned long) (buf)[2]) << 8) \ |
899 | 0 | | ((unsigned long) (buf)[3]) \ |
900 | 0 | ,1) : synchsafe_to_long(buf,res) \ |
901 | 0 | ) |
902 | | /* for id3v2.2 only */ |
903 | 0 | #define threebytes_to_long(buf,res) \ |
904 | 0 | ( \ |
905 | 0 | res = (((unsigned long) (buf)[0]) << 16) \ |
906 | 0 | | (((unsigned long) (buf)[1]) << 8) \ |
907 | 0 | | ((unsigned long) (buf)[2]) \ |
908 | 0 | ) |
909 | | |
910 | | /* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number */ |
911 | | /* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */ |
912 | | /* Note: This is an 28 bit value in 32 bit storage, plenty of space for */ |
913 | | /* length+x for reasonable x. */ |
914 | 0 | if(!synchsafe_to_long(buf+2,length)) |
915 | 0 | { |
916 | 0 | if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]); |
917 | 0 | return 0; |
918 | 0 | } |
919 | 0 | if(flags & FOOTER_FLAG) |
920 | 0 | footlen = 10; |
921 | 0 | debug1("ID3v2: tag data length %lu", length); |
922 | 0 | #ifndef NO_ID3V2 |
923 | 0 | if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length); |
924 | | /* skip if unknown version/scary flags, parse otherwise */ |
925 | 0 | if(fr->p.flags & MPG123_SKIP_ID3V2) |
926 | 0 | { |
927 | 0 | if(VERBOSE3) |
928 | 0 | fprintf(stderr, "Note: Skipping ID3v2 tag per user request.\n"); |
929 | 0 | skiptag = 1; |
930 | 0 | } |
931 | 0 | if((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2)) |
932 | 0 | { |
933 | 0 | if(NOQUIET) |
934 | 0 | warning2( "ID3v2: Won't parse the ID3v2 tag with major version" |
935 | 0 | " %u and flags 0x%xu - some extra code may be needed" |
936 | 0 | , major, flags ); |
937 | 0 | skiptag = 1; |
938 | 0 | } |
939 | | // Standard says that compressed tags should be ignored as there isn't an agreed |
940 | | // compressoion scheme. |
941 | 0 | if(major == 2 && flags & COMPRESS_FLAG) |
942 | 0 | { |
943 | 0 | if(NOQUIET) |
944 | 0 | warning("ID3v2: ignoring compressed ID3v2.2 tag"); |
945 | 0 | skiptag = 1; |
946 | 0 | } |
947 | 0 | if(length < 10) |
948 | 0 | { |
949 | 0 | if(NOQUIET) |
950 | 0 | warning1("ID3v2: unrealistic small tag lengh %lu, skipping", length); |
951 | 0 | skiptag = 1; |
952 | 0 | } |
953 | 0 | if(!skiptag) |
954 | 0 | storetag = 1; |
955 | 0 | #endif |
956 | 0 | if(storetag) |
957 | 0 | { |
958 | | /* Stores whole tag with footer and an additonal trailing zero. */ |
959 | 0 | if((ret2 = store_id3v2(fr, first4bytes, buf, length+footlen)) <= 0) |
960 | 0 | return ret2; |
961 | 0 | } |
962 | 0 | #ifndef NO_ID3V2 |
963 | 0 | if(skiptag) |
964 | 0 | { |
965 | 0 | if(VERBOSE3) |
966 | 0 | fprintf(stderr, "Note: skipped tag clearing possibly existing ID3v2 data"); |
967 | 0 | INT123_reset_id3(fr); // Old data is invalid. |
968 | 0 | #endif |
969 | 0 | if(!storetag && (ret2=fr->rd->skip_bytes(fr,length+footlen))<0) |
970 | 0 | ret=ret2; |
971 | 0 | #ifndef NO_ID3V2 |
972 | 0 | } |
973 | 0 | else |
974 | 0 | { |
975 | 0 | unsigned char* tagdata = fr->id3v2_raw+10; |
976 | | /* try to interpret that beast */ |
977 | 0 | debug("ID3v2: analysing frames..."); |
978 | 0 | if(length > 0) |
979 | 0 | { |
980 | 0 | unsigned char extflags = 0; |
981 | 0 | unsigned long tagpos = 0; |
982 | | /* bytes of frame title and of framesize value */ |
983 | 0 | unsigned int head_part = major > 2 ? 4 : 3; |
984 | 0 | unsigned int flag_part = major > 2 ? 2 : 0; |
985 | | /* The amount of bytes that are unconditionally read for each frame: */ |
986 | | /* ID, size, flags. */ |
987 | 0 | unsigned int framebegin = head_part+head_part+flag_part; |
988 | 0 | debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6); |
989 | 0 | if(flags & EXTHEAD_FLAG) |
990 | 0 | { |
991 | 0 | debug("ID3v2: extended header"); |
992 | 0 | if(!bytes_to_long(tagdata, tagpos) || tagpos >= length) |
993 | 0 | { |
994 | 0 | ret = 0; |
995 | 0 | if(NOQUIET) |
996 | 0 | error4( "Bad (non-synchsafe/too large) tag offset from extended header:" |
997 | 0 | "0x%02x%02x%02x%02x" |
998 | 0 | , tagdata[0], tagdata[1], tagdata[2], tagdata[3] ); |
999 | 0 | } else if(tagpos < 6) |
1000 | 0 | { |
1001 | 0 | ret = 0; |
1002 | 0 | if(NOQUIET) |
1003 | 0 | merror("Extended header too small (%lu).", tagpos); |
1004 | 0 | } |
1005 | 0 | if(major == 3) |
1006 | 0 | { |
1007 | 0 | tagpos += 4; // The size itself is not included. |
1008 | 0 | if(tagpos >= length) |
1009 | 0 | { |
1010 | 0 | ret = 0; |
1011 | 0 | if(NOQUIET) |
1012 | 0 | error("Too much extended v2.3 header."); |
1013 | 0 | } |
1014 | 0 | } else if(ret) // v2.4 and at least got my 6 bytes of ext header |
1015 | 0 | { |
1016 | | // Only v4 knows update frames, check for that. |
1017 | | // Need to step back. Header is 4 bytes length, one byte flag size, |
1018 | | // one byte flags. Flag size has to be 1! |
1019 | 0 | if(tagdata[4] == 1 && tagdata[5] & EXT_UPDATE_FLAG) |
1020 | 0 | { |
1021 | 0 | if(VERBOSE3) |
1022 | 0 | fprintf(stderr, "Note: ID3v2.4 update tag\n"); |
1023 | 0 | extflags |= EXT_UPDATE_FLAG; |
1024 | 0 | } |
1025 | 0 | } |
1026 | 0 | } |
1027 | 0 | if(!(extflags & EXT_UPDATE_FLAG)) |
1028 | 0 | { |
1029 | 0 | if(VERBOSE3) |
1030 | 0 | fprintf(stderr, "Note: non-update tag replacing existing ID3v2 data\n"); |
1031 | 0 | INT123_reset_id3(fr); |
1032 | 0 | } |
1033 | 0 | if(ret > 0) |
1034 | 0 | { |
1035 | 0 | char id[5]; |
1036 | 0 | unsigned long framesize; |
1037 | 0 | unsigned long fflags; /* need 16 bits, actually */ |
1038 | 0 | id[4] = 0; |
1039 | 0 | fr->id3v2.version = major; |
1040 | | /* Pos now advanced after ext head, now a frame has to follow. */ |
1041 | | /* Note: tagpos <= length, which is 28 bit integer, so both */ |
1042 | | /* far away from overflow for adding known small values. */ |
1043 | | /* I want to read at least one full header now. */ |
1044 | 0 | while(length >= tagpos+framebegin) |
1045 | 0 | { |
1046 | 0 | int i = 0; |
1047 | 0 | unsigned long pos = tagpos; |
1048 | | /* level 1,2,3 - 0 is info from lame/info tag! */ |
1049 | | /* rva tags with ascending significance, then general frames */ |
1050 | 0 | enum frame_types tt = unknown; |
1051 | | /* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */ |
1052 | 0 | for(i=0; i< head_part; ++i) |
1053 | 0 | if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58)) |
1054 | 0 | || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) ) |
1055 | 0 | { |
1056 | 0 | debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]); |
1057 | | /* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */ |
1058 | 0 | goto tagparse_cleanup; /* Need to escape two loops here. */ |
1059 | 0 | } |
1060 | 0 | if(ret > 0) |
1061 | 0 | { |
1062 | | /* 4 or 3 bytes id */ |
1063 | 0 | strncpy(id, (char*) tagdata+pos, head_part); |
1064 | 0 | id[head_part] = 0; /* terminate for 3 or 4 bytes */ |
1065 | 0 | pos += head_part; |
1066 | 0 | tagpos += head_part; |
1067 | | /* size as 32 bits or 28 bits */ |
1068 | 0 | if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize); |
1069 | 0 | else |
1070 | 0 | if(!bytes_to_long(tagdata+pos, framesize)) |
1071 | 0 | { |
1072 | | /* Just assume that up to now there was some good data. */ |
1073 | 0 | if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id); |
1074 | 0 | break; |
1075 | 0 | } |
1076 | 0 | if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize); |
1077 | 0 | tagpos += head_part; |
1078 | 0 | pos += head_part; |
1079 | 0 | if(fr->id3v2.version > 2) |
1080 | 0 | { |
1081 | 0 | fflags = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]); |
1082 | 0 | pos += 2; |
1083 | 0 | tagpos += 2; |
1084 | 0 | } |
1085 | 0 | else fflags = 0; |
1086 | |
|
1087 | 0 | if(length - tagpos < framesize) |
1088 | 0 | { |
1089 | 0 | if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag."); |
1090 | 0 | break; |
1091 | 0 | } |
1092 | 0 | tagpos += framesize; /* the important advancement in whole tag */ |
1093 | | /* for sanity, after full parsing tagpos should be == pos */ |
1094 | | /* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */ |
1095 | | /* v2.4: %0abc0000 %0h00kmnp */ |
1096 | | /* v2.3: %abc00000 %ijk00000 */ |
1097 | | /* v2.2: just zero */ |
1098 | 0 | #define V3 (major == 3) |
1099 | 0 | #define BAD_FFLAGS (unsigned long) (V3 ? 7967 : 36784) |
1100 | 0 | #define PRES_TAG_FFLAG (unsigned long) (V3 ? 32768 : 16384) |
1101 | 0 | #define PRES_FILE_FFLAG (unsigned long) (V3 ? 16384 : 8192) |
1102 | 0 | #define READ_ONLY_FFLAG (unsigned long) (V3 ? 8192 : 4096) |
1103 | 0 | #define GROUP_FFLAG (unsigned long) (V3 ? 32 : 64) |
1104 | 0 | #define COMPR_FFLAG (unsigned long) (V3 ? 128 : 8) |
1105 | 0 | #define ENCR_FFLAG (unsigned long) (V3 ? 64 : 4) |
1106 | 0 | #define UNSYNC_FFLAG (unsigned long) (V3 ? 0 : 2) |
1107 | 0 | #define DATLEN_FFLAG (unsigned long) (V3 ? 0 : 1) |
1108 | 0 | if(head_part < 4 && promote_framename(fr, id) != 0) continue; |
1109 | | |
1110 | | /* shall not or want not handle these */ |
1111 | 0 | if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG)) |
1112 | 0 | { |
1113 | 0 | if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame"); |
1114 | 0 | continue; |
1115 | 0 | } |
1116 | | |
1117 | 0 | for(i = 0; i < KNOWN_FRAMES; ++i) |
1118 | 0 | if(!strncmp(frame_type[i], id, 4)){ tt = i; break; } |
1119 | |
|
1120 | 0 | if(id[0] == 'T' && tt != extra) tt = text; |
1121 | |
|
1122 | 0 | if(tt != unknown) |
1123 | 0 | { |
1124 | 0 | int rva_mode = -1; /* mix / album */ |
1125 | 0 | unsigned long realsize = framesize; |
1126 | 0 | unsigned char* realdata = tagdata+pos; |
1127 | 0 | unsigned char* unsyncbuffer = NULL; |
1128 | 0 | if(((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) && framesize > 0) |
1129 | 0 | { |
1130 | 0 | unsigned long ipos = 0; |
1131 | 0 | unsigned long opos = 0; |
1132 | 0 | debug("Id3v2: going to de-unsync the frame data"); |
1133 | | /* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */ |
1134 | | /* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */ |
1135 | | /* standard mandates that de-unsync should always be safe if flag is set */ |
1136 | 0 | realdata = unsyncbuffer = malloc(framesize+1); /* will need <= bytes, plus a safety zero */ |
1137 | 0 | if(realdata == NULL) |
1138 | 0 | { |
1139 | 0 | if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync"); |
1140 | 0 | continue; |
1141 | 0 | } |
1142 | | /* now going byte per byte through the data... */ |
1143 | 0 | realdata[0] = tagdata[pos]; |
1144 | 0 | opos = 1; |
1145 | 0 | for(ipos = pos+1; ipos < pos+framesize; ++ipos) |
1146 | 0 | { |
1147 | 0 | if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff))) |
1148 | 0 | { |
1149 | 0 | realdata[opos++] = tagdata[ipos]; |
1150 | 0 | } |
1151 | 0 | } |
1152 | 0 | realsize = opos; |
1153 | | /* Append a zero to keep strlen() safe. */ |
1154 | 0 | realdata[realsize] = 0; |
1155 | 0 | debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize); |
1156 | 0 | } |
1157 | | // The spec says there is a group byte, without explicitly saying that it is |
1158 | | // the first thing following the header. I just assume so, because of the |
1159 | | // ordering of the flags. |
1160 | 0 | if(fflags & GROUP_FFLAG) |
1161 | 0 | { // Just skip group byte. |
1162 | 0 | if(realsize) |
1163 | 0 | { |
1164 | 0 | if(VERBOSE3) |
1165 | 0 | fprintf(stderr, "Note: frame of group %d\n", realdata[0]); |
1166 | 0 | --realsize; |
1167 | 0 | ++realdata; |
1168 | 0 | } else if(NOQUIET) |
1169 | 0 | error("Grouped frame without group byte, even."); |
1170 | 0 | } |
1171 | 0 | if(fflags & DATLEN_FFLAG) |
1172 | 0 | { |
1173 | | // Spec says the original (without compression or unsync) data length follows, |
1174 | | // so it should match de-unsynced data now. |
1175 | 0 | if(realsize >= 4) |
1176 | 0 | { |
1177 | 0 | unsigned long datlen; |
1178 | 0 | if(bytes_to_long(realdata, datlen) && datlen == realsize-4) |
1179 | 0 | { |
1180 | 0 | realsize -= 4; |
1181 | 0 | realdata += 4; |
1182 | 0 | } else |
1183 | 0 | { |
1184 | 0 | if(NOQUIET) |
1185 | 0 | error("frame data length bad, skipping"); |
1186 | 0 | realsize = 0; |
1187 | 0 | } |
1188 | 0 | } else |
1189 | 0 | { |
1190 | 0 | realsize = 0; |
1191 | 0 | if(NOQUIET) |
1192 | 0 | error("frame truncated at frame data length, skipping"); |
1193 | 0 | } |
1194 | 0 | } |
1195 | 0 | pos = 0; /* now at the beginning again... */ |
1196 | | /* Avoid reading over boundary, even if there is a */ |
1197 | | /* zero byte of padding for safety. */ |
1198 | 0 | if(realsize) switch(tt) |
1199 | 0 | { |
1200 | 0 | case comment: |
1201 | 0 | case uslt: |
1202 | 0 | process_comment(fr, tt, realdata, realsize, comment+1, id); |
1203 | 0 | break; |
1204 | 0 | case extra: /* perhaps foobar2000's work */ |
1205 | 0 | process_extra(fr, realdata, realsize, extra+1, id); |
1206 | 0 | break; |
1207 | 0 | case rva2: /* "the" RVA tag */ |
1208 | 0 | { |
1209 | | /* starts with null-terminated identification */ |
1210 | 0 | if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata); |
1211 | | /* default: some individual value, mix mode */ |
1212 | 0 | rva_mode = 0; |
1213 | 0 | if( !strncasecmp((char*)realdata, "album", 5) |
1214 | 0 | || !strncasecmp((char*)realdata, "audiophile", 10) |
1215 | 0 | || !strncasecmp((char*)realdata, "user", 4)) |
1216 | 0 | rva_mode = 1; |
1217 | 0 | if(fr->rva.level[rva_mode] <= rva2+1) |
1218 | 0 | { |
1219 | 0 | pos += strlen((char*) realdata) + 1; |
1220 | 0 | debug2("got my pos: %lu - %lu", realsize, pos); |
1221 | | // channel and two bytes for RVA value |
1222 | | // pos possibly just past the safety zero, so one more than realsize |
1223 | 0 | if(pos > realsize || realsize-pos < 3) |
1224 | 0 | { |
1225 | 0 | if(NOQUIET) |
1226 | 0 | error("bad RVA2 tag (truncated?)"); |
1227 | 0 | } |
1228 | 0 | else if(realdata[pos] == 1) |
1229 | 0 | { |
1230 | 0 | ++pos; |
1231 | | /* only handle master channel */ |
1232 | 0 | debug("ID3v2: it is for the master channel"); |
1233 | | /* two bytes adjustment, one byte for bits representing peak - n bytes, eh bits, for peak */ |
1234 | | /* 16 bit signed integer = dB * 512. Do not shift signed integers! Multiply instead. |
1235 | | Also no implementation-defined casting. Reinterpret the pointer to signed char, then do |
1236 | | proper casting. */ |
1237 | 0 | fr->rva.gain[rva_mode] = (float) ( |
1238 | 0 | ((short)((signed char*)realdata)[pos]) * 256 + (short)realdata[pos+1] ) / 512; |
1239 | 0 | pos += 2; |
1240 | 0 | if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]); |
1241 | | /* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */ |
1242 | 0 | fr->rva.peak[rva_mode] = 0; |
1243 | 0 | fr->rva.level[rva_mode] = rva2+1; |
1244 | 0 | } |
1245 | 0 | } |
1246 | 0 | } |
1247 | 0 | break; |
1248 | | /* non-rva metainfo, simply store... */ |
1249 | 0 | case text: |
1250 | 0 | process_text(fr, realdata, realsize, id); |
1251 | 0 | break; |
1252 | 0 | case picture: |
1253 | 0 | if (fr->p.flags & MPG123_PICTURE) |
1254 | 0 | process_picture(fr, realdata, realsize); |
1255 | |
|
1256 | 0 | break; |
1257 | 0 | default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt); |
1258 | 0 | } |
1259 | 0 | if(unsyncbuffer) |
1260 | 0 | free(unsyncbuffer); |
1261 | 0 | } |
1262 | 0 | #undef V3 |
1263 | 0 | #undef BAD_FFLAGS |
1264 | 0 | #undef PRES_TAG_FFLAG |
1265 | 0 | #undef PRES_FILE_FFLAG |
1266 | 0 | #undef READ_ONLY_FFLAG |
1267 | 0 | #undef GROUP_FFLAG |
1268 | 0 | #undef COMPR_FFLAG |
1269 | 0 | #undef ENCR_FFLAG |
1270 | 0 | #undef UNSYNC_FFLAG |
1271 | 0 | #undef DATLEN_FFLAG |
1272 | 0 | } |
1273 | 0 | else break; |
1274 | 0 | #undef KNOWN_FRAMES |
1275 | 0 | } |
1276 | 0 | } else |
1277 | 0 | { |
1278 | 0 | if(VERBOSE3) |
1279 | 0 | fprintf(stderr, "Note: faulty ID3v2 tag still clearing old data\n"); |
1280 | 0 | INT123_reset_id3(fr); |
1281 | 0 | } |
1282 | 0 | } else // No new data, but still there was a tag that invalidates old data. |
1283 | 0 | { |
1284 | 0 | if(VERBOSE3) |
1285 | 0 | fprintf(stderr, "Note: empty ID3v2 clearing old data\n"); |
1286 | 0 | INT123_reset_id3(fr); |
1287 | 0 | } |
1288 | 0 | tagparse_cleanup: |
1289 | | /* Get rid of stored raw data that should not be kept. */ |
1290 | 0 | if(!(fr->p.flags & MPG123_STORE_RAW_ID3)) |
1291 | 0 | { |
1292 | 0 | free(fr->id3v2_raw); |
1293 | 0 | fr->id3v2_raw = NULL; |
1294 | 0 | fr->id3v2_size = 0; |
1295 | 0 | } |
1296 | 0 | } |
1297 | 0 | #endif /* NO_ID3V2 */ |
1298 | 0 | return ret; |
1299 | 0 | #undef UNSYNC_FLAG |
1300 | 0 | #undef EXTHEAD_FLAG |
1301 | 0 | #undef COMPRESS_FLAG |
1302 | 0 | #undef EXP_FLAG |
1303 | 0 | #undef FOOTER_FLAG |
1304 | 0 | #undef EXT_UPDATE_FLAG |
1305 | 0 | #undef UNKOWN_FLAGS |
1306 | 0 | } |
1307 | | |
1308 | | #ifndef NO_ID3V2 /* Disabling all the rest... */ |
1309 | | |
1310 | | static void convert_latin1(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet) |
1311 | 0 | { |
1312 | 0 | size_t length = l; |
1313 | 0 | size_t i; |
1314 | 0 | unsigned char *p; |
1315 | | /* determine real length, a latin1 character can at most take 2 in UTF8 */ |
1316 | 0 | for(i=0; i<l; ++i) |
1317 | 0 | if(s[i] >= 0x80) ++length; |
1318 | |
|
1319 | 0 | debug1("UTF-8 length: %lu", (unsigned long)length); |
1320 | | /* one extra zero byte for paranoia */ |
1321 | 0 | if(!mpg123_grow_string(sb, length+1)) |
1322 | 0 | return; |
1323 | | |
1324 | 0 | p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */ |
1325 | 0 | for(i=0; i<l; ++i) |
1326 | 0 | if(s[i] < 0x80){ *p = s[i]; ++p; } |
1327 | 0 | else /* two-byte encoding */ |
1328 | 0 | { |
1329 | 0 | *p = 0xc0 | (s[i]>>6); |
1330 | 0 | *(p+1) = 0x80 | (s[i] & 0x3f); |
1331 | 0 | p+=2; |
1332 | 0 | } |
1333 | |
|
1334 | 0 | sb->p[length] = 0; |
1335 | 0 | sb->fill = length+1; |
1336 | 0 | } |
1337 | | |
/*
	Look for UTF-16 byte order marks at the start of the data and skip them.

	Return value:
	-1: little endian (FF FE)
	 0: no BOM
	 1: big endian (FE FF)

	On return, *source and *len describe the data _after_ the BOM(s).
	Nasty data may carry several BOMs in a row (seen from "the" id3v2
	program); all of them are skipped and the last one decides.
*/
static int check_bom(const unsigned char** source, size_t *len)
{
	const unsigned char *cursor = *source;
	size_t remaining = *len;
	int endian = 0;

	/* Consume two bytes at a time as long as they form a BOM. */
	for(; remaining >= 2; cursor += 2, remaining -= 2)
	{
		if(cursor[0] == 0xfe && cursor[1] == 0xff)
			endian = 1;
		else if(cursor[0] == 0xff && cursor[1] == 0xfe)
			endian = -1;
		else
			break; /* Real data starts here. */
	}

	*source = cursor;
	*len = remaining;
	return endian;
}
1369 | | |
1370 | 0 | #define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 ) |
1371 | | /* Remember: There's a limit at 0x1ffff. */ |
1372 | 0 | #define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4))) |
1373 | | static void convert_utf16bom(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet) |
1374 | 0 | { |
1375 | 0 | size_t i; |
1376 | 0 | size_t n; /* number bytes that make up full pairs */ |
1377 | 0 | unsigned char *p; |
1378 | 0 | size_t length = 0; /* the resulting UTF-8 length */ |
1379 | | /* Determine real length... extreme case can be more than utf-16 length. */ |
1380 | 0 | size_t high = 0; |
1381 | 0 | size_t low = 1; |
1382 | 0 | int bom_endian; |
1383 | |
|
1384 | 0 | debug1("convert_utf16 with length %lu", (unsigned long)l); |
1385 | |
|
1386 | 0 | bom_endian = check_bom(&s, &l); |
1387 | 0 | debug1("UTF16 endianness check: %i", bom_endian); |
1388 | |
|
1389 | 0 | if(bom_endian == -1) /* little-endian */ |
1390 | 0 | { |
1391 | 0 | high = 1; /* The second byte is the high byte. */ |
1392 | 0 | low = 0; /* The first byte is the low byte. */ |
1393 | 0 | } |
1394 | |
|
1395 | 0 | n = (l/2)*2; /* number bytes that make up full pairs */ |
1396 | | |
1397 | | /* first: get length, check for errors -- stop at first one */ |
1398 | 0 | for(i=0; i < n; i+=2) |
1399 | 0 | { |
1400 | 0 | unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low]; |
1401 | 0 | if((point & 0xfc00) == 0xd800) /* lead surrogate */ |
1402 | 0 | { |
1403 | 0 | unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0; |
1404 | 0 | if((second & 0xfc00) == 0xdc00) /* good... */ |
1405 | 0 | { |
1406 | 0 | point = FULLPOINT(point,second); |
1407 | 0 | length += UTF8LEN(point); /* possibly 4 bytes */ |
1408 | 0 | i+=2; /* We overstepped one word. */ |
1409 | 0 | } |
1410 | 0 | else /* if no valid pair, break here */ |
1411 | 0 | { |
1412 | 0 | if(noquiet) error2("Invalid UTF16 surrogate pair at %li (0x%04lx).", (unsigned long)i, point); |
1413 | 0 | n = i; /* Forget the half pair, END! */ |
1414 | 0 | break; |
1415 | 0 | } |
1416 | 0 | } |
1417 | 0 | else length += UTF8LEN(point); /* 1,2 or 3 bytes */ |
1418 | 0 | } |
1419 | |
|
1420 | 0 | if(!mpg123_grow_string(sb, length+1)) |
1421 | 0 | return; |
1422 | | |
1423 | | /* Now really convert, skip checks as these have been done just before. */ |
1424 | 0 | p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */ |
1425 | 0 | for(i=0; i < n; i+=2) |
1426 | 0 | { |
1427 | 0 | unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low]; |
1428 | 0 | if((codepoint & 0xfc00) == 0xd800) /* lead surrogate */ |
1429 | 0 | { |
1430 | 0 | unsigned short second = (s[i+2+high]<<8) + s[i+2+low]; |
1431 | 0 | codepoint = FULLPOINT(codepoint,second); |
1432 | 0 | i+=2; /* We overstepped one word. */ |
1433 | 0 | } |
1434 | 0 | if(codepoint < 0x80) *p++ = (unsigned char) codepoint; |
1435 | 0 | else if(codepoint < 0x800) |
1436 | 0 | { |
1437 | 0 | *p++ = (unsigned char) (0xc0 | (codepoint>>6)); |
1438 | 0 | *p++ = (unsigned char) (0x80 | (codepoint & 0x3f)); |
1439 | 0 | } |
1440 | 0 | else if(codepoint < 0x10000) |
1441 | 0 | { |
1442 | 0 | *p++ = (unsigned char) (0xe0 | (codepoint>>12)); |
1443 | 0 | *p++ = 0x80 | ((codepoint>>6) & 0x3f); |
1444 | 0 | *p++ = 0x80 | (codepoint & 0x3f); |
1445 | 0 | } |
1446 | 0 | else if (codepoint < 0x200000) |
1447 | 0 | { |
1448 | 0 | *p++ = (unsigned char) (0xf0 | codepoint>>18); |
1449 | 0 | *p++ = (unsigned char) (0x80 | ((codepoint>>12) & 0x3f)); |
1450 | 0 | *p++ = (unsigned char) (0x80 | ((codepoint>>6) & 0x3f)); |
1451 | 0 | *p++ = (unsigned char) (0x80 | (codepoint & 0x3f)); |
1452 | 0 | } /* ignore bigger ones (that are not possible here anyway) */ |
1453 | 0 | } |
1454 | 0 | sb->p[sb->size-1] = 0; /* paranoia... */ |
1455 | 0 | sb->fill = sb->size; |
1456 | 0 | } |
1457 | | #undef UTF8LEN |
1458 | | #undef FULLPOINT |
1459 | | |
1460 | | static void convert_utf8(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet) |
1461 | 0 | { |
1462 | 0 | if(mpg123_grow_string(sb, len+1)) |
1463 | 0 | { |
1464 | 0 | memcpy(sb->p, source, len); |
1465 | 0 | sb->p[len] = 0; |
1466 | 0 | sb->fill = len+1; |
1467 | 0 | } |
1468 | 0 | } |
1469 | | |
1470 | | #endif |