Coverage Report

Created: 2025-11-11 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/yara/libyara/base64.c
Line
Count
Source
1
/*
2
Copyright (c) 2020. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <string.h>
31
#include <yara/base64.h>
32
#include <yara/error.h>
33
#include <yara/mem.h>
34
#include <yara/re.h>
35
#include <yara/sizedstr.h>
36
37
////////////////////////////////////////////////////////////////////////////////
38
// Given a pointer to a SIZED_STRING append 0, 1 or 2 bytes and base64 encode
39
// the string. The number of padding bytes is returned in "pad" and the caller
40
// is expected to trim the appropriate number of leading and trailing bytes.
41
//
42
// This is based upon the ideas at:
43
// https://www.leeholmes.com/searching-for-content-in-base-64-strings/
44
//
45
// The caller is responsible for freeing the returned string.
46
//
47
static SIZED_STRING* _yr_modified_base64_encode(
48
    SIZED_STRING* in,
49
    SIZED_STRING* alphabet,
50
    int i,
51
    int* pad)
52
7.04k
{
53
7.04k
  uint8_t* src = (uint8_t*) in->c_string;
54
7.04k
  size_t len = in->length;
55
7.04k
  SIZED_STRING* out;
56
7.04k
  uint8_t* p;
57
7.04k
  uint8_t* end;
58
7.04k
  char* alphabet_str = alphabet->c_string;
59
7.04k
  uint8_t* tmp;
60
7.04k
  int j;
61
62
7.04k
  *pad = ((i + len) % 3) ? 3 - ((i + len) % 3) : 0;
63
64
  // Add "i" for the number of prepended bytes.
65
7.04k
  out = (SIZED_STRING*) yr_malloc(
66
7.04k
      sizeof(SIZED_STRING) + i + ((len * 4 + 3) / 3) + *pad);
67
68
7.04k
  if (out == NULL)
69
0
    return NULL;
70
71
7.04k
  tmp = (uint8_t*) yr_malloc(sizeof(uint8_t) * (len + i));
72
7.04k
  if (tmp == NULL)
73
0
  {
74
0
    yr_free(out);
75
0
    return NULL;
76
0
  }
77
78
  // Prepend appropriate number of bytes and copy remaining input bytes into
79
  // temporary buffer.
80
14.0k
  for (j = 0; j < i; j++) tmp[j] = 'A';
81
82
7.04k
  memcpy(tmp + j, src, len);
83
7.04k
  src = tmp;
84
85
7.04k
  p = (uint8_t*) out->c_string;
86
7.04k
  end = src + len + j;
87
88
307k
  while (end - src >= 3)
89
300k
  {
90
300k
    *p++ = alphabet_str[src[0] >> 2];
91
300k
    *p++ = alphabet_str[((src[0] & 0x03) << 4 | src[1] >> 4)];
92
300k
    *p++ = alphabet_str[((src[1] & 0x0f) << 2 | (src[2] >> 6))];
93
300k
    *p++ = alphabet_str[src[2] & 0x3f];
94
300k
    src += 3;
95
300k
  }
96
97
  // Handle remaining bytes and padding.
98
7.04k
  if (end - src)
99
4.23k
  {
100
4.23k
    *p++ = alphabet_str[src[0] >> 2];
101
4.23k
    if (end - src == 1)
102
2.81k
    {
103
2.81k
      *p++ = alphabet_str[(src[0] & 0x03) << 4];
104
2.81k
      *p++ = '=';
105
2.81k
    }
106
1.42k
    else
107
1.42k
    {
108
1.42k
      *p++ = alphabet_str[((src[0] & 0x03) << 4 | src[1] >> 4)];
109
1.42k
      *p++ = alphabet_str[(src[1] & 0x0f) << 2];
110
1.42k
    }
111
4.23k
    *p++ = '=';
112
4.23k
  }
113
114
7.04k
  yr_free(tmp);
115
7.04k
  out->length = (uint32_t)(p - (uint8_t*) out->c_string);
116
117
7.04k
  return out;
118
7.04k
}
119
120
////////////////////////////////////////////////////////////////////////////////
121
// Given a base64 encoded string, return a new string with leading and trailing
122
// bytes stripped appropriately. The number of leading bytes to skip is always
123
// (i + 1) or zero when no leading bytes are added and the number of trailing
124
// bytes is always (pad + 1) or zero when pad is zero. Also, convert the final
125
// string to wide if desired.
126
//
127
// Note: This implementation assumes you only prepend 0, 1 or 2 bytes.
128
//
129
static SIZED_STRING* _yr_base64_get_base64_substring(
130
    SIZED_STRING* encoded_str,
131
    int wide,
132
    int i,
133
    int pad)
134
7.04k
{
135
7.04k
  SIZED_STRING* new_str;
136
7.04k
  SIZED_STRING* final_str;
137
7.04k
  char* start;
138
7.04k
  uint32_t length;
139
7.04k
  int trailing;
140
7.04k
  int leading;
141
142
7.04k
  trailing = pad ? pad + 1 : 0;
143
7.04k
  leading = i ? i + 1 : 0;
144
145
7.04k
  length = encoded_str->length - (leading + trailing);
146
147
7.04k
  new_str = (SIZED_STRING*) yr_malloc(sizeof(SIZED_STRING) + length);
148
149
7.04k
  if (new_str == NULL)
150
0
    return NULL;
151
152
7.04k
  start = encoded_str->c_string + leading;
153
154
7.04k
  memcpy(new_str->c_string, start, length);
155
156
7.04k
  new_str->length = length;
157
7.04k
  new_str->c_string[length] = '\0';
158
159
7.04k
  if (wide)
160
3.70k
  {
161
3.70k
    final_str = ss_convert_to_wide(new_str);
162
3.70k
    yr_free(new_str);
163
3.70k
  }
164
3.34k
  else
165
3.34k
  {
166
3.34k
    final_str = new_str;
167
3.34k
  }
168
169
7.04k
  return final_str;
170
7.04k
}
171
172
// RE metacharacters which need to be escaped when generating the final RE.
// The argument is parenthesized so that any expression can be passed safely,
// but it is still evaluated several times: do not pass expressions with side
// effects.
#define IS_METACHAR(x)                                                    \
  ((x) == '\\' || (x) == '^' || (x) == '$' || (x) == '|' || (x) == '(' || \
   (x) == ')' || (x) == '[' || (x) == ']' || (x) == '*' || (x) == '?' ||  \
   (x) == '{' || (x) == ',' || (x) == '.' || (x) == '+' || (x) == '}')
177
178
////////////////////////////////////////////////////////////////////////////////
179
// Given a SIZED_STRING return the number of characters which will need to be
180
// escaped when generating the final string to pass to the regexp compiler.
181
//
182
static int _yr_base64_count_escaped(SIZED_STRING* str)
183
7.04k
{
184
7.04k
  int c = 0;
185
186
2.16M
  for (uint32_t i = 0; i < str->length; i++)
187
2.15M
  {
188
    // We must be careful to escape null bytes because they break the RE lexer.
189
2.15M
    if (IS_METACHAR(str->c_string[i]))
190
28.6k
      c++;
191
2.12M
    else if (str->c_string[i] == '\x00')
192
959k
      c += 4;
193
2.15M
  }
194
195
7.04k
  return c;
196
7.04k
}
197
198
////////////////////////////////////////////////////////////////////////////////
199
// Create nodes representing the different encodings of a base64 string.
200
//
201
static int _yr_base64_create_nodes(
202
    SIZED_STRING* str,
203
    SIZED_STRING* alphabet,
204
    int wide,
205
    BASE64_NODE** head,
206
    BASE64_NODE** tail)
207
2.81k
{
208
2.81k
  SIZED_STRING* encoded_str;
209
2.81k
  SIZED_STRING* final_str;
210
2.81k
  BASE64_NODE* node;
211
212
2.81k
  int pad;
213
214
11.2k
  for (int i = 0; i <= 2; i++)
215
8.43k
  {
216
8.43k
    if (i == 1 && str->length == 1)
217
1.39k
      continue;
218
219
7.04k
    node = (BASE64_NODE*) yr_malloc(sizeof(BASE64_NODE));
220
7.04k
    if (node == NULL)
221
0
      return ERROR_INSUFFICIENT_MEMORY;
222
223
7.04k
    FAIL_ON_NULL_WITH_CLEANUP(
224
7.04k
        encoded_str = _yr_modified_base64_encode(str, alphabet, i, &pad),
225
7.04k
        yr_free(node));
226
227
    // Now take the encoded string and strip the bytes which are affected by
228
    // the leading and trailing bytes of the plaintext.
229
7.04k
    FAIL_ON_NULL_WITH_CLEANUP(
230
7.04k
        final_str = _yr_base64_get_base64_substring(encoded_str, wide, i, pad),
231
7.04k
        {
232
7.04k
          yr_free(encoded_str);
233
7.04k
          yr_free(node);
234
7.04k
        });
235
236
7.04k
    yr_free(encoded_str);
237
238
7.04k
    node->str = final_str;
239
7.04k
    node->escaped = _yr_base64_count_escaped(node->str);
240
7.04k
    node->next = NULL;
241
242
7.04k
    if (*head == NULL)
243
1.92k
      *head = node;
244
245
7.04k
    if (*tail == NULL)
246
1.92k
    {
247
1.92k
      *tail = node;
248
1.92k
    }
249
5.12k
    else
250
5.12k
    {
251
5.12k
      (*tail)->next = node;
252
5.12k
      *tail = node;
253
5.12k
    }
254
7.04k
  }
255
256
2.81k
  return ERROR_SUCCESS;
257
2.81k
}
258
259
////////////////////////////////////////////////////////////////////////////////
260
// Useful for printing the encoded strings.
261
//
262
void _yr_base64_print_nodes(BASE64_NODE* head)
263
0
{
264
0
  BASE64_NODE* p = head;
265
266
0
  while (p != NULL)
267
0
  {
268
0
    for (size_t i = 0; i < p->str->length; i++)
269
0
    {
270
0
      if (p->str->c_string[i] >= 32 && p->str->c_string[i] <= 126)
271
0
        printf("%c", p->str->c_string[i]);
272
0
      else
273
0
        printf("\\x%02x", p->str->c_string[i]);
274
0
    }
275
0
    printf("\n");
276
277
0
    p = p->next;
278
0
  }
279
0
}
280
281
////////////////////////////////////////////////////////////////////////////////
282
// Destroy a list of base64 nodes.
283
//
284
static void _yr_base64_destroy_nodes(BASE64_NODE* head)
285
1.92k
{
286
1.92k
  BASE64_NODE* p = head;
287
1.92k
  BASE64_NODE* next;
288
289
8.97k
  while (p != NULL)
290
7.04k
  {
291
7.04k
    yr_free(p->str);
292
7.04k
    next = p->next;
293
7.04k
    yr_free(p);
294
7.04k
    p = next;
295
7.04k
  }
296
1.92k
}
297
298
////////////////////////////////////////////////////////////////////////////////
299
// Create the regexp that is the alternatives of each of the strings collected
300
// in the BASE64_NODE list.
301
//
302
int _yr_base64_create_regexp(
303
    BASE64_NODE* head,
304
    RE_AST** re_ast,
305
    RE_ERROR* re_error)
306
1.92k
{
307
1.92k
  BASE64_NODE* p = head;
308
1.92k
  char* re_str;
309
1.92k
  char* s;
310
1.92k
  uint32_t length = 0;
311
312
  // The number of nodes in the list, used to know how many '|'.
313
1.92k
  uint32_t c = 0;
314
315
8.97k
  while (p != NULL)
316
7.04k
  {
317
7.04k
    length += (p->str->length + p->escaped);
318
7.04k
    c++;
319
7.04k
    p = p->next;
320
7.04k
  }
321
322
1.92k
  if (c == 0)
323
0
    return ERROR_INSUFFICIENT_MEMORY;
324
325
  // Make sure to include '(' and ')'.
326
  // The number of '|' is number of nodes - 1.
327
1.92k
  re_str = (char*) yr_malloc(length + 2 + (c - 1) + 1);
328
1.92k
  if (re_str == NULL)
329
0
    return ERROR_INSUFFICIENT_MEMORY;
330
331
1.92k
  s = re_str;
332
1.92k
  p = head;
333
1.92k
  *s++ = '(';
334
8.97k
  while (p != NULL)
335
7.04k
  {
336
2.16M
    for (uint32_t i = 0; i < p->str->length; i++)
337
2.15M
    {
338
2.15M
      if (IS_METACHAR(p->str->c_string[i]))
339
28.6k
        *s++ = '\\';
340
341
2.15M
      if (p->str->c_string[i] == '\x00')
342
959k
      {
343
959k
        *s++ = '\\';
344
959k
        *s++ = 'x';
345
959k
        *s++ = '0';
346
959k
        *s++ = '0';
347
959k
      }
348
1.19M
      else
349
1.19M
        *s++ = p->str->c_string[i];
350
2.15M
    }
351
352
7.04k
    if (p->next != NULL)
353
5.12k
      *s++ = '|';
354
355
7.04k
    p = p->next;
356
7.04k
  }
357
1.92k
  *s++ = ')';
358
1.92k
  *s = '\x00';
359
360
  // Useful for debugging as long as the string has no NULL bytes in it. ;)
361
  // printf("%s\n", re_str);
362
363
1.92k
  FAIL_ON_ERROR_WITH_CLEANUP(
364
1.92k
      yr_re_parse(re_str, re_ast, re_error, RE_PARSER_FLAG_NONE), yr_free(re_str));
365
366
1.92k
  yr_free(re_str);
367
368
1.92k
  return ERROR_SUCCESS;
369
1.92k
}
370
371
////////////////////////////////////////////////////////////////////////////////
372
// Given a string and an alphabet, generate the RE_AST suitable for representing
373
// the different encodings of the string. This means we generate
374
// "(ABCD|EFGH|IJKL)" and must be careful to escape any special characters as
375
// a result of the base64 encoding.
376
//
377
// This uses ideas from:
378
// https://www.leeholmes.com/searching-for-content-in-base-64-strings/
379
//
380
// This does not emit the code for the RE. A further call to yr_re_ast_emit_code
381
// is required to get the code.
382
//
383
int yr_base64_ast_from_string(
384
    SIZED_STRING* in_str,
385
    YR_MODIFIER modifier,
386
    RE_AST** re_ast,
387
    RE_ERROR* error)
388
1.92k
{
389
1.92k
  BASE64_NODE* head = NULL;
390
1.92k
  BASE64_NODE* tail = NULL;
391
1.92k
  SIZED_STRING* wide_str;
392
393
1.92k
  if (modifier.flags & STRING_FLAGS_WIDE)
394
527
  {
395
527
    wide_str = ss_convert_to_wide(in_str);
396
397
527
    if (modifier.flags & STRING_FLAGS_BASE64)
398
170
    {
399
170
      FAIL_ON_ERROR_WITH_CLEANUP(
400
170
          _yr_base64_create_nodes(wide_str, modifier.alphabet, 0, &head, &tail),
401
170
          {  // Cleanup
402
170
            strcpy(error->message, "Failure encoding base64 wide string");
403
170
            yr_free(wide_str);
404
170
            _yr_base64_destroy_nodes(head);
405
170
          });
406
170
    }
407
408
527
    if (modifier.flags & STRING_FLAGS_BASE64_WIDE)
409
357
    {
410
357
      FAIL_ON_ERROR_WITH_CLEANUP(
411
357
          _yr_base64_create_nodes(wide_str, modifier.alphabet, 1, &head, &tail),
412
357
          {  // Cleanup
413
357
            strcpy(error->message, "Failure encoding base64wide wide string");
414
357
            yr_free(wide_str);
415
357
            _yr_base64_destroy_nodes(head);
416
357
          });
417
357
    }
418
419
527
    yr_free(wide_str);
420
527
  }
421
422
1.92k
  if (modifier.flags & STRING_FLAGS_ASCII)
423
102
  {
424
102
    if (modifier.flags & STRING_FLAGS_BASE64)
425
49
    {
426
49
      FAIL_ON_ERROR_WITH_CLEANUP(
427
49
          _yr_base64_create_nodes(in_str, modifier.alphabet, 0, &head, &tail),
428
49
          {  // Cleanup
429
49
            strcpy(error->message, "Failure encoding base64 ascii string");
430
49
            _yr_base64_destroy_nodes(head);
431
49
          });
432
49
    }
433
434
102
    if (modifier.flags & STRING_FLAGS_BASE64_WIDE)
435
53
    {
436
53
      FAIL_ON_ERROR_WITH_CLEANUP(
437
53
          _yr_base64_create_nodes(in_str, modifier.alphabet, 1, &head, &tail),
438
53
          {  // Cleanup
439
53
            strcpy(error->message, "Failure encoding base64wide ascii string");
440
53
            _yr_base64_destroy_nodes(head);
441
53
          });
442
53
    }
443
102
  }
444
445
1.92k
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
446
1.40k
      !(modifier.flags & STRING_FLAGS_ASCII))
447
1.30k
  {
448
1.30k
    if (modifier.flags & STRING_FLAGS_BASE64)
449
1.14k
    {
450
1.14k
      FAIL_ON_ERROR_WITH_CLEANUP(
451
1.14k
          _yr_base64_create_nodes(in_str, modifier.alphabet, 0, &head, &tail),
452
1.14k
          {  // Cleanup
453
1.14k
            strcpy(error->message, "Failure encoding base64 string");
454
1.14k
            _yr_base64_destroy_nodes(head);
455
1.14k
          });
456
1.14k
    }
457
458
1.30k
    if (modifier.flags & STRING_FLAGS_BASE64_WIDE)
459
1.04k
    {
460
1.04k
      FAIL_ON_ERROR_WITH_CLEANUP(
461
1.04k
          _yr_base64_create_nodes(in_str, modifier.alphabet, 1, &head, &tail),
462
1.04k
          {  // Cleanup
463
1.04k
            strcpy(error->message, "Failure encoding base64wide string");
464
1.04k
            _yr_base64_destroy_nodes(head);
465
1.04k
          });
466
1.04k
    }
467
1.30k
  }
468
469
  // Useful for printing the contents of the nodes, to make sure they were
470
  // encoded and stripped properly.
471
  //_yr_base64_print_nodes(head);
472
473
  // Create the final regex string to be parsed from all the nodes.
474
  // Error message is filled in by the caller in case of failure.
475
1.92k
  FAIL_ON_ERROR_WITH_CLEANUP(
476
1.92k
      _yr_base64_create_regexp(head, re_ast, error),
477
1.92k
      _yr_base64_destroy_nodes(head));
478
479
1.92k
  _yr_base64_destroy_nodes(head);
480
481
1.92k
  return ERROR_SUCCESS;
482
1.92k
}