Coverage Report

Created: 2025-10-10 06:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/opensips/re.c
Line
Count
Source
1
/*
2
 * regexp and regexp substitutions implementations
3
 *
4
 * Copyright (C) 2001-2003 FhG Fokus
5
 *
6
 * This file is part of opensips, a free SIP server.
7
 *
8
 * opensips is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version
12
 *
13
 * opensips is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with this program; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
21
 *
22
 *
23
 * History:
24
 * --------
25
 *   2003-08-04  created by andrei
26
 *   2004-11-12  minor api extension, added *count (andrei)
27
 *   2007-07-27  split function for parsing of replacing string (ancuta)
28
 */
29
30
/*!
31
 * \file
32
 * \brief Regexp and regexp substitutions implementations
33
 */
34
35
36
37
#include "dprint.h"
38
#include "mem/mem.h"
39
#include "re.h"
40
41
#include <string.h>
42
43
44
45
void subst_expr_free(struct subst_expr* se)
46
0
{
47
0
  int i;
48
49
0
  if (se->replacement.s) pkg_free(se->replacement.s);
50
0
  if (se->re) { regfree(se->re); pkg_free(se->re); };
51
0
  for (i = 0; i < se->n_escapes; i++)
52
0
    if (se->replace[i].type == REPLACE_SPEC
53
0
        && se->replace[i].u.spec.pvp.pvi.type == PV_IDX_PVAR)
54
0
            pv_spec_free(se->replace[i].u.spec.pvp.pvi.u.dval);
55
0
  pkg_free(se);
56
0
}
57
58
59
60
/*! \brief frees the entire list, head (l) too */
61
void replace_lst_free(struct replace_lst* l)
62
0
{
63
0
  struct replace_lst* t;
64
65
0
  while (l){
66
0
    t=l;
67
0
    l=l->next;
68
0
    if (t->rpl.s) pkg_free(t->rpl.s);
69
0
    pkg_free(t);
70
0
  }
71
0
}
72
73
0
#define MAX_REPLACE_WITH 100
74
int parse_repl(struct replace_with * rw, char ** begin,
75
        char * end, int *max_token_nb, int with_sep)
76
0
{
77
78
0
  char* p0;
79
0
  char * repl;
80
0
  str s;
81
0
  int token_nb;
82
0
  int escape;
83
0
  int max_pmatch;
84
0
  char *p, c;
85
86
  /* parse replacement */
87
0
  p = *begin;
88
0
  c = *p;
89
0
  if(with_sep)
90
0
    p++;
91
0
  repl= p;
92
0
  token_nb=0;
93
0
  max_pmatch=0;
94
0
  escape=0;
95
0
  for(;p<end; p++){
96
0
    if (escape){
97
0
      escape=0;
98
0
      switch (*p){
99
        /* special char escapes */
100
0
        case '\\':
101
0
          rw[token_nb].size=2;
102
0
          rw[token_nb].offset=(p-1)-repl;
103
0
          rw[token_nb].type=REPLACE_CHAR;
104
0
          rw[token_nb].u.c='\\';
105
0
          break;
106
0
        case 'n':
107
0
          rw[token_nb].size=2;
108
0
          rw[token_nb].offset=(p-1)-repl;
109
0
          rw[token_nb].type=REPLACE_CHAR;
110
0
          rw[token_nb].u.c='\n';
111
0
          break;
112
0
        case 'r':
113
0
          rw[token_nb].size=2;
114
0
          rw[token_nb].offset=(p-1)-repl;
115
0
          rw[token_nb].type=REPLACE_CHAR;
116
0
          rw[token_nb].u.c='\r';
117
0
          break;
118
0
        case 't':
119
0
          rw[token_nb].size=2;
120
0
          rw[token_nb].offset=(p-1)-repl;
121
0
          rw[token_nb].type=REPLACE_CHAR;
122
0
          rw[token_nb].u.c='\t';
123
0
          break;
124
0
        case PV_MARKER:
125
0
          rw[token_nb].size=2;
126
0
          rw[token_nb].offset=(p-1)-repl;
127
0
          rw[token_nb].type=REPLACE_CHAR;
128
0
          rw[token_nb].u.c=PV_MARKER;
129
0
          break;
130
        /* special sip msg parts escapes */
131
0
        case 'u':
132
0
          rw[token_nb].size=2;
133
0
          rw[token_nb].offset=(p-1)-repl;
134
0
          rw[token_nb].type=REPLACE_URI;
135
0
          break;
136
        /* re matches */
137
0
        case '0': /* allow 0, too, reference to the whole match */
138
0
        case '1':
139
0
        case '2':
140
0
        case '3':
141
0
        case '4':
142
0
        case '5':
143
0
        case '6':
144
0
        case '7':
145
0
        case '8':
146
0
        case '9':
147
0
          rw[token_nb].size=2;
148
0
          rw[token_nb].offset=(p-1)-repl;
149
0
          rw[token_nb].type=REPLACE_NMATCH;
150
0
          rw[token_nb].u.nmatch=(*p)-'0';
151
                /* 0 is the whole matched str*/
152
0
          if (max_pmatch<rw[token_nb].u.nmatch)
153
0
            max_pmatch=rw[token_nb].u.nmatch;
154
0
          break;
155
0
        default: /* just print current char */
156
0
          if (*p!=c){
157
0
            LM_WARN("\\%c unknown escape in %s\n", *p, *begin);
158
0
          }
159
0
          rw[token_nb].size=2;
160
0
          rw[token_nb].offset=(p-1)-repl;
161
0
          rw[token_nb].type=REPLACE_CHAR;
162
0
          rw[token_nb].u.c=*p;
163
0
          break;
164
0
      }
165
166
0
      token_nb++;
167
168
0
      if (token_nb>=MAX_REPLACE_WITH){
169
0
        LM_ERR("too many escapes in the replace part %s\n", *begin);
170
0
        goto error;
171
0
      }
172
0
    }else if (*p=='\\') {
173
0
      escape=1;
174
0
    }else if (*p==PV_MARKER) {
175
0
      s.s = p;
176
0
      s.len = end - s.s;
177
0
      p0 = pv_parse_spec(&s, &rw[token_nb].u.spec);
178
0
      if(p0==NULL)
179
0
      {
180
0
        LM_ERR("bad specifier in replace part %s\n", *begin);
181
0
        goto error;
182
0
      }
183
0
      rw[token_nb].size=p0-p;
184
0
      rw[token_nb].offset=p-repl;
185
0
      rw[token_nb].type=REPLACE_SPEC;
186
0
      token_nb++;
187
0
      p=p0-1;
188
0
    }else  if (*p==c && with_sep){
189
0
        goto found_repl;
190
0
    }
191
0
  }
192
0
  if(with_sep){
193
194
0
    LM_ERR("missing separator: %s\n", *begin);
195
0
    goto error;
196
0
  }
197
198
0
found_repl:
199
200
0
  *max_token_nb = max_pmatch;
201
0
  *begin = p;
202
0
  return token_nb;
203
204
0
error:
205
0
  return -1;
206
0
}
207
208
209
/*! \brief Parse a /regular expression/replacement/flags into a subst_expr structure
210
 */
211
struct subst_expr* subst_parser(str* subst)
212
0
{
213
0
  char c;
214
0
  char* end;
215
0
  char* p;
216
0
  char* re;
217
0
  char* re_end;
218
0
  char* repl;
219
0
  char* repl_end;
220
0
  struct replace_with rw[MAX_REPLACE_WITH];
221
0
  int rw_no;
222
  //int escape;
223
0
  int cflags; /* regcomp flags */
224
0
  int replace_all;
225
0
  struct subst_expr* se;
226
0
  regex_t* regex;
227
0
  int max_pmatch;
228
0
  int r;
229
230
  /* init */
231
0
  se=0;
232
0
  regex=0;
233
0
  cflags=REG_EXTENDED  | REG_NEWLINE; /* don't match newline */
234
0
  replace_all=0;
235
0
  if (subst->len<3){
236
0
    LM_ERR("expression is too short: %.*s\n", subst->len, subst->s);
237
0
    goto error;
238
0
  }
239
240
0
  p=subst->s;
241
0
  end=subst->s+subst->len;
242
243
0
  c=*p;
244
0
  if (c=='\\'){
245
0
    LM_ERR("invalid separator char <%c> in %.*s\n", c,
246
0
        subst->len, subst->s);
247
0
    goto error;
248
0
  }
249
0
  p++;
250
251
  /* find re */
252
0
  re=p;
253
0
  for (;p<end;p++){
254
    /* if unescaped sep. char */
255
0
    if ((*p==c) && (*(p-1)!='\\')) goto found_re;
256
0
  }
257
0
  LM_ERR("no separator found: %.*s\n", subst->len, subst->s);
258
0
  goto error;
259
0
found_re:
260
0
  re_end=p;
261
0
  if(end< (p+2) ){
262
0
    LM_ERR("string too short\n");
263
0
    goto error;
264
0
  }
265
0
  repl=p+1;
266
0
  if((rw_no = parse_repl(rw, &p, end, &max_pmatch, WITH_SEP))< 0)
267
0
    goto error;
268
269
270
0
  repl_end=p;
271
0
  p++;
272
  /* parse flags */
273
0
  for(;p<end; p++){
274
0
    switch(*p){
275
0
      case 'i':
276
0
        cflags|=REG_ICASE;
277
0
        break;
278
0
      case 's':
279
0
        cflags&=(~REG_NEWLINE);
280
0
        break;
281
0
      case 'g':
282
0
        replace_all=1;
283
0
        break;
284
0
      default:
285
0
        LM_ERR("unknown flag %c in %.*s\n", *p, subst->len, subst->s);
286
0
        goto error;
287
0
    }
288
0
  }
289
290
  /* compile the re */
291
0
  if ((regex=pkg_malloc(sizeof(regex_t)))==0){
292
0
    LM_ERR("out of pkg memory (re)\n");
293
0
    goto error;
294
0
  }
295
0
  c=*re_end; /* regcomp expects null terminated strings -- save */
296
0
  *re_end=0;
297
0
  if (regcomp(regex, re, cflags)!=0){
298
0
    *re_end=c; /* restore */
299
0
    LM_ERR("bad regular expression %.*s in %.*s\n",
300
0
        (int)(re_end-re), re, subst->len, subst->s);
301
0
    goto error;
302
0
  }
303
0
  *re_end=c; /* restore */
304
  /* construct the subst_expr structure */
305
0
  se=pkg_malloc(sizeof(struct subst_expr)+
306
0
          ((rw_no)?(rw_no-1)*sizeof(struct replace_with):0));
307
    /* 1 replace_with structure is  already included in subst_expr */
308
0
  if (se==0){
309
0
    LM_ERR("out of pkg memory (subst_expr)\n");
310
0
    goto error;
311
0
  }
312
0
  memset((void*)se, 0, sizeof(struct subst_expr));
313
314
0
  se->replacement.len=repl_end-repl;
315
0
  if ((se->replacement.s=pkg_malloc(se->replacement.len))==0){
316
0
    LM_ERR("out of pkg memory (replacement)\n");
317
0
    goto error;
318
0
  }
319
320
  /* start copying */
321
0
  memcpy(se->replacement.s, repl, se->replacement.len);
322
0
  se->re=regex;
323
0
  se->replace_all=replace_all;
324
0
  se->n_escapes=rw_no;
325
0
  se->max_pmatch=max_pmatch;
326
0
  for (r=0; r<rw_no; r++) se->replace[r]=rw[r];
327
0
  LM_DBG("ok, se is %p\n", se);
328
0
  return se;
329
330
0
error:
331
0
  if (se) { subst_expr_free(se); regex=0; }
332
0
  if (regex) { regfree (regex); pkg_free(regex); }
333
0
  return 0;
334
0
}
335
336
337
#if 0
/* Disabled: computes the length the built replacement would have for one
 * match, without building it. No REPLACE_SPEC case is handled here. */
static int replace_len(const char* match, int nmatch, regmatch_t* pmatch,
          struct subst_expr* se, struct sip_msg* msg)
{
  int idx;
  int total;
  int group;
  str* uri;

  total=se->replacement.len;
  for (idx=0; idx<se->n_escapes; idx++){
    if (se->replace[idx].type==REPLACE_NMATCH){
      total-=se->replace[idx].size;
      group=se->replace[idx].u.nmatch;
      if ((group<nmatch)&&(pmatch[group].rm_so!=-1)){
        /* do the replace */
        total+=pmatch[group].rm_eo-pmatch[group].rm_so;
      }
    }else if (se->replace[idx].type==REPLACE_CHAR){
      /* the escape collapses to a single char */
      total-=(se->replace[idx].size-1);
    }else if (se->replace[idx].type==REPLACE_URI){
      total-=se->replace[idx].size;
      if (msg->first_line.type!=SIP_REQUEST){
        LM_CRIT("uri substitution on a reply\n");
        continue; /* ignore, we can continue */
      }
      uri= (msg->new_uri.s)?(&msg->new_uri):
        (&msg->first_line.u.request.uri);
      total+=uri->len;
    }else{
      LM_CRIT("unknown type %d\n", se->replace[idx].type);
      /* ignore it */
    }
  }
  return total;
}

#endif
379
380
/*! \brief Replies will be allocated with the proper size & rpl.len set
381
 * \return 0 on success, <0 on error
382
 */
383
static int replace_build(const char* match, int nmatch, regmatch_t* pmatch,
384
          struct subst_expr* se, struct sip_msg* msg, str* rpl)
385
0
{
386
0
  int r;
387
0
  str* uri;
388
0
  pv_value_t sv;
389
0
  char* p;
390
0
  char* dest;
391
0
  char* end;
392
0
  int size;
393
0
#define REPLACE_BUFFER_SIZE 1024
394
0
  static char rbuf[REPLACE_BUFFER_SIZE];
395
396
#if 0
397
  /* use static bufer now since we cannot easily get the length */
398
  rpl->len=replace_len(match, nmatch, pmatch, se, msg);
399
  if (rpl->len==0){
400
    rpl->s=0; /* empty string */
401
    return 0;
402
  }
403
  rpl->s=pkg_malloc(rpl->len);
404
  if (rpl->s==0){
405
    LM_ERR("out of pkg mem (rpl)\n");
406
    goto error;
407
  }
408
#endif
409
410
0
  p=se->replacement.s;
411
0
  end=p+se->replacement.len;
412
0
  dest=rbuf;
413
0
  for (r=0; r<se->n_escapes; r++){
414
    /* copy the unescaped parts */
415
0
    size=se->replacement.s+se->replace[r].offset-p;
416
0
    if(dest-rbuf+size>=REPLACE_BUFFER_SIZE-1){
417
0
      LM_ERR("overflow\n");
418
0
      goto error;
419
0
    }
420
0
    memcpy(dest, p, size);
421
0
    p+=size+se->replace[r].size;
422
0
    dest+=size;
423
0
    switch(se->replace[r].type){
424
0
      case REPLACE_NMATCH:
425
0
        if ((se->replace[r].u.nmatch<nmatch)&&(
426
0
            pmatch[se->replace[r].u.nmatch].rm_so!=-1)){
427
            /* do the replace */
428
0
            size=pmatch[se->replace[r].u.nmatch].rm_eo-
429
0
                pmatch[se->replace[r].u.nmatch].rm_so;
430
0
            if(dest-rbuf+size>=REPLACE_BUFFER_SIZE-1){
431
0
              LM_ERR("overflow\n");
432
0
              goto error;
433
0
            }
434
0
            memcpy(dest,
435
0
                match+pmatch[se->replace[r].u.nmatch].rm_so,
436
0
                size);
437
0
            dest+=size;
438
0
        };
439
0
        break;
440
0
      case REPLACE_CHAR:
441
0
        if(dest-rbuf+1>=REPLACE_BUFFER_SIZE-1){
442
0
          LM_ERR("overflow\n");
443
0
          goto error;
444
0
        }
445
0
        *dest=se->replace[r].u.c;
446
0
        dest++;
447
0
        break;
448
0
      case REPLACE_URI:
449
0
        if (msg->first_line.type!=SIP_REQUEST){
450
0
          LM_CRIT("uri substitution on a reply\n");
451
0
          break; /* ignore, we can continue */
452
0
        }
453
0
        uri= (msg->new_uri.s)?(&msg->new_uri):
454
0
          (&msg->first_line.u.request.uri);
455
0
        if(dest-rbuf+uri->len>=REPLACE_BUFFER_SIZE-1){
456
0
          LM_ERR("overflow\n");
457
0
          goto error;
458
0
        }
459
0
        memcpy(dest, uri->s, uri->len);
460
0
        dest+=uri->len;
461
0
        break;
462
0
      case REPLACE_SPEC:
463
0
        if(pv_get_spec_value(msg, &se->replace[r].u.spec, &sv)!=0)
464
0
        {
465
0
          LM_CRIT("item substitution returned error\n");
466
0
          break; /* ignore, we can continue */
467
0
        }
468
0
        if(dest-rbuf+sv.rs.len>=REPLACE_BUFFER_SIZE-1){
469
0
          LM_ERR("overflow\n");
470
0
          goto error;
471
0
        }
472
0
        memcpy(dest, sv.rs.s, sv.rs.len);
473
0
        dest+=sv.rs.len;
474
0
        break;
475
0
      default:
476
0
        LM_CRIT("unknown type %d\n", se->replace[r].type);
477
        /* ignore it */
478
0
    }
479
0
  }
480
0
  memcpy(dest, p, end-p);
481
482
0
  rpl->len = (dest-rbuf)+(end-p);
483
0
  rpl->s=pkg_malloc(rpl->len);
484
0
  if (rpl->s==0){
485
0
    LM_ERR("out of pkg mem (rpl)\n");
486
0
    goto error;
487
0
  }
488
0
  memcpy(rpl->s, rbuf, rpl->len);
489
490
0
  return 0;
491
0
error:
492
0
  return -1;
493
0
}
494
495
496
497
/*! \brief run substitutions
498
 * \return 0 if no match or error, or subst result; if count!=0
499
 *           it will be set to 0 (no match), the number of matches
500
 *           or -1 (error).
501
 * \note WARNING: input must be 0 terminated!
502
 */
503
struct replace_lst* subst_run(struct subst_expr* se, const char* input,
504
                struct sip_msg* msg, int* count)
505
0
{
506
0
  struct replace_lst *head;
507
0
  struct replace_lst **crt;
508
0
  const char *p;
509
0
  int r;
510
0
  regmatch_t* pmatch;
511
0
  int nmatch;
512
0
  int eflags;
513
0
  int cnt;
514
515
516
  /* init */
517
0
  head=0;
518
0
  cnt=0;
519
0
  crt=&head;
520
0
  p=input;
521
0
  nmatch=se->max_pmatch+1;
522
  /* no of () referenced + 1 for the whole string: pmatch[0] */
523
0
  pmatch=pkg_malloc(nmatch*sizeof(regmatch_t));
524
0
  if (pmatch==0){
525
0
    LM_ERR("out of pkg mem. (pmatch)\n");
526
0
    goto error;
527
0
  }
528
0
  eflags=0;
529
0
  do{
530
0
    r=regexec(se->re, p, nmatch, pmatch, eflags);
531
0
    LM_DBG("running. r=%d\n", r);
532
    /* subst */
533
0
    if (r==0){ /* != REG_NOMATCH */
534
      /* some checks */
535
0
      if (pmatch[0].rm_so==-1){
536
0
        LM_ERR("unknown offset?\n");
537
0
        goto error;
538
0
      }
539
0
      if (pmatch[0].rm_so==pmatch[0].rm_eo){
540
0
        LM_ERR("matched string is empty... invalid regexp?\n");
541
0
        goto error;
542
0
      }
543
0
      *crt=pkg_malloc(sizeof(struct replace_lst));
544
0
      if (*crt==0){
545
0
        LM_ERR("out of pkg mem (crt)\n");
546
0
        goto error;
547
0
      }
548
0
      memset(*crt, 0, sizeof(struct replace_lst));
549
0
      (*crt)->offset=pmatch[0].rm_so+(int)(p-input);
550
0
      (*crt)->size=pmatch[0].rm_eo-pmatch[0].rm_so;
551
0
      LM_DBG("matched (%d, %d): [%.*s]\n",
552
0
          (*crt)->offset, (*crt)->size,
553
0
          (*crt)->size, input+(*crt)->offset);
554
      /* create subst. string */
555
      /* construct the string from replace[] */
556
0
      if (replace_build(p, nmatch, pmatch, se, msg, &((*crt)->rpl))<0){
557
0
        goto error;
558
0
      }
559
0
      crt=&((*crt)->next);
560
0
      p+=pmatch[0].rm_eo;
561
      /* is it still a string start? */
562
0
      if (*(p-1)=='\n' || *(p-1)=='\r')
563
0
        eflags&=~REG_NOTBOL;
564
0
      else
565
0
        eflags|=REG_NOTBOL;
566
0
      cnt++;
567
0
    }
568
0
  }while((r==0) && se->replace_all);
569
0
  pkg_free(pmatch);
570
0
  if (count)*count=cnt;
571
0
  return head;
572
0
error:
573
0
  if (head) replace_lst_free(head);
574
0
  if (pmatch) pkg_free(pmatch);
575
0
  if (count) *count=-1;
576
0
  return 0;
577
0
}
578
579
580
581
/*! \return the substitution result in a str, input must be 0 term
582
 *  0 on no match or malloc error
583
 *  if count is non zero it will be set to the number of matches, or -1
584
 *   if error
585
 */
586
str* subst_str(const char *input, struct sip_msg* msg, struct subst_expr* se,
587
        int* count)
588
0
{
589
0
  str* res;
590
0
  struct replace_lst *lst;
591
0
  struct replace_lst* l;
592
0
  int len;
593
0
  int size;
594
0
  const char* p;
595
0
  char* dest;
596
0
  const char* end;
597
598
599
  /* compute the len */
600
0
  len=strlen(input);
601
0
  end=input+len;
602
0
  lst=subst_run(se, input, msg, count);
603
0
  if (lst==0){
604
0
    LM_DBG("no match\n");
605
0
    return 0;
606
0
  }
607
0
  for (l=lst; l; l=l->next)
608
0
    len+=(int)(l->rpl.len)-l->size;
609
0
  res=pkg_malloc(sizeof(str));
610
0
  if (res==0){
611
0
    LM_ERR("out of pkg memory\n");
612
0
    goto error;
613
0
  }
614
0
  res->s=pkg_malloc(len+1); /* space for null termination */
615
0
  if (res->s==0){
616
0
    LM_ERR("out of pkg memory (res->s)\n");
617
0
    goto error;
618
0
  }
619
0
  res->s[len]=0;
620
0
  res->len=len;
621
622
  /* replace */
623
0
  dest=res->s;
624
0
  p=input;
625
0
  for(l=lst; l; l=l->next){
626
0
    size=l->offset+input-p;
627
0
    memcpy(dest, p, size); /* copy till offset */
628
0
    p+=size + l->size; /* skip l->size bytes */
629
0
    dest+=size;
630
0
    if (l->rpl.len){
631
0
      memcpy(dest, l->rpl.s, l->rpl.len);
632
0
      dest+=l->rpl.len;
633
0
    }
634
0
  }
635
0
  memcpy(dest, p, end-p);
636
0
  if(lst) replace_lst_free(lst);
637
0
  return res;
638
0
error:
639
0
  if (lst) replace_lst_free(lst);
640
0
  if (res){
641
0
    if (res->s) pkg_free(res->s);
642
0
    pkg_free(res);
643
0
  }
644
0
  if (count) *count=-1;
645
0
  return 0;
646
0
}