Coverage Report

Created: 2025-06-13 07:32

/src/mruby/mrbgems/mruby-sprintf/src/sprintf.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
** sprintf.c - Kernel.#sprintf
3
**
4
** See Copyright Notice in mruby.h
5
*/
6
7
#include <mruby.h>
8
#include <mruby/string.h>
9
#include <mruby/hash.h>
10
#include <mruby/numeric.h>
11
#include <mruby/presym.h>
12
#include <mruby/internal.h>
13
#include <string.h>
14
#include <ctype.h>
15
16
0
/* Upper bound on the number of decimal digits needed for an N-bit value:
 * N * log10(2) + 1, with log10(2) approximated by 146/485. */
#define BIT_DIGITS(N) \
  (((N)*146)/485 + 1)

/* Width in bits of one mrb_int "digit". */
#define BITSPERDIG MRB_INT_BIT

/* Bits to OR into the leading digit of a two's-complement number printed
 * with n bits per digit and l digits in total, so that the partially
 * filled top digit is sign-extended. */
#define EXTENDSIGN(n, l) \
  (((~0U << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0U << (n)))
19
20
mrb_value mrb_bint_2comp(mrb_state *mrb, mrb_value x);
21
22
/*
 * Skip the redundant leading sign digits of a negative number that was
 * rendered in two's complement.
 *
 * str  - NUL-terminated digit string (modified in place for base 8)
 * base - 16, 8 or 2; any other base leaves the string untouched
 *
 * Returns a pointer to the first digit that is not a pure sign digit
 * ('f' for hex, '7' for octal, '1' for binary).
 */
static char*
remove_sign_bits(char *str, int base)
{
  char sign_digit;

  switch (base) {
  case 16:
    sign_digit = 'f';
    break;
  case 8:
    /* the top octal digit may be only partially filled; sign-extend it first */
    *str |= EXTENDSIGN(3, strlen(str));
    sign_digit = '7';
    break;
  case 2:
    sign_digit = '1';
    break;
  default:
    return str;
  }

  while (*str == sign_digit) {
    str++;
  }
  return str;
}
47
48
static char *
49
mrb_uint_to_cstr(char *buf, size_t len, mrb_int num, int base)
50
0
{
51
0
  char *b = buf + len - 1;
52
0
  const int mask = base-1;
53
0
  int shift;
54
0
  mrb_uint val = (uint64_t)num;
55
56
0
  if (num == 0) {
57
0
    buf[0] = '0'; buf[1] = '\0';
58
0
    return buf;
59
0
  }
60
0
  switch (base) {
61
0
  case 16: shift = 4; break;
62
0
  case 8:  shift = 3; break;
63
0
  case 2:  shift = 1; break;
64
0
  default: return NULL;
65
0
  }
66
0
  *--b = '\0';
67
0
  do {
68
0
    *--b = mrb_digitmap[(int)(val & mask)];
69
0
  } while (val >>= shift);
70
71
0
  if (num < 0) {
72
0
    b = remove_sign_bits(b, base);
73
0
  }
74
75
0
  return b;
76
0
}
77
78
0
/* Bit flags collected while parsing one format specifier. */
#define FNONE  0
#define FSHARP (1 << 0)  /* '#' flag */
#define FMINUS (1 << 1)  /* '-' flag, also set by a negative '*' width */
#define FPLUS  (1 << 2)  /* '+' flag */
#define FZERO  (1 << 3)  /* '0' flag */
#define FSPACE (1 << 4)  /* ' ' flag */
#define FWIDTH (1 << 5)  /* a width was given */
#define FPREC  (1 << 6)  /* a precision is in effect */
#define FPREC0 (1 << 7)  /* a '.' was seen, even if the precision is later ignored */
87
88
#ifndef MRB_NO_FLOAT
89
static int
90
fmt_float(char *buf, size_t buf_size, char fmt, int flags, int width, int prec, mrb_float f)
91
0
{
92
0
  char sign = '\0';
93
0
  int left_align = 0;
94
0
  int zero_pad = 0;
95
96
0
  if (flags & FSHARP) fmt |= 0x80;
97
0
  if (flags & FPLUS)  sign = '+';
98
0
  if (flags & FMINUS) left_align = 1;
99
0
  if (flags & FZERO)  zero_pad = 1;
100
0
  if (flags & FSPACE) sign = ' ';
101
102
0
  int len = mrb_format_float(f, buf, buf_size, fmt, prec, sign);
103
104
  // buf[0] < '0' returns true if the first character is space, + or -
105
  // buf[1] < '9' matches a digit, and doesn't match when we get back +nan or +inf
106
0
  if (buf[0] < '0' && buf[1] <= '9' && zero_pad) {
107
0
    buf++;
108
0
    width--;
109
0
    len--;
110
0
  }
111
0
  if (*buf < '0' || *buf >= '9') {
112
    // For inf or nan, we don't want to zero pad.
113
0
    zero_pad = 0;
114
0
  }
115
0
  if (len >= width) {
116
0
    return len;
117
0
  }
118
0
  buf[width] = '\0';
119
0
  if (left_align) {
120
0
    memset(&buf[len], ' ', width - len);
121
0
    return width;
122
0
  }
123
0
  memmove(&buf[width - len], buf, len);
124
0
  if (zero_pad) {
125
0
    memset(buf, '0', width - len);
126
0
  }
127
0
  else {
128
0
    memset(buf, ' ', width - len);
129
0
  }
130
0
  return width;
131
0
}
132
#endif
133
134
0
/*
 * Output-buffer helpers for mrb_str_format().  They operate on its locals
 * `result` (the output string), `buf` (its data pointer), `blen` (bytes
 * used) and `bsiz` (current capacity).
 */

/* Make sure l more bytes fit, doubling the capacity as often as needed,
 * and refresh buf from result. */
#define CHECK(l) do { \
  if (blen+(l) >= bsiz) {\
    do {\
      if (bsiz > MRB_INT_MAX/2) mrb_raise(mrb, E_ARGUMENT_ERROR, "too big specifier");\
      bsiz*=2;\
    } while (blen+(l) >= bsiz);\
    mrb_str_resize(mrb, result, bsiz);\
  }\
  buf = RSTRING_PTR(result);\
} while (0)

/* Append l bytes starting at s. */
#define PUSH(s, l) do { \
  CHECK(l);\
  memcpy(&buf[blen], s, l);\
  blen += (mrb_int)(l);\
} while (0)

/* Append l copies of the byte c. */
#define FILL(c, l) do { \
  CHECK(l);\
  memset(&buf[blen], c, l);\
  blen += (l);\
} while (0)
156
157
static void
158
check_next_arg(mrb_state *mrb, int posarg, int nextarg)
159
0
{
160
0
  switch (posarg) {
161
0
  case -1:
162
0
    mrb_raisef(mrb, E_ARGUMENT_ERROR, "unnumbered(%d) mixed with numbered", nextarg);
163
0
    break;
164
0
  case -2:
165
0
    mrb_raisef(mrb, E_ARGUMENT_ERROR, "unnumbered(%d) mixed with named", nextarg);
166
0
    break;
167
0
  default:
168
0
    break;
169
0
  }
170
0
}
171
172
static void
173
check_pos_arg(mrb_state *mrb, int posarg, mrb_int n)
174
0
{
175
0
  if (posarg > 0) {
176
0
    mrb_raisef(mrb, E_ARGUMENT_ERROR, "numbered(%i) after unnumbered(%d)",
177
0
               n, posarg);
178
0
  }
179
0
  if (posarg == -2) {
180
0
    mrb_raisef(mrb, E_ARGUMENT_ERROR, "numbered(%i) after named", n);
181
0
  }
182
0
  if (n < 1) {
183
0
    mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid index - %i$", n);
184
0
  }
185
0
}
186
187
static void
188
check_name_arg(mrb_state *mrb, int posarg, const char *name, size_t len)
189
0
{
190
0
  if (posarg > 0) {
191
0
    mrb_raisef(mrb, E_ARGUMENT_ERROR, "named%l after unnumbered(%d)",
192
0
               name, len, posarg);
193
0
  }
194
0
  if (posarg == -1) {
195
0
    mrb_raisef(mrb, E_ARGUMENT_ERROR, "named%l after numbered", name, len);
196
0
  }
197
0
}
198
199
0
/*
 * Argument-fetching helpers for mrb_str_format().  They rely on its
 * locals: mrb, argc, argv, posarg, nextarg, nextvalue, p, end, t and n.
 */

/* Fetch the next unnumbered argument and advance nextarg. */
#define GETNEXTARG() (\
  check_next_arg(mrb, posarg, nextarg),\
  (posarg = nextarg++, GETNTHARG(posarg)))

/* Use the value already selected by `n$` / `<name>`, else the next argument. */
#define GETARG() (!mrb_undef_p(nextvalue) ? nextvalue : GETNEXTARG())

/* Fetch argument number n (1-based) and switch to numbered mode. */
#define GETPOSARG(n) (\
  check_pos_arg(mrb, posarg, n),\
  (posarg = -1, GETNTHARG(n)))

/* Raw indexed fetch; raises ArgumentError when nth is out of range. */
#define GETNTHARG(nth) \
  (((nth) >= argc) ? (mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments"), mrb_undef_value()) : argv[(nth)])

/* Validate a named reference and switch to named mode. */
#define CHECKNAMEARG(name, len) (\
  check_name_arg(mrb, posarg, name, len),\
  posarg = -2)

/* Parse a decimal number at p into n; `val` only names the field in the
 * "<val> too big" error message. */
#define GETNUM(n, val) do { \
  if (!(p = get_num(mrb, p, end, &(n)))) \
    mrb_raise(mrb, E_ARGUMENT_ERROR, #val " too big"); \
} while(0)

/* Handle `*` or `*n$`: read the width/precision from an argument into num.
 * On exit p points at the last consumed character ('*' or '$').
 * The field name forwarded to GETNUM is the macro argument itself, so the
 * error message names the offending field ("width" / "prec"). */
#define GETASTER(num) do { \
  mrb_value tmp_v; \
  t = p++; \
  GETNUM(n, num); \
  if (*p == '$') { \
    tmp_v = GETPOSARG(n); \
  } \
  else { \
    tmp_v = GETNEXTARG(); \
    p = t; \
  } \
  num = (int)mrb_as_int(mrb, tmp_v); \
} while (0)
234
235
static const char*
236
get_num(mrb_state *mrb, const char *p, const char *end, int *valp)
237
0
{
238
0
  char *e;
239
0
  mrb_int n;
240
0
  if (!mrb_read_int(p, end, &e, &n) || INT_MAX < n) {
241
0
    return NULL;
242
0
  }
243
0
  *valp = (int)n;
244
0
  return e;
245
0
}
246
247
static void
248
get_hash(mrb_state *mrb, mrb_value *hash, mrb_int argc, const mrb_value *argv)
249
0
{
250
0
  mrb_value tmp;
251
252
0
  if (!mrb_undef_p(*hash)) return;
253
0
  if (argc != 2) {
254
0
    mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required");
255
0
  }
256
0
  tmp = mrb_check_hash_type(mrb, argv[1]);
257
0
  if (mrb_nil_p(tmp)) {
258
0
    mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required");
259
0
  }
260
0
  *hash = tmp;
261
0
}
262
263
static mrb_value
264
mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fmt)
265
0
{
266
0
  const char *p, *end;
267
0
  char *buf;
268
0
  mrb_int blen;
269
0
  mrb_int bsiz;
270
0
  mrb_value result;
271
0
  int n;
272
0
  int width;
273
0
  int prec;
274
0
  int nextarg = 1;
275
0
  int posarg = 0;
276
0
  mrb_value nextvalue;
277
0
  mrb_value str;
278
0
  mrb_value hash = mrb_undef_value();
279
280
0
#define CHECK_FOR_WIDTH(f)                                              \
281
0
  if ((f) & FWIDTH) {                                                   \
282
0
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice");              \
283
0
    }                                                                   \
284
0
  if ((f) & FPREC0) {                                                   \
285
0
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision");          \
286
0
  }
287
0
#define CHECK_FOR_FLAGS(f)                                              \
288
0
  if ((f) & FWIDTH) {                                                   \
289
0
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width");               \
290
0
  }                                                                     \
291
0
  if ((f) & FPREC0) {                                                   \
292
0
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision");           \
293
0
  }
294
295
0
  argc++;
296
0
  argv--;
297
0
  mrb_ensure_string_type(mrb, fmt);
298
0
  p = RSTRING_PTR(fmt);
299
0
  end = p + RSTRING_LEN(fmt);
300
0
  blen = 0;
301
0
  bsiz = 120;
302
0
  result = mrb_str_new_capa(mrb, bsiz);
303
0
  buf = RSTRING_PTR(result);
304
0
  memset(buf, 0, bsiz);
305
306
0
  int ai = mrb_gc_arena_save(mrb);
307
0
  for (; p < end; p++) {
308
0
    const char *t;
309
0
    mrb_sym id = 0;
310
0
    int flags = FNONE;
311
312
0
    for (t = p; t < end && *t != '%'; t++)
313
0
      ;
314
0
    if (t + 1 == end) {
315
      /* % at the bottom */
316
0
      mrb_raise(mrb, E_ARGUMENT_ERROR, "incomplete format specifier; use %% (double %) instead");
317
0
    }
318
0
    PUSH(p, t - p);
319
0
    if (t >= end)
320
0
      goto sprint_exit; /* end of fmt string */
321
322
0
    p = t + 1;    /* skip '%' */
323
324
0
    width = prec = -1;
325
0
    nextvalue = mrb_undef_value();
326
327
0
retry:
328
0
    switch (*p) {
329
0
      default:
330
0
        mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p);
331
0
        break;
332
333
0
      case ' ':
334
0
        CHECK_FOR_FLAGS(flags);
335
0
        flags |= FSPACE;
336
0
        p++;
337
0
        goto retry;
338
339
0
      case '#':
340
0
        CHECK_FOR_FLAGS(flags);
341
0
        flags |= FSHARP;
342
0
        p++;
343
0
        goto retry;
344
345
0
      case '+':
346
0
        CHECK_FOR_FLAGS(flags);
347
0
        flags |= FPLUS;
348
0
        p++;
349
0
        goto retry;
350
351
0
      case '-':
352
0
        CHECK_FOR_FLAGS(flags);
353
0
        flags |= FMINUS;
354
0
        p++;
355
0
        goto retry;
356
357
0
      case '0':
358
0
        CHECK_FOR_FLAGS(flags);
359
0
        flags |= FZERO;
360
0
        p++;
361
0
        goto retry;
362
363
0
      case '1': case '2': case '3': case '4':
364
0
      case '5': case '6': case '7': case '8': case '9':
365
0
        GETNUM(n, width);
366
0
        if (*p == '$') {
367
0
          if (!mrb_undef_p(nextvalue)) {
368
0
            mrb_raisef(mrb, E_ARGUMENT_ERROR, "value given twice - %i$", n);
369
0
          }
370
0
          nextvalue = GETPOSARG(n);
371
0
          p++;
372
0
          goto retry;
373
0
        }
374
0
        CHECK_FOR_WIDTH(flags);
375
0
        width = n;
376
0
        flags |= FWIDTH;
377
0
        goto retry;
378
379
0
      case '<':
380
0
      case '{': {
381
0
        const char *start = p;
382
0
        char term = (*p == '<') ? '>' : '}';
383
384
0
        for (; p < end && *p != term; )
385
0
          p++;
386
0
        if (id) {
387
0
          mrb_raisef(mrb, E_ARGUMENT_ERROR, "name%l after <%n>",
388
0
                     start, p - start + 1, id);
389
0
        }
390
0
        CHECKNAMEARG(start, p - start + 1);
391
0
        get_hash(mrb, &hash, argc, argv);
392
0
        id = mrb_intern_check(mrb, start + 1, p - start - 1);
393
0
        if (id) {
394
0
          nextvalue = mrb_hash_fetch(mrb, hash, mrb_symbol_value(id), mrb_undef_value());
395
0
        }
396
0
        if (!id || mrb_undef_p(nextvalue)) {
397
0
          mrb_raisef(mrb, E_KEY_ERROR, "key%l not found", start, p - start + 1);
398
0
        }
399
0
        if (term == '}') goto format_s;
400
0
        p++;
401
0
        goto retry;
402
0
      }
403
404
0
      case '*':
405
0
        CHECK_FOR_WIDTH(flags);
406
0
        flags |= FWIDTH;
407
0
        GETASTER(width);
408
0
        if (width > INT16_MAX || INT16_MIN > width) {
409
0
          mrb_raise(mrb, E_ARGUMENT_ERROR, "width too big");
410
0
        }
411
0
        if (width < 0) {
412
0
          flags |= FMINUS;
413
0
          width = -width;
414
0
        }
415
0
        p++;
416
0
        goto retry;
417
418
0
      case '.':
419
0
        if (flags & FPREC0) {
420
0
          mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice");
421
0
        }
422
0
        flags |= FPREC|FPREC0;
423
424
0
        p++;
425
0
        if (*p == '*') {
426
0
          GETASTER(prec);
427
0
          if (prec < 0) {  /* ignore negative precision */
428
0
            flags &= ~FPREC;
429
0
          }
430
0
          p++;
431
0
          goto retry;
432
0
        }
433
0
        GETNUM(prec, precision);
434
0
        goto retry;
435
436
0
      case '\n':
437
0
      case '\0':
438
0
        p--;
439
        /* fallthrough */
440
0
      case '%':
441
0
        if (flags != FNONE) {
442
0
          mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %");
443
0
        }
444
0
        PUSH("%", 1);
445
0
        break;
446
447
0
      case 'c': {
448
0
        mrb_value val = GETARG();
449
0
        mrb_value tmp;
450
0
        char *c;
451
452
0
        tmp = mrb_check_string_type(mrb, val);
453
0
        if (!mrb_nil_p(tmp)) {
454
0
          if (RSTRING_LEN(tmp) != 1) {
455
0
            mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character");
456
0
          }
457
0
        }
458
0
        else if (mrb_integer_p(val)) {
459
0
          mrb_int n = mrb_integer(val);
460
0
#ifndef MRB_UTF8_STRING
461
0
          char buf[1];
462
463
0
          buf[0] = (char)n&0xff;
464
0
          tmp = mrb_str_new(mrb, buf, 1);
465
#else
466
          if (n < 0x80) {
467
            char buf[1];
468
469
            buf[0] = (char)n;
470
            tmp = mrb_str_new(mrb, buf, 1);
471
          }
472
          else {
473
            tmp = mrb_funcall_argv(mrb, val, MRB_SYM(chr), 0, NULL);
474
            mrb_check_type(mrb, tmp, MRB_TT_STRING);
475
          }
476
#endif
477
0
        }
478
0
        else {
479
0
          mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
480
0
        }
481
0
        c = RSTRING_PTR(tmp);
482
0
        n = (int)RSTRING_LEN(tmp);
483
0
        if (!(flags & FWIDTH)) {
484
0
          PUSH(c, n);
485
0
        }
486
0
        else if ((flags & FMINUS)) {
487
0
          PUSH(c, n);
488
0
          if (width>0) FILL(' ', width-1);
489
0
        }
490
0
        else {
491
0
          if (width>0) FILL(' ', width-1);
492
0
          PUSH(c, n);
493
0
        }
494
0
        mrb_gc_arena_restore(mrb, ai);
495
0
      }
496
0
      break;
497
498
0
      case 's':
499
0
      case 'p':
500
0
  format_s:
501
0
      {
502
0
        mrb_value arg = GETARG();
503
0
        mrb_int len;
504
0
        mrb_int slen;
505
506
0
        if (*p == 'p') arg = mrb_inspect(mrb, arg);
507
0
        str = mrb_obj_as_string(mrb, arg);
508
0
        len = RSTRING_LEN(str);
509
0
        if (RSTRING(result)->flags & MRB_STR_EMBED) {
510
0
          mrb_int tmp_n = len;
511
0
          RSTRING(result)->flags &= ~MRB_STR_EMBED_LEN_MASK;
512
0
          RSTRING(result)->flags |= tmp_n << MRB_STR_EMBED_LEN_SHIFT;
513
0
        }
514
0
        else {
515
0
          RSTRING(result)->as.heap.len = blen;
516
0
        }
517
0
        if (flags&(FPREC|FWIDTH)) {
518
0
          slen = RSTRING_LEN(str);
519
0
          if (slen < 0) {
520
0
            mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
521
0
          }
522
0
          if ((flags&FPREC) && (prec < slen)) {
523
0
            char *p = RSTRING_PTR(str) + prec;
524
0
            slen = prec;
525
0
            len = (mrb_int)(p - RSTRING_PTR(str));
526
0
          }
527
          /* need to adjust multi-byte string pos */
528
0
          if ((flags&FWIDTH) && (width > slen)) {
529
0
            width -= (int)slen;
530
0
            if (!(flags&FMINUS)) {
531
0
              FILL(' ', width);
532
0
            }
533
0
            PUSH(RSTRING_PTR(str), len);
534
0
            if (flags&FMINUS) {
535
0
              FILL(' ', width);
536
0
            }
537
0
            break;
538
0
          }
539
0
        }
540
0
        PUSH(RSTRING_PTR(str), len);
541
0
        mrb_gc_arena_restore(mrb, ai);
542
0
      }
543
0
      break;
544
545
0
      case 'd':
546
0
      case 'i':
547
0
      case 'o':
548
0
      case 'x':
549
0
      case 'X':
550
0
      case 'b':
551
0
      case 'B':
552
0
      case 'u': {
553
0
        mrb_value val = GETARG();
554
0
        char nbuf[69], *s;
555
0
        const char *prefix = NULL;
556
0
        int sign = 0, dots = 0;
557
0
        char sc = 0;
558
0
        char fc = 0;
559
0
        mrb_int v = 0;
560
0
        int base;
561
0
        int len;
562
563
0
        if (flags & FSHARP) {
564
0
          switch (*p) {
565
0
            case 'o': prefix = "0"; break;
566
0
            case 'x': prefix = "0x"; break;
567
0
            case 'X': prefix = "0X"; break;
568
0
            case 'b': prefix = "0b"; break;
569
0
            case 'B': prefix = "0B"; break;
570
0
            default: break;
571
0
          }
572
0
        }
573
574
0
        switch (*p) {
575
0
          case 'o':
576
0
            base = 8; break;
577
0
          case 'x':
578
0
          case 'X':
579
0
            base = 16; break;
580
0
          case 'b':
581
0
          case 'B':
582
0
            base = 2; break;
583
0
          case 'u':
584
0
          case 'd':
585
0
          case 'i':
586
0
            sign = 1;
587
            /* fall through */
588
0
          default:
589
0
            base = 10; break;
590
0
        }
591
592
0
  bin_retry:
593
0
        switch (mrb_type(val)) {
594
0
#ifndef MRB_NO_FLOAT
595
0
          case MRB_TT_FLOAT:
596
0
            val = mrb_float_to_integer(mrb, val);
597
0
            goto bin_retry;
598
0
#endif
599
0
#ifdef MRB_USE_BIGINT
600
0
          case MRB_TT_BIGINT:
601
0
            {
602
0
              mrb_int n = (mrb_bint_cmp(mrb, val, mrb_fixnum_value(0)));
603
0
              mrb_bool need_dots = ((flags & FPLUS) == 0) && (base == 16 || base == 8 || base == 2) && n < 0;
604
0
              if (need_dots) {
605
0
                val = mrb_bint_2comp(mrb, val);
606
0
                dots = 1;
607
0
                v = -1;
608
0
              }
609
0
              mrb_value str = mrb_bint_to_s(mrb, val, base);
610
0
              s = RSTRING_PTR(str);
611
0
              len = (int)RSTRING_LEN(str);
612
0
            }
613
0
            goto str_skip;
614
0
#endif
615
0
          case MRB_TT_STRING:
616
0
            val = mrb_str_to_integer(mrb, val, 0, TRUE);
617
0
            goto bin_retry;
618
0
          case MRB_TT_INTEGER:
619
0
            v = mrb_integer(val);
620
0
            break;
621
0
          default:
622
0
            v = mrb_as_int(mrb, val);
623
0
            break;
624
0
        }
625
626
0
        if (sign) {
627
0
          if (v >= 0) {
628
0
            if (flags & FPLUS) {
629
0
              sc = '+';
630
0
              width--;
631
0
            }
632
0
            else if (flags & FSPACE) {
633
0
              sc = ' ';
634
0
              width--;
635
0
            }
636
0
          }
637
0
          else {
638
0
            sc = '-';
639
0
            width--;
640
0
          }
641
0
          s = mrb_int_to_cstr(nbuf, sizeof(nbuf), v, base);
642
0
          if (v < 0) s++;       /* skip minus sign */
643
0
        }
644
0
        else {
645
          /* print as unsigned */
646
0
          s = mrb_uint_to_cstr(nbuf, sizeof(nbuf), v, base);
647
0
          if (v < 0) {
648
0
            dots = 1;
649
0
          }
650
0
        }
651
652
0
        {
653
0
          size_t size;
654
0
          size = strlen(s);
655
          /* PARANOID: assert(size <= MRB_INT_MAX) */
656
0
          len = (int)size;
657
0
        }
658
659
0
#ifdef MRB_USE_BIGINT
660
0
      str_skip:
661
0
#endif
662
0
        switch (base) {
663
0
        case 16:
664
0
          fc = 'f'; break;
665
0
        case 8:
666
0
          fc = '7'; break;
667
0
        case 2:
668
0
          fc = '1'; break;
669
0
        }
670
671
0
        if (dots) {
672
0
          if (base == 8 && (*s == '1' || *s == '3')) {
673
0
            s++; len--;
674
0
          }
675
0
          while (*s == fc) {
676
0
            s++; len--;
677
0
          }
678
0
        }
679
0
        if (*p == 'X') {
680
0
          char *pp = s;
681
0
          int c;
682
0
          while ((c = (int)(unsigned char)*pp) != 0) {
683
0
            *pp = toupper(c);
684
0
            pp++;
685
0
          }
686
0
          if (base == 16) {
687
0
            fc = 'F';
688
0
          }
689
0
        }
690
691
0
        if (prefix && !prefix[1]) { /* octal */
692
0
          if (dots) {
693
0
            prefix = NULL;
694
0
          }
695
0
          else if (len == 1 && *s == '0') {
696
0
            len = 0;
697
0
            if (flags & FPREC) prec--;
698
0
          }
699
0
          else if ((flags & FPREC) && (prec > len)) {
700
0
            prefix = NULL;
701
0
          }
702
0
        }
703
0
        else if (len == 1 && *s == '0') {
704
0
          prefix = NULL;
705
0
        }
706
707
0
        if (prefix) {
708
0
          size_t size;
709
0
          size = strlen(prefix);
710
          /* PARANOID: assert(size <= MRB_INT_MAX).
711
           *  this check is absolutely paranoid. */
712
0
          width -= (int)size;
713
0
        }
714
715
0
        if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
716
0
          prec = width;
717
0
          width = 0;
718
0
        }
719
0
        else {
720
0
          if (prec < len) {
721
0
            if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
722
0
            prec = len;
723
0
          }
724
0
          width -= prec;
725
0
        }
726
727
0
        if (!(flags&FMINUS) && width > 0) {
728
0
          FILL(' ', width);
729
0
          width = 0;
730
0
        }
731
732
0
        if (sc) PUSH(&sc, 1);
733
734
0
        if (prefix) {
735
0
          int plen = (int)strlen(prefix);
736
0
          PUSH(prefix, plen);
737
0
        }
738
0
        if (dots) {
739
0
          prec -= 2;
740
0
          width -= 2;
741
0
          PUSH("..", 2);
742
0
          if (*s != fc) {
743
0
            FILL(fc, 1);
744
0
            prec--; width--;
745
0
          }
746
0
        }
747
748
0
        if (prec > len) {
749
0
          CHECK(prec - len);
750
0
          if ((flags & (FMINUS|FPREC)) != FMINUS) {
751
0
            char c = '0';
752
0
            FILL(c, prec - len);
753
0
          }
754
0
          else if (v < 0) {
755
0
            FILL(fc, prec - len);
756
0
          }
757
0
        }
758
0
        PUSH(s, len);
759
0
        if (width > 0) {
760
0
          FILL(' ', width);
761
0
        }
762
0
      }
763
0
      break;
764
765
0
      case 'f':
766
0
      case 'g':
767
0
      case 'G':
768
0
      case 'e':
769
0
      case 'E': {
770
#ifdef MRB_NO_FLOAT
771
        mrb_raisef(mrb, E_ARGUMENT_ERROR, "%%%c not supported with MRB_NO_FLOAT defined", *p);
772
#else
773
0
        mrb_value val = GETARG();
774
0
        double fval;
775
0
        mrb_int need = 6;
776
777
0
        fval = mrb_as_float(mrb, val);
778
0
        if (!isfinite(fval)) {
779
0
          const char *expr;
780
0
          const int elen = 3;
781
0
          char sign = '\0';
782
783
0
          if (isnan(fval)) {
784
0
            expr = "NaN";
785
0
          }
786
0
          else {
787
0
            expr = "Inf";
788
0
          }
789
0
          need = elen;
790
0
          if (!isnan(fval) && fval < 0.0)
791
0
            sign = '-';
792
0
          else if (flags & (FPLUS|FSPACE))
793
0
            sign = (flags & FPLUS) ? '+' : ' ';
794
0
          if (sign)
795
0
            need++;
796
0
          if ((flags & FWIDTH) && need < width)
797
0
            need = width;
798
799
0
          if (need < 0) {
800
0
            mrb_raise(mrb, E_ARGUMENT_ERROR, "width too big");
801
0
          }
802
0
          FILL(' ', need);
803
0
          if (flags & FMINUS) {
804
0
            if (sign)
805
0
              buf[blen - need--] = sign;
806
0
            memcpy(&buf[blen - need], expr, elen);
807
0
          }
808
0
          else {
809
0
            if (sign)
810
0
              buf[blen - elen - 1] = sign;
811
0
            memcpy(&buf[blen - elen], expr, (size_t)elen);
812
0
          }
813
0
          break;
814
0
        }
815
816
0
        need = 0;
817
0
        if (*p != 'e' && *p != 'E') {
818
0
          int i;
819
0
          frexp(fval, &i);
820
0
          if (i > 0)
821
0
            need = BIT_DIGITS(i);
822
0
        }
823
0
        if (need > MRB_INT_MAX - ((flags&FPREC) ? prec : 6)) {
824
0
        too_big_width_prec:
825
0
          mrb_raise(mrb, E_ARGUMENT_ERROR,
826
0
                    (width > prec ? "width too big" : "prec too big"));
827
0
        }
828
0
        need += (flags&FPREC) ? prec : 6;
829
0
        if ((flags&FWIDTH) && need < width)
830
0
          need = width;
831
0
        if ((mrb_int)need > MRB_INT_MAX - 20) {
832
0
          goto too_big_width_prec;
833
0
        }
834
0
        need += 20;
835
836
0
        CHECK(need);
837
0
        n = fmt_float(&buf[blen], need, *p, flags, width, prec, fval);
838
0
        if (n < 0 || n >= need) {
839
0
          mrb_raise(mrb, E_RUNTIME_ERROR, "formatting error");
840
0
        }
841
0
        blen += n;
842
0
#endif
843
0
      }
844
0
      break;
845
0
    }
846
0
  }
847
848
0
  sprint_exit:
849
#if 0
850
  /* XXX - We cannot validate the number of arguments if (digit)$ style used.
851
   */
852
  if (posarg >= 0 && nextarg < argc) {
853
    const char *mesg = "too many arguments for format string";
854
    if (mrb_test(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, mesg);
855
    if (mrb_test(ruby_verbose)) mrb_warn(mrb, mesg);
856
  }
857
#endif
858
0
  mrb_str_resize(mrb, result, blen);
859
860
0
  return result;
861
0
}
862
863
/*
864
 *  call-seq:
865
 *     format(format_string [, arguments...] )   -> string
866
 *     sprintf(format_string [, arguments...] )  -> string
867
 *
868
 *  Returns the string resulting from applying <i>format_string</i> to
869
 *  any additional arguments.  Within the format string, any characters
870
 *  other than format sequences are copied to the result.
871
 *
872
 *  The syntax of a format sequence is as follows.
873
 *
874
 *    %[flags][width][.precision]type
875
 *
876
 *  A format
877
 *  sequence consists of a percent sign, followed by optional flags,
878
 *  width, and precision indicators, then terminated with a field type
879
 *  character.  The field type controls how the corresponding
880
 *  <code>sprintf</code> argument is to be interpreted, while the flags
881
 *  modify that interpretation.
882
 *
883
 *  The field type characters are:
884
 *
885
 *      Field |  Integer Format
886
 *      ------+--------------------------------------------------------------
887
 *        b   | Convert argument as a binary number.
888
 *            | Negative numbers will be displayed as a two's complement
889
 *            | prefixed with '..1'.
890
 *        B   | Equivalent to 'b', but uses an uppercase 0B for prefix
891
 *            | in the alternative format by #.
892
 *        d   | Convert argument as a decimal number.
893
 *        i   | Identical to 'd'.
894
 *        o   | Convert argument as an octal number.
895
 *            | Negative numbers will be displayed as a two's complement
896
 *            | prefixed with '..7'.
897
 *        u   | Identical to 'd'.
898
 *        x   | Convert argument as a hexadecimal number.
899
 *            | Negative numbers will be displayed as a two's complement
900
 *            | prefixed with '..f' (representing an infinite string of
901
 *            | leading 'ff's).
902
 *        X   | Equivalent to 'x', but uses uppercase letters.
903
 *
904
 *      Field |  Float Format
905
 *      ------+--------------------------------------------------------------
906
 *        e   | Convert floating-point argument into exponential notation
907
 *            | with one digit before the decimal point as [-]d.dddddde[+-]dd.
908
 *            | The precision specifies the number of digits after the decimal
909
 *            | point (defaulting to six).
910
 *        E   | Equivalent to 'e', but uses an uppercase E to indicate
911
 *            | the exponent.
912
 *        f   | Convert floating-point argument as [-]ddd.dddddd,
913
 *            | where the precision specifies the number of digits after
914
 *            | the decimal point.
915
 *        g   | Convert a floating-point number using exponential form
916
 *            | if the exponent is less than -4 or greater than or
917
 *            | equal to the precision, or in dd.dddd form otherwise.
918
 *            | The precision specifies the number of significant digits.
919
 *        G   | Equivalent to 'g', but uses an uppercase 'E' in exponent form.
920
 *
921
 *      Field |  Other Format
922
 *      ------+--------------------------------------------------------------
923
 *        c   | Argument is the numeric code for a single character or
924
 *            | a single character string itself.
925
 *        p   | The value of argument.inspect.
926
 *        s   | Argument is a string to be substituted.  If the format
927
 *            | sequence contains a precision, at most that many characters
928
 *            | will be copied.
929
 *        %   | A percent sign itself will be displayed.  No argument taken.
930
 *
931
 *  The flags modify the behavior of the formats.
932
 *  The flag characters are:
933
 *
934
 *    Flag     | Applies to    | Meaning
935
 *    ---------+---------------+-----------------------------------------
936
 *    space    | bBdiouxX      | Leave a space at the start of
937
 *             | aAeEfgG       | non-negative numbers.
938
 *             | (numeric fmt) | For 'o', 'x', 'X', 'b' and 'B', use
939
 *             |               | a minus sign with absolute value for
940
 *             |               | negative values.
941
 *    ---------+---------------+-----------------------------------------
942
 *    (digit)$ | all           | Specifies the absolute argument number
943
 *             |               | for this field.  Absolute and relative
944
 *             |               | argument numbers cannot be mixed in a
945
 *             |               | sprintf string.
946
 *    ---------+---------------+-----------------------------------------
947
 *     #       | bBoxX         | Use an alternative format.
948
 *             | aAeEfgG       | For the conversion 'o', increase the precision
949
 *             |               | until the first digit will be '0' if
950
 *             |               | it is not formatted as complements.
951
 *             |               | For the conversions 'x', 'X', 'b' and 'B'
952
 *             |               | on non-zero, prefix the result with "0x",
953
 *             |               | "0X", "0b" and "0B", respectively.
954
 *             |               | For 'e', 'E', 'f', 'g', and 'G',
955
 *             |               | force a decimal point to be added,
956
 *             |               | even if no digits follow.
957
 *             |               | For 'g' and 'G', do not remove trailing zeros.
958
 *    ---------+---------------+-----------------------------------------
959
 *    +        | bBdiouxX      | Add a leading plus sign to non-negative
960
 *             | aAeEfgG       | numbers.
961
 *             | (numeric fmt) | For 'o', 'x', 'X', 'b' and 'B', use
962
 *             |               | a minus sign with absolute value for
963
 *             |               | negative values.
964
 *    ---------+---------------+-----------------------------------------
965
 *    -        | all           | Left-justify the result of this conversion.
966
 *    ---------+---------------+-----------------------------------------
967
 *    0 (zero) | bBdiouxX      | Pad with zeros, not spaces.
968
 *             | aAeEfgG       | For 'o', 'x', 'X', 'b' and 'B', radix-1
969
 *             | (numeric fmt) | is used for negative numbers formatted as
970
 *             |               | complements.
971
 *    ---------+---------------+-----------------------------------------
972
 *    *        | all           | Use the next argument as the field width.
973
 *             |               | If negative, left-justify the result. If the
974
 *             |               | asterisk is followed by a number and a dollar
975
 *             |               | sign, use the indicated argument as the width.
976
 *
977
 *  Examples of flags:
978
 *
979
 *   # '+' and space flag specifies the sign of non-negative numbers.
980
 *   sprintf("%d", 123)  #=> "123"
981
 *   sprintf("%+d", 123) #=> "+123"
982
 *   sprintf("% d", 123) #=> " 123"
983
 *
984
 *   # '#' flag for 'o' increases number of digits to show '0'.
985
 *   # '+' and space flag changes format of negative numbers.
986
 *   sprintf("%o", 123)   #=> "173"
987
 *   sprintf("%#o", 123)  #=> "0173"
988
 *   sprintf("%+o", -123) #=> "-173"
989
 *   sprintf("%o", -123)  #=> "..7605"
990
 *   sprintf("%#o", -123) #=> "..7605"
991
 *
992
 *   # '#' flag for 'x' adds a prefix '0x' for non-zero numbers.
993
 *   # '+' and space flag disables complements for negative numbers.
994
 *   sprintf("%x", 123)   #=> "7b"
995
 *   sprintf("%#x", 123)  #=> "0x7b"
996
 *   sprintf("%+x", -123) #=> "-7b"
997
 *   sprintf("%x", -123)  #=> "..f85"
998
 *   sprintf("%#x", -123) #=> "0x..f85"
999
 *   sprintf("%#x", 0)    #=> "0"
1000
 *
1001
 *   # '#' for 'X' uses the prefix '0X'.
1002
 *   sprintf("%X", 123)  #=> "7B"
1003
 *   sprintf("%#X", 123) #=> "0X7B"
1004
 *
1005
 *   # '#' flag for 'b' adds a prefix '0b' for non-zero numbers.
1006
 *   # '+' and space flag disables complements for negative numbers.
1007
 *   sprintf("%b", 123)   #=> "1111011"
1008
 *   sprintf("%#b", 123)  #=> "0b1111011"
1009
 *   sprintf("%+b", -123) #=> "-1111011"
1010
 *   sprintf("%b", -123)  #=> "..10000101"
1011
 *   sprintf("%#b", -123) #=> "0b..10000101"
1012
 *   sprintf("%#b", 0)    #=> "0"
1013
 *
1014
 *   # '#' for 'B' uses the prefix '0B'.
1015
 *   sprintf("%B", 123)  #=> "1111011"
1016
 *   sprintf("%#B", 123) #=> "0B1111011"
1017
 *
1018
 *   # '#' for 'e' forces to show the decimal point.
1019
 *   sprintf("%.0e", 1)  #=> "1e+00"
1020
 *   sprintf("%#.0e", 1) #=> "1.e+00"
1021
 *
1022
 *   # '#' for 'f' forces to show the decimal point.
1023
 *   sprintf("%.0f", 1234)  #=> "1234"
1024
 *   sprintf("%#.0f", 1234) #=> "1234."
1025
 *
1026
 *   # '#' for 'g' forces to show the decimal point.
1027
 *   # It also disables stripping lowest zeros.
1028
 *   sprintf("%g", 123.4)   #=> "123.4"
1029
 *   sprintf("%#g", 123.4)  #=> "123.400"
1030
 *   sprintf("%g", 123456)  #=> "123456"
1031
 *   sprintf("%#g", 123456) #=> "123456."
1032
 *
1033
 *  The field width is an optional integer, followed optionally by a
1034
 *  period and a precision.  The width specifies the minimum number of
1035
 *  characters that will be written to the result for this field.
1036
 *
1037
 *  Examples of width:
1038
 *
1039
 *   # padding is done by spaces,       width=20
1040
 *   # 0 or radix-1.             <------------------>
1041
 *   sprintf("%20d", 123)   #=> "                 123"
1042
 *   sprintf("%+20d", 123)  #=> "                +123"
1043
 *   sprintf("%020d", 123)  #=> "00000000000000000123"
1044
 *   sprintf("%+020d", 123) #=> "+0000000000000000123"
1045
 *   sprintf("% 020d", 123) #=> " 0000000000000000123"
1046
 *   sprintf("%-20d", 123)  #=> "123                 "
1047
 *   sprintf("%-+20d", 123) #=> "+123                "
1048
 *   sprintf("%- 20d", 123) #=> " 123                "
1049
 *   sprintf("%020x", -123) #=> "..ffffffffffffffff85"
1050
 *
1051
 *  For
1052
 *  numeric fields, the precision controls the number of decimal places
1053
 *  displayed.  For string fields, the precision determines the maximum
1054
 *  number of characters to be copied from the string.  (Thus, the format
1055
 *  sequence <code>%10.10s</code> will always contribute exactly ten
1056
 *  characters to the result.)
1057
 *
1058
 *  Examples of precisions:
1059
 *
1060
 *   # precision for 'd', 'o', 'x' and 'b' is
1061
 *   # minimum number of digits               <------>
1062
 *   sprintf("%20.8d", 123)  #=> "            00000123"
1063
 *   sprintf("%20.8o", 123)  #=> "            00000173"
1064
 *   sprintf("%20.8x", 123)  #=> "            0000007b"
1065
 *   sprintf("%20.8b", 123)  #=> "            01111011"
1066
 *   sprintf("%20.8d", -123) #=> "           -00000123"
1067
 *   sprintf("%20.8o", -123) #=> "            ..777605"
1068
 *   sprintf("%20.8x", -123) #=> "            ..ffff85"
1069
 *   sprintf("%20.8b", -11)  #=> "            ..110101"
1070
 *
1071
 *   # "0x" and "0b" for '#x' and '#b' is not counted for
1072
 *   # precision but "0" for '#o' is counted.  <------>
1073
 *   sprintf("%#20.8d", 123)  #=> "            00000123"
1074
 *   sprintf("%#20.8o", 123)  #=> "            00000173"
1075
 *   sprintf("%#20.8x", 123)  #=> "          0x0000007b"
1076
 *   sprintf("%#20.8b", 123)  #=> "          0b01111011"
1077
 *   sprintf("%#20.8d", -123) #=> "           -00000123"
1078
 *   sprintf("%#20.8o", -123) #=> "            ..777605"
1079
 *   sprintf("%#20.8x", -123) #=> "          0x..ffff85"
1080
 *   sprintf("%#20.8b", -11)  #=> "          0b..110101"
1081
 *
1082
 *   # precision for 'e' is number of
1083
 *   # digits after the decimal point           <------>
1084
 *   sprintf("%20.8e", 1234.56789) #=> "      1.23456789e+03"
1085
 *
1086
 *   # precision for 'f' is number of
1087
 *   # digits after the decimal point               <------>
1088
 *   sprintf("%20.8f", 1234.56789) #=> "       1234.56789000"
1089
 *
1090
 *   # precision for 'g' is number of
1091
 *   # significant digits                          <------->
1092
 *   sprintf("%20.8g", 1234.56789) #=> "           1234.5679"
1093
 *
1094
 *   #                                         <------->
1095
 *   sprintf("%20.8g", 123456789)  #=> "       1.2345679e+08"
1096
 *
1097
 *   # precision for 's' is
1098
 *   # maximum number of characters                    <------>
1099
 *   sprintf("%20.8s", "string test") #=> "            string t"
1100
 *
1101
 *  Examples:
1102
 *
1103
 *     sprintf("%d %04x", 123, 123)               #=> "123 007b"
1104
 *     sprintf("%08b '%4s'", 123, 123)            #=> "01111011 ' 123'"
1105
 *     sprintf("%1$*2$s %2$d %1$s", "hello", 8)   #=> "   hello 8 hello"
1106
 *     sprintf("%1$*2$s %2$d", "hello", -8)       #=> "hello    -8"
1107
 *     sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23)   #=> "+1.23: 1.23:1.23"
1108
 *     sprintf("%u", -123)                        #=> "-123"
1109
 *
1110
 *  For more complex formatting, Ruby supports a reference by name.
1111
 *  %<name>s style uses format style, but %{name} style doesn't.
1112
 *
1113
 *  Examples:
1114
 *    sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 })
1115
 *      #=> 1 : 2.000000
1116
 *    sprintf("%{foo}f", { :foo => 1 })
1117
 *      # => "1f"
1118
 */
1119
1120
static mrb_value
1121
mrb_f_sprintf(mrb_state *mrb, mrb_value obj)
1122
0
{
1123
0
  mrb_int argc;
1124
0
  const mrb_value *argv;
1125
1126
0
  mrb_get_args(mrb, "*", &argv, &argc);
1127
1128
0
  if (argc <= 0) {
1129
0
    mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments");
1130
0
    return mrb_nil_value();
1131
0
  }
1132
0
  else {
1133
0
    return mrb_str_format(mrb, argc - 1, argv + 1, argv[0]);
1134
0
  }
1135
0
}
1136
1137
void
1138
mrb_mruby_sprintf_gem_init(mrb_state *mrb)
1139
2.05k
{
1140
2.05k
  struct RClass *krn = mrb->kernel_module;
1141
2.05k
  mrb_define_module_function(mrb, krn, "sprintf", mrb_f_sprintf, MRB_ARGS_ANY());
1142
2.05k
  mrb_define_module_function(mrb, krn, "format",  mrb_f_sprintf, MRB_ARGS_ANY());
1143
2.05k
}
1144
1145
void
1146
mrb_mruby_sprintf_gem_final(mrb_state *mrb)
1147
2.05k
{
1148
2.05k
}