Coverage Report

Created: 2026-04-10 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/MapServer/src/textlayout.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  MapServer
4
 * Purpose:  Text Layout functions, eventually using Harfbuzz and Fribidi/ICU
5
 * Author:   Thomas Bonfort and the MapServer team.
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 1996-2013 Regents of the University of Minnesota.
9
 *
10
 * Permission is hereby granted, free of charge, to any person obtaining a
11
 * copy of this software and associated documentation files (the "Software"),
12
 * to deal in the Software without restriction, including without limitation
13
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14
 * and/or sell copies of the Software, and to permit persons to whom the
15
 * Software is furnished to do so, subject to the following conditions:
16
 *
17
 * The above copyright notice and this permission notice shall be included in
18
 * all copies of this Software or works derived from this Software.
19
 *
20
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26
 * DEALINGS IN THE SOFTWARE.
27
 *****************************************************************************/
28
29
#include <float.h>
30
#include "mapserver.h"
31
32
#ifdef USE_ICONV
33
#include <iconv.h>
34
#include <wchar.h>
35
#endif
36
#include "fontcache.h"
37
#include FT_ADVANCES_H
38
#include FT_TYPES_H
39
40
/* Parallel per-character arrays for the text being laid out: entry i of each
 * array describes the same input character. */
typedef struct {
  unsigned int *unicodes;   /* unicode value of each character */
  unsigned int *codepoints; /* glyph index looked up in the font face chosen
                               for the run the character belongs to */
#ifdef USE_FRIBIDI
  FriBidiCharType *ctypes;   /* bidi character type of each character */
  FriBidiLevel *bidi_levels; /* bidi embedding level of each character */
#endif
#ifdef USE_HARFBUZZ
  hb_script_t *scripts; /* script of each character */
#endif
} TextInfo;
51
52
typedef struct {
53
  int offset; /* offset in TextInfo entries where the current run is starting */
54
  int length; /* number of unicode glyphs in this run */
55
#ifdef USE_FRIBIDI
56
  FriBidiLevel rtl;   /* bidi embedding level of run: -1 to skip shaping,
57
                         otherwise if pair:ltr, odd:rtl */
58
  hb_script_t script; /* script: latin, arabic, thai, etc... */
59
#endif
60
  int line_number;
61
  face_element *face; /* font face to use for this run */
62
} text_run;
63
64
#ifdef USE_HARFBUZZ
65
struct _ms_hb_user_data {
66
  text_run *run;
67
  TextInfo *info;
68
  int glyph_size;
69
};
70
71
/*
 * Font-list prefixes, one per supported script. An entry of a label's font
 * list that starts with one of these prefixes (e.g. "ar:arialuni") is tried
 * first for runs of the matching script. Each prefix ends with ':' so that it
 * can only match a complete prefix and never the leading characters of an
 * ordinary font key.
 */
const char *_ms_script_prefix_en = "en:";
const char *_ms_script_prefix_ar = "ar:";
const char *_ms_script_prefix_cn = "cn:";
const char *_ms_script_prefix_hy = "hy:";
const char *_ms_script_prefix_bn = "bn:";
/* was "iu" without the colon, which matched every font key starting with
 * "iu" */
const char *_ms_script_prefix_iu = "iu:";
const char *_ms_script_prefix_chr = "chr:";
const char *_ms_script_prefix_cop = "cop:";
const char *_ms_script_prefix_ru = "ru:";
const char *_ms_script_prefix_hi = "hi:";
const char *_ms_script_prefix_ka = "ka:";
const char *_ms_script_prefix_el = "el:";
const char *_ms_script_prefix_gu = "gu:";
const char *_ms_script_prefix_pa = "pa:";
const char *_ms_script_prefix_ko = "ko:";
const char *_ms_script_prefix_he = "he:";
const char *_ms_script_prefix_ja = "ja:";
const char *_ms_script_prefix_kn = "kn:";
const char *_ms_script_prefix_lo = "lo:";
const char *_ms_script_prefix_ml = "ml:";
const char *_ms_script_prefix_mn = "mn:";
const char *_ms_script_prefix_or = "or:";
const char *_ms_script_prefix_syr = "syr:";
const char *_ms_script_prefix_ta = "ta:";
const char *_ms_script_prefix_te = "te:";
const char *_ms_script_prefix_th = "th:";
const char *_ms_script_prefix_bo = "bo:";
const char *_ms_script_prefix_am = "am:";
const char *_ms_script_prefix_km = "km:";
const char *_ms_script_prefix_my = "my:";
const char *_ms_script_prefix_si = "si:";
const char *_ms_script_prefix_dv = "dv:";
const char *_ms_script_prefix_bku = "bku:";
const char *_ms_script_prefix_hnn = "hnn:";
const char *_ms_script_prefix_tl = "tl:";
const char *_ms_script_prefix_tbw = "tbw:";
const char *_ms_script_prefix_uga = "uga:";
const char *_ms_script_prefix_bug = "bug:";
const char *_ms_script_prefix_peo = "peo:";
const char *_ms_script_prefix_syl = "syl:";
const char *_ms_script_prefix_nko = "nko:";
112
113
/*
 * Map a harfbuzz script to the font-list prefix used to select a font
 * dedicated to that script.
 *
 * Returns the current value of the matching _ms_script_prefix_* variable, or
 * NULL when the script has no associated prefix. Hiragana and Katakana share
 * the "ja" prefix.
 */
const char *prefix_from_script(hb_script_t script) {
  /* the table stores the address of each prefix variable, so the value is
   * read at call time exactly as a direct reference would be */
  static const struct {
    hb_script_t script;
    const char *const *prefix;
  } known_scripts[] = {
      {HB_SCRIPT_LATIN, &_ms_script_prefix_en},
      {HB_SCRIPT_ARABIC, &_ms_script_prefix_ar},
      {HB_SCRIPT_HAN, &_ms_script_prefix_cn},
      {HB_SCRIPT_ARMENIAN, &_ms_script_prefix_hy},
      {HB_SCRIPT_BENGALI, &_ms_script_prefix_bn},
      {HB_SCRIPT_CANADIAN_ABORIGINAL, &_ms_script_prefix_iu},
      {HB_SCRIPT_CHEROKEE, &_ms_script_prefix_chr},
      {HB_SCRIPT_COPTIC, &_ms_script_prefix_cop},
      {HB_SCRIPT_CYRILLIC, &_ms_script_prefix_ru},
      {HB_SCRIPT_DEVANAGARI, &_ms_script_prefix_hi},
      {HB_SCRIPT_GEORGIAN, &_ms_script_prefix_ka},
      {HB_SCRIPT_GREEK, &_ms_script_prefix_el},
      {HB_SCRIPT_GUJARATI, &_ms_script_prefix_gu},
      {HB_SCRIPT_GURMUKHI, &_ms_script_prefix_pa},
      {HB_SCRIPT_HANGUL, &_ms_script_prefix_ko},
      {HB_SCRIPT_HEBREW, &_ms_script_prefix_he},
      {HB_SCRIPT_HIRAGANA, &_ms_script_prefix_ja},
      {HB_SCRIPT_KANNADA, &_ms_script_prefix_kn},
      {HB_SCRIPT_KATAKANA, &_ms_script_prefix_ja},
      {HB_SCRIPT_LAO, &_ms_script_prefix_lo},
      {HB_SCRIPT_MALAYALAM, &_ms_script_prefix_ml},
      {HB_SCRIPT_MONGOLIAN, &_ms_script_prefix_mn},
      {HB_SCRIPT_ORIYA, &_ms_script_prefix_or},
      {HB_SCRIPT_SYRIAC, &_ms_script_prefix_syr},
      {HB_SCRIPT_TAMIL, &_ms_script_prefix_ta},
      {HB_SCRIPT_TELUGU, &_ms_script_prefix_te},
      {HB_SCRIPT_THAI, &_ms_script_prefix_th},
      {HB_SCRIPT_TIBETAN, &_ms_script_prefix_bo},
      {HB_SCRIPT_ETHIOPIC, &_ms_script_prefix_am},
      {HB_SCRIPT_KHMER, &_ms_script_prefix_km},
      {HB_SCRIPT_MYANMAR, &_ms_script_prefix_my},
      {HB_SCRIPT_SINHALA, &_ms_script_prefix_si},
      {HB_SCRIPT_THAANA, &_ms_script_prefix_dv},
      {HB_SCRIPT_BUHID, &_ms_script_prefix_bku},
      {HB_SCRIPT_HANUNOO, &_ms_script_prefix_hnn},
      {HB_SCRIPT_TAGALOG, &_ms_script_prefix_tl},
      {HB_SCRIPT_TAGBANWA, &_ms_script_prefix_tbw},
      {HB_SCRIPT_UGARITIC, &_ms_script_prefix_uga},
      {HB_SCRIPT_BUGINESE, &_ms_script_prefix_bug},
      {HB_SCRIPT_OLD_PERSIAN, &_ms_script_prefix_peo},
      {HB_SCRIPT_SYLOTI_NAGRI, &_ms_script_prefix_syl},
      {HB_SCRIPT_NKO, &_ms_script_prefix_nko},
  };
  const unsigned int count = sizeof(known_scripts) / sizeof(known_scripts[0]);

  for (unsigned int k = 0; k < count; k++) {
    if (known_scripts[k].script == script)
      return *known_scripts[k].prefix;
  }
  return NULL;
}
203
204
hb_feature_t hbfeatures[2] = {{HB_TAG('v', 'e', 'r', 't'), 0, 0, INT_MAX},
205
                              {HB_TAG('k', 'e', 'r', 'n'), 0, 0, INT_MAX}};
206
207
static hb_bool_t _ms_get_glyph_func(hb_font_t *font, void *font_data,
208
                                    hb_codepoint_t unicode,
209
                                    hb_codepoint_t variation_selector,
210
                                    hb_codepoint_t *glyph, void *user_data)
211
212
{
213
  (void)font;
214
  (void)variation_selector;
215
  (void)user_data;
216
  /* first check our run, as we have probably already computed this */
217
  int i;
218
  struct _ms_hb_user_data *ud = font_data;
219
  unsigned int *unicodes = ud->info->unicodes + ud->run->offset;
220
221
  for (i = 0; i < ud->run->length; i++) {
222
    if (unicodes[i] == unicode) {
223
      *glyph = *(ud->info->codepoints + ud->run->offset + i);
224
      return *glyph != 0;
225
    }
226
  }
227
228
  {
229
    FT_Face ft_face = ud->run->face->face;
230
231
#ifdef HAVE_FT_FACE_GETCHARVARIANTINDEX
232
    if ((variation_selector)) {
233
      *glyph =
234
          FT_Face_GetCharVariantIndex(ft_face, unicode, variation_selector);
235
      return *glyph != 0;
236
    }
237
#endif
238
239
    *glyph = FT_Get_Char_Index(ft_face, unicode);
240
    return *glyph != 0;
241
  }
242
}
243
244
#if HB_VERSION_ATLEAST(1, 2, 3)
245
static hb_bool_t _ms_get_nominal_glyph_func(hb_font_t *font, void *font_data,
246
                                            hb_codepoint_t unicode,
247
                                            hb_codepoint_t *glyph,
248
                                            void *user_data) {
249
  return _ms_get_glyph_func(font, font_data, unicode, 0, glyph, user_data);
250
}
251
252
static hb_bool_t _ms_get_variation_glyph_func(hb_font_t *font, void *font_data,
253
                                              hb_codepoint_t unicode,
254
                                              hb_codepoint_t variation_selector,
255
                                              hb_codepoint_t *glyph,
256
                                              void *user_data) {
257
  return _ms_get_glyph_func(font, font_data, unicode, variation_selector, glyph,
258
                            user_data);
259
}
260
#endif
261
262
static hb_position_t _ms_get_glyph_h_advance_func(hb_font_t *font,
263
                                                  void *font_data,
264
                                                  hb_codepoint_t glyph,
265
                                                  void *user_data) {
266
  (void)font;
267
  (void)user_data;
268
  struct _ms_hb_user_data *ud = font_data;
269
  glyph_element *glyphc =
270
      msGetGlyphByIndex(ud->run->face, ud->glyph_size, glyph);
271
  if (!glyphc)
272
    return 0;
273
  return glyphc->metrics.advance * 64;
274
}
275
276
static hb_position_t _ms_get_glyph_v_advance_func(hb_font_t *font,
277
                                                  void *font_data,
278
                                                  hb_codepoint_t glyph,
279
                                                  void *user_data) {
280
  (void)font;
281
  (void)font_data;
282
  (void)glyph;
283
  (void)user_data;
284
  return 0; /* we don't support vertical layouts */
285
}
286
#endif
287
288
static int check_single_font(fontSetObj *fontset, char *fontkey, text_run *run,
289
0
                             TextInfo *glyphs, int ignore_missing) {
290
0
  int i;
291
0
  face_element *fcache = NULL;
292
0
  if (fontset && fontkey) {
293
0
    char *fontkey2 = strchr(fontkey, ':'); /* try skipping prefix */
294
0
    if (fontkey2) {
295
0
      fcache = msGetFontFace(fontkey2 + 1, fontset);
296
0
    }
297
0
  }
298
0
  if (!fcache)
299
0
    fcache = msGetFontFace(fontkey, fontset);
300
0
  run->face = fcache;
301
0
  if (MS_UNLIKELY(!fcache))
302
0
    return MS_FAILURE;
303
0
  for (i = 0; i < run->length; i++) {
304
0
    unsigned int codepoint =
305
0
        msGetGlyphIndex(fcache, glyphs->unicodes[run->offset + i]);
306
0
    if (codepoint || ignore_missing) {
307
0
      if (codepoint == 0) {
308
0
        msDebug("Unable to find glyph for codepoint %u. Using ? as fallback.\n",
309
0
                glyphs->unicodes[run->offset + i]);
310
0
        codepoint = msGetGlyphIndex(fcache, '?');
311
0
      }
312
0
      glyphs->codepoints[run->offset + i] = codepoint;
313
0
    } else
314
0
      return MS_FAILURE;
315
0
  }
316
0
  return MS_SUCCESS;
317
0
}
318
319
static int get_face_for_run(fontSetObj *fontset, char *fontlist, text_run *run,
320
0
                            TextInfo *glyphs) {
321
0
  char *startfont, *endfont;
322
#if defined(USE_HARFBUZZ) && defined(USE_FRIBIDI)
323
  const char *prefix = NULL;
324
#endif
325
326
0
  if (!fontset || !fontlist) {
327
0
    int ok = check_single_font(fontset, fontlist, run, glyphs, 0);
328
0
    (void)ok;
329
0
    return MS_SUCCESS;
330
0
  }
331
332
#if defined(USE_HARFBUZZ) && defined(USE_FRIBIDI)
333
  if (run->rtl >= 0) {
334
    prefix = prefix_from_script(run->script);
335
  } else {
336
    prefix = _ms_script_prefix_en;
337
  }
338
339
  if (prefix) {
340
    /* we'll first look for a font who's prefixed by the current script prefix,
341
     * e.g, given the fontlist "arial,ar:arialuni,cn:cjk" check the "cjk" font
342
     * first for HAN scripts
343
     */
344
    int prefixlen = strlen(prefix);
345
    startfont = fontlist;
346
    for (;;) {
347
      if (!*startfont)
348
        break;
349
      endfont = strchr(startfont, ',');
350
      if (!strncmp(startfont, prefix, prefixlen)) {
351
        startfont += strlen(prefix);
352
        if (endfont)
353
          *endfont = 0;
354
        int ok = check_single_font(fontset, startfont, run, glyphs, 0);
355
        if (endfont) {
356
          *endfont = ',';
357
          if (ok == MS_SUCCESS)
358
            return MS_SUCCESS;
359
          startfont = endfont + 1; /* go to next font in list */
360
        } else {
361
          if (ok == MS_SUCCESS)
362
            return MS_SUCCESS;
363
          break;
364
        }
365
      }
366
      if (endfont)
367
        startfont = endfont + 1;
368
      else
369
        break;
370
    }
371
  }
372
#endif
373
374
  /* no prefix, or prefix search didn't return satisfying result */
375
0
  startfont = fontlist;
376
0
  for (;;) {
377
0
    if (!*startfont)
378
0
      break;
379
0
    endfont = strchr(startfont, ',');
380
0
    if (endfont)
381
0
      *endfont = 0;
382
0
    int ok = check_single_font(fontset, startfont, run, glyphs,
383
0
                               !endfont); /* ignore failing glyphs if we're
384
                                             using the last font in the list */
385
0
    if (endfont) {
386
0
      *endfont = ',';
387
0
      if (ok == MS_SUCCESS)
388
0
        return MS_SUCCESS;
389
0
      startfont = endfont + 1; /* go to next font in list */
390
0
    } else {
391
0
      if (ok == MS_SUCCESS)
392
0
        return MS_SUCCESS;
393
0
      break;
394
0
    }
395
0
  }
396
397
0
  return MS_FAILURE;
398
0
}
399
400
#ifdef USE_HARFBUZZ
401
hb_font_t *get_hb_font(struct _ms_hb_user_data *font_data) {
402
  face_element *fcache = font_data->run->face;
403
  hb_font_element *hbf = fcache->hbfont;
404
  FT_Face face = fcache->face;
405
  int reqsize = MS_NINT(font_data->glyph_size * 96.0 / 72.0);
406
407
  if (reqsize != fcache->face->size->metrics.x_ppem) {
408
    FT_Set_Pixel_Sizes(face, 0, reqsize);
409
  }
410
411
  if (!hbf) {
412
    hbf = msSmallMalloc(sizeof(hb_font_element));
413
    hbf->hbparentfont = hb_ft_font_create(face, NULL);
414
    hbf->hbfont = hb_font_create_sub_font(hbf->hbparentfont);
415
    hbf->funcs = hb_font_funcs_create();
416
    hb_font_funcs_set_glyph_h_advance_func(
417
        hbf->funcs, _ms_get_glyph_h_advance_func, NULL, NULL);
418
#if HB_VERSION_ATLEAST(1, 2, 3)
419
    hb_font_funcs_set_nominal_glyph_func(hbf->funcs, _ms_get_nominal_glyph_func,
420
                                         NULL, NULL);
421
    hb_font_funcs_set_variation_glyph_func(
422
        hbf->funcs, _ms_get_variation_glyph_func, NULL, NULL);
423
#else
424
    hb_font_funcs_set_glyph_func(hbf->funcs, _ms_get_glyph_func, NULL, NULL);
425
#endif
426
    hb_font_funcs_set_glyph_v_advance_func(
427
        hbf->funcs, _ms_get_glyph_v_advance_func, NULL, NULL);
428
    hbf->cursize = reqsize;
429
    fcache->hbfont = hbf;
430
    hb_font_set_funcs(hbf->hbfont, hbf->funcs, font_data, NULL);
431
  } else {
432
    if (hbf->cursize != reqsize) {
433
      hb_font_set_scale(hbf->hbparentfont,
434
                        ((uint64_t)face->size->metrics.x_scale *
435
                         (uint64_t)face->units_per_EM) >>
436
                            16,
437
                        ((uint64_t)face->size->metrics.y_scale *
438
                         (uint64_t)face->units_per_EM) >>
439
                            16);
440
      hb_font_set_ppem(hbf->hbparentfont, face->size->metrics.x_ppem,
441
                       face->size->metrics.y_ppem);
442
      hbf->cursize = reqsize;
443
    }
444
  }
445
  hb_font_set_funcs_data(hbf->hbfont, font_data, NULL);
446
  return hbf->hbfont;
447
}
448
449
/*
 *  Return 1 when each of the nglyphs values of the unicode array is below
 *  0x2B0, i.e. ASCII, ISO Latin-1 and the remaining code points under that
 *  limit; return 0 as soon as a value of 0x2B0 or more is met.
 *  An array with no entries yields 1.
 */
int unicode_is_latin1(const unsigned int *unicode, long nglyphs) {
  for (long k = 0; k < nglyphs; k++) {
    if (unicode[k] >= 0x2B0)
      return 0;
  }
  return 1;
}
463
464
/* Non-zero for the scripts that take their script from neighbouring
 * characters (common and inherited). */
static int _ms_script_needs_context(hb_script_t script) {
  return script == HB_SCRIPT_COMMON || script == HB_SCRIPT_INHERITED;
}

/*
 * Fill scripts[0..len-1] with the script of each unicode value of cp.
 *
 * Common/inherited characters take the script of the closest preceding
 * character that has a script of its own. Those found before any usable
 * context (an unknown script is not a usable context) are resolved by a
 * second scan running from the end of the text, which gives them the script
 * of the character following them. Characters with no usable context on
 * either side keep their common/inherited script.
 */
void get_scripts(unsigned int *cp, int len, hb_script_t *scripts) {
  hb_unicode_funcs_t *ufuncs = hb_unicode_funcs_get_default();
  hb_script_t context = HB_SCRIPT_UNKNOWN;
  int unresolved = 0;

  /* forward scan: detect scripts and propagate them to the right */
  for (int k = 0; k < len; k++) {
    const hb_script_t detected = hb_unicode_script(ufuncs, cp[k]);
    scripts[k] = detected;

    if (!_ms_script_needs_context(detected))
      context = detected;
    else if (context != HB_SCRIPT_UNKNOWN)
      scripts[k] = context;
    else
      unresolved = 1; /* no context yet, look at the following characters */
  }

  if (!unresolved)
    return;

  /* backward scan: propagate scripts to the left */
  context = HB_SCRIPT_UNKNOWN;
  for (int k = len - 1; k >= 0; k--) {
    if (!_ms_script_needs_context(scripts[k]))
      context = scripts[k];
    else if (context != HB_SCRIPT_UNKNOWN)
      scripts[k] = context;
  }
}
501
#endif
502
503
/* Tell whether a character must be left out of the text to lay out.
 * Returns 1 for a carriage return ('\r'), the only character skipped for now,
 * and 0 for any other value. */
static int skip_unicode(unsigned int unicode) {
  return unicode == '\r';
}
513
514
0
/* Direction of a line of text, as stored in the rtl member of
 * struct line_desc. */
#define MS_RTL_LTR 0   /* left-to-right line */
#define MS_RTL_RTL 1   /* right-to-left line */
#define MS_RTL_MIXED 2 /* line with more than one bidi embedding level */
517
518
/* Per-line information gathered while laying out a text symbol. */
struct line_desc {
  int length; /* pen x position reached once the line is laid out */
  int rtl;    /* one of MS_RTL_LTR, MS_RTL_RTL or MS_RTL_MIXED */
};
522
523
0
int msLayoutTextSymbol(mapObj *map, textSymbolObj *ts, textPathObj *tgret) {
524
0
#define STATIC_GLYPHS 100
525
0
#define STATIC_LINES 10
526
0
  text_run static_runs[STATIC_GLYPHS];
527
0
  int i, nruns, start, ret = MS_SUCCESS;
528
0
  size_t text_num_bytes;
529
0
  char *inp;
530
0
  unsigned int static_unicodes[STATIC_GLYPHS];
531
0
  unsigned int static_codepoints[STATIC_GLYPHS];
532
#ifdef USE_FRIBIDI
533
  FriBidiCharType static_ctypes[STATIC_GLYPHS];
534
  FriBidiLevel static_bidi_levels[STATIC_GLYPHS];
535
#endif
536
#ifdef USE_HARFBUZZ
537
  hb_script_t static_scripts[STATIC_GLYPHS];
538
  hb_buffer_t *buf = NULL;
539
#endif
540
0
  struct line_desc static_line_descs[STATIC_LINES];
541
0
  int alloc_glyphs = 0;
542
0
  struct line_desc *line_descs = NULL;
543
0
  text_run *runs;
544
0
  double oldpeny = 3455, peny,
545
0
         penx = 0; /*oldpeny is set to an unreasonable default initial value */
546
0
  fontSetObj *fontset = NULL;
547
548
0
  TextInfo glyphs;
549
0
  int num_glyphs = 0;
550
551
0
  assert(
552
0
      ts->annotext &&
553
0
      *ts->annotext); /* ensure we have at least one character/glyph to treat */
554
555
0
  if (map)
556
0
    fontset = &map->fontset;
557
    /* go through iconv beforehand, so we know we're handling utf8 */
558
0
#ifdef USE_ICONV
559
0
  if (ts->label->encoding && strcasecmp(ts->label->encoding, "UTF-8")) {
560
0
    iconv_t cd;
561
0
    size_t len, bufleft;
562
0
    char *encoded_text, *outp;
563
0
    len = strlen(ts->annotext);
564
0
    bufleft = len * 6;
565
0
    encoded_text = msSmallMalloc(bufleft + 1);
566
0
    cd = iconv_open("UTF-8", ts->label->encoding);
567
568
0
    if (cd == (iconv_t)-1) {
569
0
      msSetError(MS_IDENTERR, "Encoding not supported by libiconv (%s).",
570
0
                 "msGetEncodedString()", ts->label->encoding);
571
0
      return MS_FAILURE;
572
0
    }
573
574
0
    inp = ts->annotext;
575
0
    outp = encoded_text;
576
577
0
    while (len > 0) {
578
0
      const size_t iconv_status = iconv(cd, &inp, &len, &outp, &bufleft);
579
0
      if (iconv_status == (size_t)-1) {
580
0
        break;
581
0
      }
582
0
    }
583
584
0
    text_num_bytes = outp - encoded_text;
585
0
    encoded_text[text_num_bytes] = 0;
586
0
    free(ts->annotext);
587
0
    ts->annotext = encoded_text;
588
0
    iconv_close(cd);
589
0
  } else
590
0
#endif
591
0
  {
592
0
    text_num_bytes = strlen(ts->annotext);
593
0
  }
594
595
0
  if (text_num_bytes == 0)
596
0
    return 0;
597
598
0
  if (text_num_bytes > STATIC_GLYPHS) {
599
#ifdef USE_FRIBIDI
600
    glyphs.bidi_levels = msSmallMalloc(text_num_bytes * sizeof(FriBidiLevel));
601
    glyphs.ctypes = msSmallMalloc(text_num_bytes * sizeof(FriBidiCharType));
602
#endif
603
0
    glyphs.unicodes = msSmallMalloc(text_num_bytes * sizeof(unsigned int));
604
0
    glyphs.codepoints = msSmallMalloc(text_num_bytes * sizeof(unsigned int));
605
#ifdef USE_HARFBUZZ
606
    glyphs.scripts = msSmallMalloc(text_num_bytes * sizeof(hb_script_t));
607
#endif
608
0
    runs = msSmallMalloc(text_num_bytes * sizeof(text_run));
609
0
  } else {
610
#ifdef USE_FRIBIDI
611
    glyphs.bidi_levels = static_bidi_levels;
612
    glyphs.ctypes = static_ctypes;
613
#endif
614
0
    glyphs.unicodes = static_unicodes;
615
0
    glyphs.codepoints = static_codepoints;
616
#ifdef USE_HARFBUZZ
617
    glyphs.scripts = static_scripts;
618
#endif
619
0
    runs = static_runs;
620
0
  }
621
622
  /* populate the unicode entries once and for all */
623
0
  inp = ts->annotext;
624
0
  while (*inp) {
625
0
    unsigned int unicode;
626
0
    inp += msUTF8ToUniChar(inp, &unicode);
627
0
    if (!skip_unicode(unicode)) {
628
0
      glyphs.unicodes[num_glyphs++] = unicode;
629
0
    }
630
0
  }
631
632
0
  if (ts->label->wrap || ts->label->maxlength > 0) {
633
0
    if (ts->label->wrap && ts->label->maxlength == 0) {
634
0
      for (i = 0; i < num_glyphs; i++) {
635
        /* replace all occurrences of the wrap character with a newline */
636
0
        if (glyphs.unicodes[i] == (unsigned)ts->label->wrap)
637
0
          glyphs.unicodes[i] = '\n';
638
0
      }
639
0
    } else {
640
0
      assert(ts->label->maxlength > 0);
641
0
      if (num_glyphs > ts->label->maxlength) {
642
0
        int num_cur_glyph_on_line =
643
0
            0; /*count for the number of glyphs on the current line*/
644
0
        for (i = 0; i < num_glyphs; i++) {
645
          /* wrap at wrap character or at ZERO WIDTH SPACE (unicode 0x200b), if
646
           * current line is too long */
647
0
          if ((glyphs.unicodes[i] == (unsigned)ts->label->wrap ||
648
0
               glyphs.unicodes[i] == (unsigned)0x200b) &&
649
0
              num_cur_glyph_on_line >= ts->label->maxlength) {
650
0
            glyphs.unicodes[i] = '\n';
651
0
            num_cur_glyph_on_line = 0;
652
0
          } else {
653
0
            num_cur_glyph_on_line++;
654
0
          }
655
0
        }
656
0
      }
657
0
    }
658
    /*
659
     * TODO RFC98: RFC40 negative label->wrap. This is left out for the moment
660
     * as it requires handling a realloc and imho is never used and is an
661
     * overly-complex use-case.
662
     */
663
0
  }
664
665
  /* split our text into runs (one for each line) */
666
0
  nruns = 0;
667
0
  start = 0;
668
0
  runs[0].offset = 0;
669
0
  runs[0].line_number = 0;
670
0
  for (i = 0; i < num_glyphs; i++) {
671
0
    if (glyphs.unicodes[i] != '\n')
672
0
      continue;
673
0
    runs[nruns].length = i - start; /* length of current line (without \n) */
674
0
    start = i + 1;                  /* start of next line */
675
0
    runs[nruns + 1].line_number = runs[nruns].line_number + 1;
676
0
    runs[nruns + 1].offset = start;
677
0
    nruns++;
678
0
  }
679
  /* unless the last glyph was a \n, we need to "close" the last run */
680
0
  if (glyphs.unicodes[num_glyphs - 1] != '\n') {
681
0
    runs[nruns].length = num_glyphs - start;
682
0
    nruns++;
683
0
  }
684
685
0
  if (runs[nruns - 1].line_number + 1 > STATIC_LINES) {
686
0
    line_descs = msSmallMalloc((runs[nruns - 1].line_number + 1) *
687
0
                               sizeof(struct line_desc));
688
0
  } else {
689
0
    line_descs = static_line_descs;
690
0
  }
691
692
#ifdef USE_FRIBIDI
693
  for (i = 0; i < nruns; i++) {
694
    /* check the run (at this stage, one run per line), decide if we need to go
695
     * through bidi and/or shaping */
696
    if (unicode_is_latin1(glyphs.unicodes + runs[i].offset, runs[i].length)) {
697
      runs[i].rtl = -1;
698
      line_descs[i].rtl = MS_RTL_LTR;
699
    } else {
700
      runs[i].rtl = 0;
701
    }
702
  }
703
704
  for (i = 0; i < nruns; i++) {
705
    /* split the text into bidi runs */
706
    if (runs[i].rtl >= 0) {
707
      int j, original_num_glyphs, original_offset;
708
      FriBidiLevel prevlevel;
709
      FriBidiParType dir = FRIBIDI_PAR_LTR;
710
      original_offset = runs[i].offset;
711
      original_num_glyphs = runs[i].length;
712
      fribidi_get_bidi_types(glyphs.unicodes + original_offset, runs[i].length,
713
                             glyphs.ctypes + original_offset);
714
      {
715
        FriBidiLevel level = fribidi_get_par_embedding_levels(
716
            glyphs.ctypes + original_offset, runs[i].length, &dir,
717
            glyphs.bidi_levels + runs[i].offset);
718
        (void)level;
719
      }
720
      /* if we have different embedding levels, create a run for each one */
721
      runs[i].rtl = prevlevel = glyphs.bidi_levels[original_offset];
722
      line_descs[runs[i].line_number].rtl =
723
          (prevlevel % 2) ? MS_RTL_RTL : MS_RTL_LTR;
724
      for (j = 1; j < original_num_glyphs; j++) {
725
        if (glyphs.bidi_levels[original_offset + j] != prevlevel) {
726
          line_descs[runs[i].line_number].rtl = MS_RTL_MIXED;
727
          /* create a new run for the different embedding level */
728
          nruns++;
729
730
          /* first move remaining runs */
731
          memmove(runs + i + 2, runs + i + 1,
732
                  (nruns - i - 2) * sizeof(text_run));
733
734
          i++;
735
          /* new run inherits line number */
736
          runs[i].line_number = runs[i - 1].line_number;
737
          runs[i].offset = original_offset + j;
738
          runs[i].length =
739
              (runs[i - 1].offset + runs[i - 1].length) - runs[i].offset;
740
          runs[i - 1].length = runs[i].offset - runs[i - 1].offset;
741
742
          /* new run starts at current position */
743
          runs[i].rtl = prevlevel = glyphs.bidi_levels[original_offset + j];
744
        }
745
      }
746
    }
747
  }
748
#else
749
0
  for (i = 0; i < nruns; i++) {
750
0
    line_descs[i].rtl = MS_RTL_LTR;
751
0
  }
752
0
#endif
753
754
#ifdef USE_FRIBIDI
755
  /* determine the scripts of each run, and split again into runs with identical
756
   * script */
757
  for (i = 0; i < nruns; i++) {
758
    if (runs[i].rtl == -1) {
759
      runs[i].script = HB_SCRIPT_LATIN;
760
      continue; /* skip runs we have determined we are latin (no shaping needed)
761
                 */
762
    } else {
763
      int j, original_num_glyphs, original_offset;
764
      hb_script_t prevscript;
765
      original_offset = runs[i].offset;
766
      original_num_glyphs = runs[i].length;
767
      get_scripts(glyphs.unicodes + original_offset, runs[i].length,
768
                  glyphs.scripts + original_offset);
769
      /* if we have different scripts, create a run for each one */
770
      runs[i].script = prevscript = glyphs.scripts[original_offset];
771
      for (j = 1; j < original_num_glyphs; j++) {
772
        if (glyphs.scripts[original_offset + j] != prevscript) {
773
          /* create a new run for the different script */
774
          nruns++;
775
776
          /* first move remaining runs */
777
          memmove(runs + i + 2, runs + i + 1,
778
                  (nruns - i - 2) * sizeof(text_run));
779
780
          i++;
781
          /* new run inherits line number and rtl */
782
          runs[i].line_number = runs[i - 1].line_number;
783
          runs[i].rtl = runs[i - 1].rtl;
784
          runs[i].offset = original_offset + j;
785
          runs[i].length =
786
              (runs[i - 1].offset + runs[i - 1].length) - runs[i].offset;
787
          runs[i - 1].length = runs[i].offset - runs[i - 1].offset;
788
789
          runs[i].script = prevscript = glyphs.scripts[original_offset + j];
790
        }
791
      }
792
    }
793
  }
794
#endif
795
796
0
  for (i = 0; i < nruns; i++) {
797
0
    ret = get_face_for_run(fontset, ts->label->font, runs + i, &glyphs);
798
0
    if (MS_UNLIKELY(ret == MS_FAILURE))
799
0
      goto cleanup;
800
0
  }
801
802
  /*
803
   * determine the font face to use for a given run. No splitting needed here
804
   * for now, as we suppose that the decomposition of each run into individual
805
   * bidi direction and script level is sufficient to ensure that a given run
806
   * can be represented by a single font (i.e. there's no need to look into
807
   * multiple fonts to find the glyphs of the run)
808
   */
809
810
0
  tgret->numlines = runs[nruns - 1].line_number + 1;
811
0
  tgret->bounds.bbox.minx = 0;
812
0
  tgret->bounds.bbox.miny = FLT_MAX;
813
0
  tgret->bounds.bbox.maxx = tgret->bounds.bbox.maxy = -FLT_MAX;
814
815
0
  for (i = 0; i < nruns; i++) {
816
0
    if (!runs[i].face)
817
0
      continue;
818
0
    peny = (1 - tgret->numlines + runs[i].line_number) * tgret->line_height;
819
0
    if (peny != oldpeny) {
820
0
      if (i > 0)
821
0
        line_descs[runs[i - 1].line_number].length = penx;
822
0
      if (penx > tgret->bounds.bbox.maxx)
823
0
        tgret->bounds.bbox.maxx = penx;
824
0
      oldpeny = peny;
825
0
      penx = 0;
826
0
    }
827
#if defined(USE_HARFBUZZ) && defined(USE_FRIBIDI)
828
    if (runs[i].rtl == -1 || runs[i].script == HB_SCRIPT_LATIN ||
829
        runs[i].script == HB_SCRIPT_COMMON)
830
#endif
831
0
    {
832
      /* use our basic shaper */
833
0
      unsigned int *codepoint = glyphs.codepoints + runs[i].offset;
834
0
      alloc_glyphs += runs[i].length;
835
0
      tgret->glyphs =
836
0
          msSmallRealloc(tgret->glyphs, alloc_glyphs * sizeof(glyphObj));
837
0
      for (int j = 0; j < runs[i].length; j++) {
838
0
        glyphObj *g = &tgret->glyphs[tgret->numglyphs + j];
839
0
        g->glyph =
840
0
            msGetGlyphByIndex(runs[i].face, tgret->glyph_size, *codepoint);
841
0
        g->face = runs[i].face;
842
0
        codepoint++;
843
0
        g->pnt.x = penx;
844
0
        g->pnt.y = peny;
845
0
        g->rot = 0.0;
846
0
        penx += g->glyph->metrics.advance;
847
0
        if (runs[i].line_number == 0 &&
848
0
            peny - g->glyph->metrics.maxy <
849
0
                tgret->bounds.bbox
850
0
                    .miny) /*compute minimal y, only for the first line */
851
0
          tgret->bounds.bbox.miny = peny - g->glyph->metrics.maxy;
852
0
        if (peny - g->glyph->metrics.miny > tgret->bounds.bbox.maxy)
853
0
          tgret->bounds.bbox.maxy = peny - g->glyph->metrics.miny;
854
0
      }
855
#if defined(USE_HARFBUZZ) && defined(USE_FRIBIDI)
856
    } else {
857
      struct _ms_hb_user_data user_data;
858
      hb_font_t *font;
859
      hb_glyph_info_t *glyph_info;
860
      hb_glyph_position_t *glyph_pos;
861
      if (!buf) {
862
        buf = hb_buffer_create();
863
      }
864
      user_data.info = &glyphs;
865
      user_data.run = runs + i;
866
      user_data.glyph_size = tgret->glyph_size;
867
      hb_buffer_clear_contents(buf);
868
      hb_buffer_set_script(buf, runs[i].script);
869
      font = get_hb_font(&user_data);
870
      hb_buffer_set_direction(buf, (runs[i].rtl % 2) ? HB_DIRECTION_RTL
871
                                                     : HB_DIRECTION_LTR);
872
      hb_buffer_add_utf32(buf, glyphs.unicodes + runs[i].offset, runs[i].length,
873
                          0, runs[i].length);
874
      hb_shape(font, buf, hbfeatures, 2);
875
876
      unsigned int glyph_count;
877
      glyph_info = hb_buffer_get_glyph_infos(buf, &glyph_count);
878
      glyph_pos = hb_buffer_get_glyph_positions(buf, &glyph_count);
879
      alloc_glyphs += glyph_count;
880
      tgret->glyphs =
881
          msSmallRealloc(tgret->glyphs, alloc_glyphs * sizeof(glyphObj));
882
      for (unsigned j = 0; j < glyph_count; j++) {
883
        glyphObj *g = &tgret->glyphs[tgret->numglyphs + j];
884
        g->glyph = msGetGlyphByIndex(runs[i].face, tgret->glyph_size,
885
                                     glyph_info[j].codepoint);
886
        g->face = runs[i].face;
887
        g->pnt.x = penx + glyph_pos[j].x_offset / 64;
888
        g->pnt.y = peny - glyph_pos[j].y_offset / 64;
889
        g->rot = 0;
890
        penx += glyph_pos[j].x_advance / 64.0;
891
        /* peny -= glyph_pos[j].y_advance/64; // we don't do vertical layouts */
892
        if (runs[i].line_number == 0 &&
893
            peny - g->glyph->metrics.maxy <
894
                tgret->bounds.bbox
895
                    .miny) /*compute minimal y, only for the first line */
896
          tgret->bounds.bbox.miny = peny - g->glyph->metrics.maxy;
897
        if (peny - g->glyph->metrics.miny > tgret->bounds.bbox.maxy)
898
          tgret->bounds.bbox.maxy = peny - g->glyph->metrics.miny;
899
      }
900
#endif
901
0
    }
902
0
    tgret->numglyphs = alloc_glyphs;
903
0
    line_descs[runs[nruns - 1].line_number].length = penx;
904
0
    if (penx > tgret->bounds.bbox.maxx)
905
0
      tgret->bounds.bbox.maxx = penx;
906
0
  }
907
908
#ifdef USE_HARFBUZZ
909
  if (buf) {
910
    hb_buffer_destroy(buf);
911
  }
912
#endif
913
914
0
  if (tgret->numlines > 1) {
915
0
    int max_line_length = 0;
916
0
    int line = -1;
917
0
    double cur_line_offset = 0;
918
0
    int prev_default_align =
919
0
        MS_ALIGN_LEFT; /* if we have mixed rtl status, use the alignment of the
920
                          previous line. this defaults to left-alignment if the
921
                          first line is mixed */
922
0
    int cur_default_align = 0;
923
0
    for (i = 0; i < tgret->numlines; i++) {
924
0
      if (line_descs[i].length > max_line_length) {
925
0
        max_line_length = line_descs[i].length;
926
0
      }
927
0
    }
928
0
    oldpeny = 3455;
929
0
    for (i = 0; i < alloc_glyphs; i++) {
930
0
      if (tgret->glyphs[i].pnt.y != oldpeny) {
931
0
        oldpeny = tgret->glyphs[i].pnt.y;
932
0
        line++;
933
        /* compute offset to apply to coming line */
934
0
        switch (ts->label->align) {
935
0
        case MS_ALIGN_CENTER:
936
0
          cur_line_offset = (max_line_length - line_descs[line].length) / 2.0;
937
0
          break;
938
0
        case MS_ALIGN_RIGHT:
939
0
          cur_line_offset = (max_line_length - line_descs[line].length);
940
0
          break;
941
0
        case MS_ALIGN_LEFT:
942
0
          cur_line_offset = 0;
943
0
          break;
944
0
        case MS_ALIGN_DEFAULT:
945
0
        default:
946
0
          switch (line_descs[line].rtl) {
947
0
          case MS_RTL_MIXED:
948
0
            cur_default_align = prev_default_align;
949
0
            break;
950
0
          case MS_RTL_RTL:
951
0
            cur_default_align = prev_default_align = MS_RTL_RTL;
952
0
            break;
953
0
          case MS_RTL_LTR:
954
0
            cur_default_align = prev_default_align = MS_RTL_LTR;
955
0
            break;
956
0
          }
957
0
          switch (cur_default_align) {
958
0
          case MS_RTL_RTL:
959
            /* align to the right */
960
0
            cur_line_offset = (max_line_length - line_descs[line].length);
961
0
            break;
962
0
          case MS_RTL_LTR:
963
0
            cur_line_offset = 0;
964
0
            break;
965
0
          }
966
0
        }
967
0
      }
968
0
      tgret->glyphs[i].pnt.x += cur_line_offset;
969
0
    }
970
0
  }
971
  /*
972
   * msDebug("bounds for %s: %f %f %f
973
   * %f\n",ts->annotext,tgret->bounds.bbox.minx,tgret->bounds.bbox.miny,tgret->bounds.bbox.maxx,tgret->bounds.bbox.maxy);
974
   */
975
976
0
cleanup:
977
0
  if (line_descs != static_line_descs)
978
0
    free(line_descs);
979
0
  if (glyphs.codepoints != static_codepoints) {
980
#ifdef USE_FRIBIDI
981
    free(glyphs.bidi_levels);
982
    free(glyphs.ctypes);
983
#endif
984
0
    free(glyphs.codepoints);
985
#ifdef USE_HARFBUZZ
986
    free(glyphs.scripts);
987
#endif
988
0
    free(glyphs.unicodes);
989
0
    free(runs);
990
0
  }
991
0
  return ret;
992
0
}