Coverage Report

Created: 2025-11-16 09:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <com/sun/star/uno/Reference.h>
21
22
#include <comphelper/lok.hxx>
23
#include <comphelper/sequence.hxx>
24
#include <comphelper/processfactory.hxx>
25
#include <cppuhelper/factory.hxx>
26
#include <cppuhelper/supportsservice.hxx>
27
#include <cppuhelper/weak.hxx>
28
#include <com/sun/star/linguistic2/XLinguProperties.hpp>
29
#include <com/sun/star/linguistic2/LinguServiceManager.hpp>
30
#include <com/sun/star/linguistic2/XSpellChecker1.hpp>
31
#include <i18nlangtag/languagetag.hxx>
32
#include <tools/debug.hxx>
33
#include <osl/mutex.hxx>
34
#include <osl/thread.h>
35
36
#include <hyphen.h>
37
#include "hyphenimp.hxx"
38
39
#include <linguistic/hyphdta.hxx>
40
#include <rtl/ustring.hxx>
41
#include <rtl/ustrbuf.hxx>
42
#include <rtl/textenc.h>
43
#include <sal/log.hxx>
44
45
#include <linguistic/misc.hxx>
46
#include <svtools/strings.hrc>
47
#include <unotools/charclass.hxx>
48
#include <unotools/lingucfg.hxx>
49
#include <unotools/resmgr.hxx>
50
#include <osl/file.hxx>
51
52
#include <stdio.h>
53
#include <string.h>
54
55
#include <cassert>
56
#include <numeric>
57
#include <vector>
58
#include <set>
59
#include <memory>
60
#include <o3tl/string_view.hxx>
61
62
// XML-header to query SPELLML support
63
constexpr OUStringLiteral SPELLML_SUPPORT = u"<?xml?>";
64
65
using namespace osl;
66
using namespace com::sun::star;
67
using namespace com::sun::star::beans;
68
using namespace com::sun::star::lang;
69
using namespace com::sun::star::uno;
70
using namespace com::sun::star::linguistic2;
71
using namespace linguistic;
72
73
/// Creates the linguistic service manager from the process-wide component context.
static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl()
{
    return LinguServiceManager::create( comphelper::getProcessComponentContext() );
}
79
80
/// Sets up the event listener container on the shared linguistic mutex and
/// marks the object as not being disposed.
Hyphenator::Hyphenator()
    : aEvtListeners( GetLinguMutex() )
{
    // nothing is being torn down yet
    bDisposing = false;
}
85
86
/// Frees every dictionary that was loaded through libhyphen and detaches the
/// property helper (if one was ever created) from its property set.
Hyphenator::~Hyphenator()
{
    // entries whose dictionary was never loaded still have a null aPtr
    for (HDInfo& rDict : mvDicts)
    {
        if (rDict.aPtr != nullptr)
            hnj_hyphen_free(rDict.aPtr);
    }

    if (pPropHelper)
        pPropHelper->RemoveAsPropListener();
}
99
100
/// Returns the hyphenation property helper, creating it on first use and
/// registering it as a property listener.
PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
{
    if (pPropHelper)
        return *pPropHelper;

    Reference< XLinguProperties > xLinguProps = GetLinguProperties();

    pPropHelper.reset( new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xLinguProps ) );
    pPropHelper->AddAsPropListener();   //! after a reference is established

    return *pPropHelper;
}
111
112
// Requires GetLinguMutex locked
113
void Hyphenator::ensureLocales()
114
0
{
115
    // this routine should return the locales supported by the installed
116
    // dictionaries.
117
0
    if (mvDicts.empty())
118
0
    {
119
0
        SvtLinguConfig aLinguCfg;
120
121
        // get list of dictionaries-to-use
122
        // (or better speaking: the list of dictionaries using the
123
        // new configuration entries).
124
0
        std::vector< SvtLinguConfigDictionaryEntry > aDics;
125
0
        uno::Sequence< OUString > aFormatList;
126
0
        aLinguCfg.GetSupportedDictionaryFormatsFor( u"Hyphenators"_ustr,
127
0
                u"org.openoffice.lingu.LibHnjHyphenator"_ustr, aFormatList );
128
0
        for (const auto& rFormat : aFormatList)
129
0
        {
130
0
            std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
131
0
                    aLinguCfg.GetActiveDictionariesByFormat( rFormat ) );
132
0
            aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
133
0
        }
134
135
        //!! for compatibility with old dictionaries (the ones not using extensions
136
        //!! or new configuration entries, but still using the dictionary.lst file)
137
        //!! Get the list of old style spell checking dictionaries to use...
138
0
        std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
139
0
                GetOldStyleDics( "HYPH" ) );
140
141
        // to prefer dictionaries with configuration entries we will only
142
        // use those old style dictionaries that add a language that
143
        // is not yet supported by the list of new style dictionaries
144
0
        MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
145
146
0
        if (!aDics.empty())
147
0
        {
148
            // get supported locales from the dictionaries-to-use...
149
0
            std::set<OUString> aLocaleNamesSet;
150
0
            for (auto const& dict : aDics)
151
0
            {
152
0
                for (const auto& rLocaleName : dict.aLocaleNames)
153
0
                {
154
0
                    if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(rLocaleName))
155
0
                        continue;
156
0
                    aLocaleNamesSet.insert( rLocaleName );
157
0
                }
158
0
            }
159
            // ... and add them to the resulting sequence
160
0
            std::vector<Locale> aLocalesVec;
161
0
            aLocalesVec.reserve(aLocaleNamesSet.size());
162
163
0
            std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec),
164
0
                [](const OUString& localeName) { return LanguageTag::convertToLocale(localeName); });
165
166
0
            aSuppLocales = comphelper::containerToSequence(aLocalesVec);
167
168
            //! For each dictionary and each locale we need a separate entry.
169
            //! If this results in more than one dictionary per locale than (for now)
170
            //! it is undefined which dictionary gets used.
171
            //! In the future the implementation should support using several dictionaries
172
            //! for one locale.
173
0
            sal_Int32 numdict = std::accumulate(aDics.begin(), aDics.end(), 0,
174
0
                [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
175
0
                    return nSum + dict.aLocaleNames.getLength(); });
176
177
            // add dictionary information
178
0
            mvDicts.resize(numdict);
179
180
0
            sal_Int32 k = 0;
181
0
            for (auto const& dict :  aDics)
182
0
            {
183
0
                if (dict.aLocaleNames.hasElements() &&
184
0
                    dict.aLocations.hasElements())
185
0
                {
186
                    // currently only one language per dictionary is supported in the actual implementation...
187
                    // Thus here we work-around this by adding the same dictionary several times.
188
                    // Once for each of its supported locales.
189
0
                    for (const auto& rLocaleName : dict.aLocaleNames)
190
0
                    {
191
0
                        LanguageTag aLanguageTag(rLocaleName);
192
0
                        mvDicts[k].aPtr = nullptr;
193
0
                        mvDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
194
0
                        mvDicts[k].aLoc = aLanguageTag.getLocale();
195
0
                        mvDicts[k].apCC.reset( new CharClass( std::move(aLanguageTag) ) );
196
                        // also both files have to be in the same directory and the
197
                        // file names must only differ in the extension (.aff/.dic).
198
                        // Thus we use the first location only and strip the extension part.
199
0
                        OUString aLocation = dict.aLocations[0];
200
0
                        sal_Int32 nPos = aLocation.lastIndexOf( '.' );
201
0
                        aLocation = aLocation.copy( 0, nPos );
202
0
                        mvDicts[k].aName = aLocation;
203
204
0
                        ++k;
205
0
                    }
206
0
                }
207
0
            }
208
0
            DBG_ASSERT( k == numdict, "index mismatch?" );
209
0
        }
210
0
        else
211
0
        {
212
            // no dictionary found so register no dictionaries
213
0
            mvDicts.clear();
214
0
            aSuppLocales.realloc(0);
215
0
        }
216
0
    }
217
0
}
218
219
/// Returns the locales for which a hyphenation dictionary is configured.
Sequence< Locale > SAL_CALL Hyphenator::getLocales()
{
    // ensureLocales() requires the linguistic mutex to be held
    MutexGuard aLock( GetLinguMutex() );

    ensureLocales();
    return aSuppLocales;
}
225
226
/// Tells whether rLocale is one of the supported locales.
sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
{
    // ensureLocales() requires the linguistic mutex to be held
    MutexGuard aLock( GetLinguMutex() );

    ensureLocales();

    // findValue() yields -1 when the locale is not in the sequence
    const auto nIndex = comphelper::findValue(aSuppLocales, rLocale);
    return nIndex != -1;
}
232
233
namespace {
234
/// Loads the libhyphen dictionary "<rDict.aName>.dic" and stores the handle
/// and the text encoding derived from its charset in rDict.
/// @return false (after logging a warning) when libhyphen could not load the file.
bool LoadDictionary(HDInfo& rDict)
{
    // aName is stored without extension; it is converted from a file URL
    // to a system path before being handed to libhyphen
    const OUString aDicURL = rDict.aName + ".dic";
    OUString aSysPath;

    osl::FileBase::getSystemPathFromFileURL(aDicURL, aSysPath);

#if defined(_WIN32)
    // hnj_hyphen_load expects UTF-8 encoded paths with \\?\ long path prefix.
    OString aEncodedPath = Win_AddLongPathPrefix(OUStringToOString(aSysPath, RTL_TEXTENCODING_UTF8));
#else
    OString aEncodedPath(OU2ENC(aSysPath, osl_getThreadTextEncoding()));
#endif

    HyphenDict* pLoaded = hnj_hyphen_load(aEncodedPath.getStr());
    if (pLoaded == nullptr)
    {
        SAL_WARN(
            "lingucomponent",
            "Couldn't find file " << aSysPath);
        return false;
    }

    rDict.aPtr = pLoaded;
    rDict.eEnc = getTextEncodingFromCharset(pLoaded->cset);
    return true;
}
259
260
/// Lower-cases aTerm with pCC; returns aTerm unchanged when pCC is null.
OUString makeLowerCase(const OUString& aTerm, CharClass const* pCC)
{
    if (pCC == nullptr)
        return aTerm;

    return pCC->lowercase(aTerm);
}
266
267
/// Upper-cases aTerm with pCC; returns aTerm unchanged when pCC is null.
OUString makeUpperCase(const OUString& aTerm, CharClass const* pCC)
{
    if (pCC == nullptr)
        return aTerm;

    return pCC->uppercase(aTerm);
}
273
274
/// Returns aTerm with its first UTF-16 code unit upper-cased and the remainder
/// lower-cased. aTerm is returned unchanged when it is empty or pCC is null.
OUString makeInitCap(const OUString& aTerm, CharClass const* pCC)
{
    const sal_Int32 nLen = aTerm.getLength();
    if (!pCC || nLen == 0)
        return aTerm;

    const OUString aFirst = aTerm.copy(0, 1);
    OUString aCapitalized = pCC->uppercase(aFirst, 0, 1);

    // append the lower-cased tail, if there is one
    if (nLen > 1)
        aCapitalized += pCC->lowercase(aTerm, 1, nLen - 1);

    return aCapitalized;
}
287
288
/// Result of one libhyphen run over a word.
///
/// rep, pos and cut are C arrays released with free() in the destructor;
/// rep is treated as holding n entries, each either null or an individually
/// free()-able string.
struct hyphenation_result
{
    int n = 0;              // number of entries in rep that the destructor walks
    bool failed = true;     // true unless a hyphenation run reported success
    char** rep = nullptr;   // replacements of discretionary hyphenation
    int* pos = nullptr;     // array of [hyphenation point] minus [deletion position]
    int* cut = nullptr;     // length of deletions in original word
    std::unique_ptr<char[]> hyphens;

    ~hyphenation_result()
    {
        // free(nullptr) is a no-op, so only rep itself needs a null check
        // (its entries must not be dereferenced when it is null)
        if (rep != nullptr)
        {
            for (int idx = 0; idx < n; ++idx)
                free(rep[idx]);
            free(rep);
        }
        free(pos);
        free(cut);
    }
};
314
315
/// Runs libhyphen over `word` using the (already loaded) dictionary in hdInfo.
///
/// The word is normalised first: typographic quotes/apostrophes are replaced by
/// their ASCII counterparts, the text is lower-cased and converted to the
/// dictionary encoding, and trailing periods are excluded from the length
/// passed to libhyphen.
///
/// @return a default (failed) result when nothing but periods is left;
///         otherwise the buffers filled by hnj_hyphen_hyphenate3, owned by the
///         returned object.
hyphenation_result getHyphens(std::u16string_view word, const HDInfo& hdInfo, sal_Int16 minLead,
                              sal_Int16 minTrail)
{
    // first convert any smart quotes or apostrophes to normal ones
    OUStringBuffer aNormalized(word);
    for (sal_Int32 nIdx = 0; nIdx < aNormalized.getLength(); ++nIdx)
    {
        switch (aNormalized[nIdx])
        {
            case 0x201C:
            case 0x201D:
                aNormalized[nIdx] = u'"';
                break;
            case 0x2018:
            case 0x2019:
                aNormalized[nIdx] = u'\'';
                break;
            default:
                break;
        }
    }

    // now convert word to all lowercase for pattern recognition
    OUString aLowered(makeLowerCase(OUString::unacquired(aNormalized), hdInfo.apCC.get()));

    // now convert word to needed encoding
    OString aEncoded(OU2ENC(aLowered, hdInfo.eEnc));

    // now strip off any ending periods
    auto nLastNonPeriod = std::string_view(aEncoded).find_last_not_of('.');
    if (nLastNonPeriod == std::string_view::npos)
        return {};

    int n = nLastNonPeriod + 1;
    std::unique_ptr<char[]> hyphens(new char[n + 5]);
    char** rep = nullptr; // replacements of discretionary hyphenation
    int* pos = nullptr; // array of [hyphenation point] minus [deletion position]
    int* cut = nullptr; // length of deletions in original word

    HyphenDict* pDict = hdInfo.aPtr;

    // compound limits: the dictionary value (at least 2), increased by however
    // much the requested limit exceeds the dictionary's own (at least 2) limit
    const int nCompoundLeft
        = std::max<sal_Int16>(pDict->clhmin, 2) + std::max(0, minLead  - std::max<sal_Int16>(pDict->lhmin, 2));
    const int nCompoundRight
        = std::max<sal_Int16>(pDict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(pDict->rhmin, 2));

    const int nStatus = hnj_hyphen_hyphenate3( pDict, aEncoded.getStr(), n, hyphens.get(), nullptr,
                &rep, &pos, &cut, minLead, minTrail, nCompoundLeft, nCompoundRight );
    const bool failed = (nStatus != 0);

    return { n, failed, rep, pos, cut, std::move(hyphens) }; // buffers will free in dtor
}
353
}
354
355
/// Looks up the dictionary entry for aLocale, loading the dictionary on first
/// use.
/// @return the entry, or nullptr when no entry matches, loading fails, or the
///         dictionary's text encoding is unknown.
const HDInfo* Hyphenator::getMatchingDict(const css::lang::Locale& aLocale)
{
    MutexGuard aLock( GetLinguMutex() );
    ensureLocales();

    // searched back to front, so the last matching entry in mvDicts is used
    const auto itEnd = mvDicts.rend();
    const auto itFound = std::find_if(mvDicts.rbegin(), itEnd,
                                      [&aLocale](auto& rEntry) { return rEntry.aLoc == aLocale; });
    if (itFound == itEnd)
        return nullptr;

    HDInfo& rDict = *itFound;

    // if this dictionary has not been loaded yet do that
    if (!rDict.aPtr && !LoadDictionary(rDict))
        return nullptr;

    // we don't want to work with a default text encoding since following incorrect
    // results may occur only for specific text and thus may be hard to notice.
    // Thus better always make a clean exit here if the text encoding is in question.
    // Hopefully something not working at all will raise proper attention quickly. ;-)
    DBG_ASSERT(rDict.eEnc != RTL_TEXTENCODING_DONTKNOW,
               "failed to get text encoding! (maybe incorrect encoding string in file)");
    if (rDict.eEnc == RTL_TEXTENCODING_DONTKNOW)
        return nullptr;

    return &rDict;
}
382
383
/// Finds the last acceptable hyphenation point of aWord that lies before the
/// position derived from nMaxLeading.
///
/// Candidate break points come from libhyphen (see getHyphens) and are filtered
/// by the minimal word length and the minimal leading/trailing lengths taken
/// from the hyphenation properties. When the compound minimal leading value is
/// greater than 2, the spell checker's morphological analysis (SPELLML) is used
/// to skip break points that follow a previously accepted one too closely
/// inside a compound word.
///
/// @param aWord        word to hyphenate
/// @param aLocale      locale used to select the dictionary
/// @param nMaxLeading  passed to GetPosInWordToCheck() to get the upper limit
///                     for the break position
/// @param aProperties  temporary property values applied for this call
/// @return the hyphenated word (an alternative spelling when the dictionary
///         supplies a replacement at the chosen position), or a null reference
///         when there is no matching dictionary, hyphenation failed, the word
///         is all upper case and such words are excluded, or no break point
///         qualified.
Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord,
       const css::lang::Locale& aLocale,
       sal_Int16 nMaxLeading,
       const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
{
    PropertyHelper_Hyphenation& rHelper = GetPropHelper();
    rHelper.SetTmpPropVals(aProperties);
    sal_Int16 minTrail = rHelper.GetMinTrailing();
    sal_Int16 minLead = rHelper.GetMinLeading();
    sal_Int16 minCompoundLead = rHelper.GetCompoundMinLeading();
    sal_Int16 minLen = rHelper.GetMinWordLength();
    bool bNoHyphenateCaps = rHelper.IsNoHyphenateCaps();

    // if we have a hyphenation dictionary matching this locale
    if (auto pHDInfo = getMatchingDict(aLocale))
    {
        int nHyphenationPos = -1;        // last accepted break point (index into the libhyphen result)
        int nHyphenationPosAlt = -1;     // i - pos[i] of the last accepted break that has a replacement
        int nHyphenationPosAltHyph = -1; // same, shifted by the replacement length before '='

        // hyphenate the word with that dictionary
        rtl_TextEncoding eEnc = pHDInfo->eEnc;
        CharClass* pCC = pHDInfo->apCC.get();

        // Don't hyphenate uppercase words if requested
        if (bNoHyphenateCaps && aWord == makeUpperCase(aWord, pCC))
        {
            return nullptr;
        }

        CapType ct = capitalType(aWord, pCC);

        auto result = getHyphens(aWord, *pHDInfo, minLead, minTrail);
        if (result.failed)
            return nullptr;

        sal_Int32 Leading =  GetPosInWordToCheck( aWord, nMaxLeading );

        // use morphological analysis of Hunspell to get better hyphenation of compound words
        // optionally when hyphenation zone is enabled
        // pa: fields contain stems resulted by compound word analysis of non-dictionary words
        // hy: fields contain hyphenation data of dictionary (compound) words
        Reference< XSpellAlternatives > xTmpRes;
        bool bAnalyzed = false; // the word needs to be analysed only once
        bool bCompoundHyphenation = true; // try to hyphenate compound words better
        OUString sStems; // processed result of the compound word analysis, e.g. com|pound|word
        sal_Int32 nSuffixLen = 0; // do not remove break points in suffixes

        for (sal_Int32 i = 0; i < result.n; i++)
        {
            int leftrep = 0;
            bool hit = (result.n >= minLen);
            if (!result.rep || !result.rep[i])
            {
                // plain break point: an odd value in hyphens[i] marks a break after position i
                hit = hit && (result.hyphens[i] & 1) && (i < Leading);
                hit = hit && (i >= (minLead-1) );
                hit = hit && ((result.n - i - 1) >= minTrail);
            }
            else
            {
                // calculate change character length before hyphenation point signed with '='
                for (char * c = result.rep[i]; *c && (*c != '='); c++)
                {
                    if (eEnc == RTL_TEXTENCODING_UTF8)
                    {
                        // do not count UTF-8 continuation bytes (10xxxxxx)
                        if (static_cast<unsigned char>(*c) >> 6 != 2)
                            leftrep++;
                    }
                    else
                        leftrep++;
                }
                hit = hit && (result.hyphens[i] & 1) && ((i + leftrep - result.pos[i]) < Leading);
                hit = hit && ((i + leftrep - result.pos[i]) >= (minLead-1) );
                hit = hit && ((result.n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(result.rep[i])) - leftrep - 1) >= minTrail);
            }
            if (hit)
            {
                // skip hyphenation right after stem boundaries in compound words
                // if minCompoundLead > 2 (default value: less than n=minCompoundLead character distance)
                if ( bCompoundHyphenation && minCompoundLead > 2 && nHyphenationPos > -1 && i - nHyphenationPos < minCompoundLead )
                {
                    uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() );
                    uno::Reference< XSpellChecker1 > xSpell;

                    LanguageType nLanguage = LinguLocaleToLanguage( aLocale );

                    xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY );

                    // get morphological analysis of the word
                    if ( ( bAnalyzed && xTmpRes.is() ) || ( xSpell.is() && xSpell->isValid(
                            SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage),
                            uno::Sequence< beans::PropertyValue >() ) ) )
                    {
                        if ( !bAnalyzed )
                        {
                            xTmpRes = xSpell->spell( "<?xml?><query type='analyze'><word>" +
                                                       aWord + "</word></query>",
                                               static_cast<sal_uInt16>(nLanguage),
                                               uno::Sequence< beans::PropertyValue >() );
                            bAnalyzed = true;

                            if (xTmpRes.is())
                            {
                                Sequence<OUString>seq = xTmpRes->getAlternatives();
                                if (seq.hasElements())
                                {
                                    sal_Int32 nEndOfFirstAnalysis = seq[0].indexOf("</a>");
                                    // FIXME use only the first analysis
                                    // indexOf() returns -1 when there is no closing tag; that
                                    // must not be used as a length for copy(), so the analysis
                                    // is treated as empty in that case.
                                    OUString morph;
                                    if (nEndOfFirstAnalysis > -1)
                                        morph = seq[0].copy(0, nEndOfFirstAnalysis);

                                    // concatenate pa: fields, i.e. stems in the analysis:
                                    // pa:stem1 pa:stem2 pa:stem3 -> stem1||stem2||stem3
                                    sal_Int32 nPa = -1;
                                    while ( (nPa = morph.indexOf(u" pa:", nPa + 1)) > -1 )
                                    {
                                        // use hy: field of the actual stem, if it exists
                                        // pa:stem1 hy:st|em1 pa:stem2 -> st|em1||stem2
                                        sal_Int32 nHy = morph.indexOf(u" hy:", nPa + 3);
                                        sal_Int32 nPa2 = morph.indexOf(u" pa:", nPa + 3);

                                        if ( nHy > -1 && ( nPa2 == -1 || nHy < nPa2 ) )
                                        {
                                            OUString sStems2(morph.getToken(1, ' ', nHy).copy(3));
                                            if ( sStems2.indexOf('|') > -1 )
                                                sStems += sStems2+ u"||";
                                            else if ( sal_Int32 nBreak = o3tl::toInt32(sStems2) )
                                            {
                                                OUString sPa(morph.getToken(1, ' ', nPa).copy(3));
                                                // a numeric hy: value is a break offset inside the
                                                // stem; a negative one (e.g. "-3") would be an
                                                // invalid subView() argument, so it is ignored
                                                if ( nBreak > 0 && nBreak < sPa.getLength() )
                                                    sStems += OUString::Concat(sPa.subView(0, nBreak)) + u"|" +
                                                           sPa.subView(nBreak);
                                            }
                                        }
                                        else
                                        {
                                            OUString sPa(morph.getToken(1, ' ', nPa).copy(3));

                                            // handle special case: missing pa: in morphological analysis
                                            // before in-word suffixes (German, Swedish etc. dictionaries)
                                            // (recognized by the single last pa:)
                                            if (sStems.isEmpty() && nPa2 == -1 && aWord.endsWith(sPa))
                                            {
                                                sStems = OUString::Concat(aWord.subView(0, aWord.getLength() -
                                                             sPa.getLength())) + u"||" +
                                                         aWord.subView(aWord.getLength() -
                                                             sPa.getLength());
                                                break;
                                            }

                                            sStems += sPa + "||";

                                            // count suffix length
                                            sal_Int32 nSt = morph.lastIndexOf(" st:");
                                            if ( nSt > -1 )
                                            {
                                                sal_Int32 nStemLen =
                                                    o3tl::getToken(morph, 1, ' ', nSt).length() - 3;
                                                if ( nStemLen < sPa.getLength() )
                                                    nSuffixLen = sPa.getLength() - nStemLen;
                                            }
                                        }

                                        if ( nPa == -1 ) // getToken() can modify nPa
                                            break;
                                    }

                                    // only hy:, but not pa:
                                    if ( sStems.isEmpty() )
                                    {
                                        // check hy: (pre-defined hyphenation)
                                        sal_Int32 nHy = morph.indexOf(" hy:");
                                        if (nHy > -1)
                                        {
                                            sStems = morph.getToken(1, ' ', nHy).copy(3);
                                            if ( sStems.indexOf('|') == -1 && sStems.indexOf('-') == -1 )
                                            {
                                                if ( sal_Int32 nBreak = o3tl::toInt32(sStems) )
                                                {
                                                    // NOTE(review): this appends to the numeric hy:
                                                    // value already stored in sStems rather than
                                                    // replacing it - confirm that this is intended
                                                    if ( nBreak > 0 && nBreak < aWord.getLength() )
                                                        sStems += OUString::Concat(aWord.subView(0, nBreak)) + u"|" +
                                                               aWord.subView(nBreak);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }

                        // handle string separated by |, e.g "program hy:pro|gram"
                        if ( sStems.indexOf('|') > -1 )
                        {
                            sal_Int32 nLetters = 0; // count not separator characters
                            sal_Int32 nSepPos = -1; // position of last character | used for stem boundaries
                            bool bWeightedSep = false; // double separator || = weighted stem boundary
                            sal_Int32 j = 0;
                            // walk sStems until more than i letters have been consumed
                            for (; j < sStems.getLength() && nLetters <= i; j++)
                            {
                                if ( sStems[j] == '|' )
                                {
                                    bWeightedSep = nSepPos > -1 && (j - 1 == nSepPos);
                                    nSepPos = j;
                                }
                                else if ( sStems[j] != '-' && sStems[j] != '=' && sStems[j] != '*' )
                                    ++nLetters;
                            }
                            // skip break points near stem boundaries
                            if (
                                // there is a stem boundary before the actual break point
                                nSepPos > -1 &&
                                // and the break point is within a stem, i.e. not in the
                                // suffix of the last stem
                                i < aWord.getLength() - nSuffixLen - 1 &&
                                // and it is not another stem boundary
                                j + 1 < sStems.getLength() &&
                                ( sStems[j + 1] != u'|' ||
                                // except if it's only the previous was a weighted one
                                    ( bWeightedSep && ( j + 2 == sStems.getLength() ||
                                                        sStems[j + 2] != u'|' ) ) ) )
                            {
                                continue;
                            }
                        }
                        else
                            // not a compound word
                            bCompoundHyphenation = false;
                    }
                    else
                        // no SPELLML support, no morphological analysis
                        bCompoundHyphenation = false;
                }

                nHyphenationPos = i;
                if (result.rep && result.rep[i])
                {
                    nHyphenationPosAlt = i - result.pos[i];
                    nHyphenationPosAltHyph = i + leftrep - result.pos[i];
                }
            }
        }

        Reference<XHyphenatedWord> xRes;
        if (nHyphenationPos != -1)
        {
            if (result.rep && result.rep[nHyphenationPos])
            {
                // remove equal sign (in place: everything from '=' on is shifted left by one)
                char * s = result.rep[nHyphenationPos];
                int eq = 0;
                for (; *s; s++)
                {
                    if (*s == '=') eq = 1;
                    if (eq) *s = *(s + 1);
                }
                OUString repHyphlow(result.rep[nHyphenationPos], strlen(result.rep[nHyphenationPos]), eEnc);
                OUString repHyph;
                // the replacement is adapted to the capitalisation of the original word
                switch (ct)
                {
                    case CapType::ALLCAP:
                    {
                        repHyph = makeUpperCase(repHyphlow, pCC);
                        break;
                    }
                    case CapType::INITCAP:
                    {
                        if (nHyphenationPosAlt == -1)
                            repHyph = makeInitCap(repHyphlow, pCC);
                        else
                             repHyph = repHyphlow;
                        break;
                    }
                    default:
                    {
                        repHyph = repHyphlow;
                        break;
                    }
                }

                // handle shortening
                sal_Int16 nPos = static_cast<sal_Int16>((nHyphenationPosAltHyph < nHyphenationPos) ?
                nHyphenationPosAltHyph : nHyphenationPos);
                // discretionary hyphenation
                xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos,
                    aWord.replaceAt(nHyphenationPosAlt + 1, result.cut[nHyphenationPos], repHyph),
                    static_cast<sal_Int16>(nHyphenationPosAltHyph));
            }
            else
            {
                xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ),
                    static_cast<sal_Int16>(nHyphenationPos), aWord, static_cast<sal_Int16>(nHyphenationPos));
            }
        }
        return xRes;
    }
    return nullptr;
}
680
681
// Looks for a hyphenation at exactly nIndex that changes the spelling of the word.
//
// hyphenate() is called up to two times, with nIndex + 2 and then nIndex + 3 as its
// third argument. The smaller value is tried first so that the right break point is
// not missed. The first result that is an alternative spelling and whose hyphenation
// position equals nIndex is returned; if neither attempt qualifies the result is null.
Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
        const OUString& aWord,
        const css::lang::Locale& aLocale,
        sal_Int16 nIndex,
        const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
{
    int nExtraChars = 1;
    while (nExtraChars <= 2)
    {
        Reference< XHyphenatedWord > xCandidate
            = hyphenate(aWord, aLocale, nIndex + 1 + nExtraChars, aProperties);

        const bool bIsWantedBreak = xCandidate.is()
                                    && xCandidate->isAlternativeSpelling()
                                    && xCandidate->getHyphenationPos() == nIndex;
        if (bIsWantedBreak)
            return xCandidate;

        ++nExtraChars;
    }

    // neither attempt produced an alternative spelling at nIndex
    return nullptr;
}
696
697
// Collects all hyphenation positions of aWord for the given locale.
//
// aProperties is applied as temporary property values on the property helper before
// MinTrailing, MinLeading and MinWordLength are read from it.
//
// Returns null when the word is shorter than MinWordLength, when no hyphenation
// dictionary matches aLocale, or when getHyphens() reports a failure. Otherwise the
// result carries the original word, a copy of the word with '=' inserted after every
// reported position, and the sequence of those positions.
//
// Positions above SAL_MAX_INT16 cannot be stored in the sal_Int16 sequence; they are
// logged and left out of both the sequence and the '='-marked string.
Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
        const css::lang::Locale& aLocale,
        const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
{
    PropertyHelper_Hyphenation& rHelper = GetPropHelper();
    rHelper.SetTmpPropVals(aProperties);
    sal_Int16 minTrail = rHelper.GetMinTrailing();
    sal_Int16 minLead = rHelper.GetMinLeading();
    sal_Int16 minLen = rHelper.GetMinWordLength();

    // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
    // well as "hyphenate"
    if (aWord.getLength() < minLen)
        return nullptr;

    // if we have a hyphenation dictionary matching this locale
    if (auto pHDInfo = getMatchingDict(aLocale))
    {
        // hyphenate the word with that dictionary
        auto result = getHyphens(aWord, *pHDInfo, minLead, minTrail);
        if (result.failed)
            return nullptr;

        const sal_Int32 nWordLen = aWord.getLength();

        // Count only the positions that fit into sal_Int16. The fill loop below
        // skips anything larger, so counting those as well would leave zero-filled
        // entries at the end of the returned sequence.
        // FIXME: shouldn't we iterate code points instead?
        sal_Int32 nHyphCount = 0;
        for (sal_Int32 i = 0; i < nWordLen && i <= SAL_MAX_INT16; i++)
        {
            if (result.hyphens[i] & 1)
                nHyphCount++;
        }

        Sequence< sal_Int16 > aHyphPos(nHyphCount);
        sal_Int16 *pPos = aHyphPos.getArray();
        OUStringBuffer hyphenatedWordBuffer;
        nHyphCount = 0;

        for (sal_Int32 i = 0; i < nWordLen; i++)
        {
            hyphenatedWordBuffer.append(aWord[i]);
            // hyphenation position
            if (result.hyphens[i] & 1)
            {
                // linguistic::PossibleHyphens is stuck with
                // css::uno::Sequence<sal_Int16> because of
                // css.linguistic2.XPossibleHyphens.getHyphenationPositions, so
                // any further positions need to be ignored:
                assert(i >= SAL_MIN_INT16);
                if (i > SAL_MAX_INT16)
                {
                    SAL_WARN(
                        "lingucomponent",
                        "hyphen pos " << i << " > SAL_MAX_INT16 in \"" << aWord
                            << "\"");
                    continue;
                }
                // range was checked just above, so the narrowing is safe
                pPos[nHyphCount] = static_cast<sal_Int16>(i);
                hyphenatedWordBuffer.append('=');
                nHyphCount++;
            }
        }

        OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();

        return PossibleHyphens::CreatePossibleHyphens(
            aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos);
    }

    return nullptr;
}
767
768
// Registers rxLstnr with the property helper while holding the linguistic mutex.
// Returns false without registering anything if this object is being disposed or
// the reference is empty; otherwise returns whatever the property helper reports.
sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
        const Reference< XLinguServiceEventListener >& rxLstnr )
{
    MutexGuard  aGuard( GetLinguMutex() );

    if (bDisposing || !rxLstnr.is())
        return false;

    const bool bAdded = GetPropHelper().addLinguServiceEventListener( rxLstnr );
    return bAdded;
}
780
781
// Unregisters rxLstnr from the property helper while holding the linguistic mutex.
// Returns false without touching the helper if this object is being disposed or the
// reference is empty; otherwise returns whatever the property helper reports.
sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
        const Reference< XLinguServiceEventListener >& rxLstnr )
{
    MutexGuard  aGuard( GetLinguMutex() );

    if (bDisposing || !rxLstnr.is())
        return false;

    const bool bRemoved = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
    return bRemoved;
}
793
794
// Returns the STR_DESCRIPTION_LIBHYPHEN string looked up in the "svt" translation
// catalogue created for the language tag of rLocale.
OUString SAL_CALL Hyphenator::getServiceDisplayName(const Locale& rLocale)
{
    return Translate::get(
        STR_DESCRIPTION_LIBHYPHEN,
        Translate::Create("svt", LanguageTag(rLocale)));
}
799
800
// One-time initialisation: creates the property helper from the arguments.
//
// Does nothing when a property helper already exists. Exactly two arguments are
// expected; any other count triggers OSL_FAIL and leaves the object untouched.
// Only the first argument (an XLinguProperties reference) is read here.
void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
{
    MutexGuard  aGuard( GetLinguMutex() );

    if (pPropHelper)
        return;

    if (rArguments.getLength() != 2)
    {
        OSL_FAIL( "wrong number of arguments in sequence" );
        return;
    }

    Reference< XLinguProperties >   xLinguProps;
    rArguments.getConstArray()[0] >>= xLinguProps;
    // rArguments.getConstArray()[1] >>= xDicList;

    //! Pointer allows for access of the non-UNO functions.
    //! And the reference to the UNO-functions while increasing
    //! the ref-count and will implicitly free the memory
    //! when the object is no longer used.
    pPropHelper.reset( new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xLinguProps ) );
    pPropHelper->AddAsPropListener();   //! after a reference is established
}
825
826
// Disposes this component once; later calls return immediately.
//
// Under the linguistic mutex: sets the disposing flag, notifies and clears all
// registered event listeners, then detaches and releases the property helper if
// one exists.
void SAL_CALL Hyphenator::dispose()
{
    MutexGuard  aGuard( GetLinguMutex() );

    if (bDisposing)
        return;

    bDisposing = true;

    EventObject aDisposeEvent( static_cast<XHyphenator *>(this) );
    aEvtListeners.disposeAndClear( aDisposeEvent );

    if (!pPropHelper)
        return;

    pPropHelper->RemoveAsPropListener();
    pPropHelper.reset();
}
842
843
// Adds rxListener to the event listener container under the linguistic mutex.
// Empty references are ignored, as is every call made once disposing has started.
void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
{
    MutexGuard  aGuard( GetLinguMutex() );

    if (bDisposing || !rxListener.is())
        return;

    aEvtListeners.addInterface( rxListener );
}
850
851
// Removes rxListener from the event listener container under the linguistic mutex.
// Empty references are ignored, as is every call made once disposing has started.
void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
{
    MutexGuard  aGuard( GetLinguMutex() );

    if (bDisposing || !rxListener.is())
        return;

    aEvtListeners.removeInterface( rxListener );
}
858
859
// Service specific part
860
// Returns the fixed implementation name of this hyphenator component.
OUString SAL_CALL Hyphenator::getImplementationName()
{
    OUString aImplName = u"org.openoffice.lingu.LibHnjHyphenator"_ustr;
    return aImplName;
}
864
865
// Delegates the service-name check to cppu::supportsService() for this object.
sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
{
    const bool bSupported = cppu::supportsService(this, ServiceName);
    return bSupported;
}
869
870
// Returns a one-element sequence holding SN_HYPHENATOR.
Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
{
    return Sequence< OUString >{ SN_HYPHENATOR };
}
874
875
// Exported C entry point that creates a new Hyphenator and returns it after passing
// it through cppu::acquire(). Both parameters are unused.
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
lingucomponent_Hyphenator_get_implementation(
    css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
{
    Hyphenator* pHyphenator = new Hyphenator();
    return cppu::acquire(pHyphenator);
}
881
882
883
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */