/src/libreoffice/oox/source/token/tokenmap.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <sal/config.h> |
21 | | |
22 | | #include <array> |
23 | | #include <vector> |
24 | | |
25 | | #include <oox/token/tokenmap.hxx> |
26 | | |
27 | | #include <string.h> |
28 | | #include <sal/log.hxx> |
29 | | #include <rtl/string.hxx> |
30 | | #include <oox/token/tokens.hxx> |
31 | | |
32 | | namespace oox { |
33 | | |
34 | | namespace { |
35 | | // include auto-generated Perfect_Hash |
36 | | #if defined __GNUC__ |
37 | | #pragma GCC diagnostic push |
38 | | #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" |
39 | | #if defined __clang__ |
40 | | #if __has_warning("-Wdeprecated-register") |
41 | | #pragma GCC diagnostic ignored "-Wdeprecated-register" |
42 | | #endif |
43 | | #endif |
44 | | #endif |
45 | | |
46 | | #include <tokenhash.inc> |
47 | | |
48 | | #if defined __GNUC__ |
49 | | #pragma GCC diagnostic pop |
50 | | #endif |
51 | | } // namespace |
52 | | |
53 | | static sal_Int32 getTokenPerfectHash(const char* pStr, sal_Int32 nLength) |
54 | 21.4M | { |
55 | 21.4M | const struct xmltoken* pToken = Perfect_Hash::in_word_set( pStr, nLength ); |
56 | 21.4M | return pToken ? pToken->nToken : XML_TOKEN_INVALID; |
57 | 21.4M | } |
58 | | |
59 | | css::uno::Sequence<sal_Int8> const& TokenMap::getUtf8TokenName(sal_Int32 nToken) |
60 | 1.59k | { |
61 | 1.59k | static const auto saTokenNames = []() |
62 | 1.59k | { |
63 | 3 | static_assert(std::size(wordlist) == XML_TOKEN_COUNT); |
64 | | |
65 | 3 | std::vector<css::uno::Sequence<sal_Int8>> aTokenNames(std::size(wordlist)); |
66 | 3 | for (const auto& resword : wordlist) |
67 | 18.1k | { |
68 | 18.1k | const char *s = reinterpret_cast<const char*>(&stringpool_contents) + resword.name; |
69 | 18.1k | std::string_view aUtf8Token(s, strlen(s)); |
70 | | |
71 | 18.1k | aTokenNames[resword.nToken] = |
72 | 18.1k | css::uno::Sequence<sal_Int8>( |
73 | 18.1k | reinterpret_cast<const sal_Int8*>(aUtf8Token.data()), aUtf8Token.size()); |
74 | 18.1k | } |
75 | | |
76 | 3 | return aTokenNames; |
77 | 3 | }(); |
78 | | |
79 | 1.59k | SAL_WARN_IF(nToken < 0 || nToken >= XML_TOKEN_COUNT, "oox", "Wrong nToken parameter"); |
80 | 1.59k | if (0 <= nToken && nToken < XML_TOKEN_COUNT) |
81 | 1.59k | return saTokenNames[nToken]; |
82 | 0 | static const css::uno::Sequence<sal_Int8> EMPTY_BYTE_SEQ; |
83 | 0 | return EMPTY_BYTE_SEQ; |
84 | 1.59k | } |
85 | | |
86 | | |
87 | | /** Returns the token identifier for a UTF8 string passed in pToken */ |
88 | | sal_Int32 TokenMap::getTokenFromUtf8(std::string_view token) |
89 | 29.5M | { |
90 | 29.5M | static const auto snAlphaTokens = []() |
91 | 29.5M | { |
92 | 3 | std::array<sal_Int32, 26> nAlphaTokens{}; |
93 | 81 | for (char c = 'a'; c <= 'z'; c++) |
94 | 78 | nAlphaTokens[c - 'a'] = getTokenPerfectHash(&c, 1); |
95 | 3 | return nAlphaTokens; |
96 | 3 | }(); |
97 | | |
98 | | // 50% of OOXML tokens are primarily 1 lower-case character, a-z |
99 | 29.5M | if (token.size() == 1) |
100 | 8.82M | { |
101 | 8.82M | char c = token[0]; |
102 | 8.82M | if (c >= 'a' && c <= 'z') |
103 | 8.16M | return snAlphaTokens[c - 'a']; |
104 | 8.82M | } |
105 | 21.4M | return getTokenPerfectHash(token.data(), token.size()); |
106 | 29.5M | } |
107 | | |
108 | | } // namespace oox |
109 | | |
110 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |