/src/assimp/code/AssetLib/FBX/FBXTokenizer.cpp
Line | Count | Source |
1 | | /* |
2 | | Open Asset Import Library (assimp) |
3 | | ---------------------------------------------------------------------- |
4 | | |
5 | | Copyright (c) 2006-2025, assimp team |
6 | | |
7 | | All rights reserved. |
8 | | |
9 | | Redistribution and use of this software in source and binary forms, |
10 | | with or without modification, are permitted provided that the |
11 | | following conditions are met: |
12 | | |
13 | | * Redistributions of source code must retain the above |
14 | | copyright notice, this list of conditions and the |
15 | | following disclaimer. |
16 | | |
17 | | * Redistributions in binary form must reproduce the above |
18 | | copyright notice, this list of conditions and the |
19 | | following disclaimer in the documentation and/or other |
20 | | materials provided with the distribution. |
21 | | |
22 | | * Neither the name of the assimp team, nor the names of its |
23 | | contributors may be used to endorse or promote products |
24 | | derived from this software without specific prior |
25 | | written permission of the assimp team. |
26 | | |
27 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
28 | | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
29 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
30 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
31 | | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
32 | | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
33 | | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
34 | | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
35 | | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
36 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
37 | | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
38 | | |
39 | | ---------------------------------------------------------------------- |
40 | | */ |
41 | | |
42 | | /** @file FBXTokenizer.cpp |
43 | | * @brief Implementation of the FBX broadphase lexer |
44 | | */ |
45 | | |
46 | | #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER |
47 | | |
48 | | // tab width for logging columns |
49 | 0 | #define ASSIMP_FBX_TAB_WIDTH 4 |
50 | | |
51 | | #include <assimp/ParsingUtils.h> |
52 | | |
53 | | #include "FBXTokenizer.h" |
54 | | #include "FBXUtil.h" |
55 | | #include <assimp/Exceptional.h> |
56 | | #include <assimp/DefaultLogger.hpp> |
57 | | |
58 | | namespace Assimp { |
59 | | namespace FBX { |
60 | | |
61 | | // ------------------------------------------------------------------------------------------------ |
62 | | Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column) |
63 | | : |
64 | | #ifdef DEBUG |
65 | | contents(sbegin, static_cast<size_t>(send-sbegin)), |
66 | | #endif |
67 | 0 | sbegin(sbegin) |
68 | 0 | , send(send) |
69 | 0 | , type(type) |
70 | 0 | , line(line) |
71 | 0 | , column(column) |
72 | 0 | { |
73 | 0 | ai_assert(sbegin); |
74 | 0 | ai_assert(send); |
75 | | |
76 | | // tokens must be of non-zero length |
77 | 0 | ai_assert(static_cast<size_t>(send-sbegin) > 0); |
78 | 0 | } |
79 | | |
80 | | // ------------------------------------------------------------------------------------------------ |
81 | | |
82 | | |
83 | | namespace { |
84 | | |
// ------------------------------------------------------------------------------------------------
// Signal a tokenization error; this is always unrecoverable. Throws DeadlyImportError
// with the offending line/column prepended so users can locate the spot in the file.
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column) AI_WONT_RETURN_SUFFIX;
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
{
    throw DeadlyImportError("FBX-Tokenize", Util::GetLineAndColumnText(line,column), message);
}
92 | | |
93 | | |
94 | | // process a potential data token up to 'cur', adding it to 'output_tokens'. |
95 | | // ------------------------------------------------------------------------------------------------ |
96 | | void ProcessDataToken(TokenList &output_tokens, StackAllocator &token_allocator, |
97 | | const char*& start, const char*& end, |
98 | | unsigned int line, |
99 | | unsigned int column, |
100 | | TokenType type = TokenType_DATA, |
101 | | bool must_have_token = false) |
102 | 0 | { |
103 | 0 | if (start && end) { |
104 | | // sanity check: |
105 | | // tokens should have no whitespace outside quoted text and [start,end] should |
106 | | // properly delimit the valid range. |
107 | 0 | bool in_double_quotes = false; |
108 | 0 | for (const char* c = start; c != end + 1; ++c) { |
109 | 0 | if (*c == '\"') { |
110 | 0 | in_double_quotes = !in_double_quotes; |
111 | 0 | } |
112 | |
|
113 | 0 | if (!in_double_quotes && IsSpaceOrNewLine(*c)) { |
114 | 0 | TokenizeError("unexpected whitespace in token", line, column); |
115 | 0 | } |
116 | 0 | } |
117 | | |
118 | 0 | if (in_double_quotes) { |
119 | 0 | TokenizeError("non-terminated double quotes", line, column); |
120 | 0 | } |
121 | | |
122 | 0 | output_tokens.push_back(new_Token(start,end + 1,type,line,column)); |
123 | 0 | } |
124 | 0 | else if (must_have_token) { |
125 | 0 | TokenizeError("unexpected character, expected data token", line, column); |
126 | 0 | } |
127 | | |
128 | 0 | start = end = nullptr; |
129 | 0 | } |
130 | | |
131 | | } |
132 | | |
133 | | // ------------------------------------------------------------------------------------------------ |
134 | 0 | void Tokenize(TokenList &output_tokens, const char *input, StackAllocator &token_allocator) { |
135 | 0 | ai_assert(input); |
136 | 0 | ASSIMP_LOG_DEBUG("Tokenizing ASCII FBX file"); |
137 | | |
138 | | // line and column numbers numbers are one-based |
139 | 0 | unsigned int line = 1; |
140 | 0 | unsigned int column = 1; |
141 | |
|
142 | 0 | bool comment = false; |
143 | 0 | bool in_double_quotes = false; |
144 | 0 | bool pending_data_token = false; |
145 | |
|
146 | 0 | const char *token_begin = nullptr, *token_end = nullptr; |
147 | 0 | for (const char* cur = input;*cur;column += (*cur == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1), ++cur) { |
148 | 0 | const char c = *cur; |
149 | |
|
150 | 0 | if (IsLineEnd(c)) { |
151 | 0 | comment = false; |
152 | |
|
153 | 0 | column = 0; |
154 | 0 | ++line; |
155 | 0 | } |
156 | |
|
157 | 0 | if(comment) { |
158 | 0 | continue; |
159 | 0 | } |
160 | | |
161 | 0 | if(in_double_quotes) { |
162 | 0 | if (c == '\"') { |
163 | 0 | in_double_quotes = false; |
164 | 0 | token_end = cur; |
165 | |
|
166 | 0 | ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column); |
167 | 0 | pending_data_token = false; |
168 | 0 | } |
169 | 0 | continue; |
170 | 0 | } |
171 | | |
172 | 0 | switch(c) |
173 | 0 | { |
174 | 0 | case '\"': |
175 | 0 | if (token_begin) { |
176 | 0 | TokenizeError("unexpected double-quote", line, column); |
177 | 0 | } |
178 | 0 | token_begin = cur; |
179 | 0 | in_double_quotes = true; |
180 | 0 | continue; |
181 | | |
182 | 0 | case ';': |
183 | 0 | ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column); |
184 | 0 | comment = true; |
185 | 0 | continue; |
186 | | |
187 | 0 | case '{': |
188 | 0 | ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column); |
189 | 0 | output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column)); |
190 | 0 | continue; |
191 | | |
192 | 0 | case '}': |
193 | 0 | ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column); |
194 | 0 | output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column)); |
195 | 0 | continue; |
196 | | |
197 | 0 | case ',': |
198 | 0 | if (pending_data_token) { |
199 | 0 | ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, TokenType_DATA, true); |
200 | 0 | } |
201 | 0 | output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column)); |
202 | 0 | continue; |
203 | | |
204 | 0 | case ':': |
205 | 0 | if (pending_data_token) { |
206 | 0 | ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, TokenType_KEY, true); |
207 | 0 | } |
208 | 0 | else { |
209 | 0 | TokenizeError("unexpected colon", line, column); |
210 | 0 | } |
211 | 0 | continue; |
212 | 0 | } |
213 | | |
214 | 0 | if (IsSpaceOrNewLine(c)) { |
215 | |
|
216 | 0 | if (token_begin) { |
217 | | // peek ahead and check if the next token is a colon in which |
218 | | // case this counts as KEY token. |
219 | 0 | TokenType type = TokenType_DATA; |
220 | 0 | for (const char* peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) { |
221 | 0 | if (*peek == ':') { |
222 | 0 | type = TokenType_KEY; |
223 | 0 | cur = peek; |
224 | 0 | break; |
225 | 0 | } |
226 | 0 | } |
227 | |
|
228 | 0 | ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, type); |
229 | 0 | } |
230 | |
|
231 | 0 | pending_data_token = false; |
232 | 0 | } |
233 | 0 | else { |
234 | 0 | token_end = cur; |
235 | 0 | if (!token_begin) { |
236 | 0 | token_begin = cur; |
237 | 0 | } |
238 | |
|
239 | 0 | pending_data_token = true; |
240 | 0 | } |
241 | 0 | } |
242 | 0 | } |
243 | | |
244 | | } // !FBX |
245 | | } // !Assimp |
246 | | |
247 | | #endif |