Coverage Report

Created: 2026-02-05 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/assimp/code/AssetLib/FBX/FBXTokenizer.cpp
Line
Count
Source
1
/*
2
Open Asset Import Library (assimp)
3
----------------------------------------------------------------------
4
5
Copyright (c) 2006-2026, assimp team
6
7
All rights reserved.
8
9
Redistribution and use of this software in source and binary forms,
10
with or without modification, are permitted provided that the
11
following conditions are met:
12
13
* Redistributions of source code must retain the above
14
  copyright notice, this list of conditions and the
15
  following disclaimer.
16
17
* Redistributions in binary form must reproduce the above
18
  copyright notice, this list of conditions and the
19
  following disclaimer in the documentation and/or other
20
  materials provided with the distribution.
21
22
* Neither the name of the assimp team, nor the names of its
23
  contributors may be used to endorse or promote products
24
  derived from this software without specific prior
25
  written permission of the assimp team.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38
39
----------------------------------------------------------------------
40
*/
41
42
/** @file  FBXTokenizer.cpp
43
 *  @brief Implementation of the FBX broadphase lexer
44
 */
45
46
#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
47
48
// tab width for logging columns
49
277k
#define ASSIMP_FBX_TAB_WIDTH 4
50
51
#include <assimp/ParsingUtils.h>
52
53
#include "FBXTokenizer.h"
54
#include "FBXUtil.h"
55
#include <assimp/Exceptional.h>
56
#include <assimp/DefaultLogger.hpp>
57
58
namespace Assimp {
59
namespace FBX {
60
61
// ------------------------------------------------------------------------------------------------
62
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column)
63
    :
64
#ifdef DEBUG
65
    contents(sbegin, static_cast<size_t>(send-sbegin)),
66
#endif
67
5.86M
    sbegin(sbegin)
68
5.86M
    , send(send)
69
5.86M
    , type(type)
70
5.86M
    , line(line)
71
5.86M
    , column(column)
72
5.86M
{
73
5.86M
    ai_assert(sbegin);
74
5.86M
    ai_assert(send);
75
76
    // tokens must be of non-zero length
77
5.86M
    ai_assert(static_cast<size_t>(send-sbegin) > 0);
78
5.86M
}
79
80
// ------------------------------------------------------------------------------------------------
81
82
83
namespace {
84
85
// ------------------------------------------------------------------------------------------------
86
// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
87
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column) AI_WONT_RETURN_SUFFIX;
88
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
89
2
{
90
2
    throw DeadlyImportError("FBX-Tokenize", Util::GetLineAndColumnText(line,column), message);
91
2
}
92
93
94
// process a potential data token up to 'cur', adding it to 'output_tokens'.
95
// ------------------------------------------------------------------------------------------------
96
void ProcessDataToken(TokenList &output_tokens, StackAllocator &token_allocator,
97
                      const char*& start, const char*& end,
98
                      unsigned int line,
99
                      unsigned int column,
100
                      TokenType type = TokenType_DATA,
101
                      bool must_have_token = false)
102
3.37M
{
103
3.37M
    if (start && end) {
104
        // sanity check:
105
        // tokens should have no whitespace outside quoted text and [start,end] should
106
        // properly delimit the valid range.
107
2.71M
        bool in_double_quotes = false;
108
27.6M
        for (const char* c = start; c != end + 1; ++c) {
109
24.9M
            if (*c == '\"') {
110
279k
                in_double_quotes = !in_double_quotes;
111
279k
            }
112
113
24.9M
            if (!in_double_quotes && IsSpaceOrNewLine(*c)) {
114
0
                TokenizeError("unexpected whitespace in token", line, column);
115
0
            }
116
24.9M
        }
117
118
2.71M
        if (in_double_quotes) {
119
0
            TokenizeError("non-terminated double quotes", line, column);
120
0
        }
121
122
2.71M
        output_tokens.push_back(new_Token(start,end + 1,type,line,column));
123
2.71M
    }
124
659k
    else if (must_have_token) {
125
2
        TokenizeError("unexpected character, expected data token", line, column);
126
2
    }
127
128
3.37M
    start = end = nullptr;
129
3.37M
}
130
131
}
132
133
// ------------------------------------------------------------------------------------------------
134
103
void Tokenize(TokenList &output_tokens, const char *input, StackAllocator &token_allocator) {
135
103
  ai_assert(input);
136
103
  ASSIMP_LOG_DEBUG("Tokenizing ASCII FBX file");
137
138
    // line and column numbers are one-based
139
103
    unsigned int line = 1;
140
103
    unsigned int column = 1;
141
142
103
    bool comment = false;
143
103
    bool in_double_quotes = false;
144
103
    bool pending_data_token = false;
145
146
103
    const char *token_begin = nullptr, *token_end = nullptr;
147
30.9M
    for (const char* cur = input;*cur;column += (*cur == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1), ++cur) {
148
30.9M
        const char c = *cur;
149
150
30.9M
        if (IsLineEnd(c)) {
151
200k
            comment = false;
152
153
200k
            column = 0;
154
200k
            ++line;
155
200k
        }
156
157
30.9M
        if(comment) {
158
2.01M
            continue;
159
2.01M
        }
160
161
28.9M
        if(in_double_quotes) {
162
12.1M
            if (c == '\"') {
163
139k
                in_double_quotes = false;
164
139k
                token_end = cur;
165
166
139k
                ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
167
139k
                pending_data_token = false;
168
139k
            }
169
12.1M
            continue;
170
12.1M
        }
171
172
16.7M
        switch(c)
173
16.7M
        {
174
139k
        case '\"':
175
139k
            if (token_begin) {
176
0
                TokenizeError("unexpected double-quote", line, column);
177
0
            }
178
139k
            token_begin = cur;
179
139k
            in_double_quotes = true;
180
139k
            continue;
181
182
52.4k
        case ';':
183
52.4k
            ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
184
52.4k
            comment = true;
185
52.4k
            continue;
186
187
219k
        case '{':
188
219k
            ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
189
219k
            output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
190
219k
            continue;
191
192
413k
        case '}':
193
413k
            ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
194
413k
            output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
195
413k
            continue;
196
197
2.51M
        case ',':
198
2.51M
            if (pending_data_token) {
199
2.39M
                ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, TokenType_DATA, true);
200
2.39M
            }
201
2.51M
            output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
202
2.51M
            continue;
203
204
86.4k
        case ':':
205
86.4k
            if (pending_data_token) {
206
86.4k
                ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, TokenType_KEY, true);
207
86.4k
            }
208
0
            else {
209
0
                TokenizeError("unexpected colon", line, column);
210
0
            }
211
86.4k
            continue;
212
16.7M
        }
213
214
13.3M
        if (IsSpaceOrNewLine(c)) {
215
216
632k
            if (token_begin) {
217
                // peek ahead and check if the next token is a colon in which
218
                // case this counts as KEY token.
219
61.5k
                TokenType type = TokenType_DATA;
220
302k
                for (const char* peek = cur;  *peek && IsSpaceOrNewLine(*peek); ++peek) {
221
241k
                    if (*peek == ':') {
222
0
                        type = TokenType_KEY;
223
0
                        cur = peek;
224
0
                        break;
225
0
                    }
226
241k
                }
227
228
61.5k
                ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, type);
229
61.5k
            }
230
231
632k
            pending_data_token = false;
232
632k
        }
233
12.6M
        else {
234
12.6M
            token_end = cur;
235
12.6M
            if (!token_begin) {
236
2.57M
                token_begin = cur;
237
2.57M
            }
238
239
12.6M
            pending_data_token = true;
240
12.6M
        }
241
13.3M
    }
242
103
}
243
244
} // !FBX
245
} // !Assimp
246
247
#endif