/src/assimp/code/AssetLib/FBX/FBXTokenizer.cpp

Source
/*
Open Asset Import Library (assimp)
----------------------------------------------------------------------

Copyright (c) 2006-2026, assimp team

All rights reserved.

Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the assimp team, nor the names of its
  contributors may be used to endorse or promote products
  derived from this software without specific prior
  written permission of the assimp team.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------
*/

/** @file  FBXTokenizer.cpp
 *  @brief Implementation of the FBX broadphase lexer
 */

#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER

// tab width for logging columns
#define ASSIMP_FBX_TAB_WIDTH 4

#include <assimp/ParsingUtils.h>

#include "FBXTokenizer.h"
#include "FBXUtil.h"
#include <assimp/Exceptional.h>
#include <assimp/DefaultLogger.hpp>

namespace Assimp {
namespace FBX {

// ------------------------------------------------------------------------------------------------
// Construct a token covering the character range [sbegin, send) of the input text.
//
//  sbegin, send  first character of the token and one past its last character.
//                Both must be non-null and send must lie behind sbegin.
//  type          lexical category of the token.
//  line, column  text position associated with the token.
//
// The token stores the two pointers; DEBUG builds additionally keep a copy of the
// covered text in 'contents'.
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column)
    :
#ifdef DEBUG
    contents(sbegin, static_cast<size_t>(send-sbegin)),
#endif
    sbegin(sbegin)
    , send(send)
    , type(type)
    , line(line)
    , column(column)
{
    ai_assert(sbegin);
    ai_assert(send);

    // tokens must be of non-zero length. The pointers are compared directly:
    // casting the difference to size_t would turn a reversed range
    // (send < sbegin) into a huge positive value and let it pass the check.
    ai_assert(send > sbegin);
}

// ------------------------------------------------------------------------------------------------


namespace {

// ------------------------------------------------------------------------------------------------
// Report a tokenizer failure. Never returns: a DeadlyImportError is thrown whose text is
// built from the "FBX-Tokenize" prefix, the formatted line/column position and 'message'.
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column) AI_WONT_RETURN_SUFFIX;
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
{
    // render the position first so the throw expression stays short
    const auto location = Util::GetLineAndColumnText(line, column);
    throw DeadlyImportError("FBX-Tokenize", location, message);
}


// ------------------------------------------------------------------------------------------------
// Emit the token delimited by [start,end] (both ends inclusive) into 'output_tokens',
// provided both pointers are set.
//
//  start, end       first and last character of the candidate token. Both are reset to
//                   nullptr before the function returns normally.
//  line, column     position handed to the new token and to any error raised here.
//  type             token type to create, TokenType_DATA unless stated otherwise.
//  must_have_token  when true, a missing range is an error instead of a no-op.
//  token_allocator  not referenced by name below; presumably consumed by the new_Token
//                   helper (verify in FBXTokenizer.h), so the name must not change.
//
// Raises a tokenizer error (see TokenizeError) if the range holds whitespace outside
// double quotes, if a double quote is left open, or if a required token is missing.
void ProcessDataToken(TokenList &output_tokens, StackAllocator &token_allocator,
                      const char*& start, const char*& end,
                      unsigned int line,
                      unsigned int column,
                      TokenType type = TokenType_DATA,
                      bool must_have_token = false)
{
    const bool have_range = (start != nullptr) && (end != nullptr);

    if (!have_range) {
        // nothing collected: only an error if the caller insisted on a token
        if (must_have_token) {
            TokenizeError("unexpected character, expected data token", line, column);
        }

        start = end = nullptr;
        return;
    }

    // sanity check: no whitespace outside quoted text, and quotes must be balanced
    const char* const one_past_last = end + 1;
    bool quoted = false;

    const char* it = start;
    while (it != one_past_last) {
        const char ch = *it++;

        if (ch == '\"') {
            quoted = !quoted;
        }

        if (!quoted && IsSpaceOrNewLine(ch)) {
            TokenizeError("unexpected whitespace in token", line, column);
        }
    }

    if (quoted) {
        TokenizeError("non-terminated double quotes", line, column);
    }

    output_tokens.push_back(new_Token(start,one_past_last,type,line,column));

    start = end = nullptr;
}

}

// ------------------------------------------------------------------------------------------------
// Split the ASCII FBX text in 'input' into tokens and append them to 'output_tokens'.
//
//  output_tokens    receives the tokens in the order in which they are completed.
//  input            zero-terminated text, must not be null. Tokens store pointers into
//                   this buffer.
//  token_allocator  not referenced by name in the statements below; presumably the
//                   new_Token helper allocates from it (verify in FBXTokenizer.h), so
//                   the parameter name must stay as it is.
//
// Recognised syntax: ';' starts a comment that runs to the end of the line, '"' delimits
// quoted data, '{', '}' and ',' become tokens of their own, and a token followed by ':'
// (optionally separated from it by whitespace) becomes a KEY. Any other run of
// non-whitespace characters is a DATA token.
//
// The line/column stored in a token is the scanner position at the moment the token is
// emitted, i.e. at or just behind its end rather than at its first character.
//
// Malformed input is reported through TokenizeError(), which throws DeadlyImportError.
// A token or quoted string that is still open when the input ends is not emitted.
void Tokenize(TokenList &output_tokens, const char *input, StackAllocator &token_allocator) {
    ai_assert(input);
    ASSIMP_LOG_DEBUG("Tokenizing ASCII FBX file");

    // line and column numbers are one-based
    unsigned int line = 1;
    unsigned int column = 1;

    bool comment = false;
    bool in_double_quotes = false;
    bool pending_data_token = false;

    // true after a KEY token was emitted ahead of its colon (whitespace sits between
    // the key and the ':'); the colon is then consumed without raising an error.
    bool key_colon_pending = false;

    const char *token_begin = nullptr, *token_end = nullptr;
    for (const char* cur = input;*cur;column += (*cur == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1), ++cur) {
        const char c = *cur;

        if (IsLineEnd(c)) {
            // a line break always terminates a ';' comment
            comment = false;

            // the loop increment turns this into column 1 for the next character
            column = 0;
            // NOTE(review): if IsLineEnd() matches both '\r' and '\n', a CRLF pair
            // advances 'line' twice - confirm against ParsingUtils.h.
            ++line;
        }

        if(comment) {
            continue;
        }

        if(in_double_quotes) {
            if (c == '\"') {
                in_double_quotes = false;
                token_end = cur;

                // the quoted token includes both quote characters
                ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
                pending_data_token = false;
            }
            continue;
        }

        switch(c)
        {
        case '\"':
            // a quote may only open a new token, never appear inside one
            if (token_begin) {
                TokenizeError("unexpected double-quote", line, column);
            }
            token_begin = cur;
            in_double_quotes = true;
            continue;

        case ';':
            ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
            comment = true;
            continue;

        case '{':
            ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
            output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
            continue;

        case '}':
            ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
            output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
            continue;

        case ',':
            if (pending_data_token) {
                ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, TokenType_DATA, true);
            }
            output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
            continue;

        case ':':
            if (pending_data_token) {
                // colon directly behind the token text: that token is the key
                ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, TokenType_KEY, true);
            }
            else if (key_colon_pending) {
                // this colon belongs to the KEY token that the whitespace branch
                // below has already emitted
                key_colon_pending = false;
            }
            else {
                TokenizeError("unexpected colon", line, column);
            }
            continue;
        }

        if (IsSpaceOrNewLine(c)) {

            if (token_begin) {
                // peek ahead: skip the whole run of whitespace behind the token and
                // check whether the first character after it is a colon, in which
                // case this counts as KEY token. (Testing for ':' while still
                // stepping over whitespace could never match.)
                const char* peek = cur;
                while (*peek && IsSpaceOrNewLine(*peek)) {
                    ++peek;
                }

                TokenType type = TokenType_DATA;
                if (*peek == ':') {
                    type = TokenType_KEY;
                    // 'cur' is left alone so that the main loop keeps counting lines
                    // and columns for the skipped whitespace; the flag tells the
                    // ':' case that its key is already out.
                    key_colon_pending = true;
                }

                ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, type);
            }

            pending_data_token = false;
        }
        else {
            // ordinary character: extend the current token or start a new one
            token_end = cur;
            if (!token_begin) {
                token_begin = cur;
            }

            pending_data_token = true;
        }
    }
}

} // !FBX
} // !Assimp

#endif

Coverage Report

Created: 2026-05-23 07:04

Line	Count	Source
1		/*
2		Open Asset Import Library (assimp)
3		----------------------------------------------------------------------
4
5		Copyright (c) 2006-2026, assimp team
6
7		All rights reserved.
8
9		Redistribution and use of this software in source and binary forms,
10		with or without modification, are permitted provided that the
11		following conditions are met:
12
13		* Redistributions of source code must retain the above
14		copyright notice, this list of conditions and the
15		following disclaimer.
16
17		* Redistributions in binary form must reproduce the above
18		copyright notice, this list of conditions and the
19		following disclaimer in the documentation and/or other
20		materials provided with the distribution.
21
22		* Neither the name of the assimp team, nor the names of its
23		contributors may be used to endorse or promote products
24		derived from this software without specific prior
25		written permission of the assimp team.
26
27		THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28		"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29		LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30		A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31		OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32		SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33		LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34		DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35		THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36		(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37		OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38
39		----------------------------------------------------------------------
40		*/
41
42		/** @file FBXTokenizer.cpp
43		* @brief Implementation of the FBX broadphase lexer
44		*/
45
46		#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
47
48		// tab width for logging columns
49	57.5k	#define ASSIMP_FBX_TAB_WIDTH 4
50
51		#include <assimp/ParsingUtils.h>
52
53		#include "FBXTokenizer.h"
54		#include "FBXUtil.h"
55		#include <assimp/Exceptional.h>
56		#include <assimp/DefaultLogger.hpp>
57
58		namespace Assimp {
59		namespace FBX {
60
61		// ------------------------------------------------------------------------------------------------
62		Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column)
63		:
64		#ifdef DEBUG
65		contents(sbegin, static_cast<size_t>(send-sbegin)),
66		#endif
67	2.90M	sbegin(sbegin)
68	2.90M	, send(send)
69	2.90M	, type(type)
70	2.90M	, line(line)
71	2.90M	, column(column)
72	2.90M	{
73	2.90M	ai_assert(sbegin);
74	2.90M	ai_assert(send);
75
76		// tokens must be of non-zero length
77	2.90M	ai_assert(static_cast<size_t>(send-sbegin) > 0);
78	2.90M	}
79
80		// ------------------------------------------------------------------------------------------------
81
82
83		namespace {
84
85		// ------------------------------------------------------------------------------------------------
86		// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
87		AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column) AI_WONT_RETURN_SUFFIX;
88		AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
89	1	{
90	1	throw DeadlyImportError("FBX-Tokenize", Util::GetLineAndColumnText(line,column), message);
91	1	}
92
93
94		// process a potential data token up to 'cur', adding it to 'output_tokens'.
95		// ------------------------------------------------------------------------------------------------
96		void ProcessDataToken(TokenList &output_tokens, StackAllocator &token_allocator,
97		const char*& start, const char*& end,
98		unsigned int line,
99		unsigned int column,
100		TokenType type = TokenType_DATA,
101		bool must_have_token = false)
102	1.79M	{
103	1.79M	if (start && end) {
104		// sanity check:
105		// tokens should have no whitespace outside quoted text and [start,end] should
106		// properly delimit the valid range.
107	858k	bool in_double_quotes = false;
108	11.8M	for (const char* c = start; c != end + 1; ++c) {
109	10.9M	if (*c == '\"') {
110	45.1k	in_double_quotes = !in_double_quotes;
111	45.1k	}
112
113	10.9M	if (!in_double_quotes && IsSpaceOrNewLine(*c)) {
114	0	TokenizeError("unexpected whitespace in token", line, column);
115	0	}
116	10.9M	}
117
118	858k	if (in_double_quotes) {
119	0	TokenizeError("non-terminated double quotes", line, column);
120	0	}
121
122	858k	output_tokens.push_back(new_Token(start,end + 1,type,line,column));
123	858k	}
124	935k	else if (must_have_token) {
125	0	TokenizeError("unexpected character, expected data token", line, column);
126	0	}
127
128	1.79M	start = end = nullptr;
129	1.79M	}
130
131		}
132
133		// ------------------------------------------------------------------------------------------------
134	39	void Tokenize(TokenList &output_tokens, const char *input, StackAllocator &token_allocator) {
135	39	ai_assert(input);
136	39	ASSIMP_LOG_DEBUG("Tokenizing ASCII FBX file");
137
138		// line and column numbers numbers are one-based
139	39	unsigned int line = 1;
140	39	unsigned int column = 1;
141
142	39	bool comment = false;
143	39	bool in_double_quotes = false;
144	39	bool pending_data_token = false;
145
146	39	const char *token_begin = nullptr, *token_end = nullptr;
147	14.3M	for (const char* cur = input;*cur;column += (*cur == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1), ++cur) {
148	14.3M	const char c = *cur;
149
150	14.3M	if (IsLineEnd(c)) {
151	58.3k	comment = false;
152
153	58.3k	column = 0;
154	58.3k	++line;
155	58.3k	}
156
157	14.3M	if(comment) {
158	1.10M	continue;
159	1.10M	}
160
161	13.2M	if(in_double_quotes) {
162	3.94M	if (c == '\"') {
163	22.5k	in_double_quotes = false;
164	22.5k	token_end = cur;
165
166	22.5k	ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
167	22.5k	pending_data_token = false;
168	22.5k	}
169	3.94M	continue;
170	3.94M	}
171
172	9.26M	switch(c)
173	9.26M	{
174	22.5k	case '\"':
175	22.5k	if (token_begin) {
176	1	TokenizeError("unexpected double-quote", line, column);
177	1	}
178	22.5k	token_begin = cur;
179	22.5k	in_double_quotes = true;
180	22.5k	continue;
181
182	25.2k	case ';':
183	25.2k	ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
184	25.2k	comment = true;
185	25.2k	continue;
186
187	961k	case '{':
188	961k	ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
189	961k	output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
190	961k	continue;
191
192	1.37k	case '}':
193	1.37k	ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column);
194	1.37k	output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
195	1.37k	continue;
196
197	1.08M	case ',':
198	1.08M	if (pending_data_token) {
199	752k	ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, TokenType_DATA, true);
200	752k	}
201	1.08M	output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
202	1.08M	continue;
203
204	17.4k	case ':':
205	17.4k	if (pending_data_token) {
206	17.4k	ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, TokenType_KEY, true);
207	17.4k	}
208	0	else {
209	0	TokenizeError("unexpected colon", line, column);
210	0	}
211	17.4k	continue;
212	9.26M	}
213
214	7.15M	if (IsSpaceOrNewLine(c)) {
215
216	141k	if (token_begin) {
217		// peek ahead and check if the next token is a colon in which
218		// case this counts as KEY token.
219	13.1k	TokenType type = TokenType_DATA;
220	63.6k	for (const char* peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) {
221	50.4k	if (*peek == ':') {
222	0	type = TokenType_KEY;
223	0	cur = peek;
224	0	break;
225	0	}
226	50.4k	}
227
228	13.1k	ProcessDataToken(output_tokens, token_allocator, token_begin, token_end, line, column, type);
229	13.1k	}
230
231	141k	pending_data_token = false;
232	141k	}
233	7.01M	else {
234	7.01M	token_end = cur;
235	7.01M	if (!token_begin) {
236	835k	token_begin = cur;
237	835k	}
238
239	7.01M	pending_data_token = true;
240	7.01M	}
241	7.15M	}
242	39	}
243
244		} // !FBX
245		} // !Assimp
246
247		#endif