/src/serenity/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2020-2021, the SerenityOS developers. |
3 | | * Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org> |
4 | | * |
5 | | * SPDX-License-Identifier: BSD-2-Clause |
6 | | */ |
7 | | |
8 | | #pragma once |
9 | | |
10 | | #include <AK/Optional.h> |
11 | | #include <AK/StringView.h> |
12 | | #include <AK/Types.h> |
13 | | #include <AK/Utf8View.h> |
14 | | #include <LibWeb/CSS/Parser/Token.h> |
15 | | #include <LibWeb/Forward.h> |
16 | | |
17 | | namespace Web::CSS::Parser { |
18 | | |
19 | | class U32Twin { |
20 | | public: |
21 | | void set(size_t index, u32 value) |
22 | 0 | { |
23 | 0 | if (index == 0) |
24 | 0 | first = value; |
25 | 0 | if (index == 1) |
26 | 0 | second = value; |
27 | 0 | } |
28 | | |
29 | | u32 first {}; |
30 | | u32 second {}; |
31 | | }; |
32 | | |
33 | | class U32Triplet { |
34 | | public: |
35 | | void set(size_t index, u32 value) |
36 | 0 | { |
37 | 0 | if (index == 0) |
38 | 0 | first = value; |
39 | 0 | if (index == 1) |
40 | 0 | second = value; |
41 | 0 | if (index == 2) |
42 | 0 | third = value; |
43 | 0 | } |
44 | | |
45 | | U32Twin to_twin_12() |
46 | 0 | { |
47 | 0 | return { first, second }; |
48 | 0 | } |
49 | | |
50 | | U32Twin to_twin_23() |
51 | 0 | { |
52 | 0 | return { second, third }; |
53 | 0 | } |
54 | | |
55 | | u32 first {}; |
56 | | u32 second {}; |
57 | | u32 third {}; |
58 | | }; |
59 | | |
// Produces a Vector<Token> from an input string. Only declarations appear in
// this class; no member function bodies are defined here.
// NOTE(review): many method names look like the algorithm names used in the
// CSS Syntax specification's tokenization section ("consume a token",
// "consume a numeric token", ...) - confirm against the implementation file.
60 | | class Tokenizer { |
61 | | public: |
// Public entry point: takes the raw `input` and the name of its `encoding`
// and returns the resulting tokens.
// NOTE(review): presumably decodes `input` using `encoding` and then runs the
// private tokenize() overload on a temporary Tokenizer - verify in the .cpp.
62 | | static Vector<Token> tokenize(StringView input, StringView encoding); |
63 | |
// Returns a Token; by its name, one that represents end-of-file.
64 | | [[nodiscard]] static Token create_eof_token(); |
65 | |
66 | | private: |
// The constructor is private and explicit, so code outside this class cannot
// construct a Tokenizer directly and must go through the static tokenize().
67 | | explicit Tokenizer(String decoded_input); |
68 | |
// Instance-level tokenization; the result must not be discarded.
69 | | [[nodiscard]] Vector<Token> tokenize(); |
70 | |
// Const helpers. NOTE(review): presumably used to recover the original text
// between a saved byte offset and the current read position - confirm.
71 | | size_t current_byte_offset() const; |
72 | | String input_since(size_t offset) const; |
73 | |
// Code point access. next_code_point() is non-const, while the peek_*
// functions are const and therefore cannot modify the tokenizer's state.
// peek_code_point() defaults to offset 0.
74 | | [[nodiscard]] u32 next_code_point(); |
75 | | [[nodiscard]] u32 peek_code_point(size_t offset = 0) const; |
76 | | [[nodiscard]] U32Twin peek_twin() const; |
77 | | [[nodiscard]] U32Triplet peek_triplet() const; |
78 | |
// NOTE(review): unlike peek_twin()/peek_triplet() these are non-const;
// what state they touch is not visible from this header.
79 | | [[nodiscard]] U32Twin start_of_input_stream_twin(); |
80 | | [[nodiscard]] U32Triplet start_of_input_stream_triplet(); |
81 | |
// Static token factories: they take no Tokenizer instance state.
82 | | [[nodiscard]] static Token create_new_token(Token::Type); |
83 | | [[nodiscard]] static Token create_value_token(Token::Type, FlyString&& value, String&& representation); |
84 | | [[nodiscard]] static Token create_value_token(Token::Type, u32 value, String&& representation); |
// consume_* members are non-static and non-const; those returning a value
// are [[nodiscard]].
// NOTE(review): presumably each advances the read position past the
// construct it is named after - confirm in the implementation.
85 | | [[nodiscard]] Token consume_a_token(); |
86 | | [[nodiscard]] Token consume_string_token(u32 ending_code_point); |
87 | | [[nodiscard]] Token consume_a_numeric_token(); |
88 | | [[nodiscard]] Token consume_an_ident_like_token(); |
89 | | [[nodiscard]] Number consume_a_number(); |
90 | | [[nodiscard]] double convert_a_string_to_a_number(StringView); |
91 | | [[nodiscard]] FlyString consume_an_ident_sequence(); |
92 | | [[nodiscard]] u32 consume_escaped_code_point(); |
93 | | [[nodiscard]] Token consume_a_url_token(); |
// These return void, so any effect they have is on tokenizer state only.
94 | | void consume_the_remnants_of_a_bad_url(); |
95 | | void consume_comments(); |
96 | | void consume_as_much_whitespace_as_possible(); |
// NOTE(review): presumably restores m_utf8_iterator / m_position from the
// m_prev_* members declared below - confirm.
97 | | void reconsume_current_input_code_point(); |
// Static predicates over two or three u32 values passed by value.
98 | | [[nodiscard]] static bool is_valid_escape_sequence(U32Twin); |
99 | | [[nodiscard]] static bool would_start_an_ident_sequence(U32Triplet); |
100 | | [[nodiscard]] static bool would_start_a_number(U32Triplet); |
101 | |
// Data members. Declaration order is also their initialization order, so do
// not reorder them without checking the constructor.
// NOTE(review): m_utf8_view and both iterators presumably refer into
// m_decoded_input, which would need to outlive them - confirm.
102 | | String m_decoded_input; |
103 | | Utf8View m_utf8_view; |
104 | | AK::Utf8CodePointIterator m_utf8_iterator; |
105 | | AK::Utf8CodePointIterator m_prev_utf8_iterator; |
106 | | Token::Position m_position; |
107 | | Token::Position m_prev_position; |
108 | | }; |
109 | | } |