/src/llvm-project/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file implements IntegerLiteralSeparatorFixer that fixes integer |
11 | | /// literal separators in C++, Objective-C, C#, Java, and JavaScript code. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "IntegerLiteralSeparatorFixer.h" |
16 | | |
17 | | namespace clang { |
18 | | namespace format { |
19 | | |
/// Numeric base of an integer literal, as derived from its leading characters.
enum class Base {
  /// Prefixed with 0b or 0B.
  Binary,
  /// Starts with a nonzero digit.
  Decimal,
  /// Prefixed with 0x or 0X.
  Hex,
  /// Starts with 0 followed by anything else.
  Other
};
21 | | |
22 | 0 | static Base getBase(const StringRef IntegerLiteral) { |
23 | 0 | assert(IntegerLiteral.size() > 1); |
24 | | |
25 | 0 | if (IntegerLiteral[0] > '0') { |
26 | 0 | assert(IntegerLiteral[0] <= '9'); |
27 | 0 | return Base::Decimal; |
28 | 0 | } |
29 | | |
30 | 0 | assert(IntegerLiteral[0] == '0'); |
31 | | |
32 | 0 | switch (IntegerLiteral[1]) { |
33 | 0 | case 'b': |
34 | 0 | case 'B': |
35 | 0 | return Base::Binary; |
36 | 0 | case 'x': |
37 | 0 | case 'X': |
38 | 0 | return Base::Hex; |
39 | 0 | default: |
40 | 0 | return Base::Other; |
41 | 0 | } |
42 | 0 | } |
43 | | |
44 | | std::pair<tooling::Replacements, unsigned> |
45 | | IntegerLiteralSeparatorFixer::process(const Environment &Env, |
46 | 337 | const FormatStyle &Style) { |
47 | 337 | switch (Style.Language) { |
48 | 337 | case FormatStyle::LK_Cpp: |
49 | 337 | case FormatStyle::LK_ObjC: |
50 | 337 | Separator = '\''; |
51 | 337 | break; |
52 | 0 | case FormatStyle::LK_CSharp: |
53 | 0 | case FormatStyle::LK_Java: |
54 | 0 | case FormatStyle::LK_JavaScript: |
55 | 0 | Separator = '_'; |
56 | 0 | break; |
57 | 0 | default: |
58 | 0 | return {}; |
59 | 337 | } |
60 | | |
61 | 337 | const auto &Option = Style.IntegerLiteralSeparator; |
62 | 337 | const auto Binary = Option.Binary; |
63 | 337 | const auto Decimal = Option.Decimal; |
64 | 337 | const auto Hex = Option.Hex; |
65 | 337 | const bool SkipBinary = Binary == 0; |
66 | 337 | const bool SkipDecimal = Decimal == 0; |
67 | 337 | const bool SkipHex = Hex == 0; |
68 | | |
69 | 337 | if (SkipBinary && SkipDecimal && SkipHex) |
70 | 337 | return {}; |
71 | | |
72 | 0 | const auto BinaryMinDigits = |
73 | 0 | std::max((int)Option.BinaryMinDigits, Binary + 1); |
74 | 0 | const auto DecimalMinDigits = |
75 | 0 | std::max((int)Option.DecimalMinDigits, Decimal + 1); |
76 | 0 | const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1); |
77 | |
|
78 | 0 | const auto &SourceMgr = Env.getSourceManager(); |
79 | 0 | AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); |
80 | |
|
81 | 0 | const auto ID = Env.getFileID(); |
82 | 0 | const auto LangOpts = getFormattingLangOpts(Style); |
83 | 0 | Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); |
84 | 0 | Lex.SetCommentRetentionState(true); |
85 | |
|
86 | 0 | Token Tok; |
87 | 0 | tooling::Replacements Result; |
88 | |
|
89 | 0 | for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) { |
90 | 0 | auto Length = Tok.getLength(); |
91 | 0 | if (Length < 2) |
92 | 0 | continue; |
93 | 0 | auto Location = Tok.getLocation(); |
94 | 0 | auto Text = StringRef(SourceMgr.getCharacterData(Location), Length); |
95 | 0 | if (Tok.is(tok::comment)) { |
96 | 0 | if (isClangFormatOff(Text)) |
97 | 0 | Skip = true; |
98 | 0 | else if (isClangFormatOn(Text)) |
99 | 0 | Skip = false; |
100 | 0 | continue; |
101 | 0 | } |
102 | 0 | if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' || |
103 | 0 | !AffectedRangeMgr.affectsCharSourceRange( |
104 | 0 | CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) { |
105 | 0 | continue; |
106 | 0 | } |
107 | 0 | const auto B = getBase(Text); |
108 | 0 | const bool IsBase2 = B == Base::Binary; |
109 | 0 | const bool IsBase10 = B == Base::Decimal; |
110 | 0 | const bool IsBase16 = B == Base::Hex; |
111 | 0 | if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) || |
112 | 0 | (IsBase16 && SkipHex) || B == Base::Other) { |
113 | 0 | continue; |
114 | 0 | } |
115 | 0 | if (Style.isCpp()) { |
116 | | // Hex alpha digits a-f/A-F must be at the end of the string literal. |
117 | 0 | StringRef Suffixes = "_himnsuyd"; |
118 | 0 | if (const auto Pos = |
119 | 0 | Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes); |
120 | 0 | Pos != StringRef::npos) { |
121 | 0 | Text = Text.substr(0, Pos); |
122 | 0 | Length = Pos; |
123 | 0 | } |
124 | 0 | } |
125 | 0 | if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) || |
126 | 0 | (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) { |
127 | 0 | continue; |
128 | 0 | } |
129 | 0 | const auto Start = Text[0] == '0' ? 2 : 0; |
130 | 0 | auto End = Text.find_first_of("uUlLzZn", Start); |
131 | 0 | if (End == StringRef::npos) |
132 | 0 | End = Length; |
133 | 0 | if (Start > 0 || End < Length) { |
134 | 0 | Length = End - Start; |
135 | 0 | Text = Text.substr(Start, Length); |
136 | 0 | } |
137 | 0 | auto DigitsPerGroup = Decimal; |
138 | 0 | auto MinDigits = DecimalMinDigits; |
139 | 0 | if (IsBase2) { |
140 | 0 | DigitsPerGroup = Binary; |
141 | 0 | MinDigits = BinaryMinDigits; |
142 | 0 | } else if (IsBase16) { |
143 | 0 | DigitsPerGroup = Hex; |
144 | 0 | MinDigits = HexMinDigits; |
145 | 0 | } |
146 | 0 | const auto SeparatorCount = Text.count(Separator); |
147 | 0 | const int DigitCount = Length - SeparatorCount; |
148 | 0 | const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits; |
149 | 0 | if (RemoveSeparator && SeparatorCount == 0) |
150 | 0 | continue; |
151 | 0 | if (!RemoveSeparator && SeparatorCount > 0 && |
152 | 0 | checkSeparator(Text, DigitsPerGroup)) { |
153 | 0 | continue; |
154 | 0 | } |
155 | 0 | const auto &Formatted = |
156 | 0 | format(Text, DigitsPerGroup, DigitCount, RemoveSeparator); |
157 | 0 | assert(Formatted != Text); |
158 | 0 | if (Start > 0) |
159 | 0 | Location = Location.getLocWithOffset(Start); |
160 | 0 | cantFail(Result.add( |
161 | 0 | tooling::Replacement(SourceMgr, Location, Length, Formatted))); |
162 | 0 | } |
163 | |
|
164 | 0 | return {Result, 0}; |
165 | 337 | } |
166 | | |
167 | | bool IntegerLiteralSeparatorFixer::checkSeparator( |
168 | 0 | const StringRef IntegerLiteral, int DigitsPerGroup) const { |
169 | 0 | assert(DigitsPerGroup > 0); |
170 | | |
171 | 0 | int I = 0; |
172 | 0 | for (auto C : llvm::reverse(IntegerLiteral)) { |
173 | 0 | if (C == Separator) { |
174 | 0 | if (I < DigitsPerGroup) |
175 | 0 | return false; |
176 | 0 | I = 0; |
177 | 0 | } else { |
178 | 0 | if (I == DigitsPerGroup) |
179 | 0 | return false; |
180 | 0 | ++I; |
181 | 0 | } |
182 | 0 | } |
183 | | |
184 | 0 | return true; |
185 | 0 | } |
186 | | |
187 | | std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral, |
188 | | int DigitsPerGroup, |
189 | | int DigitCount, |
190 | 0 | bool RemoveSeparator) const { |
191 | 0 | assert(DigitsPerGroup != 0); |
192 | | |
193 | 0 | std::string Formatted; |
194 | |
|
195 | 0 | if (RemoveSeparator) { |
196 | 0 | for (auto C : IntegerLiteral) |
197 | 0 | if (C != Separator) |
198 | 0 | Formatted.push_back(C); |
199 | 0 | return Formatted; |
200 | 0 | } |
201 | | |
202 | 0 | int Remainder = DigitCount % DigitsPerGroup; |
203 | |
|
204 | 0 | int I = 0; |
205 | 0 | for (auto C : IntegerLiteral) { |
206 | 0 | if (C == Separator) |
207 | 0 | continue; |
208 | 0 | if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) { |
209 | 0 | Formatted.push_back(Separator); |
210 | 0 | I = 0; |
211 | 0 | Remainder = 0; |
212 | 0 | } |
213 | 0 | Formatted.push_back(C); |
214 | 0 | ++I; |
215 | 0 | } |
216 | |
|
217 | 0 | return Formatted; |
218 | 0 | } |
219 | | |
220 | | } // namespace format |
221 | | } // namespace clang |