/src/mozilla-central/toolkit/components/find/nsFind.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | //#define DEBUG_FIND 1 |
8 | | |
9 | | #include "nsFind.h" |
10 | | #include "nsContentCID.h" |
11 | | #include "nsIContent.h" |
12 | | #include "nsINode.h" |
13 | | #include "nsISelectionController.h" |
14 | | #include "nsIFrame.h" |
15 | | #include "nsITextControlFrame.h" |
16 | | #include "nsIFormControl.h" |
17 | | #include "nsTextFragment.h" |
18 | | #include "nsString.h" |
19 | | #include "nsAtom.h" |
20 | | #include "nsServiceManagerUtils.h" |
21 | | #include "nsUnicharUtils.h" |
22 | | #include "nsCRT.h" |
23 | | #include "nsRange.h" |
24 | | #include "nsContentUtils.h" |
25 | | #include "mozilla/DebugOnly.h" |
26 | | #include "mozilla/TextEditor.h" |
27 | | #include "mozilla/dom/ChildIterator.h" |
28 | | #include "mozilla/dom/TreeIterator.h" |
29 | | #include "mozilla/dom/Element.h" |
30 | | #include "mozilla/dom/Text.h" |
31 | | |
32 | | using namespace mozilla; |
33 | | using namespace mozilla::dom; |
34 | | |
35 | | // Yikes! Casting a char to unichar can fill with ones! |
36 | 0 | #define CHAR_TO_UNICHAR(c) ((char16_t)(unsigned char)c) |
37 | | |
38 | | static NS_DEFINE_CID(kCContentIteratorCID, NS_CONTENTITERATOR_CID); |
39 | | static NS_DEFINE_CID(kCPreContentIteratorCID, NS_PRECONTENTITERATOR_CID); |
40 | | |
41 | 0 | #define CH_QUOTE ((char16_t)0x22) |
42 | 0 | #define CH_APOSTROPHE ((char16_t)0x27) |
43 | 0 | #define CH_LEFT_SINGLE_QUOTE ((char16_t)0x2018) |
44 | 0 | #define CH_RIGHT_SINGLE_QUOTE ((char16_t)0x2019) |
45 | 0 | #define CH_LEFT_DOUBLE_QUOTE ((char16_t)0x201C) |
46 | 0 | #define CH_RIGHT_DOUBLE_QUOTE ((char16_t)0x201D) |
47 | | |
48 | 0 | #define CH_SHY ((char16_t)0xAD) |
49 | | |
50 | | // nsFind::Find casts CH_SHY to char before calling StripChars |
51 | | // This works correctly if and only if CH_SHY <= 255 |
52 | | static_assert(CH_SHY <= 255, "CH_SHY is not an ascii character"); |
53 | | |
54 | 0 | NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsFind) |
55 | 0 | NS_INTERFACE_MAP_ENTRY(nsIFind) |
56 | 0 | NS_INTERFACE_MAP_ENTRY(nsISupports) |
57 | 0 | NS_INTERFACE_MAP_END |
58 | | |
59 | | NS_IMPL_CYCLE_COLLECTING_ADDREF(nsFind) |
60 | | NS_IMPL_CYCLE_COLLECTING_RELEASE(nsFind) |
61 | | |
62 | | NS_IMPL_CYCLE_COLLECTION(nsFind) |
63 | | |
64 | | nsFind::nsFind() |
65 | | : mFindBackward(false) |
66 | | , mCaseSensitive(false) |
67 | | , mWordBreaker(nullptr) |
68 | 0 | { |
69 | 0 | } |
70 | | |
71 | 0 | nsFind::~nsFind() = default; |
72 | | |
73 | | #ifdef DEBUG_FIND |
74 | | #define DEBUG_FIND_PRINTF(...) printf(__VA_ARGS__) |
75 | | #else |
76 | | #define DEBUG_FIND_PRINTF(...) /* nothing */ |
77 | | #endif |
78 | | |
79 | | static nsIContent& |
80 | | AnonymousSubtreeRootParent(const nsINode& aNode) |
81 | 0 | { |
82 | 0 | MOZ_ASSERT(aNode.IsInNativeAnonymousSubtree()); |
83 | 0 |
|
84 | 0 | nsIContent* current = aNode.GetParent(); |
85 | 0 | while (current->IsInNativeAnonymousSubtree()) { |
86 | 0 | current = current->GetParent(); |
87 | 0 | MOZ_ASSERT(current, "huh?"); |
88 | 0 | } |
89 | 0 | return *current; |
90 | 0 | } |
91 | | |
92 | | static void |
93 | | DumpNode(const nsINode* aNode) |
94 | 0 | { |
95 | | #ifdef DEBUG_FIND |
96 | | if (!aNode) { |
97 | | printf(">>>> Node: NULL\n"); |
98 | | return; |
99 | | } |
100 | | nsString nodeName = aNode->NodeName(); |
101 | | if (aNode->IsText()) { |
102 | | nsAutoString newText; |
103 | | aNode->AsText()->AppendTextTo(newText); |
104 | | printf(">>>> Text node (node name %s): '%s'\n", |
105 | | NS_LossyConvertUTF16toASCII(nodeName).get(), |
106 | | NS_LossyConvertUTF16toASCII(newText).get()); |
107 | | } else { |
108 | | printf(">>>> Node: %s\n", NS_LossyConvertUTF16toASCII(nodeName).get()); |
109 | | } |
110 | | #endif |
111 | | } |
112 | | |
113 | | static bool |
114 | | IsBlockNode(const nsIContent* aContent) |
115 | 0 | { |
116 | 0 | if (aContent->IsElement() && aContent->AsElement()->IsDisplayContents()) { |
117 | 0 | return false; |
118 | 0 | } |
119 | 0 | |
120 | 0 | // FIXME(emilio): This is dubious... |
121 | 0 | if (aContent->IsAnyOfHTMLElements(nsGkAtoms::img, |
122 | 0 | nsGkAtoms::hr, |
123 | 0 | nsGkAtoms::th, |
124 | 0 | nsGkAtoms::td)) { |
125 | 0 | return true; |
126 | 0 | } |
127 | 0 | |
128 | 0 | nsIFrame* frame = aContent->GetPrimaryFrame(); |
129 | 0 | return frame && frame->StyleDisplay()->IsBlockOutsideStyle(); |
130 | 0 | } |
131 | | |
132 | | static bool |
133 | | IsDisplayedNode(const nsINode* aNode) |
134 | 0 | { |
135 | 0 | if (!aNode->IsContent()) { |
136 | 0 | return false; |
137 | 0 | } |
138 | 0 | |
139 | 0 | if (aNode->AsContent()->GetPrimaryFrame()) { |
140 | 0 | return true; |
141 | 0 | } |
142 | 0 | |
143 | 0 | // If there's no frame, it's not displayed, unless it's display: contents. |
144 | 0 | return aNode->IsElement() && aNode->AsElement()->IsDisplayContents(); |
145 | 0 | } |
146 | | |
147 | | static bool |
148 | | IsVisibleNode(const nsINode* aNode) |
149 | 0 | { |
150 | 0 | if (!IsDisplayedNode(aNode)) { |
151 | 0 | return false; |
152 | 0 | } |
153 | 0 | |
154 | 0 | nsIFrame* frame = aNode->AsContent()->GetPrimaryFrame(); |
155 | 0 | if (!frame) { |
156 | 0 | // display: contents |
157 | 0 | return true; |
158 | 0 | } |
159 | 0 | |
160 | 0 | return frame->StyleVisibility()->IsVisible(); |
161 | 0 | } |
162 | | |
163 | | static bool |
164 | | IsTextFormControl(nsIContent& aContent) |
165 | 0 | { |
166 | 0 | if (!aContent.IsNodeOfType(nsINode::eHTML_FORM_CONTROL)) { |
167 | 0 | return false; |
168 | 0 | } |
169 | 0 | |
170 | 0 | nsCOMPtr<nsIFormControl> formControl = do_QueryInterface(&aContent); |
171 | 0 | return formControl->IsTextControl(true); |
172 | 0 | } |
173 | | |
174 | | static bool |
175 | | SkipNode(const nsIContent* aContent) |
176 | 0 | { |
177 | 0 | const nsIContent* content = aContent; |
178 | 0 | while (content) { |
179 | 0 | if (!IsDisplayedNode(content) || |
180 | 0 | content->IsComment() || |
181 | 0 | content->IsAnyOfHTMLElements(nsGkAtoms::script, |
182 | 0 | nsGkAtoms::noframes, |
183 | 0 | nsGkAtoms::select)) { |
184 | 0 | DEBUG_FIND_PRINTF("Skipping node: "); |
185 | 0 | DumpNode(content); |
186 | 0 | return true; |
187 | 0 | } |
188 | 0 | |
189 | 0 | // Skip NAC in non-form-control. |
190 | 0 | if (content->IsInNativeAnonymousSubtree() && |
191 | 0 | !IsTextFormControl(AnonymousSubtreeRootParent(*content))) { |
192 | 0 | return true; |
193 | 0 | } |
194 | 0 | |
195 | 0 | // Only climb to the nearest block node |
196 | 0 | if (IsBlockNode(content)) { |
197 | 0 | return false; |
198 | 0 | } |
199 | 0 | |
200 | 0 | content = content->GetFlattenedTreeParent(); |
201 | 0 | } |
202 | 0 |
|
203 | 0 | return false; |
204 | 0 | } |
205 | | |
206 | | static const nsIContent* |
207 | | GetBlockParent(const Text& aNode) |
208 | 0 | { |
209 | 0 | for (const nsIContent* current = aNode.GetFlattenedTreeParent(); |
210 | 0 | current; |
211 | 0 | current = current->GetFlattenedTreeParent()) { |
212 | 0 | if (IsBlockNode(current)) { |
213 | 0 | return current; |
214 | 0 | } |
215 | 0 | } |
216 | 0 | return nullptr; |
217 | 0 | } |
218 | | |
219 | | struct nsFind::State final |
220 | | { |
221 | | State(bool aFindBackward, nsIContent& aRoot, const nsRange& aStartPoint) |
222 | | : mFindBackward(aFindBackward) |
223 | | , mInitialized(false) |
224 | | , mIterOffset(-1) |
225 | | , mLastBlockParent(nullptr) |
226 | | , mIterator(aRoot) |
227 | | , mStartPoint(aStartPoint) |
228 | 0 | { |
229 | 0 | } |
230 | | |
231 | | void PositionAt(Text& aNode) |
232 | 0 | { |
233 | 0 | mIterator.Seek(aNode); |
234 | 0 | } |
235 | | |
236 | | Text* GetCurrentNode() const |
237 | 0 | { |
238 | 0 | MOZ_ASSERT(mInitialized); |
239 | 0 | nsINode* node = mIterator.GetCurrent(); |
240 | 0 | MOZ_ASSERT(!node || node->IsText()); |
241 | 0 | return node ? node->GetAsText() : nullptr; |
242 | 0 | } |
243 | | |
244 | | Text* GetNextNode() |
245 | 0 | { |
246 | 0 | if (MOZ_UNLIKELY(!mInitialized)) { |
247 | 0 | Initialize(); |
248 | 0 | } else { |
249 | 0 | Advance(); |
250 | 0 | mIterOffset = -1; // mIterOffset only really applies to the first node. |
251 | 0 | } |
252 | 0 | return GetCurrentNode(); |
253 | 0 | } |
254 | | |
255 | | // Gets the next non-empty text fragment in the same block, starting by the |
256 | | // _next_ node. |
257 | | const nsTextFragment* GetNextNonEmptyTextFragmentInSameBlock(); |
258 | | |
259 | | private: |
260 | | // Advance to the next visible text-node. |
261 | | void Advance(); |
262 | | // Sets up the first node position and offset. |
263 | | void Initialize(); |
264 | | |
265 | | const bool mFindBackward; |
266 | | |
267 | | // Whether we've called GetNextNode() at least once. |
268 | | bool mInitialized; |
269 | | |
270 | | public: |
271 | | // An offset into the text of the first node we're starting to search at. |
272 | | int mIterOffset; |
273 | | const nsIContent* mLastBlockParent; |
274 | | TreeIterator<StyleChildrenIterator> mIterator; |
275 | | |
276 | | // These are only needed for the first GetNextNode() call. |
277 | | const nsRange& mStartPoint; |
278 | | }; |
279 | | |
280 | | void |
281 | | nsFind::State::Advance() |
282 | 0 | { |
283 | 0 | MOZ_ASSERT(mInitialized); |
284 | 0 |
|
285 | 0 | while (true) { |
286 | 0 | nsIContent* current = |
287 | 0 | mFindBackward ? mIterator.GetPrev() : mIterator.GetNext(); |
288 | 0 |
|
289 | 0 | if (!current) { |
290 | 0 | return; |
291 | 0 | } |
292 | 0 | |
293 | 0 | if (!current->IsContent() || SkipNode(current->AsContent())) { |
294 | 0 | continue; |
295 | 0 | } |
296 | 0 | |
297 | 0 | if (current->IsText()) { |
298 | 0 | return; |
299 | 0 | } |
300 | 0 | } |
301 | 0 | } |
302 | | |
303 | | void |
304 | | nsFind::State::Initialize() |
305 | 0 | { |
306 | 0 | MOZ_ASSERT(!mInitialized); |
307 | 0 | mInitialized = true; |
308 | 0 | mIterOffset = mFindBackward ? -1 : 0; |
309 | 0 |
|
310 | 0 | // Set up ourselves at the first node we want to start searching at. |
311 | 0 | nsINode* beginning = mFindBackward ? mStartPoint.GetEndContainer() |
312 | 0 | : mStartPoint.GetStartContainer(); |
313 | 0 | if (beginning && beginning->IsContent()) { |
314 | 0 | mIterator.Seek(*beginning->AsContent()); |
315 | 0 | } |
316 | 0 |
|
317 | 0 | nsINode* current = mIterator.GetCurrent(); |
318 | 0 | if (!current) { |
319 | 0 | return; |
320 | 0 | } |
321 | 0 | |
322 | 0 | if (!current->IsText() || SkipNode(current->AsText())) { |
323 | 0 | Advance(); |
324 | 0 | return; |
325 | 0 | } |
326 | 0 | |
327 | 0 | mLastBlockParent = GetBlockParent(*current->AsText()); |
328 | 0 |
|
329 | 0 | if (current != beginning) { |
330 | 0 | return; |
331 | 0 | } |
332 | 0 | |
333 | 0 | mIterOffset = mFindBackward ? mStartPoint.EndOffset() |
334 | 0 | : mStartPoint.StartOffset(); |
335 | 0 | } |
336 | | |
337 | | const nsTextFragment* |
338 | | nsFind::State::GetNextNonEmptyTextFragmentInSameBlock() |
339 | 0 | { |
340 | 0 | while (true) { |
341 | 0 | const Text* current = GetNextNode(); |
342 | 0 | if (!current) { |
343 | 0 | return nullptr; |
344 | 0 | } |
345 | 0 | |
346 | 0 | const nsIContent* blockParent = GetBlockParent(*current); |
347 | 0 | if (!blockParent || blockParent != mLastBlockParent) { |
348 | 0 | return nullptr; |
349 | 0 | } |
350 | 0 | |
351 | 0 | const nsTextFragment& frag = current->TextFragment(); |
352 | 0 | if (frag.GetLength()) { |
353 | 0 | return &frag; |
354 | 0 | } |
355 | 0 | } |
356 | 0 | } |
357 | | |
358 | | class MOZ_STACK_CLASS nsFind::StateRestorer final |
359 | | { |
360 | | public: |
361 | | explicit StateRestorer(State& aState) |
362 | | : mState(aState) |
363 | | , mIterOffset(aState.mIterOffset) |
364 | | , mCurrNode(aState.GetCurrentNode()) |
365 | | , mLastBlockParent(aState.mLastBlockParent) |
366 | 0 | { |
367 | 0 | } |
368 | | |
369 | | ~StateRestorer() |
370 | 0 | { |
371 | 0 | mState.mIterOffset = mIterOffset; |
372 | 0 | if (mCurrNode) { |
373 | 0 | mState.PositionAt(*mCurrNode); |
374 | 0 | } |
375 | 0 | mState.mLastBlockParent = mLastBlockParent; |
376 | 0 | } |
377 | | |
378 | | private: |
379 | | State& mState; |
380 | | |
381 | | int32_t mIterOffset; |
382 | | Text* mCurrNode; |
383 | | const nsIContent* mLastBlockParent; |
384 | | }; |
385 | | |
386 | | NS_IMETHODIMP |
387 | | nsFind::GetFindBackwards(bool* aFindBackward) |
388 | 0 | { |
389 | 0 | if (!aFindBackward) { |
390 | 0 | return NS_ERROR_NULL_POINTER; |
391 | 0 | } |
392 | 0 | |
393 | 0 | *aFindBackward = mFindBackward; |
394 | 0 | return NS_OK; |
395 | 0 | } |
396 | | |
397 | | NS_IMETHODIMP |
398 | | nsFind::SetFindBackwards(bool aFindBackward) |
399 | 0 | { |
400 | 0 | mFindBackward = aFindBackward; |
401 | 0 | return NS_OK; |
402 | 0 | } |
403 | | |
404 | | NS_IMETHODIMP |
405 | | nsFind::GetCaseSensitive(bool* aCaseSensitive) |
406 | 0 | { |
407 | 0 | if (!aCaseSensitive) { |
408 | 0 | return NS_ERROR_NULL_POINTER; |
409 | 0 | } |
410 | 0 | |
411 | 0 | *aCaseSensitive = mCaseSensitive; |
412 | 0 | return NS_OK; |
413 | 0 | } |
414 | | |
415 | | NS_IMETHODIMP |
416 | | nsFind::SetCaseSensitive(bool aCaseSensitive) |
417 | 0 | { |
418 | 0 | mCaseSensitive = aCaseSensitive; |
419 | 0 | return NS_OK; |
420 | 0 | } |
421 | | |
422 | | /* attribute boolean entireWord; */ |
423 | | NS_IMETHODIMP |
424 | | nsFind::GetEntireWord(bool *aEntireWord) |
425 | 0 | { |
426 | 0 | if (!aEntireWord) |
427 | 0 | return NS_ERROR_NULL_POINTER; |
428 | 0 | |
429 | 0 | *aEntireWord = !!mWordBreaker; |
430 | 0 | return NS_OK; |
431 | 0 | } |
432 | | |
433 | | NS_IMETHODIMP |
434 | | nsFind::SetEntireWord(bool aEntireWord) |
435 | 0 | { |
436 | 0 | mWordBreaker = aEntireWord ? nsContentUtils::WordBreaker() : nullptr; |
437 | 0 | return NS_OK; |
438 | 0 | } |
439 | | |
440 | | // Here begins the find code. A ten-thousand-foot view of how it works: Find |
441 | | // needs to be able to compare across inline (but not block) nodes, e.g. find |
442 | | // for "abc" should match a<b>b</b>c. So after we've searched a node, we're not |
443 | | // done with it; in the case of a partial match we may need to reset the |
444 | | // iterator to go back to a previously visited node, so we always save the |
445 | | // "match anchor" node and offset. |
446 | | // |
447 | | // Text nodes store their text in an nsTextFragment, which is effectively a |
448 | | // union of a one-byte string or a two-byte string. Single and double strings |
449 | | // are intermixed in the dom. We don't have string classes which can deal with |
450 | | // intermixed strings, so all the handling is done explicitly here. |
451 | | |
452 | | char16_t |
453 | | nsFind::PeekNextChar(State& aState) const |
454 | 0 | { |
455 | 0 | // We need to restore the necessary state before this function returns. |
456 | 0 | StateRestorer restorer(aState); |
457 | 0 |
|
458 | 0 | const nsTextFragment* frag = aState.GetNextNonEmptyTextFragmentInSameBlock(); |
459 | 0 | if (!frag) { |
460 | 0 | return L'\0'; |
461 | 0 | } |
462 | 0 | |
463 | 0 | const char16_t* t2b = nullptr; |
464 | 0 | const char* t1b = nullptr; |
465 | 0 |
|
466 | 0 | if (frag->Is2b()) { |
467 | 0 | t2b = frag->Get2b(); |
468 | 0 | } else { |
469 | 0 | t1b = frag->Get1b(); |
470 | 0 | } |
471 | 0 |
|
472 | 0 | uint32_t len = frag->GetLength(); |
473 | 0 | MOZ_ASSERT(len); |
474 | 0 |
|
475 | 0 | int32_t index = mFindBackward ? len - 1 : 0; |
476 | 0 | return t1b ? CHAR_TO_UNICHAR(t1b[index]) : t2b[index]; |
477 | 0 | } |
478 | | |
479 | 0 | #define NBSP_CHARCODE (CHAR_TO_UNICHAR(160)) |
480 | 0 | #define IsSpace(c) (nsCRT::IsAsciiSpace(c) || (c) == NBSP_CHARCODE) |
481 | | #define OVERFLOW_PINDEX (mFindBackward ? pindex < 0 : pindex > patLen) |
482 | 0 | #define DONE_WITH_PINDEX (mFindBackward ? pindex <= 0 : pindex >= patLen) |
483 | | #define ALMOST_DONE_WITH_PINDEX (mFindBackward ? pindex <= 0 : pindex >= patLen - 1) |
484 | | |
485 | | // Take nodes out of the tree with NextNode, until null (NextNode will return 0 |
486 | | // at the end of our range). |
487 | | NS_IMETHODIMP |
488 | | nsFind::Find(const char16_t* aPatText, nsRange* aSearchRange, |
489 | | nsRange* aStartPoint, nsRange* aEndPoint, |
490 | | nsRange** aRangeRet) |
491 | 0 | { |
492 | 0 | DEBUG_FIND_PRINTF("============== nsFind::Find('%s'%s, %p, %p, %p)\n", |
493 | 0 | NS_LossyConvertUTF16toASCII(aPatText).get(), |
494 | 0 | mFindBackward ? " (backward)" : " (forward)", |
495 | 0 | (void*)aSearchRange, (void*)aStartPoint, (void*)aEndPoint); |
496 | 0 |
|
497 | 0 | NS_ENSURE_ARG(aSearchRange); |
498 | 0 | NS_ENSURE_ARG(aStartPoint); |
499 | 0 | NS_ENSURE_ARG(aEndPoint); |
500 | 0 | NS_ENSURE_ARG_POINTER(aRangeRet); |
501 | 0 |
|
502 | 0 | nsIDocument* document = |
503 | 0 | aStartPoint->GetRoot() ? aStartPoint->GetRoot()->OwnerDoc() : nullptr; |
504 | 0 | NS_ENSURE_ARG(document); |
505 | 0 |
|
506 | 0 | Element* root = document->GetRootElement(); |
507 | 0 | NS_ENSURE_ARG(root); |
508 | 0 |
|
509 | 0 | *aRangeRet = 0; |
510 | 0 |
|
511 | 0 | if (!aPatText) { |
512 | 0 | return NS_ERROR_NULL_POINTER; |
513 | 0 | } |
514 | 0 | |
515 | 0 | nsAutoString patAutoStr(aPatText); |
516 | 0 | if (!mCaseSensitive) { |
517 | 0 | ToLowerCase(patAutoStr); |
518 | 0 | } |
519 | 0 |
|
520 | 0 | // Ignore soft hyphens in the pattern |
521 | 0 | static const char kShy[] = { char(CH_SHY), 0 }; |
522 | 0 | patAutoStr.StripChars(kShy); |
523 | 0 |
|
524 | 0 | const char16_t* patStr = patAutoStr.get(); |
525 | 0 | int32_t patLen = patAutoStr.Length() - 1; |
526 | 0 |
|
527 | 0 | // current offset into the pattern -- reset to beginning/end: |
528 | 0 | int32_t pindex = (mFindBackward ? patLen : 0); |
529 | 0 |
|
530 | 0 | // Current offset into the fragment |
531 | 0 | int32_t findex = 0; |
532 | 0 |
|
533 | 0 | // Direction to move pindex and ptr* |
534 | 0 | int incr = (mFindBackward ? -1 : 1); |
535 | 0 |
|
536 | 0 | const nsTextFragment* frag = nullptr; |
537 | 0 | int32_t fragLen = 0; |
538 | 0 |
|
539 | 0 | // Pointers into the current fragment: |
540 | 0 | const char16_t* t2b = nullptr; |
541 | 0 | const char* t1b = nullptr; |
542 | 0 |
|
543 | 0 | // Keep track of when we're in whitespace: |
544 | 0 | // (only matters when we're matching) |
545 | 0 | bool inWhitespace = false; |
546 | 0 | // Keep track of whether the previous char was a word-breaking one. |
547 | 0 | bool wordBreakPrev = false; |
548 | 0 |
|
549 | 0 | // Place to save the range start point in case we find a match: |
550 | 0 | Text* matchAnchorNode = nullptr; |
551 | 0 | int32_t matchAnchorOffset = 0; |
552 | 0 |
|
553 | 0 | // Get the end point, so we know when to end searches: |
554 | 0 | nsINode* endNode = aEndPoint->GetEndContainer(); |
555 | 0 | uint32_t endOffset = aEndPoint->EndOffset(); |
556 | 0 |
|
557 | 0 | char16_t c = 0; |
558 | 0 | char16_t patc = 0; |
559 | 0 | char16_t prevChar = 0; |
560 | 0 | char16_t prevCharInMatch = 0; |
561 | 0 |
|
562 | 0 | State state(mFindBackward, *root, *aStartPoint); |
563 | 0 | Text* current = nullptr; |
564 | 0 |
|
565 | 0 | while (true) { |
566 | 0 | DEBUG_FIND_PRINTF("Loop ...\n"); |
567 | 0 |
|
568 | 0 | // If this is our first time on a new node, reset the pointers: |
569 | 0 | if (!frag) { |
570 | 0 | current = state.GetNextNode(); |
571 | 0 | if (!current) { |
572 | 0 | return NS_OK; |
573 | 0 | } |
574 | 0 | |
575 | 0 | // We have a new text content. If its block parent is different from the |
576 | 0 | // block parent of the last text content, then we need to clear the match |
577 | 0 | // since we don't want to find across block boundaries. |
578 | 0 | const nsIContent* blockParent = GetBlockParent(*current); |
579 | 0 | DEBUG_FIND_PRINTF("New node: old blockparent = %p, new = %p\n", |
580 | 0 | (void*)state.mLastBlockParent, (void*)blockParent); |
581 | 0 | if (blockParent != state.mLastBlockParent) { |
582 | 0 | DEBUG_FIND_PRINTF("Different block parent!\n"); |
583 | 0 | state.mLastBlockParent = blockParent; |
584 | 0 | // End any pending match: |
585 | 0 | matchAnchorNode = nullptr; |
586 | 0 | matchAnchorOffset = 0; |
587 | 0 | c = 0; |
588 | 0 | prevChar = 0; |
589 | 0 | prevCharInMatch = 0; |
590 | 0 | pindex = (mFindBackward ? patLen : 0); |
591 | 0 | inWhitespace = false; |
592 | 0 | } |
593 | 0 |
|
594 | 0 | frag = ¤t->TextFragment(); |
595 | 0 | fragLen = frag->GetLength(); |
596 | 0 |
|
597 | 0 | // Set our starting point in this node. If we're going back to the anchor |
598 | 0 | // node, which means that we just ended a partial match, use the saved |
599 | 0 | // offset: |
600 | 0 | // |
601 | 0 | // FIXME(emilio): How could current ever be the anchor node, if we had not |
602 | 0 | // seen current so far? |
603 | 0 | if (current == matchAnchorNode) { |
604 | 0 | findex = matchAnchorOffset + (mFindBackward ? 1 : 0); |
605 | 0 | } else if (state.mIterOffset >= 0) { |
606 | 0 | findex = state.mIterOffset - (mFindBackward ? 1 : 0); |
607 | 0 | } else { |
608 | 0 | findex = mFindBackward ? (fragLen - 1) : 0; |
609 | 0 | } |
610 | 0 |
|
611 | 0 | // Offset can only apply to the first node: |
612 | 0 | state.mIterOffset = -1; |
613 | 0 |
|
614 | 0 | DEBUG_FIND_PRINTF("Starting from offset %d of %d\n", findex, fragLen); |
615 | 0 |
|
616 | 0 | // If this is outside the bounds of the string, then skip this node: |
617 | 0 | if (findex < 0 || findex > fragLen - 1) { |
618 | 0 | DEBUG_FIND_PRINTF("At the end of a text node -- skipping to the next\n"); |
619 | 0 | frag = nullptr; |
620 | 0 | continue; |
621 | 0 | } |
622 | 0 | |
623 | 0 | if (frag->Is2b()) { |
624 | 0 | t2b = frag->Get2b(); |
625 | 0 | t1b = nullptr; |
626 | | #ifdef DEBUG_FIND |
627 | | nsAutoString str2(t2b, fragLen); |
628 | | DEBUG_FIND_PRINTF("2 byte, '%s'\n", NS_LossyConvertUTF16toASCII(str2).get()); |
629 | | #endif |
630 | 0 | } else { |
631 | 0 | t1b = frag->Get1b(); |
632 | 0 | t2b = nullptr; |
633 | | #ifdef DEBUG_FIND |
634 | | nsAutoCString str1(t1b, fragLen); |
635 | | DEBUG_FIND_PRINTF("1 byte, '%s'\n", str1.get()); |
636 | | #endif |
637 | | } |
638 | 0 | } else { |
639 | 0 | // Still on the old node. Advance the pointers, then see if we need to |
640 | 0 | // pull a new node. |
641 | 0 | findex += incr; |
642 | 0 | DEBUG_FIND_PRINTF("Same node -- (%d, %d)\n", pindex, findex); |
643 | 0 | if (mFindBackward ? (findex < 0) : (findex >= fragLen)) { |
644 | 0 | DEBUG_FIND_PRINTF("Will need to pull a new node: mAO = %d, frag len=%d\n", |
645 | 0 | matchAnchorOffset, fragLen); |
646 | 0 | // Done with this node. Pull a new one. |
647 | 0 | frag = nullptr; |
648 | 0 | continue; |
649 | 0 | } |
650 | 0 | } |
651 | 0 | |
652 | 0 | // Have we gone past the endpoint yet? If we have, and we're not in the |
653 | 0 | // middle of a match, return. |
654 | 0 | if (state.GetCurrentNode() == endNode && |
655 | 0 | ((mFindBackward && findex < static_cast<int32_t>(endOffset)) || |
656 | 0 | (!mFindBackward && findex > static_cast<int32_t>(endOffset)))) { |
657 | 0 | return NS_OK; |
658 | 0 | } |
659 | 0 | |
660 | 0 | // Save the previous character for word boundary detection |
661 | 0 | prevChar = c; |
662 | 0 | // The two characters we'll be comparing: |
663 | 0 | c = (t2b ? t2b[findex] : CHAR_TO_UNICHAR(t1b[findex])); |
664 | 0 | patc = patStr[pindex]; |
665 | 0 |
|
666 | 0 | DEBUG_FIND_PRINTF("Comparing '%c'=%x to '%c' (%d of %d), findex=%d%s\n", |
667 | 0 | (char)c, (int)c, patc, pindex, patLen, findex, |
668 | 0 | inWhitespace ? " (inWhitespace)" : ""); |
669 | 0 |
|
670 | 0 | // Do we need to go back to non-whitespace mode? If inWhitespace, then this |
671 | 0 | // space in the pat str has already matched at least one space in the |
672 | 0 | // document. |
673 | 0 | if (inWhitespace && !IsSpace(c)) { |
674 | 0 | inWhitespace = false; |
675 | 0 | pindex += incr; |
676 | | #ifdef DEBUG |
677 | | // This shouldn't happen -- if we were still matching, and we were at the |
678 | | // end of the pat string, then we should have caught it in the last |
679 | | // iteration and returned success. |
680 | | if (OVERFLOW_PINDEX) { |
681 | | NS_ASSERTION(false, "Missed a whitespace match"); |
682 | | } |
683 | | #endif |
684 | | patc = patStr[pindex]; |
685 | 0 | } |
686 | 0 | if (!inWhitespace && IsSpace(patc)) { |
687 | 0 | inWhitespace = true; |
688 | 0 | } else if (!inWhitespace && !mCaseSensitive && IsUpperCase(c)) { |
689 | 0 | c = ToLowerCase(c); |
690 | 0 | } |
691 | 0 |
|
692 | 0 | if (c == CH_SHY) { |
693 | 0 | // ignore soft hyphens in the document |
694 | 0 | continue; |
695 | 0 | } |
696 | 0 | |
697 | 0 | if (!mCaseSensitive) { |
698 | 0 | switch (c) { |
699 | 0 | // treat curly and straight quotes as identical |
700 | 0 | case CH_LEFT_SINGLE_QUOTE: |
701 | 0 | case CH_RIGHT_SINGLE_QUOTE: |
702 | 0 | c = CH_APOSTROPHE; |
703 | 0 | break; |
704 | 0 | case CH_LEFT_DOUBLE_QUOTE: |
705 | 0 | case CH_RIGHT_DOUBLE_QUOTE: |
706 | 0 | c = CH_QUOTE; |
707 | 0 | break; |
708 | 0 | } |
709 | 0 |
|
710 | 0 | switch (patc) { |
711 | 0 | // treat curly and straight quotes as identical |
712 | 0 | case CH_LEFT_SINGLE_QUOTE: |
713 | 0 | case CH_RIGHT_SINGLE_QUOTE: |
714 | 0 | patc = CH_APOSTROPHE; |
715 | 0 | break; |
716 | 0 | case CH_LEFT_DOUBLE_QUOTE: |
717 | 0 | case CH_RIGHT_DOUBLE_QUOTE: |
718 | 0 | patc = CH_QUOTE; |
719 | 0 | break; |
720 | 0 | } |
721 | 0 | } |
722 | 0 |
|
723 | 0 | // a '\n' between CJ characters is ignored |
724 | 0 | if (pindex != (mFindBackward ? patLen : 0) && c != patc && !inWhitespace) { |
725 | 0 | if (c == '\n' && t2b && IS_CJ_CHAR(prevCharInMatch)) { |
726 | 0 | int32_t nindex = findex + incr; |
727 | 0 | if (mFindBackward ? (nindex >= 0) : (nindex < fragLen)) { |
728 | 0 | if (IS_CJ_CHAR(t2b[nindex])) { |
729 | 0 | continue; |
730 | 0 | } |
731 | 0 | } |
732 | 0 | } |
733 | 0 | } |
734 | 0 | |
735 | 0 | wordBreakPrev = false; |
736 | 0 | if (mWordBreaker) { |
737 | 0 | if (prevChar == NBSP_CHARCODE) |
738 | 0 | prevChar = CHAR_TO_UNICHAR(' '); |
739 | 0 | wordBreakPrev = mWordBreaker->BreakInBetween(&prevChar, 1, &c, 1); |
740 | 0 | } |
741 | 0 |
|
742 | 0 | // Compare. Match if we're in whitespace and c is whitespace, or if the |
743 | 0 | // characters match and at least one of the following is true: |
744 | 0 | // a) we're not matching the entire word |
745 | 0 | // b) a match has already been stored |
746 | 0 | // c) the previous character is a different "class" than the current character. |
747 | 0 | if ((c == patc && (!mWordBreaker || matchAnchorNode || wordBreakPrev)) || |
748 | 0 | (inWhitespace && IsSpace(c))) |
749 | 0 | { |
750 | 0 | prevCharInMatch = c; |
751 | 0 | if (inWhitespace) { |
752 | 0 | DEBUG_FIND_PRINTF("YES (whitespace)(%d of %d)\n", pindex, patLen); |
753 | 0 | } else { |
754 | 0 | DEBUG_FIND_PRINTF("YES! '%c' == '%c' (%d of %d)\n", c, patc, pindex, patLen); |
755 | 0 | } |
756 | 0 |
|
757 | 0 | // Save the range anchors if we haven't already: |
758 | 0 | if (!matchAnchorNode) { |
759 | 0 | matchAnchorNode = state.GetCurrentNode(); |
760 | 0 | matchAnchorOffset = findex; |
761 | 0 | } |
762 | 0 |
|
763 | 0 | // Are we done? |
764 | 0 | if (DONE_WITH_PINDEX) { |
765 | 0 | // Matched the whole string! |
766 | 0 | DEBUG_FIND_PRINTF("Found a match!\n"); |
767 | 0 |
|
768 | 0 | // Make the range: |
769 | 0 | // Check for word break (if necessary) |
770 | 0 | if (mWordBreaker) { |
771 | 0 | int32_t nextfindex = findex + incr; |
772 | 0 |
|
773 | 0 | char16_t nextChar; |
774 | 0 | // If still in array boundaries, get nextChar. |
775 | 0 | if (mFindBackward ? (nextfindex >= 0) : (nextfindex < fragLen)) |
776 | 0 | nextChar = (t2b ? t2b[nextfindex] : CHAR_TO_UNICHAR(t1b[nextfindex])); |
777 | 0 | // Get next character from the next node. |
778 | 0 | else |
779 | 0 | nextChar = PeekNextChar(state); |
780 | 0 |
|
781 | 0 | if (nextChar == NBSP_CHARCODE) |
782 | 0 | nextChar = CHAR_TO_UNICHAR(' '); |
783 | 0 |
|
784 | 0 | // If a word break isn't there when it needs to be, reset search. |
785 | 0 | if (!mWordBreaker->BreakInBetween(&c, 1, &nextChar, 1)) { |
786 | 0 | matchAnchorNode = nullptr; |
787 | 0 | continue; |
788 | 0 | } |
789 | 0 | } |
790 | 0 | |
791 | 0 | RefPtr<nsRange> range = new nsRange(current); |
792 | 0 |
|
793 | 0 | int32_t matchStartOffset; |
794 | 0 | int32_t matchEndOffset; |
795 | 0 | // convert char index to range point: |
796 | 0 | int32_t mao = matchAnchorOffset + (mFindBackward ? 1 : 0); |
797 | 0 | Text* startParent; |
798 | 0 | Text* endParent; |
799 | 0 | if (mFindBackward) { |
800 | 0 | startParent = current; |
801 | 0 | endParent = matchAnchorNode; |
802 | 0 | matchStartOffset = findex; |
803 | 0 | matchEndOffset = mao; |
804 | 0 | } else { |
805 | 0 | startParent = matchAnchorNode; |
806 | 0 | endParent = current; |
807 | 0 | matchStartOffset = mao; |
808 | 0 | matchEndOffset = findex + 1; |
809 | 0 | } |
810 | 0 | if (startParent && endParent && |
811 | 0 | IsVisibleNode(startParent) && IsVisibleNode(endParent)) { |
812 | 0 | range->SetStart(*startParent, matchStartOffset, IgnoreErrors()); |
813 | 0 | range->SetEnd(*endParent, matchEndOffset, IgnoreErrors()); |
814 | 0 | *aRangeRet = range.get(); |
815 | 0 | NS_ADDREF(*aRangeRet); |
816 | 0 | } else { |
817 | 0 | // This match is no good -- invisible or bad range |
818 | 0 | startParent = nullptr; |
819 | 0 | } |
820 | 0 |
|
821 | 0 | if (startParent) { |
822 | 0 | return NS_OK; |
823 | 0 | } |
824 | 0 | // This match is no good, continue on in document |
825 | 0 | matchAnchorNode = nullptr; |
826 | 0 | } |
827 | 0 |
|
828 | 0 | if (matchAnchorNode) { |
829 | 0 | // Not done, but still matching. Advance and loop around for the next |
830 | 0 | // characters. But don't advance from a space to a non-space: |
831 | 0 | if (!inWhitespace || DONE_WITH_PINDEX || |
832 | 0 | IsSpace(patStr[pindex + incr])) { |
833 | 0 | pindex += incr; |
834 | 0 | inWhitespace = false; |
835 | 0 | DEBUG_FIND_PRINTF("Advancing pindex to %d\n", pindex); |
836 | 0 | } |
837 | 0 |
|
838 | 0 | continue; |
839 | 0 | } |
840 | 0 | } |
841 | 0 |
|
842 | 0 | DEBUG_FIND_PRINTF("NOT: %c == %c\n", c, patc); |
843 | 0 |
|
844 | 0 | // If we didn't match, go back to the beginning of patStr, and set findex |
845 | 0 | // back to the next char after we started the current match. |
846 | 0 | if (matchAnchorNode) { // we're ending a partial match |
847 | 0 | findex = matchAnchorOffset; |
848 | 0 | state.mIterOffset = matchAnchorOffset; |
849 | 0 | // +incr will be added to findex when we continue |
850 | 0 |
|
851 | 0 | // Are we going back to a previous node? |
852 | 0 | if (matchAnchorNode != state.GetCurrentNode()) { |
853 | 0 | frag = nullptr; |
854 | 0 | state.PositionAt(*matchAnchorNode); |
855 | 0 | DEBUG_FIND_PRINTF("Repositioned anchor node\n"); |
856 | 0 | } |
857 | 0 | DEBUG_FIND_PRINTF("Ending a partial match; findex -> %d, mIterOffset -> %d\n", |
858 | 0 | findex, state.mIterOffset); |
859 | 0 | } |
860 | 0 | matchAnchorNode = nullptr; |
861 | 0 | matchAnchorOffset = 0; |
862 | 0 | inWhitespace = false; |
863 | 0 | pindex = mFindBackward ? patLen : 0; |
864 | 0 | DEBUG_FIND_PRINTF("Setting findex back to %d, pindex to %d\n", findex, pindex); |
865 | 0 | } |
866 | 0 |
|
867 | 0 | return NS_OK; |
868 | 0 | } |