/src/mupdf/source/fitz/stext-iterator.c
Line | Count | Source |
1 | | // Copyright (C) 2004-2025 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | |
25 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_from(fz_stext_page *page, fz_stext_block *block, fz_stext_struct *top) |
26 | 0 | { |
27 | 0 | fz_stext_page_block_iterator pos; |
28 | |
|
29 | 0 | pos.page = page; |
30 | 0 | pos.parent = top; |
31 | 0 | pos.block = block; |
32 | 0 | pos.top = top; |
33 | |
|
34 | 0 | return pos; |
35 | 0 | } |
36 | | |
37 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_begin(fz_stext_page *page) |
38 | 0 | { |
39 | 0 | return fz_stext_page_block_iterator_begin_from(page, page ? page->first_block : NULL, NULL); |
40 | 0 | } |
41 | | |
42 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_from_dfs(fz_stext_page *page, fz_stext_block *block, fz_stext_struct *top) |
43 | 0 | { |
44 | 0 | fz_stext_page_block_iterator pos; |
45 | |
|
46 | 0 | pos = fz_stext_page_block_iterator_begin_from(page, block, top); |
47 | |
|
48 | 0 | while (1) |
49 | 0 | { |
50 | | /* We cannot stop on a struct block. */ |
51 | 0 | while (pos.block && pos.block->type == FZ_STEXT_BLOCK_STRUCT) |
52 | 0 | { |
53 | | /* Move down. */ |
54 | 0 | pos.parent = pos.block->u.s.down; |
55 | 0 | pos.block = pos.block->u.s.down->first_block; |
56 | 0 | } |
57 | | |
58 | | /* If we're on a block, we're done. */ |
59 | 0 | if (pos.block != NULL) |
60 | 0 | break; |
61 | | |
62 | | /* We can only stop on a NULL block, if we're at the end. */ |
63 | 0 | if (pos.parent == pos.top) |
64 | 0 | return pos; |
65 | | /* Move up, and along. */ |
66 | 0 | pos.block = pos.parent->up->next; |
67 | 0 | pos.parent = pos.parent->parent; |
68 | 0 | } |
69 | | |
70 | 0 | return pos; |
71 | 0 | } |
72 | | |
73 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_dfs(fz_stext_page *page) |
74 | 0 | { |
75 | 0 | return fz_stext_page_block_iterator_begin_from_dfs(page, page ? page->first_block : NULL, NULL); |
76 | 0 | } |
77 | | |
78 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_from_rdfs(fz_stext_page *page, fz_stext_block *block, fz_stext_struct *top) |
79 | 0 | { |
80 | 0 | fz_stext_page_block_iterator pos; |
81 | |
|
82 | 0 | pos = fz_stext_page_block_iterator_begin_from(page, block, top); |
83 | |
|
84 | 0 | while (1) |
85 | 0 | { |
86 | | /* We cannot stop on a struct block. */ |
87 | 0 | while (pos.block && pos.block->type == FZ_STEXT_BLOCK_STRUCT) |
88 | 0 | { |
89 | | /* Move down. */ |
90 | 0 | pos.parent = pos.block->u.s.down; |
91 | 0 | pos.block = pos.block->u.s.down->last_block; |
92 | 0 | } |
93 | | |
94 | | /* If we're on a block, we're done. */ |
95 | 0 | if (pos.block != NULL) |
96 | 0 | break; |
97 | | |
98 | | /* We can only stop on a NULL block, if we're at the start. */ |
99 | 0 | if (pos.parent == pos.top) |
100 | 0 | return pos; |
101 | | /* Move up, and back. */ |
102 | 0 | pos.block = pos.parent->up->prev; |
103 | 0 | pos.parent = pos.parent->parent; |
104 | 0 | } |
105 | | |
106 | 0 | return pos; |
107 | 0 | } |
108 | | |
109 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_rdfs(fz_stext_page *page) |
110 | 0 | { |
111 | 0 | return fz_stext_page_block_iterator_begin_from_rdfs(page, page ? page->last_block : NULL, NULL); |
112 | 0 | } |
113 | | |
114 | | /* Iterates along, stopping at every block. Stops at the end of the run. */ |
115 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_next(fz_stext_page_block_iterator pos) |
116 | 0 | { |
117 | | /* If page == NULL, then this iterator can never go anywhere */ |
118 | 0 | if (pos.page == NULL) |
119 | 0 | return pos; |
120 | | |
121 | | /* If we've hit EOF, then nowhere else to go. */ |
122 | 0 | if (pos.block == NULL) |
123 | 0 | return pos; |
124 | | |
125 | 0 | pos.block = pos.block->next; |
126 | 0 | return pos; |
127 | 0 | } |
128 | | |
129 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_prev(fz_stext_page_block_iterator pos) |
130 | 0 | { |
131 | | /* If page == NULL, then this iterator can never go anywhere */ |
132 | 0 | if (pos.page == NULL) |
133 | 0 | return pos; |
134 | | |
135 | | /* If we've hit EOF, then nowhere else to go. */ |
136 | 0 | if (pos.block == NULL) |
137 | 0 | return pos; |
138 | | |
139 | 0 | pos.block = pos.block->prev; |
140 | 0 | return pos; |
141 | 0 | } |
142 | | |
143 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_down(fz_stext_page_block_iterator pos) |
144 | 0 | { |
145 | | /* Can't throw here, so trying to move down on illegal nodes |
146 | | * will just do nothing. */ |
147 | 0 | if (pos.block == NULL) |
148 | 0 | return pos; |
149 | 0 | if (pos.block->type != FZ_STEXT_BLOCK_STRUCT) |
150 | 0 | return pos; |
151 | | |
152 | 0 | pos.parent = pos.block->u.s.down; |
153 | 0 | pos.block = pos.block->u.s.down->first_block; |
154 | |
|
155 | 0 | return pos; |
156 | 0 | } |
157 | | |
158 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_up(fz_stext_page_block_iterator pos) |
159 | 0 | { |
160 | 0 | if (pos.parent == NULL) |
161 | 0 | return pos; |
162 | | |
163 | | /* pos.parent->up is the struct block we are currently traversing the |
164 | | * children of. So it's where we want to do 'next' from. */ |
165 | 0 | pos.block = pos.parent->up; |
166 | | /* pos.parent->parent is the struct that owns the new pos.block */ |
167 | 0 | pos.parent = pos.parent->parent; |
168 | |
|
169 | 0 | return pos; |
170 | 0 | } |
171 | | |
172 | | /* Iterates along, and automatically (silently) goes down at structure |
173 | | * nodes and up at the end of runs. */ |
174 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_next_dfs(fz_stext_page_block_iterator pos) |
175 | 0 | { |
176 | 0 | while (1) |
177 | 0 | { |
178 | 0 | pos = fz_stext_page_block_iterator_next(pos); |
179 | |
|
180 | 0 | while (pos.block) |
181 | 0 | { |
182 | 0 | if (pos.block->type != FZ_STEXT_BLOCK_STRUCT) |
183 | 0 | return pos; |
184 | | |
185 | | /* Move down. And loop. */ |
186 | 0 | pos.parent = pos.block->u.s.down; |
187 | 0 | pos.block = pos.block->u.s.down->first_block; |
188 | 0 | } |
189 | | |
190 | | /* We've hit the end of the row. Move up. */ |
191 | | /* If no parent, we've really hit the EOD. */ |
192 | 0 | if (pos.parent == pos.top) |
193 | 0 | return pos; /* EOF */ |
194 | | /* pos.parent->up is the struct block we are currently traversing the |
195 | | * children of. So it's where we want to do 'next' from. */ |
196 | 0 | pos.block = pos.parent->up; |
197 | | /* pos.parent->parent is the struct that owns the new pos.block */ |
198 | 0 | pos.parent = pos.parent->parent; |
199 | 0 | } |
200 | 0 | } |
201 | | |
202 | | /* Iterates along backwards, and automatically (silently) goes down at structure |
203 | | * nodes and up at the start of runs. */ |
204 | | fz_stext_page_block_iterator fz_stext_page_block_iterator_next_rdfs(fz_stext_page_block_iterator pos) |
205 | 0 | { |
206 | 0 | while (1) |
207 | 0 | { |
208 | 0 | pos = fz_stext_page_block_iterator_prev(pos); |
209 | |
|
210 | 0 | while (pos.block) |
211 | 0 | { |
212 | 0 | if (pos.block->type != FZ_STEXT_BLOCK_STRUCT) |
213 | 0 | return pos; |
214 | | |
215 | | /* Move down. And loop. */ |
216 | 0 | pos.parent = pos.block->u.s.down; |
217 | 0 | pos.block = pos.block->u.s.down->last_block; |
218 | 0 | } |
219 | | |
220 | | /* We've hit the end of the row. Move up. */ |
221 | | /* If no parent, we've really hit the EOD. */ |
222 | 0 | if (pos.parent == pos.top) |
223 | 0 | return pos; /* EOF */ |
224 | | /* pos.parent->up is the struct block we are currently traversing the |
225 | | * children of. So it's where we want to do 'prev' from. */ |
226 | 0 | pos.block = pos.parent->up; |
227 | | /* pos.parent->parent is the struct that owns the new pos.block */ |
228 | 0 | pos.parent = pos.parent->parent; |
229 | 0 | } |
230 | 0 | } |
231 | | |
232 | | int fz_stext_page_block_iterator_eod(fz_stext_page_block_iterator pos) |
233 | 0 | { |
234 | 0 | return (pos.block == NULL); |
235 | 0 | } |
236 | | |
237 | | int fz_stext_page_block_iterator_eod_dfs(fz_stext_page_block_iterator pos) |
238 | 0 | { |
239 | 0 | while (1) |
240 | 0 | { |
241 | 0 | if (pos.block) |
242 | 0 | return 0; |
243 | 0 | if (pos.parent == pos.top) |
244 | 0 | return 1; |
245 | 0 | pos.block = pos.parent->up; |
246 | 0 | pos.parent = pos.parent->parent; |
247 | 0 | } |
248 | 0 | } |
249 | | |
250 | | int fz_stext_page_block_iterator_eod_rdfs(fz_stext_page_block_iterator pos) |
251 | 0 | { |
252 | 0 | return fz_stext_page_block_iterator_eod_dfs(pos); |
253 | 0 | } |