/src/mdbtools/src/libmdb/index.c
Line | Count | Source |
1 | | /* MDB Tools - A library for reading MS Access database file |
2 | | * Copyright (C) 2000-2004 Brian Bruns |
3 | | * |
4 | | * This library is free software; you can redistribute it and/or |
5 | | * modify it under the terms of the GNU Library General Public |
6 | | * License as published by the Free Software Foundation; either |
7 | | * version 2 of the License, or (at your option) any later version. |
8 | | * |
9 | | * This library is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | | * Library General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU Library General Public |
15 | | * License along with this library; if not, write to the Free Software |
16 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | | */ |
18 | | |
19 | | #include "mdbtools.h" |
20 | | #include "mdbprivate.h" |
21 | | #ifdef HAVE_LIBMSWSTR |
22 | | #include <mswstr/mswstr.h> |
23 | | #endif |
24 | | |
25 | | static MdbIndexPage *mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain); |
26 | | static MdbIndexPage *mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg); |
27 | | |
28 | | char idx_to_text[] = { |
29 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0-7 0x00-0x07 */ |
30 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8-15 0x09-0x0f */ |
31 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 16-23 0x10-0x17 */ |
32 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24-31 0x19-0x1f */ |
33 | | ' ', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 32-39 0x20-0x27 */ |
34 | | 0x00, 0x00, 0x00, 0x00, 0x00, ' ', ' ', 0x00, /* 40-47 0x29-0x2f */ |
35 | | 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', /* 48-55 0x30-0x37 */ |
36 | | '^', '_', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 56-63 0x39-0x3f */ |
37 | | 0x00, '`', 'a', 'b', 'd', 'f', 'g', 'h', /* 64-71 0x40-0x47 */ |
38 | | 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'r', /* 72-79 0x49-0x4f H */ |
39 | | 's', 't', 'u', 'v', 'w', 'x', 'z', '{', /* 80-87 0x50-0x57 P */ |
40 | | '|', '}', '~', '5', '6', '7', '8', '9', /* 88-95 0x59-0x5f */ |
41 | | 0x00, '`', 'a', 'b', 'd', 'f', 'g', 'h', /* 96-103 0x60-0x67 */ |
42 | | 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'r', /* 014-111 0x69-0x6f h */ |
43 | | 's', 't', 'u', 'v', 'w', 'x', 'z', '{', /* 112-119 0x70-0x77 p */ |
44 | | '|', '}', '~', 0x00, 0x00, 0x00, 0x00, 0x00, /* 120-127 0x78-0x7f */ |
45 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 128-135 0x80-0x87 */ |
46 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ |
47 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ |
48 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ |
49 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ |
50 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ |
51 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ |
52 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ |
53 | | 0x00, 0x00, 0x00, 0x00, 0x00, '`', 0x00, 0x00, /* 0xc0-0xc7 */ |
54 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ |
55 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ |
56 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ |
57 | | 0x00, '`', 0x00, '`', '`', '`', 0x00, 0x00, /* 0xe0-0xe7 */ |
58 | | 'f', 'f', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ |
59 | | 0x00, 0x00, 0x00, 'r', 0x00, 0x00, 'r', 0x00, /* 0xf0-0xf7 */ |
60 | | 0x81, 0x00, 0x00, 0x00, 'x', 0x00, 0x00, 0x00, /* 0xf8-0xff */ |
61 | | }; |
62 | | |
63 | | /* This table doesn't really work accurately, as it is missing |
64 | | * a lot of special processing, therefore do not use! |
65 | | * This is just some kind of fallback if MSWSTR cannot be used |
66 | | * for whatever reason and may not work for most indexes, i.e. |
67 | | * those containing hyphens etc. |
68 | | */ |
69 | | char idx_to_text_ling[] = { |
70 | | 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0-7 0x00-0x07 */ |
71 | | 0x01, 0x08, 0x08, 0x08, 0x08, 0x08, 0x01, 0x01, /* 8-15 0x08-0x0F */ |
72 | | 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 16-23 0x10-0x17 */ |
73 | | 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 24-31 0x18-0x1F */ |
74 | | 0x07, 0x09, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x01, /* 32-39 0x20-0x27 */ |
75 | | 0x14, 0x16, 0x18, ',', 0x1A, 0x01, 0x1C, 0x1E, /* 40-47 0x28-0x2F */ |
76 | | '6', '8', ':', '<', '>', '@', 'B', 'D', /* 48-55 0x30-0x37 */ |
77 | | 'F', 'H', ' ', '"', '.', '0', '2', '$', /* 56-63 0x38-0x3F */ |
78 | | '&', 'J', 'L', 'M', 'O', 'Q', 'S', 'U', /* 64-71 0x40-0x47 */ |
79 | | 'W', 'Y', '[', '\\', '^', '`', 'b', 'd', /* 72-79 0x48-0x4F */ |
80 | | 'f', 'h', 'i', 'k', 'm', 'o', 'q', 's', /* 80-87 0x50-0x57 */ |
81 | | 'u', 'v', 'x', '\'', ')', '*', '+', '+', /* 88-95 0x58-0x5F */ |
82 | | '+', 'J', 'L', 'M', 'O', 'Q', 'S', 'U', /* 96-103 0x60-0x67 */ |
83 | | 'W', 'Y', '[', '\\', '^', '`', 'b', 'd', /* 104-111 0x68-0x6F */ |
84 | | 'f', 'h', 'i', 'k', 'm', 'o', 'q', 's', /* 112-119 0x70-0x77 */ |
85 | | 'u', 'v', 'x', '+', '+', '+', '+', 0x01, /* 120-127 0x78-0x7F */ |
86 | | 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 128-135 0x80-0x87 */ |
87 | | 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 136-143 0x88-0x8F */ |
88 | | 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 144-151 0x90-0x97 */ |
89 | | 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 152-159 0x98-0x9F */ |
90 | | 0x08, '+', '4', '4', '4', '4', '+', '4', /* 160-167 0xA0-0xA7 */ |
91 | | '+', '4', 'J', '3', '4', 0x01, '4', '+', /* 168-175 0xA8-0xAF */ |
92 | | '4', '3', ':', '<', '+', '4', '4', '4', /* 176-183 0xB0-0xB7 */ |
93 | | '+', '8', 'd', '3', '7', '7', '7', '+', /* 184-191 0xB8-0xBF */ |
94 | | 'J', 'J', 'J', 'J', 'J', 'J', 'J', 'M', /* 192-199 0xC0-0xC7 */ |
95 | | 'Q', 'Q', 'Q', 'Q', 'Y', 'Y', 'Y', 'Y', /* 200-207 0xC8-0xCF */ |
96 | | 'O', 'b', 'd', 'd', 'd', 'd', 'd', '3', /* 208-215 0xD0-0xD7 */ |
97 | | 'd', 'o', 'o', 'o', 'o', 'v', 'm', 'k', /* 216-223 0xD8-0xDF */ |
98 | | 'J', 'J', 'J', 'J', 'J', 'J', 'J', 'M', /* 224-231 0xE0-0xE7 */ |
99 | | 'Q', 'Q', 'Q', 'Q', 'Y', 'Y', 'Y', 'Y', /* 232-239 0xE8-0xEF */ |
100 | | 'O', 'b', 'd', 'd', 'd', 'd', 'd', '3', /* 240-247 0xF0-0xF7 */ |
101 | | 'd', 'o', 'o', 'o', 'o', 'v', 'm', 'v', /* 248-255 0xF8-0xFF */ |
102 | | }; |
103 | | |
104 | | /* JET Red (v4) Index definition byte layouts |
105 | | * |
106 | | * Based on: |
107 | | * |
108 | | * http://jabakobob.net/mdb/table-page.html |
109 | | * https://github.com/jahlborn/jackcess |
110 | | * |
111 | | * plus inspection of JET (Red) 4 databases. (JET 3 format has fewer |
112 | | * fields -- some of the ones below omitted, and others narrower.) |
113 | | * |
114 | | * See also JET Blue (Extensible Storage Engine) format information: |
115 | | * |
116 | | * https://github.com/libyal/libesedb/blob/master/documentation/Extensible%20Storage%20Engine%20%28ESE%29%20Database%20File%20%28EDB%29%20format.asciidoc |
117 | | * |
118 | | * which is a later Microsoft embedded database format with the same |
119 | | * early base format. |
120 | | * |
121 | | * ---------------------------------------------------------------------- |
122 | | * Index Column Definitions: |
123 | | * - for each "non foreign key" index (ie pidx->index_type!=2), a list |
124 | | * of columns indexed |
125 | | * |
126 | | * Repeated table->num_real_idxs times: |
127 | | * |
128 | | * Offset Bytes Meaning |
129 | | * 0x0000 4 UNKNOWN; seems to be type marker, usually 1923 or 0 |
130 | | * |
131 | | * 0x0004 2 Column 1 ID |
132 | | * 0x0006 1 Column 1 Flags |
133 | | * 0x0007 2 Column 2 ID |
134 | | * 0x0009 1 Column 2 Flags |
135 | | * 0x000A 2 Column 3 ID |
136 | | * 0x000C 1 Column 3 Flags |
137 | | * 0x000D 2 Column 4 ID |
138 | | * 0x000F 1 Column 4 Flags |
139 | | * 0x0010 2 Column 5 ID |
140 | | * 0x0012 1 Column 5 Flags |
141 | | * 0x0013 2 Column 6 ID |
142 | | * 0x0015 1 Column 6 Flags |
143 | | * 0x0016 2 Column 7 ID |
144 | | * 0x0018 1 Column 7 Flags |
145 | | * 0x0019 2 Column 8 ID |
146 | | * 0x001B 1 Column 8 Flags |
147 | | * 0x001C 2 Column 9 ID |
148 | | * 0x001E 1 Column 9 Flags |
149 | | * 0x001F 2 Column 10 ID |
150 | | * 0x0021 1 Column 10 Flags |
151 | | * |
152 | | * 0x0022 1 Usage Map row |
153 | | * 0x0023 3 Usage Map page (24-bit) |
154 | | * 0x0026 4 First index page |
155 | | * 0x002A 4 UNKNOWN |
156 | | * 0x002E 2 Index Flags |
157 | | * 0x0030 4 UNKNOWN; seems to always be 0 |
158 | | * 0x0034 |
159 | | * |
160 | | * Column ID of 0xFFFF (-1) means "not used" or "end of used columns". |
161 | | * Column Flags: |
162 | | * - 0x01 = Ascending |
163 | | * |
164 | | * Index Flags: |
165 | | * - 0x0001 = Unique index |
166 | | * - 0x0002 = Ignore NULLs |
167 | | * - 0x0008 = Required Index |
168 | | * |
169 | | * ---------------------------------------------------------------------- |
170 | | * Index Definitions |
171 | | * - for each index (normal, primary key, foreign key), details on the |
172 | | * index. |
173 | | * |
174 | | * - this appears to be the union of information required for normal/ |
175 | | * primary key indexes, and the information required for foreign key |
176 | | * indexes. |
177 | | * |
178 | | * Repeated table->num_idxs times: |
179 | | * |
180 | | * Offset Bytes Meaning |
181 | | * 0x0000 4 UNKNOWN; apparently a type marker, usually 1625 or 0 |
182 | | * 0x0004 4 Logical Index Number |
183 | | * 0x0008 4 Index Column Definition Entry |
184 | | * 0x000C 1 FK Index Type |
185 | | * 0x000D 4 FK Index Number |
186 | | * 0x0011 4 FK Index Table Page Number |
187 | | * 0x0015 1 Flags: Update Action |
188 | | * 0x0016 1 Flags: Delete Action |
189 | | * 0x0017 1 Index Type |
190 | | * 0x0018 4 UNKNNOWN; seems to always be 0 |
191 | | * 0x001B |
192 | | * |
193 | | * Where Index Type is: |
194 | | * 0x01 = normal |
195 | | * 0x01 = primary key |
196 | | * 0x02 = foreign key index reference |
197 | | */ |
198 | | |
199 | | /* Debugging helper to dump out raw hex values of index definition */ |
200 | | /* |
201 | | static void hexdump(unsigned char *tmpbuf, int size) { |
202 | | mdb_buffer_dump(tmpbuf, 0, size); |
203 | | } |
204 | | */ |
205 | | |
206 | | |
207 | | GPtrArray * |
208 | | mdb_read_indices(MdbTableDef *table) |
209 | 0 | { |
210 | 0 | MdbCatalogEntry *entry = table->entry; |
211 | 0 | MdbHandle *mdb = entry->mdb; |
212 | 0 | MdbFormatConstants *fmt = mdb->fmt; |
213 | 0 | MdbIndex *pidx; |
214 | 0 | unsigned int i, j, k; |
215 | 0 | int key_num, col_num, cleaned_col_num; |
216 | 0 | int cur_pos, name_sz, idx2_sz, type_offset; |
217 | 0 | int index_start_pg = mdb->cur_pg; |
218 | 0 | gchar *tmpbuf; |
219 | |
|
220 | 0 | table->indices = g_ptr_array_new(); |
221 | |
|
222 | 0 | if (IS_JET3(mdb)) { |
223 | 0 | cur_pos = table->index_start + 39 * table->num_real_idxs; |
224 | 0 | idx2_sz = 20; |
225 | 0 | type_offset = 19; |
226 | 0 | } else { |
227 | 0 | cur_pos = table->index_start + 52 * table->num_real_idxs; |
228 | 0 | idx2_sz = 28; |
229 | 0 | type_offset = 23; |
230 | 0 | } |
231 | | |
232 | | /* Read in the definitions of table indexes, into table->indices */ |
233 | | |
234 | | /* num_real_idxs should be the number of indexes other than type 2. |
235 | | * It's not always the case. Happens on Northwind Orders table. |
236 | | */ |
237 | | //fprintf(stderr, "num_idxs:%d num_real_idxs:%d\n", table->num_idxs, table->num_real_idxs); |
238 | |
|
239 | 0 | unsigned int num_idxs_type_other_than_2 = 0; |
240 | 0 | tmpbuf = g_malloc(idx2_sz); |
241 | 0 | for (i=0;i<table->num_idxs;i++) { |
242 | 0 | if (!read_pg_if_n(mdb, tmpbuf, &cur_pos, idx2_sz)) { |
243 | 0 | g_free(tmpbuf); |
244 | 0 | mdb_free_indices(table->indices); |
245 | 0 | return table->indices = NULL; |
246 | 0 | } |
247 | | //fprintf(stderr, "Index defn: "); |
248 | | //hexdump((unsigned char *)tmpbuf, idx2_sz); |
249 | 0 | pidx = g_malloc0(sizeof(MdbIndex)); |
250 | 0 | pidx->table = table; |
251 | 0 | pidx->index_num = mdb_get_int16(tmpbuf, 4); |
252 | 0 | pidx->index_type = tmpbuf[type_offset]; |
253 | 0 | g_ptr_array_add(table->indices, pidx); |
254 | | /* |
255 | | { |
256 | | gint32 idx_marker = mdb_get_int32(tmpbuf, 0); |
257 | | gint32 index_col_def_num = mdb_get_int16(tmpbuf, 8); |
258 | | gint8 rel_idx_type = tmpbuf[0x0c]; |
259 | | gint32 rel_idx_number = mdb_get_int32(tmpbuf, 0x0d); |
260 | | gint32 rel_idx_page = mdb_get_int32(tmpbuf, 0x11); |
261 | | gint8 update_action_flags = tmpbuf[0x15]; |
262 | | gint8 delete_action_flags = tmpbuf[0x16]; |
263 | | fprintf(stderr, "idx #%d: num2:%d num3:%d type:%d\n", i, pidx->index_num, index_col_def_num, pidx->index_type); |
264 | | fprintf(stderr, "idx #%d: %d %d %d %d %d/%d\n", i, idx_marker, rel_idx_type, rel_idx_number, rel_idx_page, update_action_flags, delete_action_flags); |
265 | | }*/ |
266 | 0 | if (pidx->index_type!=2) |
267 | 0 | num_idxs_type_other_than_2++; |
268 | 0 | } |
269 | 0 | if (num_idxs_type_other_than_2<table->num_real_idxs) |
270 | 0 | table->num_real_idxs=num_idxs_type_other_than_2; |
271 | | |
272 | | //fprintf(stderr, "num_idxs:%d num_real_idxs:%d\n", table->num_idxs, table->num_real_idxs); |
273 | 0 | g_free(tmpbuf); |
274 | | |
275 | | /* Pick up the names of each index */ |
276 | 0 | for (i=0;i<table->num_idxs;i++) { |
277 | 0 | pidx = g_ptr_array_index (table->indices, i); |
278 | 0 | if (IS_JET3(mdb)) { |
279 | 0 | name_sz=read_pg_if_8(mdb, &cur_pos); |
280 | 0 | } else { |
281 | 0 | name_sz=read_pg_if_16(mdb, &cur_pos); |
282 | 0 | } |
283 | 0 | tmpbuf = g_malloc(name_sz); |
284 | 0 | if (read_pg_if_n(mdb, tmpbuf, &cur_pos, name_sz)) |
285 | 0 | mdb_unicode2ascii(mdb, tmpbuf, name_sz, pidx->name, sizeof(pidx->name)); |
286 | 0 | g_free(tmpbuf); |
287 | | //fprintf(stderr, "index %d type %d name %s\n", pidx->index_num, pidx->index_type, pidx->name); |
288 | 0 | } |
289 | | |
290 | | /* Pick up the column definitions for normal/primary key indexes */ |
291 | | /* NOTE: Match should possibly be by index_col_def_num, rather |
292 | | * than index_num; but in files encountered both seem to be the |
293 | | * same (so left with index_num until a counter example is found). |
294 | | */ |
295 | 0 | mdb_read_alt_pg(mdb, entry->table_pg); |
296 | 0 | mdb_read_pg(mdb, index_start_pg); |
297 | 0 | cur_pos = table->index_start; |
298 | 0 | for (i=0;i<table->num_real_idxs;i++) { |
299 | | /* Debugging print out, commented out |
300 | | { |
301 | | gchar *tmpbuf = (gchar *) g_malloc(0x34); |
302 | | int now_pos = cur_pos; |
303 | | read_pg_if_n(mdb, tmpbuf, &now_pos, 0x34); |
304 | | fprintf(stderr, "Index defn: "); |
305 | | hexdump((unsigned char *)tmpbuf, 0x34); |
306 | | g_free(tmpbuf); |
307 | | }*/ |
308 | |
|
309 | 0 | if (!IS_JET3(mdb)) cur_pos += 4; |
310 | | /* look for index number i */ |
311 | 0 | for (j=0; j<table->num_idxs; ++j) { |
312 | 0 | pidx = g_ptr_array_index (table->indices, j); |
313 | 0 | if (pidx->index_type!=2 && (unsigned int)pidx->index_num==i) |
314 | 0 | break; |
315 | 0 | } |
316 | 0 | if (j==table->num_idxs) { |
317 | 0 | fprintf(stderr, "ERROR: can't find index #%d.\n", i); |
318 | 0 | continue; |
319 | 0 | } |
320 | | //fprintf(stderr, "index %d #%d (%s) index_type:%d\n", i, pidx->index_num, pidx->name, pidx->index_type); |
321 | | |
322 | 0 | pidx->num_rows = mdb_get_int32(mdb->alt_pg_buf, |
323 | 0 | fmt->tab_cols_start_offset + |
324 | 0 | (pidx->index_num*fmt->tab_ridx_entry_size)); |
325 | | /* |
326 | | fprintf(stderr, "ridx block1 i:%d data1:0x%08x data2:0x%08x\n", |
327 | | i, |
328 | | (unsigned int)mdb_get_int32(mdb->pg_buf, |
329 | | fmt->tab_cols_start_offset + pidx->index_num * fmt->tab_ridx_entry_size), |
330 | | (unsigned int)mdb_get_int32(mdb->pg_buf, |
331 | | fmt->tab_cols_start_offset + pidx->index_num * fmt->tab_ridx_entry_size +4)); |
332 | | fprintf(stderr, "pidx->num_rows:%d\n", pidx->num_rows);*/ |
333 | | |
334 | | /* Read columns in each index */ |
335 | 0 | key_num=0; |
336 | 0 | for (j=0;j<MDB_MAX_IDX_COLS;j++) { |
337 | 0 | col_num=read_pg_if_16(mdb,&cur_pos); |
338 | 0 | if (col_num == 0xFFFF) { |
339 | 0 | cur_pos++; |
340 | 0 | continue; |
341 | 0 | } |
342 | | /* here we have the internal column number that does not |
343 | | * always match the table columns because of deletions */ |
344 | 0 | cleaned_col_num = -1; |
345 | 0 | for (k=0; k<table->num_cols; k++) { |
346 | 0 | MdbColumn *col = g_ptr_array_index(table->columns,k); |
347 | 0 | if (col->col_num == col_num) { |
348 | 0 | cleaned_col_num = k; |
349 | 0 | break; |
350 | 0 | } |
351 | 0 | } |
352 | 0 | if (cleaned_col_num==-1) { |
353 | 0 | fprintf(stderr, "CRITICAL: can't find column with internal id %d in index %s\n", |
354 | 0 | col_num, pidx->name); |
355 | 0 | cur_pos++; |
356 | 0 | continue; |
357 | 0 | } |
358 | | /* set column number to a 1 based column number and store */ |
359 | 0 | pidx->key_col_num[key_num] = cleaned_col_num + 1; |
360 | 0 | pidx->key_col_order[key_num] = |
361 | 0 | (read_pg_if_8(mdb, &cur_pos)) ? MDB_ASC : MDB_DESC; |
362 | | //fprintf(stderr, "component %d using column #%d (internally %d)\n", j, cleaned_col_num, col_num); |
363 | 0 | key_num++; |
364 | 0 | } |
365 | 0 | pidx->num_keys = key_num; |
366 | |
|
367 | 0 | if (0) // DEBUGGING ONLY |
368 | 0 | { |
369 | 0 | gint32 usage_map = read_pg_if_32(mdb, &cur_pos); |
370 | 0 | fprintf(stderr, "pidx->unknown_pre_first_pg:0x%08x\n", usage_map); |
371 | 0 | } else { |
372 | 0 | cur_pos += 4; // Skip Usage map information |
373 | 0 | } |
374 | 0 | pidx->first_pg = read_pg_if_32(mdb, &cur_pos); |
375 | |
|
376 | 0 | if (!IS_JET3(mdb)) cur_pos += 4; |
377 | |
|
378 | 0 | pidx->flags = read_pg_if_8(mdb, &cur_pos); |
379 | | //fprintf(stderr, "pidx->first_pg:%d pidx->flags:0x%02x\n", pidx->first_pg, pidx->flags); |
380 | 0 | if (!IS_JET3(mdb)) cur_pos += 5; |
381 | 0 | } |
382 | 0 | return NULL; |
383 | 0 | } |
384 | | void |
385 | | mdb_index_hash_text(MdbHandle *mdb, char *text, char *hash) |
386 | 0 | { |
387 | 0 | unsigned int k, len=strlen(text); |
388 | 0 | char *transtbl=NULL; |
389 | |
|
390 | 0 | if (!IS_JET3(mdb)) |
391 | 0 | { |
392 | | #ifdef __MSWSTR_H__ |
393 | | char *out_ptr = alloca((len+1)*2); |
394 | | unsigned int i; |
395 | | // mdb_ascii2unicode doesn't work, we don't want unicode compression! |
396 | | for (i=0; i<len+1; i++) { |
397 | | out_ptr[i*2] = text[i]; |
398 | | out_ptr[i*2+1] = 0; |
399 | | } |
400 | | if (!(k=DBLCMapStringW(MAKELCID(mdb->f->lang_id, 0), |
401 | | LCMAP_LINGUISTIC_CASING | LCMAP_SORTKEY | NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH, |
402 | | (WCHAR*)out_ptr, len, (LPBYTE)hash, len*2))) |
403 | | { |
404 | | len++; |
405 | | #endif |
406 | 0 | transtbl = idx_to_text_ling; |
407 | | #ifdef __MSWSTR_H__ |
408 | | } |
409 | | #endif |
410 | 0 | } |
411 | 0 | else |
412 | 0 | { |
413 | 0 | transtbl = idx_to_text; |
414 | 0 | } |
415 | 0 | if (transtbl) |
416 | 0 | { |
417 | 0 | for (k=0;k<len;k++) { |
418 | 0 | unsigned char c = ((unsigned char *)(text))[k]; |
419 | 0 | hash[k] = transtbl[c]; |
420 | 0 | if (!(hash[k])) fprintf(stderr, |
421 | 0 | "No translation available for %02x %d\n", c, c); |
422 | 0 | } |
423 | 0 | hash[len]='\0'; |
424 | 0 | } |
425 | | //printf ("mdb_index_hash_text %s -> %s (%d -> %d)\n", text, hash, len, k); |
426 | 0 | } |
427 | | /* |
428 | | * reverse the order of the column for hashing |
429 | | */ |
430 | | void |
431 | | mdb_index_swap_n(unsigned char *src, int sz, unsigned char *dest) |
432 | 0 | { |
433 | 0 | int i, j = 0; |
434 | |
|
435 | 0 | for (i = sz-1; i >= 0; i--) { |
436 | 0 | dest[j++] = src[i]; |
437 | 0 | } |
438 | 0 | } |
439 | | void |
440 | | mdb_index_cache_sarg(MdbColumn *col, MdbSarg *sarg, MdbSarg *idx_sarg) |
441 | 0 | { |
442 | | //guint32 cache_int; |
443 | 0 | unsigned char *c; |
444 | |
|
445 | 0 | switch (col->col_type) { |
446 | 0 | case MDB_TEXT: |
447 | 0 | mdb_index_hash_text(col->table->mdbidx, sarg->value.s, idx_sarg->value.s); |
448 | 0 | break; |
449 | | |
450 | 0 | case MDB_LONGINT: |
451 | 0 | idx_sarg->value.i = GUINT32_SWAP_LE_BE(sarg->value.i); |
452 | | //cache_int = sarg->value.i * -1; |
453 | 0 | c = (unsigned char *) &(idx_sarg->value.i); |
454 | 0 | c[0] |= 0x80; |
455 | | //printf("int %08x %02x %02x %02x %02x\n", sarg->value.i, c[0], c[1], c[2], c[3]); |
456 | 0 | break; |
457 | | |
458 | 0 | case MDB_INT: |
459 | 0 | break; |
460 | | |
461 | 0 | default: |
462 | 0 | break; |
463 | 0 | } |
464 | 0 | } |
465 | | #if 0 |
466 | | int |
467 | | mdb_index_test_sarg(MdbHandle *mdb, MdbColumn *col, MdbSarg *sarg, int offset, int len) |
468 | | { |
469 | | char tmpbuf[256]; |
470 | | int lastchar; |
471 | | |
472 | | switch (col->col_type) { |
473 | | case MDB_BYTE: |
474 | | return mdb_test_int(sarg, mdb_pg_get_byte(mdb, offset)); |
475 | | break; |
476 | | case MDB_INT: |
477 | | return mdb_test_int(sarg, mdb_pg_get_int16(mdb, offset)); |
478 | | break; |
479 | | case MDB_LONGINT: |
480 | | return mdb_test_int(sarg, mdb_pg_get_int32(mdb, offset)); |
481 | | break; |
482 | | case MDB_TEXT: |
483 | | strncpy(tmpbuf, &mdb->pg_buf[offset],255); |
484 | | lastchar = len > 255 ? 255 : len; |
485 | | tmpbuf[lastchar]='\0'; |
486 | | return mdb_test_string(sarg, tmpbuf); |
487 | | default: |
488 | | fprintf(stderr, "Calling mdb_test_sarg on unknown type. Add code to mdb_test_sarg() for type %d\n",col->col_type); |
489 | | break; |
490 | | } |
491 | | return 1; |
492 | | } |
493 | | #endif |
494 | | int |
495 | | mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, char *buf, int len) |
496 | 0 | { |
497 | 0 | unsigned int i, j; |
498 | 0 | MdbColumn *col; |
499 | 0 | MdbTableDef *table = idx->table; |
500 | 0 | MdbSarg *idx_sarg; |
501 | 0 | MdbSarg *sarg; |
502 | 0 | MdbField field; |
503 | 0 | MdbSargNode node; |
504 | | //int c_offset = 0, |
505 | 0 | int c_len; |
506 | | |
507 | | //fprintf(stderr,"mdb_index_test_sargs called on "); |
508 | | //mdb_buffer_dump(buf, 0, len); |
509 | | //fprintf(stderr,"\n"); |
510 | 0 | for (i=0;i<idx->num_keys;i++) { |
511 | | //c_offset++; /* the per column null indicator/flags */ |
512 | 0 | col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); |
513 | | /* |
514 | | * This will go away eventually |
515 | | */ |
516 | 0 | if (col->col_type==MDB_TEXT) { |
517 | | //c_len = strlen(&mdb->pg_buf[offset + c_offset]); |
518 | 0 | c_len = strlen(buf); |
519 | 0 | } else { |
520 | 0 | c_len = col->col_size; |
521 | | //fprintf(stderr,"Only text types currently supported. How did we get here?\n"); |
522 | 0 | } |
523 | | /* |
524 | | * If we have no cached index values for this column, |
525 | | * create them. |
526 | | */ |
527 | 0 | if (col->num_sargs && !col->idx_sarg_cache) { |
528 | 0 | col->idx_sarg_cache = g_ptr_array_new(); |
529 | 0 | for (j=0;j<col->num_sargs;j++) { |
530 | 0 | sarg = g_ptr_array_index (col->sargs, j); |
531 | 0 | idx_sarg = g_memdup2(sarg,sizeof(MdbSarg)); |
532 | | //printf("calling mdb_index_cache_sarg\n"); |
533 | 0 | mdb_index_cache_sarg(col, sarg, idx_sarg); |
534 | 0 | g_ptr_array_add(col->idx_sarg_cache, idx_sarg); |
535 | 0 | } |
536 | 0 | } |
537 | |
|
538 | 0 | for (j=0;j<col->num_sargs;j++) { |
539 | 0 | sarg = g_ptr_array_index (col->idx_sarg_cache, j); |
540 | | /* XXX - kludge */ |
541 | 0 | node.op = sarg->op; |
542 | 0 | node.value = sarg->value; |
543 | | //field.value = &mdb->pg_buf[offset + c_offset]; |
544 | 0 | field.value = buf; |
545 | 0 | field.siz = c_len; |
546 | 0 | field.is_null = FALSE; |
547 | | /* In Jet 4 Index text hashes don't need to be converted from Unicode */ |
548 | 0 | if (!IS_JET3(mdb) && col->col_type == MDB_TEXT) |
549 | 0 | { |
550 | 0 | if (!mdb_test_string(&node, buf)) return 0; |
551 | 0 | } |
552 | 0 | else if (!mdb_test_sarg(mdb, col, &node, &field)) { |
553 | | /* sarg didn't match, no sense going on */ |
554 | 0 | return 0; |
555 | 0 | } |
556 | 0 | } |
557 | 0 | } |
558 | 0 | return 1; |
559 | 0 | } |
560 | | /* |
561 | | * pack the pages bitmap |
562 | | */ |
563 | | int |
564 | | mdb_index_pack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg) |
565 | 0 | { |
566 | 0 | int mask_bit = 0; |
567 | 0 | int mask_pos = IS_JET3(mdb)?0x16:0x1b; |
568 | 0 | int mask_byte = 0; |
569 | 0 | int elem = 0; |
570 | 0 | int len, start, i; |
571 | |
|
572 | 0 | start = ipg->idx_starts[elem++]; |
573 | |
|
574 | 0 | while (start) { |
575 | | //fprintf(stdout, "elem %d is %d\n", elem, ipg->idx_starts[elem]); |
576 | 0 | len = ipg->idx_starts[elem] - start; |
577 | | //fprintf(stdout, "len is %d\n", len); |
578 | 0 | for (i=0; i < len; i++) { |
579 | 0 | mask_bit++; |
580 | 0 | if (mask_bit==8) { |
581 | 0 | mask_bit=0; |
582 | 0 | mdb->pg_buf[mask_pos++] = mask_byte; |
583 | 0 | mask_byte = 0; |
584 | 0 | } |
585 | | /* upon reaching the len, set the bit */ |
586 | 0 | } |
587 | 0 | mask_byte = (1 << mask_bit) | mask_byte; |
588 | | //fprintf(stdout, "mask byte is %02x at %d\n", mask_byte, mask_pos); |
589 | 0 | start = ipg->idx_starts[elem++]; |
590 | 0 | } |
591 | | /* flush the last byte if any */ |
592 | 0 | mdb->pg_buf[mask_pos++] = mask_byte; |
593 | | /* remember to zero the rest of the bitmap */ |
594 | 0 | for (i = mask_pos; i < 0xf8; i++) { |
595 | 0 | mdb->pg_buf[mask_pos++] = 0; |
596 | 0 | } |
597 | 0 | return 0; |
598 | 0 | } |
599 | | /* |
600 | | * unpack the pages bitmap |
601 | | */ |
602 | | int |
603 | | mdb_index_unpack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg) |
604 | 0 | { |
605 | 0 | int mask_bit = 0; |
606 | 0 | int mask_pos = IS_JET3(mdb)?0x16:0x1b; |
607 | 0 | int mask_byte; |
608 | 0 | int jet_start = IS_JET3(mdb)?0xf8:0x1e0; |
609 | 0 | int start = jet_start; |
610 | 0 | int elem = 0; |
611 | 0 | int len = 0; |
612 | |
|
613 | 0 | ipg->idx_starts[elem++]=start; |
614 | | |
615 | | //fprintf(stdout, "Unpacking index page %lu\n", ipg->pg); |
616 | 0 | do { |
617 | 0 | len = 0; |
618 | 0 | do { |
619 | 0 | mask_bit++; |
620 | 0 | if (mask_bit==8) { |
621 | 0 | mask_bit=0; |
622 | 0 | mask_pos++; |
623 | 0 | } |
624 | 0 | mask_byte = mdb->pg_buf[mask_pos]; |
625 | 0 | len++; |
626 | 0 | } while (mask_pos <= jet_start && !((1 << mask_bit) & mask_byte)); |
627 | | //fprintf(stdout, "%d %d %d %d\n", mask_pos, mask_bit, mask_byte, len); |
628 | |
|
629 | 0 | start += len; |
630 | 0 | if (mask_pos < jet_start) ipg->idx_starts[elem++]=start; |
631 | |
|
632 | 0 | } while (mask_pos < jet_start); |
633 | | |
634 | | /* if we zero the next element, so we don't pick up the last pages starts*/ |
635 | 0 | ipg->idx_starts[elem]=0; |
636 | |
|
637 | 0 | return elem; |
638 | 0 | } |
639 | | /* |
640 | | * find the next entry on a page (either index or leaf). Uses state information |
641 | | * stored in the MdbIndexPage across calls. |
642 | | */ |
643 | | int |
644 | | mdb_index_find_next_on_page(MdbHandle *mdb, MdbIndexPage *ipg) |
645 | 0 | { |
646 | 0 | if (!ipg->pg) return 0; |
647 | | |
648 | | /* if this page has not been unpacked to it */ |
649 | 0 | if (!ipg->idx_starts[0]){ |
650 | | //fprintf(stdout, "Unpacking page %d\n", ipg->pg); |
651 | 0 | mdb_index_unpack_bitmap(mdb, ipg); |
652 | 0 | } |
653 | | |
654 | | |
655 | 0 | if (ipg->idx_starts[ipg->start_pos + 1]==0) return 0; |
656 | 0 | ipg->len = ipg->idx_starts[ipg->start_pos+1] - ipg->idx_starts[ipg->start_pos]; |
657 | 0 | ipg->start_pos++; |
658 | | //fprintf(stdout, "Start pos %d\n", ipg->start_pos); |
659 | |
|
660 | 0 | return ipg->len; |
661 | 0 | } |
662 | | void mdb_index_page_reset(MdbHandle *mdb, MdbIndexPage *ipg) |
663 | 0 | { |
664 | 0 | ipg->offset = IS_JET3(mdb)?0xf8:0x1e0; /* start byte of the index entries */ |
665 | 0 | ipg->start_pos=0; |
666 | 0 | ipg->len = 0; |
667 | 0 | ipg->idx_starts[0]=0; |
668 | 0 | } |
669 | | void mdb_index_page_init(MdbHandle *mdb, MdbIndexPage *ipg) |
670 | 0 | { |
671 | 0 | memset(ipg, 0, sizeof(MdbIndexPage)); |
672 | 0 | mdb_index_page_reset(mdb, ipg); |
673 | 0 | } |
674 | | /* |
675 | | * find the next leaf page if any given a chain. Assumes any exhausted leaf |
676 | | * pages at the end of the chain have been peeled off before the call. |
677 | | */ |
678 | | MdbIndexPage * |
679 | | mdb_find_next_leaf(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) |
680 | 0 | { |
681 | 0 | MdbIndexPage *ipg, *newipg; |
682 | 0 | guint32 pg; |
683 | 0 | guint passed = 0; |
684 | |
|
685 | 0 | ipg = mdb_index_read_bottom_pg(mdb, idx, chain); |
686 | | |
687 | | /* |
688 | | * If we are at the first page deep and it's not an index page then |
689 | | * we are simply done. (there is no page to find |
690 | | */ |
691 | |
|
692 | 0 | if (mdb->pg_buf[0]==MDB_PAGE_LEAF) { |
693 | | /* Indexes can have leaves at the end that don't appear |
694 | | * in the upper tree, stash the last index found so |
695 | | * we can follow it at the end. */ |
696 | 0 | chain->last_leaf_found = ipg->pg; |
697 | 0 | return ipg; |
698 | 0 | } |
699 | | |
700 | | /* |
701 | | * apply sargs here, currently we don't |
702 | | */ |
703 | 0 | do { |
704 | 0 | ipg->len = 0; |
705 | | //printf("finding next on pg %lu\n", ipg->pg); |
706 | 0 | if (!mdb_index_find_next_on_page(mdb, ipg)) { |
707 | | //printf("find_next_on_page returned 0\n"); |
708 | 0 | return 0; |
709 | 0 | } |
710 | 0 | pg = mdb_get_int32_msb(mdb->pg_buf, ipg->offset + ipg->len - 3) >> 8; |
711 | | //printf("Looking at pg %lu at %lu %d\n", pg, ipg->offset, ipg->len); |
712 | 0 | ipg->offset += ipg->len; |
713 | | |
714 | | /* |
715 | | * add to the chain and call this function |
716 | | * recursively. |
717 | | */ |
718 | 0 | if (!mdb_chain_add_page(mdb, chain, pg)) |
719 | 0 | break; |
720 | 0 | newipg = mdb_find_next_leaf(mdb, idx, chain); |
721 | | //printf("returning pg %lu\n",newipg->pg); |
722 | 0 | return newipg; |
723 | 0 | } while (!passed); |
724 | | /* no more pages */ |
725 | 0 | return NULL; |
726 | |
|
727 | 0 | } |
728 | | static MdbIndexPage * |
729 | | mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg) |
730 | 0 | { |
731 | 0 | MdbIndexPage *ipg; |
732 | |
|
733 | 0 | chain->cur_depth++; |
734 | 0 | if (chain->cur_depth > MDB_MAX_INDEX_DEPTH) { |
735 | 0 | fprintf(stderr,"Error! maximum index depth of %d exceeded. This is probably due to a programming bug, If you are confident that your indexes really are this deep, adjust MDB_MAX_INDEX_DEPTH in mdbtools.h and recompile.\n", MDB_MAX_INDEX_DEPTH); |
736 | 0 | return NULL; |
737 | 0 | } |
738 | 0 | ipg = &(chain->pages[chain->cur_depth - 1]); |
739 | 0 | mdb_index_page_init(mdb, ipg); |
740 | 0 | ipg->pg = pg; |
741 | |
|
742 | 0 | return ipg; |
743 | 0 | } |
744 | | /* |
745 | | * returns the bottom page of the IndexChain, if IndexChain is empty it |
746 | | * initializes it by reading idx->first_pg (the root page) |
747 | | */ |
748 | | static MdbIndexPage * |
749 | | mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) |
750 | 0 | { |
751 | 0 | MdbIndexPage *ipg; |
752 | | |
753 | | /* |
754 | | * if it's new use the root index page (idx->first_pg) |
755 | | */ |
756 | 0 | if (!chain->cur_depth) { |
757 | 0 | ipg = &(chain->pages[0]); |
758 | 0 | mdb_index_page_init(mdb, ipg); |
759 | 0 | chain->cur_depth = 1; |
760 | 0 | ipg->pg = idx->first_pg; |
761 | 0 | if (!(ipg = mdb_find_next_leaf(mdb, idx, chain))) |
762 | 0 | return 0; |
763 | 0 | } else { |
764 | 0 | ipg = &(chain->pages[chain->cur_depth - 1]); |
765 | 0 | ipg->len = 0; |
766 | 0 | } |
767 | | |
768 | 0 | mdb_read_pg(mdb, ipg->pg); |
769 | |
|
770 | 0 | return ipg; |
771 | 0 | } |
772 | | /* |
773 | | * unwind the stack and search for new leaf node |
774 | | */ |
775 | | MdbIndexPage * |
776 | | mdb_index_unwind(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) |
777 | 0 | { |
778 | 0 | MdbIndexPage *ipg; |
779 | | |
780 | | //printf("page %lu finished\n",ipg->pg); |
781 | 0 | if (chain->cur_depth==1) { |
782 | | //printf("cur_depth == 1 we're out\n"); |
783 | 0 | return NULL; |
784 | 0 | } |
785 | | /* |
786 | | * unwind the stack until we find something or reach |
787 | | * the top. |
788 | | */ |
789 | 0 | ipg = NULL; |
790 | 0 | while (chain->cur_depth>1 && ipg==NULL) { |
791 | | //printf("chain depth %d\n", chain->cur_depth); |
792 | 0 | chain->cur_depth--; |
793 | 0 | ipg = mdb_find_next_leaf(mdb, idx, chain); |
794 | 0 | if (ipg) mdb_index_find_next_on_page(mdb, ipg); |
795 | 0 | } |
796 | 0 | if (chain->cur_depth==1) { |
797 | | //printf("last leaf %lu\n", chain->last_leaf_found); |
798 | 0 | return NULL; |
799 | 0 | } |
800 | 0 | return ipg; |
801 | 0 | } |
802 | | /* |
803 | | * the main index function. |
804 | | * caller provides an index chain which is the current traversal of index |
805 | | * pages from the root page to the leaf. Initially passed as blank, |
806 | | * mdb_index_find_next will store it's state information here. Each invocation |
807 | | * then picks up where the last one left off, allowing us to scroll through |
808 | | * the index one by one. |
809 | | * |
810 | | * Sargs are applied here but also need to be applied on the whole row b/c |
811 | | * text columns may return false positives due to hashing and non-index |
812 | | * columns with sarg values can't be tested here. |
813 | | */ |
814 | | int |
815 | | mdb_index_find_next(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 *pg, guint16 *row) |
816 | 0 | { |
817 | 0 | MdbIndexPage *ipg; |
818 | 0 | int passed = 0; |
819 | 0 | int idx_sz; |
820 | 0 | int idx_start = 0; |
821 | 0 | unsigned short compress_bytes; |
822 | 0 | MdbColumn *col; |
823 | 0 | guint32 pg_row; |
824 | |
|
825 | 0 | ipg = mdb_index_read_bottom_pg(mdb, idx, chain); |
826 | | |
827 | | /* |
828 | | * loop while the sargs don't match |
829 | | */ |
830 | 0 | do { |
831 | 0 | ipg->len = 0; |
832 | | /* |
833 | | * if no more rows on this leaf, try to find a new leaf |
834 | | */ |
835 | 0 | if (!mdb_index_find_next_on_page(mdb, ipg)) { |
836 | 0 | if (!chain->clean_up_mode) { |
837 | 0 | if (ipg->rc==1 || !(ipg = mdb_index_unwind(mdb, idx, chain))) |
838 | 0 | chain->clean_up_mode = 1; |
839 | 0 | } |
840 | 0 | if (chain->clean_up_mode) { |
841 | | //fprintf(stdout,"in cleanup mode\n"); |
842 | |
|
843 | 0 | if (!chain->last_leaf_found) return 0; |
844 | 0 | mdb_read_pg(mdb, chain->last_leaf_found); |
845 | 0 | chain->last_leaf_found = mdb_get_int32( |
846 | 0 | mdb->pg_buf, 0x0c); |
847 | | //printf("next leaf %lu\n", chain->last_leaf_found); |
848 | 0 | mdb_read_pg(mdb, chain->last_leaf_found); |
849 | | /* reuse the chain for cleanup mode */ |
850 | 0 | chain->cur_depth = 1; |
851 | 0 | ipg = &chain->pages[0]; |
852 | 0 | mdb_index_page_init(mdb, ipg); |
853 | 0 | ipg->pg = chain->last_leaf_found; |
854 | | //printf("next on page %d\n", |
855 | 0 | if (!mdb_index_find_next_on_page(mdb, ipg)) |
856 | 0 | return 0; |
857 | 0 | } |
858 | 0 | } |
859 | 0 | pg_row = mdb_get_int32_msb(mdb->pg_buf, ipg->offset + ipg->len - 4); |
860 | 0 | *row = pg_row & 0xff; |
861 | 0 | *pg = pg_row >> 8; |
862 | | //printf("row = %d pg = %lu ipg->pg = %lu offset = %lu len = %d\n", *row, *pg, ipg->pg, ipg->offset, ipg->len); |
863 | 0 | col=g_ptr_array_index(idx->table->columns,idx->key_col_num[0]-1); |
864 | 0 | idx_sz = mdb_col_fixed_size(col); |
865 | | /* handle compressed indexes, single key indexes only? */ |
866 | 0 | if (idx_sz<0) idx_sz = ipg->len - (ipg->start_pos==1?5:4); // Length from Index - the 4 trailing bytes (data page/row), Skip flags on first page |
867 | 0 | compress_bytes = mdb_get_int16(mdb->pg_buf, IS_JET3(mdb)?0x14:0x18); |
868 | 0 | if (idx->num_keys==1 && idx_sz>0 && compress_bytes > 1 && ipg->start_pos>1 /*ipg->len - 4 < idx_sz*/) { |
869 | | //printf("short index found\n"); |
870 | | //mdb_buffer_dump(ipg->cache_value, 0, idx_sz); |
871 | 0 | memcpy(&ipg->cache_value[compress_bytes-1], &mdb->pg_buf[ipg->offset], ipg->len); |
872 | | //mdb_buffer_dump(ipg->cache_value, 0, idx_sz); |
873 | 0 | } else { |
874 | 0 | idx_start = ipg->offset + (ipg->len - 4 - idx_sz); |
875 | 0 | memcpy(ipg->cache_value, &mdb->pg_buf[idx_start], idx_sz); |
876 | 0 | } |
877 | | |
878 | | //idx_start = ipg->offset + (ipg->len - 4 - idx_sz); |
879 | 0 | passed = mdb_index_test_sargs(mdb, idx, (char *)(ipg->cache_value), idx_sz); |
880 | 0 | if (passed) ipg->rc=1; else if (ipg->rc) return 0; |
881 | | |
882 | 0 | ipg->offset += ipg->len; |
883 | 0 | } while (!passed); |
884 | | |
885 | | //fprintf(stdout,"len = %d pos %d\n", ipg->len, ipg->mask_pos); |
886 | | //mdb_buffer_dump(mdb->pg_buf, ipg->offset, ipg->len); |
887 | | |
888 | 0 | return ipg->len; |
889 | 0 | } |
890 | | /* |
891 | | * XXX - FIX ME |
892 | | * This function is grossly inefficient. It scans the entire index building |
893 | | * an IndexChain to a specific row. We should be checking the index pages |
894 | | * for matches against the indexed fields to find the proper leaf page, but |
895 | | * getting it working first and then make it fast! |
896 | | */ |
897 | | int |
898 | | mdb_index_find_row(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 pg, guint16 row) |
899 | 0 | { |
900 | 0 | MdbIndexPage *ipg; |
901 | 0 | int passed = 0; |
902 | 0 | guint32 pg_row = (pg << 8) | (row & 0xff); |
903 | 0 | guint32 datapg_row; |
904 | |
|
905 | 0 | ipg = mdb_index_read_bottom_pg(mdb, idx, chain); |
906 | |
|
907 | 0 | do { |
908 | 0 | ipg->len = 0; |
909 | | /* |
910 | | * if no more rows on this leaf, try to find a new leaf |
911 | | */ |
912 | 0 | if (!mdb_index_find_next_on_page(mdb, ipg)) { |
913 | | /* back to top? We're done */ |
914 | 0 | if (chain->cur_depth==1) |
915 | 0 | return 0; |
916 | | |
917 | | /* |
918 | | * unwind the stack until we find something or reach |
919 | | * the top. |
920 | | */ |
921 | 0 | while (chain->cur_depth>1) { |
922 | 0 | chain->cur_depth--; |
923 | 0 | if (!(ipg = mdb_find_next_leaf(mdb, idx, chain))) |
924 | 0 | return 0; |
925 | 0 | mdb_index_find_next_on_page(mdb, ipg); |
926 | 0 | } |
927 | 0 | if (chain->cur_depth==1) |
928 | 0 | return 0; |
929 | 0 | } |
930 | | /* test row and pg */ |
931 | 0 | datapg_row = mdb_get_int32_msb(mdb->pg_buf, ipg->offset + ipg->len - 4); |
932 | 0 | if (pg_row == datapg_row) { |
933 | 0 | passed = 1; |
934 | 0 | } |
935 | 0 | ipg->offset += ipg->len; |
936 | 0 | } while (!passed); |
937 | | |
938 | | /* index chain from root to leaf should now be in "chain" */ |
939 | 0 | return 1; |
940 | 0 | } |
941 | | |
942 | | void mdb_index_walk(MdbTableDef *table, MdbIndex *idx) |
943 | 0 | { |
944 | | /* |
945 | | MdbHandle *mdb = table->entry->mdb; |
946 | | int cur_pos = 0; |
947 | | unsigned char marker; |
948 | | MdbColumn *col; |
949 | | unsigned int i; |
950 | | |
951 | | if (idx->num_keys!=1) return; |
952 | | |
953 | | mdb_read_pg(mdb, idx->first_pg); |
954 | | cur_pos = 0xf8; |
955 | | |
956 | | for (i=0;i<idx->num_keys;i++) { |
957 | | marker = mdb->pg_buf[cur_pos++]; |
958 | | col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); |
959 | | //printf("column %d coltype %d col_size %d (%d)\n",i,col->col_type, mdb_col_fixed_size(col), col->col_size); |
960 | | } |
961 | | */ |
962 | 0 | } |
963 | | void |
964 | | mdb_index_dump(MdbTableDef *table, MdbIndex *idx) |
965 | 0 | { |
966 | 0 | unsigned int i; |
967 | 0 | MdbColumn *col; |
968 | |
|
969 | 0 | fprintf(stdout,"index number %d\n", idx->index_num); |
970 | 0 | fprintf(stdout,"index name %s\n", idx->name); |
971 | 0 | fprintf(stdout,"index first page %d\n", idx->first_pg); |
972 | 0 | fprintf(stdout,"index rows %d\n", idx->num_rows); |
973 | 0 | if (idx->index_type==1) fprintf(stdout,"index is a primary key\n"); |
974 | 0 | for (i=0;i<idx->num_keys;i++) { |
975 | 0 | col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); |
976 | 0 | fprintf(stdout,"Column %s(%d) Sorted %s Unique: %s\n", |
977 | 0 | col->name, |
978 | 0 | idx->key_col_num[i], |
979 | 0 | idx->key_col_order[i]==MDB_ASC ? "ascending" : "descending", |
980 | 0 | idx->flags & MDB_IDX_UNIQUE ? "Yes" : "No" |
981 | 0 | ); |
982 | 0 | } |
983 | 0 | mdb_index_walk(table, idx); |
984 | 0 | } |
985 | | /* |
986 | | * compute_cost tries to assign a cost to a given index using the sargs |
987 | | * available in this query. |
988 | | * |
989 | | * Indexes with no matching sargs are assigned 0 |
990 | | * Unique indexes are preferred over non-uniques |
991 | | * Operator preference is equal, like, isnull, others |
992 | | */ |
993 | | int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx) |
994 | 0 | { |
995 | 0 | unsigned int i; |
996 | 0 | MdbColumn *col; |
997 | 0 | MdbSarg *sarg = NULL; |
998 | 0 | int not_all_equal = 0; |
999 | |
|
1000 | 0 | if (!idx->num_keys) return 0; |
1001 | 0 | if (idx->num_keys > 1) { |
1002 | 0 | for (i=0;i<idx->num_keys;i++) { |
1003 | 0 | col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); |
1004 | 0 | if (col->sargs) sarg = g_ptr_array_index (col->sargs, 0); |
1005 | 0 | if (!sarg || sarg->op != MDB_EQUAL) not_all_equal++; |
1006 | 0 | } |
1007 | 0 | } |
1008 | |
|
1009 | 0 | col=g_ptr_array_index(table->columns,idx->key_col_num[0]-1); |
1010 | | /* |
1011 | | * if this is the first key column and there are no sargs, |
1012 | | * then this index is useless. |
1013 | | */ |
1014 | 0 | if (!col->num_sargs) return 0; |
1015 | | |
1016 | 0 | sarg = g_ptr_array_index (col->sargs, 0); |
1017 | | |
1018 | | /* |
1019 | | * a like with a wild card first is useless as a sarg */ |
1020 | 0 | if ((sarg->op == MDB_LIKE || sarg->op == MDB_ILIKE) && sarg->value.s[0]=='%') |
1021 | 0 | return 0; |
1022 | | |
1023 | | /* |
1024 | | * this needs a lot of tweaking. |
1025 | | */ |
1026 | 0 | if (idx->flags & MDB_IDX_UNIQUE) { |
1027 | 0 | if (idx->num_keys == 1) { |
1028 | | //printf("op is %d\n", sarg->op); |
1029 | 0 | switch (sarg->op) { |
1030 | 0 | case MDB_EQUAL: |
1031 | 0 | return 1; break; |
1032 | 0 | case MDB_LIKE: |
1033 | 0 | case MDB_ILIKE: |
1034 | 0 | return 4; break; |
1035 | 0 | case MDB_ISNULL: |
1036 | 0 | return 12; break; |
1037 | 0 | default: |
1038 | 0 | return 8; break; |
1039 | 0 | } |
1040 | 0 | } else { |
1041 | 0 | switch (sarg->op) { |
1042 | 0 | case MDB_EQUAL: |
1043 | 0 | if (not_all_equal) return 2; |
1044 | 0 | else return 1; |
1045 | 0 | break; |
1046 | 0 | case MDB_LIKE: |
1047 | 0 | case MDB_ILIKE: |
1048 | 0 | return 6; break; |
1049 | 0 | case MDB_ISNULL: |
1050 | 0 | return 12; break; |
1051 | 0 | default: |
1052 | 0 | return 9; break; |
1053 | 0 | } |
1054 | 0 | } |
1055 | 0 | } else { |
1056 | 0 | if (idx->num_keys == 1) { |
1057 | 0 | switch (sarg->op) { |
1058 | 0 | case MDB_EQUAL: |
1059 | 0 | return 2; break; |
1060 | 0 | case MDB_LIKE: |
1061 | 0 | case MDB_ILIKE: |
1062 | 0 | return 5; break; |
1063 | 0 | case MDB_ISNULL: |
1064 | 0 | return 12; break; |
1065 | 0 | default: |
1066 | 0 | return 10; break; |
1067 | 0 | } |
1068 | 0 | } else { |
1069 | 0 | switch (sarg->op) { |
1070 | 0 | case MDB_EQUAL: |
1071 | 0 | if (not_all_equal) return 3; |
1072 | 0 | else return 2; |
1073 | 0 | break; |
1074 | 0 | case MDB_LIKE: |
1075 | 0 | case MDB_ILIKE: |
1076 | 0 | return 7; break; |
1077 | 0 | case MDB_ISNULL: |
1078 | 0 | return 12; break; |
1079 | 0 | default: |
1080 | 0 | return 11; break; |
1081 | 0 | } |
1082 | 0 | } |
1083 | 0 | } |
1084 | 0 | return 0; |
1085 | 0 | } |
1086 | | /* |
1087 | | * choose_index runs mdb_index_compute_cost for each available index and picks |
1088 | | * the best. |
1089 | | * |
1090 | | * Returns strategy to use (table scan, or index scan) |
1091 | | */ |
1092 | | MdbStrategy |
1093 | | mdb_choose_index(MdbTableDef *table, int *choice) |
1094 | 0 | { |
1095 | 0 | unsigned int i; |
1096 | 0 | MdbIndex *idx; |
1097 | 0 | int cost = 0; |
1098 | 0 | int least = 99; |
1099 | |
|
1100 | 0 | *choice = -1; |
1101 | 0 | for (i=0;i<table->num_idxs;i++) { |
1102 | 0 | idx = g_ptr_array_index (table->indices, i); |
1103 | 0 | cost = mdb_index_compute_cost(table, idx); |
1104 | | //printf("cost for %s is %d\n", idx->name, cost); |
1105 | 0 | if (cost && cost < least) { |
1106 | 0 | least = cost; |
1107 | 0 | *choice = i; |
1108 | 0 | } |
1109 | 0 | } |
1110 | | /* and the winner is: *choice */ |
1111 | 0 | if (least==99) return MDB_TABLE_SCAN; |
1112 | 0 | return MDB_INDEX_SCAN; |
1113 | 0 | } |
1114 | | void |
1115 | | mdb_index_scan_init(MdbHandle *mdb, MdbTableDef *table) |
1116 | 0 | { |
1117 | 0 | int i; |
1118 | |
|
1119 | 0 | if (mdb_get_option(MDB_USE_INDEX) && mdb_choose_index(table, &i) == MDB_INDEX_SCAN) { |
1120 | 0 | table->strategy = MDB_INDEX_SCAN; |
1121 | 0 | table->scan_idx = g_ptr_array_index (table->indices, i); |
1122 | 0 | table->chain = g_malloc0(sizeof(MdbIndexChain)); |
1123 | 0 | table->mdbidx = mdb_clone_handle(mdb); |
1124 | 0 | mdb_read_pg(table->mdbidx, table->scan_idx->first_pg); |
1125 | | //printf("best index is %s\n",table->scan_idx->name); |
1126 | 0 | } |
1127 | | //printf("TABLE SCAN? %d\n", table->strategy); |
1128 | 0 | } |
1129 | | void |
1130 | | mdb_index_scan_free(MdbTableDef *table) |
1131 | 0 | { |
1132 | 0 | if (table->chain) { |
1133 | 0 | g_free(table->chain); |
1134 | 0 | table->chain = NULL; |
1135 | 0 | } |
1136 | 0 | if (table->mdbidx) { |
1137 | 0 | mdb_close(table->mdbidx); |
1138 | 0 | table->mdbidx = NULL; |
1139 | 0 | } |
1140 | 0 | } |
1141 | | |
1142 | | void mdb_free_indices(GPtrArray *indices) |
1143 | 5.62k | { |
1144 | 5.62k | guint i; |
1145 | | |
1146 | 5.62k | if (!indices) return; |
1147 | 0 | for (i=0; i<indices->len; i++) |
1148 | 0 | g_free (g_ptr_array_index(indices, i)); |
1149 | 0 | g_ptr_array_free(indices, TRUE); |
1150 | 0 | } |