/src/php-src/ext/opcache/jit/ir/ir_strtab.c
Line | Count | Source |
1 | | /* |
2 | | * IR - Lightweight JIT Compilation Framework |
3 | | * (String table) |
4 | | * Copyright (C) 2022 Zend by Perforce. |
5 | | * Authors: Dmitry Stogov <dmitry@php.net> |
6 | | */ |
7 | | |
8 | | #include "ir.h" |
9 | | #include "ir_private.h" |
10 | | |
11 | | typedef struct _ir_strtab_bucket { |
12 | | uint32_t h; |
13 | | uint32_t len; |
14 | | const char *str; |
15 | | uint32_t next; |
16 | | ir_ref val; |
17 | | } ir_strtab_bucket; |
18 | | |
19 | | static uint32_t ir_str_hash(const char *str, size_t len) |
20 | 0 | { |
21 | 0 | size_t i; |
22 | 0 | uint32_t h = 5381; |
23 | |
|
24 | 0 | for (i = 0; i < len; i++) { |
25 | 0 | h = ((h << 5) + h) + *str; |
26 | 0 | str++; |
27 | 0 | } |
28 | 0 | return h | 0x10000000; |
29 | 0 | } |
30 | | |
31 | | static uint32_t ir_strtab_hash_size(uint32_t size) |
32 | 0 | { |
33 | | /* Use big enough power of 2 */ |
34 | 0 | size -= 1; |
35 | 0 | size |= (size >> 1); |
36 | 0 | size |= (size >> 2); |
37 | 0 | size |= (size >> 4); |
38 | 0 | size |= (size >> 8); |
39 | 0 | size |= (size >> 16); |
40 | 0 | return size + 1; |
41 | 0 | } |
42 | | |
43 | | static void ir_strtab_resize(ir_strtab *strtab) |
44 | 0 | { |
45 | 0 | uint32_t old_hash_size = (uint32_t)(-(int32_t)strtab->mask); |
46 | 0 | char *old_data = strtab->data; |
47 | 0 | uint32_t size = strtab->size * 2; |
48 | 0 | uint32_t hash_size = ir_strtab_hash_size(size); |
49 | 0 | char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket)); |
50 | 0 | ir_strtab_bucket *p; |
51 | 0 | uint32_t pos, i; |
52 | |
|
53 | 0 | memset(data, IR_INVALID_IDX, hash_size * sizeof(uint32_t)); |
54 | 0 | strtab->data = data + (hash_size * sizeof(uint32_t)); |
55 | 0 | strtab->mask = (uint32_t)(-(int32_t)hash_size); |
56 | 0 | strtab->size = size; |
57 | |
|
58 | 0 | memcpy(strtab->data, old_data, strtab->count * sizeof(ir_strtab_bucket)); |
59 | 0 | ir_mem_free(old_data - (old_hash_size * sizeof(uint32_t))); |
60 | |
|
61 | 0 | i = strtab->count; |
62 | 0 | pos = 0; |
63 | 0 | p = (ir_strtab_bucket*)strtab->data; |
64 | 0 | do { |
65 | 0 | uint32_t h = p->h | strtab->mask; |
66 | 0 | p->next = ((uint32_t*)strtab->data)[(int32_t)h]; |
67 | 0 | ((uint32_t*)strtab->data)[(int32_t)h] = pos; |
68 | 0 | pos += sizeof(ir_strtab_bucket); |
69 | 0 | p++; |
70 | 0 | } while (--i); |
71 | 0 | } |
72 | | |
73 | | static void ir_strtab_grow_buf(ir_strtab *strtab, uint32_t len) |
74 | 0 | { |
75 | 0 | intptr_t old = (intptr_t)strtab->buf; |
76 | |
|
77 | 0 | do { |
78 | 0 | strtab->buf_size *= 2; |
79 | 0 | } while (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1)); |
80 | |
|
81 | 0 | strtab->buf = ir_mem_realloc(strtab->buf, strtab->buf_size); |
82 | 0 | if ((intptr_t)strtab->buf != old) { |
83 | 0 | intptr_t offset = (intptr_t)strtab->buf - old; |
84 | 0 | ir_strtab_bucket *p = (ir_strtab_bucket*)strtab->data; |
85 | 0 | uint32_t i; |
86 | 0 | for (i = strtab->count; i > 0; i--) { |
87 | 0 | p->str += offset; |
88 | 0 | p++; |
89 | 0 | } |
90 | 0 | } |
91 | 0 | } |
92 | | |
93 | | void ir_strtab_init(ir_strtab *strtab, uint32_t size, uint32_t buf_size) |
94 | 0 | { |
95 | 0 | IR_ASSERT(size > 0); |
96 | 0 | uint32_t hash_size = ir_strtab_hash_size(size); |
97 | 0 | char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket)); |
98 | 0 | memset(data, IR_INVALID_IDX, hash_size * sizeof(uint32_t)); |
99 | 0 | strtab->data = (data + (hash_size * sizeof(uint32_t))); |
100 | 0 | strtab->mask = (uint32_t)(-(int32_t)hash_size); |
101 | 0 | strtab->size = size; |
102 | 0 | strtab->count = 0; |
103 | 0 | strtab->pos = 0; |
104 | 0 | if (buf_size) { |
105 | 0 | strtab->buf = ir_mem_malloc(buf_size); |
106 | 0 | strtab->buf_size = buf_size; |
107 | 0 | strtab->buf_top = 0; |
108 | 0 | } else { |
109 | 0 | strtab->buf = NULL; |
110 | 0 | strtab->buf_size = 0; |
111 | 0 | strtab->buf_top = 0; |
112 | 0 | } |
113 | 0 | } |
114 | | |
115 | | ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len) |
116 | 0 | { |
117 | 0 | uint32_t h = ir_str_hash(str, len); |
118 | 0 | const char *data = (const char*)strtab->data; |
119 | 0 | uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; |
120 | 0 | ir_strtab_bucket *p; |
121 | |
|
122 | 0 | while (pos != IR_INVALID_IDX) { |
123 | 0 | p = (ir_strtab_bucket*)(data + pos); |
124 | 0 | if (p->h == h |
125 | 0 | && p->len == len |
126 | 0 | && memcmp(p->str, str, len) == 0) { |
127 | 0 | return p->val; |
128 | 0 | } |
129 | 0 | pos = p->next; |
130 | 0 | } |
131 | 0 | return 0; |
132 | 0 | } |
133 | | |
134 | | ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val) |
135 | 0 | { |
136 | 0 | uint32_t h = ir_str_hash(str, len); |
137 | 0 | char *data = (char*)strtab->data; |
138 | 0 | uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; |
139 | 0 | ir_strtab_bucket *p; |
140 | |
|
141 | 0 | while (pos != IR_INVALID_IDX) { |
142 | 0 | p = (ir_strtab_bucket*)(data + pos); |
143 | 0 | if (p->h == h |
144 | 0 | && p->len == len |
145 | 0 | && memcmp(p->str, str, len) == 0) { |
146 | 0 | return p->val; |
147 | 0 | } |
148 | 0 | pos = p->next; |
149 | 0 | } |
150 | | |
151 | 0 | IR_ASSERT(val != 0); |
152 | |
|
153 | 0 | if (UNEXPECTED(strtab->count >= strtab->size)) { |
154 | 0 | ir_strtab_resize(strtab); |
155 | 0 | data = strtab->data; |
156 | 0 | } |
157 | |
|
158 | 0 | if (strtab->buf) { |
159 | 0 | if (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1)) { |
160 | 0 | ir_strtab_grow_buf(strtab, len + 1); |
161 | 0 | } |
162 | |
|
163 | 0 | memcpy(strtab->buf + strtab->buf_top, str, len); |
164 | 0 | strtab->buf[strtab->buf_top + len] = 0; |
165 | 0 | str = (const char*)strtab->buf + strtab->buf_top; |
166 | 0 | strtab->buf_top += len + 1; |
167 | 0 | } |
168 | |
|
169 | 0 | pos = strtab->pos; |
170 | 0 | strtab->pos += sizeof(ir_strtab_bucket); |
171 | 0 | strtab->count++; |
172 | 0 | p = (ir_strtab_bucket*)(data + pos); |
173 | 0 | p->h = h; |
174 | 0 | p->len = len; |
175 | 0 | p->str = str; |
176 | 0 | h |= strtab->mask; |
177 | 0 | p->next = ((uint32_t*)data)[(int32_t)h]; |
178 | 0 | ((uint32_t*)data)[(int32_t)h] = pos; |
179 | 0 | p->val = val; |
180 | 0 | return val; |
181 | 0 | } |
182 | | |
183 | | ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val) |
184 | 0 | { |
185 | 0 | uint32_t h = ir_str_hash(str, len); |
186 | 0 | char *data = (char*)strtab->data; |
187 | 0 | uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; |
188 | 0 | ir_strtab_bucket *p; |
189 | |
|
190 | 0 | while (pos != IR_INVALID_IDX) { |
191 | 0 | p = (ir_strtab_bucket*)(data + pos); |
192 | 0 | if (p->h == h |
193 | 0 | && p->len == len |
194 | 0 | && memcmp(p->str, str, len) == 0) { |
195 | 0 | return p->val = val; |
196 | 0 | } |
197 | 0 | pos = p->next; |
198 | 0 | } |
199 | 0 | return 0; |
200 | 0 | } |
201 | | |
202 | | const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx) |
203 | 0 | { |
204 | 0 | IR_ASSERT(idx >= 0 && (uint32_t)idx < strtab->count); |
205 | 0 | return ((const ir_strtab_bucket*)strtab->data)[idx].str; |
206 | 0 | } |
207 | | |
208 | | const char *ir_strtab_strl(const ir_strtab *strtab, ir_ref idx, size_t *len) |
209 | 0 | { |
210 | 0 | const ir_strtab_bucket *b = ((const ir_strtab_bucket*)strtab->data) + idx; |
211 | 0 | IR_ASSERT(idx >= 0 && (uint32_t)idx < strtab->count); |
212 | 0 | *len = b->len; |
213 | 0 | return b->str; |
214 | 0 | } |
215 | | |
216 | | void ir_strtab_free(ir_strtab *strtab) |
217 | 0 | { |
218 | 0 | uint32_t hash_size = (uint32_t)(-(int32_t)strtab->mask); |
219 | 0 | char *data = (char*)strtab->data - (hash_size * sizeof(uint32_t)); |
220 | 0 | ir_mem_free(data); |
221 | 0 | strtab->data = NULL; |
222 | 0 | if (strtab->buf) { |
223 | 0 | ir_mem_free(strtab->buf); |
224 | 0 | strtab->buf = NULL; |
225 | 0 | } |
226 | 0 | } |
227 | | |
228 | | void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func) |
229 | 0 | { |
230 | 0 | uint32_t i; |
231 | |
|
232 | 0 | for (i = 0; i < strtab->count; i++) { |
233 | 0 | const ir_strtab_bucket *b = &((ir_strtab_bucket*)strtab->data)[i]; |
234 | 0 | func(b->str, b->len, b->val); |
235 | 0 | } |
236 | 0 | } |