/src/zeek/src/script_opt/GenIDDefs.cc
Line | Count | Source |
1 | | // See the file "COPYING" in the main distribution directory for copyright. |
2 | | |
3 | | #include "zeek/script_opt/GenIDDefs.h" |
4 | | |
5 | | #include "zeek/Desc.h" |
6 | | #include "zeek/Expr.h" |
7 | | #include "zeek/Reporter.h" |
8 | | #include "zeek/Scope.h" |
9 | | #include "zeek/script_opt/Expr.h" |
10 | | #include "zeek/script_opt/ScriptOpt.h" |
11 | | #include "zeek/script_opt/StmtOptInfo.h" |
12 | | |
13 | | namespace zeek::detail { |
14 | | |
15 | | GenIDDefs::GenIDDefs(std::shared_ptr<ProfileFunc> _pf, const FuncPtr& f, ScopePtr scope, StmtPtr body) |
16 | 0 | : pf(std::move(_pf)) { |
17 | 0 | TraverseFunction(f, scope, body); |
18 | 0 | } |
19 | | |
20 | 0 | void GenIDDefs::TraverseFunction(const FuncPtr& f, ScopePtr scope, StmtPtr body) { |
21 | 0 | func_flavor = f->Flavor(); |
22 | | |
23 | | // Establish the outermost set of identifiers. |
24 | 0 | modified_IDs.emplace_back(); |
25 | |
|
26 | 0 | for ( const auto& g : pf->Globals() ) { |
27 | 0 | g->GetOptInfo()->Clear(); |
28 | 0 | TrackID(g); |
29 | 0 | } |
30 | | |
31 | | // Clear the locals before processing the arguments, since |
32 | | // they're included among the locals. |
33 | 0 | for ( const auto& l : pf->Locals() ) |
34 | 0 | l->GetOptInfo()->Clear(); |
35 | |
|
36 | 0 | const auto& args = scope->OrderedVars(); |
37 | 0 | int nparam = f->GetType()->Params()->NumFields(); |
38 | |
|
39 | 0 | for ( const auto& a : args ) { |
40 | 0 | if ( --nparam < 0 ) |
41 | 0 | break; |
42 | | |
43 | 0 | a->GetOptInfo()->Clear(); |
44 | 0 | TrackID(a); |
45 | 0 | } |
46 | |
|
47 | 0 | for ( const auto& o_id : cast_intrusive<ScriptFunc>(f)->GetOuterIDs() ) { |
48 | 0 | o_id->GetOptInfo()->Clear(); |
49 | 0 | TrackID(o_id); |
50 | 0 | } |
51 | |
|
52 | 0 | stmt_num = 0; // 0 = "before the first statement" |
53 | |
|
54 | 0 | body->Traverse(this); |
55 | 0 | } |
56 | | |
57 | 0 | TraversalCode GenIDDefs::PreStmt(const Stmt* s) { |
58 | 0 | last_stmt_traversed = s; |
59 | |
|
60 | 0 | auto si = s->GetOptInfo(); |
61 | 0 | si->stmt_num = ++stmt_num; |
62 | 0 | si->block_level = confluence_blocks.size() + 1; |
63 | |
|
64 | 0 | switch ( s->Tag() ) { |
65 | 0 | case STMT_CATCH_RETURN: { |
66 | 0 | auto cr = s->AsCatchReturnStmt(); |
67 | 0 | auto block = cr->Block(); |
68 | |
|
69 | 0 | cr_active.push_back(confluence_blocks.size()); |
70 | | |
71 | | // Confluence for the bodies of catch-return's is a bit complex. |
72 | | // We would like any expressions computed at the outermost level |
73 | | // of the body to be available for script optimization *outside* |
74 | | // the catch-return; this in particular is helpful in optimizing |
75 | | // coalesced event handlers, but has other benefits as well. |
76 | | // |
77 | | // However, if one of the outermost statements executes a "return", |
78 | | // then any outermost expressions computed after it might not |
79 | | // be available. Put another way, the potentially-returning |
80 | | // statement starts a confluence region that runs through the end |
81 | | // of the body. |
82 | | // |
83 | | // To deal with this, we start off without a new confluence block, |
84 | | // but create one upon encountering a statement that could return. |
85 | |
|
86 | 0 | bool did_confluence = false; |
87 | |
|
88 | 0 | if ( block->Tag() == STMT_LIST ) { |
89 | 0 | auto prev_stmt = s; |
90 | 0 | auto& stmts = block->AsStmtList()->Stmts(); |
91 | 0 | for ( auto& st : stmts ) { |
92 | 0 | if ( ! did_confluence && st->CouldReturn(false) ) { |
93 | 0 | StartConfluenceBlock(prev_stmt); |
94 | 0 | did_confluence = true; |
95 | 0 | } |
96 | |
|
97 | 0 | st->Traverse(this); |
98 | 0 | } |
99 | 0 | } |
100 | 0 | else { |
101 | | // If there's just a single statement then there are two |
102 | | // possibilities. If the statement is atomic (no sub-statements) |
103 | | // then given that it's in reduced form, it's not going to have |
104 | | // any expressions that we can leverage. OTOH, if it's compound |
105 | | // (if/for/while/switch) then there's no safe re-using of |
106 | | // expressions within it since they may-or-may-not wind up |
107 | | // being computed. So we should always start a new block. |
108 | 0 | StartConfluenceBlock(s); |
109 | 0 | did_confluence = true; |
110 | 0 | block->Traverse(this); |
111 | 0 | } |
112 | |
|
113 | 0 | if ( did_confluence ) |
114 | 0 | EndConfluenceBlock(); |
115 | |
|
116 | 0 | cr_active.pop_back(); |
117 | |
|
118 | 0 | auto retvar = cr->RetVar(); |
119 | 0 | if ( retvar ) |
120 | 0 | TrackID(retvar->IdPtr()); |
121 | |
|
122 | 0 | return TC_ABORTSTMT; |
123 | 0 | } |
124 | | |
125 | 0 | case STMT_IF: { |
126 | 0 | auto i = s->AsIfStmt(); |
127 | 0 | auto cond = i->StmtExpr(); |
128 | 0 | auto t_branch = i->TrueBranch(); |
129 | 0 | auto f_branch = i->FalseBranch(); |
130 | |
|
131 | 0 | cond->Traverse(this); |
132 | |
|
133 | 0 | StartConfluenceBlock(s); |
134 | |
|
135 | 0 | t_branch->Traverse(this); |
136 | 0 | if ( ! t_branch->NoFlowAfter(false) ) |
137 | 0 | BranchBeyond(last_stmt_traversed, s, true); |
138 | |
|
139 | 0 | f_branch->Traverse(this); |
140 | 0 | if ( ! f_branch->NoFlowAfter(false) ) |
141 | 0 | BranchBeyond(last_stmt_traversed, s, true); |
142 | |
|
143 | 0 | EndConfluenceBlock(true); |
144 | |
|
145 | 0 | return TC_ABORTSTMT; |
146 | 0 | } |
147 | | |
148 | 0 | case STMT_SWITCH: AnalyzeSwitch(s->AsSwitchStmt()); return TC_ABORTSTMT; |
149 | | |
150 | 0 | case STMT_FOR: { |
151 | 0 | auto f = s->AsForStmt(); |
152 | |
|
153 | 0 | auto ids = f->LoopVars(); |
154 | 0 | auto e = f->LoopExpr(); |
155 | 0 | auto body = f->LoopBody(); |
156 | 0 | auto val_var = f->ValueVar(); |
157 | |
|
158 | 0 | e->Traverse(this); |
159 | |
|
160 | 0 | for ( const auto& id : *ids ) |
161 | 0 | TrackID(id); |
162 | |
|
163 | 0 | if ( val_var ) |
164 | 0 | TrackID(val_var); |
165 | |
|
166 | 0 | StartConfluenceBlock(s); |
167 | 0 | body->Traverse(this); |
168 | |
|
169 | 0 | if ( ! body->NoFlowAfter(false) ) |
170 | 0 | BranchBackTo(last_stmt_traversed, s, true); |
171 | |
|
172 | 0 | EndConfluenceBlock(); |
173 | |
|
174 | 0 | return TC_ABORTSTMT; |
175 | 0 | } |
176 | | |
177 | 0 | case STMT_WHEN: { |
178 | 0 | auto w = s->AsWhenStmt(); |
179 | |
|
180 | 0 | for ( const auto& c : *w->Captures() ) |
181 | 0 | CheckVarUsage(with_location_of(make_intrusive<NameExpr>(c.Id()), w).get(), c.Id()); |
182 | |
|
183 | 0 | auto te = w->TimeoutExpr(); |
184 | 0 | if ( te ) |
185 | 0 | te->Traverse(this); |
186 | |
|
187 | 0 | return TC_ABORTSTMT; |
188 | 0 | } |
189 | | |
190 | 0 | case STMT_WHILE: { |
191 | 0 | auto w = s->AsWhileStmt(); |
192 | |
|
193 | 0 | StartConfluenceBlock(s); |
194 | |
|
195 | 0 | auto cond_pred_stmt = w->CondPredStmt(); |
196 | 0 | if ( cond_pred_stmt ) |
197 | 0 | cond_pred_stmt->Traverse(this); |
198 | | |
199 | | // Important to traverse the condition in its version |
200 | | // interpreted as a statement, so that when evaluating |
201 | | // its variable usage, that's done in the context of |
202 | | // *after* cond_pred_stmt executes, rather than as |
203 | | // part of that execution. |
204 | 0 | auto cond_stmt = w->ConditionAsStmt(); |
205 | 0 | cond_stmt->Traverse(this); |
206 | |
|
207 | 0 | auto body = w->Body(); |
208 | 0 | body->Traverse(this); |
209 | |
|
210 | 0 | if ( ! body->NoFlowAfter(false) ) |
211 | 0 | BranchBackTo(last_stmt_traversed, s, true); |
212 | |
|
213 | 0 | EndConfluenceBlock(); |
214 | |
|
215 | 0 | return TC_ABORTSTMT; |
216 | 0 | } |
217 | | |
218 | 0 | default: return TC_CONTINUE; |
219 | 0 | } |
220 | 0 | } |
221 | | |
222 | 0 | void GenIDDefs::AnalyzeSwitch(const SwitchStmt* sw) { |
223 | 0 | sw->StmtExpr()->Traverse(this); |
224 | |
|
225 | 0 | for ( const auto& c : *sw->Cases() ) { |
226 | | // Important: the confluence block is the switch statement |
227 | | // itself, not the case body. This is needed so that variable |
228 | | // assignments made inside case bodies that end with |
229 | | // "fallthrough" are correctly propagated to the next case |
230 | | // body. |
231 | 0 | StartConfluenceBlock(sw); |
232 | |
|
233 | 0 | auto body = c->Body(); |
234 | |
|
235 | 0 | auto exprs = c->ExprCases(); |
236 | 0 | if ( exprs ) |
237 | 0 | exprs->Traverse(this); |
238 | |
|
239 | 0 | auto type_ids = c->TypeCases(); |
240 | 0 | if ( type_ids ) { |
241 | 0 | for ( const auto& id : *type_ids ) |
242 | 0 | if ( id->Name() ) |
243 | 0 | TrackID(id); |
244 | 0 | } |
245 | |
|
246 | 0 | body->Traverse(this); |
247 | 0 | EndConfluenceBlock(false); |
248 | 0 | } |
249 | 0 | } |
250 | | |
251 | 0 | TraversalCode GenIDDefs::PostStmt(const Stmt* s) { |
252 | 0 | switch ( s->Tag() ) { |
253 | 0 | case STMT_INIT: { |
254 | 0 | auto init = s->AsInitStmt(); |
255 | 0 | auto& inits = init->Inits(); |
256 | |
|
257 | 0 | for ( const auto& id : inits ) { |
258 | 0 | auto id_t = id->GetType(); |
259 | | |
260 | | // Only aggregates get initialized. |
261 | 0 | if ( zeek::IsAggr(id->GetType()->Tag()) ) |
262 | 0 | TrackID(id); |
263 | 0 | } |
264 | |
|
265 | 0 | break; |
266 | 0 | } |
267 | | |
268 | 0 | case STMT_RETURN: ReturnAt(s); break; |
269 | | |
270 | 0 | case STMT_NEXT: BranchBackTo(last_stmt_traversed, FindLoop(), false); break; |
271 | | |
272 | 0 | case STMT_BREAK: { |
273 | 0 | auto target = FindBreakTarget(); |
274 | |
|
275 | 0 | if ( target ) |
276 | 0 | BranchBeyond(s, target, false); |
277 | | |
278 | 0 | else { |
279 | 0 | ASSERT(func_flavor == FUNC_FLAVOR_HOOK); |
280 | 0 | ReturnAt(s); |
281 | 0 | } |
282 | | |
283 | 0 | break; |
284 | 0 | } |
285 | | |
286 | | // No need to do anything, the work all occurs |
287 | | // with NoFlowAfter. |
288 | 0 | case STMT_FALLTHROUGH: |
289 | 0 | default: break; |
290 | 0 | } |
291 | | |
292 | 0 | return TC_CONTINUE; |
293 | 0 | } |
294 | | |
295 | 0 | TraversalCode GenIDDefs::PreExpr(const Expr* e) { |
296 | 0 | e->GetOptInfo()->stmt_num = stmt_num; |
297 | |
|
298 | 0 | switch ( e->Tag() ) { |
299 | 0 | case EXPR_NAME: CheckVarUsage(e, e->AsNameExpr()->IdPtr()); break; |
300 | | |
301 | 0 | case EXPR_ASSIGN: { |
302 | 0 | auto lhs = e->GetOp1(); |
303 | |
|
304 | 0 | if ( lhs->Tag() == EXPR_LIST && in_table_constructor > 0 ) |
305 | | // This is the index portion of a table constructor, process as |
306 | | // that rather than as a compound assignment to an "any" value. |
307 | 0 | return TC_CONTINUE; |
308 | | |
309 | 0 | auto op2 = e->GetOp2(); |
310 | 0 | op2->Traverse(this); |
311 | |
|
312 | 0 | if ( ! CheckLHS(lhs, op2) ) |
313 | | // Not a simple assignment (or group of assignments), |
314 | | // so analyze the accesses to check for use of |
315 | | // possibly undefined values. |
316 | 0 | lhs->Traverse(this); |
317 | |
|
318 | 0 | return TC_ABORTSTMT; |
319 | 0 | } |
320 | | |
321 | 0 | case EXPR_COND: |
322 | | // Special hack. We turn off checking for usage issues |
323 | | // inside conditionals. This is because we use them heavily |
324 | | // to deconstruct logical expressions for which the actual |
325 | | // operand access is safe (guaranteed not to access a value |
326 | | // that hasn't been undefined), but the flow analysis has |
327 | | // trouble determining that. |
328 | 0 | ++suppress_usage; |
329 | 0 | e->GetOp1()->Traverse(this); |
330 | 0 | e->GetOp2()->Traverse(this); |
331 | 0 | e->GetOp3()->Traverse(this); |
332 | 0 | --suppress_usage; |
333 | |
|
334 | 0 | return TC_ABORTSTMT; |
335 | | |
336 | 0 | case EXPR_LAMBDA: { |
337 | 0 | auto l = static_cast<const LambdaExpr*>(e); |
338 | 0 | const auto& ids = l->OuterIDs(); |
339 | |
|
340 | 0 | for ( auto& id : ids ) |
341 | 0 | CheckVarUsage(e, id); |
342 | | |
343 | | // Don't descend into the lambda body - we'll analyze and |
344 | | // optimize it separately, as its own function. |
345 | 0 | return TC_ABORTSTMT; |
346 | 0 | } |
347 | | |
348 | 0 | case EXPR_TABLE_CONSTRUCTOR: ++in_table_constructor; break; |
349 | | |
350 | 0 | default: break; |
351 | 0 | } |
352 | | |
353 | 0 | return TC_CONTINUE; |
354 | 0 | } |
355 | | |
356 | 0 | TraversalCode GenIDDefs::PostExpr(const Expr* e) { |
357 | | // Attend to expressions that reflect assignments after |
358 | | // execution, but for which the assignment target was |
359 | | // also an accessed value (so if we analyzed them |
360 | | // in PreExpr then we'd have had to do manual traversals |
361 | | // of their operands). |
362 | |
|
363 | 0 | auto t = e->Tag(); |
364 | 0 | if ( t == EXPR_INCR || t == EXPR_DECR || t == EXPR_ADD_TO || t == EXPR_REMOVE_FROM ) { |
365 | 0 | auto op = e->GetOp1(); |
366 | 0 | if ( ! IsAggr(op) ) |
367 | 0 | (void)CheckLHS(op); |
368 | 0 | } |
369 | 0 | else if ( t == EXPR_TABLE_CONSTRUCTOR ) |
370 | 0 | --in_table_constructor; |
371 | |
|
372 | 0 | return TC_CONTINUE; |
373 | 0 | } |
374 | | |
375 | 0 | bool GenIDDefs::CheckLHS(const ExprPtr& lhs, const ExprPtr& rhs) { |
376 | 0 | switch ( lhs->Tag() ) { |
377 | 0 | case EXPR_REF: return CheckLHS(lhs->GetOp1(), rhs); |
378 | | |
379 | 0 | case EXPR_NAME: { |
380 | 0 | auto n = lhs->AsNameExpr(); |
381 | 0 | TrackID(n->IdPtr(), rhs); |
382 | 0 | return true; |
383 | 0 | } |
384 | | |
385 | 0 | case EXPR_LIST: { // look for [a, b, c] = any_val |
386 | 0 | auto l = lhs->AsListExpr(); |
387 | 0 | for ( const auto& expr : l->Exprs() ) { |
388 | 0 | if ( expr->Tag() != EXPR_NAME ) |
389 | | // This will happen for table initializers, |
390 | | // for example. |
391 | 0 | return false; |
392 | | |
393 | 0 | auto n = expr->AsNameExpr(); |
394 | 0 | TrackID(n->IdPtr()); |
395 | 0 | } |
396 | | |
397 | 0 | return true; |
398 | 0 | } |
399 | | |
400 | | // If we want to track record field initializations, |
401 | | // we'd handle that here. |
402 | 0 | case EXPR_FIELD: |
403 | | |
404 | | // If we wanted to track potential alterations of |
405 | | // aggregates, we'd do that here. |
406 | 0 | case EXPR_INDEX: return false; |
407 | | |
408 | 0 | default: reporter->InternalError("bad tag in GenIDDefs::CheckLHS"); |
409 | 0 | } |
410 | 0 | } |
411 | | |
412 | 0 | bool GenIDDefs::IsAggr(const Expr* e) const { |
413 | 0 | if ( e->Tag() != EXPR_NAME ) |
414 | 0 | return false; |
415 | | |
416 | 0 | auto n = e->AsNameExpr(); |
417 | 0 | auto id = n->Id(); |
418 | 0 | auto tag = id->GetType()->Tag(); |
419 | |
|
420 | 0 | return zeek::IsAggr(tag); |
421 | 0 | } |
422 | | |
423 | 0 | void GenIDDefs::CheckVarUsage(const Expr* e, const IDPtr& id) { |
424 | 0 | if ( analysis_options.usage_issues != 1 || id->IsGlobal() || suppress_usage > 0 ) |
425 | 0 | return; |
426 | | |
427 | 0 | auto oi = id->GetOptInfo(); |
428 | |
|
429 | 0 | if ( ! oi->DidUndefinedWarning() && ! oi->IsDefinedBefore(last_stmt_traversed) && |
430 | 0 | ! id->GetAttr(ATTR_IS_ASSIGNED) ) { |
431 | 0 | if ( ! oi->IsPossiblyDefinedBefore(last_stmt_traversed) ) { |
432 | 0 | e->Warn("used without definition"); |
433 | 0 | oi->SetDidUndefinedWarning(); |
434 | 0 | } |
435 | | |
436 | 0 | else if ( ! oi->DidPossiblyUndefinedWarning() ) { |
437 | 0 | e->Warn("possibly used without definition"); |
438 | 0 | oi->SetDidPossiblyUndefinedWarning(); |
439 | 0 | } |
440 | 0 | } |
441 | 0 | } |
442 | | |
443 | 0 | void GenIDDefs::StartConfluenceBlock(const Stmt* s) { |
444 | 0 | confluence_blocks.push_back(s); |
445 | 0 | modified_IDs.emplace_back(); |
446 | 0 | } |
447 | | |
448 | 0 | void GenIDDefs::EndConfluenceBlock(bool no_orig) { |
449 | 0 | for ( const auto& id : modified_IDs.back() ) |
450 | 0 | id->GetOptInfo()->ConfluenceBlockEndsAfter(last_stmt_traversed, no_orig); |
451 | |
|
452 | 0 | confluence_blocks.pop_back(); |
453 | 0 | modified_IDs.pop_back(); |
454 | 0 | } |
455 | | |
456 | 0 | void GenIDDefs::BranchBackTo(const Stmt* from, const Stmt* to, bool close_all) { |
457 | 0 | for ( const auto& id : modified_IDs.back() ) |
458 | 0 | id->GetOptInfo()->BranchBackTo(from, to, close_all); |
459 | 0 | } |
460 | | |
461 | 0 | void GenIDDefs::BranchBeyond(const Stmt* from, const Stmt* to, bool close_all) { |
462 | 0 | for ( const auto& id : modified_IDs.back() ) |
463 | 0 | id->GetOptInfo()->BranchBeyond(from, to, close_all); |
464 | |
|
465 | 0 | to->GetOptInfo()->contains_branch_beyond = true; |
466 | 0 | } |
467 | | |
468 | 0 | const Stmt* GenIDDefs::FindLoop() { |
469 | 0 | int i = confluence_blocks.size() - 1; |
470 | 0 | while ( i >= 0 ) { |
471 | 0 | auto t = confluence_blocks[i]->Tag(); |
472 | 0 | if ( t == STMT_WHILE || t == STMT_FOR ) |
473 | 0 | break; |
474 | | |
475 | 0 | --i; |
476 | 0 | } |
477 | |
|
478 | 0 | ASSERT(i >= 0); |
479 | | |
480 | 0 | return confluence_blocks[i]; |
481 | 0 | } |
482 | | |
483 | 0 | const Stmt* GenIDDefs::FindBreakTarget() { |
484 | 0 | int i = confluence_blocks.size() - 1; |
485 | 0 | while ( i >= 0 ) { |
486 | 0 | auto cb = confluence_blocks[i]; |
487 | 0 | auto t = cb->Tag(); |
488 | 0 | if ( t == STMT_WHILE || t == STMT_FOR || t == STMT_SWITCH ) |
489 | 0 | return cb; |
490 | | |
491 | 0 | --i; |
492 | 0 | } |
493 | | |
494 | 0 | return nullptr; |
495 | 0 | } |
496 | | |
497 | 0 | void GenIDDefs::ReturnAt(const Stmt* s) { |
498 | | // If we're right at a catch-return then we don't want to make the |
499 | | // identifier as encountering a scope-ending "return" here. By avoiding |
500 | | // that, we're able to do optimization across catch-return blocks. |
501 | 0 | if ( cr_active.empty() || cr_active.back() != confluence_blocks.size() ) |
502 | 0 | for ( const auto& id : modified_IDs.back() ) |
503 | 0 | id->GetOptInfo()->ReturnAt(s); |
504 | 0 | } |
505 | | |
506 | 0 | void GenIDDefs::TrackID(const IDPtr& id, const ExprPtr& e) { |
507 | 0 | auto oi = id->GetOptInfo(); |
508 | | |
509 | | // The 4th argument here is hardwired to 0, meaning "assess across all |
510 | | // confluence blocks". If we want definitions inside catch-return bodies |
511 | | // to not propagate outside those bodies, we'd instead create new |
512 | | // confluence blocks for catch-return statements, and use their identifier |
513 | | // here to set the lowest limit for definitions. For now we leave |
514 | | // DefinedAfter as capable of supporting that distinction in case we |
515 | | // find need to revive it in the future. |
516 | 0 | oi->SetDefinedAfter(last_stmt_traversed, e, confluence_blocks, 0); |
517 | | |
518 | | // Ensure we track this identifier across all relevant |
519 | | // confluence regions. |
520 | 0 | for ( auto i = 0U; i < confluence_blocks.size(); ++i ) |
521 | | // Add one because modified_IDs includes outer non-confluence |
522 | | // block. |
523 | 0 | modified_IDs[i + 1].insert(id); |
524 | |
|
525 | 0 | if ( confluence_blocks.empty() ) |
526 | | // This is a definition at the outermost level. |
527 | 0 | modified_IDs[0].insert(id); |
528 | 0 | } |
529 | | |
530 | | } // namespace zeek::detail |