Coverage Report

Created: 2026-05-12 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zeek/src/script_opt/GenIDDefs.cc
Line
Count
Source
1
// See the file "COPYING" in the main distribution directory for copyright.
2
3
#include "zeek/script_opt/GenIDDefs.h"
4
5
#include "zeek/Desc.h"
6
#include "zeek/Expr.h"
7
#include "zeek/Reporter.h"
8
#include "zeek/Scope.h"
9
#include "zeek/script_opt/Expr.h"
10
#include "zeek/script_opt/ScriptOpt.h"
11
#include "zeek/script_opt/StmtOptInfo.h"
12
13
namespace zeek::detail {
14
15
GenIDDefs::GenIDDefs(std::shared_ptr<ProfileFunc> _pf, const FuncPtr& f, ScopePtr scope, StmtPtr body)
16
0
    : pf(std::move(_pf)) {
17
0
    TraverseFunction(f, scope, body);
18
0
}
19
20
0
void GenIDDefs::TraverseFunction(const FuncPtr& f, ScopePtr scope, StmtPtr body) {
21
0
    func_flavor = f->Flavor();
22
23
    // Establish the outermost set of identifiers.
24
0
    modified_IDs.emplace_back();
25
26
0
    for ( const auto& g : pf->Globals() ) {
27
0
        g->GetOptInfo()->Clear();
28
0
        TrackID(g);
29
0
    }
30
31
    // Clear the locals before processing the arguments, since
32
    // they're included among the locals.
33
0
    for ( const auto& l : pf->Locals() )
34
0
        l->GetOptInfo()->Clear();
35
36
0
    const auto& args = scope->OrderedVars();
37
0
    int nparam = f->GetType()->Params()->NumFields();
38
39
0
    for ( const auto& a : args ) {
40
0
        if ( --nparam < 0 )
41
0
            break;
42
43
0
        a->GetOptInfo()->Clear();
44
0
        TrackID(a);
45
0
    }
46
47
0
    for ( const auto& o_id : cast_intrusive<ScriptFunc>(f)->GetOuterIDs() ) {
48
0
        o_id->GetOptInfo()->Clear();
49
0
        TrackID(o_id);
50
0
    }
51
52
0
    stmt_num = 0; // 0 = "before the first statement"
53
54
0
    body->Traverse(this);
55
0
}
56
57
0
TraversalCode GenIDDefs::PreStmt(const Stmt* s) {
58
0
    last_stmt_traversed = s;
59
60
0
    auto si = s->GetOptInfo();
61
0
    si->stmt_num = ++stmt_num;
62
0
    si->block_level = confluence_blocks.size() + 1;
63
64
0
    switch ( s->Tag() ) {
65
0
        case STMT_CATCH_RETURN: {
66
0
            auto cr = s->AsCatchReturnStmt();
67
0
            auto block = cr->Block();
68
69
0
            cr_active.push_back(confluence_blocks.size());
70
71
            // Confluence for the bodies of catch-return's is a bit complex.
72
            // We would like any expressions computed at the outermost level
73
            // of the body to be available for script optimization *outside*
74
            // the catch-return; this in particular is helpful in optimizing
75
            // coalesced event handlers, but has other benefits as well.
76
            //
77
            // However, if one of the outermost statements executes a "return",
78
            // then any outermost expressions computed after it might not
79
            // be available. Put another way, the potentially-returning
80
            // statement starts a confluence region that runs through the end
81
            // of the body.
82
            //
83
            // To deal with this, we start off without a new confluence block,
84
            // but create one upon encountering a statement that could return.
85
86
0
            bool did_confluence = false;
87
88
0
            if ( block->Tag() == STMT_LIST ) {
89
0
                auto prev_stmt = s;
90
0
                auto& stmts = block->AsStmtList()->Stmts();
91
0
                for ( auto& st : stmts ) {
92
0
                    if ( ! did_confluence && st->CouldReturn(false) ) {
93
0
                        StartConfluenceBlock(prev_stmt);
94
0
                        did_confluence = true;
95
0
                    }
96
97
0
                    st->Traverse(this);
98
0
                }
99
0
            }
100
0
            else {
101
                // If there's just a single statement then there are two
102
                // possibilities. If the statement is atomic (no sub-statements)
103
                // then given that it's in reduced form, it's not going to have
104
                // any expressions that we can leverage. OTOH, if it's compound
105
                // (if/for/while/switch) then there's no safe re-using of
106
                // expressions within it since they may-or-may-not wind up
107
                // being computed. So we should always start a new block.
108
0
                StartConfluenceBlock(s);
109
0
                did_confluence = true;
110
0
                block->Traverse(this);
111
0
            }
112
113
0
            if ( did_confluence )
114
0
                EndConfluenceBlock();
115
116
0
            cr_active.pop_back();
117
118
0
            auto retvar = cr->RetVar();
119
0
            if ( retvar )
120
0
                TrackID(retvar->IdPtr());
121
122
0
            return TC_ABORTSTMT;
123
0
        }
124
125
0
        case STMT_IF: {
126
0
            auto i = s->AsIfStmt();
127
0
            auto cond = i->StmtExpr();
128
0
            auto t_branch = i->TrueBranch();
129
0
            auto f_branch = i->FalseBranch();
130
131
0
            cond->Traverse(this);
132
133
0
            StartConfluenceBlock(s);
134
135
0
            t_branch->Traverse(this);
136
0
            if ( ! t_branch->NoFlowAfter(false) )
137
0
                BranchBeyond(last_stmt_traversed, s, true);
138
139
0
            f_branch->Traverse(this);
140
0
            if ( ! f_branch->NoFlowAfter(false) )
141
0
                BranchBeyond(last_stmt_traversed, s, true);
142
143
0
            EndConfluenceBlock(true);
144
145
0
            return TC_ABORTSTMT;
146
0
        }
147
148
0
        case STMT_SWITCH: AnalyzeSwitch(s->AsSwitchStmt()); return TC_ABORTSTMT;
149
150
0
        case STMT_FOR: {
151
0
            auto f = s->AsForStmt();
152
153
0
            auto ids = f->LoopVars();
154
0
            auto e = f->LoopExpr();
155
0
            auto body = f->LoopBody();
156
0
            auto val_var = f->ValueVar();
157
158
0
            e->Traverse(this);
159
160
0
            for ( const auto& id : *ids )
161
0
                TrackID(id);
162
163
0
            if ( val_var )
164
0
                TrackID(val_var);
165
166
0
            StartConfluenceBlock(s);
167
0
            body->Traverse(this);
168
169
0
            if ( ! body->NoFlowAfter(false) )
170
0
                BranchBackTo(last_stmt_traversed, s, true);
171
172
0
            EndConfluenceBlock();
173
174
0
            return TC_ABORTSTMT;
175
0
        }
176
177
0
        case STMT_WHEN: {
178
0
            auto w = s->AsWhenStmt();
179
180
0
            for ( const auto& c : *w->Captures() )
181
0
                CheckVarUsage(with_location_of(make_intrusive<NameExpr>(c.Id()), w).get(), c.Id());
182
183
0
            auto te = w->TimeoutExpr();
184
0
            if ( te )
185
0
                te->Traverse(this);
186
187
0
            return TC_ABORTSTMT;
188
0
        }
189
190
0
        case STMT_WHILE: {
191
0
            auto w = s->AsWhileStmt();
192
193
0
            StartConfluenceBlock(s);
194
195
0
            auto cond_pred_stmt = w->CondPredStmt();
196
0
            if ( cond_pred_stmt )
197
0
                cond_pred_stmt->Traverse(this);
198
199
            // Important to traverse the condition in its version
200
            // interpreted as a statement, so that when evaluating
201
            // its variable usage, that's done in the context of
202
            // *after* cond_pred_stmt executes, rather than as
203
            // part of that execution.
204
0
            auto cond_stmt = w->ConditionAsStmt();
205
0
            cond_stmt->Traverse(this);
206
207
0
            auto body = w->Body();
208
0
            body->Traverse(this);
209
210
0
            if ( ! body->NoFlowAfter(false) )
211
0
                BranchBackTo(last_stmt_traversed, s, true);
212
213
0
            EndConfluenceBlock();
214
215
0
            return TC_ABORTSTMT;
216
0
        }
217
218
0
        default: return TC_CONTINUE;
219
0
    }
220
0
}
221
222
0
void GenIDDefs::AnalyzeSwitch(const SwitchStmt* sw) {
223
0
    sw->StmtExpr()->Traverse(this);
224
225
0
    for ( const auto& c : *sw->Cases() ) {
226
        // Important: the confluence block is the switch statement
227
        // itself, not the case body.  This is needed so that variable
228
        // assignments made inside case bodies that end with
229
        // "fallthrough" are correctly propagated to the next case
230
        // body.
231
0
        StartConfluenceBlock(sw);
232
233
0
        auto body = c->Body();
234
235
0
        auto exprs = c->ExprCases();
236
0
        if ( exprs )
237
0
            exprs->Traverse(this);
238
239
0
        auto type_ids = c->TypeCases();
240
0
        if ( type_ids ) {
241
0
            for ( const auto& id : *type_ids )
242
0
                if ( id->Name() )
243
0
                    TrackID(id);
244
0
        }
245
246
0
        body->Traverse(this);
247
0
        EndConfluenceBlock(false);
248
0
    }
249
0
}
250
251
0
TraversalCode GenIDDefs::PostStmt(const Stmt* s) {
252
0
    switch ( s->Tag() ) {
253
0
        case STMT_INIT: {
254
0
            auto init = s->AsInitStmt();
255
0
            auto& inits = init->Inits();
256
257
0
            for ( const auto& id : inits ) {
258
0
                auto id_t = id->GetType();
259
260
                // Only aggregates get initialized.
261
0
                if ( zeek::IsAggr(id->GetType()->Tag()) )
262
0
                    TrackID(id);
263
0
            }
264
265
0
            break;
266
0
        }
267
268
0
        case STMT_RETURN: ReturnAt(s); break;
269
270
0
        case STMT_NEXT: BranchBackTo(last_stmt_traversed, FindLoop(), false); break;
271
272
0
        case STMT_BREAK: {
273
0
            auto target = FindBreakTarget();
274
275
0
            if ( target )
276
0
                BranchBeyond(s, target, false);
277
278
0
            else {
279
0
                ASSERT(func_flavor == FUNC_FLAVOR_HOOK);
280
0
                ReturnAt(s);
281
0
            }
282
283
0
            break;
284
0
        }
285
286
        // No need to do anything, the work all occurs
287
        // with NoFlowAfter.
288
0
        case STMT_FALLTHROUGH:
289
0
        default: break;
290
0
    }
291
292
0
    return TC_CONTINUE;
293
0
}
294
295
0
TraversalCode GenIDDefs::PreExpr(const Expr* e) {
296
0
    e->GetOptInfo()->stmt_num = stmt_num;
297
298
0
    switch ( e->Tag() ) {
299
0
        case EXPR_NAME: CheckVarUsage(e, e->AsNameExpr()->IdPtr()); break;
300
301
0
        case EXPR_ASSIGN: {
302
0
            auto lhs = e->GetOp1();
303
304
0
            if ( lhs->Tag() == EXPR_LIST && in_table_constructor > 0 )
305
                // This is the index portion of a table constructor, process as
306
                // that rather than as a compound assignment to an "any" value.
307
0
                return TC_CONTINUE;
308
309
0
            auto op2 = e->GetOp2();
310
0
            op2->Traverse(this);
311
312
0
            if ( ! CheckLHS(lhs, op2) )
313
                // Not a simple assignment (or group of assignments),
314
                // so analyze the accesses to check for use of
315
                // possibly undefined values.
316
0
                lhs->Traverse(this);
317
318
0
            return TC_ABORTSTMT;
319
0
        }
320
321
0
        case EXPR_COND:
322
            // Special hack.  We turn off checking for usage issues
323
            // inside conditionals.  This is because we use them heavily
324
            // to deconstruct logical expressions for which the actual
325
            // operand access is safe (guaranteed not to access a value
326
            // that hasn't been undefined), but the flow analysis has
327
            // trouble determining that.
328
0
            ++suppress_usage;
329
0
            e->GetOp1()->Traverse(this);
330
0
            e->GetOp2()->Traverse(this);
331
0
            e->GetOp3()->Traverse(this);
332
0
            --suppress_usage;
333
334
0
            return TC_ABORTSTMT;
335
336
0
        case EXPR_LAMBDA: {
337
0
            auto l = static_cast<const LambdaExpr*>(e);
338
0
            const auto& ids = l->OuterIDs();
339
340
0
            for ( auto& id : ids )
341
0
                CheckVarUsage(e, id);
342
343
            // Don't descend into the lambda body - we'll analyze and
344
            // optimize it separately, as its own function.
345
0
            return TC_ABORTSTMT;
346
0
        }
347
348
0
        case EXPR_TABLE_CONSTRUCTOR: ++in_table_constructor; break;
349
350
0
        default: break;
351
0
    }
352
353
0
    return TC_CONTINUE;
354
0
}
355
356
0
TraversalCode GenIDDefs::PostExpr(const Expr* e) {
357
    // Attend to expressions that reflect assignments after
358
    // execution, but for which the assignment target was
359
    // also an accessed value (so if we analyzed them
360
    // in PreExpr then we'd have had to do manual traversals
361
    // of their operands).
362
363
0
    auto t = e->Tag();
364
0
    if ( t == EXPR_INCR || t == EXPR_DECR || t == EXPR_ADD_TO || t == EXPR_REMOVE_FROM ) {
365
0
        auto op = e->GetOp1();
366
0
        if ( ! IsAggr(op) )
367
0
            (void)CheckLHS(op);
368
0
    }
369
0
    else if ( t == EXPR_TABLE_CONSTRUCTOR )
370
0
        --in_table_constructor;
371
372
0
    return TC_CONTINUE;
373
0
}
374
375
0
bool GenIDDefs::CheckLHS(const ExprPtr& lhs, const ExprPtr& rhs) {
376
0
    switch ( lhs->Tag() ) {
377
0
        case EXPR_REF: return CheckLHS(lhs->GetOp1(), rhs);
378
379
0
        case EXPR_NAME: {
380
0
            auto n = lhs->AsNameExpr();
381
0
            TrackID(n->IdPtr(), rhs);
382
0
            return true;
383
0
        }
384
385
0
        case EXPR_LIST: { // look for [a, b, c] = any_val
386
0
            auto l = lhs->AsListExpr();
387
0
            for ( const auto& expr : l->Exprs() ) {
388
0
                if ( expr->Tag() != EXPR_NAME )
389
                    // This will happen for table initializers,
390
                    // for example.
391
0
                    return false;
392
393
0
                auto n = expr->AsNameExpr();
394
0
                TrackID(n->IdPtr());
395
0
            }
396
397
0
            return true;
398
0
        }
399
400
        // If we want to track record field initializations,
401
        // we'd handle that here.
402
0
        case EXPR_FIELD:
403
404
        // If we wanted to track potential alterations of
405
        // aggregates, we'd do that here.
406
0
        case EXPR_INDEX: return false;
407
408
0
        default: reporter->InternalError("bad tag in GenIDDefs::CheckLHS");
409
0
    }
410
0
}
411
412
0
bool GenIDDefs::IsAggr(const Expr* e) const {
413
0
    if ( e->Tag() != EXPR_NAME )
414
0
        return false;
415
416
0
    auto n = e->AsNameExpr();
417
0
    auto id = n->Id();
418
0
    auto tag = id->GetType()->Tag();
419
420
0
    return zeek::IsAggr(tag);
421
0
}
422
423
0
void GenIDDefs::CheckVarUsage(const Expr* e, const IDPtr& id) {
424
0
    if ( analysis_options.usage_issues != 1 || id->IsGlobal() || suppress_usage > 0 )
425
0
        return;
426
427
0
    auto oi = id->GetOptInfo();
428
429
0
    if ( ! oi->DidUndefinedWarning() && ! oi->IsDefinedBefore(last_stmt_traversed) &&
430
0
         ! id->GetAttr(ATTR_IS_ASSIGNED) ) {
431
0
        if ( ! oi->IsPossiblyDefinedBefore(last_stmt_traversed) ) {
432
0
            e->Warn("used without definition");
433
0
            oi->SetDidUndefinedWarning();
434
0
        }
435
436
0
        else if ( ! oi->DidPossiblyUndefinedWarning() ) {
437
0
            e->Warn("possibly used without definition");
438
0
            oi->SetDidPossiblyUndefinedWarning();
439
0
        }
440
0
    }
441
0
}
442
443
0
void GenIDDefs::StartConfluenceBlock(const Stmt* s) {
444
0
    confluence_blocks.push_back(s);
445
0
    modified_IDs.emplace_back();
446
0
}
447
448
0
void GenIDDefs::EndConfluenceBlock(bool no_orig) {
449
0
    for ( const auto& id : modified_IDs.back() )
450
0
        id->GetOptInfo()->ConfluenceBlockEndsAfter(last_stmt_traversed, no_orig);
451
452
0
    confluence_blocks.pop_back();
453
0
    modified_IDs.pop_back();
454
0
}
455
456
0
void GenIDDefs::BranchBackTo(const Stmt* from, const Stmt* to, bool close_all) {
457
0
    for ( const auto& id : modified_IDs.back() )
458
0
        id->GetOptInfo()->BranchBackTo(from, to, close_all);
459
0
}
460
461
0
void GenIDDefs::BranchBeyond(const Stmt* from, const Stmt* to, bool close_all) {
462
0
    for ( const auto& id : modified_IDs.back() )
463
0
        id->GetOptInfo()->BranchBeyond(from, to, close_all);
464
465
0
    to->GetOptInfo()->contains_branch_beyond = true;
466
0
}
467
468
0
const Stmt* GenIDDefs::FindLoop() {
469
0
    int i = confluence_blocks.size() - 1;
470
0
    while ( i >= 0 ) {
471
0
        auto t = confluence_blocks[i]->Tag();
472
0
        if ( t == STMT_WHILE || t == STMT_FOR )
473
0
            break;
474
475
0
        --i;
476
0
    }
477
478
0
    ASSERT(i >= 0);
479
480
0
    return confluence_blocks[i];
481
0
}
482
483
0
const Stmt* GenIDDefs::FindBreakTarget() {
484
0
    int i = confluence_blocks.size() - 1;
485
0
    while ( i >= 0 ) {
486
0
        auto cb = confluence_blocks[i];
487
0
        auto t = cb->Tag();
488
0
        if ( t == STMT_WHILE || t == STMT_FOR || t == STMT_SWITCH )
489
0
            return cb;
490
491
0
        --i;
492
0
    }
493
494
0
    return nullptr;
495
0
}
496
497
0
void GenIDDefs::ReturnAt(const Stmt* s) {
498
    // If we're right at a catch-return then we don't want to make the
499
    // identifier as encountering a scope-ending "return" here.  By avoiding
500
    // that, we're able to do optimization across catch-return blocks.
501
0
    if ( cr_active.empty() || cr_active.back() != confluence_blocks.size() )
502
0
        for ( const auto& id : modified_IDs.back() )
503
0
            id->GetOptInfo()->ReturnAt(s);
504
0
}
505
506
0
void GenIDDefs::TrackID(const IDPtr& id, const ExprPtr& e) {
507
0
    auto oi = id->GetOptInfo();
508
509
    // The 4th argument here is hardwired to 0, meaning "assess across all
510
    // confluence blocks". If we want definitions inside catch-return bodies
511
    // to not propagate outside those bodies, we'd instead create new
512
    // confluence blocks for catch-return statements, and use their identifier
513
    // here to set the lowest limit for definitions. For now we leave
514
    // DefinedAfter as capable of supporting that distinction in case we
515
    // find need to revive it in the future.
516
0
    oi->SetDefinedAfter(last_stmt_traversed, e, confluence_blocks, 0);
517
518
    // Ensure we track this identifier across all relevant
519
    // confluence regions.
520
0
    for ( auto i = 0U; i < confluence_blocks.size(); ++i )
521
        // Add one because modified_IDs includes outer non-confluence
522
        // block.
523
0
        modified_IDs[i + 1].insert(id);
524
525
0
    if ( confluence_blocks.empty() )
526
        // This is a definition at the outermost level.
527
0
        modified_IDs[0].insert(id);
528
0
}
529
530
} // namespace zeek::detail