1
#include "source/common/config/well_known_names.h"
2

            
3
#include "absl/strings/str_replace.h"
4

            
5
namespace Envoy {
6
namespace Config {
7

            
8
namespace {
9

            
10
// Pattern for a single tag value: one or more characters, none of which is a
// dot. It is used both as the <TAG_VALUE> substitution in expandRegex() and,
// anchored with ^...$, by validTagValueRegex().
const absl::string_view TAG_VALUE_REGEX = R"([^\.]+)";
11

            
12
// To allow for more readable regular expressions to be declared below, and to
13
// reduce duplication, define a few common pattern substitutions for regex
14
// segments.
15
10227
std::string expandRegex(const std::string& regex) {
16
10227
  return absl::StrReplaceAll(
17
10227
      regex, {// Regex to look for either IPv4 or IPv6 addresses plus port number after underscore.
18
10227
              {"<ADDRESS>", R"((?:(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|\[[a-fA-F_\d]+\])_\d+))"},
19
              // Cipher names can contain alphanumerics with dashes and
20
              // underscores.
21
10227
              {"<CIPHER>", R"([\w-]+)"},
22
              // TLS version strings are always of the form "TLSv1.3".
23
10227
              {"<TLS_VERSION>", R"(TLSv\d\.\d)"},
24
              // A generic name can contain any character except dots.
25
10227
              {"<TAG_VALUE>", TAG_VALUE_REGEX},
26
              // Route names may contain dots and slashes in addition to
27
              // alphanumerics, underscores, and dashes.
28
10227
              {"<ROUTE_CONFIG_NAME>", R"([\w-\./]+)"},
29
              // Scoped Route names are named similarly to route config names.
30
10227
              {"<SCOPED_ROUTE_CONFIG_NAME>", R"([\w-\.]+)"},
31
              // Match a prefix that is either a listener plus name or cluster plus name
32
10227
              {"<LISTENER_OR_CLUSTER_WITH_NAME>", R"((?:listener|cluster)\..*?)"},
33
10227
              {"<PROXY_PROTOCOL_VERSION>", R"(\d)"}});
34
10227
}
35

            
36
7
// Returns the matcher for a complete tag value: TAG_VALUE_REGEX anchored at
// both ends, i.e. a non-empty string that contains no dots. Construction is
// delegated to CONSTRUCT_ON_FIRST_USE, which also supplies the return.
const Regex::CompiledGoogleReMatcher& validTagValueRegex() {
  CONSTRUCT_ON_FIRST_USE(Regex::CompiledGoogleReMatcherNoSafetyChecks,
                         absl::StrCat("^", TAG_VALUE_REGEX, "$"));
}
40

            
41
} // namespace
42

            
43
7
// Matches `name` against validTagValueRegex() and returns the result.
//
// NOTE: despite the "InvalidChar" in the function name, a return value of true
// means `name` matched the *valid* pattern (non-empty, no dots).
//
// @param name the tag name or value to check.
// @return true if `name` matches the anchored TAG_VALUE_REGEX pattern.
bool doesTagNameValueMatchInvalidCharRegex(absl::string_view name) {
  const Regex::CompiledGoogleReMatcher& matcher = validTagValueRegex();
  return matcher.match(name);
}
46

            
47
487
// Registers the default tag extraction descriptors. Each call below appends
// either a regex descriptor (addRe2) or a tokenized descriptor (addTokenized),
// in the order listed.
TagNameValues::TagNameValues() {
  // Note: the default regexes are defined below in the order that they will typically be matched
  // (see the TagExtractor class definition for an explanation of the iterative matching process).
  // This ordering is roughly from most specific to least specific. Despite the fact that these
  // regexes are defined with a particular ordering in mind, users can customize the ordering of the
  // processing of the default tag extraction regexes and include custom tags with regexes via the
  // bootstrap configuration. Because of this flexibility, these regexes are designed to not
  // interfere with one another no matter the ordering. They are tested in forward and reverse
  // ordering to ensure they will be safe in most ordering configurations.

  // To give a more user-friendly explanation of the intended behavior of each regex, each is
  // preceded by a comment with a simplified notation to explain what the regex is designed to
  // match:
  // - The text that the regex is intended to capture will be enclosed in ().
  // - Other default tags that are expected to exist in the name (and may or may not have been
  // removed before this regex has been applied) are enclosed in [].
  // - Stand-ins for a variable segment of the name (including inside capture groups) will be
  // enclosed in <>.
  // - Typical * notation will be used to denote an arbitrary set of characters.

  // *_rq(_<response_code>)
  addRe2(RESPONSE_CODE, R"(_rq(_(\d{3}))$)", "_rq_");

  // *_rq_(<response_code_class>)xx
  addRe2(RESPONSE_CODE_CLASS, R"(_rq_((\d))xx$)", "_rq_");

  // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>)
  addRe2(
      DYNAMO_PARTITION_ID,
      R"(^http\.<TAG_VALUE>\.dynamodb\.table\.<TAG_VALUE>\.capacity\.<TAG_VALUE>(\.__partition_id=(\w{7}))$)",
      ".dynamodb.table.");

  // http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)* or
  // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>]
  // The dot after "dynamodb" is escaped so that it only matches a literal '.'.
  addRe2(
      DYNAMO_OPERATION,
      R"(^http\.<TAG_VALUE>\.dynamodb\.(?:operation|table\.<TAG_VALUE>\.capacity)(\.(<TAG_VALUE>))(?:\.|$))",
      ".dynamodb.");

  // mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.*
  addTokenized(MONGO_CALLSITE, "mongo.*.collection.*.callsite.$.query.**");

  // http.[<stat_prefix>.]dynamodb.table.(<table_name>.)* or
  // http.[<stat_prefix>.]dynamodb.error.(<table_name>.)*
  // The dot after "dynamodb" is escaped so that it only matches a literal '.'.
  addRe2(DYNAMO_TABLE, R"(^http\.<TAG_VALUE>\.dynamodb\.(?:table|error)\.((<TAG_VALUE>)\.))",
         ".dynamodb.");

  // mongo.[<stat_prefix>.]collection.(<collection>.)query.*
  addTokenized(MONGO_COLLECTION, "mongo.*.collection.$.**.query.*");

  // mongo.[<stat_prefix>.]cmd.(<cmd>.)*
  addTokenized(MONGO_CMD, "mongo.*.cmd.$.**");

  // cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)*
  addTokenized(GRPC_BRIDGE_METHOD, "cluster.*.grpc.*.$.**");

  // http.[<stat_prefix>.]user_agent.(<user_agent>.)*
  addTokenized(HTTP_USER_AGENT, "http.*.user_agent.$.**");

  // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)*
  addTokenized(VIRTUAL_CLUSTER, "vhost.*.vcluster.$.**");

  // http.[<stat_prefix>.]fault.(<downstream_cluster>.)*
  addTokenized(FAULT_DOWNSTREAM_CLUSTER, "http.*.fault.$.**");

  // listener.[<address>.]ssl.ciphers.(<cipher>)
  addRe2(SSL_CIPHER, R"(^listener\..*?\.ssl\.ciphers(\.(<CIPHER>))$)", ".ssl.ciphers.");

  // Curves and ciphers have the same pattern so they use the same regex.
  // listener.[<address>.]ssl.curves.(<curve>) or
  // cluster.[<cluster_name>.]ssl.curves.(<curve>)
  addRe2(SSL_CURVE, R"(^<LISTENER_OR_CLUSTER_WITH_NAME>\.ssl\.curves(\.(<CIPHER>))$)",
         ".ssl.curves.");

  // Signing algorithms and ciphers have the same pattern so they use the same regex.
  // listener.[<address>.]ssl.sigalgs.(<algorithm>) or
  // cluster.[<cluster_name>.]ssl.sigalgs.(<algorithm>)
  addRe2(SSL_SIGALG, R"(^<LISTENER_OR_CLUSTER_WITH_NAME>\.ssl\.sigalgs(\.(<CIPHER>))$)",
         ".ssl.sigalgs.");

  // listener.[<address>.]ssl.versions.(<version>) or
  // cluster.[<cluster_name>.]ssl.versions.(<version>)
  addRe2(SSL_VERSION, R"(^<LISTENER_OR_CLUSTER_WITH_NAME>\.ssl\.versions(\.(<TLS_VERSION>))$)",
         ".ssl.versions.");

  // cluster.[<cluster_name>.]ssl.ciphers.(<cipher>)
  addRe2(SSL_CIPHER_SUITE, R"(^cluster\.<TAG_VALUE>\.ssl\.ciphers(\.(<CIPHER>))$)",
         ".ssl.ciphers.");

  // cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)*
  addTokenized(GRPC_BRIDGE_SERVICE, "cluster.*.grpc.$.**");

  // tcp.(<stat_prefix>.)*
  addTokenized(TCP_PREFIX, "tcp.$.**");

  // udp.(<stat_prefix>.)*
  addTokenized(UDP_PREFIX, "udp.$.**");

  // auth.clientssl.(<stat_prefix>.)*
  addTokenized(CLIENTSSL_PREFIX, "auth.clientssl.$.**");

  // ratelimit.(<stat_prefix>.)*
  addTokenized(RATELIMIT_PREFIX, "ratelimit.$.**");

  // cluster.(<cluster_name>.)*
  addTokenized(CLUSTER_NAME, "cluster.$.**");

  // listener.[<address>.]http.(<stat_prefix>.)*
  // The <address> part can be anything here (.*?) for the sake of a simpler
  // internal state of the regex which performs better.
  addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^listener\..*?\.http\.((<TAG_VALUE>)\.))", ".http.");

  // Extract ext_authz stat_prefix field
  // cluster.[<cluster>.]ext_authz.(<ext_authz_prefix>.)*
  addTokenized(EXT_AUTHZ_PREFIX, "cluster.*.ext_authz.$.**");
  // http.[<http_conn_mgr_prefix>.]ext_authz.(<ext_authz_prefix>.)*
  addTokenized(EXT_AUTHZ_PREFIX, "http.*.ext_authz.$.**");

  // http.(<stat_prefix>.)*
  addTokenized(HTTP_CONN_MANAGER_PREFIX, "http.$.**");

  // listener.<address|stat_prefix>.(worker_<id>.)*
  // listener_manager.(worker_<id>.)*
  // server.(worker_<id>.)*
  // thread_local_cluster_manager.(worker_<id>.)*
  addRe2(
      WORKER_ID,
      R"(^(?:listener\.(?:<ADDRESS>|<TAG_VALUE>)|server|listener_manager|thread_local_cluster_manager)\.worker_((\d+)\.))",
      "");

  // listener.(<address|stat_prefix>.)*, but specifically excluding "admin"
  addRe2(LISTENER_ADDRESS, R"(^listener\.((<ADDRESS>|<TAG_VALUE>)\.))", "", "admin");

  // vhost.(<virtual host name>.)*
  addTokenized(VIRTUAL_HOST, "vhost.$.**");

  // mongo.(<stat_prefix>.)*
  addTokenized(MONGO_PREFIX, "mongo.$.**");

  // http.[<stat_prefix>.]rds.(<route_config_name>.)<base_stat>
  // Note: <route_config_name> can contain dots thus we have to maintain full
  // match.
  addRe2(RDS_ROUTE_CONFIG, R"(^http\.<TAG_VALUE>\.rds\.((<ROUTE_CONFIG_NAME>)\.)\w+?$)", ".rds.");

  // http.[<stat_prefix>.]scoped_rds.(<scoped_route_config_name>.)<base_stat>
  addRe2(SCOPED_RDS_CONFIG,
         R"(^http\.<TAG_VALUE>\.scoped_rds\.((<SCOPED_ROUTE_CONFIG_NAME>)\.)\w+?$)",
         ".scoped_rds.");

  // vhost.[<virtual host name>.]route.(<route_stat_prefix>.)*
  addTokenized(ROUTE, "vhost.*.route.$.**");

  // thrift.(<stat_prefix>.)*
  addTokenized(THRIFT_PREFIX, "thrift.$.**");

  // redis.(<stat_prefix>.)*
  addTokenized(REDIS_PREFIX, "redis.$.**");

  // (<stat_prefix>.)http_local_rate_limit.*
  addTokenized(LOCAL_HTTP_RATELIMIT_PREFIX, "$.http_local_rate_limit.**");

  // local_rate_limit.(<stat_prefix>.)*
  addTokenized(LOCAL_NETWORK_RATELIMIT_PREFIX, "local_rate_limit.$.**");

  // listener_local_ratelimit.(<stat_prefix>.)*
  addTokenized(LOCAL_LISTENER_RATELIMIT_PREFIX, "listener_local_ratelimit.$.**");

  // dns_filter.(<stat_prefix>.)*
  addTokenized(DNS_FILTER_PREFIX, "dns_filter.$.**");

  // connection_limit.(<stat_prefix>.)*
  addTokenized(CONNECTION_LIMIT_PREFIX, "connection_limit.$.**");

  // http.[<stat_prefix>.]rbac.[<optional stat_prefix>.]policy.(<policy_name>.)(allowed|shadow_allowed|denied|shadow_denied)
  addRe2(
      RBAC_POLICY_NAME,
      R"(^http\.<TAG_VALUE>\.rbac\.(?:<TAG_VALUE>\.)?policy\.((<TAG_VALUE>)\.)(allowed|shadow_allowed|denied|shadow_denied)$)");

  // (<stat_prefix>.)rbac.*
  addTokenized(RBAC_PREFIX, "$.rbac.**");

  // http.<stat_prefix>.rbac.(<rules_stat_prefix>.)* but excluding policy
  addRe2(RBAC_HTTP_PREFIX, R"(^http\.<TAG_VALUE>\.rbac\.((<TAG_VALUE>)\.).*?)", "", "policy");

  // proxy_proto.(<stat_prefix>.)* but excluding versions
  addRe2(PROXY_PROTOCOL_PREFIX, R"(^proxy_proto\.((<TAG_VALUE>)\.).+$)", "", "versions");

  // proxy_proto.([<optional stat_prefix>.]versions.v(<version_number>).)(found|disallowed|error)
  //
  // Strips out:  [<optional stat_prefix>.]versions.v(<version_number>).
  // Leaving: proxy_proto.(found|disallowed|error)
  addRe2(PROXY_PROTOCOL_VERSION,
         R"(^proxy_proto\.((?:<TAG_VALUE>\.)?versions\.v(<PROXY_PROTOCOL_VERSION>)\.)\w+$)");

  // grpc.(<stat_prefix>.)*
  addTokenized(GOOGLE_GRPC_CLIENT_PREFIX, "grpc.$.**");

  // listener.[<address>.]ssl.certificate.(<cert_name>.)<metric_name> or
  // cluster.[<cluster_name>.]ssl.certificate.(<cert_name>.)<metric_name>
  addRe2(TLS_CERTIFICATE,
         R"(^<LISTENER_OR_CLUSTER_WITH_NAME>\.ssl\.certificate\.((<TAG_VALUE>)\.).*$)",
         ".ssl.certificate");

  // sds.(<resource_name>.)*
  addRe2(XDS_RESOURCE_NAME, R"(^sds\.((<TAG_VALUE>)\.).+)");
}
251

            
252
void TagNameValues::addRe2(const std::string& name, const std::string& regex,
253
10227
                           const std::string& substr, const std::string& negative_matching_value) {
254
10227
  descriptor_vec_.emplace_back(
255
10227
      Descriptor{name, expandRegex(regex), substr, negative_matching_value, Regex::Type::Re2});
256
10227
}
257

            
258
13636
void TagNameValues::addTokenized(const std::string& name, const std::string& tokens) {
259
13636
  tokenized_descriptor_vec_.emplace_back(TokenizedDescriptor{name, tokens});
260
13636
}
261

            
262
} // namespace Config
263
} // namespace Envoy