Line data Source code
1 : #include "source/common/config/well_known_names.h"
2 :
3 : #include "absl/strings/str_replace.h"
4 :
5 : namespace Envoy {
6 : namespace Config {
7 :
8 : namespace {
9 :
10 : const absl::string_view TAG_VALUE_REGEX = R"([^\.]+)";
11 :
12 : // To allow for more readable regular expressions to be declared below, and to
13 : // reduce duplication, define a few common pattern substitutions for regex
14 : // segments.
15 182 : std::string expandRegex(const std::string& regex) {
16 182 : return absl::StrReplaceAll(
17 182 : regex, {// Regex to look for either IPv4 or IPv6 addresses plus port number after underscore.
18 182 : {"<ADDRESS>", R"((?:(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|\[[a-fA-F_\d]+\])_\d+))"},
19 : // Cipher names can contain alphanumerics with dashes and
20 : // underscores.
21 182 : {"<CIPHER>", R"([\w-]+)"},
22 : // TLS version strings are always of the form "TLSv1.3".
23 182 : {"<TLS_VERSION>", R"(TLSv\d\.\d)"},
24 : // A generic name can contain any character except dots.
25 182 : {"<TAG_VALUE>", TAG_VALUE_REGEX},
26 : // Route names may contain dots in addition to alphanumerics and
27 : // dashes with underscores.
28 182 : {"<ROUTE_CONFIG_NAME>", R"([\w-\.]+)"},
29 : // Match a prefix that is either a listener plus name or cluster plus name
30 182 : {"<LISTENER_OR_CLUSTER_WITH_NAME>", R"((?:listener|cluster)\..*?)"}});
31 182 : }
32 :
33 0 : const Regex::CompiledGoogleReMatcher& validTagValueRegex() {
34 0 : CONSTRUCT_ON_FIRST_USE(Regex::CompiledGoogleReMatcher, absl::StrCat("^", TAG_VALUE_REGEX, "$"),
35 0 : false);
36 0 : }
37 :
38 : } // namespace
39 :
40 0 : bool doesTagNameValueMatchInvalidCharRegex(absl::string_view name) {
41 0 : return validTagValueRegex().match(name);
42 0 : }
43 :
44 13 : TagNameValues::TagNameValues() {
45 : // Note: the default regexes are defined below in the order that they will typically be matched
46 : // (see the TagExtractor class definition for an explanation of the iterative matching process).
47 : // This ordering is roughly from most specific to least specific. Despite the fact that these
48 : // regexes are defined with a particular ordering in mind, users can customize the ordering of the
49 : // processing of the default tag extraction regexes and include custom tags with regexes via the
50 : // bootstrap configuration. Because of this flexibility, these regexes are designed to not
51 : // interfere with one another no matter the ordering. They are tested in forward and reverse
52 : // ordering to ensure they will be safe in most ordering configurations.
53 :
54 : // To give a more user-friendly explanation of the intended behavior of each regex, each is
55 : // preceded by a comment with a simplified notation to explain what the regex is designed to
56 : // match:
57 : // - The text that the regex is intended to capture will be enclosed in ().
58 : // - Other default tags that are expected to exist in the name (and may or may not have been
59 : // removed before this regex has been applied) are enclosed in [].
60 : // - Stand-ins for a variable segment of the name (including inside capture groups) will be
61 : // enclosed in <>.
62 : // - Typical * notation will be used to denote an arbitrary set of characters.
63 :
64 : // *_rq(_<response_code>)
65 13 : addRe2(RESPONSE_CODE, R"(_rq(_(\d{3}))$)", "_rq_");
66 :
67 : // *_rq_(<response_code_class>)xx
68 13 : addRe2(RESPONSE_CODE_CLASS, R"(_rq_((\d))xx$)", "_rq_");
69 :
70 : // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>)
71 13 : addRe2(
72 13 : DYNAMO_PARTITION_ID,
73 13 : R"(^http\.<TAG_VALUE>\.dynamodb\.table\.<TAG_VALUE>\.capacity\.<TAG_VALUE>(\.__partition_id=(\w{7}))$)",
74 13 : ".dynamodb.table.");
75 :
76 : // http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)* or
77 : // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>]
78 13 : addRe2(
79 13 : DYNAMO_OPERATION,
80 13 : R"(^http\.<TAG_VALUE>\.dynamodb.(?:operation|table\.<TAG_VALUE>\.capacity)(\.(<TAG_VALUE>))(?:\.|$))",
81 13 : ".dynamodb.");
82 :
83 : // mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.*
84 13 : addTokenized(MONGO_CALLSITE, "mongo.*.collection.*.callsite.$.query.**");
85 :
86 : // http.[<stat_prefix>.]dynamodb.table.(<table_name>.)* or
87 : // http.[<stat_prefix>.]dynamodb.error.(<table_name>.)*
88 13 : addRe2(DYNAMO_TABLE, R"(^http\.<TAG_VALUE>\.dynamodb.(?:table|error)\.((<TAG_VALUE>)\.))",
89 13 : ".dynamodb.");
90 :
91 : // mongo.[<stat_prefix>.]collection.(<collection>.)query.*
92 13 : addTokenized(MONGO_COLLECTION, "mongo.*.collection.$.**.query.*");
93 :
94 : // mongo.[<stat_prefix>.]cmd.(<cmd>.)*
95 13 : addTokenized(MONGO_CMD, "mongo.*.cmd.$.**");
96 :
97 : // cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)*
98 13 : addTokenized(GRPC_BRIDGE_METHOD, "cluster.*.grpc.*.$.**");
99 :
100 : // http.[<stat_prefix>.]user_agent.(<user_agent>.)*
101 13 : addTokenized(HTTP_USER_AGENT, "http.*.user_agent.$.**");
102 :
103 : // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)*
104 13 : addTokenized(VIRTUAL_CLUSTER, "vhost.*.vcluster.$.**");
105 :
106 : // http.[<stat_prefix>.]fault.(<downstream_cluster>.)*
107 13 : addTokenized(FAULT_DOWNSTREAM_CLUSTER, "http.*.fault.$.**");
108 :
109 : // listener.[<address>.]ssl.ciphers.(<cipher>)
110 13 : addRe2(SSL_CIPHER, R"(^listener\..*?\.ssl\.ciphers(\.(<CIPHER>))$)", ".ssl.ciphers.");
111 :
112 : // Curves and ciphers have the same pattern so they use the same regex.
113 : // listener.[<address>.]ssl.curves.(<curve>)
114 13 : addRe2(SSL_CURVE, R"(^<LISTENER_OR_CLUSTER_WITH_NAME>\.ssl\.curves(\.(<CIPHER>))$)",
115 13 : ".ssl.curves.");
116 :
117 : // Signing algorithms and ciphers have the same pattern so they use the same regex.
118 : // listener.[<address>.]ssl.sigalgs.(<algorithm>)
119 13 : addRe2(SSL_SIGALG, R"(^<LISTENER_OR_CLUSTER_WITH_NAME>\.ssl\.sigalgs(\.(<CIPHER>))$)",
120 13 : ".ssl.sigalgs.");
121 :
122 : // listener.[<address>.]ssl.versions.(<version>)
123 13 : addRe2(SSL_VERSION, R"(^<LISTENER_OR_CLUSTER_WITH_NAME>\.ssl\.versions(\.(<TLS_VERSION>))$)",
124 13 : ".ssl.versions.");
125 :
126 : // cluster.[<cluster_name>.]ssl.ciphers.(<cipher>)
127 13 : addRe2(SSL_CIPHER_SUITE, R"(^cluster\.<TAG_VALUE>\.ssl\.ciphers(\.(<CIPHER>))$)",
128 13 : ".ssl.ciphers.");
129 :
130 : // cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)*
131 13 : addTokenized(GRPC_BRIDGE_SERVICE, "cluster.*.grpc.$.**");
132 :
133 : // tcp.(<stat_prefix>.)*
134 13 : addTokenized(TCP_PREFIX, "tcp.$.**");
135 :
136 : // udp.(<stat_prefix>.)*
137 13 : addTokenized(UDP_PREFIX, "udp.$.**");
138 :
139 : // auth.clientssl.(<stat_prefix>.)*
140 13 : addTokenized(CLIENTSSL_PREFIX, "auth.clientssl.$.**");
141 :
142 : // ratelimit.(<stat_prefix>.)*
143 13 : addTokenized(RATELIMIT_PREFIX, "ratelimit.$.**");
144 :
145 : // cluster.(<cluster_name>.)*
146 13 : addTokenized(CLUSTER_NAME, "cluster.$.**");
147 :
148 : // listener.[<address>.]http.(<stat_prefix>.)*
149 : // The <address> part can be anything here (.*?) for the sake of a simpler
150 : // internal state of the regex which performs better.
151 13 : addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^listener\..*?\.http\.((<TAG_VALUE>)\.))", ".http.");
152 :
153 : // Extract ext_authz stat_prefix field
154 : // cluster.[<cluster>.]ext_authz.[<ext_authz_prefix>.]*
155 13 : addTokenized(EXT_AUTHZ_PREFIX, "cluster.*.ext_authz.$.**");
156 : // http.[<http_conn_mgr_prefix>.]ext_authz.[<ext_authz_prefix>.]*
157 13 : addTokenized(EXT_AUTHZ_PREFIX, "http.*.ext_authz.$.**");
158 :
159 : // http.(<stat_prefix>.)*
160 13 : addTokenized(HTTP_CONN_MANAGER_PREFIX, "http.$.**");
161 :
162 : // listener.<address|stat_prefix>.(worker_<id>.)*
163 : // listener_manager.(worker_<id>.)*
164 : // server.(worker_<id>.)*
165 13 : addRe2(
166 13 : WORKER_ID,
167 13 : R"(^(?:listener\.(?:<ADDRESS>|<TAG_VALUE>)\.|server\.|listener_manager\.)worker_((\d+)\.))",
168 13 : "");
169 :
170 : // listener.(<address|stat_prefix>.)*, but specifically excluding "admin"
171 13 : addRe2(LISTENER_ADDRESS, R"(^listener\.((<ADDRESS>|<TAG_VALUE>)\.))", "", "admin");
172 :
173 : // vhost.(<virtual host name>.)*
174 13 : addTokenized(VIRTUAL_HOST, "vhost.$.**");
175 :
176 : // mongo.(<stat_prefix>.)*
177 13 : addTokenized(MONGO_PREFIX, "mongo.$.**");
178 :
179 : // http.[<stat_prefix>.]rds.(<route_config_name>.)<base_stat>
180 : // Note: <route_config_name> can contain dots thus we have to maintain full
181 : // match.
182 13 : addRe2(RDS_ROUTE_CONFIG, R"(^http\.<TAG_VALUE>\.rds\.((<ROUTE_CONFIG_NAME>)\.)\w+?$)", ".rds.");
183 :
184 : // vhost.[<virtual host name>.]route.(<route_stat_prefix>.)*
185 13 : addTokenized(ROUTE, "vhost.*.route.$.**");
186 :
187 : // thrift.(<stat_prefix>.)*
188 13 : addTokenized(THRIFT_PREFIX, "thrift.$.**");
189 :
190 : // redis.(<stat_prefix>.)*
191 13 : addTokenized(REDIS_PREFIX, "redis.$.**");
192 :
193 : // (<stat_prefix>.).http_local_rate_limit.**
194 13 : addTokenized(LOCAL_HTTP_RATELIMIT_PREFIX, "$.http_local_rate_limit.**");
195 :
196 : // local_rate_limit.(<stat_prefix>.)
197 13 : addTokenized(LOCAL_NETWORK_RATELIMIT_PREFIX, "local_rate_limit.$.**");
198 :
199 : // listener_local_rate_limit.(<stat_prefix>.)
200 13 : addTokenized(LOCAL_LISTENER_RATELIMIT_PREFIX, "listener_local_ratelimit.$.**");
201 :
202 : // dns_filter.(<stat_prefix>.).**
203 13 : addTokenized(DNS_FILTER_PREFIX, "dns_filter.$.**");
204 :
205 : // connection_limit.(<stat_prefix>.)*
206 13 : addTokenized(CONNECTION_LIMIT_PREFIX, "connection_limit.$.**");
207 :
208 : // (<stat_prefix>.).rbac.**
209 13 : addTokenized(RBAC_PREFIX, "$.rbac.**");
210 13 : }
211 :
212 : void TagNameValues::addRe2(const std::string& name, const std::string& regex,
213 182 : const std::string& substr, const std::string& negative_matching_value) {
214 182 : descriptor_vec_.emplace_back(
215 182 : Descriptor{name, expandRegex(regex), substr, negative_matching_value, Regex::Type::Re2});
216 182 : }
217 :
218 351 : void TagNameValues::addTokenized(const std::string& name, const std::string& tokens) {
219 351 : tokenized_descriptor_vec_.emplace_back(TokenizedDescriptor{name, tokens});
220 351 : }
221 :
222 : } // namespace Config
223 : } // namespace Envoy
|