/src/ndpi/src/lib/ndpi_domains.c
Line | Count | Source |
1 | | /* |
2 | | * ndpi_domains.c |
3 | | * |
4 | | * Copyright (C) 2011-26 - ntop.org and contributors |
5 | | * |
6 | | * nDPI is free software: you can redistribute it and/or modify |
7 | | * it under the terms of the GNU Lesser General Public License as published by |
8 | | * the Free Software Foundation, either version 3 of the License, or |
9 | | * (at your option) any later version. |
10 | | * |
11 | | * nDPI is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public License |
17 | | * along with nDPI. If not, see <http://www.gnu.org/licenses/>. |
18 | | * |
19 | | */ |
20 | | |
21 | | #include "ndpi_config.h" |
22 | | #include "ndpi_api.h" |
23 | | #include "ndpi_includes.h" |
24 | | #include "ndpi_private.h" |
25 | | |
26 | | /* ******************************* */ |
27 | | |
28 | | int ndpi_load_domain_suffixes(struct ndpi_detection_module_struct *ndpi_str, |
29 | 4.49k | char *public_suffix_list_path) { |
30 | 4.49k | char buf[256], *line; |
31 | 4.49k | FILE *fd; |
32 | 4.49k | u_int16_t domain_id = 1; |
33 | | |
34 | 4.49k | if(ndpi_str == NULL || public_suffix_list_path == NULL) |
35 | 918 | return(-1); |
36 | | |
37 | 3.57k | if((fd = fopen(public_suffix_list_path, "r")) == NULL) |
38 | 655 | return(-2); |
39 | | |
40 | 2.92k | if(ndpi_str->public_domain_suffixes != NULL) { |
41 | | /* An existing license was already loaded: free it and start over */ |
42 | 696 | ndpi_hash_free(&ndpi_str->public_domain_suffixes); |
43 | 696 | } |
44 | | |
45 | 2.92k | if(ndpi_hash_init(&ndpi_str->public_domain_suffixes) != 0) { |
46 | 31 | fclose(fd); |
47 | 31 | return(-3); |
48 | 31 | } |
49 | | |
50 | 32.5M | while((line = fgets(buf, sizeof(buf), fd)) != NULL) { |
51 | 32.5M | u_int offset, len; |
52 | | |
53 | | /* Skip private domains */ |
54 | 32.5M | if(strstr(line, "// ===END ICANN DOMAINS===")) |
55 | 2.88k | break; |
56 | | |
57 | | /* Skip empty lines or comments */ |
58 | 32.5M | if((line[0] == '\0') || (line[0] == '/') || (line[0] == '\n') || (line[0] == '\r')) |
59 | 12.4M | continue; |
60 | | |
61 | 20.0M | if((line[0] == '*') && (line[1] == '.') && (line[2] != '\0')) |
62 | 49.1k | offset = 2; |
63 | 19.9M | else |
64 | 19.9M | offset = 0; |
65 | | |
66 | 20.0M | len = strlen(line) - 1; |
67 | 40.0M | while((len > 0) && (line[len] == '\n')) |
68 | 20.0M | line[len--] = '\0'; |
69 | | |
70 | 20.0M | if(ndpi_hash_add_entry(&ndpi_str->public_domain_suffixes, |
71 | 20.0M | &line[offset], strlen(&line[offset]), domain_id, NULL) != 0) { |
72 | | |
73 | 355k | NDPI_LOG_ERR(ndpi_str, "Error while processing domain %s\n", &line[offset]); |
74 | 355k | } else |
75 | 19.6M | domain_id++; |
76 | 20.0M | } |
77 | | |
78 | 2.88k | fclose(fd); |
79 | | |
80 | 2.88k | if(domain_id > 0) |
81 | 2.88k | NDPI_LOG_DBG(ndpi_str, "Loaded %u domains\n", domain_id-1); |
82 | | |
83 | 2.88k | return(0); |
84 | 2.92k | } |
85 | | |
86 | | /* ******************************* */ |
87 | | |
88 | | /* |
89 | | Example |
90 | | - www.ntop.org -> org |
91 | | - www.bbc.co.uk -> co.uk |
92 | | */ |
93 | | |
94 | | const char* ndpi_get_host_domain_suffix(struct ndpi_detection_module_struct *ndpi_str, |
95 | | const char *hostname, |
96 | 2.22M | u_int64_t *domain_id /* out */) { |
97 | 2.22M | char *dot, *prev_dot; |
98 | | |
99 | 2.22M | if(!ndpi_str || !hostname || !domain_id) |
100 | 2.76k | return NULL; |
101 | | |
102 | 2.22M | *domain_id = 0; |
103 | | |
104 | 2.22M | if(ndpi_str->public_domain_suffixes == NULL) |
105 | 1.66M | return(hostname); |
106 | | |
107 | 556k | prev_dot = dot = strrchr(hostname, '.'); |
108 | | |
109 | 1.08M | while(dot != NULL) { |
110 | 4.99M | while((dot != hostname) && (dot[0] != '.')) |
111 | 3.91M | dot--; |
112 | | |
113 | 1.08M | if((dot == hostname) |
114 | 595k | || (ndpi_hash_find_entry(ndpi_str->public_domain_suffixes, |
115 | 595k | &dot[1], strlen(&dot[1]), domain_id) != 0)) { |
116 | | /* Not found: end of search */ |
117 | 556k | return(&prev_dot[1]); |
118 | 556k | } |
119 | | |
120 | 529k | prev_dot = dot; |
121 | 529k | dot--; |
122 | 529k | } |
123 | | |
124 | 296 | return(hostname); |
125 | 556k | } |
126 | | |
127 | | /* ******************************* */ |
128 | | |
129 | | /* |
130 | | Example |
131 | | - www.ntop.org -> ntop.org |
132 | | - www.bbc.co.uk -> bbc.co.uk |
133 | | */ |
134 | | const char* ndpi_get_host_domain(struct ndpi_detection_module_struct *ndpi_str, |
135 | 14.8k | const char *hostname) { |
136 | 14.8k | const char *ret; |
137 | 14.8k | char *dot, *first_dc; |
138 | 14.8k | u_int64_t domain_id, len; |
139 | | |
140 | 14.8k | if(!ndpi_str || !hostname) |
141 | 2.82k | return NULL; |
142 | | |
143 | 11.9k | if(ndpi_str->public_domain_suffixes == NULL) |
144 | 8.75k | return(hostname); |
145 | | |
146 | 3.23k | len = strlen(hostname); |
147 | 3.23k | if(len == 0) |
148 | 25 | return(hostname); |
149 | 3.21k | else |
150 | 3.21k | len--; |
151 | | |
152 | 3.21k | if((isdigit(hostname[len])) || (hostname[len] == ']' /* IPv6 address [...] */ )) |
153 | 362 | return(hostname); |
154 | | |
155 | 2.84k | if((first_dc = strchr(hostname, ':')) != NULL) { |
156 | 169 | char *last_dc = strchr(hostname, ':'); |
157 | | |
158 | 169 | if((last_dc != NULL) && (first_dc != last_dc)) |
159 | 0 | return(hostname); /* Numeric IPv6 address */ |
160 | 169 | } |
161 | | |
162 | 2.84k | ret = ndpi_get_host_domain_suffix(ndpi_str, hostname, &domain_id); |
163 | | |
164 | 2.84k | if((ret == NULL) || (ret == hostname)) |
165 | 296 | return(hostname); |
166 | | |
167 | 2.55k | if(strcmp(ret, "in-addr.arpa") == 0) |
168 | 0 | return(ret); |
169 | | |
170 | 2.55k | dot = ndpi_strrstr(hostname, ret); |
171 | | |
172 | 2.55k | if(dot == NULL || dot == hostname) |
173 | 0 | return(hostname); |
174 | | |
175 | 2.55k | dot--; |
176 | 14.8k | while(dot != hostname) { |
177 | 14.3k | dot--; |
178 | | |
179 | 14.3k | if(dot[0] == '.') |
180 | 2.03k | return(&dot[1]); |
181 | 14.3k | } |
182 | | |
183 | 518 | return(hostname); |
184 | 2.55k | } |