Coverage Report

Created: 2025-12-10 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ndpi/src/lib/protocols/http.c
Line
Count
Source
1
/*
2
 * http.c
3
 *
4
 * Copyright (C) 2011-25 - ntop.org
5
 *
6
 * This file is part of nDPI, an open source deep packet inspection
7
 * library based on the OpenDPI and PACE technology by ipoque GmbH
8
 *
9
 * nDPI is free software: you can redistribute it and/or modify
10
 * it under the terms of the GNU Lesser General Public License as published by
11
 * the Free Software Foundation, either version 3 of the License, or
12
 * (at your option) any later version.
13
 *
14
 * nDPI is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 * GNU Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public License
20
 * along with nDPI.  If not, see <http://www.gnu.org/licenses/>.
21
 *
22
 */
23
24
#include <assert.h>
25
#include <errno.h>
26
27
#include "ndpi_protocol_ids.h"
28
29
#define NDPI_CURRENT_PROTO NDPI_PROTOCOL_HTTP
30
31
#include "ndpi_api.h"
32
#include "ndpi_private.h"
33
34
/* Content dissectors declared extern here; this file invokes them on HTTP payloads. */
extern void ndpi_search_json(struct ndpi_detection_module_struct *ndpi_struct,
                             struct ndpi_flow_struct *flow);
extern void ndpi_search_msgpack(struct ndpi_detection_module_struct *ndpi_struct,
                                struct ndpi_flow_struct *flow);

/*
 * "application/<subtype>" values that denote an executable, one NULL-terminated
 * table per first letter of the subtype (the lookup code switches on that letter).
 */
static const char *binary_exec_file_mimes_e[] = { "exe", NULL };
static const char *binary_exec_file_mimes_j[] = { "java-vm", NULL };
static const char *binary_exec_file_mimes_v[] = {
  "vnd.ms-cab-compressed",
  "vnd.microsoft.portable-executable",
  NULL
};
static const char *binary_exec_file_mimes_x[] = { "x-msdownload", "x-dosexec", NULL };

/* "application/<subtype>" values that denote a generic file download. */
static const char *download_file_mimes_b[] = { "bz", "bz2", NULL };
static const char *download_file_mimes_o[] = { "octet-stream", NULL };
static const char *download_file_mimes_x[] = { "x-tar", "x-zip", "x-bzip", NULL };

/* Length, in bytes, of every entry in binary_exec_file_ext[] */
#define ATTACHMENT_LEN    3
static const char *binary_exec_file_ext[] = { "exe", "msi", "cab", NULL };

/* Defined later in this file */
static void ndpi_search_http_tcp(struct ndpi_detection_module_struct *ndpi_struct,
                                 struct ndpi_flow_struct *flow);
static void ndpi_check_http_header(struct ndpi_detection_module_struct *ndpi_struct,
                                   struct ndpi_flow_struct *flow);
60
61
/* *********************************************** */
62
63
5.45k
/*
 * Choose the text attached to a flow risk.
 *
 * When the HTTP response status code is 2xx, msg is returned untouched.
 * For any other status code, "<msg> (attempt)" is formatted into buf
 * (at most buf_len bytes, NUL included) and buf is returned.
 */
static char* forge_attempt_msg(struct ndpi_flow_struct *flow, char *msg, char *buf, u_int buf_len) {
  const int is_2xx = (flow->http.response_status_code >= 200)
    && (flow->http.response_status_code < 300);

  if(!is_2xx) {
    snprintf(buf, buf_len, "%s (attempt)", msg);
    return(buf);
  }

  return(msg);
}
71
72
/* *********************************************** */
73
74
static void ndpi_set_binary_data_transfer(struct ndpi_detection_module_struct *ndpi_struct, struct ndpi_flow_struct *flow,
75
4.30k
            char *msg) {
76
4.30k
  char buf[256];
77
78
4.30k
  ndpi_set_risk(ndpi_struct, flow, NDPI_BINARY_DATA_TRANSFER,
79
4.30k
    forge_attempt_msg(flow, msg, buf, sizeof(buf)));
80
4.30k
}
81
82
/* *********************************************** */
83
84
static void ndpi_set_binary_application_transfer(struct ndpi_detection_module_struct *ndpi_struct,
85
             struct ndpi_flow_struct *flow,
86
1.26k
             char *msg) {
87
  /*
88
    Check known exceptions
89
    https://learn.microsoft.com/en-us/windows/privacy/windows-endpoints-1909-non-enterprise-editions
90
  */
91
1.26k
  if(ends_with(ndpi_struct, (char*)flow->host_server_name, ".windowsupdate.com")
92
1.24k
     || ends_with(ndpi_struct, (char*)flow->host_server_name, ".microsoft.com")
93
1.22k
     || ends_with(ndpi_struct, (char*)flow->host_server_name, ".office365.com")
94
1.18k
     || ends_with(ndpi_struct, (char*)flow->host_server_name, ".windows.com")
95
1.26k
     )
96
108
    ;
97
1.15k
  else {
98
1.15k
    char buf[256];
99
100
1.15k
    ndpi_set_risk(ndpi_struct, flow, NDPI_BINARY_APPLICATION_TRANSFER, forge_attempt_msg(flow, msg, buf, sizeof(buf)));
101
1.15k
  }
102
1.26k
 }
103
104
/* *********************************************** */
105
106
static void ndpi_analyze_content_signature(struct ndpi_detection_module_struct *ndpi_struct,
107
10.2k
             struct ndpi_flow_struct *flow) {
108
10.2k
  u_int8_t set_risk = 0;
109
10.2k
  const char *msg = NULL;
110
111
  /*
112
    NOTE: see also (ndpi_main.c)
113
    - ndpi_search_elf
114
    - ndpi_search_portable_executable
115
    - ndpi_search_shellscript
116
  */
117
118
10.2k
  if((flow->initial_binary_bytes_len >= 2) && (flow->initial_binary_bytes[0] == 0x4D) && (flow->initial_binary_bytes[1] == 0x5A))
119
178
    set_risk = 1, msg = "Found DOS/Windows Exe"; /* Win executable */
120
10.0k
  else if((flow->initial_binary_bytes_len >= 4) && (flow->initial_binary_bytes[0] == 0x7F) && (flow->initial_binary_bytes[1] == 'E')
121
45
    && (flow->initial_binary_bytes[2] == 'L') && (flow->initial_binary_bytes[3] == 'F'))
122
24
    set_risk = 1, msg = "Found Linux Exe"; /* Linux ELF executable */
123
10.0k
  else if((flow->initial_binary_bytes_len >= 4) && (flow->initial_binary_bytes[0] == 0xCF) && (flow->initial_binary_bytes[1] == 0xFA)
124
46
    && (flow->initial_binary_bytes[2] == 0xED) && (flow->initial_binary_bytes[3] == 0xFE))
125
9
    set_risk = 1, msg = "Found Linux Exe"; /* Linux executable */
126
10.0k
  else if((flow->initial_binary_bytes_len >= 3)
127
9.80k
    && (flow->initial_binary_bytes[0] == '#')
128
72
    && (flow->initial_binary_bytes[1] == '!')
129
25
    && (flow->initial_binary_bytes[2] == '/'))
130
10
    set_risk = 1, msg = "Found Unix Script"; /* Unix script (e.g. #!/bin/sh) */
131
10.0k
  else if(flow->initial_binary_bytes_len >= 8) {
132
9.49k
    u_int8_t exec_pattern[] = { 0x64, 0x65, 0x78, 0x0A, 0x30, 0x33, 0x35, 0x00 };
133
134
9.49k
    if(memcmp(flow->initial_binary_bytes, exec_pattern, 8) == 0)
135
82
      set_risk = 1, msg = "Found Android Exe"; /* Dalvik Executable (Android) */
136
9.49k
  }
137
138
10.2k
  if(set_risk)
139
303
    ndpi_set_binary_application_transfer(ndpi_struct, flow, (char*)msg);
140
10.2k
}
141
142
/* *********************************************** */
143
144
static int ndpi_search_http_tcp_again(struct ndpi_detection_module_struct *ndpi_struct,
145
445k
              struct ndpi_flow_struct *flow) {
146
445k
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
147
445k
  if(packet->payload_packet_len == 0 || packet->tcp_retransmission)
148
194k
    return 1;
149
150
250k
  ndpi_search_http_tcp(ndpi_struct, flow);
151
152
#ifdef HTTP_DEBUG
153
  printf("=> %s()\n", __FUNCTION__);
154
#endif
155
156
250k
  if(flow->extra_packets_func == NULL) {
157
    /* HTTP stuff completed */
158
159
    /* Loook for TLS over websocket */
160
19.3k
    if((ndpi_struct->cfg.tls_heuristics & NDPI_HEURISTICS_TLS_OBFUSCATED_HTTP) && /* Feature enabled */
161
17.2k
       (flow->host_server_name[0] != '\0' &&
162
13.9k
        flow->http.response_status_code != 0) && /* Bidirectional HTTP traffic */
163
9.63k
       flow->http.websocket) {
164
165
244
      switch_extra_dissection_to_tls_obfuscated_heur(ndpi_struct, flow);
166
244
      return(1);
167
244
    }
168
169
19.1k
    return(0); /* We are good now */
170
19.3k
  }
171
172
231k
  if (flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN) {
173
215k
    ndpi_search_json(ndpi_struct, flow);
174
215k
  }
175
231k
  if (flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN) {
176
215k
    ndpi_search_msgpack(ndpi_struct, flow);
177
215k
  }
178
179
  /* Possibly more processing */
180
231k
  return(1);
181
250k
}
182
183
/* *********************************************** */
184
185
42.2k
/* Returns 1 when c is printable (per ndpi_isprint) or is a tab, CR or LF; 0 otherwise */
static int ndpi_http_is_print(char c) {
  return((ndpi_isprint(c) || (c == '\t') || (c == '\r') || (c == '\n')) ? 1 : 0);
}
191
192
/* *********************************************** */
193
194
static void ndpi_http_check_human_redeable_content(struct ndpi_detection_module_struct *ndpi_struct,
195
               struct ndpi_flow_struct *flow,
196
14.1k
               const u_int8_t *content, u_int16_t content_len) {
197
14.1k
  if(content_len >= 4) {
198
14.1k
    NDPI_LOG_DBG(ndpi_struct, " [len: %u] [%02X %02X %02X %02X][%c%c%c%c]", content_len,
199
14.1k
     content[0], content[1], content[2], content[3],
200
14.1k
     content[0], content[1], content[2], content[3]
201
14.1k
     );
202
203
14.1k
    if(ndpi_http_is_print(content[0]) && ndpi_http_is_print(content[1])
204
9.44k
       && ndpi_http_is_print(content[2]) && ndpi_http_is_print(content[3])) {
205
      /* OK */
206
8.10k
    } else {
207
      /* Looks bad: last resort check if it's gzipped [1F 8B 08 00] */
208
209
6.04k
      if((content[0] == 0x1F)
210
3.63k
   && (content[1] == 0x8B)
211
3.44k
   && (content[2] == 0x08)
212
3.31k
   && (content[3] == 0x00)) {
213
  /* Looks like compressed data */
214
3.16k
      } else {
215
2.87k
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_CONTENT)) {
216
2.58k
    char str[32];
217
218
2.58k
    snprintf(str, sizeof(str), "Susp content %02X%02X%02X%02X",
219
2.58k
       content[0], content[1], content[2], content[3]);
220
2.58k
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_CONTENT, str);
221
2.58k
        } else {
222
286
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_CONTENT, NULL);
223
286
        }
224
2.87k
      }
225
6.04k
    }
226
14.1k
  }
227
14.1k
}
228
229
/* *********************************************** */
230
231
static void ndpi_validate_http_content(struct ndpi_detection_module_struct *ndpi_struct,
232
62.2k
               struct ndpi_flow_struct *flow) {
233
62.2k
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
234
62.2k
  const u_int8_t *double_ret = (const u_int8_t *)ndpi_strnstr((const char *)packet->payload, "\r\n\r\n", packet->payload_packet_len);
235
236
62.2k
  NDPI_LOG_DBG(ndpi_struct, "==>>> [len: %u] ", packet->payload_packet_len);
237
62.2k
  NDPI_LOG_DBG(ndpi_struct, "->> %.*s\n", packet->content_line.len, (const char *)packet->content_line.ptr);
238
239
62.2k
  if(double_ret) {
240
38.1k
    u_int len;
241
242
38.1k
    len = packet->payload_packet_len - (double_ret - packet->payload);
243
244
38.1k
    if(flow->http.is_form
245
37.7k
       || ndpi_strnstr((const char *)packet->content_line.ptr, "text/", packet->content_line.len)
246
23.5k
       || ndpi_strnstr((const char *)packet->content_line.ptr, "/json", packet->content_line.len)
247
38.1k
       ) {
248
      /* This is supposed to be a human-readeable text file */
249
15.9k
      packet->http_check_content = 1;
250
251
15.9k
      if(len >= 8 /* 4 chars for \r\n\r\n and at least 4 charts for content guess */) {
252
14.1k
  double_ret += 4;
253
14.1k
  len -= 4;
254
255
14.1k
  ndpi_http_check_human_redeable_content(ndpi_struct, flow, double_ret, len);
256
14.1k
  if (flow->skip_entropy_check == 0) {
257
14.1k
    flow->entropy = ndpi_entropy(double_ret, len);
258
14.1k
  }
259
14.1k
      }
260
15.9k
    }
261
262
    /* Final checks */
263
264
38.1k
    if(ndpi_isset_risk(flow, NDPI_BINARY_APPLICATION_TRANSFER)
265
526
       && flow->http.user_agent && flow->http.content_type) {
266
227
      if(((strncmp((const char *)flow->http.user_agent, "Java/", 5) == 0))
267
105
   &&
268
105
   ((strcmp((const char *)flow->http.content_type, "application/java-vm") == 0))
269
227
   ) {
270
  /*
271
    Java downloads Java: Log4J:
272
    https://corelight.com/blog/detecting-log4j-exploits-via-zeek-when-java-downloads-java
273
  */
274
275
51
  ndpi_set_risk(ndpi_struct, flow, NDPI_POSSIBLE_EXPLOIT, "Suspicious Log4J");
276
51
      }
277
227
    }
278
279
38.1k
    NDPI_LOG_DBG(ndpi_struct, "\n");
280
38.1k
  }
281
282
62.2k
  if((flow->http.user_agent == NULL) || (flow->http.user_agent[0] == '\0'))
283
50.3k
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT, "Empty or missing User-Agent");
284
62.2k
}
285
286
/* *********************************************** */
287
288
static void update_category_and_breed(struct ndpi_detection_module_struct *ndpi_struct,
289
9.23k
                                      struct ndpi_flow_struct *flow) {
290
9.23k
  ndpi_master_app_protocol proto;
291
9.23k
  proto.master_protocol = flow->detected_protocol_stack[1];
292
9.23k
  proto.app_protocol = flow->detected_protocol_stack[0];
293
9.23k
  flow->category = get_proto_category(ndpi_struct, proto);
294
9.23k
  flow->breed = get_proto_breed(ndpi_struct, proto);
295
9.23k
}
296
297
/* *********************************************** */
298
299
/* https://www.freeformatter.com/mime-types-list.html */
300
static ndpi_protocol_category_t ndpi_http_check_content(struct ndpi_detection_module_struct *ndpi_struct,
301
41.3k
              struct ndpi_flow_struct *flow) {
302
41.3k
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
303
304
41.3k
  if(packet->content_line.len > 0) {
305
41.3k
    u_int app_len = sizeof("application");
306
307
41.3k
    if(packet->content_line.len > app_len) {
308
16.4k
      const char *app     = (const char *)&packet->content_line.ptr[app_len];
309
16.4k
      u_int app_len_avail = packet->content_line.len-app_len;
310
311
16.4k
      if(strncasecmp(app, "mpeg", app_len_avail) == 0) {
312
91
  flow->category = NDPI_PROTOCOL_CATEGORY_STREAMING;
313
91
  return(flow->category);
314
16.4k
      } else if(flow->detected_protocol_stack[0] != NDPI_PROTOCOL_WINDOWS_UPDATE) {
315
16.3k
  if(app_len_avail > 3) {
316
13.7k
    const char** cmp_mimes = NULL;
317
13.7k
    bool found = false;
318
319
13.7k
    switch(app[0]) {
320
120
    case 'b': cmp_mimes = download_file_mimes_b; break;
321
4.62k
    case 'o': cmp_mimes = download_file_mimes_o; break;
322
2.20k
    case 'x': cmp_mimes = download_file_mimes_x; break;
323
13.7k
    }
324
325
13.7k
    if(cmp_mimes != NULL) {
326
6.94k
      u_int8_t i;
327
328
15.5k
      for(i = 0; cmp_mimes[i] != NULL; i++) {
329
11.4k
        if(strncasecmp(app, cmp_mimes[i], app_len_avail) == 0) {
330
2.88k
    char str[64];
331
332
2.88k
    flow->category = NDPI_PROTOCOL_CATEGORY_DOWNLOAD_FT;
333
2.88k
    NDPI_LOG_INFO(ndpi_struct, "found HTTP file transfer\n");
334
335
2.88k
    snprintf(str, sizeof(str), "Found binary mime %s", cmp_mimes[i]);
336
2.88k
    ndpi_set_binary_data_transfer(ndpi_struct, flow, str);
337
2.88k
    found = true;
338
2.88k
    break;
339
2.88k
        }
340
11.4k
      }
341
6.94k
    }
342
343
    /* ***************************************** */
344
345
13.7k
    if(!found) {
346
10.8k
      switch(app[0]) {
347
187
      case 'e': cmp_mimes = binary_exec_file_mimes_e; break;
348
2.74k
      case 'j': cmp_mimes = binary_exec_file_mimes_j; break;
349
651
      case 'v': cmp_mimes = binary_exec_file_mimes_v; break;
350
2.20k
      case 'x': cmp_mimes = binary_exec_file_mimes_x; break;
351
10.8k
      }
352
353
10.8k
      if(cmp_mimes != NULL) {
354
7.64k
        u_int8_t i;
355
356
18.2k
        for(i = 0; cmp_mimes[i] != NULL; i++) {
357
10.6k
    if(strncasecmp(app, cmp_mimes[i], app_len_avail) == 0) {
358
576
      char str[64];
359
360
576
      snprintf(str, sizeof(str), "Found mime exe %s", cmp_mimes[i]);
361
576
      flow->category = NDPI_PROTOCOL_CATEGORY_DOWNLOAD_FT;
362
576
      ndpi_set_binary_application_transfer(ndpi_struct, flow, str);
363
576
      NDPI_LOG_INFO(ndpi_struct, "Found executable HTTP transfer");
364
576
    }
365
10.6k
        }
366
7.64k
      }
367
10.8k
    }
368
13.7k
  }
369
16.3k
      }
370
16.4k
    }
371
372
    /* check for attachment */
373
41.2k
    if(packet->content_disposition_line.len > 0) {
374
2.95k
      u_int8_t attachment_len = sizeof("attachment; filename");
375
376
2.95k
      if(packet->content_disposition_line.len > attachment_len &&
377
2.79k
         strncmp((char *)packet->content_disposition_line.ptr, "attachment; filename", 20) == 0) {
378
2.00k
  u_int8_t filename_len = packet->content_disposition_line.len - attachment_len;
379
2.00k
  int i;
380
381
2.00k
  if(packet->content_disposition_line.ptr[attachment_len] == '\"') {
382
924
    if(packet->content_disposition_line.ptr[packet->content_disposition_line.len-1] != '\"') {
383
      //case: filename="file_name
384
232
      if(filename_len >= 2) {
385
144
        flow->http.filename = ndpi_malloc(filename_len);
386
144
        if(flow->http.filename != NULL) {
387
140
          strncpy(flow->http.filename, (char*)packet->content_disposition_line.ptr+attachment_len+1, filename_len-1);
388
140
          flow->http.filename[filename_len-1] = '\0';
389
140
        }
390
144
      }
391
232
    }
392
692
    else if(filename_len >= 2) {
393
      //case: filename="file_name"
394
620
      flow->http.filename = ndpi_malloc(filename_len-1);
395
396
620
      if(flow->http.filename != NULL) {
397
612
        strncpy(flow->http.filename, (char*)packet->content_disposition_line.ptr+attachment_len+1,
398
612
          filename_len-2);
399
612
        flow->http.filename[filename_len-2] = '\0';
400
612
      }
401
620
    }
402
1.08k
  } else {
403
    //case: filename=file_name
404
1.08k
    flow->http.filename = ndpi_malloc(filename_len+1);
405
406
1.08k
    if(flow->http.filename != NULL) {
407
1.06k
      strncpy(flow->http.filename, (char*)packet->content_disposition_line.ptr+attachment_len, filename_len);
408
1.06k
      flow->http.filename[filename_len] = '\0';
409
1.06k
    }
410
1.08k
  }
411
412
2.00k
  if(filename_len > ATTACHMENT_LEN) {
413
1.79k
    attachment_len += filename_len-ATTACHMENT_LEN-1;
414
415
1.79k
    if((attachment_len+ATTACHMENT_LEN) <= packet->content_disposition_line.len) {
416
1.79k
      char str[64];
417
418
6.20k
      for(i = 0; binary_exec_file_ext[i] != NULL; i++) {
419
        /* Use memcmp in case content-disposition contains binary data */
420
4.79k
        if(memcmp(&packet->content_disposition_line.ptr[attachment_len],
421
4.79k
      binary_exec_file_ext[i], ATTACHMENT_LEN) == 0) {
422
423
381
    snprintf(str, sizeof(str), "Found file extn %s", binary_exec_file_ext[i]);
424
381
    flow->category = NDPI_PROTOCOL_CATEGORY_DOWNLOAD_FT;
425
381
    ndpi_set_binary_application_transfer(ndpi_struct, flow, str);
426
381
    NDPI_LOG_INFO(ndpi_struct, "found executable HTTP transfer");
427
381
    return(flow->category);
428
381
        }
429
4.79k
      }
430
431
      /* No executable but just data transfer */
432
1.41k
      snprintf(str, sizeof(str), "File download %s",
433
1.41k
         flow->http.filename ? flow->http.filename : "");
434
1.41k
      ndpi_set_binary_data_transfer(ndpi_struct, flow, str);
435
1.41k
    }
436
1.79k
  }
437
2.00k
      }
438
2.95k
    }
439
440
40.8k
    switch(packet->content_line.ptr[0]) {
441
11.9k
    case 'a':
442
11.9k
      if(strncasecmp((const char *)packet->content_line.ptr, "audio",
443
11.9k
         ndpi_min(packet->content_line.len, 5)) == 0)
444
292
  flow->category = NDPI_PROTOCOL_CATEGORY_MEDIA;
445
11.9k
      break;
446
447
923
    case 'v':
448
923
      if(strncasecmp((const char *)packet->content_line.ptr, "video",
449
923
         ndpi_min(packet->content_line.len, 5)) == 0)
450
448
  flow->category = NDPI_PROTOCOL_CATEGORY_MEDIA;
451
923
      break;
452
40.8k
    }
453
40.8k
  }
454
455
40.8k
  return(flow->category);
456
41.3k
}
457
458
/* *********************************************** */
459
460
static void ndpi_int_http_add_connection(struct ndpi_detection_module_struct *ndpi_struct,
461
           struct ndpi_flow_struct *flow,
462
770k
           u_int16_t master_protocol) {
463
#ifdef HTTP_DEBUG
464
  printf("=> %s()\n", __FUNCTION__);
465
#endif
466
467
  /* Update the classification only if we don't already have master + app;
468
     for example don't change the protocols if we have already detected a
469
     sub-protocol via the (content-matched) subprotocols logic (i.e.
470
     MPEGDASH, SOAP, ....) */
471
770k
  if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN) {
472
761k
    NDPI_LOG_DBG2(ndpi_struct, "Master: %d\n", master_protocol);
473
761k
    if(flow->detected_protocol_stack[0] != master_protocol) {
474
575k
      NDPI_LOG_DBG2(ndpi_struct, "Previous master was different\n");
475
575k
      proto_stack_reset(&flow->protocol_stack);
476
575k
    }
477
761k
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_UNKNOWN,
478
761k
             master_protocol, NDPI_CONFIDENCE_DPI);
479
761k
  }
480
481
770k
  flow->max_extra_packets_to_check = 8;
482
770k
  flow->extra_packets_func = ndpi_search_http_tcp_again;
483
770k
}
484
485
/* ************************************************************* */
486
487
393k
static void setHttpUserAgent(struct ndpi_flow_struct *flow, char *ua) {
488
393k
  if(    !strcmp(ua, "Windows NT 5.0"))  ua = "Windows 2000";
489
392k
  else if(!strcmp(ua, "Windows NT 5.1"))  ua = "Windows XP";
490
392k
  else if(!strcmp(ua, "Windows NT 5.2"))  ua = "Windows Server 2003";
491
392k
  else if(!strcmp(ua, "Windows NT 6.0"))  ua = "Windows Vista";
492
392k
  else if(!strcmp(ua, "Windows NT 6.1"))  ua = "Windows 7";
493
382k
  else if(!strcmp(ua, "Windows NT 6.2"))  ua = "Windows 8";
494
382k
  else if(!strcmp(ua, "Windows NT 6.3"))  ua = "Windows 8.1";
495
381k
  else if(!strcmp(ua, "Windows NT 10.0")) ua = "Windows 10";
496
380k
  else if(!strcmp(ua, "Windows NT 11.0")) ua = "Windows 11";
497
498
  /* Good reference for future implementations:
499
   * https://github.com/ua-parser/uap-core/blob/master/regexes.yaml */
500
501
393k
  if(flow->http.detected_os == NULL)
502
390k
    flow->http.detected_os = ndpi_strdup(ua);
503
393k
}
504
505
/* ************************************************************* */
506
507
static void ndpi_http_parse_subprotocol(struct ndpi_detection_module_struct *ndpi_struct,
508
          struct ndpi_flow_struct *flow,
509
817k
          int hostname_just_set) {
510
817k
  u_int16_t master_protocol;
511
817k
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
512
513
817k
  if(!ndpi_struct->cfg.http_subclassification_enabled) {
514
143
    NDPI_LOG_DBG2(ndpi_struct, "Skip sub-protocol check because subclassification is disabled\n");
515
143
    return;
516
143
  }
517
518
817k
  master_protocol = NDPI_PROTOCOL_HTTP;
519
817k
  if(flow->detected_protocol_stack[1] != NDPI_PROTOCOL_UNKNOWN)
520
19.1k
    master_protocol = flow->detected_protocol_stack[1];
521
798k
  else if(flow->detected_protocol_stack[0] == NDPI_PROTOCOL_HTTP_CONNECT ||
522
797k
          flow->detected_protocol_stack[0] == NDPI_PROTOCOL_HTTP_PROXY)
523
3.21k
    master_protocol = flow->detected_protocol_stack[0];
524
525
817k
  if(packet->server_line.len > 7 &&
526
34.1k
     strncmp((const char *)packet->server_line.ptr, "ntopng ", 7) == 0) {
527
1.14k
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_NTOP, master_protocol, NDPI_CONFIDENCE_DPI);
528
1.14k
    update_category_and_breed(ndpi_struct, flow);
529
1.14k
    ndpi_unset_risk(ndpi_struct, flow, NDPI_KNOWN_PROTOCOL_ON_NON_STANDARD_PORT);
530
1.14k
  }
531
532
  /* Matching on Content-Type.
533
      OCSP:  application/ocsp-request, application/ocsp-response
534
  */
535
  /* We overwrite any previous sub-classification (example: via hostname) */
536
817k
  if(packet->content_line.len > 17 &&
537
37.9k
     strncmp((const char *)packet->content_line.ptr, "application/ocsp-", 17) == 0) {
538
934
    NDPI_LOG_DBG2(ndpi_struct, "Found OCSP\n");
539
934
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_OCSP, master_protocol, NDPI_CONFIDENCE_DPI);
540
934
    update_category_and_breed(ndpi_struct, flow);
541
934
  }
542
543
  /* HTTP Live Streaming */
544
817k
  if (packet->content_line.len > 28 &&
545
25.8k
      (strncmp((const char *)packet->content_line.ptr, "application/vnd.apple.mpegurl", 29) == 0 ||
546
25.5k
      strncmp((const char *)packet->content_line.ptr, "application/x-mpegURL", 21) == 0 ||
547
25.3k
      strncmp((const char *)packet->content_line.ptr, "application/x-mpegurl", 21) == 0)) {
548
641
    NDPI_LOG_DBG2(ndpi_struct, "Found HLS\n");
549
641
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_HLS, master_protocol, NDPI_CONFIDENCE_DPI);
550
641
    update_category_and_breed(ndpi_struct, flow);
551
641
  }
552
553
817k
  if((flow->http.method == NDPI_HTTP_METHOD_RPC_CONNECT) ||
554
817k
     (flow->http.method == NDPI_HTTP_METHOD_RPC_IN_DATA) ||
555
816k
     (flow->http.method == NDPI_HTTP_METHOD_RPC_OUT_DATA)) {
556
570
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_MS_RPCH, master_protocol, NDPI_CONFIDENCE_DPI);
557
570
    update_category_and_breed(ndpi_struct, flow);
558
570
  }
559
560
817k
  switch (flow->http.method) {
561
535
    case NDPI_HTTP_METHOD_MKCOL:
562
889
    case NDPI_HTTP_METHOD_MOVE:
563
1.21k
    case NDPI_HTTP_METHOD_COPY:
564
1.51k
    case NDPI_HTTP_METHOD_LOCK:
565
1.84k
    case NDPI_HTTP_METHOD_UNLOCK:
566
2.42k
    case NDPI_HTTP_METHOD_PROPFIND:
567
2.74k
    case NDPI_HTTP_METHOD_PROPPATCH:
568
2.74k
      ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_WEBDAV, master_protocol, NDPI_CONFIDENCE_DPI);
569
2.74k
      update_category_and_breed(ndpi_struct, flow);
570
2.74k
      break;
571
814k
    default:
572
814k
      break;
573
817k
  }
574
575
817k
  if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN &&
576
794k
     hostname_just_set && flow->host_server_name[0] != '\0') {
577
430k
    ndpi_match_hostname_protocol(ndpi_struct, flow,
578
430k
         master_protocol,
579
430k
         flow->host_server_name,
580
430k
         strlen(flow->host_server_name));
581
430k
  }
582
583
817k
  if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN &&
584
779k
     packet->http_origin.len > 0) {
585
6.37k
    ndpi_protocol_match_result ret_match;
586
6.37k
    char *ptr, *origin_hostname;
587
6.37k
    size_t origin_hostname_len;
588
589
    /* Origin syntax:
590
        Origin: null
591
        Origin: <scheme>://<hostname>
592
        Origin: <scheme>://<hostname>:<port>
593
    Try extracting hostname */
594
595
6.37k
    ptr = ndpi_strnstr((const char *)packet->http_origin.ptr, "://", packet->http_origin.len);
596
6.37k
    if(ptr) {
597
4.12k
      origin_hostname = ptr + 3;
598
4.12k
      origin_hostname_len = packet->http_origin.len - (ptr - (char *)packet->http_origin.ptr) - 3;
599
4.12k
      ptr = ndpi_strnstr(origin_hostname, ":", origin_hostname_len);
600
4.12k
      if(ptr) {
601
1.26k
        origin_hostname_len = ptr - origin_hostname;
602
1.26k
      }
603
4.12k
      NDPI_LOG_DBG2(ndpi_struct, "Origin: [%.*s] -> [%.*s]\n", packet->http_origin.len, packet->http_origin.ptr,
604
4.12k
        (int)origin_hostname_len, origin_hostname);
605
      /* We already checked hostname...*/
606
4.12k
      if(strncmp(origin_hostname, flow->host_server_name, origin_hostname_len) != 0) {
607
3.50k
        ndpi_match_host_subprotocol(ndpi_struct, flow,
608
3.50k
            origin_hostname,
609
3.50k
            origin_hostname_len,
610
3.50k
            &ret_match,
611
3.50k
            master_protocol, 1);
612
3.50k
      }
613
4.12k
    }
614
6.37k
  }
615
616
817k
  if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN &&
617
779k
     flow->http.url &&
618
465k
     ((strstr(flow->http.url, ":8080/downloading?n=0.") != NULL) ||
619
465k
      (strstr(flow->http.url, ":8080/upload?n=0.") != NULL))) {
620
    /* This looks like Ookla speedtest */
621
1.53k
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_OOKLA, master_protocol, NDPI_CONFIDENCE_DPI);
622
1.53k
    update_category_and_breed(ndpi_struct, flow);
623
1.53k
    ookla_add_to_cache(ndpi_struct, flow);
624
1.53k
  }
625
626
817k
  if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN &&
627
778k
     flow->http.url != NULL &&
628
464k
     strstr(flow->http.url, "micloud.xiaomi.net") != NULL) {
629
397
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_XIAOMI, master_protocol, NDPI_CONFIDENCE_DPI);
630
397
    update_category_and_breed(ndpi_struct, flow);
631
397
  }
632
633
817k
  if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN &&
634
777k
     packet->referer_line.len > 0 &&
635
59.1k
     ndpi_strnstr((const char *)packet->referer_line.ptr, "www.speedtest.net", packet->referer_line.len)) {
636
256
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_OOKLA, master_protocol, NDPI_CONFIDENCE_DPI);
637
256
    update_category_and_breed(ndpi_struct, flow);
638
256
    ookla_add_to_cache(ndpi_struct, flow);
639
256
  }
640
641
  /* WindowsUpdate over some kind of CDN */
642
817k
  if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN &&
643
777k
     flow->http.user_agent && flow->http.url &&
644
362k
     (strstr(flow->http.url, "delivery.mp.microsoft.com/") ||
645
362k
      strstr(flow->http.url, "download.windowsupdate.com/")) &&
646
711
     strstr(flow->http.user_agent, "Microsoft-Delivery-Optimization/") &&
647
189
     ndpi_isset_risk(flow, NDPI_NUMERIC_IP_HOST)) {
648
69
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_WINDOWS_UPDATE, master_protocol, NDPI_CONFIDENCE_DPI);
649
69
    update_category_and_breed(ndpi_struct, flow);
650
69
  }
651
652
817k
  if(flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN &&
653
777k
     packet->payload_packet_len >= 23 &&
654
765k
     memcmp(packet->payload, "<policy-file-request/>", 23) == 0) {
655
    /*
656
      <policy-file-request/>
657
      <cross-domain-policy>
658
      <allow-access-from domain="*.ookla.com" to-ports="8080"/>
659
      <allow-access-from domain="*.speedtest.net" to-ports="8080"/>
660
      </cross-domain-policy>
661
     */
662
39
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_OOKLA, master_protocol, NDPI_CONFIDENCE_DPI);
663
39
    update_category_and_breed(ndpi_struct, flow);
664
39
    ookla_add_to_cache(ndpi_struct, flow);
665
39
  }
666
667
817k
  if ((flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN) &&
668
777k
      flow->http.user_agent && strstr(flow->http.user_agent, "MSRPC")) {
669
310
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_MS_RPCH, master_protocol, NDPI_CONFIDENCE_DPI);
670
310
    update_category_and_breed(ndpi_struct, flow);
671
310
  }
672
673
817k
  if ((flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN) &&
674
777k
      flow->http.user_agent && strstr(flow->http.user_agent, "Valve/Steam HTTP Client")) {
675
184
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_STEAM, master_protocol, NDPI_CONFIDENCE_DPI);
676
184
    update_category_and_breed(ndpi_struct, flow);
677
184
  }
678
679
817k
  if ((flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN) &&
680
777k
      flow->http.user_agent && strstr(flow->http.user_agent, "AirControl Agent v1.0")) {
681
132
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_UBNTAC2, master_protocol, NDPI_CONFIDENCE_DPI);
682
132
    update_category_and_breed(ndpi_struct, flow);
683
132
  }
684
685
817k
  if ((flow->detected_protocol_stack[1] == NDPI_PROTOCOL_UNKNOWN) &&
686
776k
      flow->http.user_agent && strstr(flow->http.user_agent, "gtk-gnutella")) {
687
272
    ndpi_set_detected_protocol(ndpi_struct, flow, NDPI_PROTOCOL_GNUTELLA, master_protocol, NDPI_CONFIDENCE_DPI);
688
272
    update_category_and_breed(ndpi_struct, flow);
689
272
  }
690
691
817k
  if(flow->http.request_header_observed) {
692
800k
    if(flow->http.first_payload_after_header_observed == 0) {
693
      /* Skip the last part of the HTTP request */
694
564k
      flow->http.first_payload_after_header_observed = 1;
695
564k
    } else if(flow->http.is_form && (packet->payload_packet_len > 0) &&
696
5.12k
              (ndpi_struct->cfg.http_username_enabled || ndpi_struct->cfg.http_password_enabled)) {
697
      /* Response payload */
698
5.01k
      char *dup = ndpi_strndup((const char *)packet->payload, packet->payload_packet_len);
699
700
5.01k
      if(dup) {
701
4.96k
  char *key, *value, *tmp;
702
703
4.96k
  key = strtok_r(dup, "=", &tmp);
704
705
16.9k
  while((key != NULL)
706
13.3k
        && ((flow->http.username == NULL) || (flow->http.password == NULL))) {
707
13.2k
    value = strtok_r(NULL, "&", &tmp);
708
709
13.2k
    if(!value)
710
1.32k
      break;
711
712
11.9k
    if((strcmp(key, "user") == 0) || (strcmp(key, "username") == 0)) {
713
269
      if(!flow->http.username && ndpi_struct->cfg.http_username_enabled) flow->http.username = ndpi_strdup(value);
714
11.6k
    } else if((strcmp(key, "pwd") == 0) || (strcmp(key, "password") == 0)) {
715
631
      if(!flow->http.password && ndpi_struct->cfg.http_password_enabled) flow->http.password = ndpi_strdup(value);
716
631
      ndpi_set_risk(ndpi_struct, flow, NDPI_CLEAR_TEXT_CREDENTIALS, "Found password");
717
631
    }
718
719
11.9k
    key = strtok_r(NULL, "=", &tmp);
720
11.9k
  }
721
722
4.96k
  ndpi_free(dup);
723
4.96k
      }
724
5.01k
    }
725
800k
  }
726
817k
}
727
728
/* ************************************************************* */
729
730
static void ndpi_check_user_agent(struct ndpi_detection_module_struct *ndpi_struct,
731
                                  struct ndpi_flow_struct *flow,
732
507k
          char const *ua, size_t ua_len) {
733
507k
  char *double_slash;
734
735
507k
  if((!ua) || (ua[0] == '\0'))
736
1.61k
    return;
737
738
506k
  if (ua_len > 12)
739
497k
  {
740
497k
    size_t i, upper_case_count = 0;
741
742
3.63M
    for (i = 0; i < ua_len; ++i)
743
3.63M
    {
744
      /*
745
       * We assume at least one non alpha char.
746
       * e.g. ' ', '-' or ';' ...
747
       */
748
3.63M
      if (ndpi_isalpha(ua[i]) == 0)
749
496k
      {
750
496k
        break;
751
496k
      }
752
3.13M
      if (isupper((unsigned char)ua[i]) != 0)
753
478k
      {
754
478k
        upper_case_count++;
755
478k
      }
756
3.13M
    }
757
758
497k
    if (i == ua_len) {
759
486
      float upper_case_ratio = (float)upper_case_count / (float)ua_len;
760
761
486
      if (upper_case_ratio >= 0.2f) {
762
260
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_USER_AGENT)) {
763
220
          char str[64];
764
765
220
    snprintf(str, sizeof(str), "UA %s", ua);
766
220
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT, str);
767
220
        } else {
768
40
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT, NULL);
769
40
        }
770
260
      }
771
486
    }
772
497k
  }
773
774
506k
  if((!strncmp(ua, "<?", 2))
775
505k
     || strchr(ua, '$')
776
506k
     ) {
777
4.78k
    if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_USER_AGENT)) {
778
3.16k
      char str[64];
779
780
3.16k
      snprintf(str, sizeof(str), "UA %s", ua);
781
3.16k
      ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT, str);
782
3.16k
    } else {
783
1.61k
      ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT, NULL);
784
1.61k
    }
785
4.78k
  }
786
787
506k
  if((double_slash = strstr(ua, "://")) != NULL) {
788
3.99k
    if(double_slash != ua) /* We're not at the beginning of the user agent */{
789
3.75k
      if((double_slash[-1] != 'p') /* http:// */
790
1.15k
   && (double_slash[-1] != 's') /* https:// */) {
791
954
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_USER_AGENT)) {
792
709
          char str[64];
793
794
709
    snprintf(str, sizeof(str), "UA %s", ua);
795
709
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT, str);
796
709
        } else {
797
245
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT, NULL);
798
245
        }
799
954
      }
800
3.75k
    }
801
3.99k
  }
802
803
  /* no else */
804
506k
  if(!strncmp(ua, "jndi:ldap://", 12)) /* Log4J */ {
805
1.03k
    ndpi_set_risk(ndpi_struct, flow, NDPI_POSSIBLE_EXPLOIT, "Suspicious Log4J");
806
505k
  } else if(
807
505k
    (ua_len < 4)      /* Too short */
808
504k
    || (ua_len > 256) /* Too long  */
809
486k
    || (!strncmp(ua, "test", 4))
810
485k
    || strchr(ua, '{')
811
452k
    || strchr(ua, '}')
812
505k
    ) {
813
54.6k
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT, "Suspicious Log4J");
814
54.6k
  }
815
816
  /*
817
    Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
818
    Amazon-Route53-Health-Check-Service (ref 68784dad-be98-49e4-a63c-9fbbe2816d7c; report http://amzn.to/1vsZADi)
819
    Anonymous Crawler/1.0 (Webcrawler developed with StormCrawler; http://example.com/; webcrawler@example.com)
820
   */
821
506k
  if((strstr(ua, "+http:") != NULL)
822
505k
     || (strstr(ua, " http:") != NULL)
823
505k
     || ndpi_strncasestr(ua, "Crawler", ua_len)
824
504k
     || ndpi_strncasestr(ua, "Bot", ua_len) /* bot/robot */
825
506k
     ) {
826
2.37k
    if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_CRAWLER_BOT)) {
827
1.97k
      char str[64];
828
829
1.97k
      snprintf(str, sizeof(str), "UA %s", ua);
830
831
1.97k
      ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_CRAWLER_BOT, str);
832
1.97k
    } else {
833
399
      ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_CRAWLER_BOT, NULL);
834
399
    }
835
2.37k
  }
836
506k
}
837
838
/* ************************************************************* */
839
840
static void http_process_user_agent(struct ndpi_detection_module_struct *ndpi_struct,
841
                                    struct ndpi_flow_struct *flow,
842
515k
                                    const u_int8_t *ua_ptr, u_int16_t ua_ptr_len) {
843
  /**
844
      Format examples:
845
      Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) ....
846
      Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0
847
   */
848
515k
  if(ua_ptr_len > 7) {
849
511k
    char ua[256];
850
511k
    u_int mlen = ndpi_min(ua_ptr_len, sizeof(ua)-1);
851
852
511k
    strncpy(ua, (const char *)ua_ptr, mlen);
853
511k
    ua[mlen] = '\0';
854
855
511k
    if(strncmp(ua, "Mozilla", 7) == 0) {
856
407k
      char *parent = strchr(ua, '(');
857
858
407k
      if(parent) {
859
393k
  char *token, *end;
860
861
393k
  parent++;
862
393k
  end = strchr(parent, ')');
863
393k
  if(end) end[0] = '\0';
864
865
393k
  token = strsep(&parent, ";");
866
393k
  if(token) {
867
393k
    if((strcmp(token, "X11") == 0)
868
377k
       || (strcmp(token, "compatible") == 0)
869
372k
       || (strcmp(token, "Linux") == 0)
870
367k
       || (strcmp(token, "Macintosh") == 0)
871
393k
       ) {
872
28.6k
      token = strsep(&parent, ";");
873
28.6k
      if(token && (token[0] == ' ')) token++; /* Skip space */
874
875
28.6k
      if(token
876
28.4k
         && ((strcmp(token, "U") == 0)
877
27.9k
       || (strncmp(token, "MSIE", 4) == 0))) {
878
5.26k
        token = strsep(&parent, ";");
879
5.26k
        if(token && (token[0] == ' ')) token++; /* Skip space */
880
881
5.26k
              if(token && (strncmp(token, "Update", 6)  == 0)) {
882
470
                token = strsep(&parent, ";");
883
884
470
                if(token && (token[0] == ' ')) token++; /* Skip space */
885
886
470
                if(token && (strncmp(token, "AOL", 3)  == 0)) {
887
888
46
                  token = strsep(&parent, ";");
889
46
                  if(token && (token[0] == ' ')) token++; /* Skip space */
890
46
                }
891
470
              }
892
5.26k
            }
893
28.6k
          }
894
895
393k
          if(token)
896
393k
            setHttpUserAgent(flow, token);
897
393k
  }
898
393k
      }
899
407k
    }
900
511k
  }
901
902
515k
  if(ndpi_user_agent_set(flow, ua_ptr, ua_ptr_len) != NULL) {
903
507k
    ndpi_unset_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT);
904
507k
    ndpi_check_user_agent(ndpi_struct, flow, flow->http.user_agent, ua_ptr_len);
905
507k
  } else {
906
7.56k
    NDPI_LOG_DBG2(ndpi_struct, "Could not set HTTP user agent (already set?)\n");
907
7.56k
  }
908
909
515k
  NDPI_LOG_DBG2(ndpi_struct, "User Agent Type line found %.*s\n",
910
515k
    ua_ptr_len, ua_ptr);
911
515k
}
912
913
/* ************************************************************* */
914
915
/*
 * Set NDPI_NUMERIC_IP_HOST when the given host value is a plain dotted
 * IPv4 address, optionally followed by ":port".
 *
 *  ip      start of the host value (not necessarily NUL-terminated)
 *  ip_len  number of bytes to consider
 *
 * The value is copied into a local buffer, anything from the first ':'
 * on is cut, and the rest is converted with inet_addr() and back with
 * inet_ntoa(): the risk is set only if the round trip yields exactly the
 * same text.
 */
static void ndpi_check_numeric_ip(struct ndpi_detection_module_struct *ndpi_struct,
                                  struct ndpi_flow_struct *flow,
                                  char *ip, u_int ip_len) {
  char buf[22], *double_dot;
  struct in_addr ip_addr;

  /*
   * Do not rely on the caller for the bound: a value that does not fit
   * (with its terminator) cannot be "a.b.c.d:port" anyway, whose longest
   * form is 15 + 1 + 5 characters.
   */
  if((ip == NULL) || (ip_len >= sizeof(buf)))
    return;

  strncpy(buf, ip, ip_len);
  buf[ip_len] = '\0';

  /* Ignore the optional port */
  if((double_dot = strchr(buf, ':')) != NULL)
    double_dot[0] = '\0';

  ip_addr.s_addr = inet_addr(buf);
  if(strcmp(inet_ntoa(ip_addr), buf) == 0) {
    if(is_flowrisk_info_enabled(ndpi_struct, NDPI_NUMERIC_IP_HOST)) {
      char str[64];

      snprintf(str, sizeof(str), "Found host %s", buf);
      ndpi_set_risk(ndpi_struct, flow, NDPI_NUMERIC_IP_HOST, str);
    } else {
      ndpi_set_risk(ndpi_struct, flow, NDPI_NUMERIC_IP_HOST, NULL);
    }
  }
}
939
940
/* ************************************************************* */
941
942
static void ndpi_check_http_url(struct ndpi_detection_module_struct *ndpi_struct,
943
                                struct ndpi_flow_struct *flow,
944
476k
        char *url) {
945
476k
  char msg[512];
946
476k
  ndpi_risk_enum r;
947
948
476k
  if(strstr(url, "<php>") != NULL /* PHP code in the URL */) {
949
198
    r = NDPI_URL_POSSIBLE_RCE_INJECTION;
950
198
    snprintf(msg, sizeof(msg), "PHP code in URL [%s]", url);
951
476k
  } else if(strncmp(url, "/shell?", 7) == 0) {
952
784
    r = NDPI_URL_POSSIBLE_RCE_INJECTION;
953
784
    snprintf(msg, sizeof(msg), "Possible WebShell detected [%s]", url);
954
475k
  } else if(strncmp(url, "/.", 2) == 0) {
955
1.75k
    r = NDPI_POSSIBLE_EXPLOIT;
956
1.75k
    snprintf(msg, sizeof(msg), "URL starting with dot [%s]", url);
957
474k
  } else {
958
474k
    r = ndpi_validate_url(ndpi_struct, flow, url);
959
474k
    return;
960
474k
  }
961
962
2.73k
  ndpi_set_risk(ndpi_struct, flow, r, msg);
963
2.73k
}
964
965
/* ************************************************************* */
966
967
/* Check custom protocol */
968
static void ndpi_check_http_url_subprotocol(struct ndpi_detection_module_struct *ndpi_struct,
969
476k
              struct ndpi_flow_struct *flow) {
970
476k
  int custom_category = 0;
971
972
476k
  if(flow->http.url) {
973
476k
    if(ndpi_struct->http_url_hashmap) {
974
474k
      u_int64_t id;
975
474k
      u_int16_t proto, category, breed;
976
      
977
      /* This protocol has been defined in protos.txt-like files */
978
474k
      if(ndpi_hash_find_entry(ndpi_struct->http_url_hashmap,
979
474k
            flow->http.url, strlen(flow->http.url),
980
474k
            &id) == 0) {
981
349
        proto = id & 0xFFFF;
982
349
        category = (id & 0xFFFF0000) >> 16;
983
349
        breed = (id & 0xFFFF00000000) >> 32;
984
349
  ndpi_set_detected_protocol(ndpi_struct, flow, proto,
985
349
           ndpi_get_master_proto(ndpi_struct, flow),
986
349
           NDPI_CONFIDENCE_CUSTOM_RULE);
987
349
  flow->category = category;
988
349
  flow->breed = breed;
989
990
349
  if(category != NDPI_PROTOCOL_CATEGORY_UNSPECIFIED)
991
349
    custom_category = 1;
992
993
349
  return;
994
349
      }
995
474k
    }
996
997
476k
    if(!custom_category) { /* Category from custom rule always wins */
998
476k
      if(ends_with(ndpi_struct, (char*)flow->http.url, "/generate_204")
999
476k
         || ends_with(ndpi_struct, (char*)flow->http.url, "/generate204")) {
1000
596
        flow->category = NDPI_PROTOCOL_CATEGORY_CONNECTIVITY_CHECK;
1001
596
      }
1002
476k
    }    
1003
476k
  }
1004
476k
}
1005
1006
/* ************************************************************* */
1007
1008
4.66k
#define MIN_APACHE_VERSION 2004000 /* 2.4.X  [https://endoflife.date/apache] */
1009
3.42k
#define MIN_NGINX_VERSION  1022000 /* 1.22.0 [https://endoflife.date/nginx]  */
1010
1011
static void ndpi_check_http_server(struct ndpi_detection_module_struct *ndpi_struct,
1012
                                   struct ndpi_flow_struct *flow,
1013
46.8k
           const char *server, u_int server_len) {
1014
46.8k
  if(server[0] != '\0') {
1015
46.5k
    if(server_len > 7) {
1016
33.8k
      u_int off, i;
1017
1018
33.8k
      if((strncasecmp(server, "Apache/", off = 7) == 0) /* X.X.X */
1019
28.4k
   || (strncasecmp(server, "nginx/", off = 6) == 0) /* X.X.X */) {
1020
9.55k
  u_int j, a, b, c;
1021
9.55k
  char buf[16] = { '\0' };
1022
1023
62.7k
  for(i=off, j=0; (i<server_len) && (j<sizeof(buf)-1)
1024
59.2k
        && (ndpi_isdigit(server[i]) || (server[i] == '.')); i++)
1025
53.2k
    buf[j++] = server[i];
1026
1027
9.55k
  if(sscanf(buf, "%u.%u.%u", &a, &b, &c) == 3) {
1028
8.09k
    u_int32_t version = (a * 1000000) + (b * 1000) + c;
1029
8.09k
    char msg[64];
1030
1031
8.09k
    if((off == 7) && (version < MIN_APACHE_VERSION)) {
1032
1.60k
      if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_OBSOLETE_SERVER)) {
1033
1.50k
        snprintf(msg, sizeof(msg), "Obsolete Apache server %s", buf);
1034
1.50k
        ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_OBSOLETE_SERVER, msg);
1035
1.50k
      } else {
1036
99
        ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_OBSOLETE_SERVER, NULL);
1037
99
      }
1038
6.49k
    } else if((off == 6) && (version < MIN_NGINX_VERSION)) {
1039
3.05k
      if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_OBSOLETE_SERVER)) {
1040
2.53k
        snprintf(msg, sizeof(msg), "Obsolete nginx server %s", buf);
1041
2.53k
        ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_OBSOLETE_SERVER, msg);
1042
2.53k
      } else {
1043
524
        ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_OBSOLETE_SERVER, NULL);
1044
524
      }
1045
3.05k
    }
1046
8.09k
  }
1047
9.55k
      }
1048
1049
      /* Check server content */
1050
650k
      for(i=0; i<server_len; i++) {
1051
622k
  if(!ndpi_isprint(server[i])) {
1052
5.98k
    char msg[64];
1053
1054
5.98k
    snprintf(msg, sizeof(msg), "Suspicious Agent [%.*s]", server_len, server);
1055
1056
5.98k
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, msg);
1057
5.98k
    break;
1058
5.98k
  }
1059
622k
      }
1060
33.8k
    }
1061
46.5k
  }
1062
46.8k
}
1063
1064
/* ************************************************************* */
1065
1066
/**
1067
   NOTE
1068
   ndpi_parse_packet_line_info is in ndpi_main.c
1069
*/
1070
static void check_content_type_and_change_protocol(struct ndpi_detection_module_struct *ndpi_struct,
1071
817k
               struct ndpi_flow_struct *flow) {
1072
817k
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
1073
817k
  u_int len;
1074
817k
  int hostname_just_set = 0;
1075
1076
817k
  if((flow->http.url == NULL)
1077
799k
     && (packet->http_url_name.len > 0)
1078
554k
     && (packet->host_line.len > 0)) {
1079
479k
    int len = packet->http_url_name.len + packet->host_line.len + 1;
1080
1081
479k
    if(ndpi_isdigit(packet->host_line.ptr[0])
1082
398k
       && (packet->host_line.len < 21))
1083
374k
      ndpi_check_numeric_ip(ndpi_struct, flow, (char*)packet->host_line.ptr, packet->host_line.len);
1084
1085
479k
    flow->http.url = ndpi_malloc(len);
1086
1087
479k
    if(flow->http.url) {
1088
476k
      u_int offset = 0, host_end = 0;
1089
1090
476k
      if(flow->detected_protocol_stack[0] == NDPI_PROTOCOL_HTTP_CONNECT) {
1091
287
  strncpy(flow->http.url, (char*)packet->http_url_name.ptr,
1092
287
    packet->http_url_name.len);
1093
1094
287
  flow->http.url[packet->http_url_name.len] = '\0';
1095
476k
      } else {
1096
  /* Check if we pass through a proxy (usually there is also the Via: ... header) */
1097
476k
  if(strncmp((char*)packet->http_url_name.ptr, "http://", 7) != 0) {
1098
474k
    strncpy(flow->http.url, (char*)packet->host_line.ptr, offset = packet->host_line.len);
1099
474k
    host_end = packet->host_line.len;
1100
474k
  }
1101
1102
476k
  if((packet->host_line.len == packet->http_url_name.len)
1103
33.6k
     && (strncmp((char*)packet->host_line.ptr,
1104
33.6k
           (char*)packet->http_url_name.ptr, packet->http_url_name.len) == 0))
1105
64
    ;
1106
476k
  else {
1107
476k
    strncpy(&flow->http.url[offset], (char*)packet->http_url_name.ptr,
1108
476k
      packet->http_url_name.len);
1109
476k
    offset += packet->http_url_name.len;
1110
476k
  }
1111
1112
476k
  flow->http.url[offset] = '\0';
1113
476k
      }
1114
1115
476k
      ndpi_check_http_url(ndpi_struct, flow, &flow->http.url[host_end]);
1116
476k
      ndpi_check_http_url_subprotocol(ndpi_struct, flow);
1117
476k
    }
1118
479k
  }
1119
1120
817k
  if(packet->http_method.ptr != NULL)
1121
554k
    flow->http.method = ndpi_http_str2method((const char*)packet->http_method.ptr,
1122
554k
               (u_int16_t)packet->http_method.len);
1123
1124
817k
  if(packet->server_line.ptr != NULL)
1125
46.8k
    ndpi_check_http_server(ndpi_struct, flow, (const char *)packet->server_line.ptr, packet->server_line.len);
1126
1127
817k
  if(packet->user_agent_line.ptr != NULL) {
1128
515k
    http_process_user_agent(ndpi_struct, flow, packet->user_agent_line.ptr, packet->user_agent_line.len);
1129
515k
  }
1130
1131
817k
  if(packet->forwarded_line.ptr != NULL) {
1132
869
    if(flow->http.nat_ip == NULL) {
1133
734
      len = packet->forwarded_line.len;
1134
734
      flow->http.nat_ip = ndpi_malloc(len + 1);
1135
734
      if(flow->http.nat_ip != NULL) {
1136
724
        strncpy(flow->http.nat_ip, (char*)packet->forwarded_line.ptr, len);
1137
724
        flow->http.nat_ip[len] = '\0';
1138
724
      }
1139
734
    }
1140
869
  }
1141
1142
817k
  if(packet->upgrade_line.ptr != NULL) {
1143
3.77k
    if((flow->http.response_status_code == 101)
1144
1.10k
       && (packet->upgrade_line.len >= 9)
1145
980
       && memcmp((char *)packet->upgrade_line.ptr, "websocket", 9) == 0)
1146
696
      flow->http.websocket = 1;
1147
3.77k
  }
1148
1149
817k
  if(packet->server_line.ptr != NULL) {
1150
46.8k
    if(flow->http.server == NULL && ndpi_struct->cfg.http_resp_server_enabled) {
1151
42.7k
      len = packet->server_line.len + 1;
1152
42.7k
      flow->http.server = ndpi_malloc(len);
1153
42.7k
      if(flow->http.server) {
1154
42.2k
        strncpy(flow->http.server, (char*)packet->server_line.ptr,
1155
42.2k
                packet->server_line.len);
1156
42.2k
  flow->http.server[packet->server_line.len] = '\0';
1157
42.2k
      }
1158
42.7k
    }
1159
46.8k
  }
1160
1161
817k
  if(packet->authorization_line.ptr != NULL &&
1162
3.62k
     (ndpi_struct->cfg.http_username_enabled || ndpi_struct->cfg.http_password_enabled)) {
1163
3.46k
    const char *a = NULL, *b = NULL;
1164
1165
3.46k
    NDPI_LOG_DBG2(ndpi_struct, "Authorization line found %.*s\n",
1166
3.46k
      packet->authorization_line.len, packet->authorization_line.ptr);
1167
1168
3.46k
    if(flow->http.username == NULL && flow->http.password == NULL) {
1169
3.25k
      if((a = ndpi_strncasestr((const char*)packet->authorization_line.ptr,
1170
3.25k
                               "Basic", packet->authorization_line.len))
1171
1.54k
         || (b = ndpi_strncasestr((const char*)packet->authorization_line.ptr,
1172
1.99k
                                  "Digest", packet->authorization_line.len))) {
1173
1.99k
        size_t content_len;
1174
1.99k
        u_int len = b ? 7 : 6;
1175
1176
1.99k
  if(packet->authorization_line.len > len) {
1177
1.87k
    u_char *content = ndpi_base64_decode((const u_char*)&packet->authorization_line.ptr[len],
1178
1.87k
                 packet->authorization_line.len - len, &content_len);
1179
1180
1.87k
    if(content != NULL) {
1181
1.38k
      char *double_dot = strchr((char*)content, ':');
1182
1183
1.38k
      if(double_dot) {
1184
1.15k
        double_dot[0] = '\0';
1185
1.15k
        if(ndpi_struct->cfg.http_username_enabled)
1186
1.15k
          flow->http.username = ndpi_strdup((char*)content);
1187
1.15k
        if(ndpi_struct->cfg.http_password_enabled)
1188
1.15k
          flow->http.password = ndpi_strdup(&double_dot[1]);
1189
1.15k
      }
1190
1191
1.38k
      ndpi_free(content);
1192
1.38k
    }
1193
1194
1.87k
    ndpi_set_risk(ndpi_struct, flow, NDPI_CLEAR_TEXT_CREDENTIALS,
1195
1.87k
      "Found credentials in HTTP Auth Line");
1196
1.87k
  }
1197
1.99k
      }
1198
3.25k
    }
1199
3.46k
  }
1200
1201
817k
  if((packet->referer_line.ptr != NULL) && (flow->http.referer == NULL))
1202
65.2k
    if(ndpi_struct->cfg.http_referer_enabled)
1203
62.5k
      flow->http.referer = ndpi_strndup((const char *)packet->referer_line.ptr, packet->referer_line.len);
1204
1205
817k
  if((packet->host_line.ptr != NULL) && (flow->http.host == NULL)) {
1206
574k
    if(ndpi_struct->cfg.http_host_enabled) {
1207
555k
      flow->http.host = ndpi_strndup((const char *)packet->host_line.ptr, packet->host_line.len);
1208
1209
555k
      if(flow->http.host != NULL) {
1210
552k
  char *double_column = strchr(flow->http.host, ':');
1211
1212
552k
  if(double_column != NULL)
1213
29.9k
    double_column[0] = '\0';
1214
1215
552k
  if(ndpi_struct->cfg.hostname_dns_check_enabled
1216
544k
     && (ndpi_check_is_numeric_ip(flow->http.host) == false)) {
1217
162k
    ndpi_ip_addr_t ip_addr;
1218
1219
162k
    memset(&ip_addr, 0, sizeof(ip_addr));
1220
1221
162k
    if(packet->iph)
1222
162k
      ip_addr.ipv4 = packet->iph->daddr;
1223
350
    else
1224
350
      memcpy(&ip_addr.ipv6, &packet->iphv6->ip6_dst, sizeof(struct ndpi_in6_addr));
1225
1226
162k
    if(!ndpi_cache_find_hostname_ip(ndpi_struct, &ip_addr, flow->http.host)) {
1227
#ifdef DEBUG_HTTP
1228
      printf("[HTTP] Not found host %s\n", flow->http.host);
1229
#endif
1230
159k
      ndpi_set_risk(ndpi_struct, flow, NDPI_UNRESOLVED_HOSTNAME, flow->http.host);
1231
1232
159k
    } else {
1233
#ifdef DEBUG_HTTP
1234
      printf("[HTTP] Found host %s\n", flow->http.host);
1235
#endif
1236
3.35k
    }
1237
1238
162k
  }
1239
552k
      }
1240
555k
    }
1241
574k
  }
1242
1243
817k
  if(packet->content_line.ptr != NULL) {
1244
75.1k
    NDPI_LOG_DBG2(ndpi_struct, "Content Type line found %.*s\n",
1245
75.1k
      packet->content_line.len, packet->content_line.ptr);
1246
1247
75.1k
    if(flow->http.response_status_code == 0) {
1248
      /* Request */
1249
33.1k
      if((flow->http.request_content_type == NULL) && (packet->content_line.len > 0)) {
1250
32.1k
  if(ndpi_struct->cfg.http_request_content_type_enabled) {
1251
30.7k
    int len = packet->content_line.len + 1;
1252
1253
30.7k
          flow->http.request_content_type = ndpi_malloc(len);
1254
30.7k
          if(flow->http.request_content_type) {
1255
30.5k
            strncpy(flow->http.request_content_type, (char*)packet->content_line.ptr,
1256
30.5k
                    packet->content_line.len);
1257
30.5k
            flow->http.request_content_type[packet->content_line.len] = '\0';
1258
30.5k
          }
1259
30.7k
  }
1260
1261
32.1k
  if(ndpi_strnstr((char*)packet->content_line.ptr, "x-www-form-urlencoded", packet->content_line.len))
1262
11.4k
     flow->http.is_form = 1;
1263
32.1k
      }
1264
42.0k
    } else {
1265
      /* Response */
1266
42.0k
      if((flow->http.content_type == NULL) && (packet->content_line.len > 0)) {
1267
41.3k
        if(ndpi_struct->cfg.http_resp_content_type_enabled) {
1268
1269
38.1k
          int len = packet->content_line.len + 1;
1270
1271
38.1k
    flow->http.content_type = ndpi_malloc(len);
1272
38.1k
    if(flow->http.content_type) {
1273
37.7k
      strncpy(flow->http.content_type, (char*)packet->content_line.ptr,
1274
37.7k
        packet->content_line.len);
1275
37.7k
      flow->http.content_type[packet->content_line.len] = '\0';
1276
37.7k
    }
1277
38.1k
        }
1278
1279
41.3k
  flow->category = ndpi_http_check_content(ndpi_struct, flow);
1280
41.3k
      }
1281
42.0k
    }
1282
75.1k
  }
1283
1284
  /* check for host line (only if we don't already have an hostname) */
1285
817k
  if(packet->host_line.ptr != NULL && flow->host_server_name[0] == '\0') {
1286
1287
439k
    NDPI_LOG_DBG2(ndpi_struct, "HOST line found %.*s\n",
1288
439k
      packet->host_line.len, packet->host_line.ptr);
1289
1290
    /* Copy result for nDPI apps */
1291
439k
    ndpi_hostname_sni_set(flow, packet->host_line.ptr, packet->host_line.len, NDPI_HOSTNAME_NORM_ALL | NDPI_HOSTNAME_NORM_STRIP_PORT);
1292
1293
439k
    if(strlen(flow->host_server_name) > 0) {
1294
433k
      char *double_col;
1295
433k
      int a, b, c, d;
1296
433k
      u_int16_t host_line_length;
1297
1298
433k
      hostname_just_set = 1;
1299
433k
      host_line_length = packet->host_line.len;
1300
1301
      /* If hostname is $hostame:$port, ignore the trailing port. Check
1302
         that it is a valid port */
1303
433k
      double_col = ndpi_memrchr(packet->host_line.ptr, ':', packet->host_line.len);
1304
433k
      if(double_col) {
1305
28.0k
        char *endptr, port_str[6]; /* 65535 + \0 */
1306
28.0k
        int port_str_len;
1307
28.0k
        long port;
1308
1309
28.0k
        port_str_len = (char *)packet->host_line.ptr +  packet->host_line.len - double_col - 1;
1310
1311
28.0k
        if(port_str_len > 0 && port_str_len < 6) {
1312
6.56k
          memcpy(port_str, double_col + 1, port_str_len);
1313
6.56k
          port_str[port_str_len] = '\0';
1314
1315
          /* We can't easily use ndpi_strtonum because we want to be sure that there are no
1316
             others characters after the number */
1317
6.56k
          errno = 0;    /* To distinguish success/failure after call */
1318
6.56k
          port = strtol(port_str, &endptr, 10);
1319
6.56k
          if(errno == 0 && *endptr == '\0' &&
1320
4.55k
             (port >= 0 && port <= 65535)) {
1321
4.52k
            host_line_length = double_col - (char *)packet->host_line.ptr;
1322
4.52k
          }
1323
6.56k
        }
1324
28.0k
      }
1325
1326
433k
      if(ndpi_is_valid_hostname((char *)packet->host_line.ptr,
1327
433k
                                host_line_length) == 0) {
1328
83.2k
  char str[128];
1329
1330
83.2k
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_INVALID_CHARACTERS)) {
1331
62.1k
    snprintf(str, sizeof(str), "Invalid host %s", flow->host_server_name);
1332
62.1k
    ndpi_set_risk(ndpi_struct, flow, NDPI_INVALID_CHARACTERS, str);
1333
62.1k
        } else {
1334
21.0k
          ndpi_set_risk(ndpi_struct, flow, NDPI_INVALID_CHARACTERS, NULL);
1335
21.0k
        }
1336
1337
  /* This looks like an attack */
1338
1339
83.2k
  snprintf(str, sizeof(str), "Suspicious hostname [%.*s]: attack ?", packet->host_line.len, (char *)packet->host_line.ptr);
1340
83.2k
  ndpi_set_risk(ndpi_struct, flow, NDPI_POSSIBLE_EXPLOIT, str);
1341
83.2k
      }
1342
1343
433k
      if(ndpi_struct->packet.iph
1344
432k
         && (sscanf(flow->host_server_name, "%d.%d.%d.%d", &a, &b, &c, &d) == 4)) {
1345
        /* IPv4 */
1346
1347
315k
        if(ndpi_struct->packet.iph->daddr != inet_addr(flow->host_server_name)) {
1348
34.1k
          if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1349
25.9k
            char buf[64], msg[128];
1350
1351
25.9k
      snprintf(msg, sizeof(msg), "Expected %s, found %s",
1352
25.9k
         ndpi_intoav4(ntohl(ndpi_struct->packet.iph->daddr), buf, sizeof(buf)), flow->host_server_name);
1353
25.9k
      ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, msg);
1354
25.9k
          } else {
1355
8.15k
            ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1356
8.15k
          }
1357
34.1k
        }
1358
315k
      }
1359
433k
    }
1360
1361
439k
  }
1362
1363
817k
  ndpi_http_parse_subprotocol(ndpi_struct, flow, hostname_just_set);
1364
1365
817k
  if(hostname_just_set && strlen(flow->host_server_name) > 0) {
1366
433k
    ndpi_check_dga_name(ndpi_struct, flow, flow->host_server_name, 1, 0, 0);
1367
433k
  }
1368
1369
817k
  ndpi_check_http_header(ndpi_struct, flow);
1370
817k
}
1371
1372
/* ************************************************************* */
1373
1374
#ifdef NDPI_ENABLE_DEBUG_MESSAGES
/* Debug helper: map bytes below 32 to '.', leave any other byte as is */
static uint8_t non_ctrl(uint8_t c) {
  if(c < 32)
    return '.';

  return c;
}
#endif
1379
1380
/* ************************************************************* */
1381
1382
/**
1383
 * Functions to check whether the packet begins with a valid http request
1384
 * @param ndpi_struct
1385
 * @returnvalue 0 if no valid request has been found
1386
 * @returnvalue >0 indicates start of filename but not necessarily in packet limit
1387
 */
1388
1389
#define STATIC_STRING_L(a) {.str=a, .len=sizeof(a)-1 }
1390
1391
static struct l_string {
1392
  const char *str;
1393
  size_t     len;
1394
} http_methods[] = {
1395
        STATIC_STRING_L("GET "),
1396
        STATIC_STRING_L("POST "),
1397
        STATIC_STRING_L("OPTIONS "),
1398
        STATIC_STRING_L("HEAD "),
1399
        STATIC_STRING_L("PUT "),
1400
        STATIC_STRING_L("PATCH "),
1401
        STATIC_STRING_L("DELETE "),
1402
        STATIC_STRING_L("CONNECT "),
1403
        STATIC_STRING_L("PROPFIND "),
1404
        STATIC_STRING_L("PROPPATCH "),
1405
        STATIC_STRING_L("MKCOL "),
1406
        STATIC_STRING_L("MOVE "),
1407
        STATIC_STRING_L("COPY "),
1408
        STATIC_STRING_L("LOCK "),
1409
        STATIC_STRING_L("UNLOCK "),
1410
        STATIC_STRING_L("REPORT "),
1411
        STATIC_STRING_L("RPC_CONNECT "),
1412
        STATIC_STRING_L("RPC_IN_DATA "),
1413
        STATIC_STRING_L("RPC_OUT_DATA ")
1414
};
1415
static const char *http_fs = "CDGHLMOPRU";
1416
1417
static u_int16_t http_request_url_offset(struct ndpi_detection_module_struct *ndpi_struct)
1418
3.03M
{
1419
3.03M
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
1420
3.03M
  unsigned int i;
1421
1422
3.03M
  NDPI_LOG_DBG2(ndpi_struct, "====>>>> HTTP: %c%c%c%c [len: %u]\n",
1423
3.03M
    packet->payload_packet_len > 0 ? non_ctrl(packet->payload[0]) : '.',
1424
3.03M
    packet->payload_packet_len > 1 ? non_ctrl(packet->payload[1]) : '.',
1425
3.03M
    packet->payload_packet_len > 2 ? non_ctrl(packet->payload[2]) : '.',
1426
3.03M
    packet->payload_packet_len > 3 ? non_ctrl(packet->payload[3]) : '.',
1427
3.03M
    packet->payload_packet_len);
1428
1429
  /* Check first char */
1430
3.03M
  if(!packet->payload_packet_len || !strchr(http_fs,packet->payload[0]))
1431
1.81M
    return 0;
1432
1433
  /**
1434
     FIRST PAYLOAD PACKET FROM CLIENT
1435
  **/
1436
10.5M
  for(i=0; i < sizeof(http_methods)/sizeof(http_methods[0]); i++) {
1437
10.0M
    if(packet->payload_packet_len >= http_methods[i].len &&
1438
8.83M
       strncasecmp((const char*)packet->payload,http_methods[i].str,http_methods[i].len) == 0) {
1439
735k
      size_t url_start = http_methods[i].len;
1440
746k
      while (url_start < packet->payload_packet_len &&
1441
746k
             url_start < http_methods[i].len + 2048 && /* We assume 2048 chars as maximum for URLs. */
1442
746k
             packet->payload[url_start] == ' ') { url_start++; }
1443
735k
      NDPI_LOG_DBG2(ndpi_struct, "HTTP: %sFOUND\n",http_methods[i].str);
1444
735k
      return url_start;
1445
735k
    }
1446
10.0M
  }
1447
484k
  return 0;
1448
1.22M
}
1449
1450
/* *********************************************************************************************** */
1451
1452
/* Trick to speed-up detection */
1453
static const char* suspicious_http_header_keys_A[] = { "Arch", NULL};
1454
static const char* suspicious_http_header_keys_C[] = { "Cores", NULL};
1455
static const char* suspicious_http_header_keys_M[] = { "Mem", NULL};
1456
static const char* suspicious_http_header_keys_O[] = { "Os", "Osname", "Osversion", NULL};
1457
static const char* suspicious_http_header_keys_R[] = { "Root", NULL};
1458
static const char* suspicious_http_header_keys_S[] = { "S", NULL};
1459
static const char* suspicious_http_header_keys_T[] = { "TLS_version", NULL};
1460
static const char* suspicious_http_header_keys_U[] = { "Uuid", NULL};
1461
static const char* suspicious_http_header_keys_X[] = { "X-Hire-Me", NULL};
1462
1463
2.07M
static int is_a_suspicious_header(const char* suspicious_headers[], struct ndpi_int_one_line_struct packet_line) {
1464
2.07M
  int i;
1465
2.07M
  unsigned int header_len;
1466
2.07M
  const u_int8_t* header_limit;
1467
1468
2.07M
  if((header_limit = memchr(packet_line.ptr, ':', packet_line.len))) {
1469
1.96M
    header_len = header_limit - packet_line.ptr;
1470
3.94M
    for(i=0; suspicious_headers[i] != NULL; i++) {
1471
1.98M
      if(!strncasecmp((const char*) packet_line.ptr,
1472
1.98M
          suspicious_headers[i], header_len))
1473
4.70k
  return 1;
1474
1.98M
    }
1475
1.96M
  }
1476
1477
2.06M
  return 0;
1478
2.07M
}
1479
1480
/* *********************************************************************************************** */
1481
1482
static void ndpi_check_http_header(struct ndpi_detection_module_struct *ndpi_struct,
1483
817k
           struct ndpi_flow_struct *flow) {
1484
817k
  u_int32_t i;
1485
817k
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
1486
1487
4.70M
  for(i=0; (i < packet->parsed_lines)
1488
4.54M
  && (packet->line[i].ptr != NULL)
1489
4.54M
  && (packet->line[i].len > 0); i++) {
1490
3.89M
    switch(packet->line[i].ptr[0]) {
1491
234k
    case 'A':
1492
234k
      if(is_a_suspicious_header(suspicious_http_header_keys_A, packet->line[i])) {
1493
393
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1494
244
          char str[64];
1495
1496
244
    snprintf(str, sizeof(str), "Found %.*s", packet->line[i].len, packet->line[i].ptr);
1497
244
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, str);
1498
244
        } else {
1499
149
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1500
149
        }
1501
393
  return;
1502
393
      }
1503
233k
      break;
1504
927k
    case 'C':
1505
927k
      if(is_a_suspicious_header(suspicious_http_header_keys_C, packet->line[i])) {
1506
772
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1507
596
          char str[64];
1508
1509
596
    snprintf(str, sizeof(str), "Found %.*s", packet->line[i].len, packet->line[i].ptr);
1510
596
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, str);
1511
596
        } else {
1512
176
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1513
176
        }
1514
772
  return;
1515
772
      }
1516
926k
      break;
1517
926k
    case 'M':
1518
9.54k
      if(is_a_suspicious_header(suspicious_http_header_keys_M, packet->line[i])) {
1519
155
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1520
132
          char str[64];
1521
1522
132
    snprintf(str, sizeof(str), "Found %.*s", packet->line[i].len, packet->line[i].ptr);
1523
132
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, str);
1524
132
        } else {
1525
23
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1526
23
        }
1527
155
  return;
1528
155
      }
1529
9.38k
      break;
1530
14.1k
    case 'O':
1531
14.1k
      if(is_a_suspicious_header(suspicious_http_header_keys_O, packet->line[i])) {
1532
187
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1533
178
          char str[64];
1534
1535
178
    snprintf(str, sizeof(str), "Found %.*s", packet->line[i].len, packet->line[i].ptr);
1536
178
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, str);
1537
178
        } else {
1538
9
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1539
9
        }
1540
187
  return;
1541
187
      }
1542
13.9k
      break;
1543
88.9k
    case 'R':
1544
88.9k
      if(is_a_suspicious_header(suspicious_http_header_keys_R, packet->line[i])) {
1545
535
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1546
426
          char str[64];
1547
1548
426
    snprintf(str, sizeof(str), "Found %.*s", packet->line[i].len, packet->line[i].ptr);
1549
426
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, str);
1550
426
        } else {
1551
109
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1552
109
        }
1553
535
  return;
1554
535
      }
1555
88.4k
      break;
1556
88.4k
    case 'S':
1557
75.7k
      if(is_a_suspicious_header(suspicious_http_header_keys_S, packet->line[i])) {
1558
907
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1559
843
          char str[64];
1560
1561
843
    snprintf(str, sizeof(str), "Found %.*s", packet->line[i].len, packet->line[i].ptr);
1562
843
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, str);
1563
843
        } else {
1564
64
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1565
64
        }
1566
907
  return;
1567
907
      }
1568
74.8k
      break;
1569
74.8k
    case 'T':
1570
9.86k
      if(is_a_suspicious_header(suspicious_http_header_keys_T, packet->line[i])) {
1571
348
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1572
274
          char str[64];
1573
1574
274
    snprintf(str, sizeof(str), "Found %.*s", packet->line[i].len, packet->line[i].ptr);
1575
274
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, str);
1576
274
        } else {
1577
74
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1578
74
        }
1579
348
  return;
1580
348
      }
1581
9.51k
      break;
1582
672k
    case 'U':
1583
672k
      if(is_a_suspicious_header(suspicious_http_header_keys_U, packet->line[i])) {
1584
568
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1585
434
          char str[64];
1586
1587
434
    snprintf(str, sizeof(str), "Found %.*s", packet->line[i].len, packet->line[i].ptr);
1588
434
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, str);
1589
434
        } else {
1590
134
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1591
134
        }
1592
568
  return;
1593
568
      }
1594
672k
      break;
1595
672k
    case 'X':
1596
37.5k
      if(is_a_suspicious_header(suspicious_http_header_keys_X, packet->line[i])) {
1597
835
        if(is_flowrisk_info_enabled(ndpi_struct, NDPI_HTTP_SUSPICIOUS_HEADER)) {
1598
800
          char str[64];
1599
1600
800
    snprintf(str, sizeof(str), "Found %.*s", packet->line[i].len, packet->line[i].ptr);
1601
800
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, str);
1602
800
        } else {
1603
35
          ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER, NULL);
1604
35
        }
1605
835
  return;
1606
835
      }
1607
1608
36.7k
      break;
1609
3.89M
    }
1610
3.89M
  }
1611
817k
}
1612
1613
static void parse_response_code(struct ndpi_detection_module_struct *ndpi_struct,
1614
        struct ndpi_flow_struct *flow)
1615
62.2k
{
1616
62.2k
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
1617
62.2k
  char buf[4];
1618
62.2k
  char ec[48];
1619
1620
62.2k
  if(packet->payload_packet_len >= 12) {
1621
    /* Set server HTTP response code */
1622
61.7k
    strncpy(buf, (char*)&packet->payload[9], 3);
1623
61.7k
    buf[3] = '\0';
1624
1625
61.7k
    flow->http.response_status_code = atoi(buf);
1626
61.7k
    NDPI_LOG_DBG2(ndpi_struct, "Response code %d\n", flow->http.response_status_code);
1627
1628
    /* https://en.wikipedia.org/wiki/List_of_HTTP_status_codes */
1629
61.7k
    if((flow->http.response_status_code < 100) || (flow->http.response_status_code > 509))
1630
7.82k
      flow->http.response_status_code = 0; /* Out of range */
1631
1632
61.7k
    if(flow->http.response_status_code >= 400) {
1633
4.07k
      snprintf(ec, sizeof(ec), "HTTP Error Code %u", flow->http.response_status_code);
1634
4.07k
      ndpi_set_risk(ndpi_struct, flow, NDPI_ERROR_CODE_DETECTED, ec);
1635
1636
4.07k
      if(flow->http.url != NULL) {
1637
        /* Let's check for Wordpress */
1638
936
        char *slash = strchr(flow->http.url, '/');
1639
1640
936
  if(slash != NULL &&
1641
874
           (((flow->http.method == NDPI_HTTP_METHOD_POST) && (strncmp(slash, "/wp-admin/", 10) == 0))
1642
872
      || ((flow->http.method == NDPI_HTTP_METHOD_GET) && (strncmp(slash, "/wp-content/uploads/", 20) == 0))
1643
874
     )) {
1644
          /* Example of popular exploits https://www.wordfence.com/blog/2022/05/millions-of-attacks-target-tatsu-builder-plugin/ */
1645
8
    char str[128];
1646
1647
8
    snprintf(str, sizeof(str), "Possible Wordpress Exploit [%s]", slash);
1648
8
          ndpi_set_risk(ndpi_struct, flow, NDPI_POSSIBLE_EXPLOIT, str);
1649
8
  }
1650
936
      }
1651
4.07k
    }
1652
61.7k
  }
1653
62.2k
}
1654
1655
3.03M
static int is_request(struct ndpi_detection_module_struct *ndpi_struct) {
1656
3.03M
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
1657
3.03M
  u_int16_t filename_start;
1658
1659
3.03M
  filename_start = http_request_url_offset(ndpi_struct);
1660
  /* This check is required as RTSP is pretty similiar to HTTP */
1661
3.03M
  if(filename_start > 0 &&
1662
735k
     strncasecmp((const char *)packet->payload + filename_start,
1663
735k
                 "rtsp://", ndpi_min(7, packet->payload_packet_len - filename_start)) == 0)
1664
621
    return 0;
1665
3.03M
  return filename_start;
1666
3.03M
}
1667
1668
2.32M
static int is_response(struct ndpi_detection_module_struct *ndpi_struct) {
1669
2.32M
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
1670
2.32M
  if(packet->payload_packet_len >= 7 &&
1671
1.74M
     strncasecmp((const char *)packet->payload, "HTTP/1.", 7) == 0)
1672
62.2k
    return 1;
1673
2.25M
  return 0;
1674
2.32M
}
1675
1676
static void process_request(struct ndpi_detection_module_struct *ndpi_struct,
1677
          struct ndpi_flow_struct *flow,
1678
735k
          u_int16_t filename_start) {
1679
735k
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
1680
735k
  u_int16_t master_protocol;
1681
1682
735k
  ndpi_parse_packet_line_info(ndpi_struct, flow);
1683
1684
735k
  master_protocol = NDPI_PROTOCOL_HTTP;
1685
1686
735k
  if(packet->parsed_lines == 0 ||
1687
720k
     !(packet->line[0].len >= (9 + filename_start) &&
1688
714k
       strncasecmp((const char *)&packet->line[0].ptr[packet->line[0].len - 9], " HTTP/1.", 8) == 0)) {
1689
180k
    NDPI_LOG_DBG2(ndpi_struct, "Request with an incomplete or invalid first line\n");
1690
    /* Since we don't save data across different packets, we will never have
1691
       the complete url: we can't check for HTTP_PROXY */
1692
180k
    if(filename_start == 8 &&
1693
883
       strncasecmp((const char *)packet->payload, "CONNECT ", 8) == 0) {
1694
472
      master_protocol = NDPI_PROTOCOL_HTTP_CONNECT;
1695
472
    }
1696
554k
  } else {
1697
    /* First line is complete (example: "GET / HTTP/1.1"): extract url */
1698
1699
554k
    packet->http_url_name.ptr = &packet->payload[filename_start];
1700
554k
    packet->http_url_name.len = packet->line[0].len - (filename_start + 9);
1701
1702
554k
    packet->http_method.ptr = packet->line[0].ptr;
1703
554k
    packet->http_method.len = filename_start - 1;
1704
1705
    /* Set the HTTP requested version: 0=HTTP/1.0 and 1=HTTP/1.1 */
1706
554k
    if(memcmp(&packet->line[0].ptr[packet->line[0].len - 1], "1", 1) == 0)
1707
521k
      flow->http.request_version = 1;
1708
32.6k
    else
1709
32.6k
      flow->http.request_version = 0;
1710
1711
554k
    if(packet->http_url_name.len > 7 &&
1712
494k
       !strncasecmp((const char*) packet->http_url_name.ptr, "http://", 7)) {
1713
2.26k
      master_protocol = NDPI_PROTOCOL_HTTP_PROXY;
1714
2.26k
    }
1715
554k
    if(filename_start == 8 &&
1716
650
       strncasecmp((const char *)packet->payload, "CONNECT ", 8) == 0) {
1717
370
      master_protocol = NDPI_PROTOCOL_HTTP_CONNECT;
1718
370
    }
1719
554k
  }
1720
735k
  ndpi_int_http_add_connection(ndpi_struct, flow, master_protocol);
1721
735k
  check_content_type_and_change_protocol(ndpi_struct, flow);
1722
1723
735k
  if(flow->http.user_agent == NULL ||
1724
505k
     flow->http.user_agent[0] == '\0') {
1725
231k
    ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT, "Empty or missing User-Agent");
1726
231k
  }
1727
735k
}
1728
1729
/*
 * Handle a packet already recognised as (the beginning of) an HTTP response:
 * split it into lines, parse the status code, run the content-type based
 * checks and finally validate the HTTP content.
 */
static void process_response(struct ndpi_detection_module_struct *ndpi_struct,
			     struct ndpi_flow_struct *flow)
{
  /* The order matters: later steps work on the lines parsed by the first */
  ndpi_parse_packet_line_info(ndpi_struct, flow);
  parse_response_code(ndpi_struct, flow);
  check_content_type_and_change_protocol(ndpi_struct, flow);

  ndpi_validate_http_content(ndpi_struct, flow);
}
1738
1739
static void reset(struct ndpi_detection_module_struct *ndpi_struct,
1740
211k
                  struct ndpi_flow_struct *flow) {
1741
1742
211k
  NDPI_LOG_DBG2(ndpi_struct, "Reset status and risks\n");
1743
1744
  /* Reset everything in flow->http.
1745
     TODO: Could we be smarter? Probably some info don't change across
1746
     different req-res transactions... */
1747
1748
211k
  flow->http.method = 0;
1749
211k
  flow->http.request_version = 0;
1750
211k
  flow->http.response_status_code = 0;
1751
211k
  if(flow->http.url) {
1752
118k
    ndpi_free(flow->http.url);
1753
118k
    flow->http.url = NULL;
1754
118k
  }
1755
211k
  if(flow->http.content_type) {
1756
10.6k
    ndpi_free(flow->http.content_type);
1757
10.6k
    flow->http.content_type = NULL;
1758
10.6k
  }
1759
211k
  if(flow->http.request_content_type) {
1760
8.57k
    ndpi_free(flow->http.request_content_type);
1761
8.57k
    flow->http.request_content_type = NULL;
1762
8.57k
  }
1763
211k
  if(flow->http.user_agent) {
1764
129k
    ndpi_free(flow->http.user_agent);
1765
129k
    flow->http.user_agent = NULL;
1766
129k
  }
1767
211k
  if(flow->http.server) {
1768
12.1k
    ndpi_free(flow->http.server);
1769
12.1k
    flow->http.server = NULL;
1770
12.1k
  }
1771
211k
  if(flow->http.referer) {
1772
17.0k
    ndpi_free(flow->http.referer);
1773
17.0k
    flow->http.referer = NULL;
1774
17.0k
  }
1775
211k
  if(flow->http.host) {
1776
144k
    ndpi_free(flow->http.host);
1777
144k
    flow->http.host = NULL;
1778
144k
  }
1779
211k
  if(flow->http.detected_os) {
1780
96.2k
    ndpi_free(flow->http.detected_os);
1781
96.2k
    flow->http.detected_os = NULL;
1782
96.2k
  }
1783
211k
  if(flow->http.nat_ip) {
1784
289
    ndpi_free(flow->http.nat_ip);
1785
289
    flow->http.nat_ip = NULL;
1786
289
  }
1787
211k
  if(flow->http.filename) {
1788
538
    ndpi_free(flow->http.filename);
1789
538
    flow->http.filename = NULL;
1790
538
  }
1791
211k
  if(flow->http.username) {
1792
369
    ndpi_free(flow->http.username);
1793
369
    flow->http.username = NULL;
1794
369
  }
1795
211k
  if(flow->http.password) {
1796
429
    ndpi_free(flow->http.password);
1797
429
    flow->http.password = NULL;
1798
429
  }
1799
1800
  /* Reset flow risks. We should reset only those risks triggered by
1801
     the previous HTTP response... */
1802
  /* TODO */
1803
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_BINARY_APPLICATION_TRANSFER);
1804
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_CONTENT);
1805
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_POSSIBLE_EXPLOIT);
1806
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT);
1807
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_HTTP_CRAWLER_BOT);
1808
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_NUMERIC_IP_HOST);
1809
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_URL_POSSIBLE_RCE_INJECTION);
1810
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_HTTP_OBSOLETE_SERVER);
1811
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_CLEAR_TEXT_CREDENTIALS);
1812
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_INVALID_CHARACTERS);
1813
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_HEADER);
1814
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_ERROR_CODE_DETECTED);
1815
211k
  ndpi_unset_risk(ndpi_struct, flow, NDPI_MALFORMED_PACKET);
1816
211k
}
1817
1818
static void ndpi_check_http_tcp(struct ndpi_detection_module_struct *ndpi_struct,
1819
3.07M
        struct ndpi_flow_struct *flow) {
1820
3.07M
  struct ndpi_packet_struct *packet = &ndpi_struct->packet;
1821
3.07M
  u_int16_t filename_start;
1822
1823
3.07M
  NDPI_LOG_DBG(ndpi_struct, "http_stage %d dir %d req/res %d/%d\n",
1824
3.07M
         flow->l4.tcp.http_stage, packet->packet_direction,
1825
3.07M
         is_request(ndpi_struct), is_response(ndpi_struct));
1826
1827
3.07M
  if(flow->l4.tcp.http_stage == 0) { /* Start: waiting for (the beginning of) a request */
1828
2.82M
    filename_start = is_request(ndpi_struct);
1829
2.82M
    if(filename_start == 0) {
1830
      /* Flow starting with a response? */
1831
2.28M
      if(is_response(ndpi_struct)) {
1832
35.1k
        NDPI_LOG_DBG2(ndpi_struct, "Response where a request were expected\n");
1833
  /* This is tricky. Two opposing goals:
1834
     1) We want to correctly match request with response!! -> Skip this response
1835
        and keep looking for a request.
1836
     2) We want to support asymmetric detection
1837
     Trade-off:
1838
     a) set HTTP as master (it is a guess; we can't know it from the reply only)
1839
     b) process the response(s) and save the metadata
1840
     c) look for a request. If we found it, reset everything (master,
1841
        classification and metadata!) */
1842
35.1k
        ndpi_int_http_add_connection(ndpi_struct, flow, NDPI_PROTOCOL_HTTP);
1843
35.1k
        process_response(ndpi_struct, flow);
1844
1845
35.1k
  flow->l4.tcp.http_stage = packet->packet_direction + 3; // packet_direction 0: stage 3, packet_direction 1: stage 4
1846
35.1k
        return;
1847
35.1k
      }
1848
      /* The first pkt is neither a request nor a response -> no http */
1849
2.24M
      NDPI_LOG_DBG2(ndpi_struct, "Neither req nor response -> exclude\n");
1850
2.24M
      NDPI_EXCLUDE_DISSECTOR(ndpi_struct, flow);
1851
2.24M
      return;
1852
2.28M
    }
1853
541k
    NDPI_LOG_DBG2(ndpi_struct, "Request where expected\n");
1854
1855
541k
    process_request(ndpi_struct, flow, filename_start);
1856
1857
    /* Wait for the response */
1858
541k
    flow->l4.tcp.http_stage = packet->packet_direction + 1; // packet_direction 0: stage 1, packet_direction 1: stage 2
1859
1860
541k
    return;
1861
2.82M
  } else if(flow->l4.tcp.http_stage == 1 || flow->l4.tcp.http_stage == 2) {
1862
    /* Found a request, looking for the response */
1863
1864
227k
    if(flow->l4.tcp.http_stage - packet->packet_direction == 1) {
1865
      /* Another pkt from the same direction (probably another fragment of the request)
1866
         Keep lookng for the response */
1867
209k
      NDPI_LOG_DBG2(ndpi_struct, "Another piece of request\n");
1868
209k
      filename_start = is_request(ndpi_struct);
1869
209k
      if(filename_start > 0) {
1870
        /* Probably a new, separated request (asymmetric flow or missing pkts?).
1871
     What should we do? We definitely don't want to mix data from different
1872
     requests. The easiest (but costly) idea is to reset the state and
1873
     process it (i.e. we keep the metadata of the last request that we
1874
     have processed) */
1875
194k
        if(flow->l4.tcp.http_asymmetric_stage < 2)
1876
193k
          flow->l4.tcp.http_asymmetric_stage++;
1877
194k
        reset(ndpi_struct, flow);
1878
194k
        process_request(ndpi_struct, flow, filename_start);
1879
194k
  return;
1880
194k
      }
1881
15.1k
      ndpi_parse_packet_line_info(ndpi_struct, flow);
1882
15.1k
      check_content_type_and_change_protocol(ndpi_struct, flow);
1883
15.1k
      return;
1884
209k
    } else if(is_response(ndpi_struct)) {
1885
11.5k
      NDPI_LOG_DBG2(ndpi_struct, "Response where expected\n");
1886
1887
11.5k
      process_response(ndpi_struct, flow);
1888
1889
11.5k
      flow->l4.tcp.http_stage = 0;
1890
11.5k
    } else {
1891
6.84k
      NDPI_LOG_DBG2(ndpi_struct, "The msg from the server doesn't look like a response...\n");
1892
      /* TODO */
1893
6.84k
    }
1894
227k
  } else if(flow->l4.tcp.http_stage == 3 || flow->l4.tcp.http_stage == 4) {
1895
    /* Found a response but we want a request */
1896
1897
22.2k
    if(flow->l4.tcp.http_stage - packet->packet_direction == 3) {
1898
      /* Another pkt from the same direction (probably another fragment of the response)
1899
         Keep lookng for the request */
1900
20.3k
      NDPI_LOG_DBG2(ndpi_struct, "Another piece of response\n");
1901
20.3k
      if(is_response(ndpi_struct)) {
1902
        /* See the comment above about how we handle consecutive requests/responses */
1903
15.4k
        if(flow->l4.tcp.http_asymmetric_stage < 2)
1904
15.1k
          flow->l4.tcp.http_asymmetric_stage++;
1905
15.4k
        reset(ndpi_struct, flow);
1906
15.4k
        process_response(ndpi_struct, flow);
1907
15.4k
  return;
1908
15.4k
      }
1909
4.90k
      ndpi_parse_packet_line_info(ndpi_struct, flow);
1910
4.90k
      check_content_type_and_change_protocol(ndpi_struct, flow);
1911
4.90k
      return;
1912
20.3k
    }
1913
1914
1.90k
    NDPI_LOG_DBG2(ndpi_struct, "Found a request. We need to reset the state!\n");
1915
1916
1.90k
    reset(ndpi_struct, flow);
1917
1.90k
    flow->l4.tcp.http_stage = 0;
1918
1.90k
    ndpi_check_http_tcp(ndpi_struct, flow);
1919
1.90k
  }
1920
3.07M
}
1921
1922
/* ********************************* */
1923
1924
static void ndpi_search_http_tcp(struct ndpi_detection_module_struct *ndpi_struct,
1925
3.07M
         struct ndpi_flow_struct *flow) {
1926
  /* Break after 20 packets. */
1927
3.07M
  if(flow->packet_counter > 20) {
1928
16
    NDPI_EXCLUDE_DISSECTOR(ndpi_struct, flow);
1929
16
    return;
1930
16
  }
1931
1932
3.07M
  NDPI_LOG_DBG(ndpi_struct, "search HTTP\n");
1933
3.07M
  ndpi_check_http_tcp(ndpi_struct, flow);
1934
1935
3.07M
  if((ndpi_struct->cfg.http_parse_response_enabled &&
1936
3.07M
      flow->host_server_name[0] != '\0' &&
1937
607k
      flow->http.response_status_code != 0) ||
1938
3.05M
     (!ndpi_struct->cfg.http_parse_response_enabled &&
1939
278
      (flow->host_server_name[0] != '\0' ||
1940
195
       flow->http.response_status_code != 0)) ||
1941
     /* We have found 3 consecutive requests (without the reply) or 3
1942
        consecutive replies (without the request). If the traffic is really
1943
        asymmetric, stop here, because we will never find the metadata from
1944
        both the request and the reply. We wait for 3 events (instead of 2)
1945
        to avoid false positives triggered by missing/dropped packets */
1946
3.05M
     (flow->l4.tcp.http_asymmetric_stage == 2 &&
1947
9.86k
      (flow->packet_direction_complete_counter[0] == 0 ||
1948
20.1k
       flow->packet_direction_complete_counter[1] == 0))) {
1949
20.1k
    flow->extra_packets_func = NULL; /* We're good now */
1950
1951
20.1k
    if(flow->initial_binary_bytes_len) ndpi_analyze_content_signature(ndpi_struct, flow);
1952
20.1k
  }
1953
3.07M
}
1954
1955
16.3k
void init_http_dissector(struct ndpi_detection_module_struct *ndpi_struct) {
1956
16.3k
  register_dissector("HTTP", ndpi_struct,
1957
16.3k
                     ndpi_search_http_tcp,
1958
16.3k
                     NDPI_SELECTION_BITMASK_PROTOCOL_V4_V6_TCP_WITH_PAYLOAD_WITHOUT_RETRANSMISSION,
1959
16.3k
                     1, NDPI_PROTOCOL_HTTP);
1960
16.3k
}