Coverage Report

Created: 2025-10-10 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/unbound/util/netevent.c
Line
Count
Source
1
/*
2
 * util/netevent.c - event notification
3
 *
4
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5
 *
6
 * This software is open source.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 *
12
 * Redistributions of source code must retain the above copyright notice,
13
 * this list of conditions and the following disclaimer.
14
 *
15
 * Redistributions in binary form must reproduce the above copyright notice,
16
 * this list of conditions and the following disclaimer in the documentation
17
 * and/or other materials provided with the distribution.
18
 *
19
 * Neither the name of the NLNET LABS nor the names of its contributors may
20
 * be used to endorse or promote products derived from this software without
21
 * specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
 */
35
36
/**
37
 * \file
38
 *
39
 * This file contains event notification functions.
40
 */
41
#include "config.h"
42
#include "util/netevent.h"
43
#include "util/ub_event.h"
44
#include "util/log.h"
45
#include "util/net_help.h"
46
#include "util/tcp_conn_limit.h"
47
#include "util/fptr_wlist.h"
48
#include "util/proxy_protocol.h"
49
#include "util/timeval_func.h"
50
#include "sldns/pkthdr.h"
51
#include "sldns/sbuffer.h"
52
#include "sldns/str2wire.h"
53
#include "dnstap/dnstap.h"
54
#include "dnscrypt/dnscrypt.h"
55
#include "services/listen_dnsport.h"
56
#include "util/random.h"
57
#ifdef HAVE_SYS_TYPES_H
58
#include <sys/types.h>
59
#endif
60
#ifdef HAVE_SYS_SOCKET_H
61
#include <sys/socket.h>
62
#endif
63
#ifdef HAVE_NETDB_H
64
#include <netdb.h>
65
#endif
66
#ifdef HAVE_POLL_H
67
#include <poll.h>
68
#endif
69
70
#ifdef HAVE_OPENSSL_SSL_H
71
#include <openssl/ssl.h>
72
#endif
73
#ifdef HAVE_OPENSSL_ERR_H
74
#include <openssl/err.h>
75
#endif
76
77
#ifdef HAVE_NGTCP2
78
#include <ngtcp2/ngtcp2.h>
79
#include <ngtcp2/ngtcp2_crypto.h>
80
#endif
81
82
#ifdef HAVE_LINUX_NET_TSTAMP_H
83
#include <linux/net_tstamp.h>
84
#endif
85
86
/* -------- Start of local definitions -------- */
87
/**
 * If CMSG_ALIGN is not defined on this platform, a workaround.
 * Preference order: the libc-internal __CMSG_ALIGN, then the
 * _CMSG_DATA_ALIGN helper, then alignment to sizeof(long).
 */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
     /* test for the macro that is actually expanded below; testing a
      * differently spelled name would never select this branch, or
      * select it while the expansion is undefined */
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif
97
98
/** if CMSG_LEN is not defined on this platform, a workaround */
99
#ifndef CMSG_LEN
100
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
101
#endif
102
103
/** if CMSG_SPACE is not defined on this platform, a workaround */
104
#ifndef CMSG_SPACE
105
#  ifdef _CMSG_HDR_ALIGN
106
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
107
#  else
108
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
109
#  endif
110
#endif
111
112
/** The TCP writing query timeout in milliseconds */
113
0
#define TCP_QUERY_TIMEOUT 120000
114
/** The minimum actual TCP timeout to use, regardless of what we advertise,
115
 * in msec */
116
0
#define TCP_QUERY_TIMEOUT_MINIMUM 200
117
118
#ifndef NONBLOCKING_IS_BROKEN
119
/** number of UDP reads to perform per read indication from select */
120
0
#define NUM_UDP_PER_SELECT 100
121
#else
122
#define NUM_UDP_PER_SELECT 1
123
#endif
124
125
/** timeout in millisec to wait for write to unblock, packets dropped after.*/
126
0
#define SEND_BLOCKED_WAIT_TIMEOUT 200
127
/** max number of times to wait for write to unblock, packets dropped after.*/
128
0
#define SEND_BLOCKED_MAX_RETRY 5
129
130
/** Let's make timestamping code cleaner and redefine SO_TIMESTAMP* */
131
#ifndef SO_TIMESTAMP
132
#define SO_TIMESTAMP 29
133
#endif
134
#ifndef SO_TIMESTAMPNS
135
#define SO_TIMESTAMPNS 35
136
#endif
137
#ifndef SO_TIMESTAMPING
138
#define SO_TIMESTAMPING 37
139
#endif
140
/**
 * Event bookkeeping kept per comm point: the ub_event that was created
 * for it, together with the comm base it belongs to.
 * Possibly other structures (list, tree) this is part of.
 */
struct internal_event {
  /** comm base this event belongs to */
  struct comm_base* base;
  /** the ub_event itself */
  struct ub_event* ev;
};
150
151
/**
 * Private part of a comm_base. Every thread has its own base, and thus
 * its own set of events.
 */
struct internal_base {
  /** the ub_event event_base that drives the events */
  struct ub_event_base* base;
  /** current time in seconds; the seconds time pointer points here */
  time_t secs;
  /** current time as a timeval */
  struct timeval now;
  /** event used for the slow_accept timeouts */
  struct ub_event* slow_accept;
  /** nonzero while slow_accept is enabled */
  int slow_accept_enabled;
  /** time of last log, for slow logging of file descriptor errors */
  time_t last_slow_log;
  /** time of last log, for slow logging of write wait failures */
  time_t last_writewait_log;
};
170
171
/**
172
 * Internal timer structure, to store timer event in.
173
 */
174
struct internal_timer {
175
  /** the super struct from which derived */
176
  struct comm_timer super;
177
  /** the comm base */
178
  struct comm_base* base;
179
  /** ub_event event type */
180
  struct ub_event* ev;
181
  /** is timer enabled */
182
  uint8_t enabled;
183
};
184
185
/**
 * Private signal structure; holds one signal event and links to the
 * next one, forming a singly linked list.
 */
struct internal_signal {
  /** the ub_event for the signal */
  struct ub_event* ev;
  /** next entry in the signal list */
  struct internal_signal* next;
};
194
195
/** create a tcp handler with a parent */
196
static struct comm_point* comm_point_create_tcp_handler(
197
  struct comm_base *base, struct comm_point* parent, size_t bufsize,
198
  struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
199
  void* callback_arg, struct unbound_socket* socket);
200
201
/* -------- End of local definitions -------- */
202
203
struct comm_base*
204
comm_base_create(int sigs)
205
4.05k
{
206
4.05k
  struct comm_base* b = (struct comm_base*)calloc(1,
207
4.05k
    sizeof(struct comm_base));
208
4.05k
  const char *evnm="event", *evsys="", *evmethod="";
209
210
4.05k
  if(!b)
211
0
    return NULL;
212
4.05k
  b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
213
4.05k
  if(!b->eb) {
214
0
    free(b);
215
0
    return NULL;
216
0
  }
217
4.05k
  b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
218
4.05k
  if(!b->eb->base) {
219
0
    free(b->eb);
220
0
    free(b);
221
0
    return NULL;
222
0
  }
223
4.05k
  ub_comm_base_now(b);
224
4.05k
  ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
225
4.05k
  verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod);
226
4.05k
  return b;
227
4.05k
}
228
229
struct comm_base*
230
comm_base_create_event(struct ub_event_base* base)
231
0
{
232
0
  struct comm_base* b = (struct comm_base*)calloc(1,
233
0
    sizeof(struct comm_base));
234
0
  if(!b)
235
0
    return NULL;
236
0
  b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
237
0
  if(!b->eb) {
238
0
    free(b);
239
0
    return NULL;
240
0
  }
241
0
  b->eb->base = base;
242
0
  ub_comm_base_now(b);
243
0
  return b;
244
0
}
245
246
void
247
comm_base_delete(struct comm_base* b)
248
4.05k
{
249
4.05k
  if(!b)
250
0
    return;
251
4.05k
  if(b->eb->slow_accept_enabled) {
252
0
    if(ub_event_del(b->eb->slow_accept) != 0) {
253
0
      log_err("could not event_del slow_accept");
254
0
    }
255
0
    ub_event_free(b->eb->slow_accept);
256
0
  }
257
4.05k
  ub_event_base_free(b->eb->base);
258
4.05k
  b->eb->base = NULL;
259
4.05k
  free(b->eb);
260
4.05k
  free(b);
261
4.05k
}
262
263
void
264
comm_base_delete_no_base(struct comm_base* b)
265
0
{
266
0
  if(!b)
267
0
    return;
268
0
  if(b->eb->slow_accept_enabled) {
269
0
    if(ub_event_del(b->eb->slow_accept) != 0) {
270
0
      log_err("could not event_del slow_accept");
271
0
    }
272
0
    ub_event_free(b->eb->slow_accept);
273
0
  }
274
0
  b->eb->base = NULL;
275
0
  free(b->eb);
276
0
  free(b);
277
0
}
278
279
void
280
comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
281
4.05k
{
282
4.05k
  *tt = &b->eb->secs;
283
4.05k
  *tv = &b->eb->now;
284
4.05k
}
285
286
void
287
comm_base_dispatch(struct comm_base* b)
288
0
{
289
0
  int retval;
290
0
  retval = ub_event_base_dispatch(b->eb->base);
291
0
  if(retval < 0) {
292
0
    fatal_exit("event_dispatch returned error %d, "
293
0
      "errno is %s", retval, strerror(errno));
294
0
  }
295
0
}
296
297
void comm_base_exit(struct comm_base* b)
298
0
{
299
0
  if(ub_event_base_loopexit(b->eb->base) != 0) {
300
0
    log_err("Could not loopexit");
301
0
  }
302
0
}
303
304
void comm_base_set_slow_accept_handlers(struct comm_base* b,
305
  void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
306
0
{
307
0
  b->stop_accept = stop_acc;
308
0
  b->start_accept = start_acc;
309
0
  b->cb_arg = arg;
310
0
}
311
312
struct ub_event_base* comm_base_internal(struct comm_base* b)
313
0
{
314
0
  return b->eb->base;
315
0
}
316
317
struct ub_event* comm_point_internal(struct comm_point* c)
318
0
{
319
0
  return c->ev->ev;
320
0
}
321
322
/** see if errno for udp has to be logged or not uses globals */
323
static int
324
udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
325
0
{
326
  /* do not log transient errors (unless high verbosity) */
327
0
#if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
328
0
  switch(errno) {
329
0
#  ifdef ENETUNREACH
330
0
    case ENETUNREACH:
331
0
#  endif
332
0
#  ifdef EHOSTDOWN
333
0
    case EHOSTDOWN:
334
0
#  endif
335
0
#  ifdef EHOSTUNREACH
336
0
    case EHOSTUNREACH:
337
0
#  endif
338
0
#  ifdef ENETDOWN
339
0
    case ENETDOWN:
340
0
#  endif
341
0
    case EPERM:
342
0
    case EACCES:
343
0
      if(verbosity < VERB_ALGO)
344
0
        return 0;
345
0
      break;
346
0
    default:
347
0
      break;
348
0
  }
349
0
#endif
350
  /* permission denied is gotten for every send if the
351
   * network is disconnected (on some OS), squelch it */
352
0
  if( ((errno == EPERM)
353
0
#  ifdef EADDRNOTAVAIL
354
    /* 'Cannot assign requested address' also when disconnected */
355
0
    || (errno == EADDRNOTAVAIL)
356
0
#  endif
357
0
    ) && verbosity < VERB_ALGO)
358
0
    return 0;
359
0
#  ifdef EADDRINUSE
360
  /* If SO_REUSEADDR is set, we could try to connect to the same server
361
   * from the same source port twice. */
362
0
  if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
363
0
    return 0;
364
0
#  endif
365
  /* squelch errors where people deploy AAAA ::ffff:bla for
366
   * authority servers, which we try for intranets. */
367
0
  if(errno == EINVAL && addr_is_ip4mapped(
368
0
    (struct sockaddr_storage*)addr, addrlen) &&
369
0
    verbosity < VERB_DETAIL)
370
0
    return 0;
371
  /* SO_BROADCAST sockopt can give access to 255.255.255.255,
372
   * but a dns cache does not need it. */
373
0
  if(errno == EACCES && addr_is_broadcast(
374
0
    (struct sockaddr_storage*)addr, addrlen) &&
375
0
    verbosity < VERB_DETAIL)
376
0
    return 0;
377
0
#  ifdef ENOTCONN
378
  /* For 0.0.0.0, ::0 targets it can return that socket is not connected.
379
   * This can be ignored, and the address skipped. It remains
380
   * possible to send there for completeness in configuration. */
381
0
  if(errno == ENOTCONN && addr_is_any(
382
0
    (struct sockaddr_storage*)addr, addrlen) &&
383
0
    verbosity < VERB_DETAIL)
384
0
    return 0;
385
0
#  endif
386
0
  return 1;
387
0
}
388
389
/**
 * See if the errno of a failed tcp connect has to be logged; applies
 * the same rules as for a failed udp send.
 * @param addr: destination address of the connect that failed.
 * @param addrlen: length of addr.
 * @return 0 if the error is to be squelched, 1 if it has to be logged.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
  int needs_log = udp_send_errno_needs_log(addr, addrlen);

  return needs_log;
}
393
394
/* send a UDP reply */
395
int
396
comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
397
  struct sockaddr* addr, socklen_t addrlen, int is_connected)
398
0
{
399
0
  ssize_t sent;
400
0
  log_assert(c->fd != -1);
401
#ifdef UNBOUND_DEBUG
402
  if(sldns_buffer_remaining(packet) == 0)
403
    log_err("error: send empty UDP packet");
404
#endif
405
0
  log_assert(addr && addrlen > 0);
406
0
  if(!is_connected) {
407
0
    sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
408
0
      sldns_buffer_remaining(packet), 0,
409
0
      addr, addrlen);
410
0
  } else {
411
0
    sent = send(c->fd, (void*)sldns_buffer_begin(packet),
412
0
      sldns_buffer_remaining(packet), 0);
413
0
  }
414
0
  if(sent == -1) {
415
    /* try again and block, waiting for IO to complete,
416
     * we want to send the answer, and we will wait for
417
     * the ethernet interface buffer to have space. */
418
0
#ifndef USE_WINSOCK
419
0
    if(errno == EAGAIN || errno == EINTR ||
420
0
#  ifdef EWOULDBLOCK
421
0
      errno == EWOULDBLOCK ||
422
0
#  endif
423
0
      errno == ENOBUFS) {
424
#else
425
    if(WSAGetLastError() == WSAEINPROGRESS ||
426
      WSAGetLastError() == WSAEINTR ||
427
      WSAGetLastError() == WSAENOBUFS ||
428
      WSAGetLastError() == WSAEWOULDBLOCK) {
429
#endif
430
0
      int retries = 0;
431
      /* if we set the fd blocking, other threads suddenly
432
       * have a blocking fd that they operate on */
433
0
      while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
434
0
#ifndef USE_WINSOCK
435
0
        errno == EAGAIN || errno == EINTR ||
436
0
#  ifdef EWOULDBLOCK
437
0
        errno == EWOULDBLOCK ||
438
0
#  endif
439
0
        errno == ENOBUFS
440
#else
441
        WSAGetLastError() == WSAEINPROGRESS ||
442
        WSAGetLastError() == WSAEINTR ||
443
        WSAGetLastError() == WSAENOBUFS ||
444
        WSAGetLastError() == WSAEWOULDBLOCK
445
#endif
446
0
      )) {
447
0
#if defined(HAVE_POLL) || defined(USE_WINSOCK)
448
0
        int send_nobufs = (
449
0
#ifndef USE_WINSOCK
450
0
          errno == ENOBUFS
451
#else
452
          WSAGetLastError() == WSAENOBUFS
453
#endif
454
0
        );
455
0
        struct pollfd p;
456
0
        int pret;
457
0
        memset(&p, 0, sizeof(p));
458
0
        p.fd = c->fd;
459
0
        p.events = POLLOUT
460
0
#ifndef USE_WINSOCK
461
0
          | POLLERR | POLLHUP
462
0
#endif
463
0
          ;
464
0
#  ifndef USE_WINSOCK
465
0
        pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
466
#  else
467
        pret = WSAPoll(&p, 1,
468
          SEND_BLOCKED_WAIT_TIMEOUT);
469
#  endif
470
0
        if(pret == 0) {
471
          /* timer expired */
472
0
          struct comm_base* b = c->ev->base;
473
0
          if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
474
0
            b->eb->secs) {
475
0
            b->eb->last_writewait_log = b->eb->secs;
476
0
            verbose(VERB_OPS, "send udp blocked "
477
0
              "for long, dropping packet.");
478
0
          }
479
0
          return 0;
480
0
        } else if(pret < 0 &&
481
0
#ifndef USE_WINSOCK
482
0
          errno != EAGAIN && errno != EINTR &&
483
0
#  ifdef EWOULDBLOCK
484
0
          errno != EWOULDBLOCK &&
485
0
#  endif
486
0
          errno != ENOMEM && errno != ENOBUFS
487
#else
488
          WSAGetLastError() != WSAEINPROGRESS &&
489
          WSAGetLastError() != WSAEINTR &&
490
          WSAGetLastError() != WSAENOBUFS &&
491
          WSAGetLastError() != WSAEWOULDBLOCK
492
#endif
493
0
          ) {
494
0
          log_err("poll udp out failed: %s",
495
0
            sock_strerror(errno));
496
0
          return 0;
497
0
        } else if((pret < 0 &&
498
0
#ifndef USE_WINSOCK
499
0
          ( errno == ENOBUFS  /* Maybe some systems */
500
0
          || errno == ENOMEM  /* Linux */
501
0
          || errno == EAGAIN)  /* Macos, solaris, openbsd */
502
#else
503
          WSAGetLastError() == WSAENOBUFS
504
#endif
505
0
          ) || (send_nobufs && retries > 0)) {
506
          /* ENOBUFS/ENOMEM/EAGAIN, and poll
507
           * returned without
508
           * a timeout. Or the retried send call
509
           * returned ENOBUFS/ENOMEM/EAGAIN.
510
           * It is good to wait a bit for the
511
           * error to clear. */
512
          /* The timeout is 20*(2^(retries+1)),
513
           * it increases exponentially, starting
514
           * at 40 msec. After 5 tries, 1240 msec
515
           * have passed in total, when poll
516
           * returned the error, and 1200 msec
517
           * when send returned the errors. */
518
0
#ifndef USE_WINSOCK
519
0
          pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
520
#else
521
          Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
522
          pret = 0;
523
#endif
524
0
          if(pret < 0
525
0
#ifndef USE_WINSOCK
526
0
            && errno != EAGAIN && errno != EINTR &&
527
0
#  ifdef EWOULDBLOCK
528
0
            errno != EWOULDBLOCK &&
529
0
#  endif
530
0
            errno != ENOMEM && errno != ENOBUFS
531
#else
532
            /* Sleep does not error */
533
#endif
534
0
          ) {
535
0
            log_err("poll udp out timer failed: %s",
536
0
              sock_strerror(errno));
537
0
          }
538
0
        }
539
0
#endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
540
0
        retries++;
541
0
        if (!is_connected) {
542
0
          sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
543
0
            sldns_buffer_remaining(packet), 0,
544
0
            addr, addrlen);
545
0
        } else {
546
0
          sent = send(c->fd, (void*)sldns_buffer_begin(packet),
547
0
            sldns_buffer_remaining(packet), 0);
548
0
        }
549
0
      }
550
0
    }
551
0
  }
552
0
  if(sent == -1) {
553
0
    if(!udp_send_errno_needs_log(addr, addrlen))
554
0
      return 0;
555
0
    if (!is_connected) {
556
0
      verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno));
557
0
    } else {
558
0
      verbose(VERB_OPS, "send failed: %s", sock_strerror(errno));
559
0
    }
560
0
    if(addr)
561
0
      log_addr(VERB_OPS, "remote address is",
562
0
        (struct sockaddr_storage*)addr, addrlen);
563
0
    return 0;
564
0
  } else if((size_t)sent != sldns_buffer_remaining(packet)) {
565
0
    log_err("sent %d in place of %d bytes",
566
0
      (int)sent, (int)sldns_buffer_remaining(packet));
567
0
    return 0;
568
0
  }
569
0
  return 1;
570
0
}
571
572
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
573
/** print debug ancillary info */
574
static void p_ancil(const char* str, struct comm_reply* r)
575
0
{
576
0
  if(r->srctype != 4 && r->srctype != 6) {
577
0
    log_info("%s: unknown srctype %d", str, r->srctype);
578
0
    return;
579
0
  }
580
581
0
  if(r->srctype == 6) {
582
0
#ifdef IPV6_PKTINFO
583
0
    char buf[1024];
584
0
    if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
585
0
      buf, (socklen_t)sizeof(buf)) == 0) {
586
0
      (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf));
587
0
    }
588
0
    buf[sizeof(buf)-1]=0;
589
0
    log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex);
590
0
#endif
591
0
  } else if(r->srctype == 4) {
592
0
#ifdef IP_PKTINFO
593
0
    char buf1[1024], buf2[1024];
594
0
    if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
595
0
      buf1, (socklen_t)sizeof(buf1)) == 0) {
596
0
      (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
597
0
    }
598
0
    buf1[sizeof(buf1)-1]=0;
599
0
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
600
0
    if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
601
0
      buf2, (socklen_t)sizeof(buf2)) == 0) {
602
0
      (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2));
603
0
    }
604
0
    buf2[sizeof(buf2)-1]=0;
605
#else
606
    buf2[0]=0;
607
#endif
608
0
    log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
609
0
      buf1, buf2);
610
#elif defined(IP_RECVDSTADDR)
611
    char buf1[1024];
612
    if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
613
      buf1, (socklen_t)sizeof(buf1)) == 0) {
614
      (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
615
    }
616
    buf1[sizeof(buf1)-1]=0;
617
    log_info("%s: %s", str, buf1);
618
#endif /* IP_PKTINFO or PI_RECVDSTDADDR */
619
0
  }
620
0
}
621
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
622
623
/** send a UDP reply over specified interface*/
624
static int
625
comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
626
  struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
627
0
{
628
0
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
629
0
  ssize_t sent;
630
0
  struct msghdr msg;
631
0
  struct iovec iov[1];
632
0
  union {
633
0
    struct cmsghdr hdr;
634
0
    char buf[256];
635
0
  } control;
636
0
#ifndef S_SPLINT_S
637
0
  struct cmsghdr *cmsg;
638
0
#endif /* S_SPLINT_S */
639
640
0
  log_assert(c->fd != -1);
641
#ifdef UNBOUND_DEBUG
642
  if(sldns_buffer_remaining(packet) == 0)
643
    log_err("error: send empty UDP packet");
644
#endif
645
0
  log_assert(addr && addrlen > 0);
646
647
0
  msg.msg_name = addr;
648
0
  msg.msg_namelen = addrlen;
649
0
  iov[0].iov_base = sldns_buffer_begin(packet);
650
0
  iov[0].iov_len = sldns_buffer_remaining(packet);
651
0
  msg.msg_iov = iov;
652
0
  msg.msg_iovlen = 1;
653
0
  msg.msg_control = control.buf;
654
0
#ifndef S_SPLINT_S
655
0
  msg.msg_controllen = sizeof(control.buf);
656
0
#endif /* S_SPLINT_S */
657
0
  msg.msg_flags = 0;
658
659
0
#ifndef S_SPLINT_S
660
0
  cmsg = CMSG_FIRSTHDR(&msg);
661
0
  if(r->srctype == 4) {
662
0
#ifdef IP_PKTINFO
663
0
    void* cmsg_data;
664
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
665
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
666
0
    cmsg->cmsg_level = IPPROTO_IP;
667
0
    cmsg->cmsg_type = IP_PKTINFO;
668
0
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
669
0
      sizeof(struct in_pktinfo));
670
    /* unset the ifindex to not bypass the routing tables */
671
0
    cmsg_data = CMSG_DATA(cmsg);
672
0
    ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
673
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
674
    /* zero the padding bytes inserted by the CMSG_LEN */
675
0
    if(sizeof(struct in_pktinfo) < cmsg->cmsg_len)
676
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
677
0
        sizeof(struct in_pktinfo), 0, cmsg->cmsg_len
678
0
        - sizeof(struct in_pktinfo));
679
#elif defined(IP_SENDSRCADDR)
680
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
681
    log_assert(msg.msg_controllen <= sizeof(control.buf));
682
    cmsg->cmsg_level = IPPROTO_IP;
683
    cmsg->cmsg_type = IP_SENDSRCADDR;
684
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
685
      sizeof(struct in_addr));
686
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
687
    /* zero the padding bytes inserted by the CMSG_LEN */
688
    if(sizeof(struct in_addr) < cmsg->cmsg_len)
689
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
690
        sizeof(struct in_addr), 0, cmsg->cmsg_len
691
        - sizeof(struct in_addr));
692
#else
693
    verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
694
    msg.msg_control = NULL;
695
#endif /* IP_PKTINFO or IP_SENDSRCADDR */
696
0
  } else if(r->srctype == 6) {
697
0
    void* cmsg_data;
698
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
699
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
700
0
    cmsg->cmsg_level = IPPROTO_IPV6;
701
0
    cmsg->cmsg_type = IPV6_PKTINFO;
702
0
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
703
0
      sizeof(struct in6_pktinfo));
704
    /* unset the ifindex to not bypass the routing tables */
705
0
    cmsg_data = CMSG_DATA(cmsg);
706
0
    ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
707
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
708
    /* zero the padding bytes inserted by the CMSG_LEN */
709
0
    if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
710
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
711
0
        sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
712
0
        - sizeof(struct in6_pktinfo));
713
0
  } else {
714
    /* try to pass all 0 to use default route */
715
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
716
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
717
0
    cmsg->cmsg_level = IPPROTO_IPV6;
718
0
    cmsg->cmsg_type = IPV6_PKTINFO;
719
0
    memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
720
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
721
    /* zero the padding bytes inserted by the CMSG_LEN */
722
0
    if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
723
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
724
0
        sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
725
0
        - sizeof(struct in6_pktinfo));
726
0
  }
727
0
#endif /* S_SPLINT_S */
728
0
  if(verbosity >= VERB_ALGO && r->srctype != 0)
729
0
    p_ancil("send_udp over interface", r);
730
0
  sent = sendmsg(c->fd, &msg, 0);
731
0
  if(sent == -1) {
732
    /* try again and block, waiting for IO to complete,
733
     * we want to send the answer, and we will wait for
734
     * the ethernet interface buffer to have space. */
735
0
#ifndef USE_WINSOCK
736
0
    if(errno == EAGAIN || errno == EINTR ||
737
0
#  ifdef EWOULDBLOCK
738
0
      errno == EWOULDBLOCK ||
739
0
#  endif
740
0
      errno == ENOBUFS) {
741
#else
742
    if(WSAGetLastError() == WSAEINPROGRESS ||
743
      WSAGetLastError() == WSAEINTR ||
744
      WSAGetLastError() == WSAENOBUFS ||
745
      WSAGetLastError() == WSAEWOULDBLOCK) {
746
#endif
747
0
      int retries = 0;
748
0
      while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
749
0
#ifndef USE_WINSOCK
750
0
        errno == EAGAIN || errno == EINTR ||
751
0
#  ifdef EWOULDBLOCK
752
0
        errno == EWOULDBLOCK ||
753
0
#  endif
754
0
        errno == ENOBUFS
755
#else
756
        WSAGetLastError() == WSAEINPROGRESS ||
757
        WSAGetLastError() == WSAEINTR ||
758
        WSAGetLastError() == WSAENOBUFS ||
759
        WSAGetLastError() == WSAEWOULDBLOCK
760
#endif
761
0
      )) {
762
0
#if defined(HAVE_POLL) || defined(USE_WINSOCK)
763
0
        int send_nobufs = (
764
0
#ifndef USE_WINSOCK
765
0
          errno == ENOBUFS
766
#else
767
          WSAGetLastError() == WSAENOBUFS
768
#endif
769
0
        );
770
0
        struct pollfd p;
771
0
        int pret;
772
0
        memset(&p, 0, sizeof(p));
773
0
        p.fd = c->fd;
774
0
        p.events = POLLOUT
775
0
#ifndef USE_WINSOCK
776
0
          | POLLERR | POLLHUP
777
0
#endif
778
0
          ;
779
0
#  ifndef USE_WINSOCK
780
0
        pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
781
#  else
782
        pret = WSAPoll(&p, 1,
783
          SEND_BLOCKED_WAIT_TIMEOUT);
784
#  endif
785
0
        if(pret == 0) {
786
          /* timer expired */
787
0
          struct comm_base* b = c->ev->base;
788
0
          if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
789
0
            b->eb->secs) {
790
0
            b->eb->last_writewait_log = b->eb->secs;
791
0
            verbose(VERB_OPS, "send udp blocked "
792
0
              "for long, dropping packet.");
793
0
          }
794
0
          return 0;
795
0
        } else if(pret < 0 &&
796
0
#ifndef USE_WINSOCK
797
0
          errno != EAGAIN && errno != EINTR &&
798
0
#  ifdef EWOULDBLOCK
799
0
          errno != EWOULDBLOCK &&
800
0
#  endif
801
0
          errno != ENOMEM && errno != ENOBUFS
802
#else
803
          WSAGetLastError() != WSAEINPROGRESS &&
804
          WSAGetLastError() != WSAEINTR &&
805
          WSAGetLastError() != WSAENOBUFS &&
806
          WSAGetLastError() != WSAEWOULDBLOCK
807
#endif
808
0
          ) {
809
0
          log_err("poll udp out failed: %s",
810
0
            sock_strerror(errno));
811
0
          return 0;
812
0
        } else if((pret < 0 &&
813
0
#ifndef USE_WINSOCK
814
0
          ( errno == ENOBUFS  /* Maybe some systems */
815
0
          || errno == ENOMEM  /* Linux */
816
0
          || errno == EAGAIN)  /* Macos, solaris, openbsd */
817
#else
818
          WSAGetLastError() == WSAENOBUFS
819
#endif
820
0
          ) || (send_nobufs && retries > 0)) {
821
          /* ENOBUFS/ENOMEM/EAGAIN, and poll
822
           * returned without
823
           * a timeout. Or the retried send call
824
           * returned ENOBUFS/ENOMEM/EAGAIN.
825
           * It is good to wait a bit for the
826
           * error to clear. */
827
          /* The timeout is 20*(2^(retries+1)),
828
           * it increases exponentially, starting
829
           * at 40 msec. After 5 tries, 1240 msec
830
           * have passed in total, when poll
831
           * returned the error, and 1200 msec
832
           * when send returned the errors. */
833
0
#ifndef USE_WINSOCK
834
0
          pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
835
#else
836
          Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
837
          pret = 0;
838
#endif
839
0
          if(pret < 0
840
0
#ifndef USE_WINSOCK
841
0
            && errno != EAGAIN && errno != EINTR &&
842
0
#  ifdef EWOULDBLOCK
843
0
            errno != EWOULDBLOCK &&
844
0
#  endif
845
0
            errno != ENOMEM && errno != ENOBUFS
846
#else  /* USE_WINSOCK */
847
            /* Sleep does not error */
848
#endif
849
0
          ) {
850
0
            log_err("poll udp out timer failed: %s",
851
0
              sock_strerror(errno));
852
0
          }
853
0
        }
854
0
#endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
855
0
        retries++;
856
0
        sent = sendmsg(c->fd, &msg, 0);
857
0
      }
858
0
    }
859
0
  }
860
0
  if(sent == -1) {
861
0
    if(!udp_send_errno_needs_log(addr, addrlen))
862
0
      return 0;
863
0
    verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
864
0
    log_addr(VERB_OPS, "remote address is",
865
0
      (struct sockaddr_storage*)addr, addrlen);
866
#ifdef __NetBSD__
867
    /* netbsd 7 has IP_PKTINFO for recv but not send */
868
    if(errno == EINVAL && r->srctype == 4)
869
      log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
870
        "Please disable interface-automatic");
871
#endif
872
0
    return 0;
873
0
  } else if((size_t)sent != sldns_buffer_remaining(packet)) {
874
0
    log_err("sent %d in place of %d bytes",
875
0
      (int)sent, (int)sldns_buffer_remaining(packet));
876
0
    return 0;
877
0
  }
878
0
  return 1;
879
#else
880
  (void)c;
881
  (void)packet;
882
  (void)addr;
883
  (void)addrlen;
884
  (void)r;
885
  log_err("sendmsg: IPV6_PKTINFO not supported");
886
  return 0;
887
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
888
0
}
889
890
/** return true is UDP receive error needs to be logged */
891
static int udp_recv_needs_log(int err)
892
0
{
893
0
  switch(err) {
894
0
  case EACCES: /* some hosts send ICMP 'Permission Denied' */
895
0
#ifndef USE_WINSOCK
896
0
  case ECONNREFUSED:
897
0
#  ifdef ENETUNREACH
898
0
  case ENETUNREACH:
899
0
#  endif
900
0
#  ifdef EHOSTDOWN
901
0
  case EHOSTDOWN:
902
0
#  endif
903
0
#  ifdef EHOSTUNREACH
904
0
  case EHOSTUNREACH:
905
0
#  endif
906
0
#  ifdef ENETDOWN
907
0
  case ENETDOWN:
908
0
#  endif
909
#else /* USE_WINSOCK */
910
  case WSAECONNREFUSED:
911
  case WSAENETUNREACH:
912
  case WSAEHOSTDOWN:
913
  case WSAEHOSTUNREACH:
914
  case WSAENETDOWN:
915
#endif
916
0
    if(verbosity >= VERB_ALGO)
917
0
      return 1;
918
0
    return 0;
919
0
  default:
920
0
    break;
921
0
  }
922
0
  return 1;
923
0
}
924
925
/** Parses the PROXYv2 header from buf and updates the comm_reply struct.
926
 *  Returns 1 on success, 0 on failure. */
927
static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep,
928
0
  int stream) {
929
0
  size_t size;
930
0
  struct pp2_header *header;
931
0
  int err = pp2_read_header(sldns_buffer_begin(buf),
932
0
    sldns_buffer_remaining(buf));
933
0
  if(err) return 0;
934
0
  header = (struct pp2_header*)sldns_buffer_begin(buf);
935
0
  size = PP2_HEADER_SIZE + ntohs(header->len);
936
0
  if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) {
937
    /* A connection from the proxy itself.
938
     * No need to do anything with addresses. */
939
0
    goto done;
940
0
  }
941
0
  if(header->fam_prot == PP2_UNSPEC_UNSPEC) {
942
    /* Unspecified family and protocol. This could be used for
943
     * health checks by proxies.
944
     * No need to do anything with addresses. */
945
0
    goto done;
946
0
  }
947
  /* Read the proxied address */
948
0
  switch(header->fam_prot) {
949
0
    case PP2_INET_STREAM:
950
0
    case PP2_INET_DGRAM:
951
0
      {
952
0
      struct sockaddr_in* addr =
953
0
        (struct sockaddr_in*)&rep->client_addr;
954
0
      addr->sin_family = AF_INET;
955
0
      addr->sin_addr.s_addr = header->addr.addr4.src_addr;
956
0
      addr->sin_port = header->addr.addr4.src_port;
957
0
      rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in);
958
0
      }
959
      /* Ignore the destination address; it should be us. */
960
0
      break;
961
0
    case PP2_INET6_STREAM:
962
0
    case PP2_INET6_DGRAM:
963
0
      {
964
0
      struct sockaddr_in6* addr =
965
0
        (struct sockaddr_in6*)&rep->client_addr;
966
0
      memset(addr, 0, sizeof(*addr));
967
0
      addr->sin6_family = AF_INET6;
968
0
      memcpy(&addr->sin6_addr,
969
0
        header->addr.addr6.src_addr, 16);
970
0
      addr->sin6_port = header->addr.addr6.src_port;
971
0
      rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6);
972
0
      }
973
      /* Ignore the destination address; it should be us. */
974
0
      break;
975
0
    default:
976
0
      log_err("proxy_protocol: unsupported family and "
977
0
        "protocol 0x%x", (int)header->fam_prot);
978
0
      return 0;
979
0
  }
980
0
  rep->is_proxied = 1;
981
0
done:
982
0
  if(!stream) {
983
    /* We are reading a whole packet;
984
     * Move the rest of the data to overwrite the PROXYv2 header */
985
    /* XXX can we do better to avoid memmove? */
986
0
    memmove(header, ((char*)header)+size,
987
0
      sldns_buffer_limit(buf)-size);
988
0
    sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size);
989
0
  }
990
0
  return 1;
991
0
}
992
993
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
994
void
995
comm_point_udp_ancil_callback(int fd, short event, void* arg)
996
0
{
997
0
  struct comm_reply rep;
998
0
  struct msghdr msg;
999
0
  struct iovec iov[1];
1000
0
  ssize_t rcv;
1001
0
  union {
1002
0
    struct cmsghdr hdr;
1003
0
    char buf[256];
1004
0
  } ancil;
1005
0
  int i;
1006
0
#ifndef S_SPLINT_S
1007
0
  struct cmsghdr* cmsg;
1008
0
#endif /* S_SPLINT_S */
1009
0
#ifdef HAVE_LINUX_NET_TSTAMP_H
1010
0
  struct timespec *ts;
1011
0
#endif /* HAVE_LINUX_NET_TSTAMP_H */
1012
1013
0
  rep.c = (struct comm_point*)arg;
1014
0
  log_assert(rep.c->type == comm_udp);
1015
1016
0
  if(!(event&UB_EV_READ))
1017
0
    return;
1018
0
  log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
1019
0
  ub_comm_base_now(rep.c->ev->base);
1020
0
  for(i=0; i<NUM_UDP_PER_SELECT; i++) {
1021
0
    sldns_buffer_clear(rep.c->buffer);
1022
0
    timeval_clear(&rep.c->recv_tv);
1023
0
    rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
1024
0
    log_assert(fd != -1);
1025
0
    log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
1026
0
    msg.msg_name = &rep.remote_addr;
1027
0
    msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr);
1028
0
    iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
1029
0
    iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
1030
0
    msg.msg_iov = iov;
1031
0
    msg.msg_iovlen = 1;
1032
0
    msg.msg_control = ancil.buf;
1033
0
#ifndef S_SPLINT_S
1034
0
    msg.msg_controllen = sizeof(ancil.buf);
1035
0
#endif /* S_SPLINT_S */
1036
0
    msg.msg_flags = 0;
1037
0
    rcv = recvmsg(fd, &msg, MSG_DONTWAIT);
1038
0
    if(rcv == -1) {
1039
0
      if(errno != EAGAIN && errno != EINTR
1040
0
        && udp_recv_needs_log(errno)) {
1041
0
        log_err("recvmsg failed: %s", strerror(errno));
1042
0
      }
1043
0
      return;
1044
0
    }
1045
0
    rep.remote_addrlen = msg.msg_namelen;
1046
0
    sldns_buffer_skip(rep.c->buffer, rcv);
1047
0
    sldns_buffer_flip(rep.c->buffer);
1048
0
    rep.srctype = 0;
1049
0
    rep.is_proxied = 0;
1050
0
#ifndef S_SPLINT_S
1051
0
    for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
1052
0
      cmsg = CMSG_NXTHDR(&msg, cmsg)) {
1053
0
      if( cmsg->cmsg_level == IPPROTO_IPV6 &&
1054
0
        cmsg->cmsg_type == IPV6_PKTINFO) {
1055
0
        rep.srctype = 6;
1056
0
        memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
1057
0
          sizeof(struct in6_pktinfo));
1058
0
        break;
1059
0
#ifdef IP_PKTINFO
1060
0
      } else if( cmsg->cmsg_level == IPPROTO_IP &&
1061
0
        cmsg->cmsg_type == IP_PKTINFO) {
1062
0
        rep.srctype = 4;
1063
0
        memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
1064
0
          sizeof(struct in_pktinfo));
1065
0
        break;
1066
#elif defined(IP_RECVDSTADDR)
1067
      } else if( cmsg->cmsg_level == IPPROTO_IP &&
1068
        cmsg->cmsg_type == IP_RECVDSTADDR) {
1069
        rep.srctype = 4;
1070
        memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
1071
          sizeof(struct in_addr));
1072
        break;
1073
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
1074
0
#ifdef HAVE_LINUX_NET_TSTAMP_H
1075
0
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1076
0
        cmsg->cmsg_type == SO_TIMESTAMPNS) {
1077
0
        ts = (struct timespec *)CMSG_DATA(cmsg);
1078
0
        TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
1079
0
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1080
0
        cmsg->cmsg_type == SO_TIMESTAMPING) {
1081
0
        ts = (struct timespec *)CMSG_DATA(cmsg);
1082
0
        TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
1083
0
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1084
0
        cmsg->cmsg_type == SO_TIMESTAMP) {
1085
0
        memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
1086
#elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP)
1087
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1088
        cmsg->cmsg_type == SCM_TIMESTAMP) {
1089
        /* FreeBSD and also Linux. */
1090
        memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
1091
#endif /* HAVE_LINUX_NET_TSTAMP_H */
1092
0
      }
1093
0
    }
1094
1095
0
    if(verbosity >= VERB_ALGO && rep.srctype != 0)
1096
0
      p_ancil("receive_udp on interface", &rep);
1097
0
#endif /* S_SPLINT_S */
1098
1099
0
    if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
1100
0
      &rep, 0)) {
1101
0
      log_err("proxy_protocol: could not consume PROXYv2 header");
1102
0
      return;
1103
0
    }
1104
0
    if(!rep.is_proxied) {
1105
0
      rep.client_addrlen = rep.remote_addrlen;
1106
0
      memmove(&rep.client_addr, &rep.remote_addr,
1107
0
        rep.remote_addrlen);
1108
0
    }
1109
1110
0
    fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
1111
0
    if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
1112
      /* send back immediate reply */
1113
0
      struct sldns_buffer *buffer;
1114
#ifdef USE_DNSCRYPT
1115
      buffer = rep.c->dnscrypt_buffer;
1116
#else
1117
0
      buffer = rep.c->buffer;
1118
0
#endif
1119
0
      (void)comm_point_send_udp_msg_if(rep.c, buffer,
1120
0
        (struct sockaddr*)&rep.remote_addr,
1121
0
        rep.remote_addrlen, &rep);
1122
0
    }
1123
0
    if(!rep.c || rep.c->fd == -1) /* commpoint closed */
1124
0
      break;
1125
0
  }
1126
0
}
1127
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
1128
1129
void
1130
comm_point_udp_callback(int fd, short event, void* arg)
1131
0
{
1132
0
  struct comm_reply rep;
1133
0
  ssize_t rcv;
1134
0
  int i;
1135
0
  struct sldns_buffer *buffer;
1136
1137
0
  rep.c = (struct comm_point*)arg;
1138
0
  log_assert(rep.c->type == comm_udp);
1139
1140
0
  if(!(event&UB_EV_READ))
1141
0
    return;
1142
0
  log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
1143
0
  ub_comm_base_now(rep.c->ev->base);
1144
0
  for(i=0; i<NUM_UDP_PER_SELECT; i++) {
1145
0
    sldns_buffer_clear(rep.c->buffer);
1146
0
    rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
1147
0
    log_assert(fd != -1);
1148
0
    log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
1149
0
    rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
1150
0
      sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT,
1151
0
      (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen);
1152
0
    if(rcv == -1) {
1153
0
#ifndef USE_WINSOCK
1154
0
      if(errno != EAGAIN && errno != EINTR
1155
0
        && udp_recv_needs_log(errno))
1156
0
        log_err("recvfrom %d failed: %s",
1157
0
          fd, strerror(errno));
1158
#else
1159
      if(WSAGetLastError() != WSAEINPROGRESS &&
1160
        WSAGetLastError() != WSAECONNRESET &&
1161
        WSAGetLastError()!= WSAEWOULDBLOCK &&
1162
        udp_recv_needs_log(WSAGetLastError()))
1163
        log_err("recvfrom failed: %s",
1164
          wsa_strerror(WSAGetLastError()));
1165
#endif
1166
0
      return;
1167
0
    }
1168
0
    sldns_buffer_skip(rep.c->buffer, rcv);
1169
0
    sldns_buffer_flip(rep.c->buffer);
1170
0
    rep.srctype = 0;
1171
0
    rep.is_proxied = 0;
1172
1173
0
    if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
1174
0
      &rep, 0)) {
1175
0
      log_err("proxy_protocol: could not consume PROXYv2 header");
1176
0
      return;
1177
0
    }
1178
0
    if(!rep.is_proxied) {
1179
0
      rep.client_addrlen = rep.remote_addrlen;
1180
0
      memmove(&rep.client_addr, &rep.remote_addr,
1181
0
        rep.remote_addrlen);
1182
0
    }
1183
1184
0
    fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
1185
0
    if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
1186
      /* send back immediate reply */
1187
#ifdef USE_DNSCRYPT
1188
      buffer = rep.c->dnscrypt_buffer;
1189
#else
1190
0
      buffer = rep.c->buffer;
1191
0
#endif
1192
0
      (void)comm_point_send_udp_msg(rep.c, buffer,
1193
0
        (struct sockaddr*)&rep.remote_addr,
1194
0
        rep.remote_addrlen, 0);
1195
0
    }
1196
0
    if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
1197
    another UDP port. Note rep.c cannot be reused with TCP fd. */
1198
0
      break;
1199
0
  }
1200
0
}
1201
1202
#ifdef HAVE_NGTCP2
1203
void
1204
doq_pkt_addr_init(struct doq_pkt_addr* paddr)
1205
{
1206
  paddr->addrlen = (socklen_t)sizeof(paddr->addr);
1207
  paddr->localaddrlen = (socklen_t)sizeof(paddr->localaddr);
1208
  paddr->ifindex = 0;
1209
}
1210
1211
/** Set the ecn value for transmissions on the socket.
 * For AF_INET6 the value is stored with the IPV6_TCLASS socket option,
 * for every other family with the IP_TOS socket option. A failing
 * setsockopt is logged and otherwise ignored.
 * @param fd: the socket.
 * @param family: address family of the destination.
 * @param ecn: value to set. */
static void
doq_set_ecn(int fd, int family, uint32_t ecn)
{
  unsigned int optval = ecn;

  if(family != AF_INET6) {
    if(setsockopt(fd, IPPROTO_IP, IP_TOS, &optval,
      (socklen_t)sizeof(optval)) == -1) {
      log_err("setsockopt(.. IP_TOS ..): %s",
        strerror(errno));
    }
    return;
  }
  if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &optval,
    (socklen_t)sizeof(optval)) == -1) {
    log_err("setsockopt(.. IPV6_TCLASS ..): %s",
      strerror(errno));
  }
}
1230
1231
/** set the local address in the control ancillary data */
1232
static void
1233
doq_set_localaddr_cmsg(struct msghdr* msg, size_t control_size,
1234
  struct doq_addr_storage* localaddr, socklen_t localaddrlen,
1235
  int ifindex)
1236
{
1237
#ifndef S_SPLINT_S
1238
  struct cmsghdr* cmsg;
1239
#endif /* S_SPLINT_S */
1240
#ifndef S_SPLINT_S
1241
  cmsg = CMSG_FIRSTHDR(msg);
1242
  if(localaddr->sockaddr.in.sin_family == AF_INET) {
1243
#ifdef IP_PKTINFO
1244
    struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
1245
    struct in_pktinfo v4info;
1246
    log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1247
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
1248
    memset(msg->msg_control, 0, msg->msg_controllen);
1249
    log_assert(msg->msg_controllen <= control_size);
1250
    cmsg->cmsg_level = IPPROTO_IP;
1251
    cmsg->cmsg_type = IP_PKTINFO;
1252
    memset(&v4info, 0, sizeof(v4info));
1253
#  ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
1254
    memmove(&v4info.ipi_spec_dst, &sa->sin_addr,
1255
      sizeof(struct in_addr));
1256
#  else
1257
    memmove(&v4info.ipi_addr, &sa->sin_addr,
1258
      sizeof(struct in_addr));
1259
#  endif
1260
    v4info.ipi_ifindex = ifindex;
1261
    memmove(CMSG_DATA(cmsg), &v4info, sizeof(struct in_pktinfo));
1262
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
1263
#elif defined(IP_SENDSRCADDR)
1264
    struct sockaddr_in* sa= (struct sockaddr_in*)localaddr;
1265
    log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1266
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
1267
    memset(msg->msg_control, 0, msg->msg_controllen);
1268
    log_assert(msg->msg_controllen <= control_size);
1269
    cmsg->cmsg_level = IPPROTO_IP;
1270
    cmsg->cmsg_type = IP_SENDSRCADDR;
1271
    memmove(CMSG_DATA(cmsg),  &sa->sin_addr,
1272
      sizeof(struct in_addr));
1273
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1274
#endif
1275
  } else {
1276
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
1277
    struct in6_pktinfo v6info;
1278
    log_assert(localaddrlen >= sizeof(struct sockaddr_in6));
1279
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
1280
    memset(msg->msg_control, 0, msg->msg_controllen);
1281
    log_assert(msg->msg_controllen <= control_size);
1282
    cmsg->cmsg_level = IPPROTO_IPV6;
1283
    cmsg->cmsg_type = IPV6_PKTINFO;
1284
    memset(&v6info, 0, sizeof(v6info));
1285
    memmove(&v6info.ipi6_addr, &sa6->sin6_addr,
1286
      sizeof(struct in6_addr));
1287
    v6info.ipi6_ifindex = ifindex;
1288
    memmove(CMSG_DATA(cmsg), &v6info, sizeof(struct in6_pktinfo));
1289
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
1290
  }
1291
#endif /* S_SPLINT_S */
1292
  /* Ignore unused variables, if no assertions are compiled. */
1293
  (void)localaddrlen;
1294
  (void)control_size;
1295
}
1296
1297
/** write address and port into strings */
1298
static int
1299
doq_print_addr_port(struct doq_addr_storage* addr, socklen_t addrlen,
1300
  char* host, size_t hostlen, char* port, size_t portlen)
1301
{
1302
  if(addr->sockaddr.in.sin_family == AF_INET) {
1303
    struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1304
    log_assert(addrlen >= sizeof(*sa));
1305
    if(inet_ntop(sa->sin_family, &sa->sin_addr, host,
1306
      (socklen_t)hostlen) == 0) {
1307
      log_hex("inet_ntop error: address", &sa->sin_addr,
1308
        sizeof(sa->sin_addr));
1309
      return 0;
1310
    }
1311
    snprintf(port, portlen, "%u", (unsigned)ntohs(sa->sin_port));
1312
  } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1313
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1314
    log_assert(addrlen >= sizeof(*sa6));
1315
    if(inet_ntop(sa6->sin6_family, &sa6->sin6_addr, host,
1316
      (socklen_t)hostlen) == 0) {
1317
      log_hex("inet_ntop error: address", &sa6->sin6_addr,
1318
        sizeof(sa6->sin6_addr));
1319
      return 0;
1320
    }
1321
    snprintf(port, portlen, "%u", (unsigned)ntohs(sa6->sin6_port));
1322
  }
1323
  return 1;
1324
}
1325
1326
/** doq store the blocked packet when write has blocked */
1327
static void
1328
doq_store_blocked_pkt(struct comm_point* c, struct doq_pkt_addr* paddr,
1329
  uint32_t ecn)
1330
{
1331
  if(c->doq_socket->have_blocked_pkt)
1332
    return; /* should not happen that we write when there is
1333
    already a blocked write, but if so, drop it. */
1334
  if(sldns_buffer_limit(c->doq_socket->pkt_buf) >
1335
    sldns_buffer_capacity(c->doq_socket->blocked_pkt))
1336
    return; /* impossibly large, drop packet. impossible because
1337
    pkt_buf and blocked_pkt are the same size. */
1338
  c->doq_socket->have_blocked_pkt = 1;
1339
  c->doq_socket->blocked_pkt_pi.ecn = ecn;
1340
  memcpy(c->doq_socket->blocked_paddr, paddr,
1341
    sizeof(*c->doq_socket->blocked_paddr));
1342
  sldns_buffer_clear(c->doq_socket->blocked_pkt);
1343
  sldns_buffer_write(c->doq_socket->blocked_pkt,
1344
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1345
    sldns_buffer_limit(c->doq_socket->pkt_buf));
1346
  sldns_buffer_flip(c->doq_socket->blocked_pkt);
1347
}
1348
1349
void
1350
doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, uint32_t ecn)
1351
{
1352
  struct msghdr msg;
1353
  struct iovec iov[1];
1354
  union {
1355
    struct cmsghdr hdr;
1356
    char buf[256];
1357
  } control;
1358
  ssize_t ret;
1359
  iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1360
  iov[0].iov_len = sldns_buffer_limit(c->doq_socket->pkt_buf);
1361
  memset(&msg, 0, sizeof(msg));
1362
  msg.msg_name = (void*)&paddr->addr;
1363
  msg.msg_namelen = paddr->addrlen;
1364
  msg.msg_iov = iov;
1365
  msg.msg_iovlen = 1;
1366
  msg.msg_control = control.buf;
1367
#ifndef S_SPLINT_S
1368
  msg.msg_controllen = sizeof(control.buf);
1369
#endif /* S_SPLINT_S */
1370
  msg.msg_flags = 0;
1371
1372
  doq_set_localaddr_cmsg(&msg, sizeof(control.buf), &paddr->localaddr,
1373
    paddr->localaddrlen, paddr->ifindex);
1374
  doq_set_ecn(c->fd, paddr->addr.sockaddr.in.sin_family, ecn);
1375
1376
  for(;;) {
1377
    ret = sendmsg(c->fd, &msg, MSG_DONTWAIT);
1378
    if(ret == -1 && errno == EINTR)
1379
      continue;
1380
    break;
1381
  }
1382
  if(ret == -1) {
1383
#ifndef USE_WINSOCK
1384
    if(errno == EAGAIN ||
1385
#  ifdef EWOULDBLOCK
1386
      errno == EWOULDBLOCK ||
1387
#  endif
1388
      errno == ENOBUFS)
1389
#else
1390
    if(WSAGetLastError() == WSAEINPROGRESS ||
1391
      WSAGetLastError() == WSAENOBUFS ||
1392
      WSAGetLastError() == WSAEWOULDBLOCK)
1393
#endif
1394
    {
1395
      /* udp send has blocked */
1396
      doq_store_blocked_pkt(c, paddr, ecn);
1397
      return;
1398
    }
1399
    if(!udp_send_errno_needs_log((void*)&paddr->addr,
1400
      paddr->addrlen))
1401
      return;
1402
    if(verbosity >= VERB_OPS) {
1403
      char host[256], port[32];
1404
      if(doq_print_addr_port(&paddr->addr, paddr->addrlen,
1405
        host, sizeof(host), port, sizeof(port))) {
1406
        verbose(VERB_OPS, "doq sendmsg to %s %s "
1407
          "failed: %s", host, port,
1408
          strerror(errno));
1409
      } else {
1410
        verbose(VERB_OPS, "doq sendmsg failed: %s",
1411
          strerror(errno));
1412
      }
1413
    }
1414
    return;
1415
  } else if(ret != (ssize_t)sldns_buffer_limit(c->doq_socket->pkt_buf)) {
1416
    char host[256], port[32];
1417
    if(doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1418
      sizeof(host), port, sizeof(port))) {
1419
      log_err("doq sendmsg to %s %s failed: "
1420
        "sent %d in place of %d bytes", 
1421
        host, port, (int)ret,
1422
        (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1423
    } else {
1424
      log_err("doq sendmsg failed: "
1425
        "sent %d in place of %d bytes", 
1426
        (int)ret, (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1427
    }
1428
    return;
1429
  }
1430
}
1431
1432
/** fetch port number */
1433
static int
1434
doq_sockaddr_get_port(struct doq_addr_storage* addr)
1435
{
1436
  if(addr->sockaddr.in.sin_family == AF_INET) {
1437
    struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1438
    return ntohs(sa->sin_port);
1439
  } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1440
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1441
    return ntohs(sa6->sin6_port);
1442
  }
1443
  return 0;
1444
}
1445
1446
/** get local address from ancillary data headers */
1447
static int
1448
doq_get_localaddr_cmsg(struct comm_point* c, struct doq_pkt_addr* paddr,
1449
  int* pkt_continue, struct msghdr* msg)
1450
{
1451
#ifndef S_SPLINT_S
1452
  struct cmsghdr* cmsg;
1453
#endif /* S_SPLINT_S */
1454
1455
  memset(&paddr->localaddr, 0, sizeof(paddr->localaddr));
1456
#ifndef S_SPLINT_S
1457
  for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
1458
    cmsg = CMSG_NXTHDR(msg, cmsg)) {
1459
    if( cmsg->cmsg_level == IPPROTO_IPV6 &&
1460
      cmsg->cmsg_type == IPV6_PKTINFO) {
1461
      struct in6_pktinfo* v6info =
1462
        (struct in6_pktinfo*)CMSG_DATA(cmsg);
1463
      struct sockaddr_in6* sa= (struct sockaddr_in6*)
1464
        &paddr->localaddr;
1465
      struct sockaddr_in6* rema = (struct sockaddr_in6*)
1466
        &paddr->addr;
1467
      if(rema->sin6_family != AF_INET6) {
1468
        log_err("doq cmsg family mismatch cmsg is ip6");
1469
        *pkt_continue = 1;
1470
        return 0;
1471
      }
1472
      sa->sin6_family = AF_INET6;
1473
      sa->sin6_port = htons(doq_sockaddr_get_port(
1474
        (void*)c->socket->addr));
1475
      paddr->ifindex = v6info->ipi6_ifindex;
1476
      memmove(&sa->sin6_addr, &v6info->ipi6_addr,
1477
        sizeof(struct in6_addr));
1478
      paddr->localaddrlen = sizeof(struct sockaddr_in6);
1479
      break;
1480
#ifdef IP_PKTINFO
1481
    } else if( cmsg->cmsg_level == IPPROTO_IP &&
1482
      cmsg->cmsg_type == IP_PKTINFO) {
1483
      struct in_pktinfo* v4info =
1484
        (struct in_pktinfo*)CMSG_DATA(cmsg);
1485
      struct sockaddr_in* sa= (struct sockaddr_in*)
1486
        &paddr->localaddr;
1487
      struct sockaddr_in* rema = (struct sockaddr_in*)
1488
        &paddr->addr;
1489
      if(rema->sin_family != AF_INET) {
1490
        log_err("doq cmsg family mismatch cmsg is ip4");
1491
        *pkt_continue = 1;
1492
        return 0;
1493
      }
1494
      sa->sin_family = AF_INET;
1495
      sa->sin_port = htons(doq_sockaddr_get_port(
1496
        (void*)c->socket->addr));
1497
      paddr->ifindex = v4info->ipi_ifindex;
1498
      memmove(&sa->sin_addr, &v4info->ipi_addr,
1499
        sizeof(struct in_addr));
1500
      paddr->localaddrlen = sizeof(struct sockaddr_in);
1501
      break;
1502
#elif defined(IP_RECVDSTADDR)
1503
    } else if( cmsg->cmsg_level == IPPROTO_IP &&
1504
      cmsg->cmsg_type == IP_RECVDSTADDR) {
1505
      struct sockaddr_in* sa= (struct sockaddr_in*)
1506
        &paddr->localaddr;
1507
      struct sockaddr_in* rema = (struct sockaddr_in*)
1508
        &paddr->addr;
1509
      if(rema->sin_family != AF_INET) {
1510
        log_err("doq cmsg family mismatch cmsg is ip4");
1511
        *pkt_continue = 1;
1512
        return 0;
1513
      }
1514
      sa->sin_family = AF_INET;
1515
      sa->sin_port = htons(doq_sockaddr_get_port(
1516
        (void*)c->socket->addr));
1517
      paddr->ifindex = 0;
1518
      memmove(&sa.sin_addr, CMSG_DATA(cmsg),
1519
        sizeof(struct in_addr));
1520
      paddr->localaddrlen = sizeof(struct sockaddr_in);
1521
      break;
1522
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
1523
    }
1524
  }
1525
#endif /* S_SPLINT_S */
1526
1527
return 1;
1528
}
1529
1530
/** Get packet ecn information from the ancillary data.
 * For AF_INET6 the IPV6_TCLASS control message is looked up, for every
 * other family the IP_TOS control message. The first byte of the data of
 * the first such message is returned.
 * @param msg: received message header with the control data.
 * @param family: address family of the packet.
 * @return the value, or 0 if there is no such control message with at
 *	least one byte of data. */
static uint32_t
msghdr_get_ecn(struct msghdr* msg, int family)
{
#ifndef S_SPLINT_S
  struct cmsghdr* cmsg;
  /* the control message to look for depends on the family */
  int level = IPPROTO_IP;
  int type = IP_TOS;
  if(family == AF_INET6) {
    level = IPPROTO_IPV6;
    type = IPV6_TCLASS;
  }
  for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
    cmsg = CMSG_NXTHDR(msg, cmsg)) {
    /* the length has to cover the byte that is read below */
    if(cmsg->cmsg_level == level &&
      cmsg->cmsg_type == type &&
      cmsg->cmsg_len >= CMSG_LEN(sizeof(uint8_t))) {
      uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg);
      return *ecn;
    }
  }
#endif /* S_SPLINT_S */
  return 0;
}
1560
1561
/** receive packet for DoQ on UDP. get ancillary data for addresses,
1562
 * return false if failed and the callback can stop receiving UDP packets
1563
 * if pkt_continue is false. */
1564
static int
1565
doq_recv(struct comm_point* c, struct doq_pkt_addr* paddr, int* pkt_continue,
1566
  struct ngtcp2_pkt_info* pi)
1567
{
1568
  struct msghdr msg;
1569
  struct iovec iov[1];
1570
  ssize_t rcv;
1571
  union {
1572
    struct cmsghdr hdr;
1573
    char buf[256];
1574
  } ancil;
1575
1576
  msg.msg_name = &paddr->addr;
1577
  msg.msg_namelen = (socklen_t)sizeof(paddr->addr);
1578
  iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1579
  iov[0].iov_len = sldns_buffer_remaining(c->doq_socket->pkt_buf);
1580
  msg.msg_iov = iov;
1581
  msg.msg_iovlen = 1;
1582
  msg.msg_control = ancil.buf;
1583
#ifndef S_SPLINT_S
1584
  msg.msg_controllen = sizeof(ancil.buf);
1585
#endif /* S_SPLINT_S */
1586
  msg.msg_flags = 0;
1587
1588
  rcv = recvmsg(c->fd, &msg, MSG_DONTWAIT);
1589
  if(rcv == -1) {
1590
    if(errno != EAGAIN && errno != EINTR
1591
      && udp_recv_needs_log(errno)) {
1592
      log_err("recvmsg failed for doq: %s", strerror(errno));
1593
    }
1594
    *pkt_continue = 0;
1595
    return 0;
1596
  }
1597
1598
  paddr->addrlen = msg.msg_namelen;
1599
  sldns_buffer_skip(c->doq_socket->pkt_buf, rcv);
1600
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1601
  if(!doq_get_localaddr_cmsg(c, paddr, pkt_continue, &msg))
1602
    return 0;
1603
  pi->ecn = msghdr_get_ecn(&msg, paddr->addr.sockaddr.in.sin_family);
1604
  return 1;
1605
}
1606
1607
/** send the version negotiation for doq. scid and dcid are flipped around
1608
 * to send back to the client. */
1609
static void
1610
doq_send_version_negotiation(struct comm_point* c, struct doq_pkt_addr* paddr,
1611
  const uint8_t* dcid, size_t dcidlen, const uint8_t* scid,
1612
  size_t scidlen)
1613
{
1614
  uint32_t versions[2];
1615
  size_t versions_len = 0;
1616
  ngtcp2_ssize ret;
1617
  uint8_t unused_random;
1618
1619
  /* fill the array with supported versions */
1620
  versions[0] = NGTCP2_PROTO_VER_V1;
1621
  versions_len = 1;
1622
  unused_random = ub_random_max(c->doq_socket->rnd, 256);
1623
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1624
  ret = ngtcp2_pkt_write_version_negotiation(
1625
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1626
    sldns_buffer_capacity(c->doq_socket->pkt_buf), unused_random,
1627
    dcid, dcidlen, scid, scidlen, versions, versions_len);
1628
  if(ret < 0) {
1629
    log_err("ngtcp2_pkt_write_version_negotiation failed: %s",
1630
      ngtcp2_strerror(ret));
1631
    return;
1632
  }
1633
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1634
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1635
  doq_send_pkt(c, paddr, 0);
1636
}
1637
1638
/** Find the doq_conn object by remote address and dcid */
1639
static struct doq_conn*
1640
doq_conn_find(struct doq_table* table, struct doq_addr_storage* addr,
1641
  socklen_t addrlen, struct doq_addr_storage* localaddr,
1642
  socklen_t localaddrlen, int ifindex, const uint8_t* dcid,
1643
  size_t dcidlen)
1644
{
1645
  struct rbnode_type* node;
1646
  struct doq_conn key;
1647
  memset(&key.node, 0, sizeof(key.node));
1648
  key.node.key = &key;
1649
  memmove(&key.key.paddr.addr, addr, addrlen);
1650
  key.key.paddr.addrlen = addrlen;
1651
  memmove(&key.key.paddr.localaddr, localaddr, localaddrlen);
1652
  key.key.paddr.localaddrlen = localaddrlen;
1653
  key.key.paddr.ifindex = ifindex;
1654
  key.key.dcid = (void*)dcid;
1655
  key.key.dcidlen = dcidlen;
1656
  node = rbtree_search(table->conn_tree, &key);
1657
  if(node)
1658
    return (struct doq_conn*)node->key;
1659
  return NULL;
1660
}
1661
1662
/** find the doq_con by the connection id */
1663
static struct doq_conn*
1664
doq_conn_find_by_id(struct doq_table* table, const uint8_t* dcid,
1665
  size_t dcidlen)
1666
{
1667
  struct doq_conid* conid;
1668
  lock_rw_rdlock(&table->conid_lock);
1669
  conid = doq_conid_find(table, dcid, dcidlen);
1670
  if(conid) {
1671
    /* make a copy of the key */
1672
    struct doq_conn* conn;
1673
    struct doq_conn_key key = conid->key;
1674
    uint8_t cid[NGTCP2_MAX_CIDLEN];
1675
    log_assert(conid->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1676
    memcpy(cid, conid->key.dcid, conid->key.dcidlen);
1677
    key.dcid = cid;
1678
    lock_rw_unlock(&table->conid_lock);
1679
1680
    /* now that the conid lock is released, look up the conn */
1681
    lock_rw_rdlock(&table->lock);
1682
    conn = doq_conn_find(table, &key.paddr.addr,
1683
      key.paddr.addrlen, &key.paddr.localaddr,
1684
      key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
1685
      key.dcidlen);
1686
    if(!conn) {
1687
      /* The connection got deleted between the conid lookup
1688
       * and the connection lock grab, it no longer exists,
1689
       * so return null. */
1690
      lock_rw_unlock(&table->lock);
1691
      return NULL;
1692
    }
1693
    lock_basic_lock(&conn->lock);
1694
    if(conn->is_deleted) {
1695
      lock_rw_unlock(&table->lock);
1696
      lock_basic_unlock(&conn->lock);
1697
      return NULL;
1698
    }
1699
    lock_rw_unlock(&table->lock);
1700
    return conn;
1701
  }
1702
  lock_rw_unlock(&table->conid_lock);
1703
  return NULL;
1704
}
1705
1706
/** Find the doq_conn, by addr or by connection id */
1707
static struct doq_conn*
1708
doq_conn_find_by_addr_or_cid(struct doq_table* table,
1709
  struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen)
1710
{
1711
  struct doq_conn* conn;
1712
  lock_rw_rdlock(&table->lock);
1713
  conn = doq_conn_find(table, &paddr->addr, paddr->addrlen,
1714
    &paddr->localaddr, paddr->localaddrlen, paddr->ifindex,
1715
    dcid, dcidlen);
1716
  if(conn && conn->is_deleted) {
1717
    conn = NULL;
1718
  }
1719
  if(conn) {
1720
    lock_basic_lock(&conn->lock);
1721
    lock_rw_unlock(&table->lock);
1722
    verbose(VERB_ALGO, "doq: found connection by address, dcid");
1723
  } else {
1724
    lock_rw_unlock(&table->lock);
1725
    conn = doq_conn_find_by_id(table, dcid, dcidlen);
1726
    if(conn) {
1727
      verbose(VERB_ALGO, "doq: found connection by dcid");
1728
    }
1729
  }
1730
  return conn;
1731
}
1732
1733
/** decode doq packet header, false on handled or failure, true to continue
1734
 * to process the packet */
1735
static int
1736
doq_decode_pkt_header_negotiate(struct comm_point* c,
1737
  struct doq_pkt_addr* paddr, struct doq_conn** conn)
1738
{
1739
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1740
  struct ngtcp2_version_cid vc;
1741
#else
1742
  uint32_t version;
1743
  const uint8_t *dcid, *scid;
1744
  size_t dcidlen, scidlen;
1745
#endif
1746
  int rv;
1747
1748
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1749
  rv = ngtcp2_pkt_decode_version_cid(&vc,
1750
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1751
    sldns_buffer_limit(c->doq_socket->pkt_buf),
1752
    c->doq_socket->sv_scidlen);
1753
#else
1754
  rv = ngtcp2_pkt_decode_version_cid(&version, &dcid, &dcidlen,
1755
    &scid, &scidlen, sldns_buffer_begin(c->doq_socket->pkt_buf),
1756
    sldns_buffer_limit(c->doq_socket->pkt_buf), c->doq_socket->sv_scidlen);
1757
#endif
1758
  if(rv != 0) {
1759
    if(rv == NGTCP2_ERR_VERSION_NEGOTIATION) {
1760
      /* send the version negotiation */
1761
      doq_send_version_negotiation(c, paddr,
1762
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1763
      vc.scid, vc.scidlen, vc.dcid, vc.dcidlen
1764
#else
1765
      scid, scidlen, dcid, dcidlen
1766
#endif
1767
      );
1768
      return 0;
1769
    }
1770
    verbose(VERB_ALGO, "doq: could not decode version "
1771
      "and CID from QUIC packet header: %s",
1772
      ngtcp2_strerror(rv));
1773
    return 0;
1774
  }
1775
1776
  if(verbosity >= VERB_ALGO) {
1777
    verbose(VERB_ALGO, "ngtcp2_pkt_decode_version_cid packet has "
1778
      "QUIC protocol version %u", (unsigned)
1779
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1780
      vc.
1781
#endif
1782
      version
1783
      );
1784
    log_hex("dcid",
1785
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1786
      (void*)vc.dcid, vc.dcidlen
1787
#else
1788
      (void*)dcid, dcidlen
1789
#endif
1790
      );
1791
    log_hex("scid",
1792
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1793
      (void*)vc.scid, vc.scidlen
1794
#else
1795
      (void*)scid, scidlen
1796
#endif
1797
      );
1798
  }
1799
  *conn = doq_conn_find_by_addr_or_cid(c->doq_socket->table, paddr,
1800
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1801
    vc.dcid, vc.dcidlen
1802
#else
1803
    dcid, dcidlen
1804
#endif
1805
    );
1806
  if(*conn)
1807
    (*conn)->doq_socket = c->doq_socket;
1808
  return 1;
1809
}
1810
1811
/** fill cid structure with random data */
1812
static void doq_cid_randfill(struct ngtcp2_cid* cid, size_t datalen,
1813
  struct ub_randstate* rnd)
1814
{
1815
  uint8_t buf[32];
1816
  if(datalen > sizeof(buf))
1817
    datalen = sizeof(buf);
1818
  doq_fill_rand(rnd, buf, datalen);
1819
  ngtcp2_cid_init(cid, buf, datalen);
1820
}
1821
1822
/** send retry packet for doq connection. */
1823
static void
1824
doq_send_retry(struct comm_point* c, struct doq_pkt_addr* paddr,
1825
  struct ngtcp2_pkt_hd* hd)
1826
{
1827
  char host[256], port[32];
1828
  struct ngtcp2_cid scid;
1829
  uint8_t token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN];
1830
  ngtcp2_tstamp ts;
1831
  ngtcp2_ssize tokenlen, ret;
1832
1833
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1834
    sizeof(host), port, sizeof(port))) {
1835
    log_err("doq_send_retry failed");
1836
    return;
1837
  }
1838
  verbose(VERB_ALGO, "doq: sending retry packet to %s %s", host, port);
1839
1840
  /* the server chosen source connection ID */
1841
  scid.datalen = c->doq_socket->sv_scidlen;
1842
  doq_cid_randfill(&scid, scid.datalen, c->doq_socket->rnd);
1843
1844
  ts = doq_get_timestamp_nanosec();
1845
1846
  tokenlen = ngtcp2_crypto_generate_retry_token(token,
1847
    c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1848
    hd->version, (void*)&paddr->addr, paddr->addrlen, &scid,
1849
    &hd->dcid, ts);
1850
  if(tokenlen < 0) {
1851
    log_err("ngtcp2_crypto_generate_retry_token failed: %s",
1852
      ngtcp2_strerror(tokenlen));
1853
    return;
1854
  }
1855
1856
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1857
  ret = ngtcp2_crypto_write_retry(sldns_buffer_begin(c->doq_socket->pkt_buf),
1858
    sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version,
1859
    &hd->scid, &scid, &hd->dcid, token, tokenlen);
1860
  if(ret < 0) {
1861
    log_err("ngtcp2_crypto_write_retry failed: %s",
1862
      ngtcp2_strerror(ret));
1863
    return;
1864
  }
1865
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1866
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1867
  doq_send_pkt(c, paddr, 0);
1868
}
1869
1870
/** doq send stateless connection close */
1871
static void
1872
doq_send_stateless_connection_close(struct comm_point* c,
1873
  struct doq_pkt_addr* paddr, struct ngtcp2_pkt_hd* hd,
1874
  uint64_t error_code)
1875
{
1876
  ngtcp2_ssize ret;
1877
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1878
  ret = ngtcp2_crypto_write_connection_close(
1879
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1880
    sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, &hd->scid,
1881
    &hd->dcid, error_code, NULL, 0);
1882
  if(ret < 0) {
1883
    log_err("ngtcp2_crypto_write_connection_close failed: %s",
1884
      ngtcp2_strerror(ret));
1885
    return;
1886
  }
1887
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1888
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1889
  doq_send_pkt(c, paddr, 0);
1890
}
1891
1892
/** doq verify retry token, false on failure */
1893
static int
1894
doq_verify_retry_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1895
  struct ngtcp2_cid* ocid, struct ngtcp2_pkt_hd* hd)
1896
{
1897
  char host[256], port[32];
1898
  ngtcp2_tstamp ts;
1899
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1900
    sizeof(host), port, sizeof(port))) {
1901
    log_err("doq_verify_retry_token failed");
1902
    return 0;
1903
  }
1904
  ts = doq_get_timestamp_nanosec();
1905
  verbose(VERB_ALGO, "doq: verifying retry token from %s %s", host,
1906
    port);
1907
  if(ngtcp2_crypto_verify_retry_token(ocid,
1908
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1909
    hd->token, hd->tokenlen,
1910
#else
1911
    hd->token.base, hd->token.len,
1912
#endif
1913
    c->doq_socket->static_secret,
1914
    c->doq_socket->static_secret_len, hd->version,
1915
    (void*)&paddr->addr, paddr->addrlen, &hd->dcid,
1916
    10*NGTCP2_SECONDS, ts) != 0) {
1917
    verbose(VERB_ALGO, "doq: could not verify retry token "
1918
      "from %s %s", host, port);
1919
    return 0;
1920
  }
1921
  verbose(VERB_ALGO, "doq: verified retry token from %s %s", host, port);
1922
  return 1;
1923
}
1924
1925
/** doq verify token, false on failure */
1926
static int
1927
doq_verify_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1928
  struct ngtcp2_pkt_hd* hd)
1929
{
1930
  char host[256], port[32];
1931
  ngtcp2_tstamp ts;
1932
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1933
    sizeof(host), port, sizeof(port))) {
1934
    log_err("doq_verify_token failed");
1935
    return 0;
1936
  }
1937
  ts = doq_get_timestamp_nanosec();
1938
  verbose(VERB_ALGO, "doq: verifying token from %s %s", host, port);
1939
  if(ngtcp2_crypto_verify_regular_token(
1940
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1941
    hd->token, hd->tokenlen,
1942
#else
1943
    hd->token.base, hd->token.len,
1944
#endif
1945
    c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1946
    (void*)&paddr->addr, paddr->addrlen, 3600*NGTCP2_SECONDS,
1947
    ts) != 0) {
1948
    verbose(VERB_ALGO, "doq: could not verify token from %s %s",
1949
      host, port);
1950
    return 0;
1951
  }
1952
  verbose(VERB_ALGO, "doq: verified token from %s %s", host, port);
1953
  return 1;
1954
}
1955
1956
/** delete and remove from the lookup tree the doq_conn connection */
1957
static void
1958
doq_delete_connection(struct comm_point* c, struct doq_conn* conn)
1959
{
1960
  struct doq_conn copy;
1961
  uint8_t cid[NGTCP2_MAX_CIDLEN];
1962
  rbnode_type* node;
1963
  if(!conn)
1964
    return;
1965
  /* Copy the key and set it deleted. */
1966
  conn->is_deleted = 1;
1967
  doq_conn_write_disable(conn);
1968
  copy.key = conn->key;
1969
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1970
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
1971
  copy.key.dcid = cid;
1972
  copy.node.key = &copy;
1973
  lock_basic_unlock(&conn->lock);
1974
1975
  /* Now get the table lock to delete it from the tree */
1976
  lock_rw_wrlock(&c->doq_socket->table->lock);
1977
  node = rbtree_delete(c->doq_socket->table->conn_tree, copy.node.key);
1978
  if(node) {
1979
    conn = (struct doq_conn*)node->key;
1980
    lock_basic_lock(&conn->lock);
1981
    doq_conn_write_list_remove(c->doq_socket->table, conn);
1982
    if(conn->timer.timer_in_list) {
1983
      /* Remove timer from list first, because finding the
1984
       * rbnode element of the setlist of same timeouts
1985
       * needs tree lookup. Edit the tree structure after
1986
       * that lookup. */
1987
      doq_timer_list_remove(c->doq_socket->table,
1988
        &conn->timer);
1989
    }
1990
    if(conn->timer.timer_in_tree)
1991
      doq_timer_tree_remove(c->doq_socket->table,
1992
        &conn->timer);
1993
  }
1994
  lock_rw_unlock(&c->doq_socket->table->lock);
1995
  if(node) {
1996
    lock_basic_unlock(&conn->lock);
1997
    doq_table_quic_size_subtract(c->doq_socket->table,
1998
      sizeof(*conn)+conn->key.dcidlen);
1999
    doq_conn_delete(conn, c->doq_socket->table);
2000
  }
2001
}
2002
2003
/** create and setup a new doq connection, to a new destination, or with
2004
 * a new dcid. It has a new set of streams. It is inserted in the lookup tree.
2005
 * Returns NULL on failure. */
2006
static struct doq_conn*
2007
doq_setup_new_conn(struct comm_point* c, struct doq_pkt_addr* paddr,
2008
  struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid)
2009
{
2010
  struct doq_conn* conn;
2011
  if(!doq_table_quic_size_available(c->doq_socket->table,
2012
    c->doq_socket->cfg, sizeof(*conn)+hd->dcid.datalen
2013
    + sizeof(struct doq_stream)
2014
    + 100 /* estimated input query */
2015
    + 1200 /* estimated output query */)) {
2016
    verbose(VERB_ALGO, "doq: no mem available for new connection");
2017
    doq_send_stateless_connection_close(c, paddr, hd,
2018
      NGTCP2_CONNECTION_REFUSED);
2019
    return NULL;
2020
  }
2021
  conn = doq_conn_create(c, paddr, hd->dcid.data, hd->dcid.datalen,
2022
    hd->version);
2023
  if(!conn) {
2024
    log_err("doq: could not allocate doq_conn");
2025
    return NULL;
2026
  }
2027
  lock_rw_wrlock(&c->doq_socket->table->lock);
2028
  lock_basic_lock(&conn->lock);
2029
  if(!rbtree_insert(c->doq_socket->table->conn_tree, &conn->node)) {
2030
    lock_rw_unlock(&c->doq_socket->table->lock);
2031
    log_err("doq: duplicate connection");
2032
    /* conn has no entry in writelist, and no timer yet. */
2033
    lock_basic_unlock(&conn->lock);
2034
    doq_conn_delete(conn, c->doq_socket->table);
2035
    return NULL;
2036
  }
2037
  lock_rw_unlock(&c->doq_socket->table->lock);
2038
  doq_table_quic_size_add(c->doq_socket->table,
2039
    sizeof(*conn)+conn->key.dcidlen);
2040
  verbose(VERB_ALGO, "doq: created new connection");
2041
2042
  /* the scid and dcid switch meaning from the accepted client
2043
   * connection to the server connection. The 'source' and 'destination'
2044
   * meaning is reversed. */
2045
  if(!doq_conn_setup(conn, hd->scid.data, hd->scid.datalen,
2046
    (ocid?ocid->data:NULL), (ocid?ocid->datalen:0),
2047
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2048
    hd->token, hd->tokenlen
2049
#else
2050
    hd->token.base, hd->token.len
2051
#endif
2052
    )) {
2053
    log_err("doq: could not set up connection");
2054
    doq_delete_connection(c, conn);
2055
    return NULL;
2056
  }
2057
  return conn;
2058
}
2059
2060
/** perform doq address validation */
2061
static int
2062
doq_address_validation(struct comm_point* c, struct doq_pkt_addr* paddr,
2063
  struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid,
2064
  struct ngtcp2_cid** pocid)
2065
{
2066
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2067
  const uint8_t* token = hd->token;
2068
  size_t tokenlen = hd->tokenlen;
2069
#else
2070
  const uint8_t* token = hd->token.base;
2071
  size_t tokenlen = hd->token.len;
2072
#endif
2073
  verbose(VERB_ALGO, "doq stateless address validation");
2074
2075
  if(tokenlen == 0 || token == NULL) {
2076
    doq_send_retry(c, paddr, hd);
2077
    return 0;
2078
  }
2079
  if(token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY &&
2080
    hd->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN) {
2081
    doq_send_stateless_connection_close(c, paddr, hd,
2082
      NGTCP2_INVALID_TOKEN);
2083
    return 0;
2084
  }
2085
  if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) {
2086
    if(!doq_verify_retry_token(c, paddr, ocid, hd)) {
2087
      doq_send_stateless_connection_close(c, paddr, hd,
2088
        NGTCP2_INVALID_TOKEN);
2089
      return 0;
2090
    }
2091
    *pocid = ocid;
2092
  } else if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) {
2093
    if(!doq_verify_token(c, paddr, hd)) {
2094
      doq_send_retry(c, paddr, hd);
2095
      return 0;
2096
    }
2097
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2098
    hd->token = NULL;
2099
    hd->tokenlen = 0;
2100
#else
2101
    hd->token.base = NULL;
2102
    hd->token.len = 0;
2103
#endif
2104
  } else {
2105
    verbose(VERB_ALGO, "doq address validation: unrecognised "
2106
      "token in hd.token.base with magic byte 0x%2.2x",
2107
      (int)token[0]);
2108
    if(c->doq_socket->validate_addr) {
2109
      doq_send_retry(c, paddr, hd);
2110
      return 0;
2111
    }
2112
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2113
    hd->token = NULL;
2114
    hd->tokenlen = 0;
2115
#else
2116
    hd->token.base = NULL;
2117
    hd->token.len = 0;
2118
#endif
2119
  }
2120
  return 1;
2121
}
2122
2123
/** the doq accept, returns false if no further processing of content */
2124
static int
2125
doq_accept(struct comm_point* c, struct doq_pkt_addr* paddr,
2126
  struct doq_conn** conn, struct ngtcp2_pkt_info* pi)
2127
{
2128
  int rv;
2129
  struct ngtcp2_pkt_hd hd;
2130
  struct ngtcp2_cid ocid, *pocid=NULL;
2131
  int err_retry;
2132
  memset(&hd, 0, sizeof(hd));
2133
  rv = ngtcp2_accept(&hd, sldns_buffer_begin(c->doq_socket->pkt_buf),
2134
    sldns_buffer_limit(c->doq_socket->pkt_buf));
2135
  if(rv != 0) {
2136
    if(rv == NGTCP2_ERR_RETRY) {
2137
      doq_send_retry(c, paddr, &hd);
2138
      return 0;
2139
    }
2140
    log_err("doq: initial packet failed, ngtcp2_accept failed: %s",
2141
      ngtcp2_strerror(rv));
2142
    return 0;
2143
  }
2144
  if(c->doq_socket->validate_addr ||
2145
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2146
    hd.tokenlen
2147
#else
2148
    hd.token.len
2149
#endif
2150
    ) {
2151
    if(!doq_address_validation(c, paddr, &hd, &ocid, &pocid))
2152
      return 0;
2153
  }
2154
  *conn = doq_setup_new_conn(c, paddr, &hd, pocid);
2155
  if(!*conn)
2156
    return 0;
2157
  (*conn)->doq_socket = c->doq_socket;
2158
  if(!doq_conn_recv(c, paddr, *conn, pi, &err_retry, NULL)) {
2159
    if(err_retry)
2160
      doq_send_retry(c, paddr, &hd);
2161
    doq_delete_connection(c, *conn);
2162
    *conn = NULL;
2163
    return 0;
2164
  }
2165
  return 1;
2166
}
2167
2168
/** doq pickup a timer to wait for for the worker. If any timer exists. */
2169
static void
2170
doq_pickup_timer(struct comm_point* c)
2171
{
2172
  struct doq_timer* t;
2173
  struct timeval tv;
2174
  int have_time = 0;
2175
  memset(&tv, 0, sizeof(tv));
2176
2177
  lock_rw_wrlock(&c->doq_socket->table->lock);
2178
  RBTREE_FOR(t, struct doq_timer*, c->doq_socket->table->timer_tree) {
2179
    if(t->worker_doq_socket == NULL ||
2180
      t->worker_doq_socket == c->doq_socket) {
2181
      /* pick up this element */
2182
      t->worker_doq_socket = c->doq_socket;
2183
      have_time = 1;
2184
      memcpy(&tv, &t->time, sizeof(tv));
2185
      break;
2186
    }
2187
  }
2188
  lock_rw_unlock(&c->doq_socket->table->lock);
2189
2190
  if(have_time) {
2191
    struct timeval rel;
2192
    timeval_subtract(&rel, &tv, c->doq_socket->now_tv);
2193
    comm_timer_set(c->doq_socket->timer, &rel);
2194
    memcpy(&c->doq_socket->marked_time, &tv,
2195
      sizeof(c->doq_socket->marked_time));
2196
    verbose(VERB_ALGO, "doq pickup timer at %d.%6.6d in %d.%6.6d",
2197
      (int)tv.tv_sec, (int)tv.tv_usec, (int)rel.tv_sec,
2198
      (int)rel.tv_usec);
2199
  } else {
2200
    if(comm_timer_is_set(c->doq_socket->timer))
2201
      comm_timer_disable(c->doq_socket->timer);
2202
    memset(&c->doq_socket->marked_time, 0,
2203
      sizeof(c->doq_socket->marked_time));
2204
    verbose(VERB_ALGO, "doq timer disabled");
2205
  }
2206
}
2207
2208
/** doq done with connection, release locks and setup timer and write */
2209
static void
2210
doq_done_setup_timer_and_write(struct comm_point* c, struct doq_conn* conn)
2211
{
2212
  struct doq_conn copy;
2213
  uint8_t cid[NGTCP2_MAX_CIDLEN];
2214
  rbnode_type* node;
2215
  struct timeval new_tv;
2216
  int write_change = 0, timer_change = 0;
2217
2218
  /* No longer in callbacks, so the pointer to doq_socket is back
2219
   * to NULL. */
2220
  conn->doq_socket = NULL;
2221
2222
  if(doq_conn_check_timer(conn, &new_tv))
2223
    timer_change = 1;
2224
  if( (conn->write_interest && !conn->on_write_list) ||
2225
    (!conn->write_interest && conn->on_write_list))
2226
    write_change = 1;
2227
2228
  if(!timer_change && !write_change) {
2229
    /* Nothing to do. */
2230
    lock_basic_unlock(&conn->lock);
2231
    return;
2232
  }
2233
2234
  /* The table lock is needed to change the write list and timer tree.
2235
   * So the connection lock is release and then the connection is
2236
   * looked up again. */
2237
  copy.key = conn->key;
2238
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2239
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2240
  copy.key.dcid = cid;
2241
  copy.node.key = &copy;
2242
  lock_basic_unlock(&conn->lock);
2243
2244
  lock_rw_wrlock(&c->doq_socket->table->lock);
2245
  node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2246
  if(!node) {
2247
    lock_rw_unlock(&c->doq_socket->table->lock);
2248
    /* Must have been deleted in the mean time. */
2249
    return;
2250
  }
2251
  conn = (struct doq_conn*)node->key;
2252
  lock_basic_lock(&conn->lock);
2253
  if(conn->is_deleted) {
2254
    /* It is deleted now. */
2255
    lock_rw_unlock(&c->doq_socket->table->lock);
2256
    lock_basic_unlock(&conn->lock);
2257
    return;
2258
  }
2259
2260
  if(write_change) {
2261
    /* Edit the write lists, we are holding the table.lock and can
2262
     * edit the list first,last and also prev,next and on_list
2263
     * elements in the doq_conn structures. */
2264
    doq_conn_set_write_list(c->doq_socket->table, conn);
2265
  }
2266
  if(timer_change) {
2267
    doq_timer_set(c->doq_socket->table, &conn->timer,
2268
      c->doq_socket, &new_tv);
2269
  }
2270
  lock_rw_unlock(&c->doq_socket->table->lock);
2271
  lock_basic_unlock(&conn->lock);
2272
}
2273
2274
/** doq done with connection callbacks, release locks and setup write */
2275
static void
2276
doq_done_with_conn_cb(struct comm_point* c, struct doq_conn* conn)
2277
{
2278
  struct doq_conn copy;
2279
  uint8_t cid[NGTCP2_MAX_CIDLEN];
2280
  rbnode_type* node;
2281
2282
  /* no longer in callbacks, so the pointer to doq_socket is back
2283
   * to NULL. */
2284
  conn->doq_socket = NULL;
2285
2286
  if( (conn->write_interest && conn->on_write_list) ||
2287
    (!conn->write_interest && !conn->on_write_list)) {
2288
    /* The connection already has the required write list
2289
     * status. */
2290
    lock_basic_unlock(&conn->lock);
2291
    return;
2292
  }
2293
2294
  /* To edit the write list of connections we have to hold the table
2295
   * lock, so we release the connection and then look it up again. */
2296
  copy.key = conn->key;
2297
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2298
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2299
  copy.key.dcid = cid;
2300
  copy.node.key = &copy;
2301
  lock_basic_unlock(&conn->lock);
2302
2303
  lock_rw_wrlock(&c->doq_socket->table->lock);
2304
  node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2305
  if(!node) {
2306
    lock_rw_unlock(&c->doq_socket->table->lock);
2307
    /* must have been deleted in the mean time */
2308
    return;
2309
  }
2310
  conn = (struct doq_conn*)node->key;
2311
  lock_basic_lock(&conn->lock);
2312
  if(conn->is_deleted) {
2313
    /* it is deleted now. */
2314
    lock_rw_unlock(&c->doq_socket->table->lock);
2315
    lock_basic_unlock(&conn->lock);
2316
    return;
2317
  }
2318
2319
  /* edit the write lists, we are holding the table.lock and can
2320
   * edit the list first,last and also prev,next and on_list elements
2321
   * in the doq_conn structures. */
2322
  doq_conn_set_write_list(c->doq_socket->table, conn);
2323
  lock_rw_unlock(&c->doq_socket->table->lock);
2324
  lock_basic_unlock(&conn->lock);
2325
}
2326
2327
/** doq count the length of the write list */
2328
static size_t
2329
doq_write_list_length(struct comm_point* c)
2330
{
2331
  size_t count = 0;
2332
  struct doq_conn* conn;
2333
  lock_rw_rdlock(&c->doq_socket->table->lock);
2334
  conn = c->doq_socket->table->write_list_first;
2335
  while(conn) {
2336
    count++;
2337
    conn = conn->write_next;
2338
  }
2339
  lock_rw_unlock(&c->doq_socket->table->lock);
2340
  return count;
2341
}
2342
2343
/** doq pop the first element from the write list to have write events */
2344
static struct doq_conn*
2345
doq_pop_write_conn(struct comm_point* c)
2346
{
2347
  struct doq_conn* conn;
2348
  lock_rw_wrlock(&c->doq_socket->table->lock);
2349
  conn = doq_table_pop_first(c->doq_socket->table);
2350
  while(conn && conn->is_deleted) {
2351
    lock_basic_unlock(&conn->lock);
2352
    conn = doq_table_pop_first(c->doq_socket->table);
2353
  }
2354
  lock_rw_unlock(&c->doq_socket->table->lock);
2355
  if(conn)
2356
    conn->doq_socket = c->doq_socket;
2357
  return conn;
2358
}
2359
2360
/** The connection is done with write callbacks, release it.
 * @param c: the comm point with the doq socket.
 * @param conn: the connection that was written for.
 * @param delete_it: if true the connection is deleted, otherwise its
 *   timer and write list status are set up. */
static void
doq_done_with_write_cb(struct comm_point* c, struct doq_conn* conn,
  int delete_it)
{
  if(!delete_it)
    doq_done_setup_timer_and_write(c, conn);
  else
    doq_delete_connection(c, conn);
}
2371
2372
/** see if the doq socket wants to write packets */
2373
static int
2374
doq_socket_want_write(struct comm_point* c)
2375
{
2376
  int want_write = 0;
2377
  if(c->doq_socket->have_blocked_pkt)
2378
    return 1;
2379
  lock_rw_rdlock(&c->doq_socket->table->lock);
2380
  if(c->doq_socket->table->write_list_first)
2381
    want_write = 1;
2382
  lock_rw_unlock(&c->doq_socket->table->lock);
2383
  return want_write;
2384
}
2385
2386
/** enable write event for the doq server socket fd */
2387
static void
2388
doq_socket_write_enable(struct comm_point* c)
2389
{
2390
  verbose(VERB_ALGO, "doq socket want write");
2391
  if(c->doq_socket->event_has_write)
2392
    return;
2393
  comm_point_listen_for_rw(c, 1, 1);
2394
  c->doq_socket->event_has_write = 1;
2395
}
2396
2397
/** disable write event for the doq server socket fd */
2398
static void
2399
doq_socket_write_disable(struct comm_point* c)
2400
{
2401
  verbose(VERB_ALGO, "doq socket want no write");
2402
  if(!c->doq_socket->event_has_write)
2403
    return;
2404
  comm_point_listen_for_rw(c, 1, 0);
2405
  c->doq_socket->event_has_write = 0;
2406
}
2407
2408
/** write blocked packet, if possible. returns false if failed, again. */
2409
static int
2410
doq_write_blocked_pkt(struct comm_point* c)
2411
{
2412
  struct doq_pkt_addr paddr;
2413
  if(!c->doq_socket->have_blocked_pkt)
2414
    return 1;
2415
  c->doq_socket->have_blocked_pkt = 0;
2416
  if(sldns_buffer_limit(c->doq_socket->blocked_pkt) >
2417
    sldns_buffer_remaining(c->doq_socket->pkt_buf))
2418
    return 1; /* impossibly large, drop it.
2419
    impossible since pkt_buf is same size as blocked_pkt buf. */
2420
  sldns_buffer_clear(c->doq_socket->pkt_buf);
2421
  sldns_buffer_write(c->doq_socket->pkt_buf,
2422
    sldns_buffer_begin(c->doq_socket->blocked_pkt),
2423
    sldns_buffer_limit(c->doq_socket->blocked_pkt));
2424
  sldns_buffer_flip(c->doq_socket->pkt_buf);
2425
  memcpy(&paddr, c->doq_socket->blocked_paddr, sizeof(paddr));
2426
  doq_send_pkt(c, &paddr, c->doq_socket->blocked_pkt_pi.ecn);
2427
  if(c->doq_socket->have_blocked_pkt)
2428
    return 0;
2429
  return 1;
2430
}
2431
2432
/** doq find a timer that timeouted and return the conn, locked. */
2433
static struct doq_conn*
2434
doq_timer_timeout_conn(struct doq_server_socket* doq_socket)
2435
{
2436
  struct doq_conn* conn = NULL;
2437
  struct rbnode_type* node;
2438
  lock_rw_wrlock(&doq_socket->table->lock);
2439
  node = rbtree_first(doq_socket->table->timer_tree);
2440
  if(node && node != RBTREE_NULL) {
2441
    struct doq_timer* t = (struct doq_timer*)node;
2442
    conn = t->conn;
2443
2444
    /* If now < timer then no further timeouts in tree. */
2445
    if(timeval_smaller(doq_socket->now_tv, &t->time)) {
2446
      lock_rw_unlock(&doq_socket->table->lock);
2447
      return NULL;
2448
    }
2449
2450
    lock_basic_lock(&conn->lock);
2451
    conn->doq_socket = doq_socket;
2452
2453
    /* Now that the timer is fired, remove it. */
2454
    doq_timer_unset(doq_socket->table, t);
2455
    lock_rw_unlock(&doq_socket->table->lock);
2456
    return conn;
2457
  }
2458
  lock_rw_unlock(&doq_socket->table->lock);
2459
  return NULL;
2460
}
2461
2462
/** doq timer erase the marker that said which timer the worker uses. */
2463
static void
2464
doq_timer_erase_marker(struct doq_server_socket* doq_socket)
2465
{
2466
  struct doq_timer* t;
2467
  lock_rw_wrlock(&doq_socket->table->lock);
2468
  t = doq_timer_find_time(doq_socket->table, &doq_socket->marked_time);
2469
  if(t && t->worker_doq_socket == doq_socket)
2470
    t->worker_doq_socket = NULL;
2471
  lock_rw_unlock(&doq_socket->table->lock);
2472
  memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2473
}
2474
2475
void
2476
doq_timer_cb(void* arg)
2477
{
2478
  struct doq_server_socket* doq_socket = (struct doq_server_socket*)arg;
2479
  struct doq_conn* conn;
2480
  verbose(VERB_ALGO, "doq timer callback");
2481
2482
  doq_timer_erase_marker(doq_socket);
2483
2484
  while((conn = doq_timer_timeout_conn(doq_socket)) != NULL) {
2485
    if(conn->is_deleted ||
2486
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2487
      ngtcp2_conn_in_closing_period(conn->conn) ||
2488
#else
2489
      ngtcp2_conn_is_in_closing_period(conn->conn) ||
2490
#endif
2491
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2492
      ngtcp2_conn_in_draining_period(conn->conn)
2493
#else
2494
      ngtcp2_conn_is_in_draining_period(conn->conn)
2495
#endif
2496
      ) {
2497
      if(verbosity >= VERB_ALGO) {
2498
        char remotestr[256];
2499
        addr_to_str((void*)&conn->key.paddr.addr,
2500
          conn->key.paddr.addrlen, remotestr,
2501
          sizeof(remotestr));
2502
        verbose(VERB_ALGO, "doq conn %s is deleted "
2503
          "after timeout", remotestr);
2504
      }
2505
      doq_delete_connection(doq_socket->cp, conn);
2506
      continue;
2507
    }
2508
    if(!doq_conn_handle_timeout(conn))
2509
      doq_delete_connection(doq_socket->cp, conn);
2510
    else doq_done_setup_timer_and_write(doq_socket->cp, conn);
2511
  }
2512
2513
  if(doq_socket_want_write(doq_socket->cp))
2514
    doq_socket_write_enable(doq_socket->cp);
2515
  else doq_socket_write_disable(doq_socket->cp);
2516
  doq_pickup_timer(doq_socket->cp);
2517
}
2518
2519
void
2520
comm_point_doq_callback(int fd, short event, void* arg)
2521
{
2522
  struct comm_point* c;
2523
  struct doq_pkt_addr paddr;
2524
  int i, pkt_continue, err_drop;
2525
  struct doq_conn* conn;
2526
  struct ngtcp2_pkt_info pi;
2527
  size_t count, num_len;
2528
2529
  c = (struct comm_point*)arg;
2530
  log_assert(c->type == comm_doq);
2531
2532
  log_assert(c && c->doq_socket->pkt_buf && c->fd == fd);
2533
  ub_comm_base_now(c->ev->base);
2534
2535
  /* see if there is a blocked packet, and send that if possible.
2536
   * do not attempt to read yet, even if possible, that would just
2537
   * push more answers in reply to those read packets onto the list
2538
   * of written replies. First attempt to clear the write content out.
2539
   * That keeps the memory usage from bloating up. */
2540
  if(c->doq_socket->have_blocked_pkt) {
2541
    if(!doq_write_blocked_pkt(c)) {
2542
      /* this write has also blocked, attempt to write
2543
       * later. Make sure the event listens to write
2544
       * events. */
2545
      if(!c->doq_socket->event_has_write)
2546
        doq_socket_write_enable(c);
2547
      doq_pickup_timer(c);
2548
      return;
2549
    }
2550
  }
2551
2552
  /* see if there is write interest */
2553
  count = 0;
2554
  num_len = doq_write_list_length(c);
2555
  while((conn = doq_pop_write_conn(c)) != NULL) {
2556
    if(conn->is_deleted ||
2557
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2558
      ngtcp2_conn_in_closing_period(conn->conn) ||
2559
#else
2560
      ngtcp2_conn_is_in_closing_period(conn->conn) ||
2561
#endif
2562
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2563
      ngtcp2_conn_in_draining_period(conn->conn)
2564
#else
2565
      ngtcp2_conn_is_in_draining_period(conn->conn)
2566
#endif
2567
      ) {
2568
      conn->doq_socket = NULL;
2569
      lock_basic_unlock(&conn->lock);
2570
      if(c->doq_socket->have_blocked_pkt) {
2571
        if(!c->doq_socket->event_has_write)
2572
          doq_socket_write_enable(c);
2573
        doq_pickup_timer(c);
2574
        return;
2575
      }
2576
      if(++count > num_len*2)
2577
        break;
2578
      continue;
2579
    }
2580
    if(verbosity >= VERB_ALGO) {
2581
      char remotestr[256];
2582
      addr_to_str((void*)&conn->key.paddr.addr,
2583
        conn->key.paddr.addrlen, remotestr,
2584
        sizeof(remotestr));
2585
      verbose(VERB_ALGO, "doq write connection %s %d",
2586
        remotestr, doq_sockaddr_get_port(
2587
        &conn->key.paddr.addr));
2588
    }
2589
    if(doq_conn_write_streams(c, conn, &err_drop))
2590
      err_drop = 0;
2591
    doq_done_with_write_cb(c, conn, err_drop);
2592
    if(c->doq_socket->have_blocked_pkt) {
2593
      if(!c->doq_socket->event_has_write)
2594
        doq_socket_write_enable(c);
2595
      doq_pickup_timer(c);
2596
      return;
2597
    }
2598
    /* Stop overly long write lists that are created
2599
     * while we are processing. Do those next time there
2600
     * is a write callback. Stops long loops, and keeps
2601
     * fair for other events. */
2602
    if(++count > num_len*2)
2603
      break;
2604
  }
2605
2606
  /* check for data to read */
2607
  if((event&UB_EV_READ)!=0)
2608
    for(i=0; i<NUM_UDP_PER_SELECT; i++) {
2609
    /* there may be a blocked write packet and if so, stop
2610
     * reading because the reply cannot get written. The
2611
     * blocked packet could be written during the conn_recv
2612
     * handling of replies, or for a connection close. */
2613
    if(c->doq_socket->have_blocked_pkt) {
2614
      if(!c->doq_socket->event_has_write)
2615
        doq_socket_write_enable(c);
2616
      doq_pickup_timer(c);
2617
      return;
2618
    }
2619
    sldns_buffer_clear(c->doq_socket->pkt_buf);
2620
    doq_pkt_addr_init(&paddr);
2621
    log_assert(fd != -1);
2622
    log_assert(sldns_buffer_remaining(c->doq_socket->pkt_buf) > 0);
2623
    if(!doq_recv(c, &paddr, &pkt_continue, &pi)) {
2624
      if(pkt_continue)
2625
        continue;
2626
      break;
2627
    }
2628
2629
    /* handle incoming packet from remote addr to localaddr */
2630
    if(verbosity >= VERB_ALGO) {
2631
      char remotestr[256], localstr[256];
2632
      addr_to_str((void*)&paddr.addr, paddr.addrlen,
2633
        remotestr, sizeof(remotestr));
2634
      addr_to_str((void*)&paddr.localaddr,
2635
        paddr.localaddrlen, localstr,
2636
        sizeof(localstr));
2637
      log_info("incoming doq packet from %s port %d on "
2638
        "%s port %d ifindex %d",
2639
        remotestr, doq_sockaddr_get_port(&paddr.addr),
2640
        localstr,
2641
        doq_sockaddr_get_port(&paddr.localaddr),
2642
        paddr.ifindex);
2643
      log_info("doq_recv length %d ecn 0x%x",
2644
        (int)sldns_buffer_limit(c->doq_socket->pkt_buf),
2645
        (int)pi.ecn);
2646
    }
2647
2648
    if(sldns_buffer_limit(c->doq_socket->pkt_buf) == 0)
2649
      continue;
2650
2651
    conn = NULL;
2652
    if(!doq_decode_pkt_header_negotiate(c, &paddr, &conn))
2653
      continue;
2654
    if(!conn) {
2655
      if(!doq_accept(c, &paddr, &conn, &pi))
2656
        continue;
2657
      if(!doq_conn_write_streams(c, conn, NULL)) {
2658
        doq_delete_connection(c, conn);
2659
        continue;
2660
      }
2661
      doq_done_setup_timer_and_write(c, conn);
2662
      continue;
2663
    }
2664
    if(
2665
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2666
      ngtcp2_conn_in_closing_period(conn->conn)
2667
#else
2668
      ngtcp2_conn_is_in_closing_period(conn->conn)
2669
#endif
2670
      ) {
2671
      if(!doq_conn_send_close(c, conn)) {
2672
        doq_delete_connection(c, conn);
2673
      } else {
2674
        doq_done_setup_timer_and_write(c, conn);
2675
      }
2676
      continue;
2677
    }
2678
    if(
2679
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2680
      ngtcp2_conn_in_draining_period(conn->conn)
2681
#else
2682
      ngtcp2_conn_is_in_draining_period(conn->conn)
2683
#endif
2684
      ) {
2685
      doq_done_setup_timer_and_write(c, conn);
2686
      continue;
2687
    }
2688
    if(!doq_conn_recv(c, &paddr, conn, &pi, NULL, &err_drop)) {
2689
      /* The receive failed, and if it also failed to send
2690
       * a close, drop the connection. That means it is not
2691
       * in the closing period. */
2692
      if(err_drop) {
2693
        doq_delete_connection(c, conn);
2694
      } else {
2695
        doq_done_setup_timer_and_write(c, conn);
2696
      }
2697
      continue;
2698
    }
2699
    if(!doq_conn_write_streams(c, conn, &err_drop)) {
2700
      if(err_drop) {
2701
        doq_delete_connection(c, conn);
2702
      } else {
2703
        doq_done_setup_timer_and_write(c, conn);
2704
      }
2705
      continue;
2706
    }
2707
    doq_done_setup_timer_and_write(c, conn);
2708
  }
2709
2710
  /* see if we want to have more write events */
2711
  verbose(VERB_ALGO, "doq check write enable");
2712
  if(doq_socket_want_write(c))
2713
    doq_socket_write_enable(c);
2714
  else doq_socket_write_disable(c);
2715
  doq_pickup_timer(c);
2716
}
2717
2718
/** create new doq server socket structure */
2719
static struct doq_server_socket*
2720
doq_server_socket_create(struct doq_table* table, struct ub_randstate* rnd,
2721
  const void* quic_sslctx, struct comm_point* c, struct comm_base* base,
2722
  struct config_file* cfg)
2723
{
2724
  size_t doq_buffer_size = 4096; /* bytes buffer size, for one packet. */
2725
  struct doq_server_socket* doq_socket;
2726
  doq_socket = calloc(1, sizeof(*doq_socket));
2727
  if(!doq_socket) {
2728
    return NULL;
2729
  }
2730
  doq_socket->table = table;
2731
  doq_socket->rnd = rnd;
2732
  doq_socket->validate_addr = 1;
2733
  /* the doq_socket has its own copy of the static secret, as
2734
   * well as other config values, so that they do not need table.lock */
2735
  doq_socket->static_secret_len = table->static_secret_len;
2736
  doq_socket->static_secret = memdup(table->static_secret,
2737
    table->static_secret_len);
2738
  if(!doq_socket->static_secret) {
2739
    free(doq_socket);
2740
    return NULL;
2741
  }
2742
  doq_socket->ctx = (SSL_CTX*)quic_sslctx;
2743
  doq_socket->idle_timeout = table->idle_timeout;
2744
  doq_socket->sv_scidlen = table->sv_scidlen;
2745
  doq_socket->cp = c;
2746
  doq_socket->pkt_buf = sldns_buffer_new(doq_buffer_size);
2747
  if(!doq_socket->pkt_buf) {
2748
    free(doq_socket->static_secret);
2749
    free(doq_socket);
2750
    return NULL;
2751
  }
2752
  doq_socket->blocked_pkt = sldns_buffer_new(
2753
    sldns_buffer_capacity(doq_socket->pkt_buf));
2754
  if(!doq_socket->pkt_buf) {
2755
    free(doq_socket->static_secret);
2756
    sldns_buffer_free(doq_socket->pkt_buf);
2757
    free(doq_socket);
2758
    return NULL;
2759
  }
2760
  doq_socket->blocked_paddr = calloc(1,
2761
    sizeof(*doq_socket->blocked_paddr));
2762
  if(!doq_socket->blocked_paddr) {
2763
    free(doq_socket->static_secret);
2764
    sldns_buffer_free(doq_socket->pkt_buf);
2765
    sldns_buffer_free(doq_socket->blocked_pkt);
2766
    free(doq_socket);
2767
    return NULL;
2768
  }
2769
  doq_socket->timer = comm_timer_create(base, doq_timer_cb, doq_socket);
2770
  if(!doq_socket->timer) {
2771
    free(doq_socket->static_secret);
2772
    sldns_buffer_free(doq_socket->pkt_buf);
2773
    sldns_buffer_free(doq_socket->blocked_pkt);
2774
    free(doq_socket->blocked_paddr);
2775
    free(doq_socket);
2776
    return NULL;
2777
  }
2778
  memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2779
  comm_base_timept(base, &doq_socket->now_tt, &doq_socket->now_tv);
2780
  doq_socket->cfg = cfg;
2781
  return doq_socket;
2782
}
2783
2784
/** delete doq server socket structure */
2785
static void
2786
doq_server_socket_delete(struct doq_server_socket* doq_socket)
2787
{
2788
  if(!doq_socket)
2789
    return;
2790
  free(doq_socket->static_secret);
2791
#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
2792
  free(doq_socket->quic_method);
2793
#endif
2794
  sldns_buffer_free(doq_socket->pkt_buf);
2795
  sldns_buffer_free(doq_socket->blocked_pkt);
2796
  free(doq_socket->blocked_paddr);
2797
  comm_timer_delete(doq_socket->timer);
2798
  free(doq_socket);
2799
}
2800
2801
/** find repinfo in the doq table */
2802
static struct doq_conn*
2803
doq_lookup_repinfo(struct doq_table* table, struct comm_reply* repinfo)
2804
{
2805
  struct doq_conn* conn;
2806
  struct doq_conn_key key;
2807
  doq_conn_key_from_repinfo(&key, repinfo);
2808
  lock_rw_rdlock(&table->lock);
2809
  conn = doq_conn_find(table, &key.paddr.addr,
2810
    key.paddr.addrlen, &key.paddr.localaddr,
2811
    key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
2812
    key.dcidlen);
2813
  if(conn) {
2814
    lock_basic_lock(&conn->lock);
2815
    lock_rw_unlock(&table->lock);
2816
    return conn;
2817
  }
2818
  lock_rw_unlock(&table->lock);
2819
  return NULL;
2820
}
2821
2822
/** doq find connection and stream. From inside callbacks from worker. */
2823
static int
2824
doq_lookup_conn_stream(struct comm_reply* repinfo, struct comm_point* c,
2825
  struct doq_conn** conn, struct doq_stream** stream)
2826
{
2827
  log_assert(c->doq_socket);
2828
  if(c->doq_socket->current_conn) {
2829
    *conn = c->doq_socket->current_conn;
2830
  } else {
2831
    *conn = doq_lookup_repinfo(c->doq_socket->table, repinfo);
2832
    if((*conn) && (*conn)->is_deleted) {
2833
      lock_basic_unlock(&(*conn)->lock);
2834
      *conn = NULL;
2835
    }
2836
    if(*conn) {
2837
      (*conn)->doq_socket = c->doq_socket;
2838
    }
2839
  }
2840
  if(!*conn) {
2841
    *stream = NULL;
2842
    return 0;
2843
  }
2844
  *stream = doq_stream_find(*conn, repinfo->doq_streamid);
2845
  if(!*stream) {
2846
    if(!c->doq_socket->current_conn) {
2847
      /* Not inside callbacks, we have our own lock on conn.
2848
       * Release it. */
2849
      lock_basic_unlock(&(*conn)->lock);
2850
    }
2851
    return 0;
2852
  }
2853
  if((*stream)->is_closed) {
2854
    /* stream is closed, ignore reply or drop */
2855
    if(!c->doq_socket->current_conn) {
2856
      /* Not inside callbacks, we have our own lock on conn.
2857
       * Release it. */
2858
      lock_basic_unlock(&(*conn)->lock);
2859
    }
2860
    return 0;
2861
  }
2862
  return 1;
2863
}
2864
2865
/** doq send a reply from a comm reply */
2866
static void
2867
doq_socket_send_reply(struct comm_reply* repinfo)
2868
{
2869
  struct doq_conn* conn;
2870
  struct doq_stream* stream;
2871
  log_assert(repinfo->c->type == comm_doq);
2872
  if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2873
    verbose(VERB_ALGO, "doq: send_reply but %s is gone",
2874
      (conn?"stream":"connection"));
2875
    /* No stream, it may have been closed. */
2876
    /* Drop the reply, it cannot be sent. */
2877
    return;
2878
  }
2879
  if(!doq_stream_send_reply(conn, stream, repinfo->c->buffer))
2880
    doq_stream_close(conn, stream, 1);
2881
  if(!repinfo->c->doq_socket->current_conn) {
2882
    /* Not inside callbacks, we have our own lock on conn.
2883
     * Release it. */
2884
    doq_done_with_conn_cb(repinfo->c, conn);
2885
    /* since we sent a reply, or closed it, the assumption is
2886
     * that there is something to write, so enable write event.
2887
     * It waits until the write event happens to write the
2888
     * streams with answers, this allows some answers to be
2889
     * answered before the event loop reaches the doq fd, in
2890
     * repinfo->c->fd, and that collates answers. That would
2891
     * not happen if we write doq packets right now. */
2892
    doq_socket_write_enable(repinfo->c);
2893
  }
2894
}
2895
2896
/** doq drop a reply from a comm reply */
2897
static void
2898
doq_socket_drop_reply(struct comm_reply* repinfo)
2899
{
2900
  struct doq_conn* conn;
2901
  struct doq_stream* stream;
2902
  log_assert(repinfo->c->type == comm_doq);
2903
  if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2904
    verbose(VERB_ALGO, "doq: drop_reply but %s is gone",
2905
      (conn?"stream":"connection"));
2906
    /* The connection or stream is already gone. */
2907
    return;
2908
  }
2909
  doq_stream_close(conn, stream, 1);
2910
  if(!repinfo->c->doq_socket->current_conn) {
2911
    /* Not inside callbacks, we have our own lock on conn.
2912
     * Release it. */
2913
    doq_done_with_conn_cb(repinfo->c, conn);
2914
    doq_socket_write_enable(repinfo->c);
2915
  }
2916
}
2917
#endif /* HAVE_NGTCP2 */
2918
2919
int adjusted_tcp_timeout(struct comm_point* c)
2920
0
{
2921
0
  if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM)
2922
0
    return TCP_QUERY_TIMEOUT_MINIMUM;
2923
0
  return c->tcp_timeout_msec;
2924
0
}
2925
2926
/** Use a new tcp handler for new query fd, set to read query */
2927
static void
2928
setup_tcp_handler(struct comm_point* c, int fd, int cur, int max)
2929
0
{
2930
0
  int handler_usage;
2931
0
  log_assert(c->type == comm_tcp || c->type == comm_http);
2932
0
  log_assert(c->fd == -1);
2933
0
  sldns_buffer_clear(c->buffer);
2934
#ifdef USE_DNSCRYPT
2935
  if (c->dnscrypt)
2936
    sldns_buffer_clear(c->dnscrypt_buffer);
2937
#endif
2938
0
  c->tcp_is_reading = 1;
2939
0
  c->tcp_byte_count = 0;
2940
0
  c->tcp_keepalive = 0;
2941
  /* if more than half the tcp handlers are in use, use a shorter
2942
   * timeout for this TCP connection, we need to make space for
2943
   * other connections to be able to get attention */
2944
  /* If > 50% TCP handler structures in use, set timeout to 1/100th
2945
   *  configured value.
2946
   * If > 65%TCP handler structures in use, set to 1/500th configured
2947
   *  value.
2948
   * If > 80% TCP handler structures in use, set to 0.
2949
   *
2950
   * If the timeout to use falls below 200 milliseconds, an actual
2951
   * timeout of 200ms is used.
2952
   */
2953
0
  handler_usage = (cur * 100) / max;
2954
0
  if(handler_usage > 50 && handler_usage <= 65)
2955
0
    c->tcp_timeout_msec /= 100;
2956
0
  else if (handler_usage > 65 && handler_usage <= 80)
2957
0
    c->tcp_timeout_msec /= 500;
2958
0
  else if (handler_usage > 80)
2959
0
    c->tcp_timeout_msec = 0;
2960
0
  comm_point_start_listening(c, fd, adjusted_tcp_timeout(c));
2961
0
}
2962
2963
void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
2964
  short ATTR_UNUSED(event), void* arg)
2965
0
{
2966
0
  struct comm_base* b = (struct comm_base*)arg;
2967
  /* timeout for the slow accept, re-enable accepts again */
2968
0
  if(b->start_accept) {
2969
0
    verbose(VERB_ALGO, "wait is over, slow accept disabled");
2970
0
    fptr_ok(fptr_whitelist_start_accept(b->start_accept));
2971
0
    (*b->start_accept)(b->cb_arg);
2972
0
    b->eb->slow_accept_enabled = 0;
2973
0
  }
2974
0
}
2975
2976
int comm_point_perform_accept(struct comm_point* c,
2977
  struct sockaddr_storage* addr, socklen_t* addrlen)
2978
0
{
2979
0
  int new_fd;
2980
0
  *addrlen = (socklen_t)sizeof(*addr);
2981
#ifndef HAVE_ACCEPT4
2982
  new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
2983
#else
2984
  /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
2985
0
  new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
2986
0
#endif
2987
0
  if(new_fd == -1) {
2988
0
#ifndef USE_WINSOCK
2989
    /* EINTR is signal interrupt. others are closed connection. */
2990
0
    if( errno == EINTR || errno == EAGAIN
2991
0
#ifdef EWOULDBLOCK
2992
0
      || errno == EWOULDBLOCK
2993
0
#endif
2994
0
#ifdef ECONNABORTED
2995
0
      || errno == ECONNABORTED
2996
0
#endif
2997
0
#ifdef EPROTO
2998
0
      || errno == EPROTO
2999
0
#endif /* EPROTO */
3000
0
      )
3001
0
      return -1;
3002
0
#if defined(ENFILE) && defined(EMFILE)
3003
0
    if(errno == ENFILE || errno == EMFILE) {
3004
      /* out of file descriptors, likely outside of our
3005
       * control. stop accept() calls for some time */
3006
0
      if(c->ev->base->stop_accept) {
3007
0
        struct comm_base* b = c->ev->base;
3008
0
        struct timeval tv;
3009
0
        verbose(VERB_ALGO, "out of file descriptors: "
3010
0
          "slow accept");
3011
0
        ub_comm_base_now(b);
3012
0
        if(b->eb->last_slow_log+SLOW_LOG_TIME <=
3013
0
          b->eb->secs) {
3014
0
          b->eb->last_slow_log = b->eb->secs;
3015
0
          verbose(VERB_OPS, "accept failed, "
3016
0
            "slow down accept for %d "
3017
0
            "msec: %s",
3018
0
            NETEVENT_SLOW_ACCEPT_TIME,
3019
0
            sock_strerror(errno));
3020
0
        }
3021
0
        b->eb->slow_accept_enabled = 1;
3022
0
        fptr_ok(fptr_whitelist_stop_accept(
3023
0
          b->stop_accept));
3024
0
        (*b->stop_accept)(b->cb_arg);
3025
        /* set timeout, no mallocs */
3026
0
        tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
3027
0
        tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
3028
0
        b->eb->slow_accept = ub_event_new(b->eb->base,
3029
0
          -1, UB_EV_TIMEOUT,
3030
0
          comm_base_handle_slow_accept, b);
3031
0
        if(b->eb->slow_accept == NULL) {
3032
          /* we do not want to log here, because
3033
           * that would spam the logfiles.
3034
           * error: "event_base_set failed." */
3035
0
        }
3036
0
        else if(ub_event_add(b->eb->slow_accept, &tv)
3037
0
          != 0) {
3038
          /* we do not want to log here,
3039
           * error: "event_add failed." */
3040
0
        }
3041
0
      } else {
3042
0
        log_err("accept, with no slow down, "
3043
0
          "failed: %s", sock_strerror(errno));
3044
0
      }
3045
0
      return -1;
3046
0
    }
3047
0
#endif
3048
#else /* USE_WINSOCK */
3049
    if(WSAGetLastError() == WSAEINPROGRESS ||
3050
      WSAGetLastError() == WSAECONNRESET)
3051
      return -1;
3052
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
3053
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3054
      return -1;
3055
    }
3056
#endif
3057
0
    log_err_addr("accept failed", sock_strerror(errno), addr,
3058
0
      *addrlen);
3059
0
    return -1;
3060
0
  }
3061
0
  if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
3062
0
    c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
3063
0
    if(!tcl_new_connection(c->tcl_addr)) {
3064
0
      if(verbosity >= 3)
3065
0
        log_err_addr("accept rejected",
3066
0
        "connection limit exceeded", addr, *addrlen);
3067
0
      sock_close(new_fd);
3068
0
      return -1;
3069
0
    }
3070
0
  }
3071
#ifndef HAVE_ACCEPT4
3072
  fd_set_nonblock(new_fd);
3073
#endif
3074
0
  return new_fd;
3075
0
}
3076
3077
#ifdef USE_WINSOCK
3078
static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
3079
#ifdef HAVE_BIO_SET_CALLBACK_EX
3080
  size_t ATTR_UNUSED(len),
3081
#endif
3082
        int ATTR_UNUSED(argi), long argl,
3083
#ifndef HAVE_BIO_SET_CALLBACK_EX
3084
  long retvalue
3085
#else
3086
  int retvalue, size_t* ATTR_UNUSED(processed)
3087
#endif
3088
  )
3089
{
3090
  int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
3091
  verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
3092
    (oper&BIO_CB_RETURN)?"return":"before",
3093
    (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
3094
    wsa_err==WSAEWOULDBLOCK?"wsawb":"");
3095
  /* on windows, check if previous operation caused EWOULDBLOCK */
3096
  if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
3097
    (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
3098
    if(wsa_err == WSAEWOULDBLOCK)
3099
      ub_winsock_tcp_wouldblock((struct ub_event*)
3100
        BIO_get_callback_arg(b), UB_EV_READ);
3101
  }
3102
  if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
3103
    (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
3104
    if(wsa_err == WSAEWOULDBLOCK)
3105
      ub_winsock_tcp_wouldblock((struct ub_event*)
3106
        BIO_get_callback_arg(b), UB_EV_WRITE);
3107
  }
3108
  /* return original return value */
3109
  return retvalue;
3110
}
3111
3112
/** set win bio callbacks for nonblocking operations */
3113
void
3114
comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
3115
{
3116
  SSL* ssl = (SSL*)thessl;
3117
  /* set them both just in case, but usually they are the same BIO */
3118
#ifdef HAVE_BIO_SET_CALLBACK_EX
3119
  BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb);
3120
#else
3121
  BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
3122
#endif
3123
  BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
3124
#ifdef HAVE_BIO_SET_CALLBACK_EX
3125
  BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb);
3126
#else
3127
  BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
3128
#endif
3129
  BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
3130
}
3131
#endif
3132
3133
#ifdef HAVE_NGHTTP2
3134
/** Create http2 session server.  Per connection, after TCP accepted.*/
3135
static int http2_session_server_create(struct http2_session* h2_session)
3136
{
3137
  log_assert(h2_session->callbacks);
3138
  h2_session->is_drop = 0;
3139
  if(nghttp2_session_server_new(&h2_session->session,
3140
      h2_session->callbacks,
3141
    h2_session) == NGHTTP2_ERR_NOMEM) {
3142
    log_err("failed to create nghttp2 session server");
3143
    return 0;
3144
  }
3145
3146
  return 1;
3147
}
3148
3149
/** Submit http2 setting to session. Once per session. */
3150
static int http2_submit_settings(struct http2_session* h2_session)
3151
{
3152
  int ret;
3153
  nghttp2_settings_entry settings[1] = {
3154
    {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS,
3155
     h2_session->c->http2_max_streams}};
3156
3157
  ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE,
3158
    settings, 1);
3159
  if(ret) {
3160
    verbose(VERB_QUERY, "http2: submit_settings failed, "
3161
      "error: %s", nghttp2_strerror(ret));
3162
    return 0;
3163
  }
3164
  return 1;
3165
}
3166
#endif /* HAVE_NGHTTP2 */
3167
3168
#ifdef HAVE_NGHTTP2
3169
/** Delete http2 stream. After session delete or stream close callback */
3170
static void http2_stream_delete(struct http2_session* h2_session,
3171
  struct http2_stream* h2_stream)
3172
{
3173
  if(h2_stream->mesh_state) {
3174
    mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state,
3175
      h2_session->c);
3176
    h2_stream->mesh_state = NULL;
3177
  }
3178
  http2_req_stream_clear(h2_stream);
3179
  free(h2_stream);
3180
}
3181
#endif /* HAVE_NGHTTP2 */
3182
3183
/**
 * Delete http2 session server. After closing connection.
 * Deletes the nghttp2 session and all streams in the stream list, and
 * resets the drop flags. Without nghttp2 support this does nothing.
 * @param h2_session: the session structure, it is kept for reuse.
 */
static void http2_session_server_delete(struct http2_session* h2_session)
{
#ifdef HAVE_NGHTTP2
  struct http2_stream* cur, *following;
  nghttp2_session_del(h2_session->session); /* NULL input is fine */
  h2_session->session = NULL;
  cur = h2_session->first_stream;
  while(cur) {
    /* pick up the next pointer before the stream is freed */
    following = cur->next;
    http2_stream_delete(h2_session, cur);
    cur = following;
  }
  h2_session->first_stream = NULL;
  h2_session->is_drop = 0;
  h2_session->postpone_drop = 0;
  h2_session->c->h2_stream = NULL;
#endif
  (void)h2_session;
}
3202
3203
void
3204
comm_point_tcp_accept_callback(int fd, short event, void* arg)
3205
0
{
3206
0
  struct comm_point* c = (struct comm_point*)arg, *c_hdl;
3207
0
  int new_fd;
3208
0
  log_assert(c->type == comm_tcp_accept);
3209
0
  if(!(event & UB_EV_READ)) {
3210
0
    log_info("ignoring tcp accept event %d", (int)event);
3211
0
    return;
3212
0
  }
3213
0
  ub_comm_base_now(c->ev->base);
3214
  /* find free tcp handler. */
3215
0
  if(!c->tcp_free) {
3216
0
    log_warn("accepted too many tcp, connections full");
3217
0
    return;
3218
0
  }
3219
  /* accept incoming connection. */
3220
0
  c_hdl = c->tcp_free;
3221
  /* Should not happen: inconsistent tcp_free state in
3222
   * accept_callback. */
3223
0
  log_assert(c_hdl->is_in_tcp_free);
3224
  /* clear leftover flags from previous use, and then set the
3225
   * correct event base for the event structure for libevent */
3226
0
  ub_event_free(c_hdl->ev->ev);
3227
0
  c_hdl->ev->ev = NULL;
3228
0
  if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) ||
3229
0
    c_hdl->type == comm_local || c_hdl->type == comm_raw)
3230
0
    c_hdl->tcp_do_toggle_rw = 0;
3231
0
  else  c_hdl->tcp_do_toggle_rw = 1;
3232
3233
0
  if(c_hdl->type == comm_http) {
3234
#ifdef HAVE_NGHTTP2
3235
    if(!c_hdl->h2_session ||
3236
      !http2_session_server_create(c_hdl->h2_session)) {
3237
      log_warn("failed to create nghttp2");
3238
      return;
3239
    }
3240
    if(!c_hdl->h2_session ||
3241
      !http2_submit_settings(c_hdl->h2_session)) {
3242
      log_warn("failed to submit http2 settings");
3243
      if(c_hdl->h2_session)
3244
        http2_session_server_delete(c_hdl->h2_session);
3245
      return;
3246
    }
3247
    if(!c->ssl) {
3248
      c_hdl->tcp_do_toggle_rw = 0;
3249
      c_hdl->use_h2 = 1;
3250
    }
3251
#endif
3252
0
    c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3253
0
      UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3254
0
      comm_point_http_handle_callback, c_hdl);
3255
0
  } else {
3256
0
    c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3257
0
      UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3258
0
      comm_point_tcp_handle_callback, c_hdl);
3259
0
  }
3260
0
  if(!c_hdl->ev->ev) {
3261
0
    log_warn("could not ub_event_new, dropped tcp");
3262
#ifdef HAVE_NGHTTP2
3263
    if(c_hdl->type == comm_http && c_hdl->h2_session)
3264
      http2_session_server_delete(c_hdl->h2_session);
3265
#endif
3266
0
    return;
3267
0
  }
3268
0
  log_assert(fd != -1);
3269
0
  (void)fd;
3270
0
  new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.remote_addr,
3271
0
    &c_hdl->repinfo.remote_addrlen);
3272
0
  if(new_fd == -1) {
3273
#ifdef HAVE_NGHTTP2
3274
    if(c_hdl->type == comm_http && c_hdl->h2_session)
3275
      http2_session_server_delete(c_hdl->h2_session);
3276
#endif
3277
0
    return;
3278
0
  }
3279
  /* Copy remote_address to client_address.
3280
   * Simplest way/time for streams to do that. */
3281
0
  c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen;
3282
0
  memmove(&c_hdl->repinfo.client_addr,
3283
0
    &c_hdl->repinfo.remote_addr,
3284
0
    c_hdl->repinfo.remote_addrlen);
3285
0
  if(c->ssl) {
3286
0
    c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
3287
0
    if(!c_hdl->ssl) {
3288
0
      c_hdl->fd = new_fd;
3289
0
      comm_point_close(c_hdl);
3290
0
      return;
3291
0
    }
3292
0
    c_hdl->ssl_shake_state = comm_ssl_shake_read;
3293
#ifdef USE_WINSOCK
3294
    comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
3295
#endif
3296
0
  }
3297
3298
  /* Paranoia: Check that the state has not changed from above: */
3299
  /* Should not happen: tcp_free state changed within accept_callback. */
3300
0
  log_assert(c_hdl == c->tcp_free);
3301
0
  log_assert(c_hdl->is_in_tcp_free);
3302
  /* grab the tcp handler buffers */
3303
0
  c->cur_tcp_count++;
3304
0
  c->tcp_free = c_hdl->tcp_free;
3305
0
  c_hdl->tcp_free = NULL;
3306
0
  c_hdl->is_in_tcp_free = 0;
3307
0
  if(!c->tcp_free) {
3308
    /* stop accepting incoming queries for now. */
3309
0
    comm_point_stop_listening(c);
3310
0
  }
3311
0
  setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
3312
0
}
3313
3314
/** Make tcp handler free for next assignment */
3315
static void
3316
reclaim_tcp_handler(struct comm_point* c)
3317
0
{
3318
0
  log_assert(c->type == comm_tcp);
3319
0
  if(c->ssl) {
3320
0
#ifdef HAVE_SSL
3321
0
    SSL_shutdown(c->ssl);
3322
0
    SSL_free(c->ssl);
3323
0
    c->ssl = NULL;
3324
0
#endif
3325
0
  }
3326
0
  comm_point_close(c);
3327
0
  if(c->tcp_parent && !c->is_in_tcp_free) {
3328
    /* Should not happen: bad tcp_free state in reclaim_tcp. */
3329
0
    log_assert(c->tcp_free == NULL);
3330
0
    log_assert(c->tcp_parent->cur_tcp_count > 0);
3331
0
    c->tcp_parent->cur_tcp_count--;
3332
0
    c->tcp_free = c->tcp_parent->tcp_free;
3333
0
    c->tcp_parent->tcp_free = c;
3334
0
    c->is_in_tcp_free = 1;
3335
0
    if(!c->tcp_free) {
3336
      /* re-enable listening on accept socket */
3337
0
      comm_point_start_listening(c->tcp_parent, -1, -1);
3338
0
    }
3339
0
  }
3340
0
  c->tcp_more_read_again = NULL;
3341
0
  c->tcp_more_write_again = NULL;
3342
0
  c->tcp_byte_count = 0;
3343
0
  c->pp2_header_state = pp2_header_none;
3344
0
  sldns_buffer_clear(c->buffer);
3345
0
}
3346
3347
/** do the callback when writing is done */
3348
static void
3349
tcp_callback_writer(struct comm_point* c)
3350
0
{
3351
0
  log_assert(c->type == comm_tcp);
3352
0
  if(!c->tcp_write_and_read) {
3353
0
    sldns_buffer_clear(c->buffer);
3354
0
    c->tcp_byte_count = 0;
3355
0
  }
3356
0
  if(c->tcp_do_toggle_rw)
3357
0
    c->tcp_is_reading = 1;
3358
  /* switch from listening(write) to listening(read) */
3359
0
  if(c->tcp_req_info) {
3360
0
    tcp_req_info_handle_writedone(c->tcp_req_info);
3361
0
  } else {
3362
0
    comm_point_stop_listening(c);
3363
0
    if(c->tcp_write_and_read) {
3364
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
3365
0
      if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN,
3366
0
        &c->repinfo) ) {
3367
0
        comm_point_start_listening(c, -1,
3368
0
          adjusted_tcp_timeout(c));
3369
0
      }
3370
0
    } else {
3371
0
      comm_point_start_listening(c, -1,
3372
0
          adjusted_tcp_timeout(c));
3373
0
    }
3374
0
  }
3375
0
}
3376
3377
/** do the callback when reading is done */
3378
static void
3379
tcp_callback_reader(struct comm_point* c)
3380
0
{
3381
0
  log_assert(c->type == comm_tcp || c->type == comm_local);
3382
0
  sldns_buffer_flip(c->buffer);
3383
0
  if(c->tcp_do_toggle_rw)
3384
0
    c->tcp_is_reading = 0;
3385
0
  c->tcp_byte_count = 0;
3386
0
  if(c->tcp_req_info) {
3387
0
    tcp_req_info_handle_readdone(c->tcp_req_info);
3388
0
  } else {
3389
0
    if(c->type == comm_tcp)
3390
0
      comm_point_stop_listening(c);
3391
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
3392
0
    if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
3393
0
      comm_point_start_listening(c, -1,
3394
0
          adjusted_tcp_timeout(c));
3395
0
    }
3396
0
  }
3397
0
}
3398
3399
#ifdef HAVE_SSL
3400
/** true if the ssl handshake error has to be squelched from the logs */
3401
int
3402
squelch_err_ssl_handshake(unsigned long err)
3403
0
{
3404
0
  if(verbosity >= VERB_QUERY)
3405
0
    return 0; /* only squelch on low verbosity */
3406
0
  if(ERR_GET_LIB(err) == ERR_LIB_SSL &&
3407
0
    (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST ||
3408
0
     ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST ||
3409
0
     ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER ||
3410
0
     ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE
3411
0
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
3412
0
     || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER
3413
0
#endif
3414
0
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
3415
0
     || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL
3416
0
     || ERR_GET_REASON(err) == SSL_R_UNSUPPORTED_PROTOCOL
3417
0
#  ifdef SSL_R_VERSION_TOO_LOW
3418
0
     || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW
3419
0
#  endif
3420
0
#endif
3421
0
    ))
3422
0
    return 1;
3423
0
  return 0;
3424
0
}
3425
#endif /* HAVE_SSL */
3426
3427
/** continue ssl handshake */
3428
#ifdef HAVE_SSL
3429
static int
3430
ssl_handshake(struct comm_point* c)
3431
0
{
3432
0
  int r;
3433
0
  if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
3434
    /* read condition satisfied back to writing */
3435
0
    comm_point_listen_for_rw(c, 0, 1);
3436
0
    c->ssl_shake_state = comm_ssl_shake_none;
3437
0
    return 1;
3438
0
  }
3439
0
  if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
3440
    /* write condition satisfied, back to reading */
3441
0
    comm_point_listen_for_rw(c, 1, 0);
3442
0
    c->ssl_shake_state = comm_ssl_shake_none;
3443
0
    return 1;
3444
0
  }
3445
3446
0
  ERR_clear_error();
3447
0
  r = SSL_do_handshake(c->ssl);
3448
0
  if(r != 1) {
3449
0
    int want = SSL_get_error(c->ssl, r);
3450
0
    if(want == SSL_ERROR_WANT_READ) {
3451
0
      if(c->ssl_shake_state == comm_ssl_shake_read)
3452
0
        return 1;
3453
0
      c->ssl_shake_state = comm_ssl_shake_read;
3454
0
      comm_point_listen_for_rw(c, 1, 0);
3455
0
      return 1;
3456
0
    } else if(want == SSL_ERROR_WANT_WRITE) {
3457
0
      if(c->ssl_shake_state == comm_ssl_shake_write)
3458
0
        return 1;
3459
0
      c->ssl_shake_state = comm_ssl_shake_write;
3460
0
      comm_point_listen_for_rw(c, 0, 1);
3461
0
      return 1;
3462
0
    } else if(r == 0) {
3463
0
      return 0; /* closed */
3464
0
    } else if(want == SSL_ERROR_SYSCALL) {
3465
      /* SYSCALL and errno==0 means closed uncleanly */
3466
0
#ifdef EPIPE
3467
0
      if(errno == EPIPE && verbosity < 2)
3468
0
        return 0; /* silence 'broken pipe' */
3469
0
#endif
3470
0
#ifdef ECONNRESET
3471
0
      if(errno == ECONNRESET && verbosity < 2)
3472
0
        return 0; /* silence reset by peer */
3473
0
#endif
3474
0
      if(!tcp_connect_errno_needs_log(
3475
0
        (struct sockaddr*)&c->repinfo.remote_addr,
3476
0
        c->repinfo.remote_addrlen))
3477
0
        return 0; /* silence connect failures that
3478
        show up because after connect this is the
3479
        first system call that accesses the socket */
3480
0
      if(errno != 0)
3481
0
        log_err("SSL_handshake syscall: %s",
3482
0
          strerror(errno));
3483
0
      return 0;
3484
0
    } else {
3485
0
      unsigned long err = ERR_get_error();
3486
0
      if(!squelch_err_ssl_handshake(err)) {
3487
0
        long vr;
3488
0
        log_crypto_err_io_code("ssl handshake failed",
3489
0
          want, err);
3490
0
        if((vr=SSL_get_verify_result(c->ssl)) != 0)
3491
0
          log_err("ssl handshake cert error: %s",
3492
0
            X509_verify_cert_error_string(
3493
0
            vr));
3494
0
        log_addr(VERB_OPS, "ssl handshake failed",
3495
0
          &c->repinfo.remote_addr,
3496
0
          c->repinfo.remote_addrlen);
3497
0
      }
3498
0
      return 0;
3499
0
    }
3500
0
  }
3501
  /* this is where peer verification could take place */
3502
0
  if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
3503
    /* verification */
3504
0
    if(SSL_get_verify_result(c->ssl) == X509_V_OK) {
3505
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
3506
      X509* x = SSL_get1_peer_certificate(c->ssl);
3507
#else
3508
0
      X509* x = SSL_get_peer_certificate(c->ssl);
3509
0
#endif
3510
0
      if(!x) {
3511
0
        log_addr(VERB_ALGO, "SSL connection failed: "
3512
0
          "no certificate",
3513
0
          &c->repinfo.remote_addr,
3514
0
          c->repinfo.remote_addrlen);
3515
0
        return 0;
3516
0
      }
3517
0
      log_cert(VERB_ALGO, "peer certificate", x);
3518
0
#ifdef HAVE_SSL_GET0_PEERNAME
3519
0
      if(SSL_get0_peername(c->ssl)) {
3520
0
        char buf[255];
3521
0
        snprintf(buf, sizeof(buf), "SSL connection "
3522
0
          "to %s authenticated",
3523
0
          SSL_get0_peername(c->ssl));
3524
0
        log_addr(VERB_ALGO, buf, &c->repinfo.remote_addr,
3525
0
          c->repinfo.remote_addrlen);
3526
0
      } else {
3527
0
#endif
3528
0
        log_addr(VERB_ALGO, "SSL connection "
3529
0
          "authenticated", &c->repinfo.remote_addr,
3530
0
          c->repinfo.remote_addrlen);
3531
0
#ifdef HAVE_SSL_GET0_PEERNAME
3532
0
      }
3533
0
#endif
3534
0
      X509_free(x);
3535
0
    } else {
3536
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
3537
      X509* x = SSL_get1_peer_certificate(c->ssl);
3538
#else
3539
0
      X509* x = SSL_get_peer_certificate(c->ssl);
3540
0
#endif
3541
0
      if(x) {
3542
0
        log_cert(VERB_ALGO, "peer certificate", x);
3543
0
        X509_free(x);
3544
0
      }
3545
0
      log_addr(VERB_ALGO, "SSL connection failed: "
3546
0
        "failed to authenticate",
3547
0
        &c->repinfo.remote_addr,
3548
0
        c->repinfo.remote_addrlen);
3549
0
      return 0;
3550
0
    }
3551
0
  } else {
3552
    /* unauthenticated, the verify peer flag was not set
3553
     * in c->ssl when the ssl object was created from ssl_ctx */
3554
0
    log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr,
3555
0
      c->repinfo.remote_addrlen);
3556
0
  }
3557
3558
0
#ifdef HAVE_SSL_GET0_ALPN_SELECTED
3559
  /* check if http2 use is negotiated */
3560
0
  if(c->type == comm_http && c->h2_session) {
3561
0
    const unsigned char *alpn;
3562
0
    unsigned int alpnlen = 0;
3563
0
    SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen);
3564
0
    if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) {
3565
      /* connection upgraded to HTTP2 */
3566
0
      c->tcp_do_toggle_rw = 0;
3567
0
      c->use_h2 = 1;
3568
0
    } else {
3569
0
      verbose(VERB_ALGO, "client doesn't support HTTP/2");
3570
0
      return 0;
3571
0
    }
3572
0
  }
3573
0
#endif
3574
3575
  /* setup listen rw correctly */
3576
0
  if(c->tcp_is_reading) {
3577
0
    if(c->ssl_shake_state != comm_ssl_shake_read)
3578
0
      comm_point_listen_for_rw(c, 1, 0);
3579
0
  } else {
3580
0
    comm_point_listen_for_rw(c, 0, 1);
3581
0
  }
3582
0
  c->ssl_shake_state = comm_ssl_shake_none;
3583
0
  return 1;
3584
0
}
3585
#endif /* HAVE_SSL */
3586
3587
/** ssl read callback on TCP */
3588
static int
3589
ssl_handle_read(struct comm_point* c)
3590
0
{
3591
0
#ifdef HAVE_SSL
3592
0
  int r;
3593
0
  if(c->ssl_shake_state != comm_ssl_shake_none) {
3594
0
    if(!ssl_handshake(c))
3595
0
      return 0;
3596
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
3597
0
      return 1;
3598
0
  }
3599
0
  if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
3600
0
    struct pp2_header* header = NULL;
3601
0
    size_t want_read_size = 0;
3602
0
    size_t current_read_size = 0;
3603
0
    if(c->pp2_header_state == pp2_header_none) {
3604
0
      want_read_size = PP2_HEADER_SIZE;
3605
0
      if(sldns_buffer_remaining(c->buffer)<want_read_size) {
3606
0
        log_err_addr("proxy_protocol: not enough "
3607
0
          "buffer size to read PROXYv2 header", "",
3608
0
          &c->repinfo.remote_addr,
3609
0
          c->repinfo.remote_addrlen);
3610
0
        return 0;
3611
0
      }
3612
0
      verbose(VERB_ALGO, "proxy_protocol: reading fixed "
3613
0
        "part of PROXYv2 header (len %lu)",
3614
0
        (unsigned long)want_read_size);
3615
0
      current_read_size = want_read_size;
3616
0
      if(c->tcp_byte_count < current_read_size) {
3617
0
        ERR_clear_error();
3618
0
        if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(
3619
0
          c->buffer, c->tcp_byte_count),
3620
0
          current_read_size -
3621
0
          c->tcp_byte_count)) <= 0) {
3622
0
          int want = SSL_get_error(c->ssl, r);
3623
0
          if(want == SSL_ERROR_ZERO_RETURN) {
3624
0
            if(c->tcp_req_info)
3625
0
              return tcp_req_info_handle_read_close(c->tcp_req_info);
3626
0
            return 0; /* shutdown, closed */
3627
0
          } else if(want == SSL_ERROR_WANT_READ) {
3628
#ifdef USE_WINSOCK
3629
            ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3630
#endif
3631
0
            return 1; /* read more later */
3632
0
          } else if(want == SSL_ERROR_WANT_WRITE) {
3633
0
            c->ssl_shake_state = comm_ssl_shake_hs_write;
3634
0
            comm_point_listen_for_rw(c, 0, 1);
3635
0
            return 1;
3636
0
          } else if(want == SSL_ERROR_SYSCALL) {
3637
0
#ifdef ECONNRESET
3638
0
            if(errno == ECONNRESET && verbosity < 2)
3639
0
              return 0; /* silence reset by peer */
3640
0
#endif
3641
0
            if(errno != 0)
3642
0
              log_err("SSL_read syscall: %s",
3643
0
                strerror(errno));
3644
0
            return 0;
3645
0
          }
3646
0
          log_crypto_err_io("could not SSL_read",
3647
0
            want);
3648
0
          return 0;
3649
0
        }
3650
0
        c->tcp_byte_count += r;
3651
0
        sldns_buffer_skip(c->buffer, r);
3652
0
        if(c->tcp_byte_count != current_read_size) return 1;
3653
0
        c->pp2_header_state = pp2_header_init;
3654
0
      }
3655
0
    }
3656
0
    if(c->pp2_header_state == pp2_header_init) {
3657
0
      int err;
3658
0
      err = pp2_read_header(
3659
0
        sldns_buffer_begin(c->buffer),
3660
0
        sldns_buffer_limit(c->buffer));
3661
0
      if(err) {
3662
0
        log_err("proxy_protocol: could not parse "
3663
0
          "PROXYv2 header (%s)",
3664
0
          pp_lookup_error(err));
3665
0
        return 0;
3666
0
      }
3667
0
      header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
3668
0
      want_read_size = ntohs(header->len);
3669
0
      if(sldns_buffer_limit(c->buffer) <
3670
0
        PP2_HEADER_SIZE + want_read_size) {
3671
0
        log_err_addr("proxy_protocol: not enough "
3672
0
          "buffer size to read PROXYv2 header", "",
3673
0
          &c->repinfo.remote_addr,
3674
0
          c->repinfo.remote_addrlen);
3675
0
        return 0;
3676
0
      }
3677
0
      verbose(VERB_ALGO, "proxy_protocol: reading variable "
3678
0
        "part of PROXYv2 header (len %lu)",
3679
0
        (unsigned long)want_read_size);
3680
0
      current_read_size = PP2_HEADER_SIZE + want_read_size;
3681
0
      if(want_read_size == 0) {
3682
        /* nothing more to read; header is complete */
3683
0
        c->pp2_header_state = pp2_header_done;
3684
0
      } else if(c->tcp_byte_count < current_read_size) {
3685
0
        ERR_clear_error();
3686
0
        if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(
3687
0
          c->buffer, c->tcp_byte_count),
3688
0
          current_read_size -
3689
0
          c->tcp_byte_count)) <= 0) {
3690
0
          int want = SSL_get_error(c->ssl, r);
3691
0
          if(want == SSL_ERROR_ZERO_RETURN) {
3692
0
            if(c->tcp_req_info)
3693
0
              return tcp_req_info_handle_read_close(c->tcp_req_info);
3694
0
            return 0; /* shutdown, closed */
3695
0
          } else if(want == SSL_ERROR_WANT_READ) {
3696
#ifdef USE_WINSOCK
3697
            ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3698
#endif
3699
0
            return 1; /* read more later */
3700
0
          } else if(want == SSL_ERROR_WANT_WRITE) {
3701
0
            c->ssl_shake_state = comm_ssl_shake_hs_write;
3702
0
            comm_point_listen_for_rw(c, 0, 1);
3703
0
            return 1;
3704
0
          } else if(want == SSL_ERROR_SYSCALL) {
3705
0
#ifdef ECONNRESET
3706
0
            if(errno == ECONNRESET && verbosity < 2)
3707
0
              return 0; /* silence reset by peer */
3708
0
#endif
3709
0
            if(errno != 0)
3710
0
              log_err("SSL_read syscall: %s",
3711
0
                strerror(errno));
3712
0
            return 0;
3713
0
          }
3714
0
          log_crypto_err_io("could not SSL_read",
3715
0
            want);
3716
0
          return 0;
3717
0
        }
3718
0
        c->tcp_byte_count += r;
3719
0
        sldns_buffer_skip(c->buffer, r);
3720
0
        if(c->tcp_byte_count != current_read_size) return 1;
3721
0
        c->pp2_header_state = pp2_header_done;
3722
0
      }
3723
0
    }
3724
0
    if(c->pp2_header_state != pp2_header_done || !header) {
3725
0
      log_err_addr("proxy_protocol: wrong state for the "
3726
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
3727
0
        c->repinfo.remote_addrlen);
3728
0
      return 0;
3729
0
    }
3730
0
    sldns_buffer_flip(c->buffer);
3731
0
    if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
3732
0
      log_err_addr("proxy_protocol: could not consume "
3733
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
3734
0
        c->repinfo.remote_addrlen);
3735
0
      return 0;
3736
0
    }
3737
0
    verbose(VERB_ALGO, "proxy_protocol: successful read of "
3738
0
      "PROXYv2 header");
3739
    /* Clear and reset the buffer to read the following
3740
     * DNS packet(s). */
3741
0
    sldns_buffer_clear(c->buffer);
3742
0
    c->tcp_byte_count = 0;
3743
0
    return 1;
3744
0
  }
3745
0
  if(c->tcp_byte_count < sizeof(uint16_t)) {
3746
    /* read length bytes */
3747
0
    ERR_clear_error();
3748
0
    if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
3749
0
      c->tcp_byte_count), (int)(sizeof(uint16_t) -
3750
0
      c->tcp_byte_count))) <= 0) {
3751
0
      int want = SSL_get_error(c->ssl, r);
3752
0
      if(want == SSL_ERROR_ZERO_RETURN) {
3753
0
        if(c->tcp_req_info)
3754
0
          return tcp_req_info_handle_read_close(c->tcp_req_info);
3755
0
        return 0; /* shutdown, closed */
3756
0
      } else if(want == SSL_ERROR_WANT_READ) {
3757
#ifdef USE_WINSOCK
3758
        ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3759
#endif
3760
0
        return 1; /* read more later */
3761
0
      } else if(want == SSL_ERROR_WANT_WRITE) {
3762
0
        c->ssl_shake_state = comm_ssl_shake_hs_write;
3763
0
        comm_point_listen_for_rw(c, 0, 1);
3764
0
        return 1;
3765
0
      } else if(want == SSL_ERROR_SYSCALL) {
3766
0
#ifdef ECONNRESET
3767
0
        if(errno == ECONNRESET && verbosity < 2)
3768
0
          return 0; /* silence reset by peer */
3769
0
#endif
3770
0
        if(errno != 0)
3771
0
          log_err("SSL_read syscall: %s",
3772
0
            strerror(errno));
3773
0
        return 0;
3774
0
      }
3775
0
      log_crypto_err_io("could not SSL_read", want);
3776
0
      return 0;
3777
0
    }
3778
0
    c->tcp_byte_count += r;
3779
0
    if(c->tcp_byte_count < sizeof(uint16_t))
3780
0
      return 1;
3781
0
    if(sldns_buffer_read_u16_at(c->buffer, 0) >
3782
0
      sldns_buffer_capacity(c->buffer)) {
3783
0
      verbose(VERB_QUERY, "ssl: dropped larger than buffer");
3784
0
      return 0;
3785
0
    }
3786
0
    sldns_buffer_set_limit(c->buffer,
3787
0
      sldns_buffer_read_u16_at(c->buffer, 0));
3788
0
    if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
3789
0
      verbose(VERB_QUERY, "ssl: dropped bogus too short.");
3790
0
      return 0;
3791
0
    }
3792
0
    sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
3793
0
    verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
3794
0
      (int)sldns_buffer_limit(c->buffer));
3795
0
  }
3796
0
  if(sldns_buffer_remaining(c->buffer) > 0) {
3797
0
    ERR_clear_error();
3798
0
    r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
3799
0
      (int)sldns_buffer_remaining(c->buffer));
3800
0
    if(r <= 0) {
3801
0
      int want = SSL_get_error(c->ssl, r);
3802
0
      if(want == SSL_ERROR_ZERO_RETURN) {
3803
0
        if(c->tcp_req_info)
3804
0
          return tcp_req_info_handle_read_close(c->tcp_req_info);
3805
0
        return 0; /* shutdown, closed */
3806
0
      } else if(want == SSL_ERROR_WANT_READ) {
3807
#ifdef USE_WINSOCK
3808
        ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3809
#endif
3810
0
        return 1; /* read more later */
3811
0
      } else if(want == SSL_ERROR_WANT_WRITE) {
3812
0
        c->ssl_shake_state = comm_ssl_shake_hs_write;
3813
0
        comm_point_listen_for_rw(c, 0, 1);
3814
0
        return 1;
3815
0
      } else if(want == SSL_ERROR_SYSCALL) {
3816
0
#ifdef ECONNRESET
3817
0
        if(errno == ECONNRESET && verbosity < 2)
3818
0
          return 0; /* silence reset by peer */
3819
0
#endif
3820
0
        if(errno != 0)
3821
0
          log_err("SSL_read syscall: %s",
3822
0
            strerror(errno));
3823
0
        return 0;
3824
0
      }
3825
0
      log_crypto_err_io("could not SSL_read", want);
3826
0
      return 0;
3827
0
    }
3828
0
    sldns_buffer_skip(c->buffer, (ssize_t)r);
3829
0
  }
3830
0
  if(sldns_buffer_remaining(c->buffer) <= 0) {
3831
0
    tcp_callback_reader(c);
3832
0
  }
3833
0
  return 1;
3834
#else
3835
  (void)c;
3836
  return 0;
3837
#endif /* HAVE_SSL */
3838
0
}
3839
3840
/** ssl write callback on TCP */
3841
static int
3842
ssl_handle_write(struct comm_point* c)
3843
0
{
3844
0
#ifdef HAVE_SSL
3845
0
  int r;
3846
0
  if(c->ssl_shake_state != comm_ssl_shake_none) {
3847
0
    if(!ssl_handshake(c))
3848
0
      return 0;
3849
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
3850
0
      return 1;
3851
0
  }
3852
  /* ignore return, if fails we may simply block */
3853
0
  (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
3854
0
  if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
3855
0
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer));
3856
0
    ERR_clear_error();
3857
0
    if(c->tcp_write_and_read) {
3858
0
      if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) {
3859
        /* combine the tcp length and the query for
3860
         * write, this emulates writev */
3861
0
        uint8_t buf[LDNS_RR_BUF_SIZE];
3862
0
        memmove(buf, &len, sizeof(uint16_t));
3863
0
        memmove(buf+sizeof(uint16_t),
3864
0
          c->tcp_write_pkt,
3865
0
          c->tcp_write_pkt_len);
3866
0
        r = SSL_write(c->ssl,
3867
0
          (void*)(buf+c->tcp_write_byte_count),
3868
0
          c->tcp_write_pkt_len + 2 -
3869
0
          c->tcp_write_byte_count);
3870
0
      } else {
3871
0
        r = SSL_write(c->ssl,
3872
0
          (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
3873
0
          (int)(sizeof(uint16_t)-c->tcp_write_byte_count));
3874
0
      }
3875
0
    } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
3876
0
      LDNS_RR_BUF_SIZE) {
3877
      /* combine the tcp length and the query for write,
3878
       * this emulates writev */
3879
0
      uint8_t buf[LDNS_RR_BUF_SIZE];
3880
0
      memmove(buf, &len, sizeof(uint16_t));
3881
0
      memmove(buf+sizeof(uint16_t),
3882
0
        sldns_buffer_current(c->buffer),
3883
0
        sldns_buffer_remaining(c->buffer));
3884
0
      r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
3885
0
        (int)(sizeof(uint16_t)+
3886
0
        sldns_buffer_remaining(c->buffer)
3887
0
        - c->tcp_byte_count));
3888
0
    } else {
3889
0
      r = SSL_write(c->ssl,
3890
0
        (void*)(((uint8_t*)&len)+c->tcp_byte_count),
3891
0
        (int)(sizeof(uint16_t)-c->tcp_byte_count));
3892
0
    }
3893
0
    if(r <= 0) {
3894
0
      int want = SSL_get_error(c->ssl, r);
3895
0
      if(want == SSL_ERROR_ZERO_RETURN) {
3896
0
        return 0; /* closed */
3897
0
      } else if(want == SSL_ERROR_WANT_READ) {
3898
0
        c->ssl_shake_state = comm_ssl_shake_hs_read;
3899
0
        comm_point_listen_for_rw(c, 1, 0);
3900
0
        return 1; /* wait for read condition */
3901
0
      } else if(want == SSL_ERROR_WANT_WRITE) {
3902
#ifdef USE_WINSOCK
3903
        ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
3904
#endif
3905
0
        return 1; /* write more later */
3906
0
      } else if(want == SSL_ERROR_SYSCALL) {
3907
0
#ifdef EPIPE
3908
0
        if(errno == EPIPE && verbosity < 2)
3909
0
          return 0; /* silence 'broken pipe' */
3910
0
#endif
3911
0
        if(errno != 0)
3912
0
          log_err("SSL_write syscall: %s",
3913
0
            strerror(errno));
3914
0
        return 0;
3915
0
      }
3916
0
      log_crypto_err_io("could not SSL_write", want);
3917
0
      return 0;
3918
0
    }
3919
0
    if(c->tcp_write_and_read) {
3920
0
      c->tcp_write_byte_count += r;
3921
0
      if(c->tcp_write_byte_count < sizeof(uint16_t))
3922
0
        return 1;
3923
0
    } else {
3924
0
      c->tcp_byte_count += r;
3925
0
      if(c->tcp_byte_count < sizeof(uint16_t))
3926
0
        return 1;
3927
0
      sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
3928
0
        sizeof(uint16_t));
3929
0
    }
3930
0
    if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
3931
0
      tcp_callback_writer(c);
3932
0
      return 1;
3933
0
    }
3934
0
  }
3935
0
  log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0);
3936
0
  log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
3937
0
  ERR_clear_error();
3938
0
  if(c->tcp_write_and_read) {
3939
0
    r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
3940
0
      (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count));
3941
0
  } else {
3942
0
    r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
3943
0
      (int)sldns_buffer_remaining(c->buffer));
3944
0
  }
3945
0
  if(r <= 0) {
3946
0
    int want = SSL_get_error(c->ssl, r);
3947
0
    if(want == SSL_ERROR_ZERO_RETURN) {
3948
0
      return 0; /* closed */
3949
0
    } else if(want == SSL_ERROR_WANT_READ) {
3950
0
      c->ssl_shake_state = comm_ssl_shake_hs_read;
3951
0
      comm_point_listen_for_rw(c, 1, 0);
3952
0
      return 1; /* wait for read condition */
3953
0
    } else if(want == SSL_ERROR_WANT_WRITE) {
3954
#ifdef USE_WINSOCK
3955
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
3956
#endif
3957
0
      return 1; /* write more later */
3958
0
    } else if(want == SSL_ERROR_SYSCALL) {
3959
0
#ifdef EPIPE
3960
0
      if(errno == EPIPE && verbosity < 2)
3961
0
        return 0; /* silence 'broken pipe' */
3962
0
#endif
3963
0
      if(errno != 0)
3964
0
        log_err("SSL_write syscall: %s",
3965
0
          strerror(errno));
3966
0
      return 0;
3967
0
    }
3968
0
    log_crypto_err_io("could not SSL_write", want);
3969
0
    return 0;
3970
0
  }
3971
0
  if(c->tcp_write_and_read) {
3972
0
    c->tcp_write_byte_count += r;
3973
0
  } else {
3974
0
    sldns_buffer_skip(c->buffer, (ssize_t)r);
3975
0
  }
3976
3977
0
  if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
3978
0
    tcp_callback_writer(c);
3979
0
  }
3980
0
  return 1;
3981
#else
3982
  (void)c;
3983
  return 0;
3984
#endif /* HAVE_SSL */
3985
0
}
3986
3987
/** handle ssl tcp connection with dns contents */
3988
static int
3989
ssl_handle_it(struct comm_point* c, int is_write)
3990
0
{
3991
  /* handle case where renegotiation wants read during write call
3992
   * or write during read calls */
3993
0
  if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write)
3994
0
    return ssl_handle_read(c);
3995
0
  else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read)
3996
0
    return ssl_handle_write(c);
3997
  /* handle read events for read operation and write events for a
3998
   * write operation */
3999
0
  else if(!is_write)
4000
0
    return ssl_handle_read(c);
4001
0
  return ssl_handle_write(c);
4002
0
}
4003
4004
/**
4005
 * Handle tcp reading callback.
4006
 * @param fd: file descriptor of socket.
4007
 * @param c: comm point to read from into buffer.
4008
 * @param short_ok: if true, very short packets are OK (for comm_local).
4009
 * @return: 0 on error
4010
 */
4011
static int
4012
comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
4013
0
{
4014
0
  ssize_t r;
4015
0
  int recv_initial = 0;
4016
0
  log_assert(c->type == comm_tcp || c->type == comm_local);
4017
0
  if(c->ssl)
4018
0
    return ssl_handle_it(c, 0);
4019
0
  if(!c->tcp_is_reading && !c->tcp_write_and_read)
4020
0
    return 0;
4021
4022
0
  log_assert(fd != -1);
4023
0
  if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
4024
0
    struct pp2_header* header = NULL;
4025
0
    size_t want_read_size = 0;
4026
0
    size_t current_read_size = 0;
4027
0
    if(c->pp2_header_state == pp2_header_none) {
4028
0
      want_read_size = PP2_HEADER_SIZE;
4029
0
      if(sldns_buffer_remaining(c->buffer)<want_read_size) {
4030
0
        log_err_addr("proxy_protocol: not enough "
4031
0
          "buffer size to read PROXYv2 header", "",
4032
0
          &c->repinfo.remote_addr,
4033
0
          c->repinfo.remote_addrlen);
4034
0
        return 0;
4035
0
      }
4036
0
      verbose(VERB_ALGO, "proxy_protocol: reading fixed "
4037
0
        "part of PROXYv2 header (len %lu)",
4038
0
        (unsigned long)want_read_size);
4039
0
      current_read_size = want_read_size;
4040
0
      if(c->tcp_byte_count < current_read_size) {
4041
0
        r = recv(fd, (void*)sldns_buffer_at(c->buffer,
4042
0
          c->tcp_byte_count),
4043
0
          current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
4044
0
        if(r == 0) {
4045
0
          if(c->tcp_req_info)
4046
0
            return tcp_req_info_handle_read_close(c->tcp_req_info);
4047
0
          return 0;
4048
0
        } else if(r == -1) {
4049
0
          goto recv_error_initial;
4050
0
        }
4051
0
        c->tcp_byte_count += r;
4052
0
        sldns_buffer_skip(c->buffer, r);
4053
0
        if(c->tcp_byte_count != current_read_size) return 1;
4054
0
        c->pp2_header_state = pp2_header_init;
4055
0
      }
4056
0
    }
4057
0
    if(c->pp2_header_state == pp2_header_init) {
4058
0
      int err;
4059
0
      err = pp2_read_header(
4060
0
        sldns_buffer_begin(c->buffer),
4061
0
        sldns_buffer_limit(c->buffer));
4062
0
      if(err) {
4063
0
        log_err("proxy_protocol: could not parse "
4064
0
          "PROXYv2 header (%s)",
4065
0
          pp_lookup_error(err));
4066
0
        return 0;
4067
0
      }
4068
0
      header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
4069
0
      want_read_size = ntohs(header->len);
4070
0
      if(sldns_buffer_limit(c->buffer) <
4071
0
        PP2_HEADER_SIZE + want_read_size) {
4072
0
        log_err_addr("proxy_protocol: not enough "
4073
0
          "buffer size to read PROXYv2 header", "",
4074
0
          &c->repinfo.remote_addr,
4075
0
          c->repinfo.remote_addrlen);
4076
0
        return 0;
4077
0
      }
4078
0
      verbose(VERB_ALGO, "proxy_protocol: reading variable "
4079
0
        "part of PROXYv2 header (len %lu)",
4080
0
        (unsigned long)want_read_size);
4081
0
      current_read_size = PP2_HEADER_SIZE + want_read_size;
4082
0
      if(want_read_size == 0) {
4083
        /* nothing more to read; header is complete */
4084
0
        c->pp2_header_state = pp2_header_done;
4085
0
      } else if(c->tcp_byte_count < current_read_size) {
4086
0
        r = recv(fd, (void*)sldns_buffer_at(c->buffer,
4087
0
          c->tcp_byte_count),
4088
0
          current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
4089
0
        if(r == 0) {
4090
0
          if(c->tcp_req_info)
4091
0
            return tcp_req_info_handle_read_close(c->tcp_req_info);
4092
0
          return 0;
4093
0
        } else if(r == -1) {
4094
0
          goto recv_error;
4095
0
        }
4096
0
        c->tcp_byte_count += r;
4097
0
        sldns_buffer_skip(c->buffer, r);
4098
0
        if(c->tcp_byte_count != current_read_size) return 1;
4099
0
        c->pp2_header_state = pp2_header_done;
4100
0
      }
4101
0
    }
4102
0
    if(c->pp2_header_state != pp2_header_done || !header) {
4103
0
      log_err_addr("proxy_protocol: wrong state for the "
4104
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
4105
0
        c->repinfo.remote_addrlen);
4106
0
      return 0;
4107
0
    }
4108
0
    sldns_buffer_flip(c->buffer);
4109
0
    if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
4110
0
      log_err_addr("proxy_protocol: could not consume "
4111
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
4112
0
        c->repinfo.remote_addrlen);
4113
0
      return 0;
4114
0
    }
4115
0
    verbose(VERB_ALGO, "proxy_protocol: successful read of "
4116
0
      "PROXYv2 header");
4117
    /* Clear and reset the buffer to read the following
4118
        * DNS packet(s). */
4119
0
    sldns_buffer_clear(c->buffer);
4120
0
    c->tcp_byte_count = 0;
4121
0
    return 1;
4122
0
  }
4123
4124
0
  if(c->tcp_byte_count < sizeof(uint16_t)) {
4125
    /* read length bytes */
4126
0
    r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
4127
0
      sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT);
4128
0
    if(r == 0) {
4129
0
      if(c->tcp_req_info)
4130
0
        return tcp_req_info_handle_read_close(c->tcp_req_info);
4131
0
      return 0;
4132
0
    } else if(r == -1) {
4133
0
      if(c->pp2_enabled) goto recv_error;
4134
0
      goto recv_error_initial;
4135
0
    }
4136
0
    c->tcp_byte_count += r;
4137
0
    if(c->tcp_byte_count != sizeof(uint16_t))
4138
0
      return 1;
4139
0
    if(sldns_buffer_read_u16_at(c->buffer, 0) >
4140
0
      sldns_buffer_capacity(c->buffer)) {
4141
0
      verbose(VERB_QUERY, "tcp: dropped larger than buffer");
4142
0
      return 0;
4143
0
    }
4144
0
    sldns_buffer_set_limit(c->buffer,
4145
0
      sldns_buffer_read_u16_at(c->buffer, 0));
4146
0
    if(!short_ok &&
4147
0
      sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
4148
0
      verbose(VERB_QUERY, "tcp: dropped bogus too short.");
4149
0
      return 0;
4150
0
    }
4151
0
    verbose(VERB_ALGO, "Reading tcp query of length %d",
4152
0
      (int)sldns_buffer_limit(c->buffer));
4153
0
  }
4154
4155
0
  if(sldns_buffer_remaining(c->buffer) == 0)
4156
0
    log_err("in comm_point_tcp_handle_read buffer_remaining is "
4157
0
      "not > 0 as expected, continuing with (harmless) 0 "
4158
0
      "length recv");
4159
0
  r = recv(fd, (void*)sldns_buffer_current(c->buffer),
4160
0
    sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
4161
0
  if(r == 0) {
4162
0
    if(c->tcp_req_info)
4163
0
      return tcp_req_info_handle_read_close(c->tcp_req_info);
4164
0
    return 0;
4165
0
  } else if(r == -1) {
4166
0
    goto recv_error;
4167
0
  }
4168
0
  sldns_buffer_skip(c->buffer, r);
4169
0
  if(sldns_buffer_remaining(c->buffer) <= 0) {
4170
0
    tcp_callback_reader(c);
4171
0
  }
4172
0
  return 1;
4173
4174
0
recv_error_initial:
4175
0
  recv_initial = 1;
4176
0
recv_error:
4177
0
#ifndef USE_WINSOCK
4178
0
  if(errno == EINTR || errno == EAGAIN)
4179
0
    return 1;
4180
0
#ifdef ECONNRESET
4181
0
  if(errno == ECONNRESET && verbosity < 2)
4182
0
    return 0; /* silence reset by peer */
4183
0
#endif
4184
0
  if(recv_initial) {
4185
0
#ifdef ECONNREFUSED
4186
0
    if(errno == ECONNREFUSED && verbosity < 2)
4187
0
      return 0; /* silence reset by peer */
4188
0
#endif
4189
0
#ifdef ENETUNREACH
4190
0
    if(errno == ENETUNREACH && verbosity < 2)
4191
0
      return 0; /* silence it */
4192
0
#endif
4193
0
#ifdef EHOSTDOWN
4194
0
    if(errno == EHOSTDOWN && verbosity < 2)
4195
0
      return 0; /* silence it */
4196
0
#endif
4197
0
#ifdef EHOSTUNREACH
4198
0
    if(errno == EHOSTUNREACH && verbosity < 2)
4199
0
      return 0; /* silence it */
4200
0
#endif
4201
0
#ifdef ENETDOWN
4202
0
    if(errno == ENETDOWN && verbosity < 2)
4203
0
      return 0; /* silence it */
4204
0
#endif
4205
0
#ifdef EACCES
4206
0
    if(errno == EACCES && verbosity < 2)
4207
0
      return 0; /* silence it */
4208
0
#endif
4209
0
#ifdef ENOTCONN
4210
0
    if(errno == ENOTCONN) {
4211
0
      log_err_addr("read (in tcp initial) failed and this "
4212
0
        "could be because TCP Fast Open is "
4213
0
        "enabled [--disable-tfo-client "
4214
0
        "--disable-tfo-server] but does not "
4215
0
        "work", sock_strerror(errno),
4216
0
        &c->repinfo.remote_addr,
4217
0
        c->repinfo.remote_addrlen);
4218
0
      return 0;
4219
0
    }
4220
0
#endif
4221
0
  }
4222
#else /* USE_WINSOCK */
4223
  if(recv_initial) {
4224
    if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
4225
      return 0;
4226
    if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
4227
      return 0;
4228
    if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
4229
      return 0;
4230
    if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
4231
      return 0;
4232
    if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
4233
      return 0;
4234
  }
4235
  if(WSAGetLastError() == WSAECONNRESET)
4236
    return 0;
4237
  if(WSAGetLastError() == WSAEINPROGRESS)
4238
    return 1;
4239
  if(WSAGetLastError() == WSAEWOULDBLOCK) {
4240
    ub_winsock_tcp_wouldblock(c->ev->ev,
4241
      UB_EV_READ);
4242
    return 1;
4243
  }
4244
#endif
4245
0
  log_err_addr((recv_initial?"read (in tcp initial)":"read (in tcp)"),
4246
0
    sock_strerror(errno), &c->repinfo.remote_addr,
4247
0
    c->repinfo.remote_addrlen);
4248
0
  return 0;
4249
0
}
4250
4251
/**
4252
 * Handle tcp writing callback.
4253
 * @param fd: file descriptor of socket.
4254
 * @param c: comm point to write buffer out of.
4255
 * @return: 0 on error
4256
 */
4257
static int
4258
comm_point_tcp_handle_write(int fd, struct comm_point* c)
4259
0
{
4260
0
  ssize_t r;
4261
0
  struct sldns_buffer *buffer;
4262
0
  log_assert(c->type == comm_tcp);
4263
#ifdef USE_DNSCRYPT
4264
  buffer = c->dnscrypt_buffer;
4265
#else
4266
0
  buffer = c->buffer;
4267
0
#endif
4268
0
  if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
4269
0
    return 0;
4270
0
  log_assert(fd != -1);
4271
0
  if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
4272
    /* check for pending error from nonblocking connect */
4273
    /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
4274
0
    int error = 0;
4275
0
    socklen_t len = (socklen_t)sizeof(error);
4276
0
    if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
4277
0
      &len) < 0){
4278
0
#ifndef USE_WINSOCK
4279
0
      error = errno; /* on solaris errno is error */
4280
#else /* USE_WINSOCK */
4281
      error = WSAGetLastError();
4282
#endif
4283
0
    }
4284
0
#ifndef USE_WINSOCK
4285
0
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
4286
0
    if(error == EINPROGRESS || error == EWOULDBLOCK)
4287
0
      return 1; /* try again later */
4288
0
    else
4289
0
#endif
4290
0
    if(error != 0 && verbosity < 2)
4291
0
      return 0; /* silence lots of chatter in the logs */
4292
0
                else if(error != 0) {
4293
0
      log_err_addr("tcp connect", strerror(error),
4294
0
        &c->repinfo.remote_addr,
4295
0
        c->repinfo.remote_addrlen);
4296
#else /* USE_WINSOCK */
4297
    /* examine error */
4298
    if(error == WSAEINPROGRESS)
4299
      return 1;
4300
    else if(error == WSAEWOULDBLOCK) {
4301
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
4302
      return 1;
4303
    } else if(error != 0 && verbosity < 2)
4304
      return 0;
4305
    else if(error != 0) {
4306
      log_err_addr("tcp connect", wsa_strerror(error),
4307
        &c->repinfo.remote_addr,
4308
        c->repinfo.remote_addrlen);
4309
#endif /* USE_WINSOCK */
4310
0
      return 0;
4311
0
    }
4312
0
  }
4313
0
  if(c->ssl)
4314
0
    return ssl_handle_it(c, 1);
4315
4316
#ifdef USE_MSG_FASTOPEN
4317
  /* Only try this on first use of a connection that uses tfo,
4318
     otherwise fall through to normal write */
4319
  /* Also, TFO support on WINDOWS not implemented at the moment */
4320
  if(c->tcp_do_fastopen == 1) {
4321
    /* this form of sendmsg() does both a connect() and send() so need to
4322
       look for various flavours of error*/
4323
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
4324
    struct msghdr msg;
4325
    struct iovec iov[2];
4326
    c->tcp_do_fastopen = 0;
4327
    memset(&msg, 0, sizeof(msg));
4328
    if(c->tcp_write_and_read) {
4329
      iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
4330
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
4331
      iov[1].iov_base = c->tcp_write_pkt;
4332
      iov[1].iov_len = c->tcp_write_pkt_len;
4333
    } else {
4334
      iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
4335
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
4336
      iov[1].iov_base = sldns_buffer_begin(buffer);
4337
      iov[1].iov_len = sldns_buffer_limit(buffer);
4338
    }
4339
    log_assert(iov[0].iov_len > 0);
4340
    msg.msg_name = &c->repinfo.remote_addr;
4341
    msg.msg_namelen = c->repinfo.remote_addrlen;
4342
    msg.msg_iov = iov;
4343
    msg.msg_iovlen = 2;
4344
    r = sendmsg(fd, &msg, MSG_FASTOPEN);
4345
    if (r == -1) {
4346
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
4347
      /* Handshake is underway, maybe because no TFO cookie available.
4348
         Come back to write the message*/
4349
      if(errno == EINPROGRESS || errno == EWOULDBLOCK)
4350
        return 1;
4351
#endif
4352
      if(errno == EINTR || errno == EAGAIN)
4353
        return 1;
4354
      /* Not handling EISCONN here as shouldn't ever hit that case.*/
4355
      if(errno != EPIPE
4356
#ifdef EOPNOTSUPP
4357
        /* if /proc/sys/net/ipv4/tcp_fastopen is
4358
         * disabled on Linux, sendmsg may return
4359
         * 'Operation not supported', if so
4360
         * fallthrough to ordinary connect. */
4361
        && errno != EOPNOTSUPP
4362
#endif
4363
        && errno != 0) {
4364
        if(verbosity < 2)
4365
          return 0; /* silence lots of chatter in the logs */
4366
        log_err_addr("tcp sendmsg", strerror(errno),
4367
          &c->repinfo.remote_addr,
4368
          c->repinfo.remote_addrlen);
4369
        return 0;
4370
      }
4371
      verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno));
4372
      /* fallthrough to nonFASTOPEN
4373
       * (MSG_FASTOPEN on Linux 3 produces EPIPE)
4374
       * we need to perform connect() */
4375
      if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr,
4376
        c->repinfo.remote_addrlen) == -1) {
4377
#ifdef EINPROGRESS
4378
        if(errno == EINPROGRESS)
4379
          return 1; /* wait until connect done*/
4380
#endif
4381
#ifdef USE_WINSOCK
4382
        if(WSAGetLastError() == WSAEINPROGRESS ||
4383
          WSAGetLastError() == WSAEWOULDBLOCK)
4384
          return 1; /* wait until connect done*/
4385
#endif
4386
        if(tcp_connect_errno_needs_log(
4387
          (struct sockaddr *)&c->repinfo.remote_addr,
4388
          c->repinfo.remote_addrlen)) {
4389
          log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
4390
            strerror(errno),
4391
            &c->repinfo.remote_addr,
4392
            c->repinfo.remote_addrlen);
4393
        }
4394
        return 0;
4395
      }
4396
4397
    } else {
4398
      if(c->tcp_write_and_read) {
4399
        c->tcp_write_byte_count += r;
4400
        if(c->tcp_write_byte_count < sizeof(uint16_t))
4401
          return 1;
4402
      } else {
4403
        c->tcp_byte_count += r;
4404
        if(c->tcp_byte_count < sizeof(uint16_t))
4405
          return 1;
4406
        sldns_buffer_set_position(buffer, c->tcp_byte_count -
4407
          sizeof(uint16_t));
4408
      }
4409
      if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4410
        tcp_callback_writer(c);
4411
        return 1;
4412
      }
4413
    }
4414
  }
4415
#endif /* USE_MSG_FASTOPEN */
4416
4417
0
  if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
4418
0
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
4419
0
#ifdef HAVE_WRITEV
4420
0
    struct iovec iov[2];
4421
0
    if(c->tcp_write_and_read) {
4422
0
      iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
4423
0
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
4424
0
      iov[1].iov_base = c->tcp_write_pkt;
4425
0
      iov[1].iov_len = c->tcp_write_pkt_len;
4426
0
    } else {
4427
0
      iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
4428
0
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
4429
0
      iov[1].iov_base = sldns_buffer_begin(buffer);
4430
0
      iov[1].iov_len = sldns_buffer_limit(buffer);
4431
0
    }
4432
0
    log_assert(iov[0].iov_len > 0);
4433
0
    r = writev(fd, iov, 2);
4434
#else /* HAVE_WRITEV */
4435
    if(c->tcp_write_and_read) {
4436
      r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
4437
        sizeof(uint16_t)-c->tcp_write_byte_count, 0);
4438
    } else {
4439
      r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
4440
        sizeof(uint16_t)-c->tcp_byte_count, 0);
4441
    }
4442
#endif /* HAVE_WRITEV */
4443
0
    if(r == -1) {
4444
0
#ifndef USE_WINSOCK
4445
0
#  ifdef EPIPE
4446
0
                  if(errno == EPIPE && verbosity < 2)
4447
0
                          return 0; /* silence 'broken pipe' */
4448
0
  #endif
4449
0
      if(errno == EINTR || errno == EAGAIN)
4450
0
        return 1;
4451
0
#ifdef ECONNRESET
4452
0
      if(errno == ECONNRESET && verbosity < 2)
4453
0
        return 0; /* silence reset by peer */
4454
0
#endif
4455
0
#  ifdef HAVE_WRITEV
4456
0
      log_err_addr("tcp writev", strerror(errno),
4457
0
        &c->repinfo.remote_addr,
4458
0
        c->repinfo.remote_addrlen);
4459
#  else /* HAVE_WRITEV */
4460
      log_err_addr("tcp send s", strerror(errno),
4461
        &c->repinfo.remote_addr,
4462
        c->repinfo.remote_addrlen);
4463
#  endif /* HAVE_WRITEV */
4464
#else
4465
      if(WSAGetLastError() == WSAENOTCONN)
4466
        return 1;
4467
      if(WSAGetLastError() == WSAEINPROGRESS)
4468
        return 1;
4469
      if(WSAGetLastError() == WSAEWOULDBLOCK) {
4470
        ub_winsock_tcp_wouldblock(c->ev->ev,
4471
          UB_EV_WRITE);
4472
        return 1;
4473
      }
4474
      if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
4475
        return 0; /* silence reset by peer */
4476
      log_err_addr("tcp send s",
4477
        wsa_strerror(WSAGetLastError()),
4478
        &c->repinfo.remote_addr,
4479
        c->repinfo.remote_addrlen);
4480
#endif
4481
0
      return 0;
4482
0
    }
4483
0
    if(c->tcp_write_and_read) {
4484
0
      c->tcp_write_byte_count += r;
4485
0
      if(c->tcp_write_byte_count < sizeof(uint16_t))
4486
0
        return 1;
4487
0
    } else {
4488
0
      c->tcp_byte_count += r;
4489
0
      if(c->tcp_byte_count < sizeof(uint16_t))
4490
0
        return 1;
4491
0
      sldns_buffer_set_position(buffer, c->tcp_byte_count -
4492
0
        sizeof(uint16_t));
4493
0
    }
4494
0
    if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4495
0
      tcp_callback_writer(c);
4496
0
      return 1;
4497
0
    }
4498
0
  }
4499
0
  log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
4500
0
  log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
4501
0
  if(c->tcp_write_and_read) {
4502
0
    r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
4503
0
      c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
4504
0
  } else {
4505
0
    r = send(fd, (void*)sldns_buffer_current(buffer),
4506
0
      sldns_buffer_remaining(buffer), 0);
4507
0
  }
4508
0
  if(r == -1) {
4509
0
#ifndef USE_WINSOCK
4510
0
    if(errno == EINTR || errno == EAGAIN)
4511
0
      return 1;
4512
0
#ifdef ECONNRESET
4513
0
    if(errno == ECONNRESET && verbosity < 2)
4514
0
      return 0; /* silence reset by peer */
4515
0
#endif
4516
#else
4517
    if(WSAGetLastError() == WSAEINPROGRESS)
4518
      return 1;
4519
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
4520
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
4521
      return 1;
4522
    }
4523
    if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
4524
      return 0; /* silence reset by peer */
4525
#endif
4526
0
    log_err_addr("tcp send r", sock_strerror(errno),
4527
0
      &c->repinfo.remote_addr,
4528
0
      c->repinfo.remote_addrlen);
4529
0
    return 0;
4530
0
  }
4531
0
  if(c->tcp_write_and_read) {
4532
0
    c->tcp_write_byte_count += r;
4533
0
  } else {
4534
0
    sldns_buffer_skip(buffer, r);
4535
0
  }
4536
4537
0
  if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4538
0
    tcp_callback_writer(c);
4539
0
  }
4540
4541
0
  return 1;
4542
0
}
4543
4544
/** read again to drain buffers when there could be more to read, returns 0
4545
 * on failure which means the comm point is closed. */
4546
static int
4547
tcp_req_info_read_again(int fd, struct comm_point* c)
4548
0
{
4549
0
  while(c->tcp_req_info->read_again) {
4550
0
    int r;
4551
0
    c->tcp_req_info->read_again = 0;
4552
0
    if(c->tcp_is_reading)
4553
0
      r = comm_point_tcp_handle_read(fd, c, 0);
4554
0
    else  r = comm_point_tcp_handle_write(fd, c);
4555
0
    if(!r) {
4556
0
      reclaim_tcp_handler(c);
4557
0
      if(!c->tcp_do_close) {
4558
0
        fptr_ok(fptr_whitelist_comm_point(
4559
0
          c->callback));
4560
0
        (void)(*c->callback)(c, c->cb_arg,
4561
0
          NETEVENT_CLOSED, NULL);
4562
0
      }
4563
0
      return 0;
4564
0
    }
4565
0
  }
4566
0
  return 1;
4567
0
}
4568
4569
/** read again to drain buffers when there could be more to read */
4570
static void
4571
tcp_more_read_again(int fd, struct comm_point* c)
4572
0
{
4573
  /* if the packet is done, but another one could be waiting on
4574
   * the connection, the callback signals this, and we try again */
4575
  /* this continues until the read routines get EAGAIN or so,
4576
   * and thus does not call the callback, and the bool is 0 */
4577
0
  int* moreread = c->tcp_more_read_again;
4578
0
  while(moreread && *moreread) {
4579
0
    *moreread = 0;
4580
0
    if(!comm_point_tcp_handle_read(fd, c, 0)) {
4581
0
      reclaim_tcp_handler(c);
4582
0
      if(!c->tcp_do_close) {
4583
0
        fptr_ok(fptr_whitelist_comm_point(
4584
0
          c->callback));
4585
0
        (void)(*c->callback)(c, c->cb_arg,
4586
0
          NETEVENT_CLOSED, NULL);
4587
0
      }
4588
0
      return;
4589
0
    }
4590
0
  }
4591
0
}
4592
4593
/** write again to fill up when there could be more to write */
4594
static void
4595
tcp_more_write_again(int fd, struct comm_point* c)
4596
0
{
4597
  /* if the packet is done, but another is waiting to be written,
4598
   * the callback signals it and we try again. */
4599
  /* this continues until the write routines get EAGAIN or so,
4600
   * and thus does not call the callback, and the bool is 0 */
4601
0
  int* morewrite = c->tcp_more_write_again;
4602
0
  while(morewrite && *morewrite) {
4603
0
    *morewrite = 0;
4604
0
    if(!comm_point_tcp_handle_write(fd, c)) {
4605
0
      reclaim_tcp_handler(c);
4606
0
      if(!c->tcp_do_close) {
4607
0
        fptr_ok(fptr_whitelist_comm_point(
4608
0
          c->callback));
4609
0
        (void)(*c->callback)(c, c->cb_arg,
4610
0
          NETEVENT_CLOSED, NULL);
4611
0
      }
4612
0
      return;
4613
0
    }
4614
0
  }
4615
0
}
4616
4617
void
4618
comm_point_tcp_handle_callback(int fd, short event, void* arg)
4619
0
{
4620
0
  struct comm_point* c = (struct comm_point*)arg;
4621
0
  log_assert(c->type == comm_tcp);
4622
0
  ub_comm_base_now(c->ev->base);
4623
4624
0
  if(c->fd == -1 || c->fd != fd)
4625
0
    return; /* duplicate event, but commpoint closed. */
4626
4627
#ifdef USE_DNSCRYPT
4628
  /* Initialize if this is a dnscrypt socket */
4629
  if(c->tcp_parent) {
4630
    c->dnscrypt = c->tcp_parent->dnscrypt;
4631
  }
4632
  if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
4633
    c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
4634
    if(!c->dnscrypt_buffer) {
4635
      log_err("Could not allocate dnscrypt buffer");
4636
      reclaim_tcp_handler(c);
4637
      if(!c->tcp_do_close) {
4638
        fptr_ok(fptr_whitelist_comm_point(
4639
          c->callback));
4640
        (void)(*c->callback)(c, c->cb_arg,
4641
          NETEVENT_CLOSED, NULL);
4642
      }
4643
      return;
4644
    }
4645
  }
4646
#endif
4647
4648
0
  if((event&UB_EV_TIMEOUT)) {
4649
0
    verbose(VERB_QUERY, "tcp took too long, dropped");
4650
0
    reclaim_tcp_handler(c);
4651
0
    if(!c->tcp_do_close) {
4652
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
4653
0
      (void)(*c->callback)(c, c->cb_arg,
4654
0
        NETEVENT_TIMEOUT, NULL);
4655
0
    }
4656
0
    return;
4657
0
  }
4658
0
  if((event&UB_EV_READ)
4659
#ifdef USE_MSG_FASTOPEN
4660
    && !(c->tcp_do_fastopen && (event&UB_EV_WRITE))
4661
#endif
4662
0
    ) {
4663
0
    int has_tcpq = (c->tcp_req_info != NULL);
4664
0
    int* moreread = c->tcp_more_read_again;
4665
0
    if(!comm_point_tcp_handle_read(fd, c, 0)) {
4666
0
      reclaim_tcp_handler(c);
4667
0
      if(!c->tcp_do_close) {
4668
0
        fptr_ok(fptr_whitelist_comm_point(
4669
0
          c->callback));
4670
0
        (void)(*c->callback)(c, c->cb_arg,
4671
0
          NETEVENT_CLOSED, NULL);
4672
0
      }
4673
0
      return;
4674
0
    }
4675
0
    if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4676
0
      if(!tcp_req_info_read_again(fd, c))
4677
0
        return;
4678
0
    }
4679
0
    if(moreread && *moreread)
4680
0
      tcp_more_read_again(fd, c);
4681
0
    return;
4682
0
  }
4683
0
  if((event&UB_EV_WRITE)) {
4684
0
    int has_tcpq = (c->tcp_req_info != NULL);
4685
0
    int* morewrite = c->tcp_more_write_again;
4686
0
    if(!comm_point_tcp_handle_write(fd, c)) {
4687
0
      reclaim_tcp_handler(c);
4688
0
      if(!c->tcp_do_close) {
4689
0
        fptr_ok(fptr_whitelist_comm_point(
4690
0
          c->callback));
4691
0
        (void)(*c->callback)(c, c->cb_arg,
4692
0
          NETEVENT_CLOSED, NULL);
4693
0
      }
4694
0
      return;
4695
0
    }
4696
0
    if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4697
0
      if(!tcp_req_info_read_again(fd, c))
4698
0
        return;
4699
0
    }
4700
0
    if(morewrite && *morewrite)
4701
0
      tcp_more_write_again(fd, c);
4702
0
    return;
4703
0
  }
4704
0
  log_err("Ignored event %d for tcphdl.", event);
4705
0
}
4706
4707
/** Make http handler free for next assignment */
4708
static void
4709
reclaim_http_handler(struct comm_point* c)
4710
0
{
4711
0
  log_assert(c->type == comm_http);
4712
0
  if(c->ssl) {
4713
0
#ifdef HAVE_SSL
4714
0
    SSL_shutdown(c->ssl);
4715
0
    SSL_free(c->ssl);
4716
0
    c->ssl = NULL;
4717
0
#endif
4718
0
  }
4719
0
  comm_point_close(c);
4720
0
  if(c->tcp_parent && !c->is_in_tcp_free) {
4721
    /* Should not happen: bad tcp_free state in reclaim_http. */
4722
0
    log_assert(c->tcp_free == NULL);
4723
0
    log_assert(c->tcp_parent->cur_tcp_count > 0);
4724
0
    c->tcp_parent->cur_tcp_count--;
4725
0
    c->tcp_free = c->tcp_parent->tcp_free;
4726
0
    c->tcp_parent->tcp_free = c;
4727
0
    c->is_in_tcp_free = 1;
4728
0
    if(!c->tcp_free) {
4729
      /* re-enable listening on accept socket */
4730
0
      comm_point_start_listening(c->tcp_parent, -1, -1);
4731
0
    }
4732
0
  }
4733
0
}
4734
4735
/** Read more http data over SSL into c->buffer at its current position.
 * @param c: comm point with an ssl object and room left in c->buffer.
 * @return 1 when data was read (buffer position advanced) or when the
 *	read has to be retried later; 0 on shutdown or error. Always 0
 *	when compiled without HAVE_SSL. */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
  int nread;
  int sslerr;
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
  ERR_clear_error();
  nread = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
    (int)sldns_buffer_remaining(c->buffer));
  if(nread > 0) {
    verbose(VERB_ALGO, "ssl http read more skip to %d + %d",
      (int)sldns_buffer_position(c->buffer), (int)nread);
    sldns_buffer_skip(c->buffer, (ssize_t)nread);
    return 1;
  }

  sslerr = SSL_get_error(c->ssl, nread);
  switch(sslerr) {
  case SSL_ERROR_ZERO_RETURN:
    return 0; /* shutdown, closed */
  case SSL_ERROR_WANT_READ:
    return 1; /* read more later */
  case SSL_ERROR_WANT_WRITE:
    /* SSL wants to write first; switch to listening for write */
    c->ssl_shake_state = comm_ssl_shake_hs_write;
    comm_point_listen_for_rw(c, 0, 1);
    return 1;
  case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
    if(errno == ECONNRESET && verbosity < 2)
      return 0; /* silence reset by peer */
#endif
    if(errno != 0)
      log_err("SSL_read syscall: %s", strerror(errno));
    return 0;
  default:
    break;
  }
  log_crypto_err_io("could not SSL_read", sslerr);
  return 0;
#else
  (void)c;
  return 0;
#endif /* HAVE_SSL */
}
4777
4778
/** read more data for http */
4779
static int
4780
http_read_more(int fd, struct comm_point* c)
4781
0
{
4782
0
  ssize_t r;
4783
0
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
4784
0
  r = recv(fd, (void*)sldns_buffer_current(c->buffer),
4785
0
    sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
4786
0
  if(r == 0) {
4787
0
    return 0;
4788
0
  } else if(r == -1) {
4789
0
#ifndef USE_WINSOCK
4790
0
    if(errno == EINTR || errno == EAGAIN)
4791
0
      return 1;
4792
#else /* USE_WINSOCK */
4793
    if(WSAGetLastError() == WSAECONNRESET)
4794
      return 0;
4795
    if(WSAGetLastError() == WSAEINPROGRESS)
4796
      return 1;
4797
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
4798
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
4799
      return 1;
4800
    }
4801
#endif
4802
0
    log_err_addr("read (in http r)", sock_strerror(errno),
4803
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
4804
0
    return 0;
4805
0
  }
4806
0
  verbose(VERB_ALGO, "http read more skip to %d + %d",
4807
0
    (int)sldns_buffer_position(c->buffer), (int)r);
4808
0
  sldns_buffer_skip(c->buffer, r);
4809
0
  return 1;
4810
0
}
4811
4812
/** return true if http header has been read (one line complete) */
4813
static int
4814
http_header_done(sldns_buffer* buf)
4815
0
{
4816
0
  size_t i;
4817
0
  for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4818
    /* there was a \r before the \n, but we ignore that */
4819
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
4820
0
      return 1;
4821
0
  }
4822
0
  return 0;
4823
0
}
4824
4825
/** return character string into buffer for header line, moves buffer
4826
 * past that line and puts zero terminator into linefeed-newline */
4827
static char*
4828
http_header_line(sldns_buffer* buf)
4829
0
{
4830
0
  char* result = (char*)sldns_buffer_current(buf);
4831
0
  size_t i;
4832
0
  for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4833
    /* terminate the string on the \r */
4834
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
4835
0
      sldns_buffer_write_u8_at(buf, i, 0);
4836
    /* terminate on the \n and skip past the it and done */
4837
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
4838
0
      sldns_buffer_write_u8_at(buf, i, 0);
4839
0
      sldns_buffer_set_position(buf, i+1);
4840
0
      return result;
4841
0
    }
4842
0
  }
4843
0
  return NULL;
4844
0
}
4845
4846
/** move unread buffer to start and clear rest for putting the rest into it */
4847
static void
4848
http_moveover_buffer(sldns_buffer* buf)
4849
0
{
4850
0
  size_t pos = sldns_buffer_position(buf);
4851
0
  size_t len = sldns_buffer_remaining(buf);
4852
0
  sldns_buffer_clear(buf);
4853
0
  memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
4854
0
  sldns_buffer_set_position(buf, len);
4855
0
}
4856
4857
/** a http header is complete, process it */
4858
static int
4859
http_process_initial_header(struct comm_point* c)
4860
0
{
4861
0
  char* line = http_header_line(c->buffer);
4862
0
  if(!line) return 1;
4863
0
  verbose(VERB_ALGO, "http header: %s", line);
4864
0
  if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
4865
    /* check returncode */
4866
0
    if(line[9] != '2') {
4867
0
      verbose(VERB_ALGO, "http bad status %s", line+9);
4868
0
      return 0;
4869
0
    }
4870
0
  } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
4871
0
    if(!c->http_is_chunked)
4872
0
      c->tcp_byte_count = (size_t)atoi(line+16);
4873
0
  } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
4874
0
    c->tcp_byte_count = 0;
4875
0
    c->http_is_chunked = 1;
4876
0
  } else if(line[0] == 0) {
4877
    /* end of initial headers */
4878
0
    c->http_in_headers = 0;
4879
0
    if(c->http_is_chunked)
4880
0
      c->http_in_chunk_headers = 1;
4881
    /* remove header text from front of buffer
4882
     * the buffer is going to be used to return the data segment
4883
     * itself and we don't want the header to get returned
4884
     * prepended with it */
4885
0
    http_moveover_buffer(c->buffer);
4886
0
    sldns_buffer_flip(c->buffer);
4887
0
    return 1;
4888
0
  }
4889
  /* ignore other headers */
4890
0
  return 1;
4891
0
}
4892
4893
/** a chunk header is complete, process it, return 0=fail, 1=continue next
4894
 * header line, 2=done with chunked transfer*/
4895
static int
4896
http_process_chunk_header(struct comm_point* c)
4897
0
{
4898
0
  char* line = http_header_line(c->buffer);
4899
0
  if(!line) return 1;
4900
0
  if(c->http_in_chunk_headers == 3) {
4901
0
    verbose(VERB_ALGO, "http chunk trailer: %s", line);
4902
    /* are we done ? */
4903
0
    if(line[0] == 0 && c->tcp_byte_count == 0) {
4904
      /* callback of http reader when NETEVENT_DONE,
4905
       * end of data, with no data in buffer */
4906
0
      sldns_buffer_set_position(c->buffer, 0);
4907
0
      sldns_buffer_set_limit(c->buffer, 0);
4908
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
4909
0
      (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4910
      /* return that we are done */
4911
0
      return 2;
4912
0
    }
4913
0
    if(line[0] == 0) {
4914
      /* continue with header of the next chunk */
4915
0
      c->http_in_chunk_headers = 1;
4916
      /* remove header text from front of buffer */
4917
0
      http_moveover_buffer(c->buffer);
4918
0
      sldns_buffer_flip(c->buffer);
4919
0
      return 1;
4920
0
    }
4921
    /* ignore further trail headers */
4922
0
    return 1;
4923
0
  }
4924
0
  verbose(VERB_ALGO, "http chunk header: %s", line);
4925
0
  if(c->http_in_chunk_headers == 1) {
4926
    /* read chunked start line */
4927
0
    char* end = NULL;
4928
0
    c->tcp_byte_count = (size_t)strtol(line, &end, 16);
4929
0
    if(end == line)
4930
0
      return 0;
4931
0
    c->http_in_chunk_headers = 0;
4932
    /* remove header text from front of buffer */
4933
0
    http_moveover_buffer(c->buffer);
4934
0
    sldns_buffer_flip(c->buffer);
4935
0
    if(c->tcp_byte_count == 0) {
4936
      /* done with chunks, process chunk_trailer lines */
4937
0
      c->http_in_chunk_headers = 3;
4938
0
    }
4939
0
    return 1;
4940
0
  }
4941
  /* ignore other headers */
4942
0
  return 1;
4943
0
}
4944
4945
/** handle nonchunked data segment, 0=fail, 1=wait */
4946
static int
4947
http_nonchunk_segment(struct comm_point* c)
4948
0
{
4949
  /* c->buffer at position..limit has new data we read in.
4950
   * the buffer itself is full of nonchunked data.
4951
   * we are looking to read tcp_byte_count more data
4952
   * and then the transfer is done. */
4953
0
  size_t remainbufferlen;
4954
0
  size_t got_now = sldns_buffer_limit(c->buffer);
4955
0
  if(c->tcp_byte_count <= got_now) {
4956
    /* done, this is the last data fragment */
4957
0
    c->http_stored = 0;
4958
0
    sldns_buffer_set_position(c->buffer, 0);
4959
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
4960
0
    (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4961
0
    return 1;
4962
0
  }
4963
  /* if we have the buffer space,
4964
   * read more data collected into the buffer */
4965
0
  remainbufferlen = sldns_buffer_capacity(c->buffer) -
4966
0
    sldns_buffer_limit(c->buffer);
4967
0
  if(remainbufferlen+got_now >= c->tcp_byte_count ||
4968
0
    remainbufferlen >= (size_t)(c->ssl?16384:2048)) {
4969
0
    size_t total = sldns_buffer_limit(c->buffer);
4970
0
    sldns_buffer_clear(c->buffer);
4971
0
    sldns_buffer_set_position(c->buffer, total);
4972
0
    c->http_stored = total;
4973
    /* return and wait to read more */
4974
0
    return 1;
4975
0
  }
4976
  /* call callback with this data amount, then
4977
   * wait for more */
4978
0
  c->tcp_byte_count -= got_now;
4979
0
  c->http_stored = 0;
4980
0
  sldns_buffer_set_position(c->buffer, 0);
4981
0
  fptr_ok(fptr_whitelist_comm_point(c->callback));
4982
0
  (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
4983
  /* c->callback has to buffer_clear(c->buffer). */
4984
  /* return and wait to read more */
4985
0
  return 1;
4986
0
}
4987
4988
/** handle chunked data segment, return 0=fail, 1=wait, 2=process more */
4989
static int
4990
http_chunked_segment(struct comm_point* c)
4991
0
{
4992
  /* the c->buffer has from position..limit new data we read. */
4993
  /* the current chunk has length tcp_byte_count.
4994
   * once we read that read more chunk headers.
4995
   */
4996
0
  size_t remainbufferlen;
4997
0
  size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
4998
0
  verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer));
4999
0
  if(c->tcp_byte_count <= got_now) {
5000
    /* the chunk has completed (with perhaps some extra data
5001
     * from next chunk header and next chunk) */
5002
    /* save too much info into temp buffer */
5003
0
    size_t fraglen;
5004
0
    struct comm_reply repinfo;
5005
0
    c->http_stored = 0;
5006
0
    sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
5007
0
    sldns_buffer_clear(c->http_temp);
5008
0
    sldns_buffer_write(c->http_temp,
5009
0
      sldns_buffer_current(c->buffer),
5010
0
      sldns_buffer_remaining(c->buffer));
5011
0
    sldns_buffer_flip(c->http_temp);
5012
5013
    /* callback with this fragment */
5014
0
    fraglen = sldns_buffer_position(c->buffer);
5015
0
    sldns_buffer_set_position(c->buffer, 0);
5016
0
    sldns_buffer_set_limit(c->buffer, fraglen);
5017
0
    repinfo = c->repinfo;
5018
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
5019
0
    (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
5020
    /* c->callback has to buffer_clear(). */
5021
5022
    /* is commpoint deleted? */
5023
0
    if(!repinfo.c) {
5024
0
      return 1;
5025
0
    }
5026
    /* copy waiting info */
5027
0
    sldns_buffer_clear(c->buffer);
5028
0
    sldns_buffer_write(c->buffer,
5029
0
      sldns_buffer_begin(c->http_temp),
5030
0
      sldns_buffer_remaining(c->http_temp));
5031
0
    sldns_buffer_flip(c->buffer);
5032
    /* process end of chunk trailer header lines, until
5033
     * an empty line */
5034
0
    c->http_in_chunk_headers = 3;
5035
    /* process more data in buffer (if any) */
5036
0
    return 2;
5037
0
  }
5038
0
  c->tcp_byte_count -= got_now;
5039
5040
  /* if we have the buffer space,
5041
   * read more data collected into the buffer */
5042
0
  remainbufferlen = sldns_buffer_capacity(c->buffer) -
5043
0
    sldns_buffer_limit(c->buffer);
5044
0
  if(remainbufferlen >= c->tcp_byte_count ||
5045
0
    remainbufferlen >= 2048) {
5046
0
    size_t total = sldns_buffer_limit(c->buffer);
5047
0
    sldns_buffer_clear(c->buffer);
5048
0
    sldns_buffer_set_position(c->buffer, total);
5049
0
    c->http_stored = total;
5050
    /* return and wait to read more */
5051
0
    return 1;
5052
0
  }
5053
5054
  /* callback of http reader for a new part of the data */
5055
0
  c->http_stored = 0;
5056
0
  sldns_buffer_set_position(c->buffer, 0);
5057
0
  fptr_ok(fptr_whitelist_comm_point(c->callback));
5058
0
  (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
5059
  /* c->callback has to buffer_clear(c->buffer). */
5060
  /* return and wait to read more */
5061
0
  return 1;
5062
0
}
5063
5064
#ifdef HAVE_NGHTTP2
5065
/** Create new http2 session. Called when creating handling comm point. */
5066
static struct http2_session* http2_session_create(struct comm_point* c)
5067
{
5068
  struct http2_session* session = calloc(1, sizeof(*session));
5069
  if(!session) {
5070
    log_err("malloc failure while creating http2 session");
5071
    return NULL;
5072
  }
5073
  session->c = c;
5074
5075
  return session;
5076
}
5077
#endif
5078
5079
/** Delete http2 session, after closing the connection or on error.
 * With nghttp2 support this deletes the session callbacks object (when
 * present) and frees the session structure; h2_session must not be NULL.
 * Without nghttp2 support it does nothing. */
static void http2_session_delete(struct http2_session* h2_session)
{
#ifndef HAVE_NGHTTP2
  (void)h2_session;
#else
  if(h2_session->callbacks != NULL)
    nghttp2_session_callbacks_del(h2_session->callbacks);
  free(h2_session);
#endif
}
5090
5091
#ifdef HAVE_NGHTTP2
5092
struct http2_stream* http2_stream_create(int32_t stream_id)
5093
{
5094
  struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream));
5095
  if(!h2_stream) {
5096
    log_err("malloc failure while creating http2 stream");
5097
    return NULL;
5098
  }
5099
  h2_stream->stream_id = stream_id;
5100
  return h2_stream;
5101
}
5102
#endif
5103
5104
void http2_stream_add_meshstate(struct http2_stream* h2_stream,
5105
  struct mesh_area* mesh, struct mesh_state* m)
5106
0
{
5107
0
  h2_stream->mesh = mesh;
5108
0
  h2_stream->mesh_state = m;
5109
0
}
5110
5111
void http2_stream_remove_mesh_state(struct http2_stream* h2_stream)
5112
0
{
5113
0
  if(!h2_stream)
5114
0
    return;
5115
0
  h2_stream->mesh_state = NULL;
5116
0
}
5117
5118
#ifdef HAVE_NGHTTP2
5119
void http2_session_add_stream(struct http2_session* h2_session,
5120
  struct http2_stream* h2_stream)
5121
{
5122
  if(h2_session->first_stream)
5123
    h2_session->first_stream->prev = h2_stream;
5124
  h2_stream->next = h2_session->first_stream;
5125
  h2_session->first_stream = h2_stream;
5126
}
5127
5128
/** remove stream from session linked list. After stream close callback or
5129
 * closing connection */
5130
static void http2_session_remove_stream(struct http2_session* h2_session,
5131
  struct http2_stream* h2_stream)
5132
{
5133
  if(h2_stream->prev)
5134
    h2_stream->prev->next = h2_stream->next;
5135
  else
5136
    h2_session->first_stream = h2_stream->next;
5137
  if(h2_stream->next)
5138
    h2_stream->next->prev = h2_stream->prev;
5139
5140
}
5141
5142
/** Stream close callback: look up the stream object attached to stream_id,
 * unlink it from the session list and delete it.
 * @param stream_id: id of the stream that was closed.
 * @param cb_arg: the struct http2_session for this connection.
 * @return always 0; a stream without attached user data is ignored. */
int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session),
	int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg)
{
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	struct http2_stream* h2_stream =
		nghttp2_session_get_stream_user_data(h2_session->session,
		stream_id);

	if(h2_stream != NULL) {
		http2_session_remove_stream(h2_session, h2_stream);
		http2_stream_delete(h2_session, h2_stream);
	}
	return 0;
}
5155
5156
/** Receive callback handed to nghttp2: read up to len bytes of the
 * connection into buf, through SSL when the comm point has an ssl object
 * and with a non-blocking recv() otherwise.
 * @param buf: destination buffer.
 * @param len: capacity of buf.
 * @param cb_arg: the struct http2_session for this connection.
 * @return number of bytes read, NGHTTP2_ERR_EOF when the peer closed,
 *	NGHTTP2_ERR_WOULDBLOCK when no data can be read right now (or the
 *	consecutive read cap was hit), NGHTTP2_ERR_CALLBACK_FAILURE on error. */
ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf,
	size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	struct comm_point* cp = h2_session->c;
	ssize_t nread;

	log_assert(cp->type == comm_http);
	log_assert(cp->h2_session);

	/* The number of consecutive reads on the HTTP2 session is capped,
	 * somewhat arbitrarily, at the maximum number of allowed streams.
	 * Hitting the cap resets the counter and reports
	 * NGHTTP2_ERR_WOULDBLOCK so that nghttp2_session_recv() stops
	 * reading for now. */
	h2_session->reads_count++;
	if(h2_session->reads_count > cp->http2_max_streams) {
		h2_session->reads_count = 0;
		return NGHTTP2_ERR_WOULDBLOCK;
	}

#ifdef HAVE_SSL
	if(cp->ssl) {
		int sslerr;
		int got;
		ERR_clear_error();
		got = SSL_read(cp->ssl, buf, len);
		if(got > 0)
			return got;

		sslerr = SSL_get_error(cp->ssl, got);
		switch(sslerr) {
		case SSL_ERROR_ZERO_RETURN:
			return NGHTTP2_ERR_EOF;
		case SSL_ERROR_WANT_READ:
			return NGHTTP2_ERR_WOULDBLOCK;
		case SSL_ERROR_WANT_WRITE:
			/* SSL needs to write before it can read; wait for
			 * the socket to become writable */
			cp->ssl_shake_state = comm_ssl_shake_hs_write;
			comm_point_listen_for_rw(cp, 0, 1);
			return NGHTTP2_ERR_WOULDBLOCK;
		case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
			/* connection resets are only logged at higher
			 * verbosity */
			if(errno == ECONNRESET && verbosity < 2)
				return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
			if(errno != 0)
				log_err("SSL_read syscall: %s",
					strerror(errno));
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		default:
			break;
		}
		log_crypto_err_io("could not SSL_read", sslerr);
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}
#endif /* HAVE_SSL */

	nread = recv(cp->fd, (void*)buf, len, MSG_DONTWAIT);
	if(nread > 0)
		return nread;
	if(nread == 0)
		return NGHTTP2_ERR_EOF;

	/* recv failed */
#ifndef USE_WINSOCK
	if(errno == EINTR || errno == EAGAIN)
		return NGHTTP2_ERR_WOULDBLOCK;
#ifdef ECONNRESET
	if(errno == ECONNRESET && verbosity < 2)
		return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
	log_err_addr("could not http2 recv: %s", strerror(errno),
		&cp->repinfo.remote_addr, cp->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
	if(WSAGetLastError() == WSAECONNRESET)
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	if(WSAGetLastError() == WSAEINPROGRESS)
		return NGHTTP2_ERR_WOULDBLOCK;
	if(WSAGetLastError() == WSAEWOULDBLOCK) {
		ub_winsock_tcp_wouldblock(cp->ev->ev, UB_EV_READ);
		return NGHTTP2_ERR_WOULDBLOCK;
	}
	log_err_addr("could not http2 recv: %s",
		wsa_strerror(WSAGetLastError()),
		&cp->repinfo.remote_addr, cp->repinfo.remote_addrlen);
#endif
	return NGHTTP2_ERR_CALLBACK_FAILURE;
}
5239
#endif /* HAVE_NGHTTP2 */
5240
5241
/** Handle a read event on an http2 connection.
 * @param c: comm point with an http2 session.
 * @return 0 if the connection can be closed (error, or the session wants
 *	neither read nor write), 1 to keep it open. Always 0 without nghttp2. */
static int
comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifndef HAVE_NGHTTP2
	(void)c;
	return 0;
#else
	int rv;
	log_assert(c->h2_session);

	/* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */
	rv = nghttp2_session_recv(c->h2_session->session);
	if(rv != 0) {
		/* EOF and callback failure are not reported here */
		if(!(rv == NGHTTP2_ERR_EOF ||
			rv == NGHTTP2_ERR_CALLBACK_FAILURE)) {
			char peer[256];
			addr_to_str(&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen, peer, sizeof(peer));
			verbose(VERB_QUERY, "http2: session_recv from %s failed, "
				"error: %s", peer, nghttp2_strerror(rv));
		}
		return 0;
	}

	if(nghttp2_session_want_write(c->h2_session->session)) {
		/* switch the comm point over to writing */
		c->tcp_is_reading = 0;
		comm_point_stop_listening(c);
		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
		return 1;
	}
	if(!nghttp2_session_want_read(c->h2_session->session))
		return 0; /* connection can be closed */
	return 1;
#endif
}
5274
5275
/**
5276
 * Handle http reading callback.
5277
 * @param fd: file descriptor of socket.
5278
 * @param c: comm point to read from into buffer.
5279
 * @return: 0 on error
5280
 */
5281
static int
5282
comm_point_http_handle_read(int fd, struct comm_point* c)
5283
0
{
5284
0
  log_assert(c->type == comm_http);
5285
0
  log_assert(fd != -1);
5286
5287
  /* if we are in ssl handshake, handle SSL handshake */
5288
0
#ifdef HAVE_SSL
5289
0
  if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
5290
0
    if(!ssl_handshake(c))
5291
0
      return 0;
5292
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
5293
0
      return 1;
5294
0
  }
5295
0
#endif /* HAVE_SSL */
5296
5297
0
  if(!c->tcp_is_reading)
5298
0
    return 1;
5299
5300
0
  if(c->use_h2) {
5301
0
    return comm_point_http2_handle_read(fd, c);
5302
0
  }
5303
5304
  /* http version is <= http/1.1 */
5305
5306
0
  if(c->http_min_version >= http_version_2) {
5307
    /* HTTP/2 failed, not allowed to use lower version. */
5308
0
    return 0;
5309
0
  }
5310
5311
  /* read more data */
5312
0
  if(c->ssl) {
5313
0
    if(!ssl_http_read_more(c))
5314
0
      return 0;
5315
0
  } else {
5316
0
    if(!http_read_more(fd, c))
5317
0
      return 0;
5318
0
  }
5319
5320
0
  if(c->http_stored >= sldns_buffer_position(c->buffer)) {
5321
    /* read did not work but we wanted more data, there is
5322
     * no bytes to process now. */
5323
0
    return 1;
5324
0
  }
5325
0
  sldns_buffer_flip(c->buffer);
5326
  /* if we are partway in a segment of data, position us at the point
5327
   * where we left off previously */
5328
0
  if(c->http_stored < sldns_buffer_limit(c->buffer))
5329
0
    sldns_buffer_set_position(c->buffer, c->http_stored);
5330
0
  else  sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer));
5331
5332
0
  while(sldns_buffer_remaining(c->buffer) > 0) {
5333
    /* Handle HTTP/1.x data */
5334
    /* if we are reading headers, read more headers */
5335
0
    if(c->http_in_headers || c->http_in_chunk_headers) {
5336
      /* if header is done, process the header */
5337
0
      if(!http_header_done(c->buffer)) {
5338
        /* copy remaining data to front of buffer
5339
         * and set rest for writing into it */
5340
0
        http_moveover_buffer(c->buffer);
5341
        /* return and wait to read more */
5342
0
        return 1;
5343
0
      }
5344
0
      if(!c->http_in_chunk_headers) {
5345
        /* process initial headers */
5346
0
        if(!http_process_initial_header(c))
5347
0
          return 0;
5348
0
      } else {
5349
        /* process chunk headers */
5350
0
        int r = http_process_chunk_header(c);
5351
0
        if(r == 0) return 0;
5352
0
        if(r == 2) return 1; /* done */
5353
        /* r == 1, continue */
5354
0
      }
5355
      /* see if we have more to process */
5356
0
      continue;
5357
0
    }
5358
5359
0
    if(!c->http_is_chunked) {
5360
      /* if we are reading nonchunks, process that*/
5361
0
      return http_nonchunk_segment(c);
5362
0
    } else {
5363
      /* if we are reading chunks, read the chunk */
5364
0
      int r = http_chunked_segment(c);
5365
0
      if(r == 0) return 0;
5366
0
      if(r == 1) return 1;
5367
0
      continue;
5368
0
    }
5369
0
  }
5370
  /* broke out of the loop; could not process header instead need
5371
   * to read more */
5372
  /* moveover any remaining data and read more data */
5373
0
  http_moveover_buffer(c->buffer);
5374
  /* return and wait to read more */
5375
0
  return 1;
5376
0
}
5377
5378
/** check pending connect for http */
5379
static int
5380
http_check_connect(int fd, struct comm_point* c)
5381
0
{
5382
  /* check for pending error from nonblocking connect */
5383
  /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
5384
0
  int error = 0;
5385
0
  socklen_t len = (socklen_t)sizeof(error);
5386
0
  if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
5387
0
    &len) < 0){
5388
0
#ifndef USE_WINSOCK
5389
0
    error = errno; /* on solaris errno is error */
5390
#else /* USE_WINSOCK */
5391
    error = WSAGetLastError();
5392
#endif
5393
0
  }
5394
0
#ifndef USE_WINSOCK
5395
0
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
5396
0
  if(error == EINPROGRESS || error == EWOULDBLOCK)
5397
0
    return 1; /* try again later */
5398
0
  else
5399
0
#endif
5400
0
  if(error != 0 && verbosity < 2)
5401
0
    return 0; /* silence lots of chatter in the logs */
5402
0
  else if(error != 0) {
5403
0
    log_err_addr("http connect", strerror(error),
5404
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5405
#else /* USE_WINSOCK */
5406
  /* examine error */
5407
  if(error == WSAEINPROGRESS)
5408
    return 1;
5409
  else if(error == WSAEWOULDBLOCK) {
5410
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
5411
    return 1;
5412
  } else if(error != 0 && verbosity < 2)
5413
    return 0;
5414
  else if(error != 0) {
5415
    log_err_addr("http connect", wsa_strerror(error),
5416
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5417
#endif /* USE_WINSOCK */
5418
0
    return 0;
5419
0
  }
5420
  /* keep on processing this socket */
5421
0
  return 2;
5422
0
}
5423
5424
/** Write more of the comm point buffer to the peer over SSL (http).
 * @param c: comm point; its buffer must have bytes remaining.
 * @return 0 when the connection is closed or failed, 1 otherwise, also
 *	when nothing could be written yet. Always 0 without SSL support. */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifndef HAVE_SSL
	(void)c;
	return 0;
#else
	int sslerr;
	int written;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	written = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(written > 0) {
		/* advance past the bytes that went out */
		sldns_buffer_skip(c->buffer, (ssize_t)written);
		return 1;
	}

	sslerr = SSL_get_error(c->ssl, written);
	switch(sslerr) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	case SSL_ERROR_WANT_WRITE:
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err_io("could not SSL_write", sslerr);
	return 0;
#endif /* HAVE_SSL */
}
5464
5465
/** write more data for http */
5466
static int
5467
http_write_more(int fd, struct comm_point* c)
5468
0
{
5469
0
  ssize_t r;
5470
0
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
5471
0
  r = send(fd, (void*)sldns_buffer_current(c->buffer),
5472
0
    sldns_buffer_remaining(c->buffer), 0);
5473
0
  if(r == -1) {
5474
0
#ifndef USE_WINSOCK
5475
0
    if(errno == EINTR || errno == EAGAIN)
5476
0
      return 1;
5477
#else
5478
    if(WSAGetLastError() == WSAEINPROGRESS)
5479
      return 1;
5480
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
5481
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
5482
      return 1;
5483
    }
5484
#endif
5485
0
    log_err_addr("http send r", sock_strerror(errno),
5486
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5487
0
    return 0;
5488
0
  }
5489
0
  sldns_buffer_skip(c->buffer, r);
5490
0
  return 1;
5491
0
}
5492
5493
#ifdef HAVE_NGHTTP2
5494
/** Send callback handed to nghttp2: write up to len bytes from buf to the
 * connection, through SSL when the comm point has an ssl object and with
 * send() otherwise.
 * @param buf: data to write.
 * @param len: number of bytes available in buf.
 * @param cb_arg: the struct http2_session for this connection.
 * @return number of bytes written, NGHTTP2_ERR_WOULDBLOCK when nothing can
 *	be written right now, NGHTTP2_ERR_CALLBACK_FAILURE on error or when
 *	the connection was closed. */
ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf,
	size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	struct comm_point* cp = h2_session->c;
	ssize_t nsent;

	log_assert(cp->type == comm_http);
	log_assert(cp->h2_session);

#ifdef HAVE_SSL
	if(cp->ssl) {
		int sslerr;
		int put;
		ERR_clear_error();
		put = SSL_write(cp->ssl, buf, len);
		if(put > 0)
			return put;

		sslerr = SSL_get_error(cp->ssl, put);
		switch(sslerr) {
		case SSL_ERROR_ZERO_RETURN:
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		case SSL_ERROR_WANT_READ:
			/* SSL needs to read before it can write; wait for
			 * the socket to become readable */
			cp->ssl_shake_state = comm_ssl_shake_hs_read;
			comm_point_listen_for_rw(cp, 1, 0);
			return NGHTTP2_ERR_WOULDBLOCK;
		case SSL_ERROR_WANT_WRITE:
			return NGHTTP2_ERR_WOULDBLOCK;
		case SSL_ERROR_SYSCALL:
#ifdef EPIPE
			/* broken pipes are only logged at higher
			 * verbosity */
			if(errno == EPIPE && verbosity < 2)
				return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
			if(errno != 0)
				log_err("SSL_write syscall: %s",
					strerror(errno));
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		default:
			break;
		}
		log_crypto_err_io("could not SSL_write", sslerr);
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}
#endif /* HAVE_SSL */

	nsent = send(cp->fd, (void*)buf, len, 0);
	if(nsent > 0)
		return nsent;

	if(nsent < 0) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return NGHTTP2_ERR_WOULDBLOCK;
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
		log_err_addr("could not http2 write: %s", strerror(errno),
			&cp->repinfo.remote_addr, cp->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
		if(WSAGetLastError() == WSAENOTCONN)
			return NGHTTP2_ERR_WOULDBLOCK;
		if(WSAGetLastError() == WSAEINPROGRESS)
			return NGHTTP2_ERR_WOULDBLOCK;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(cp->ev->ev, UB_EV_WRITE);
			return NGHTTP2_ERR_WOULDBLOCK;
		}
		if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		log_err_addr("could not http2 write: %s",
			wsa_strerror(WSAGetLastError()),
			&cp->repinfo.remote_addr, cp->repinfo.remote_addrlen);
#endif
	}
	/* a send of zero bytes is treated as a failure as well */
	return NGHTTP2_ERR_CALLBACK_FAILURE;
}
5573
#endif /* HAVE_NGHTTP2 */
5574
5575
/** Handle a write event on an http2 connection.
 * @param c: comm point with an http2 session.
 * @return 0 if the connection can be closed (send failed, or the session
 *	wants neither read nor write), 1 to keep it open. Always 0 without
 *	nghttp2. */
static int
comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifndef HAVE_NGHTTP2
	(void)c;
	return 0;
#else
	int rv;
	log_assert(c->h2_session);

	rv = nghttp2_session_send(c->h2_session->session);
	if(rv != 0) {
		verbose(VERB_QUERY, "http2: session_send failed, "
			"error: %s", nghttp2_strerror(rv));
		return 0;
	}

	if(nghttp2_session_want_read(c->h2_session->session)) {
		/* switch the comm point over to reading */
		c->tcp_is_reading = 1;
		comm_point_stop_listening(c);
		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
		return 1;
	}
	if(!nghttp2_session_want_write(c->h2_session->session))
		return 0; /* connection can be closed */
	return 1;
#endif
}
5602
5603
/**
5604
 * Handle http writing callback.
5605
 * @param fd: file descriptor of socket.
5606
 * @param c: comm point to write buffer out of.
5607
 * @return: 0 on error
5608
 */
5609
static int
5610
comm_point_http_handle_write(int fd, struct comm_point* c)
5611
0
{
5612
0
  log_assert(c->type == comm_http);
5613
0
  log_assert(fd != -1);
5614
5615
  /* check pending connect errors, if that fails, we wait for more,
5616
   * or we can continue to write contents */
5617
0
  if(c->tcp_check_nb_connect) {
5618
0
    int r = http_check_connect(fd, c);
5619
0
    if(r == 0) return 0;
5620
0
    if(r == 1) return 1;
5621
0
    c->tcp_check_nb_connect = 0;
5622
0
  }
5623
  /* if we are in ssl handshake, handle SSL handshake */
5624
0
#ifdef HAVE_SSL
5625
0
  if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
5626
0
    if(!ssl_handshake(c))
5627
0
      return 0;
5628
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
5629
0
      return 1;
5630
0
  }
5631
0
#endif /* HAVE_SSL */
5632
0
  if(c->tcp_is_reading)
5633
0
    return 1;
5634
5635
0
  if(c->use_h2) {
5636
0
    return comm_point_http2_handle_write(fd, c);
5637
0
  }
5638
5639
  /* http version is <= http/1.1 */
5640
5641
0
  if(c->http_min_version >= http_version_2) {
5642
    /* HTTP/2 failed, not allowed to use lower version. */
5643
0
    return 0;
5644
0
  }
5645
5646
  /* if we are writing, write more */
5647
0
  if(c->ssl) {
5648
0
    if(!ssl_http_write_more(c))
5649
0
      return 0;
5650
0
  } else {
5651
0
    if(!http_write_more(fd, c))
5652
0
      return 0;
5653
0
  }
5654
5655
  /* we write a single buffer contents, that can contain
5656
   * the http request, and then flip to read the results */
5657
  /* see if write is done */
5658
0
  if(sldns_buffer_remaining(c->buffer) == 0) {
5659
0
    sldns_buffer_clear(c->buffer);
5660
0
    if(c->tcp_do_toggle_rw)
5661
0
      c->tcp_is_reading = 1;
5662
0
    c->tcp_byte_count = 0;
5663
    /* switch from listening(write) to listening(read) */
5664
0
    comm_point_stop_listening(c);
5665
0
    comm_point_start_listening(c, -1, -1);
5666
0
  }
5667
0
  return 1;
5668
0
}
5669
5670
void
5671
comm_point_http_handle_callback(int fd, short event, void* arg)
5672
0
{
5673
0
  struct comm_point* c = (struct comm_point*)arg;
5674
0
  log_assert(c->type == comm_http);
5675
0
  ub_comm_base_now(c->ev->base);
5676
5677
0
  if((event&UB_EV_TIMEOUT)) {
5678
0
    verbose(VERB_QUERY, "http took too long, dropped");
5679
0
    reclaim_http_handler(c);
5680
0
    if(!c->tcp_do_close) {
5681
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
5682
0
      (void)(*c->callback)(c, c->cb_arg,
5683
0
        NETEVENT_TIMEOUT, NULL);
5684
0
    }
5685
0
    return;
5686
0
  }
5687
0
  if((event&UB_EV_READ)) {
5688
0
    if(!comm_point_http_handle_read(fd, c)) {
5689
0
      reclaim_http_handler(c);
5690
0
      if(!c->tcp_do_close) {
5691
0
        fptr_ok(fptr_whitelist_comm_point(
5692
0
          c->callback));
5693
0
        (void)(*c->callback)(c, c->cb_arg,
5694
0
          NETEVENT_CLOSED, NULL);
5695
0
      }
5696
0
    }
5697
0
    return;
5698
0
  }
5699
0
  if((event&UB_EV_WRITE)) {
5700
0
    if(!comm_point_http_handle_write(fd, c)) {
5701
0
      reclaim_http_handler(c);
5702
0
      if(!c->tcp_do_close) {
5703
0
        fptr_ok(fptr_whitelist_comm_point(
5704
0
          c->callback));
5705
0
        (void)(*c->callback)(c, c->cb_arg,
5706
0
          NETEVENT_CLOSED, NULL);
5707
0
      }
5708
0
    }
5709
0
    return;
5710
0
  }
5711
0
  log_err("Ignored event %d for httphdl.", event);
5712
0
}
5713
5714
void comm_point_local_handle_callback(int fd, short event, void* arg)
5715
0
{
5716
0
  struct comm_point* c = (struct comm_point*)arg;
5717
0
  log_assert(c->type == comm_local);
5718
0
  ub_comm_base_now(c->ev->base);
5719
5720
0
  if((event&UB_EV_READ)) {
5721
0
    if(!comm_point_tcp_handle_read(fd, c, 1)) {
5722
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
5723
0
      (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
5724
0
        NULL);
5725
0
    }
5726
0
    return;
5727
0
  }
5728
0
  log_err("Ignored event %d for localhdl.", event);
5729
0
}
5730
5731
void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
5732
  short event, void* arg)
5733
0
{
5734
0
  struct comm_point* c = (struct comm_point*)arg;
5735
0
  int err = NETEVENT_NOERROR;
5736
0
  log_assert(c->type == comm_raw);
5737
0
  ub_comm_base_now(c->ev->base);
5738
5739
0
  if((event&UB_EV_TIMEOUT))
5740
0
    err = NETEVENT_TIMEOUT;
5741
0
  fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
5742
0
  (void)(*c->callback)(c, c->cb_arg, err, NULL);
5743
0
}
5744
5745
struct comm_point*
5746
comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
5747
  int pp2_enabled, comm_point_callback_type* callback,
5748
  void* callback_arg, struct unbound_socket* socket)
5749
0
{
5750
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5751
0
    sizeof(struct comm_point));
5752
0
  short evbits;
5753
0
  if(!c)
5754
0
    return NULL;
5755
0
  c->ev = (struct internal_event*)calloc(1,
5756
0
    sizeof(struct internal_event));
5757
0
  if(!c->ev) {
5758
0
    free(c);
5759
0
    return NULL;
5760
0
  }
5761
0
  c->ev->base = base;
5762
0
  c->fd = fd;
5763
0
  c->buffer = buffer;
5764
0
  c->timeout = NULL;
5765
0
  c->tcp_is_reading = 0;
5766
0
  c->tcp_byte_count = 0;
5767
0
  c->tcp_parent = NULL;
5768
0
  c->max_tcp_count = 0;
5769
0
  c->cur_tcp_count = 0;
5770
0
  c->tcp_handlers = NULL;
5771
0
  c->tcp_free = NULL;
5772
0
  c->is_in_tcp_free = 0;
5773
0
  c->type = comm_udp;
5774
0
  c->tcp_do_close = 0;
5775
0
  c->do_not_close = 0;
5776
0
  c->tcp_do_toggle_rw = 0;
5777
0
  c->tcp_check_nb_connect = 0;
5778
#ifdef USE_MSG_FASTOPEN
5779
  c->tcp_do_fastopen = 0;
5780
#endif
5781
#ifdef USE_DNSCRYPT
5782
  c->dnscrypt = 0;
5783
  c->dnscrypt_buffer = buffer;
5784
#endif
5785
0
  c->inuse = 0;
5786
0
  c->callback = callback;
5787
0
  c->cb_arg = callback_arg;
5788
0
  c->socket = socket;
5789
0
  c->pp2_enabled = pp2_enabled;
5790
0
  c->pp2_header_state = pp2_header_none;
5791
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
5792
  /* ub_event stuff */
5793
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5794
0
    comm_point_udp_callback, c);
5795
0
  if(c->ev->ev == NULL) {
5796
0
    log_err("could not baseset udp event");
5797
0
    comm_point_delete(c);
5798
0
    return NULL;
5799
0
  }
5800
0
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5801
0
    log_err("could not add udp event");
5802
0
    comm_point_delete(c);
5803
0
    return NULL;
5804
0
  }
5805
0
  c->event_added = 1;
5806
0
  return c;
5807
0
}
5808
5809
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
5810
struct comm_point*
5811
comm_point_create_udp_ancil(struct comm_base *base, int fd,
5812
  sldns_buffer* buffer, int pp2_enabled,
5813
  comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
5814
0
{
5815
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5816
0
    sizeof(struct comm_point));
5817
0
  short evbits;
5818
0
  if(!c)
5819
0
    return NULL;
5820
0
  c->ev = (struct internal_event*)calloc(1,
5821
0
    sizeof(struct internal_event));
5822
0
  if(!c->ev) {
5823
0
    free(c);
5824
0
    return NULL;
5825
0
  }
5826
0
  c->ev->base = base;
5827
0
  c->fd = fd;
5828
0
  c->buffer = buffer;
5829
0
  c->timeout = NULL;
5830
0
  c->tcp_is_reading = 0;
5831
0
  c->tcp_byte_count = 0;
5832
0
  c->tcp_parent = NULL;
5833
0
  c->max_tcp_count = 0;
5834
0
  c->cur_tcp_count = 0;
5835
0
  c->tcp_handlers = NULL;
5836
0
  c->tcp_free = NULL;
5837
0
  c->is_in_tcp_free = 0;
5838
0
  c->type = comm_udp;
5839
0
  c->tcp_do_close = 0;
5840
0
  c->do_not_close = 0;
5841
#ifdef USE_DNSCRYPT
5842
  c->dnscrypt = 0;
5843
  c->dnscrypt_buffer = buffer;
5844
#endif
5845
0
  c->inuse = 0;
5846
0
  c->tcp_do_toggle_rw = 0;
5847
0
  c->tcp_check_nb_connect = 0;
5848
#ifdef USE_MSG_FASTOPEN
5849
  c->tcp_do_fastopen = 0;
5850
#endif
5851
0
  c->callback = callback;
5852
0
  c->cb_arg = callback_arg;
5853
0
  c->socket = socket;
5854
0
  c->pp2_enabled = pp2_enabled;
5855
0
  c->pp2_header_state = pp2_header_none;
5856
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
5857
  /* ub_event stuff */
5858
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5859
0
    comm_point_udp_ancil_callback, c);
5860
0
  if(c->ev->ev == NULL) {
5861
0
    log_err("could not baseset udp event");
5862
0
    comm_point_delete(c);
5863
0
    return NULL;
5864
0
  }
5865
0
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5866
0
    log_err("could not add udp event");
5867
0
    comm_point_delete(c);
5868
0
    return NULL;
5869
0
  }
5870
0
  c->event_added = 1;
5871
0
  return c;
5872
0
}
5873
#endif
5874
5875
struct comm_point*
5876
comm_point_create_doq(struct comm_base *base, int fd, sldns_buffer* buffer,
5877
  comm_point_callback_type* callback, void* callback_arg,
5878
  struct unbound_socket* socket, struct doq_table* table,
5879
  struct ub_randstate* rnd, const void* quic_sslctx,
5880
  struct config_file* cfg)
5881
0
{
5882
#ifdef HAVE_NGTCP2
5883
  struct comm_point* c = (struct comm_point*)calloc(1,
5884
    sizeof(struct comm_point));
5885
  short evbits;
5886
  if(!c)
5887
    return NULL;
5888
  c->ev = (struct internal_event*)calloc(1,
5889
    sizeof(struct internal_event));
5890
  if(!c->ev) {
5891
    free(c);
5892
    return NULL;
5893
  }
5894
  c->ev->base = base;
5895
  c->fd = fd;
5896
  c->buffer = buffer;
5897
  c->timeout = NULL;
5898
  c->tcp_is_reading = 0;
5899
  c->tcp_byte_count = 0;
5900
  c->tcp_parent = NULL;
5901
  c->max_tcp_count = 0;
5902
  c->cur_tcp_count = 0;
5903
  c->tcp_handlers = NULL;
5904
  c->tcp_free = NULL;
5905
  c->is_in_tcp_free = 0;
5906
  c->type = comm_doq;
5907
  c->tcp_do_close = 0;
5908
  c->do_not_close = 0;
5909
  c->tcp_do_toggle_rw = 0;
5910
  c->tcp_check_nb_connect = 0;
5911
#ifdef USE_MSG_FASTOPEN
5912
  c->tcp_do_fastopen = 0;
5913
#endif
5914
#ifdef USE_DNSCRYPT
5915
  c->dnscrypt = 0;
5916
  c->dnscrypt_buffer = NULL;
5917
#endif
5918
  c->doq_socket = doq_server_socket_create(table, rnd, quic_sslctx, c,
5919
    base, cfg);
5920
  if(!c->doq_socket) {
5921
    log_err("could not create doq comm_point");
5922
    comm_point_delete(c);
5923
    return NULL;
5924
  }
5925
  c->inuse = 0;
5926
  c->callback = callback;
5927
  c->cb_arg = callback_arg;
5928
  c->socket = socket;
5929
  c->pp2_enabled = 0;
5930
  c->pp2_header_state = pp2_header_none;
5931
  evbits = UB_EV_READ | UB_EV_PERSIST;
5932
  /* ub_event stuff */
5933
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5934
    comm_point_doq_callback, c);
5935
  if(c->ev->ev == NULL) {
5936
    log_err("could not baseset udp event");
5937
    comm_point_delete(c);
5938
    return NULL;
5939
  }
5940
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5941
    log_err("could not add udp event");
5942
    comm_point_delete(c);
5943
    return NULL;
5944
  }
5945
  c->event_added = 1;
5946
  return c;
5947
#else
5948
  /* no libngtcp2, so no QUIC support */
5949
0
  (void)base;
5950
0
  (void)buffer;
5951
0
  (void)callback;
5952
0
  (void)callback_arg;
5953
0
  (void)socket;
5954
0
  (void)rnd;
5955
0
  (void)table;
5956
0
  (void)quic_sslctx;
5957
0
  (void)cfg;
5958
0
  sock_close(fd);
5959
0
  return NULL;
5960
0
#endif /* HAVE_NGTCP2 */
5961
0
}
5962
5963
static struct comm_point*
5964
comm_point_create_tcp_handler(struct comm_base *base,
5965
  struct comm_point* parent, size_t bufsize,
5966
  struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
5967
  void* callback_arg, struct unbound_socket* socket)
5968
0
{
5969
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5970
0
    sizeof(struct comm_point));
5971
0
  short evbits;
5972
0
  if(!c)
5973
0
    return NULL;
5974
0
  c->ev = (struct internal_event*)calloc(1,
5975
0
    sizeof(struct internal_event));
5976
0
  if(!c->ev) {
5977
0
    free(c);
5978
0
    return NULL;
5979
0
  }
5980
0
  c->ev->base = base;
5981
0
  c->fd = -1;
5982
0
  c->buffer = sldns_buffer_new(bufsize);
5983
0
  if(!c->buffer) {
5984
0
    free(c->ev);
5985
0
    free(c);
5986
0
    return NULL;
5987
0
  }
5988
0
  c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
5989
0
  if(!c->timeout) {
5990
0
    sldns_buffer_free(c->buffer);
5991
0
    free(c->ev);
5992
0
    free(c);
5993
0
    return NULL;
5994
0
  }
5995
0
  c->tcp_is_reading = 0;
5996
0
  c->tcp_byte_count = 0;
5997
0
  c->tcp_parent = parent;
5998
0
  c->tcp_timeout_msec = parent->tcp_timeout_msec;
5999
0
  c->tcp_conn_limit = parent->tcp_conn_limit;
6000
0
  c->tcl_addr = NULL;
6001
0
  c->tcp_keepalive = 0;
6002
0
  c->max_tcp_count = 0;
6003
0
  c->cur_tcp_count = 0;
6004
0
  c->tcp_handlers = NULL;
6005
0
  c->tcp_free = NULL;
6006
0
  c->is_in_tcp_free = 0;
6007
0
  c->type = comm_tcp;
6008
0
  c->tcp_do_close = 0;
6009
0
  c->do_not_close = 0;
6010
0
  c->tcp_do_toggle_rw = 1;
6011
0
  c->tcp_check_nb_connect = 0;
6012
#ifdef USE_MSG_FASTOPEN
6013
  c->tcp_do_fastopen = 0;
6014
#endif
6015
#ifdef USE_DNSCRYPT
6016
  c->dnscrypt = 0;
6017
  /* We don't know just yet if this is a dnscrypt channel. Allocation
6018
   * will be done when handling the callback. */
6019
  c->dnscrypt_buffer = c->buffer;
6020
#endif
6021
0
  c->repinfo.c = c;
6022
0
  c->callback = callback;
6023
0
  c->cb_arg = callback_arg;
6024
0
  c->socket = socket;
6025
0
  c->pp2_enabled = parent->pp2_enabled;
6026
0
  c->pp2_header_state = pp2_header_none;
6027
0
  if(spoolbuf) {
6028
0
    c->tcp_req_info = tcp_req_info_create(spoolbuf);
6029
0
    if(!c->tcp_req_info) {
6030
0
      log_err("could not create tcp commpoint");
6031
0
      sldns_buffer_free(c->buffer);
6032
0
      free(c->timeout);
6033
0
      free(c->ev);
6034
0
      free(c);
6035
0
      return NULL;
6036
0
    }
6037
0
    c->tcp_req_info->cp = c;
6038
0
    c->tcp_do_close = 1;
6039
0
    c->tcp_do_toggle_rw = 0;
6040
0
  }
6041
  /* add to parent free list */
6042
0
  c->tcp_free = parent->tcp_free;
6043
0
  parent->tcp_free = c;
6044
0
  c->is_in_tcp_free = 1;
6045
  /* ub_event stuff */
6046
0
  evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6047
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6048
0
    comm_point_tcp_handle_callback, c);
6049
0
  if(c->ev->ev == NULL)
6050
0
  {
6051
0
    log_err("could not basetset tcphdl event");
6052
0
    parent->tcp_free = c->tcp_free;
6053
0
    tcp_req_info_delete(c->tcp_req_info);
6054
0
    sldns_buffer_free(c->buffer);
6055
0
    free(c->timeout);
6056
0
    free(c->ev);
6057
0
    free(c);
6058
0
    return NULL;
6059
0
  }
6060
0
  return c;
6061
0
}
6062
6063
static struct comm_point*
6064
comm_point_create_http_handler(struct comm_base *base,
6065
  struct comm_point* parent, size_t bufsize, int harden_large_queries,
6066
  uint32_t http_max_streams, char* http_endpoint,
6067
  comm_point_callback_type* callback, void* callback_arg,
6068
  struct unbound_socket* socket)
6069
0
{
6070
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6071
0
    sizeof(struct comm_point));
6072
0
  short evbits;
6073
0
  if(!c)
6074
0
    return NULL;
6075
0
  c->ev = (struct internal_event*)calloc(1,
6076
0
    sizeof(struct internal_event));
6077
0
  if(!c->ev) {
6078
0
    free(c);
6079
0
    return NULL;
6080
0
  }
6081
0
  c->ev->base = base;
6082
0
  c->fd = -1;
6083
0
  c->buffer = sldns_buffer_new(bufsize);
6084
0
  if(!c->buffer) {
6085
0
    free(c->ev);
6086
0
    free(c);
6087
0
    return NULL;
6088
0
  }
6089
0
  c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
6090
0
  if(!c->timeout) {
6091
0
    sldns_buffer_free(c->buffer);
6092
0
    free(c->ev);
6093
0
    free(c);
6094
0
    return NULL;
6095
0
  }
6096
0
  c->tcp_is_reading = 0;
6097
0
  c->tcp_byte_count = 0;
6098
0
  c->tcp_parent = parent;
6099
0
  c->tcp_timeout_msec = parent->tcp_timeout_msec;
6100
0
  c->tcp_conn_limit = parent->tcp_conn_limit;
6101
0
  c->tcl_addr = NULL;
6102
0
  c->tcp_keepalive = 0;
6103
0
  c->max_tcp_count = 0;
6104
0
  c->cur_tcp_count = 0;
6105
0
  c->tcp_handlers = NULL;
6106
0
  c->tcp_free = NULL;
6107
0
  c->is_in_tcp_free = 0;
6108
0
  c->type = comm_http;
6109
0
  c->tcp_do_close = 1;
6110
0
  c->do_not_close = 0;
6111
0
  c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */
6112
0
  c->tcp_check_nb_connect = 0;
6113
#ifdef USE_MSG_FASTOPEN
6114
  c->tcp_do_fastopen = 0;
6115
#endif
6116
#ifdef USE_DNSCRYPT
6117
  c->dnscrypt = 0;
6118
  c->dnscrypt_buffer = NULL;
6119
#endif
6120
0
  c->repinfo.c = c;
6121
0
  c->callback = callback;
6122
0
  c->cb_arg = callback_arg;
6123
0
  c->socket = socket;
6124
0
  c->pp2_enabled = 0;
6125
0
  c->pp2_header_state = pp2_header_none;
6126
6127
0
  c->http_min_version = http_version_2;
6128
0
  c->http2_stream_max_qbuffer_size = bufsize;
6129
0
  if(harden_large_queries && bufsize > 512)
6130
0
    c->http2_stream_max_qbuffer_size = 512;
6131
0
  c->http2_max_streams = http_max_streams;
6132
0
  if(!(c->http_endpoint = strdup(http_endpoint))) {
6133
0
    log_err("could not strdup http_endpoint");
6134
0
    sldns_buffer_free(c->buffer);
6135
0
    free(c->timeout);
6136
0
    free(c->ev);
6137
0
    free(c);
6138
0
    return NULL;
6139
0
  }
6140
0
  c->use_h2 = 0;
6141
#ifdef HAVE_NGHTTP2
6142
  if(!(c->h2_session = http2_session_create(c))) {
6143
    log_err("could not create http2 session");
6144
    free(c->http_endpoint);
6145
    sldns_buffer_free(c->buffer);
6146
    free(c->timeout);
6147
    free(c->ev);
6148
    free(c);
6149
    return NULL;
6150
  }
6151
  if(!(c->h2_session->callbacks = http2_req_callbacks_create())) {
6152
    log_err("could not create http2 callbacks");
6153
    http2_session_delete(c->h2_session);
6154
    free(c->http_endpoint);
6155
    sldns_buffer_free(c->buffer);
6156
    free(c->timeout);
6157
    free(c->ev);
6158
    free(c);
6159
    return NULL;
6160
  }
6161
#endif
6162
6163
  /* add to parent free list */
6164
0
  c->tcp_free = parent->tcp_free;
6165
0
  parent->tcp_free = c;
6166
0
  c->is_in_tcp_free = 1;
6167
  /* ub_event stuff */
6168
0
  evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6169
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6170
0
    comm_point_http_handle_callback, c);
6171
0
  if(c->ev->ev == NULL)
6172
0
  {
6173
0
    log_err("could not set http handler event");
6174
0
    parent->tcp_free = c->tcp_free;
6175
0
    http2_session_delete(c->h2_session);
6176
0
    sldns_buffer_free(c->buffer);
6177
0
    free(c->timeout);
6178
0
    free(c->ev);
6179
0
    free(c);
6180
0
    return NULL;
6181
0
  }
6182
0
  return c;
6183
0
}
6184
6185
struct comm_point*
6186
comm_point_create_tcp(struct comm_base *base, int fd, int num,
6187
  int idle_timeout, int harden_large_queries,
6188
  uint32_t http_max_streams, char* http_endpoint,
6189
  struct tcl_list* tcp_conn_limit, size_t bufsize,
6190
  struct sldns_buffer* spoolbuf, enum listen_type port_type,
6191
  int pp2_enabled, comm_point_callback_type* callback,
6192
  void* callback_arg, struct unbound_socket* socket)
6193
0
{
6194
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6195
0
    sizeof(struct comm_point));
6196
0
  short evbits;
6197
0
  int i;
6198
  /* first allocate the TCP accept listener */
6199
0
  if(!c)
6200
0
    return NULL;
6201
0
  c->ev = (struct internal_event*)calloc(1,
6202
0
    sizeof(struct internal_event));
6203
0
  if(!c->ev) {
6204
0
    free(c);
6205
0
    return NULL;
6206
0
  }
6207
0
  c->ev->base = base;
6208
0
  c->fd = fd;
6209
0
  c->buffer = NULL;
6210
0
  c->timeout = NULL;
6211
0
  c->tcp_is_reading = 0;
6212
0
  c->tcp_byte_count = 0;
6213
0
  c->tcp_timeout_msec = idle_timeout;
6214
0
  c->tcp_conn_limit = tcp_conn_limit;
6215
0
  c->tcl_addr = NULL;
6216
0
  c->tcp_keepalive = 0;
6217
0
  c->tcp_parent = NULL;
6218
0
  c->max_tcp_count = num;
6219
0
  c->cur_tcp_count = 0;
6220
0
  c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
6221
0
    sizeof(struct comm_point*));
6222
0
  if(!c->tcp_handlers) {
6223
0
    free(c->ev);
6224
0
    free(c);
6225
0
    return NULL;
6226
0
  }
6227
0
  c->tcp_free = NULL;
6228
0
  c->is_in_tcp_free = 0;
6229
0
  c->type = comm_tcp_accept;
6230
0
  c->tcp_do_close = 0;
6231
0
  c->do_not_close = 0;
6232
0
  c->tcp_do_toggle_rw = 0;
6233
0
  c->tcp_check_nb_connect = 0;
6234
#ifdef USE_MSG_FASTOPEN
6235
  c->tcp_do_fastopen = 0;
6236
#endif
6237
#ifdef USE_DNSCRYPT
6238
  c->dnscrypt = 0;
6239
  c->dnscrypt_buffer = NULL;
6240
#endif
6241
0
  c->callback = NULL;
6242
0
  c->cb_arg = NULL;
6243
0
  c->socket = socket;
6244
0
  c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled);
6245
0
  c->pp2_header_state = pp2_header_none;
6246
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
6247
  /* ub_event stuff */
6248
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6249
0
    comm_point_tcp_accept_callback, c);
6250
0
  if(c->ev->ev == NULL) {
6251
0
    log_err("could not baseset tcpacc event");
6252
0
    comm_point_delete(c);
6253
0
    return NULL;
6254
0
  }
6255
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6256
0
    log_err("could not add tcpacc event");
6257
0
    comm_point_delete(c);
6258
0
    return NULL;
6259
0
  }
6260
0
  c->event_added = 1;
6261
  /* now prealloc the handlers */
6262
0
  for(i=0; i<num; i++) {
6263
0
    if(port_type == listen_type_tcp ||
6264
0
      port_type == listen_type_ssl ||
6265
0
      port_type == listen_type_tcp_dnscrypt) {
6266
0
      c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
6267
0
        c, bufsize, spoolbuf, callback, callback_arg, socket);
6268
0
    } else if(port_type == listen_type_http) {
6269
0
      c->tcp_handlers[i] = comm_point_create_http_handler(
6270
0
        base, c, bufsize, harden_large_queries,
6271
0
        http_max_streams, http_endpoint,
6272
0
        callback, callback_arg, socket);
6273
0
    }
6274
0
    else {
6275
0
      log_err("could not create tcp handler, unknown listen "
6276
0
        "type");
6277
0
      return NULL;
6278
0
    }
6279
0
    if(!c->tcp_handlers[i]) {
6280
0
      comm_point_delete(c);
6281
0
      return NULL;
6282
0
    }
6283
0
  }
6284
6285
0
  return c;
6286
0
}
6287
6288
struct comm_point*
6289
comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
6290
        comm_point_callback_type* callback, void* callback_arg)
6291
0
{
6292
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6293
0
    sizeof(struct comm_point));
6294
0
  short evbits;
6295
0
  if(!c)
6296
0
    return NULL;
6297
0
  c->ev = (struct internal_event*)calloc(1,
6298
0
    sizeof(struct internal_event));
6299
0
  if(!c->ev) {
6300
0
    free(c);
6301
0
    return NULL;
6302
0
  }
6303
0
  c->ev->base = base;
6304
0
  c->fd = -1;
6305
0
  c->buffer = sldns_buffer_new(bufsize);
6306
0
  if(!c->buffer) {
6307
0
    free(c->ev);
6308
0
    free(c);
6309
0
    return NULL;
6310
0
  }
6311
0
  c->timeout = NULL;
6312
0
  c->tcp_is_reading = 0;
6313
0
  c->tcp_byte_count = 0;
6314
0
  c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
6315
0
  c->tcp_conn_limit = NULL;
6316
0
  c->tcl_addr = NULL;
6317
0
  c->tcp_keepalive = 0;
6318
0
  c->tcp_parent = NULL;
6319
0
  c->max_tcp_count = 0;
6320
0
  c->cur_tcp_count = 0;
6321
0
  c->tcp_handlers = NULL;
6322
0
  c->tcp_free = NULL;
6323
0
  c->is_in_tcp_free = 0;
6324
0
  c->type = comm_tcp;
6325
0
  c->tcp_do_close = 0;
6326
0
  c->do_not_close = 0;
6327
0
  c->tcp_do_toggle_rw = 1;
6328
0
  c->tcp_check_nb_connect = 1;
6329
#ifdef USE_MSG_FASTOPEN
6330
  c->tcp_do_fastopen = 1;
6331
#endif
6332
#ifdef USE_DNSCRYPT
6333
  c->dnscrypt = 0;
6334
  c->dnscrypt_buffer = c->buffer;
6335
#endif
6336
0
  c->repinfo.c = c;
6337
0
  c->callback = callback;
6338
0
  c->cb_arg = callback_arg;
6339
0
  c->pp2_enabled = 0;
6340
0
  c->pp2_header_state = pp2_header_none;
6341
0
  evbits = UB_EV_PERSIST | UB_EV_WRITE;
6342
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6343
0
    comm_point_tcp_handle_callback, c);
6344
0
  if(c->ev->ev == NULL)
6345
0
  {
6346
0
    log_err("could not baseset tcpout event");
6347
0
    sldns_buffer_free(c->buffer);
6348
0
    free(c->ev);
6349
0
    free(c);
6350
0
    return NULL;
6351
0
  }
6352
6353
0
  return c;
6354
0
}
6355
6356
struct comm_point*
6357
comm_point_create_http_out(struct comm_base *base, size_t bufsize,
6358
        comm_point_callback_type* callback, void* callback_arg,
6359
  sldns_buffer* temp)
6360
0
{
6361
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6362
0
    sizeof(struct comm_point));
6363
0
  short evbits;
6364
0
  if(!c)
6365
0
    return NULL;
6366
0
  c->ev = (struct internal_event*)calloc(1,
6367
0
    sizeof(struct internal_event));
6368
0
  if(!c->ev) {
6369
0
    free(c);
6370
0
    return NULL;
6371
0
  }
6372
0
  c->ev->base = base;
6373
0
  c->fd = -1;
6374
0
  c->buffer = sldns_buffer_new(bufsize);
6375
0
  if(!c->buffer) {
6376
0
    free(c->ev);
6377
0
    free(c);
6378
0
    return NULL;
6379
0
  }
6380
0
  c->timeout = NULL;
6381
0
  c->tcp_is_reading = 0;
6382
0
  c->tcp_byte_count = 0;
6383
0
  c->tcp_parent = NULL;
6384
0
  c->max_tcp_count = 0;
6385
0
  c->cur_tcp_count = 0;
6386
0
  c->tcp_handlers = NULL;
6387
0
  c->tcp_free = NULL;
6388
0
  c->is_in_tcp_free = 0;
6389
0
  c->type = comm_http;
6390
0
  c->tcp_do_close = 0;
6391
0
  c->do_not_close = 0;
6392
0
  c->tcp_do_toggle_rw = 1;
6393
0
  c->tcp_check_nb_connect = 1;
6394
0
  c->http_in_headers = 1;
6395
0
  c->http_in_chunk_headers = 0;
6396
0
  c->http_is_chunked = 0;
6397
0
  c->http_temp = temp;
6398
#ifdef USE_MSG_FASTOPEN
6399
  c->tcp_do_fastopen = 1;
6400
#endif
6401
#ifdef USE_DNSCRYPT
6402
  c->dnscrypt = 0;
6403
  c->dnscrypt_buffer = c->buffer;
6404
#endif
6405
0
  c->repinfo.c = c;
6406
0
  c->callback = callback;
6407
0
  c->cb_arg = callback_arg;
6408
0
  c->pp2_enabled = 0;
6409
0
  c->pp2_header_state = pp2_header_none;
6410
0
  evbits = UB_EV_PERSIST | UB_EV_WRITE;
6411
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6412
0
    comm_point_http_handle_callback, c);
6413
0
  if(c->ev->ev == NULL)
6414
0
  {
6415
0
    log_err("could not baseset tcpout event");
6416
0
#ifdef HAVE_SSL
6417
0
    SSL_free(c->ssl);
6418
0
#endif
6419
0
    sldns_buffer_free(c->buffer);
6420
0
    free(c->ev);
6421
0
    free(c);
6422
0
    return NULL;
6423
0
  }
6424
6425
0
  return c;
6426
0
}
6427
6428
struct comm_point*
6429
comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
6430
        comm_point_callback_type* callback, void* callback_arg)
6431
0
{
6432
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6433
0
    sizeof(struct comm_point));
6434
0
  short evbits;
6435
0
  if(!c)
6436
0
    return NULL;
6437
0
  c->ev = (struct internal_event*)calloc(1,
6438
0
    sizeof(struct internal_event));
6439
0
  if(!c->ev) {
6440
0
    free(c);
6441
0
    return NULL;
6442
0
  }
6443
0
  c->ev->base = base;
6444
0
  c->fd = fd;
6445
0
  c->buffer = sldns_buffer_new(bufsize);
6446
0
  if(!c->buffer) {
6447
0
    free(c->ev);
6448
0
    free(c);
6449
0
    return NULL;
6450
0
  }
6451
0
  c->timeout = NULL;
6452
0
  c->tcp_is_reading = 1;
6453
0
  c->tcp_byte_count = 0;
6454
0
  c->tcp_parent = NULL;
6455
0
  c->max_tcp_count = 0;
6456
0
  c->cur_tcp_count = 0;
6457
0
  c->tcp_handlers = NULL;
6458
0
  c->tcp_free = NULL;
6459
0
  c->is_in_tcp_free = 0;
6460
0
  c->type = comm_local;
6461
0
  c->tcp_do_close = 0;
6462
0
  c->do_not_close = 1;
6463
0
  c->tcp_do_toggle_rw = 0;
6464
0
  c->tcp_check_nb_connect = 0;
6465
#ifdef USE_MSG_FASTOPEN
6466
  c->tcp_do_fastopen = 0;
6467
#endif
6468
#ifdef USE_DNSCRYPT
6469
  c->dnscrypt = 0;
6470
  c->dnscrypt_buffer = c->buffer;
6471
#endif
6472
0
  c->callback = callback;
6473
0
  c->cb_arg = callback_arg;
6474
0
  c->pp2_enabled = 0;
6475
0
  c->pp2_header_state = pp2_header_none;
6476
  /* ub_event stuff */
6477
0
  evbits = UB_EV_PERSIST | UB_EV_READ;
6478
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6479
0
    comm_point_local_handle_callback, c);
6480
0
  if(c->ev->ev == NULL) {
6481
0
    log_err("could not baseset localhdl event");
6482
0
    free(c->ev);
6483
0
    free(c);
6484
0
    return NULL;
6485
0
  }
6486
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6487
0
    log_err("could not add localhdl event");
6488
0
    ub_event_free(c->ev->ev);
6489
0
    free(c->ev);
6490
0
    free(c);
6491
0
    return NULL;
6492
0
  }
6493
0
  c->event_added = 1;
6494
0
  return c;
6495
0
}
6496
6497
struct comm_point*
6498
comm_point_create_raw(struct comm_base* base, int fd, int writing,
6499
  comm_point_callback_type* callback, void* callback_arg)
6500
0
{
6501
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6502
0
    sizeof(struct comm_point));
6503
0
  short evbits;
6504
0
  if(!c)
6505
0
    return NULL;
6506
0
  c->ev = (struct internal_event*)calloc(1,
6507
0
    sizeof(struct internal_event));
6508
0
  if(!c->ev) {
6509
0
    free(c);
6510
0
    return NULL;
6511
0
  }
6512
0
  c->ev->base = base;
6513
0
  c->fd = fd;
6514
0
  c->buffer = NULL;
6515
0
  c->timeout = NULL;
6516
0
  c->tcp_is_reading = 0;
6517
0
  c->tcp_byte_count = 0;
6518
0
  c->tcp_parent = NULL;
6519
0
  c->max_tcp_count = 0;
6520
0
  c->cur_tcp_count = 0;
6521
0
  c->tcp_handlers = NULL;
6522
0
  c->tcp_free = NULL;
6523
0
  c->is_in_tcp_free = 0;
6524
0
  c->type = comm_raw;
6525
0
  c->tcp_do_close = 0;
6526
0
  c->do_not_close = 1;
6527
0
  c->tcp_do_toggle_rw = 0;
6528
0
  c->tcp_check_nb_connect = 0;
6529
#ifdef USE_MSG_FASTOPEN
6530
  c->tcp_do_fastopen = 0;
6531
#endif
6532
#ifdef USE_DNSCRYPT
6533
  c->dnscrypt = 0;
6534
  c->dnscrypt_buffer = c->buffer;
6535
#endif
6536
0
  c->callback = callback;
6537
0
  c->cb_arg = callback_arg;
6538
0
  c->pp2_enabled = 0;
6539
0
  c->pp2_header_state = pp2_header_none;
6540
  /* ub_event stuff */
6541
0
  if(writing)
6542
0
    evbits = UB_EV_PERSIST | UB_EV_WRITE;
6543
0
  else  evbits = UB_EV_PERSIST | UB_EV_READ;
6544
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6545
0
    comm_point_raw_handle_callback, c);
6546
0
  if(c->ev->ev == NULL) {
6547
0
    log_err("could not baseset rawhdl event");
6548
0
    free(c->ev);
6549
0
    free(c);
6550
0
    return NULL;
6551
0
  }
6552
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6553
0
    log_err("could not add rawhdl event");
6554
0
    ub_event_free(c->ev->ev);
6555
0
    free(c->ev);
6556
0
    free(c);
6557
0
    return NULL;
6558
0
  }
6559
0
  c->event_added = 1;
6560
0
  return c;
6561
0
}
6562
6563
void
6564
comm_point_close(struct comm_point* c)
6565
0
{
6566
0
  if(!c)
6567
0
    return;
6568
0
  if(c->fd != -1) {
6569
0
    verbose(5, "comm_point_close of %d: event_del", c->fd);
6570
0
    if(c->event_added) {
6571
0
      if(ub_event_del(c->ev->ev) != 0) {
6572
0
        log_err("could not event_del on close");
6573
0
      }
6574
0
      c->event_added = 0;
6575
0
    }
6576
0
  }
6577
0
  tcl_close_connection(c->tcl_addr);
6578
0
  if(c->tcp_req_info)
6579
0
    tcp_req_info_clear(c->tcp_req_info);
6580
0
  if(c->h2_session)
6581
0
    http2_session_server_delete(c->h2_session);
6582
  /* stop the comm point from reading or writing after it is closed. */
6583
0
  if(c->tcp_more_read_again && *c->tcp_more_read_again)
6584
0
    *c->tcp_more_read_again = 0;
6585
0
  if(c->tcp_more_write_again && *c->tcp_more_write_again)
6586
0
    *c->tcp_more_write_again = 0;
6587
6588
  /* close fd after removing from event lists, or epoll.. is messed up */
6589
0
  if(c->fd != -1 && !c->do_not_close) {
6590
#ifdef USE_WINSOCK
6591
    if(c->type == comm_tcp || c->type == comm_http) {
6592
      /* delete sticky events for the fd, it gets closed */
6593
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
6594
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
6595
    }
6596
#endif
6597
0
    verbose(VERB_ALGO, "close fd %d", c->fd);
6598
0
    sock_close(c->fd);
6599
0
  }
6600
0
  c->fd = -1;
6601
0
}
6602
6603
void
6604
comm_point_delete(struct comm_point* c)
6605
0
{
6606
0
  if(!c)
6607
0
    return;
6608
0
  if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
6609
0
#ifdef HAVE_SSL
6610
0
    SSL_shutdown(c->ssl);
6611
0
    SSL_free(c->ssl);
6612
0
#endif
6613
0
  }
6614
0
  if(c->type == comm_http && c->http_endpoint) {
6615
0
    free(c->http_endpoint);
6616
0
    c->http_endpoint = NULL;
6617
0
  }
6618
0
  comm_point_close(c);
6619
0
  if(c->tcp_handlers) {
6620
0
    int i;
6621
0
    for(i=0; i<c->max_tcp_count; i++)
6622
0
      comm_point_delete(c->tcp_handlers[i]);
6623
0
    free(c->tcp_handlers);
6624
0
  }
6625
0
  free(c->timeout);
6626
0
  if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
6627
0
    sldns_buffer_free(c->buffer);
6628
#ifdef USE_DNSCRYPT
6629
    if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
6630
      sldns_buffer_free(c->dnscrypt_buffer);
6631
    }
6632
#endif
6633
0
    if(c->tcp_req_info) {
6634
0
      tcp_req_info_delete(c->tcp_req_info);
6635
0
    }
6636
0
    if(c->h2_session) {
6637
0
      http2_session_delete(c->h2_session);
6638
0
    }
6639
0
  }
6640
#ifdef HAVE_NGTCP2
6641
  if(c->doq_socket)
6642
    doq_server_socket_delete(c->doq_socket);
6643
#endif
6644
0
  ub_event_free(c->ev->ev);
6645
0
  free(c->ev);
6646
0
  free(c);
6647
0
}
6648
6649
#ifdef USE_DNSTAP
6650
static void
6651
send_reply_dnstap(struct dt_env* dtenv,
6652
  struct sockaddr* addr, socklen_t addrlen,
6653
  struct sockaddr_storage* client_addr, socklen_t client_addrlen,
6654
  enum comm_point_type type, void* ssl, sldns_buffer* buffer)
6655
{
6656
  log_addr(VERB_ALGO, "from local addr", (void*)addr, addrlen);
6657
  log_addr(VERB_ALGO, "response to client", client_addr, client_addrlen);
6658
  dt_msg_send_client_response(dtenv, client_addr,
6659
    (struct sockaddr_storage*)addr, type, ssl, buffer);
6660
}
6661
#endif
6662
6663
void
6664
comm_point_send_reply(struct comm_reply *repinfo)
6665
0
{
6666
0
  struct sldns_buffer* buffer;
6667
0
  log_assert(repinfo && repinfo->c);
6668
#ifdef USE_DNSCRYPT
6669
  buffer = repinfo->c->dnscrypt_buffer;
6670
  if(!dnsc_handle_uncurved_request(repinfo)) {
6671
    return;
6672
  }
6673
#else
6674
0
  buffer = repinfo->c->buffer;
6675
0
#endif
6676
0
  if(repinfo->c->type == comm_udp) {
6677
0
    if(repinfo->srctype)
6678
0
      comm_point_send_udp_msg_if(repinfo->c, buffer,
6679
0
        (struct sockaddr*)&repinfo->remote_addr,
6680
0
        repinfo->remote_addrlen, repinfo);
6681
0
    else
6682
0
      comm_point_send_udp_msg(repinfo->c, buffer,
6683
0
        (struct sockaddr*)&repinfo->remote_addr,
6684
0
        repinfo->remote_addrlen, 0);
6685
#ifdef USE_DNSTAP
6686
    /*
6687
     * sending src (client)/dst (local service) addresses over
6688
     * DNSTAP from udp callback
6689
     */
6690
    if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) {
6691
      send_reply_dnstap(repinfo->c->dtenv,
6692
        repinfo->c->socket->addr,
6693
        repinfo->c->socket->addrlen,
6694
        &repinfo->client_addr, repinfo->client_addrlen,
6695
        repinfo->c->type, repinfo->c->ssl,
6696
        repinfo->c->buffer);
6697
    }
6698
#endif
6699
0
  } else {
6700
#ifdef USE_DNSTAP
6701
    struct dt_env* dtenv =
6702
#ifdef HAVE_NGTCP2
6703
      repinfo->c->doq_socket
6704
      ?repinfo->c->dtenv:
6705
#endif
6706
      repinfo->c->tcp_parent->dtenv;
6707
    struct sldns_buffer* dtbuffer = repinfo->c->tcp_req_info
6708
      ?repinfo->c->tcp_req_info->spool_buffer
6709
      :repinfo->c->buffer;
6710
#ifdef USE_DNSCRYPT
6711
    if(repinfo->c->dnscrypt && repinfo->is_dnscrypted)
6712
      dtbuffer = repinfo->c->buffer;
6713
#endif
6714
    /*
6715
     * sending src (client)/dst (local service) addresses over
6716
     * DNSTAP from other callbacks
6717
     */
6718
    if(dtenv != NULL && dtenv->log_client_response_messages) {
6719
      send_reply_dnstap(dtenv,
6720
        repinfo->c->socket->addr,
6721
        repinfo->c->socket->addrlen,
6722
        &repinfo->client_addr, repinfo->client_addrlen,
6723
        repinfo->c->type, repinfo->c->ssl,
6724
        dtbuffer);
6725
    }
6726
#endif
6727
0
    if(repinfo->c->tcp_req_info) {
6728
0
      tcp_req_info_send_reply(repinfo->c->tcp_req_info);
6729
0
    } else if(repinfo->c->use_h2) {
6730
0
      if(!http2_submit_dns_response(repinfo->c->h2_session)) {
6731
0
        comm_point_drop_reply(repinfo);
6732
0
        return;
6733
0
      }
6734
0
      repinfo->c->h2_stream = NULL;
6735
0
      repinfo->c->tcp_is_reading = 0;
6736
0
      comm_point_stop_listening(repinfo->c);
6737
0
      comm_point_start_listening(repinfo->c, -1,
6738
0
        adjusted_tcp_timeout(repinfo->c));
6739
0
      return;
6740
#ifdef HAVE_NGTCP2
6741
    } else if(repinfo->c->doq_socket) {
6742
      doq_socket_send_reply(repinfo);
6743
#endif
6744
0
    } else {
6745
0
      comm_point_start_listening(repinfo->c, -1,
6746
0
        adjusted_tcp_timeout(repinfo->c));
6747
0
    }
6748
0
  }
6749
0
}
6750
6751
void
6752
comm_point_drop_reply(struct comm_reply* repinfo)
6753
0
{
6754
0
  if(!repinfo)
6755
0
    return;
6756
0
  log_assert(repinfo->c);
6757
0
  log_assert(repinfo->c->type != comm_tcp_accept);
6758
0
  if(repinfo->c->type == comm_udp)
6759
0
    return;
6760
0
  if(repinfo->c->tcp_req_info)
6761
0
    repinfo->c->tcp_req_info->is_drop = 1;
6762
0
  if(repinfo->c->type == comm_http) {
6763
0
    if(repinfo->c->h2_session) {
6764
0
      repinfo->c->h2_session->is_drop = 1;
6765
0
      if(!repinfo->c->h2_session->postpone_drop)
6766
0
        reclaim_http_handler(repinfo->c);
6767
0
      return;
6768
0
    }
6769
0
    reclaim_http_handler(repinfo->c);
6770
0
    return;
6771
#ifdef HAVE_NGTCP2
6772
  } else if(repinfo->c->doq_socket) {
6773
    doq_socket_drop_reply(repinfo);
6774
    return;
6775
#endif
6776
0
  }
6777
0
  reclaim_tcp_handler(repinfo->c);
6778
0
}
6779
6780
void
6781
comm_point_stop_listening(struct comm_point* c)
6782
0
{
6783
0
  verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
6784
0
  if(c->event_added) {
6785
0
    if(ub_event_del(c->ev->ev) != 0) {
6786
0
      log_err("event_del error to stoplisten");
6787
0
    }
6788
0
    c->event_added = 0;
6789
0
  }
6790
0
}
6791
6792
void
6793
comm_point_start_listening(struct comm_point* c, int newfd, int msec)
6794
0
{
6795
0
  verbose(VERB_ALGO, "comm point start listening %d (%d msec)",
6796
0
    c->fd==-1?newfd:c->fd, msec);
6797
0
  if(c->type == comm_tcp_accept && !c->tcp_free) {
6798
    /* no use to start listening no free slots. */
6799
0
    return;
6800
0
  }
6801
0
  if(c->event_added) {
6802
0
    if(ub_event_del(c->ev->ev) != 0) {
6803
0
      log_err("event_del error to startlisten");
6804
0
    }
6805
0
    c->event_added = 0;
6806
0
  }
6807
0
  if(msec != -1 && msec != 0) {
6808
0
    if(!c->timeout) {
6809
0
      c->timeout = (struct timeval*)malloc(sizeof(
6810
0
        struct timeval));
6811
0
      if(!c->timeout) {
6812
0
        log_err("cpsl: malloc failed. No net read.");
6813
0
        return;
6814
0
      }
6815
0
    }
6816
0
    ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
6817
0
#ifndef S_SPLINT_S /* splint fails on struct timeval. */
6818
0
    c->timeout->tv_sec = msec/1000;
6819
0
    c->timeout->tv_usec = (msec%1000)*1000;
6820
0
#endif /* S_SPLINT_S */
6821
0
  } else {
6822
0
    if(msec == 0 || !c->timeout) {
6823
0
      ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6824
0
    }
6825
0
  }
6826
0
  if(c->type == comm_tcp || c->type == comm_http) {
6827
0
    ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6828
0
    if(c->tcp_write_and_read) {
6829
0
      verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd));
6830
0
      ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6831
0
    } else if(c->tcp_is_reading) {
6832
0
      verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd));
6833
0
      ub_event_add_bits(c->ev->ev, UB_EV_READ);
6834
0
    } else  {
6835
0
      verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd));
6836
0
      ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6837
0
    }
6838
0
  }
6839
0
  if(newfd != -1) {
6840
0
    if(c->fd != -1 && c->fd != newfd) {
6841
0
      verbose(5, "cpsl close of fd %d for %d", c->fd, newfd);
6842
0
      sock_close(c->fd);
6843
0
    }
6844
0
    c->fd = newfd;
6845
0
    ub_event_set_fd(c->ev->ev, c->fd);
6846
0
  }
6847
0
  if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
6848
0
    log_err("event_add failed. in cpsl.");
6849
0
    return;
6850
0
  }
6851
0
  c->event_added = 1;
6852
0
}
6853
6854
void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
6855
0
{
6856
0
  verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
6857
0
  if(c->event_added) {
6858
0
    if(ub_event_del(c->ev->ev) != 0) {
6859
0
      log_err("event_del error to cplf");
6860
0
    }
6861
0
    c->event_added = 0;
6862
0
  }
6863
0
  if(!c->timeout) {
6864
0
    ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6865
0
  }
6866
0
  ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6867
0
  if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
6868
0
  if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6869
0
  if(ub_event_add(c->ev->ev, c->timeout) != 0) {
6870
0
    log_err("event_add failed. in cplf.");
6871
0
    return;
6872
0
  }
6873
0
  c->event_added = 1;
6874
0
}
6875
6876
size_t comm_point_get_mem(struct comm_point* c)
6877
0
{
6878
0
  size_t s;
6879
0
  if(!c)
6880
0
    return 0;
6881
0
  s = sizeof(*c) + sizeof(*c->ev);
6882
0
  if(c->timeout)
6883
0
    s += sizeof(*c->timeout);
6884
0
  if(c->type == comm_tcp || c->type == comm_local) {
6885
0
    s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
6886
#ifdef USE_DNSCRYPT
6887
    s += sizeof(*c->dnscrypt_buffer);
6888
    if(c->buffer != c->dnscrypt_buffer) {
6889
      s += sldns_buffer_capacity(c->dnscrypt_buffer);
6890
    }
6891
#endif
6892
0
  }
6893
0
  if(c->type == comm_tcp_accept) {
6894
0
    int i;
6895
0
    for(i=0; i<c->max_tcp_count; i++)
6896
0
      s += comm_point_get_mem(c->tcp_handlers[i]);
6897
0
  }
6898
0
  return s;
6899
0
}
6900
6901
struct comm_timer*
6902
comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
6903
0
{
6904
0
  struct internal_timer *tm = (struct internal_timer*)calloc(1,
6905
0
    sizeof(struct internal_timer));
6906
0
  if(!tm) {
6907
0
    log_err("malloc failed");
6908
0
    return NULL;
6909
0
  }
6910
0
  tm->super.ev_timer = tm;
6911
0
  tm->base = base;
6912
0
  tm->super.callback = cb;
6913
0
  tm->super.cb_arg = cb_arg;
6914
0
  tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT,
6915
0
    comm_timer_callback, &tm->super);
6916
0
  if(tm->ev == NULL) {
6917
0
    log_err("timer_create: event_base_set failed.");
6918
0
    free(tm);
6919
0
    return NULL;
6920
0
  }
6921
0
  return &tm->super;
6922
0
}
6923
6924
void
6925
comm_timer_disable(struct comm_timer* timer)
6926
0
{
6927
0
  if(!timer)
6928
0
    return;
6929
0
  ub_timer_del(timer->ev_timer->ev);
6930
0
  timer->ev_timer->enabled = 0;
6931
0
}
6932
6933
void
6934
comm_timer_set(struct comm_timer* timer, struct timeval* tv)
6935
0
{
6936
0
  log_assert(tv);
6937
0
  if(timer->ev_timer->enabled)
6938
0
    comm_timer_disable(timer);
6939
0
  if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
6940
0
    comm_timer_callback, timer, tv) != 0)
6941
0
    log_err("comm_timer_set: evtimer_add failed.");
6942
0
  timer->ev_timer->enabled = 1;
6943
0
}
6944
6945
void
6946
comm_timer_delete(struct comm_timer* timer)
6947
0
{
6948
0
  if(!timer)
6949
0
    return;
6950
0
  comm_timer_disable(timer);
6951
  /* Free the sub struct timer->ev_timer derived from the super struct timer.
6952
   * i.e. assert(timer == timer->ev_timer)
6953
   */
6954
0
  ub_event_free(timer->ev_timer->ev);
6955
0
  free(timer->ev_timer);
6956
0
}
6957
6958
void
6959
comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
6960
0
{
6961
0
  struct comm_timer* tm = (struct comm_timer*)arg;
6962
0
  if(!(event&UB_EV_TIMEOUT))
6963
0
    return;
6964
0
  ub_comm_base_now(tm->ev_timer->base);
6965
0
  tm->ev_timer->enabled = 0;
6966
0
  fptr_ok(fptr_whitelist_comm_timer(tm->callback));
6967
0
  (*tm->callback)(tm->cb_arg);
6968
0
}
6969
6970
int
6971
comm_timer_is_set(struct comm_timer* timer)
6972
0
{
6973
0
  return (int)timer->ev_timer->enabled;
6974
0
}
6975
6976
size_t
6977
comm_timer_get_mem(struct comm_timer* timer)
6978
0
{
6979
0
  if(!timer) return 0;
6980
0
  return sizeof(struct internal_timer);
6981
0
}
6982
6983
struct comm_signal*
6984
comm_signal_create(struct comm_base* base,
6985
        void (*callback)(int, void*), void* cb_arg)
6986
0
{
6987
0
  struct comm_signal* com = (struct comm_signal*)malloc(
6988
0
    sizeof(struct comm_signal));
6989
0
  if(!com) {
6990
0
    log_err("malloc failed");
6991
0
    return NULL;
6992
0
  }
6993
0
  com->base = base;
6994
0
  com->callback = callback;
6995
0
  com->cb_arg = cb_arg;
6996
0
  com->ev_signal = NULL;
6997
0
  return com;
6998
0
}
6999
7000
void
7001
comm_signal_callback(int sig, short event, void* arg)
7002
0
{
7003
0
  struct comm_signal* comsig = (struct comm_signal*)arg;
7004
0
  if(!(event & UB_EV_SIGNAL))
7005
0
    return;
7006
0
  ub_comm_base_now(comsig->base);
7007
0
  fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
7008
0
  (*comsig->callback)(sig, comsig->cb_arg);
7009
0
}
7010
7011
int
7012
comm_signal_bind(struct comm_signal* comsig, int sig)
7013
0
{
7014
0
  struct internal_signal* entry = (struct internal_signal*)calloc(1,
7015
0
    sizeof(struct internal_signal));
7016
0
  if(!entry) {
7017
0
    log_err("malloc failed");
7018
0
    return 0;
7019
0
  }
7020
0
  log_assert(comsig);
7021
  /* add signal event */
7022
0
  entry->ev = ub_signal_new(comsig->base->eb->base, sig,
7023
0
    comm_signal_callback, comsig);
7024
0
  if(entry->ev == NULL) {
7025
0
    log_err("Could not create signal event");
7026
0
    free(entry);
7027
0
    return 0;
7028
0
  }
7029
0
  if(ub_signal_add(entry->ev, NULL) != 0) {
7030
0
    log_err("Could not add signal handler");
7031
0
    ub_event_free(entry->ev);
7032
0
    free(entry);
7033
0
    return 0;
7034
0
  }
7035
  /* link into list */
7036
0
  entry->next = comsig->ev_signal;
7037
0
  comsig->ev_signal = entry;
7038
0
  return 1;
7039
0
}
7040
7041
void
7042
comm_signal_delete(struct comm_signal* comsig)
7043
0
{
7044
0
  struct internal_signal* p, *np;
7045
0
  if(!comsig)
7046
0
    return;
7047
0
  p=comsig->ev_signal;
7048
0
  while(p) {
7049
0
    np = p->next;
7050
0
    ub_signal_del(p->ev);
7051
0
    ub_event_free(p->ev);
7052
0
    free(p);
7053
0
    p = np;
7054
0
  }
7055
0
  free(comsig);
7056
0
}