Coverage Report

Created: 2026-02-09 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/unbound/util/netevent.c
Line
Count
Source
1
/*
2
 * util/netevent.c - event notification
3
 *
4
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5
 *
6
 * This software is open source.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 *
12
 * Redistributions of source code must retain the above copyright notice,
13
 * this list of conditions and the following disclaimer.
14
 *
15
 * Redistributions in binary form must reproduce the above copyright notice,
16
 * this list of conditions and the following disclaimer in the documentation
17
 * and/or other materials provided with the distribution.
18
 *
19
 * Neither the name of the NLNET LABS nor the names of its contributors may
20
 * be used to endorse or promote products derived from this software without
21
 * specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
 */
35
36
/**
37
 * \file
38
 *
39
 * This file contains event notification functions.
40
 */
41
#include "config.h"
42
#include "util/netevent.h"
43
#include "util/ub_event.h"
44
#include "util/log.h"
45
#include "util/net_help.h"
46
#include "util/tcp_conn_limit.h"
47
#include "util/fptr_wlist.h"
48
#include "util/proxy_protocol.h"
49
#include "util/timeval_func.h"
50
#include "sldns/pkthdr.h"
51
#include "sldns/sbuffer.h"
52
#include "sldns/str2wire.h"
53
#include "dnstap/dnstap.h"
54
#include "dnscrypt/dnscrypt.h"
55
#include "services/listen_dnsport.h"
56
#include "util/random.h"
57
#ifdef HAVE_SYS_TYPES_H
58
#include <sys/types.h>
59
#endif
60
#ifdef HAVE_SYS_SOCKET_H
61
#include <sys/socket.h>
62
#endif
63
#ifdef HAVE_NETDB_H
64
#include <netdb.h>
65
#endif
66
#ifdef HAVE_POLL_H
67
#include <poll.h>
68
#endif
69
70
#ifdef HAVE_OPENSSL_SSL_H
71
#include <openssl/ssl.h>
72
#endif
73
#ifdef HAVE_OPENSSL_ERR_H
74
#include <openssl/err.h>
75
#endif
76
77
#ifdef HAVE_NGTCP2
78
#include <ngtcp2/ngtcp2.h>
79
#include <ngtcp2/ngtcp2_crypto.h>
80
#endif
81
82
#ifdef HAVE_LINUX_NET_TSTAMP_H
83
#include <linux/net_tstamp.h>
84
#endif
85
86
/* -------- Start of local definitions -------- */
87
/** if CMSG_ALIGN is not defined on this platform, a workaround */
88
#ifndef CMSG_ALIGN
89
#  ifdef __CMSG_ALIGN
90
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
91
#  elif defined(CMSG_DATA_ALIGN)
92
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
93
#  else
94
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
95
#  endif
96
#endif
97
98
/** if CMSG_LEN is not defined on this platform, a workaround */
99
#ifndef CMSG_LEN
100
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
101
#endif
102
103
/** if CMSG_SPACE is not defined on this platform, a workaround */
104
#ifndef CMSG_SPACE
105
#  ifdef _CMSG_HDR_ALIGN
106
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
107
#  else
108
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
109
#  endif
110
#endif
111
112
/** The TCP writing query timeout in milliseconds */
113
0
#define TCP_QUERY_TIMEOUT 120000
114
/** The minimum actual TCP timeout to use, regardless of what we advertise,
115
 * in msec */
116
0
#define TCP_QUERY_TIMEOUT_MINIMUM 200
117
118
#ifndef NONBLOCKING_IS_BROKEN
119
/** number of UDP reads to perform per read indication from select */
120
0
#define NUM_UDP_PER_SELECT 100
121
#else
122
#define NUM_UDP_PER_SELECT 1
123
#endif
124
125
/** timeout in milliseconds to wait for write to unblock; packets are dropped after that. */
126
0
#define SEND_BLOCKED_WAIT_TIMEOUT 200
127
/** max number of times to wait for write to unblock; packets are dropped after that. */
128
0
#define SEND_BLOCKED_MAX_RETRY 5
129
130
/** Let's make timestamping code cleaner and define SO_TIMESTAMP* when the platform headers do not */
131
#ifndef SO_TIMESTAMP
132
#define SO_TIMESTAMP 29
133
#endif
134
#ifndef SO_TIMESTAMPNS
135
#define SO_TIMESTAMPNS 35
136
#endif
137
#ifndef SO_TIMESTAMPING
138
#define SO_TIMESTAMPING 37
139
#endif
140
/**
141
 * The internal event structure for keeping ub_event info for the event.
142
 * Possibly other structures (list, tree) this is part of.
143
 */
144
struct internal_event {
145
  /** the comm base */
146
  struct comm_base* base;
147
  /** ub_event event type */
148
  struct ub_event* ev;
149
};
150
151
/**
152
 * Internal base structure, so that every thread has its own events.
153
 */
154
struct internal_base {
155
  /** ub_event event_base type. */
156
  struct ub_event_base* base;
157
  /** seconds time pointer points here */
158
  time_t secs;
159
  /** timeval with current time */
160
  struct timeval now;
161
  /** the event used for slow_accept timeouts */
162
  struct ub_event* slow_accept;
163
  /** true if slow_accept is enabled */
164
  int slow_accept_enabled;
165
  /** last log time for slow logging of file descriptor errors */
166
  time_t last_slow_log;
167
  /** last log time for slow logging of write wait failures */
168
  time_t last_writewait_log;
169
};
170
171
/**
172
 * Internal timer structure, to store timer event in.
173
 */
174
struct internal_timer {
175
  /** the super struct from which derived */
176
  struct comm_timer super;
177
  /** the comm base */
178
  struct comm_base* base;
179
  /** ub_event event type */
180
  struct ub_event* ev;
181
  /** is timer enabled */
182
  uint8_t enabled;
183
};
184
185
/**
186
 * Internal signal structure, to store signal event in.
187
 */
188
struct internal_signal {
189
  /** ub_event event type */
190
  struct ub_event* ev;
191
  /** next in signal list */
192
  struct internal_signal* next;
193
};
194
195
/** create a tcp handler with a parent */
196
static struct comm_point* comm_point_create_tcp_handler(
197
  struct comm_base *base, struct comm_point* parent, size_t bufsize,
198
  struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
199
  void* callback_arg, struct unbound_socket* socket);
200
201
/* -------- End of local definitions -------- */
202
203
struct comm_base*
204
comm_base_create(int sigs)
205
0
{
206
0
  struct comm_base* b = (struct comm_base*)calloc(1,
207
0
    sizeof(struct comm_base));
208
0
  const char *evnm="event", *evsys="", *evmethod="";
209
210
0
  if(!b)
211
0
    return NULL;
212
0
  b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
213
0
  if(!b->eb) {
214
0
    free(b);
215
0
    return NULL;
216
0
  }
217
0
  b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
218
0
  if(!b->eb->base) {
219
0
    free(b->eb);
220
0
    free(b);
221
0
    return NULL;
222
0
  }
223
0
  ub_comm_base_now(b);
224
0
  ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
225
0
  verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod);
226
0
  return b;
227
0
}
228
229
struct comm_base*
230
comm_base_create_event(struct ub_event_base* base)
231
0
{
232
0
  struct comm_base* b = (struct comm_base*)calloc(1,
233
0
    sizeof(struct comm_base));
234
0
  if(!b)
235
0
    return NULL;
236
0
  b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
237
0
  if(!b->eb) {
238
0
    free(b);
239
0
    return NULL;
240
0
  }
241
0
  b->eb->base = base;
242
0
  ub_comm_base_now(b);
243
0
  return b;
244
0
}
245
246
void
247
comm_base_delete(struct comm_base* b)
248
0
{
249
0
  if(!b)
250
0
    return;
251
0
  if(b->eb->slow_accept_enabled) {
252
0
    if(ub_event_del(b->eb->slow_accept) != 0) {
253
0
      log_err("could not event_del slow_accept");
254
0
    }
255
0
    ub_event_free(b->eb->slow_accept);
256
0
  }
257
0
  ub_event_base_free(b->eb->base);
258
0
  b->eb->base = NULL;
259
0
  free(b->eb);
260
0
  free(b);
261
0
}
262
263
void
264
comm_base_delete_no_base(struct comm_base* b)
265
0
{
266
0
  if(!b)
267
0
    return;
268
0
  if(b->eb->slow_accept_enabled) {
269
0
    if(ub_event_del(b->eb->slow_accept) != 0) {
270
0
      log_err("could not event_del slow_accept");
271
0
    }
272
0
    ub_event_free(b->eb->slow_accept);
273
0
  }
274
0
  b->eb->base = NULL;
275
0
  free(b->eb);
276
0
  free(b);
277
0
}
278
279
void
280
comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
281
0
{
282
0
  *tt = &b->eb->secs;
283
0
  *tv = &b->eb->now;
284
0
}
285
286
void
287
comm_base_dispatch(struct comm_base* b)
288
0
{
289
0
  int retval;
290
0
  retval = ub_event_base_dispatch(b->eb->base);
291
0
  if(retval < 0) {
292
0
    fatal_exit("event_dispatch returned error %d, "
293
0
      "errno is %s", retval, strerror(errno));
294
0
  }
295
0
}
296
297
void comm_base_exit(struct comm_base* b)
298
0
{
299
0
  if(ub_event_base_loopexit(b->eb->base) != 0) {
300
0
    log_err("Could not loopexit");
301
0
  }
302
0
}
303
304
void comm_base_set_slow_accept_handlers(struct comm_base* b,
305
  void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
306
0
{
307
0
  b->stop_accept = stop_acc;
308
0
  b->start_accept = start_acc;
309
0
  b->cb_arg = arg;
310
0
}
311
312
struct ub_event_base* comm_base_internal(struct comm_base* b)
313
0
{
314
0
  return b->eb->base;
315
0
}
316
317
struct ub_event* comm_point_internal(struct comm_point* c)
318
0
{
319
0
  return c->ev->ev;
320
0
}
321
322
/** see if errno for udp has to be logged or not; uses globals (errno, verbosity) */
323
static int
324
udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
325
0
{
326
  /* do not log transient errors (unless high verbosity) */
327
0
#if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
328
0
  switch(errno) {
329
0
#  ifdef ENETUNREACH
330
0
    case ENETUNREACH:
331
0
#  endif
332
0
#  ifdef EHOSTDOWN
333
0
    case EHOSTDOWN:
334
0
#  endif
335
0
#  ifdef EHOSTUNREACH
336
0
    case EHOSTUNREACH:
337
0
#  endif
338
0
#  ifdef ENETDOWN
339
0
    case ENETDOWN:
340
0
#  endif
341
0
    case EPERM:
342
0
    case EACCES:
343
0
      if(verbosity < VERB_ALGO)
344
0
        return 0;
345
0
      break;
346
0
    default:
347
0
      break;
348
0
  }
349
0
#endif
350
  /* permission denied is gotten for every send if the
351
   * network is disconnected (on some OS), squelch it */
352
0
  if( ((errno == EPERM)
353
0
#  ifdef EADDRNOTAVAIL
354
    /* 'Cannot assign requested address' also when disconnected */
355
0
    || (errno == EADDRNOTAVAIL)
356
0
#  endif
357
0
    ) && verbosity < VERB_ALGO)
358
0
    return 0;
359
0
#  ifdef EADDRINUSE
360
  /* If SO_REUSEADDR is set, we could try to connect to the same server
361
   * from the same source port twice. */
362
0
  if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
363
0
    return 0;
364
0
#  endif
365
  /* squelch errors where people deploy AAAA ::ffff:bla for
366
   * authority servers, which we try for intranets. */
367
0
  if(errno == EINVAL && addr_is_ip4mapped(
368
0
    (struct sockaddr_storage*)addr, addrlen) &&
369
0
    verbosity < VERB_DETAIL)
370
0
    return 0;
371
  /* SO_BROADCAST sockopt can give access to 255.255.255.255,
372
   * but a dns cache does not need it. */
373
0
  if(errno == EACCES && addr_is_broadcast(
374
0
    (struct sockaddr_storage*)addr, addrlen) &&
375
0
    verbosity < VERB_DETAIL)
376
0
    return 0;
377
0
#  ifdef ENOTCONN
378
  /* For 0.0.0.0, ::0 targets it can return that socket is not connected.
379
   * This can be ignored, and the address skipped. It remains
380
   * possible to send there for completeness in configuration. */
381
0
  if(errno == ENOTCONN && addr_is_any(
382
0
    (struct sockaddr_storage*)addr, addrlen) &&
383
0
    verbosity < VERB_DETAIL)
384
0
    return 0;
385
0
#  endif
386
0
  return 1;
387
0
}
388
389
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
390
0
{
391
0
  return udp_send_errno_needs_log(addr, addrlen);
392
0
}
393
394
/* send a UDP reply */
395
int
396
comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
397
  struct sockaddr* addr, socklen_t addrlen, int is_connected)
398
0
{
399
0
  ssize_t sent;
400
0
  log_assert(c->fd != -1);
401
#ifdef UNBOUND_DEBUG
402
  if(sldns_buffer_remaining(packet) == 0)
403
    log_err("error: send empty UDP packet");
404
#endif
405
0
  log_assert(addr && addrlen > 0);
406
0
  if(!is_connected) {
407
0
    sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
408
0
      sldns_buffer_remaining(packet), 0,
409
0
      addr, addrlen);
410
0
  } else {
411
0
    sent = send(c->fd, (void*)sldns_buffer_begin(packet),
412
0
      sldns_buffer_remaining(packet), 0);
413
0
  }
414
0
  if(sent == -1) {
415
    /* try again and block, waiting for IO to complete,
416
     * we want to send the answer, and we will wait for
417
     * the ethernet interface buffer to have space. */
418
0
#ifndef USE_WINSOCK
419
0
    if(errno == EAGAIN || errno == EINTR ||
420
0
#  ifdef EWOULDBLOCK
421
0
      errno == EWOULDBLOCK ||
422
0
#  endif
423
0
      errno == ENOBUFS) {
424
#else
425
    if(WSAGetLastError() == WSAEINPROGRESS ||
426
      WSAGetLastError() == WSAEINTR ||
427
      WSAGetLastError() == WSAENOBUFS ||
428
      WSAGetLastError() == WSAEWOULDBLOCK) {
429
#endif
430
0
      int retries = 0;
431
      /* if we set the fd blocking, other threads suddenly
432
       * have a blocking fd that they operate on */
433
0
      while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
434
0
#ifndef USE_WINSOCK
435
0
        errno == EAGAIN || errno == EINTR ||
436
0
#  ifdef EWOULDBLOCK
437
0
        errno == EWOULDBLOCK ||
438
0
#  endif
439
0
        errno == ENOBUFS
440
#else
441
        WSAGetLastError() == WSAEINPROGRESS ||
442
        WSAGetLastError() == WSAEINTR ||
443
        WSAGetLastError() == WSAENOBUFS ||
444
        WSAGetLastError() == WSAEWOULDBLOCK
445
#endif
446
0
      )) {
447
0
#if defined(HAVE_POLL) || defined(USE_WINSOCK)
448
0
        int send_nobufs = (
449
0
#ifndef USE_WINSOCK
450
0
          errno == ENOBUFS
451
#else
452
          WSAGetLastError() == WSAENOBUFS
453
#endif
454
0
        );
455
0
        struct pollfd p;
456
0
        int pret;
457
0
        memset(&p, 0, sizeof(p));
458
0
        p.fd = c->fd;
459
0
        p.events = POLLOUT
460
0
#ifndef USE_WINSOCK
461
0
          | POLLERR | POLLHUP
462
0
#endif
463
0
          ;
464
0
#  ifndef USE_WINSOCK
465
0
        pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
466
#  else
467
        pret = WSAPoll(&p, 1,
468
          SEND_BLOCKED_WAIT_TIMEOUT);
469
#  endif
470
0
        if(pret == 0) {
471
          /* timer expired */
472
0
          struct comm_base* b = c->ev->base;
473
0
          if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
474
0
            b->eb->secs) {
475
0
            b->eb->last_writewait_log = b->eb->secs;
476
0
            verbose(VERB_OPS, "send udp blocked "
477
0
              "for long, dropping packet.");
478
0
          }
479
0
          return 0;
480
0
        } else if(pret < 0 &&
481
0
#ifndef USE_WINSOCK
482
0
          errno != EAGAIN && errno != EINTR &&
483
0
#  ifdef EWOULDBLOCK
484
0
          errno != EWOULDBLOCK &&
485
0
#  endif
486
0
          errno != ENOMEM && errno != ENOBUFS
487
#else
488
          WSAGetLastError() != WSAEINPROGRESS &&
489
          WSAGetLastError() != WSAEINTR &&
490
          WSAGetLastError() != WSAENOBUFS &&
491
          WSAGetLastError() != WSAEWOULDBLOCK
492
#endif
493
0
          ) {
494
0
          log_err("poll udp out failed: %s",
495
0
            sock_strerror(errno));
496
0
          return 0;
497
0
        } else if((pret < 0 &&
498
0
#ifndef USE_WINSOCK
499
0
          ( errno == ENOBUFS  /* Maybe some systems */
500
0
          || errno == ENOMEM  /* Linux */
501
0
          || errno == EAGAIN)  /* Macos, solaris, openbsd */
502
#else
503
          WSAGetLastError() == WSAENOBUFS
504
#endif
505
0
          ) || (send_nobufs && retries > 0)) {
506
          /* ENOBUFS/ENOMEM/EAGAIN, and poll
507
           * returned without
508
           * a timeout. Or the retried send call
509
           * returned ENOBUFS/ENOMEM/EAGAIN.
510
           * It is good to wait a bit for the
511
           * error to clear. */
512
          /* The timeout is 20*(2^(retries+1)),
513
           * it increases exponentially, starting
514
           * at 40 msec. After 5 tries, 1240 msec
515
           * have passed in total, when poll
516
           * returned the error, and 1200 msec
517
           * when send returned the errors. */
518
0
#ifndef USE_WINSOCK
519
0
          pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
520
#else
521
          Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
522
          pret = 0;
523
#endif
524
0
          if(pret < 0
525
0
#ifndef USE_WINSOCK
526
0
            && errno != EAGAIN && errno != EINTR &&
527
0
#  ifdef EWOULDBLOCK
528
0
            errno != EWOULDBLOCK &&
529
0
#  endif
530
0
            errno != ENOMEM && errno != ENOBUFS
531
#else
532
            /* Sleep does not error */
533
#endif
534
0
          ) {
535
0
            log_err("poll udp out timer failed: %s",
536
0
              sock_strerror(errno));
537
0
          }
538
0
        }
539
0
#endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
540
0
        retries++;
541
0
        if (!is_connected) {
542
0
          sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
543
0
            sldns_buffer_remaining(packet), 0,
544
0
            addr, addrlen);
545
0
        } else {
546
0
          sent = send(c->fd, (void*)sldns_buffer_begin(packet),
547
0
            sldns_buffer_remaining(packet), 0);
548
0
        }
549
0
      }
550
0
    }
551
0
  }
552
0
  if(sent == -1) {
553
0
    if(!udp_send_errno_needs_log(addr, addrlen))
554
0
      return 0;
555
0
    if (!is_connected) {
556
0
      verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno));
557
0
    } else {
558
0
      verbose(VERB_OPS, "send failed: %s", sock_strerror(errno));
559
0
    }
560
0
    if(addr)
561
0
      log_addr(VERB_OPS, "remote address is",
562
0
        (struct sockaddr_storage*)addr, addrlen);
563
0
    return 0;
564
0
  } else if((size_t)sent != sldns_buffer_remaining(packet)) {
565
0
    log_err("sent %d in place of %d bytes",
566
0
      (int)sent, (int)sldns_buffer_remaining(packet));
567
0
    return 0;
568
0
  }
569
0
  return 1;
570
0
}
571
572
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
573
/** print debug ancillary info */
574
static void p_ancil(const char* str, struct comm_reply* r)
575
0
{
576
0
  if(r->srctype != 4 && r->srctype != 6) {
577
0
    log_info("%s: unknown srctype %d", str, r->srctype);
578
0
    return;
579
0
  }
580
581
0
  if(r->srctype == 6) {
582
0
#ifdef IPV6_PKTINFO
583
0
    char buf[1024];
584
0
    if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
585
0
      buf, (socklen_t)sizeof(buf)) == 0) {
586
0
      (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf));
587
0
    }
588
0
    buf[sizeof(buf)-1]=0;
589
0
    log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex);
590
0
#endif
591
0
  } else if(r->srctype == 4) {
592
0
#ifdef IP_PKTINFO
593
0
    char buf1[1024], buf2[1024];
594
0
    if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
595
0
      buf1, (socklen_t)sizeof(buf1)) == 0) {
596
0
      (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
597
0
    }
598
0
    buf1[sizeof(buf1)-1]=0;
599
0
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
600
0
    if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
601
0
      buf2, (socklen_t)sizeof(buf2)) == 0) {
602
0
      (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2));
603
0
    }
604
0
    buf2[sizeof(buf2)-1]=0;
605
#else
606
    buf2[0]=0;
607
#endif
608
0
    log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
609
0
      buf1, buf2);
610
#elif defined(IP_RECVDSTADDR)
611
    char buf1[1024];
612
    if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
613
      buf1, (socklen_t)sizeof(buf1)) == 0) {
614
      (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
615
    }
616
    buf1[sizeof(buf1)-1]=0;
617
    log_info("%s: %s", str, buf1);
618
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
619
0
  }
620
0
}
621
#endif /* AF_INET6 && IPV6_PKTINFO && (HAVE_RECVMSG || HAVE_SENDMSG) */
622
623
/** send a UDP reply over specified interface */
624
static int
625
comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
626
  struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
627
0
{
628
0
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
629
0
  ssize_t sent;
630
0
  struct msghdr msg;
631
0
  struct iovec iov[1];
632
0
  union {
633
0
    struct cmsghdr hdr;
634
0
    char buf[256];
635
0
  } control;
636
0
#ifndef S_SPLINT_S
637
0
  struct cmsghdr *cmsg;
638
0
#endif /* S_SPLINT_S */
639
640
0
  log_assert(c->fd != -1);
641
#ifdef UNBOUND_DEBUG
642
  if(sldns_buffer_remaining(packet) == 0)
643
    log_err("error: send empty UDP packet");
644
#endif
645
0
  log_assert(addr && addrlen > 0);
646
647
0
  msg.msg_name = addr;
648
0
  msg.msg_namelen = addrlen;
649
0
  iov[0].iov_base = sldns_buffer_begin(packet);
650
0
  iov[0].iov_len = sldns_buffer_remaining(packet);
651
0
  msg.msg_iov = iov;
652
0
  msg.msg_iovlen = 1;
653
0
  msg.msg_control = control.buf;
654
0
#ifndef S_SPLINT_S
655
0
  msg.msg_controllen = sizeof(control.buf);
656
0
#endif /* S_SPLINT_S */
657
0
  msg.msg_flags = 0;
658
659
0
#ifndef S_SPLINT_S
660
0
  cmsg = CMSG_FIRSTHDR(&msg);
661
0
  if(r->srctype == 4) {
662
0
#ifdef IP_PKTINFO
663
0
    void* cmsg_data;
664
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
665
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
666
0
    cmsg->cmsg_level = IPPROTO_IP;
667
0
    cmsg->cmsg_type = IP_PKTINFO;
668
0
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
669
0
      sizeof(struct in_pktinfo));
670
    /* unset the ifindex to not bypass the routing tables */
671
0
    cmsg_data = CMSG_DATA(cmsg);
672
0
    ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
673
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
674
    /* zero the padding bytes inserted by the CMSG_LEN */
675
0
    if(sizeof(struct in_pktinfo) < cmsg->cmsg_len)
676
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
677
0
        sizeof(struct in_pktinfo), 0, cmsg->cmsg_len
678
0
        - sizeof(struct in_pktinfo));
679
#elif defined(IP_SENDSRCADDR)
680
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
681
    log_assert(msg.msg_controllen <= sizeof(control.buf));
682
    cmsg->cmsg_level = IPPROTO_IP;
683
    cmsg->cmsg_type = IP_SENDSRCADDR;
684
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
685
      sizeof(struct in_addr));
686
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
687
    /* zero the padding bytes inserted by the CMSG_LEN */
688
    if(sizeof(struct in_addr) < cmsg->cmsg_len)
689
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
690
        sizeof(struct in_addr), 0, cmsg->cmsg_len
691
        - sizeof(struct in_addr));
692
#else
693
    verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
694
    msg.msg_control = NULL;
695
#endif /* IP_PKTINFO or IP_SENDSRCADDR */
696
0
  } else if(r->srctype == 6) {
697
0
    void* cmsg_data;
698
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
699
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
700
0
    cmsg->cmsg_level = IPPROTO_IPV6;
701
0
    cmsg->cmsg_type = IPV6_PKTINFO;
702
0
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
703
0
      sizeof(struct in6_pktinfo));
704
    /* unset the ifindex to not bypass the routing tables */
705
0
    cmsg_data = CMSG_DATA(cmsg);
706
0
    ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
707
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
708
    /* zero the padding bytes inserted by the CMSG_LEN */
709
0
    if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
710
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
711
0
        sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
712
0
        - sizeof(struct in6_pktinfo));
713
0
  } else {
714
    /* try to pass all 0 to use default route */
715
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
716
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
717
0
    cmsg->cmsg_level = IPPROTO_IPV6;
718
0
    cmsg->cmsg_type = IPV6_PKTINFO;
719
0
    memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
720
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
721
    /* zero the padding bytes inserted by the CMSG_LEN */
722
0
    if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
723
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
724
0
        sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
725
0
        - sizeof(struct in6_pktinfo));
726
0
  }
727
0
#endif /* S_SPLINT_S */
728
0
  if(verbosity >= VERB_ALGO && r->srctype != 0)
729
0
    p_ancil("send_udp over interface", r);
730
0
  sent = sendmsg(c->fd, &msg, 0);
731
0
  if(sent == -1) {
732
    /* try again and block, waiting for IO to complete,
733
     * we want to send the answer, and we will wait for
734
     * the ethernet interface buffer to have space. */
735
0
#ifndef USE_WINSOCK
736
0
    if(errno == EAGAIN || errno == EINTR ||
737
0
#  ifdef EWOULDBLOCK
738
0
      errno == EWOULDBLOCK ||
739
0
#  endif
740
0
      errno == ENOBUFS) {
741
#else
742
    if(WSAGetLastError() == WSAEINPROGRESS ||
743
      WSAGetLastError() == WSAEINTR ||
744
      WSAGetLastError() == WSAENOBUFS ||
745
      WSAGetLastError() == WSAEWOULDBLOCK) {
746
#endif
747
0
      int retries = 0;
748
0
      while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
749
0
#ifndef USE_WINSOCK
750
0
        errno == EAGAIN || errno == EINTR ||
751
0
#  ifdef EWOULDBLOCK
752
0
        errno == EWOULDBLOCK ||
753
0
#  endif
754
0
        errno == ENOBUFS
755
#else
756
        WSAGetLastError() == WSAEINPROGRESS ||
757
        WSAGetLastError() == WSAEINTR ||
758
        WSAGetLastError() == WSAENOBUFS ||
759
        WSAGetLastError() == WSAEWOULDBLOCK
760
#endif
761
0
      )) {
762
0
#if defined(HAVE_POLL) || defined(USE_WINSOCK)
763
0
        int send_nobufs = (
764
0
#ifndef USE_WINSOCK
765
0
          errno == ENOBUFS
766
#else
767
          WSAGetLastError() == WSAENOBUFS
768
#endif
769
0
        );
770
0
        struct pollfd p;
771
0
        int pret;
772
0
        memset(&p, 0, sizeof(p));
773
0
        p.fd = c->fd;
774
0
        p.events = POLLOUT
775
0
#ifndef USE_WINSOCK
776
0
          | POLLERR | POLLHUP
777
0
#endif
778
0
          ;
779
0
#  ifndef USE_WINSOCK
780
0
        pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
781
#  else
782
        pret = WSAPoll(&p, 1,
783
          SEND_BLOCKED_WAIT_TIMEOUT);
784
#  endif
785
0
        if(pret == 0) {
786
          /* timer expired */
787
0
          struct comm_base* b = c->ev->base;
788
0
          if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
789
0
            b->eb->secs) {
790
0
            b->eb->last_writewait_log = b->eb->secs;
791
0
            verbose(VERB_OPS, "send udp blocked "
792
0
              "for long, dropping packet.");
793
0
          }
794
0
          return 0;
795
0
        } else if(pret < 0 &&
796
0
#ifndef USE_WINSOCK
797
0
          errno != EAGAIN && errno != EINTR &&
798
0
#  ifdef EWOULDBLOCK
799
0
          errno != EWOULDBLOCK &&
800
0
#  endif
801
0
          errno != ENOMEM && errno != ENOBUFS
802
#else
803
          WSAGetLastError() != WSAEINPROGRESS &&
804
          WSAGetLastError() != WSAEINTR &&
805
          WSAGetLastError() != WSAENOBUFS &&
806
          WSAGetLastError() != WSAEWOULDBLOCK
807
#endif
808
0
          ) {
809
0
          log_err("poll udp out failed: %s",
810
0
            sock_strerror(errno));
811
0
          return 0;
812
0
        } else if((pret < 0 &&
813
0
#ifndef USE_WINSOCK
814
0
          ( errno == ENOBUFS  /* Maybe some systems */
815
0
          || errno == ENOMEM  /* Linux */
816
0
          || errno == EAGAIN)  /* Macos, solaris, openbsd */
817
#else
818
          WSAGetLastError() == WSAENOBUFS
819
#endif
820
0
          ) || (send_nobufs && retries > 0)) {
821
          /* ENOBUFS/ENOMEM/EAGAIN, and poll
822
           * returned without
823
           * a timeout. Or the retried send call
824
           * returned ENOBUFS/ENOMEM/EAGAIN.
825
           * It is good to wait a bit for the
826
           * error to clear. */
827
          /* The timeout is 20*(2^(retries+1)),
828
           * it increases exponentially, starting
829
           * at 40 msec. After 5 tries, 1240 msec
830
           * have passed in total, when poll
831
           * returned the error, and 1200 msec
832
           * when send returned the errors. */
833
0
#ifndef USE_WINSOCK
834
0
          pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
835
#else
836
          Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
837
          pret = 0;
838
#endif
839
0
          if(pret < 0
840
0
#ifndef USE_WINSOCK
841
0
            && errno != EAGAIN && errno != EINTR &&
842
0
#  ifdef EWOULDBLOCK
843
0
            errno != EWOULDBLOCK &&
844
0
#  endif
845
0
            errno != ENOMEM && errno != ENOBUFS
846
#else  /* USE_WINSOCK */
847
            /* Sleep does not error */
848
#endif
849
0
          ) {
850
0
            log_err("poll udp out timer failed: %s",
851
0
              sock_strerror(errno));
852
0
          }
853
0
        }
854
0
#endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
855
0
        retries++;
856
0
        sent = sendmsg(c->fd, &msg, 0);
857
0
      }
858
0
    }
859
0
  }
860
0
  if(sent == -1) {
861
0
    if(!udp_send_errno_needs_log(addr, addrlen))
862
0
      return 0;
863
0
    verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
864
0
    log_addr(VERB_OPS, "remote address is",
865
0
      (struct sockaddr_storage*)addr, addrlen);
866
#ifdef __NetBSD__
867
    /* netbsd 7 has IP_PKTINFO for recv but not send */
868
    if(errno == EINVAL && r->srctype == 4)
869
      log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
870
        "Please disable interface-automatic");
871
#endif
872
0
    return 0;
873
0
  } else if((size_t)sent != sldns_buffer_remaining(packet)) {
874
0
    log_err("sent %d in place of %d bytes",
875
0
      (int)sent, (int)sldns_buffer_remaining(packet));
876
0
    return 0;
877
0
  }
878
0
  return 1;
879
#else
880
  (void)c;
881
  (void)packet;
882
  (void)addr;
883
  (void)addrlen;
884
  (void)r;
885
  log_err("sendmsg: IPV6_PKTINFO not supported");
886
  return 0;
887
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
888
0
}
889
890
/** return true if UDP receive error needs to be logged */
891
static int udp_recv_needs_log(int err)
892
0
{
893
0
  switch(err) {
894
0
  case EACCES: /* some hosts send ICMP 'Permission Denied' */
895
0
#ifndef USE_WINSOCK
896
0
  case ECONNREFUSED:
897
0
#  ifdef ENETUNREACH
898
0
  case ENETUNREACH:
899
0
#  endif
900
0
#  ifdef EHOSTDOWN
901
0
  case EHOSTDOWN:
902
0
#  endif
903
0
#  ifdef EHOSTUNREACH
904
0
  case EHOSTUNREACH:
905
0
#  endif
906
0
#  ifdef ENETDOWN
907
0
  case ENETDOWN:
908
0
#  endif
909
#else /* USE_WINSOCK */
910
  case WSAECONNREFUSED:
911
  case WSAENETUNREACH:
912
  case WSAEHOSTDOWN:
913
  case WSAEHOSTUNREACH:
914
  case WSAENETDOWN:
915
#endif
916
0
    if(verbosity >= VERB_ALGO)
917
0
      return 1;
918
0
    return 0;
919
0
  default:
920
0
    break;
921
0
  }
922
0
  return 1;
923
0
}
924
925
/** Parses the PROXYv2 header from buf and updates the comm_reply struct.
926
 *  Returns 1 on success, 0 on failure. */
927
static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep,
928
0
  int stream) {
929
0
  size_t size;
930
0
  struct pp2_header *header;
931
0
  int err = pp2_read_header(sldns_buffer_begin(buf),
932
0
    sldns_buffer_remaining(buf));
933
0
  if(err) return 0;
934
0
  header = (struct pp2_header*)sldns_buffer_begin(buf);
935
0
  size = PP2_HEADER_SIZE + ntohs(header->len);
936
0
  if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) {
937
    /* A connection from the proxy itself.
938
     * No need to do anything with addresses. */
939
0
    goto done;
940
0
  }
941
0
  if(header->fam_prot == PP2_UNSPEC_UNSPEC) {
942
    /* Unspecified family and protocol. This could be used for
943
     * health checks by proxies.
944
     * No need to do anything with addresses. */
945
0
    goto done;
946
0
  }
947
  /* Read the proxied address */
948
0
  switch(header->fam_prot) {
949
0
    case PP2_INET_STREAM:
950
0
    case PP2_INET_DGRAM:
951
0
      {
952
0
      struct sockaddr_in* addr =
953
0
        (struct sockaddr_in*)&rep->client_addr;
954
0
      addr->sin_family = AF_INET;
955
0
      addr->sin_addr.s_addr = header->addr.addr4.src_addr;
956
0
      addr->sin_port = header->addr.addr4.src_port;
957
0
      rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in);
958
0
      }
959
      /* Ignore the destination address; it should be us. */
960
0
      break;
961
0
    case PP2_INET6_STREAM:
962
0
    case PP2_INET6_DGRAM:
963
0
      {
964
0
      struct sockaddr_in6* addr =
965
0
        (struct sockaddr_in6*)&rep->client_addr;
966
0
      memset(addr, 0, sizeof(*addr));
967
0
      addr->sin6_family = AF_INET6;
968
0
      memcpy(&addr->sin6_addr,
969
0
        header->addr.addr6.src_addr, 16);
970
0
      addr->sin6_port = header->addr.addr6.src_port;
971
0
      rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6);
972
0
      }
973
      /* Ignore the destination address; it should be us. */
974
0
      break;
975
0
    default:
976
0
      log_err("proxy_protocol: unsupported family and "
977
0
        "protocol 0x%x", (int)header->fam_prot);
978
0
      return 0;
979
0
  }
980
0
  rep->is_proxied = 1;
981
0
done:
982
0
  if(!stream) {
983
    /* We are reading a whole packet;
984
     * Move the rest of the data to overwrite the PROXYv2 header */
985
    /* XXX can we do better to avoid memmove? */
986
0
    memmove(header, ((char*)header)+size,
987
0
      sldns_buffer_limit(buf)-size);
988
0
    sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size);
989
0
  }
990
0
  return 1;
991
0
}
992
993
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
994
void
995
comm_point_udp_ancil_callback(int fd, short event, void* arg)
996
0
{
997
0
  struct comm_reply rep;
998
0
  struct msghdr msg;
999
0
  struct iovec iov[1];
1000
0
  ssize_t rcv;
1001
0
  union {
1002
0
    struct cmsghdr hdr;
1003
0
    char buf[256];
1004
0
  } ancil;
1005
0
  int i;
1006
0
#ifndef S_SPLINT_S
1007
0
  struct cmsghdr* cmsg;
1008
0
#endif /* S_SPLINT_S */
1009
0
#ifdef HAVE_LINUX_NET_TSTAMP_H
1010
0
  struct timespec *ts;
1011
0
#endif /* HAVE_LINUX_NET_TSTAMP_H */
1012
1013
0
  rep.c = (struct comm_point*)arg;
1014
0
  log_assert(rep.c->type == comm_udp);
1015
1016
0
  if(!(event&UB_EV_READ))
1017
0
    return;
1018
0
  log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
1019
0
  ub_comm_base_now(rep.c->ev->base);
1020
0
  for(i=0; i<NUM_UDP_PER_SELECT; i++) {
1021
0
    sldns_buffer_clear(rep.c->buffer);
1022
0
    timeval_clear(&rep.c->recv_tv);
1023
0
    rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
1024
0
    log_assert(fd != -1);
1025
0
    log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
1026
0
    msg.msg_name = &rep.remote_addr;
1027
0
    msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr);
1028
0
    iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
1029
0
    iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
1030
0
    msg.msg_iov = iov;
1031
0
    msg.msg_iovlen = 1;
1032
0
    msg.msg_control = ancil.buf;
1033
0
#ifndef S_SPLINT_S
1034
0
    msg.msg_controllen = sizeof(ancil.buf);
1035
0
#endif /* S_SPLINT_S */
1036
0
    msg.msg_flags = 0;
1037
0
    rcv = recvmsg(fd, &msg, MSG_DONTWAIT);
1038
0
    if(rcv == -1) {
1039
0
      if(errno != EAGAIN && errno != EINTR
1040
0
        && udp_recv_needs_log(errno)) {
1041
0
        log_err("recvmsg failed: %s", strerror(errno));
1042
0
      }
1043
0
      return;
1044
0
    }
1045
0
    rep.remote_addrlen = msg.msg_namelen;
1046
0
    sldns_buffer_skip(rep.c->buffer, rcv);
1047
0
    sldns_buffer_flip(rep.c->buffer);
1048
0
    rep.srctype = 0;
1049
0
    rep.is_proxied = 0;
1050
0
#ifndef S_SPLINT_S
1051
0
    for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
1052
0
      cmsg = CMSG_NXTHDR(&msg, cmsg)) {
1053
0
      if( cmsg->cmsg_level == IPPROTO_IPV6 &&
1054
0
        cmsg->cmsg_type == IPV6_PKTINFO) {
1055
0
        rep.srctype = 6;
1056
0
        memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
1057
0
          sizeof(struct in6_pktinfo));
1058
0
        break;
1059
0
#ifdef IP_PKTINFO
1060
0
      } else if( cmsg->cmsg_level == IPPROTO_IP &&
1061
0
        cmsg->cmsg_type == IP_PKTINFO) {
1062
0
        rep.srctype = 4;
1063
0
        memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
1064
0
          sizeof(struct in_pktinfo));
1065
0
        break;
1066
#elif defined(IP_RECVDSTADDR)
1067
      } else if( cmsg->cmsg_level == IPPROTO_IP &&
1068
        cmsg->cmsg_type == IP_RECVDSTADDR) {
1069
        rep.srctype = 4;
1070
        memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
1071
          sizeof(struct in_addr));
1072
        break;
1073
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
1074
0
#ifdef HAVE_LINUX_NET_TSTAMP_H
1075
0
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1076
0
        cmsg->cmsg_type == SO_TIMESTAMPNS) {
1077
0
        ts = (struct timespec *)CMSG_DATA(cmsg);
1078
0
        TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
1079
0
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1080
0
        cmsg->cmsg_type == SO_TIMESTAMPING) {
1081
0
        ts = (struct timespec *)CMSG_DATA(cmsg);
1082
0
        TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
1083
0
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1084
0
        cmsg->cmsg_type == SO_TIMESTAMP) {
1085
0
        memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
1086
#elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP)
1087
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1088
        cmsg->cmsg_type == SCM_TIMESTAMP) {
1089
        /* FreeBSD and also Linux. */
1090
        memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
1091
#endif /* HAVE_LINUX_NET_TSTAMP_H */
1092
0
      }
1093
0
    }
1094
1095
0
    if(verbosity >= VERB_ALGO && rep.srctype != 0)
1096
0
      p_ancil("receive_udp on interface", &rep);
1097
0
#endif /* S_SPLINT_S */
1098
1099
0
    if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
1100
0
      &rep, 0)) {
1101
0
      log_err("proxy_protocol: could not consume PROXYv2 header");
1102
0
      return;
1103
0
    }
1104
0
    if(!rep.is_proxied) {
1105
0
      rep.client_addrlen = rep.remote_addrlen;
1106
0
      memmove(&rep.client_addr, &rep.remote_addr,
1107
0
        rep.remote_addrlen);
1108
0
    }
1109
1110
0
    fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
1111
0
    if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
1112
      /* send back immediate reply */
1113
0
      struct sldns_buffer *buffer;
1114
#ifdef USE_DNSCRYPT
1115
      buffer = rep.c->dnscrypt_buffer;
1116
#else
1117
0
      buffer = rep.c->buffer;
1118
0
#endif
1119
0
      (void)comm_point_send_udp_msg_if(rep.c, buffer,
1120
0
        (struct sockaddr*)&rep.remote_addr,
1121
0
        rep.remote_addrlen, &rep);
1122
0
    }
1123
0
    if(!rep.c || rep.c->fd == -1) /* commpoint closed */
1124
0
      break;
1125
0
  }
1126
0
}
1127
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
1128
1129
void
1130
comm_point_udp_callback(int fd, short event, void* arg)
1131
0
{
1132
0
  struct comm_reply rep;
1133
0
  ssize_t rcv;
1134
0
  int i;
1135
0
  struct sldns_buffer *buffer;
1136
1137
0
  rep.c = (struct comm_point*)arg;
1138
0
  log_assert(rep.c->type == comm_udp);
1139
1140
0
  if(!(event&UB_EV_READ))
1141
0
    return;
1142
0
  log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
1143
0
  ub_comm_base_now(rep.c->ev->base);
1144
0
  for(i=0; i<NUM_UDP_PER_SELECT; i++) {
1145
0
    sldns_buffer_clear(rep.c->buffer);
1146
0
    rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
1147
0
    log_assert(fd != -1);
1148
0
    log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
1149
0
    rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
1150
0
      sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT,
1151
0
      (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen);
1152
0
    if(rcv == -1) {
1153
0
#ifndef USE_WINSOCK
1154
0
      if(errno != EAGAIN && errno != EINTR
1155
0
        && udp_recv_needs_log(errno))
1156
0
        log_err("recvfrom %d failed: %s",
1157
0
          fd, strerror(errno));
1158
#else
1159
      if(WSAGetLastError() != WSAEINPROGRESS &&
1160
        WSAGetLastError() != WSAECONNRESET &&
1161
        WSAGetLastError()!= WSAEWOULDBLOCK &&
1162
        udp_recv_needs_log(WSAGetLastError()))
1163
        log_err("recvfrom failed: %s",
1164
          wsa_strerror(WSAGetLastError()));
1165
#endif
1166
0
      return;
1167
0
    }
1168
0
    sldns_buffer_skip(rep.c->buffer, rcv);
1169
0
    sldns_buffer_flip(rep.c->buffer);
1170
0
    rep.srctype = 0;
1171
0
    rep.is_proxied = 0;
1172
1173
0
    if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
1174
0
      &rep, 0)) {
1175
0
      log_err("proxy_protocol: could not consume PROXYv2 header");
1176
0
      return;
1177
0
    }
1178
0
    if(!rep.is_proxied) {
1179
0
      rep.client_addrlen = rep.remote_addrlen;
1180
0
      memmove(&rep.client_addr, &rep.remote_addr,
1181
0
        rep.remote_addrlen);
1182
0
    }
1183
1184
0
    fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
1185
0
    if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
1186
      /* send back immediate reply */
1187
#ifdef USE_DNSCRYPT
1188
      buffer = rep.c->dnscrypt_buffer;
1189
#else
1190
0
      buffer = rep.c->buffer;
1191
0
#endif
1192
0
      (void)comm_point_send_udp_msg(rep.c, buffer,
1193
0
        (struct sockaddr*)&rep.remote_addr,
1194
0
        rep.remote_addrlen, 0);
1195
0
    }
1196
0
    if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
1197
    another UDP port. Note rep.c cannot be reused with TCP fd. */
1198
0
      break;
1199
0
  }
1200
0
}
1201
1202
#ifdef HAVE_NGTCP2
1203
void
1204
doq_pkt_addr_init(struct doq_pkt_addr* paddr)
1205
{
1206
  paddr->addrlen = (socklen_t)sizeof(paddr->addr);
1207
  paddr->localaddrlen = (socklen_t)sizeof(paddr->localaddr);
1208
  paddr->ifindex = 0;
1209
}
1210
1211
/** set the ecn on the transmission; the value is written to the IP_TOS
 * socket option, or to IPV6_TCLASS if family is AF_INET6. A failure of
 * setsockopt is logged and otherwise ignored. */
static void
doq_set_ecn(int fd, int family, uint32_t ecn)
{
  unsigned int tos = ecn;
  if(family != AF_INET6) {
    if(setsockopt(fd, IPPROTO_IP, IP_TOS, &tos,
      (socklen_t)sizeof(tos)) == -1) {
      log_err("setsockopt(.. IP_TOS ..): %s",
        strerror(errno));
    }
    return;
  }
  if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &tos,
    (socklen_t)sizeof(tos)) == -1) {
    log_err("setsockopt(.. IPV6_TCLASS ..): %s",
      strerror(errno));
  }
}
1230
1231
/** set the local address in the control ancillary data */
1232
static void
1233
doq_set_localaddr_cmsg(struct msghdr* msg, size_t control_size,
1234
  struct doq_addr_storage* localaddr, socklen_t localaddrlen,
1235
  int ifindex)
1236
{
1237
#ifndef S_SPLINT_S
1238
  struct cmsghdr* cmsg;
1239
#endif /* S_SPLINT_S */
1240
#ifndef S_SPLINT_S
1241
  cmsg = CMSG_FIRSTHDR(msg);
1242
  if(localaddr->sockaddr.in.sin_family == AF_INET) {
1243
#ifdef IP_PKTINFO
1244
    struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
1245
    struct in_pktinfo v4info;
1246
    log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1247
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
1248
    memset(msg->msg_control, 0, msg->msg_controllen);
1249
    log_assert(msg->msg_controllen <= control_size);
1250
    cmsg->cmsg_level = IPPROTO_IP;
1251
    cmsg->cmsg_type = IP_PKTINFO;
1252
    memset(&v4info, 0, sizeof(v4info));
1253
#  ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
1254
    memmove(&v4info.ipi_spec_dst, &sa->sin_addr,
1255
      sizeof(struct in_addr));
1256
#  else
1257
    memmove(&v4info.ipi_addr, &sa->sin_addr,
1258
      sizeof(struct in_addr));
1259
#  endif
1260
    v4info.ipi_ifindex = ifindex;
1261
    memmove(CMSG_DATA(cmsg), &v4info, sizeof(struct in_pktinfo));
1262
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
1263
#elif defined(IP_SENDSRCADDR)
1264
    struct sockaddr_in* sa= (struct sockaddr_in*)localaddr;
1265
    log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1266
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
1267
    memset(msg->msg_control, 0, msg->msg_controllen);
1268
    log_assert(msg->msg_controllen <= control_size);
1269
    cmsg->cmsg_level = IPPROTO_IP;
1270
    cmsg->cmsg_type = IP_SENDSRCADDR;
1271
    memmove(CMSG_DATA(cmsg),  &sa->sin_addr,
1272
      sizeof(struct in_addr));
1273
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1274
#endif
1275
  } else {
1276
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
1277
    struct in6_pktinfo v6info;
1278
    log_assert(localaddrlen >= sizeof(struct sockaddr_in6));
1279
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
1280
    memset(msg->msg_control, 0, msg->msg_controllen);
1281
    log_assert(msg->msg_controllen <= control_size);
1282
    cmsg->cmsg_level = IPPROTO_IPV6;
1283
    cmsg->cmsg_type = IPV6_PKTINFO;
1284
    memset(&v6info, 0, sizeof(v6info));
1285
    memmove(&v6info.ipi6_addr, &sa6->sin6_addr,
1286
      sizeof(struct in6_addr));
1287
    v6info.ipi6_ifindex = ifindex;
1288
    memmove(CMSG_DATA(cmsg), &v6info, sizeof(struct in6_pktinfo));
1289
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
1290
  }
1291
#endif /* S_SPLINT_S */
1292
  /* Ignore unused variables, if no assertions are compiled. */
1293
  (void)localaddrlen;
1294
  (void)control_size;
1295
}
1296
1297
/** write address and port into strings */
1298
static int
1299
doq_print_addr_port(struct doq_addr_storage* addr, socklen_t addrlen,
1300
  char* host, size_t hostlen, char* port, size_t portlen)
1301
{
1302
  if(addr->sockaddr.in.sin_family == AF_INET) {
1303
    struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1304
    log_assert(addrlen >= sizeof(*sa));
1305
    if(inet_ntop(sa->sin_family, &sa->sin_addr, host,
1306
      (socklen_t)hostlen) == 0) {
1307
      log_hex("inet_ntop error: address", &sa->sin_addr,
1308
        sizeof(sa->sin_addr));
1309
      return 0;
1310
    }
1311
    snprintf(port, portlen, "%u", (unsigned)ntohs(sa->sin_port));
1312
  } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1313
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1314
    log_assert(addrlen >= sizeof(*sa6));
1315
    if(inet_ntop(sa6->sin6_family, &sa6->sin6_addr, host,
1316
      (socklen_t)hostlen) == 0) {
1317
      log_hex("inet_ntop error: address", &sa6->sin6_addr,
1318
        sizeof(sa6->sin6_addr));
1319
      return 0;
1320
    }
1321
    snprintf(port, portlen, "%u", (unsigned)ntohs(sa6->sin6_port));
1322
  }
1323
  return 1;
1324
}
1325
1326
/** doq store the blocked packet when write has blocked */
1327
static void
1328
doq_store_blocked_pkt(struct comm_point* c, struct doq_pkt_addr* paddr,
1329
  uint32_t ecn)
1330
{
1331
  if(c->doq_socket->have_blocked_pkt)
1332
    return; /* should not happen that we write when there is
1333
    already a blocked write, but if so, drop it. */
1334
  if(sldns_buffer_limit(c->doq_socket->pkt_buf) >
1335
    sldns_buffer_capacity(c->doq_socket->blocked_pkt))
1336
    return; /* impossibly large, drop packet. impossible because
1337
    pkt_buf and blocked_pkt are the same size. */
1338
  c->doq_socket->have_blocked_pkt = 1;
1339
  c->doq_socket->blocked_pkt_pi.ecn = ecn;
1340
  memcpy(c->doq_socket->blocked_paddr, paddr,
1341
    sizeof(*c->doq_socket->blocked_paddr));
1342
  sldns_buffer_clear(c->doq_socket->blocked_pkt);
1343
  sldns_buffer_write(c->doq_socket->blocked_pkt,
1344
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1345
    sldns_buffer_limit(c->doq_socket->pkt_buf));
1346
  sldns_buffer_flip(c->doq_socket->blocked_pkt);
1347
}
1348
1349
void
1350
doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, uint32_t ecn)
1351
{
1352
  struct msghdr msg;
1353
  struct iovec iov[1];
1354
  union {
1355
    struct cmsghdr hdr;
1356
    char buf[256];
1357
  } control;
1358
  ssize_t ret;
1359
  iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1360
  iov[0].iov_len = sldns_buffer_limit(c->doq_socket->pkt_buf);
1361
  memset(&msg, 0, sizeof(msg));
1362
  msg.msg_name = (void*)&paddr->addr;
1363
  msg.msg_namelen = paddr->addrlen;
1364
  msg.msg_iov = iov;
1365
  msg.msg_iovlen = 1;
1366
  msg.msg_control = control.buf;
1367
#ifndef S_SPLINT_S
1368
  msg.msg_controllen = sizeof(control.buf);
1369
#endif /* S_SPLINT_S */
1370
  msg.msg_flags = 0;
1371
1372
  doq_set_localaddr_cmsg(&msg, sizeof(control.buf), &paddr->localaddr,
1373
    paddr->localaddrlen, paddr->ifindex);
1374
  doq_set_ecn(c->fd, paddr->addr.sockaddr.in.sin_family, ecn);
1375
1376
  for(;;) {
1377
    ret = sendmsg(c->fd, &msg, MSG_DONTWAIT);
1378
    if(ret == -1 && errno == EINTR)
1379
      continue;
1380
    break;
1381
  }
1382
  if(ret == -1) {
1383
#ifndef USE_WINSOCK
1384
    if(errno == EAGAIN ||
1385
#  ifdef EWOULDBLOCK
1386
      errno == EWOULDBLOCK ||
1387
#  endif
1388
      errno == ENOBUFS)
1389
#else
1390
    if(WSAGetLastError() == WSAEINPROGRESS ||
1391
      WSAGetLastError() == WSAENOBUFS ||
1392
      WSAGetLastError() == WSAEWOULDBLOCK)
1393
#endif
1394
    {
1395
      /* udp send has blocked */
1396
      doq_store_blocked_pkt(c, paddr, ecn);
1397
      return;
1398
    }
1399
    if(!udp_send_errno_needs_log((void*)&paddr->addr,
1400
      paddr->addrlen))
1401
      return;
1402
    if(verbosity >= VERB_OPS) {
1403
      char host[256], port[32];
1404
      if(doq_print_addr_port(&paddr->addr, paddr->addrlen,
1405
        host, sizeof(host), port, sizeof(port))) {
1406
        verbose(VERB_OPS, "doq sendmsg to %s %s "
1407
          "failed: %s", host, port,
1408
          strerror(errno));
1409
      } else {
1410
        verbose(VERB_OPS, "doq sendmsg failed: %s",
1411
          strerror(errno));
1412
      }
1413
    }
1414
    return;
1415
  } else if(ret != (ssize_t)sldns_buffer_limit(c->doq_socket->pkt_buf)) {
1416
    char host[256], port[32];
1417
    if(doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1418
      sizeof(host), port, sizeof(port))) {
1419
      log_err("doq sendmsg to %s %s failed: "
1420
        "sent %d in place of %d bytes", 
1421
        host, port, (int)ret,
1422
        (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1423
    } else {
1424
      log_err("doq sendmsg failed: "
1425
        "sent %d in place of %d bytes", 
1426
        (int)ret, (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1427
    }
1428
    return;
1429
  }
1430
}
1431
1432
/** fetch port number */
1433
static int
1434
doq_sockaddr_get_port(struct doq_addr_storage* addr)
1435
{
1436
  if(addr->sockaddr.in.sin_family == AF_INET) {
1437
    struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1438
    return ntohs(sa->sin_port);
1439
  } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1440
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1441
    return ntohs(sa6->sin6_port);
1442
  }
1443
  return 0;
1444
}
1445
1446
/** get local address from ancillary data headers */
1447
static int
1448
doq_get_localaddr_cmsg(struct comm_point* c, struct doq_pkt_addr* paddr,
1449
  int* pkt_continue, struct msghdr* msg)
1450
{
1451
#ifndef S_SPLINT_S
1452
  struct cmsghdr* cmsg;
1453
#endif /* S_SPLINT_S */
1454
1455
  memset(&paddr->localaddr, 0, sizeof(paddr->localaddr));
1456
#ifndef S_SPLINT_S
1457
  for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
1458
    cmsg = CMSG_NXTHDR(msg, cmsg)) {
1459
    if( cmsg->cmsg_level == IPPROTO_IPV6 &&
1460
      cmsg->cmsg_type == IPV6_PKTINFO) {
1461
      struct in6_pktinfo* v6info =
1462
        (struct in6_pktinfo*)CMSG_DATA(cmsg);
1463
      struct sockaddr_in6* sa= (struct sockaddr_in6*)
1464
        &paddr->localaddr;
1465
      struct sockaddr_in6* rema = (struct sockaddr_in6*)
1466
        &paddr->addr;
1467
      if(rema->sin6_family != AF_INET6) {
1468
        log_err("doq cmsg family mismatch cmsg is ip6");
1469
        *pkt_continue = 1;
1470
        return 0;
1471
      }
1472
      sa->sin6_family = AF_INET6;
1473
      sa->sin6_port = htons(doq_sockaddr_get_port(
1474
        (void*)c->socket->addr));
1475
      paddr->ifindex = v6info->ipi6_ifindex;
1476
      memmove(&sa->sin6_addr, &v6info->ipi6_addr,
1477
        sizeof(struct in6_addr));
1478
      paddr->localaddrlen = sizeof(struct sockaddr_in6);
1479
      break;
1480
#ifdef IP_PKTINFO
1481
    } else if( cmsg->cmsg_level == IPPROTO_IP &&
1482
      cmsg->cmsg_type == IP_PKTINFO) {
1483
      struct in_pktinfo* v4info =
1484
        (struct in_pktinfo*)CMSG_DATA(cmsg);
1485
      struct sockaddr_in* sa= (struct sockaddr_in*)
1486
        &paddr->localaddr;
1487
      struct sockaddr_in* rema = (struct sockaddr_in*)
1488
        &paddr->addr;
1489
      if(rema->sin_family != AF_INET) {
1490
        log_err("doq cmsg family mismatch cmsg is ip4");
1491
        *pkt_continue = 1;
1492
        return 0;
1493
      }
1494
      sa->sin_family = AF_INET;
1495
      sa->sin_port = htons(doq_sockaddr_get_port(
1496
        (void*)c->socket->addr));
1497
      paddr->ifindex = v4info->ipi_ifindex;
1498
      memmove(&sa->sin_addr, &v4info->ipi_addr,
1499
        sizeof(struct in_addr));
1500
      paddr->localaddrlen = sizeof(struct sockaddr_in);
1501
      break;
1502
#elif defined(IP_RECVDSTADDR)
1503
    } else if( cmsg->cmsg_level == IPPROTO_IP &&
1504
      cmsg->cmsg_type == IP_RECVDSTADDR) {
1505
      struct sockaddr_in* sa= (struct sockaddr_in*)
1506
        &paddr->localaddr;
1507
      struct sockaddr_in* rema = (struct sockaddr_in*)
1508
        &paddr->addr;
1509
      if(rema->sin_family != AF_INET) {
1510
        log_err("doq cmsg family mismatch cmsg is ip4");
1511
        *pkt_continue = 1;
1512
        return 0;
1513
      }
1514
      sa->sin_family = AF_INET;
1515
      sa->sin_port = htons(doq_sockaddr_get_port(
1516
        (void*)c->socket->addr));
1517
      paddr->ifindex = 0;
1518
      memmove(&sa.sin_addr, CMSG_DATA(cmsg),
1519
        sizeof(struct in_addr));
1520
      paddr->localaddrlen = sizeof(struct sockaddr_in);
1521
      break;
1522
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
1523
    }
1524
  }
1525
#endif /* S_SPLINT_S */
1526
1527
return 1;
1528
}
1529
1530
/** get packet ecn information.
 * Scans the control headers of msg for IPV6_TCLASS if family is
 * AF_INET6, and for IP_TOS otherwise. Returns the first data byte of the
 * first such header with a nonzero cmsg_len, or 0 if there is none. */
static uint32_t
msghdr_get_ecn(struct msghdr* msg, int family)
{
#ifndef S_SPLINT_S
  struct cmsghdr* cmsg;
  int want_level = IPPROTO_IP;
  int want_type = IP_TOS;

  if(family == AF_INET6) {
    want_level = IPPROTO_IPV6;
    want_type = IPV6_TCLASS;
  }
  for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
    cmsg = CMSG_NXTHDR(msg, cmsg)) {
    if(cmsg->cmsg_level != want_level ||
      cmsg->cmsg_type != want_type)
      continue;
    if(cmsg->cmsg_len == 0)
      continue;
    return *(uint8_t*)CMSG_DATA(cmsg);
  }
#endif /* S_SPLINT_S */
  return 0;
}
1560
1561
/** receive packet for DoQ on UDP. get ancillary data for addresses,
1562
 * return false if failed and the callback can stop receiving UDP packets
1563
 * if pkt_continue is false. */
1564
static int
1565
doq_recv(struct comm_point* c, struct doq_pkt_addr* paddr, int* pkt_continue,
1566
  struct ngtcp2_pkt_info* pi)
1567
{
1568
  struct msghdr msg;
1569
  struct iovec iov[1];
1570
  ssize_t rcv;
1571
  union {
1572
    struct cmsghdr hdr;
1573
    char buf[256];
1574
  } ancil;
1575
1576
  msg.msg_name = &paddr->addr;
1577
  msg.msg_namelen = (socklen_t)sizeof(paddr->addr);
1578
  iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1579
  iov[0].iov_len = sldns_buffer_remaining(c->doq_socket->pkt_buf);
1580
  msg.msg_iov = iov;
1581
  msg.msg_iovlen = 1;
1582
  msg.msg_control = ancil.buf;
1583
#ifndef S_SPLINT_S
1584
  msg.msg_controllen = sizeof(ancil.buf);
1585
#endif /* S_SPLINT_S */
1586
  msg.msg_flags = 0;
1587
1588
  rcv = recvmsg(c->fd, &msg, MSG_DONTWAIT);
1589
  if(rcv == -1) {
1590
    if(errno != EAGAIN && errno != EINTR
1591
      && udp_recv_needs_log(errno)) {
1592
      log_err("recvmsg failed for doq: %s", strerror(errno));
1593
    }
1594
    *pkt_continue = 0;
1595
    return 0;
1596
  }
1597
1598
  paddr->addrlen = msg.msg_namelen;
1599
  sldns_buffer_skip(c->doq_socket->pkt_buf, rcv);
1600
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1601
  if(!doq_get_localaddr_cmsg(c, paddr, pkt_continue, &msg))
1602
    return 0;
1603
  pi->ecn = msghdr_get_ecn(&msg, paddr->addr.sockaddr.in.sin_family);
1604
  return 1;
1605
}
1606
1607
/** send the version negotiation for doq. scid and dcid are flipped around
1608
 * to send back to the client. */
1609
static void
1610
doq_send_version_negotiation(struct comm_point* c, struct doq_pkt_addr* paddr,
1611
  const uint8_t* dcid, size_t dcidlen, const uint8_t* scid,
1612
  size_t scidlen)
1613
{
1614
  uint32_t versions[2];
1615
  size_t versions_len = 0;
1616
  ngtcp2_ssize ret;
1617
  uint8_t unused_random;
1618
1619
  /* fill the array with supported versions */
1620
  versions[0] = NGTCP2_PROTO_VER_V1;
1621
  versions_len = 1;
1622
  unused_random = ub_random_max(c->doq_socket->rnd, 256);
1623
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1624
  ret = ngtcp2_pkt_write_version_negotiation(
1625
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1626
    sldns_buffer_capacity(c->doq_socket->pkt_buf), unused_random,
1627
    dcid, dcidlen, scid, scidlen, versions, versions_len);
1628
  if(ret < 0) {
1629
    log_err("ngtcp2_pkt_write_version_negotiation failed: %s",
1630
      ngtcp2_strerror(ret));
1631
    return;
1632
  }
1633
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1634
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1635
  doq_send_pkt(c, paddr, 0);
1636
}
1637
1638
/** Find the doq_conn object by remote address and dcid */
1639
static struct doq_conn*
1640
doq_conn_find(struct doq_table* table, struct doq_addr_storage* addr,
1641
  socklen_t addrlen, struct doq_addr_storage* localaddr,
1642
  socklen_t localaddrlen, int ifindex, const uint8_t* dcid,
1643
  size_t dcidlen)
1644
{
1645
  struct rbnode_type* node;
1646
  struct doq_conn key;
1647
  memset(&key.node, 0, sizeof(key.node));
1648
  key.node.key = &key;
1649
  memmove(&key.key.paddr.addr, addr, addrlen);
1650
  key.key.paddr.addrlen = addrlen;
1651
  memmove(&key.key.paddr.localaddr, localaddr, localaddrlen);
1652
  key.key.paddr.localaddrlen = localaddrlen;
1653
  key.key.paddr.ifindex = ifindex;
1654
  key.key.dcid = (void*)dcid;
1655
  key.key.dcidlen = dcidlen;
1656
  node = rbtree_search(table->conn_tree, &key);
1657
  if(node)
1658
    return (struct doq_conn*)node->key;
1659
  return NULL;
1660
}
1661
1662
/** find the doq_con by the connection id */
1663
static struct doq_conn*
1664
doq_conn_find_by_id(struct doq_table* table, const uint8_t* dcid,
1665
  size_t dcidlen)
1666
{
1667
  struct doq_conid* conid;
1668
  lock_rw_rdlock(&table->conid_lock);
1669
  conid = doq_conid_find(table, dcid, dcidlen);
1670
  if(conid) {
1671
    /* make a copy of the key */
1672
    struct doq_conn* conn;
1673
    struct doq_conn_key key = conid->key;
1674
    uint8_t cid[NGTCP2_MAX_CIDLEN];
1675
    log_assert(conid->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1676
    memcpy(cid, conid->key.dcid, conid->key.dcidlen);
1677
    key.dcid = cid;
1678
    lock_rw_unlock(&table->conid_lock);
1679
1680
    /* now that the conid lock is released, look up the conn */
1681
    lock_rw_rdlock(&table->lock);
1682
    conn = doq_conn_find(table, &key.paddr.addr,
1683
      key.paddr.addrlen, &key.paddr.localaddr,
1684
      key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
1685
      key.dcidlen);
1686
    if(!conn) {
1687
      /* The connection got deleted between the conid lookup
1688
       * and the connection lock grab, it no longer exists,
1689
       * so return null. */
1690
      lock_rw_unlock(&table->lock);
1691
      return NULL;
1692
    }
1693
    lock_basic_lock(&conn->lock);
1694
    if(conn->is_deleted) {
1695
      lock_rw_unlock(&table->lock);
1696
      lock_basic_unlock(&conn->lock);
1697
      return NULL;
1698
    }
1699
    lock_rw_unlock(&table->lock);
1700
    return conn;
1701
  }
1702
  lock_rw_unlock(&table->conid_lock);
1703
  return NULL;
1704
}
1705
1706
/** Find the doq_conn, by addr or by connection id */
1707
static struct doq_conn*
1708
doq_conn_find_by_addr_or_cid(struct doq_table* table,
1709
  struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen)
1710
{
1711
  struct doq_conn* conn;
1712
  lock_rw_rdlock(&table->lock);
1713
  conn = doq_conn_find(table, &paddr->addr, paddr->addrlen,
1714
    &paddr->localaddr, paddr->localaddrlen, paddr->ifindex,
1715
    dcid, dcidlen);
1716
  if(conn && conn->is_deleted) {
1717
    conn = NULL;
1718
  }
1719
  if(conn) {
1720
    lock_basic_lock(&conn->lock);
1721
    lock_rw_unlock(&table->lock);
1722
    verbose(VERB_ALGO, "doq: found connection by address, dcid");
1723
  } else {
1724
    lock_rw_unlock(&table->lock);
1725
    conn = doq_conn_find_by_id(table, dcid, dcidlen);
1726
    if(conn) {
1727
      verbose(VERB_ALGO, "doq: found connection by dcid");
1728
    }
1729
  }
1730
  return conn;
1731
}
1732
1733
/** decode doq packet header, false on handled or failure, true to continue
1734
 * to process the packet */
1735
static int
1736
doq_decode_pkt_header_negotiate(struct comm_point* c,
1737
  struct doq_pkt_addr* paddr, struct doq_conn** conn)
1738
{
1739
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1740
  struct ngtcp2_version_cid vc;
1741
#else
1742
  uint32_t version;
1743
  const uint8_t *dcid, *scid;
1744
  size_t dcidlen, scidlen;
1745
#endif
1746
  int rv;
1747
1748
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1749
  rv = ngtcp2_pkt_decode_version_cid(&vc,
1750
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1751
    sldns_buffer_limit(c->doq_socket->pkt_buf),
1752
    c->doq_socket->sv_scidlen);
1753
#else
1754
  rv = ngtcp2_pkt_decode_version_cid(&version, &dcid, &dcidlen,
1755
    &scid, &scidlen, sldns_buffer_begin(c->doq_socket->pkt_buf),
1756
    sldns_buffer_limit(c->doq_socket->pkt_buf), c->doq_socket->sv_scidlen);
1757
#endif
1758
  if(rv != 0) {
1759
    if(rv == NGTCP2_ERR_VERSION_NEGOTIATION) {
1760
      /* send the version negotiation */
1761
      doq_send_version_negotiation(c, paddr,
1762
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1763
      vc.scid, vc.scidlen, vc.dcid, vc.dcidlen
1764
#else
1765
      scid, scidlen, dcid, dcidlen
1766
#endif
1767
      );
1768
      return 0;
1769
    }
1770
    verbose(VERB_ALGO, "doq: could not decode version "
1771
      "and CID from QUIC packet header: %s",
1772
      ngtcp2_strerror(rv));
1773
    return 0;
1774
  }
1775
1776
  if(verbosity >= VERB_ALGO) {
1777
    verbose(VERB_ALGO, "ngtcp2_pkt_decode_version_cid packet has "
1778
      "QUIC protocol version %u", (unsigned)
1779
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1780
      vc.
1781
#endif
1782
      version
1783
      );
1784
    log_hex("dcid",
1785
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1786
      (void*)vc.dcid, vc.dcidlen
1787
#else
1788
      (void*)dcid, dcidlen
1789
#endif
1790
      );
1791
    log_hex("scid",
1792
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1793
      (void*)vc.scid, vc.scidlen
1794
#else
1795
      (void*)scid, scidlen
1796
#endif
1797
      );
1798
  }
1799
  *conn = doq_conn_find_by_addr_or_cid(c->doq_socket->table, paddr,
1800
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1801
    vc.dcid, vc.dcidlen
1802
#else
1803
    dcid, dcidlen
1804
#endif
1805
    );
1806
  if(*conn)
1807
    (*conn)->doq_socket = c->doq_socket;
1808
  return 1;
1809
}
1810
1811
/** fill cid structure with random data */
1812
static void doq_cid_randfill(struct ngtcp2_cid* cid, size_t datalen,
1813
  struct ub_randstate* rnd)
1814
{
1815
  uint8_t buf[32];
1816
  if(datalen > sizeof(buf))
1817
    datalen = sizeof(buf);
1818
  doq_fill_rand(rnd, buf, datalen);
1819
  ngtcp2_cid_init(cid, buf, datalen);
1820
}
1821
1822
/** send retry packet for doq connection. */
1823
static void
1824
doq_send_retry(struct comm_point* c, struct doq_pkt_addr* paddr,
1825
  struct ngtcp2_pkt_hd* hd)
1826
{
1827
  char host[256], port[32];
1828
  struct ngtcp2_cid scid;
1829
  uint8_t token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN];
1830
  ngtcp2_tstamp ts;
1831
  ngtcp2_ssize tokenlen, ret;
1832
1833
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1834
    sizeof(host), port, sizeof(port))) {
1835
    log_err("doq_send_retry failed");
1836
    return;
1837
  }
1838
  verbose(VERB_ALGO, "doq: sending retry packet to %s %s", host, port);
1839
1840
  /* the server chosen source connection ID */
1841
  scid.datalen = c->doq_socket->sv_scidlen;
1842
  doq_cid_randfill(&scid, scid.datalen, c->doq_socket->rnd);
1843
1844
  ts = doq_get_timestamp_nanosec();
1845
1846
  tokenlen = ngtcp2_crypto_generate_retry_token(token,
1847
    c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1848
    hd->version, (void*)&paddr->addr, paddr->addrlen, &scid,
1849
    &hd->dcid, ts);
1850
  if(tokenlen < 0) {
1851
    log_err("ngtcp2_crypto_generate_retry_token failed: %s",
1852
      ngtcp2_strerror(tokenlen));
1853
    return;
1854
  }
1855
1856
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1857
  ret = ngtcp2_crypto_write_retry(sldns_buffer_begin(c->doq_socket->pkt_buf),
1858
    sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version,
1859
    &hd->scid, &scid, &hd->dcid, token, tokenlen);
1860
  if(ret < 0) {
1861
    log_err("ngtcp2_crypto_write_retry failed: %s",
1862
      ngtcp2_strerror(ret));
1863
    return;
1864
  }
1865
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1866
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1867
  doq_send_pkt(c, paddr, 0);
1868
}
1869
1870
/** doq send stateless connection close */
1871
static void
1872
doq_send_stateless_connection_close(struct comm_point* c,
1873
  struct doq_pkt_addr* paddr, struct ngtcp2_pkt_hd* hd,
1874
  uint64_t error_code)
1875
{
1876
  ngtcp2_ssize ret;
1877
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1878
  ret = ngtcp2_crypto_write_connection_close(
1879
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1880
    sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, &hd->scid,
1881
    &hd->dcid, error_code, NULL, 0);
1882
  if(ret < 0) {
1883
    log_err("ngtcp2_crypto_write_connection_close failed: %s",
1884
      ngtcp2_strerror(ret));
1885
    return;
1886
  }
1887
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1888
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1889
  doq_send_pkt(c, paddr, 0);
1890
}
1891
1892
/** doq verify retry token, false on failure */
1893
static int
1894
doq_verify_retry_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1895
  struct ngtcp2_cid* ocid, struct ngtcp2_pkt_hd* hd)
1896
{
1897
  char host[256], port[32];
1898
  ngtcp2_tstamp ts;
1899
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1900
    sizeof(host), port, sizeof(port))) {
1901
    log_err("doq_verify_retry_token failed");
1902
    return 0;
1903
  }
1904
  ts = doq_get_timestamp_nanosec();
1905
  verbose(VERB_ALGO, "doq: verifying retry token from %s %s", host,
1906
    port);
1907
  if(ngtcp2_crypto_verify_retry_token(ocid,
1908
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1909
    hd->token, hd->tokenlen,
1910
#else
1911
    hd->token.base, hd->token.len,
1912
#endif
1913
    c->doq_socket->static_secret,
1914
    c->doq_socket->static_secret_len, hd->version,
1915
    (void*)&paddr->addr, paddr->addrlen, &hd->dcid,
1916
    10*NGTCP2_SECONDS, ts) != 0) {
1917
    verbose(VERB_ALGO, "doq: could not verify retry token "
1918
      "from %s %s", host, port);
1919
    return 0;
1920
  }
1921
  verbose(VERB_ALGO, "doq: verified retry token from %s %s", host, port);
1922
  return 1;
1923
}
1924
1925
/** doq verify token, false on failure */
1926
static int
1927
doq_verify_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1928
  struct ngtcp2_pkt_hd* hd)
1929
{
1930
  char host[256], port[32];
1931
  ngtcp2_tstamp ts;
1932
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1933
    sizeof(host), port, sizeof(port))) {
1934
    log_err("doq_verify_token failed");
1935
    return 0;
1936
  }
1937
  ts = doq_get_timestamp_nanosec();
1938
  verbose(VERB_ALGO, "doq: verifying token from %s %s", host, port);
1939
  if(ngtcp2_crypto_verify_regular_token(
1940
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1941
    hd->token, hd->tokenlen,
1942
#else
1943
    hd->token.base, hd->token.len,
1944
#endif
1945
    c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1946
    (void*)&paddr->addr, paddr->addrlen, 3600*NGTCP2_SECONDS,
1947
    ts) != 0) {
1948
    verbose(VERB_ALGO, "doq: could not verify token from %s %s",
1949
      host, port);
1950
    return 0;
1951
  }
1952
  verbose(VERB_ALGO, "doq: verified token from %s %s", host, port);
1953
  return 1;
1954
}
1955
1956
/** delete and remove from the lookup tree the doq_conn connection */
1957
static void
1958
doq_delete_connection(struct comm_point* c, struct doq_conn* conn)
1959
{
1960
  struct doq_conn copy;
1961
  uint8_t cid[NGTCP2_MAX_CIDLEN];
1962
  rbnode_type* node;
1963
  if(!conn)
1964
    return;
1965
  /* Copy the key and set it deleted. */
1966
  conn->is_deleted = 1;
1967
  doq_conn_write_disable(conn);
1968
  copy.key = conn->key;
1969
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1970
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
1971
  copy.key.dcid = cid;
1972
  copy.node.key = &copy;
1973
  lock_basic_unlock(&conn->lock);
1974
1975
  /* Now get the table lock to delete it from the tree */
1976
  lock_rw_wrlock(&c->doq_socket->table->lock);
1977
  node = rbtree_delete(c->doq_socket->table->conn_tree, copy.node.key);
1978
  if(node) {
1979
    conn = (struct doq_conn*)node->key;
1980
    lock_basic_lock(&conn->lock);
1981
    doq_conn_write_list_remove(c->doq_socket->table, conn);
1982
    if(conn->timer.timer_in_list) {
1983
      /* Remove timer from list first, because finding the
1984
       * rbnode element of the setlist of same timeouts
1985
       * needs tree lookup. Edit the tree structure after
1986
       * that lookup. */
1987
      doq_timer_list_remove(c->doq_socket->table,
1988
        &conn->timer);
1989
    }
1990
    if(conn->timer.timer_in_tree)
1991
      doq_timer_tree_remove(c->doq_socket->table,
1992
        &conn->timer);
1993
  }
1994
  lock_rw_unlock(&c->doq_socket->table->lock);
1995
  if(node) {
1996
    lock_basic_unlock(&conn->lock);
1997
    doq_table_quic_size_subtract(c->doq_socket->table,
1998
      sizeof(*conn)+conn->key.dcidlen);
1999
    doq_conn_delete(conn, c->doq_socket->table);
2000
  }
2001
}
2002
2003
/** create and setup a new doq connection, to a new destination, or with
2004
 * a new dcid. It has a new set of streams. It is inserted in the lookup tree.
2005
 * Returns NULL on failure. */
2006
static struct doq_conn*
2007
doq_setup_new_conn(struct comm_point* c, struct doq_pkt_addr* paddr,
2008
  struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid)
2009
{
2010
  struct doq_conn* conn;
2011
  if(!doq_table_quic_size_available(c->doq_socket->table,
2012
    c->doq_socket->cfg, sizeof(*conn)+hd->dcid.datalen
2013
    + sizeof(struct doq_stream)
2014
    + 100 /* estimated input query */
2015
    + 1200 /* estimated output query */)) {
2016
    verbose(VERB_ALGO, "doq: no mem available for new connection");
2017
    doq_send_stateless_connection_close(c, paddr, hd,
2018
      NGTCP2_CONNECTION_REFUSED);
2019
    return NULL;
2020
  }
2021
  conn = doq_conn_create(c, paddr, hd->dcid.data, hd->dcid.datalen,
2022
    hd->version);
2023
  if(!conn) {
2024
    log_err("doq: could not allocate doq_conn");
2025
    return NULL;
2026
  }
2027
  lock_rw_wrlock(&c->doq_socket->table->lock);
2028
  lock_basic_lock(&conn->lock);
2029
  if(!rbtree_insert(c->doq_socket->table->conn_tree, &conn->node)) {
2030
    lock_rw_unlock(&c->doq_socket->table->lock);
2031
    log_err("doq: duplicate connection");
2032
    /* conn has no entry in writelist, and no timer yet. */
2033
    lock_basic_unlock(&conn->lock);
2034
    doq_conn_delete(conn, c->doq_socket->table);
2035
    return NULL;
2036
  }
2037
  lock_rw_unlock(&c->doq_socket->table->lock);
2038
  doq_table_quic_size_add(c->doq_socket->table,
2039
    sizeof(*conn)+conn->key.dcidlen);
2040
  verbose(VERB_ALGO, "doq: created new connection");
2041
2042
  /* the scid and dcid switch meaning from the accepted client
2043
   * connection to the server connection. The 'source' and 'destination'
2044
   * meaning is reversed. */
2045
  if(!doq_conn_setup(conn, hd->scid.data, hd->scid.datalen,
2046
    (ocid?ocid->data:NULL), (ocid?ocid->datalen:0),
2047
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2048
    hd->token, hd->tokenlen
2049
#else
2050
    hd->token.base, hd->token.len
2051
#endif
2052
    )) {
2053
    log_err("doq: could not set up connection");
2054
    doq_delete_connection(c, conn);
2055
    return NULL;
2056
  }
2057
  return conn;
2058
}
2059
2060
/** perform doq address validation */
2061
static int
2062
doq_address_validation(struct comm_point* c, struct doq_pkt_addr* paddr,
2063
  struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid,
2064
  struct ngtcp2_cid** pocid)
2065
{
2066
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2067
  const uint8_t* token = hd->token;
2068
  size_t tokenlen = hd->tokenlen;
2069
#else
2070
  const uint8_t* token = hd->token.base;
2071
  size_t tokenlen = hd->token.len;
2072
#endif
2073
  verbose(VERB_ALGO, "doq stateless address validation");
2074
2075
  if(tokenlen == 0 || token == NULL) {
2076
    doq_send_retry(c, paddr, hd);
2077
    return 0;
2078
  }
2079
  if(token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY &&
2080
    hd->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN) {
2081
    doq_send_stateless_connection_close(c, paddr, hd,
2082
      NGTCP2_INVALID_TOKEN);
2083
    return 0;
2084
  }
2085
  if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) {
2086
    if(!doq_verify_retry_token(c, paddr, ocid, hd)) {
2087
      doq_send_stateless_connection_close(c, paddr, hd,
2088
        NGTCP2_INVALID_TOKEN);
2089
      return 0;
2090
    }
2091
    *pocid = ocid;
2092
  } else if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) {
2093
    if(!doq_verify_token(c, paddr, hd)) {
2094
      doq_send_retry(c, paddr, hd);
2095
      return 0;
2096
    }
2097
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2098
    hd->token = NULL;
2099
    hd->tokenlen = 0;
2100
#else
2101
    hd->token.base = NULL;
2102
    hd->token.len = 0;
2103
#endif
2104
  } else {
2105
    verbose(VERB_ALGO, "doq address validation: unrecognised "
2106
      "token in hd.token.base with magic byte 0x%2.2x",
2107
      (int)token[0]);
2108
    if(c->doq_socket->validate_addr) {
2109
      doq_send_retry(c, paddr, hd);
2110
      return 0;
2111
    }
2112
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2113
    hd->token = NULL;
2114
    hd->tokenlen = 0;
2115
#else
2116
    hd->token.base = NULL;
2117
    hd->token.len = 0;
2118
#endif
2119
  }
2120
  return 1;
2121
}
2122
2123
/** the doq accept, returns false if no further processing of content */
2124
static int
2125
doq_accept(struct comm_point* c, struct doq_pkt_addr* paddr,
2126
  struct doq_conn** conn, struct ngtcp2_pkt_info* pi)
2127
{
2128
  int rv;
2129
  struct ngtcp2_pkt_hd hd;
2130
  struct ngtcp2_cid ocid, *pocid=NULL;
2131
  int err_retry;
2132
  memset(&hd, 0, sizeof(hd));
2133
  rv = ngtcp2_accept(&hd, sldns_buffer_begin(c->doq_socket->pkt_buf),
2134
    sldns_buffer_limit(c->doq_socket->pkt_buf));
2135
  if(rv != 0) {
2136
    if(rv == NGTCP2_ERR_RETRY) {
2137
      doq_send_retry(c, paddr, &hd);
2138
      return 0;
2139
    }
2140
    log_err("doq: initial packet failed, ngtcp2_accept failed: %s",
2141
      ngtcp2_strerror(rv));
2142
    return 0;
2143
  }
2144
  if(c->doq_socket->validate_addr ||
2145
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2146
    hd.tokenlen
2147
#else
2148
    hd.token.len
2149
#endif
2150
    ) {
2151
    if(!doq_address_validation(c, paddr, &hd, &ocid, &pocid))
2152
      return 0;
2153
  }
2154
  *conn = doq_setup_new_conn(c, paddr, &hd, pocid);
2155
  if(!*conn)
2156
    return 0;
2157
  (*conn)->doq_socket = c->doq_socket;
2158
  if(!doq_conn_recv(c, paddr, *conn, pi, &err_retry, NULL)) {
2159
    if(err_retry)
2160
      doq_send_retry(c, paddr, &hd);
2161
    doq_delete_connection(c, *conn);
2162
    *conn = NULL;
2163
    return 0;
2164
  }
2165
  return 1;
2166
}
2167
2168
/** doq pickup a timer to wait for for the worker. If any timer exists. */
2169
static void
2170
doq_pickup_timer(struct comm_point* c)
2171
{
2172
  struct doq_timer* t;
2173
  struct timeval tv;
2174
  int have_time = 0;
2175
  memset(&tv, 0, sizeof(tv));
2176
2177
  lock_rw_wrlock(&c->doq_socket->table->lock);
2178
  RBTREE_FOR(t, struct doq_timer*, c->doq_socket->table->timer_tree) {
2179
    if(t->worker_doq_socket == NULL ||
2180
      t->worker_doq_socket == c->doq_socket) {
2181
      /* pick up this element */
2182
      t->worker_doq_socket = c->doq_socket;
2183
      have_time = 1;
2184
      memcpy(&tv, &t->time, sizeof(tv));
2185
      break;
2186
    }
2187
  }
2188
  lock_rw_unlock(&c->doq_socket->table->lock);
2189
2190
  if(have_time) {
2191
    struct timeval rel;
2192
    timeval_subtract(&rel, &tv, c->doq_socket->now_tv);
2193
    comm_timer_set(c->doq_socket->timer, &rel);
2194
    memcpy(&c->doq_socket->marked_time, &tv,
2195
      sizeof(c->doq_socket->marked_time));
2196
    verbose(VERB_ALGO, "doq pickup timer at %d.%6.6d in %d.%6.6d",
2197
      (int)tv.tv_sec, (int)tv.tv_usec, (int)rel.tv_sec,
2198
      (int)rel.tv_usec);
2199
  } else {
2200
    if(comm_timer_is_set(c->doq_socket->timer))
2201
      comm_timer_disable(c->doq_socket->timer);
2202
    memset(&c->doq_socket->marked_time, 0,
2203
      sizeof(c->doq_socket->marked_time));
2204
    verbose(VERB_ALGO, "doq timer disabled");
2205
  }
2206
}
2207
2208
/** doq done with connection, release locks and setup timer and write */
2209
static void
2210
doq_done_setup_timer_and_write(struct comm_point* c, struct doq_conn* conn)
2211
{
2212
  struct doq_conn copy;
2213
  uint8_t cid[NGTCP2_MAX_CIDLEN];
2214
  rbnode_type* node;
2215
  struct timeval new_tv;
2216
  int write_change = 0, timer_change = 0;
2217
2218
  /* No longer in callbacks, so the pointer to doq_socket is back
2219
   * to NULL. */
2220
  conn->doq_socket = NULL;
2221
2222
  if(doq_conn_check_timer(conn, &new_tv))
2223
    timer_change = 1;
2224
  if( (conn->write_interest && !conn->on_write_list) ||
2225
    (!conn->write_interest && conn->on_write_list))
2226
    write_change = 1;
2227
2228
  if(!timer_change && !write_change) {
2229
    /* Nothing to do. */
2230
    lock_basic_unlock(&conn->lock);
2231
    return;
2232
  }
2233
2234
  /* The table lock is needed to change the write list and timer tree.
2235
   * So the connection lock is release and then the connection is
2236
   * looked up again. */
2237
  copy.key = conn->key;
2238
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2239
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2240
  copy.key.dcid = cid;
2241
  copy.node.key = &copy;
2242
  lock_basic_unlock(&conn->lock);
2243
2244
  lock_rw_wrlock(&c->doq_socket->table->lock);
2245
  node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2246
  if(!node) {
2247
    lock_rw_unlock(&c->doq_socket->table->lock);
2248
    /* Must have been deleted in the mean time. */
2249
    return;
2250
  }
2251
  conn = (struct doq_conn*)node->key;
2252
  lock_basic_lock(&conn->lock);
2253
  if(conn->is_deleted) {
2254
    /* It is deleted now. */
2255
    lock_rw_unlock(&c->doq_socket->table->lock);
2256
    lock_basic_unlock(&conn->lock);
2257
    return;
2258
  }
2259
2260
  if(write_change) {
2261
    /* Edit the write lists, we are holding the table.lock and can
2262
     * edit the list first,last and also prev,next and on_list
2263
     * elements in the doq_conn structures. */
2264
    doq_conn_set_write_list(c->doq_socket->table, conn);
2265
  }
2266
  if(timer_change) {
2267
    doq_timer_set(c->doq_socket->table, &conn->timer,
2268
      c->doq_socket, &new_tv);
2269
  }
2270
  lock_rw_unlock(&c->doq_socket->table->lock);
2271
  lock_basic_unlock(&conn->lock);
2272
}
2273
2274
/** doq done with connection callbacks, release locks and setup write */
2275
static void
2276
doq_done_with_conn_cb(struct comm_point* c, struct doq_conn* conn)
2277
{
2278
  struct doq_conn copy;
2279
  uint8_t cid[NGTCP2_MAX_CIDLEN];
2280
  rbnode_type* node;
2281
2282
  /* no longer in callbacks, so the pointer to doq_socket is back
2283
   * to NULL. */
2284
  conn->doq_socket = NULL;
2285
2286
  if( (conn->write_interest && conn->on_write_list) ||
2287
    (!conn->write_interest && !conn->on_write_list)) {
2288
    /* The connection already has the required write list
2289
     * status. */
2290
    lock_basic_unlock(&conn->lock);
2291
    return;
2292
  }
2293
2294
  /* To edit the write list of connections we have to hold the table
2295
   * lock, so we release the connection and then look it up again. */
2296
  copy.key = conn->key;
2297
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2298
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2299
  copy.key.dcid = cid;
2300
  copy.node.key = &copy;
2301
  lock_basic_unlock(&conn->lock);
2302
2303
  lock_rw_wrlock(&c->doq_socket->table->lock);
2304
  node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2305
  if(!node) {
2306
    lock_rw_unlock(&c->doq_socket->table->lock);
2307
    /* must have been deleted in the mean time */
2308
    return;
2309
  }
2310
  conn = (struct doq_conn*)node->key;
2311
  lock_basic_lock(&conn->lock);
2312
  if(conn->is_deleted) {
2313
    /* it is deleted now. */
2314
    lock_rw_unlock(&c->doq_socket->table->lock);
2315
    lock_basic_unlock(&conn->lock);
2316
    return;
2317
  }
2318
2319
  /* edit the write lists, we are holding the table.lock and can
2320
   * edit the list first,last and also prev,next and on_list elements
2321
   * in the doq_conn structures. */
2322
  doq_conn_set_write_list(c->doq_socket->table, conn);
2323
  lock_rw_unlock(&c->doq_socket->table->lock);
2324
  lock_basic_unlock(&conn->lock);
2325
}
2326
2327
/** doq count the length of the write list */
2328
static size_t
2329
doq_write_list_length(struct comm_point* c)
2330
{
2331
  size_t count = 0;
2332
  struct doq_conn* conn;
2333
  lock_rw_rdlock(&c->doq_socket->table->lock);
2334
  conn = c->doq_socket->table->write_list_first;
2335
  while(conn) {
2336
    count++;
2337
    conn = conn->write_next;
2338
  }
2339
  lock_rw_unlock(&c->doq_socket->table->lock);
2340
  return count;
2341
}
2342
2343
/** doq pop the first element from the write list to have write events */
2344
static struct doq_conn*
2345
doq_pop_write_conn(struct comm_point* c)
2346
{
2347
  struct doq_conn* conn;
2348
  lock_rw_wrlock(&c->doq_socket->table->lock);
2349
  conn = doq_table_pop_first(c->doq_socket->table);
2350
  while(conn && conn->is_deleted) {
2351
    lock_basic_unlock(&conn->lock);
2352
    conn = doq_table_pop_first(c->doq_socket->table);
2353
  }
2354
  lock_rw_unlock(&c->doq_socket->table->lock);
2355
  if(conn)
2356
    conn->doq_socket = c->doq_socket;
2357
  return conn;
2358
}
2359
2360
/**
 * Doq the connection is done with write callbacks, release it.
 * @param c: comm point with the doq socket.
 * @param conn: the connection that is done with the write callbacks.
 * @param delete_it: if true the connection is deleted, otherwise its timer
 *	and write list status are set up.
 */
static void
doq_done_with_write_cb(struct comm_point* c, struct doq_conn* conn,
  int delete_it)
{
  if(!delete_it)
    doq_done_setup_timer_and_write(c, conn);
  else
    doq_delete_connection(c, conn);
}
2371
2372
/** see if the doq socket wants to write packets */
2373
static int
2374
doq_socket_want_write(struct comm_point* c)
2375
{
2376
  int want_write = 0;
2377
  if(c->doq_socket->have_blocked_pkt)
2378
    return 1;
2379
  lock_rw_rdlock(&c->doq_socket->table->lock);
2380
  if(c->doq_socket->table->write_list_first)
2381
    want_write = 1;
2382
  lock_rw_unlock(&c->doq_socket->table->lock);
2383
  return want_write;
2384
}
2385
2386
/** enable write event for the doq server socket fd */
2387
static void
2388
doq_socket_write_enable(struct comm_point* c)
2389
{
2390
  verbose(VERB_ALGO, "doq socket want write");
2391
  if(c->doq_socket->event_has_write)
2392
    return;
2393
  comm_point_listen_for_rw(c, 1, 1);
2394
  c->doq_socket->event_has_write = 1;
2395
}
2396
2397
/** disable write event for the doq server socket fd */
2398
static void
2399
doq_socket_write_disable(struct comm_point* c)
2400
{
2401
  verbose(VERB_ALGO, "doq socket want no write");
2402
  if(!c->doq_socket->event_has_write)
2403
    return;
2404
  comm_point_listen_for_rw(c, 1, 0);
2405
  c->doq_socket->event_has_write = 0;
2406
}
2407
2408
/** write blocked packet, if possible. returns false if failed, again. */
2409
static int
2410
doq_write_blocked_pkt(struct comm_point* c)
2411
{
2412
  struct doq_pkt_addr paddr;
2413
  if(!c->doq_socket->have_blocked_pkt)
2414
    return 1;
2415
  c->doq_socket->have_blocked_pkt = 0;
2416
  if(sldns_buffer_limit(c->doq_socket->blocked_pkt) >
2417
    sldns_buffer_remaining(c->doq_socket->pkt_buf))
2418
    return 1; /* impossibly large, drop it.
2419
    impossible since pkt_buf is same size as blocked_pkt buf. */
2420
  sldns_buffer_clear(c->doq_socket->pkt_buf);
2421
  sldns_buffer_write(c->doq_socket->pkt_buf,
2422
    sldns_buffer_begin(c->doq_socket->blocked_pkt),
2423
    sldns_buffer_limit(c->doq_socket->blocked_pkt));
2424
  sldns_buffer_flip(c->doq_socket->pkt_buf);
2425
  memcpy(&paddr, c->doq_socket->blocked_paddr, sizeof(paddr));
2426
  doq_send_pkt(c, &paddr, c->doq_socket->blocked_pkt_pi.ecn);
2427
  if(c->doq_socket->have_blocked_pkt)
2428
    return 0;
2429
  return 1;
2430
}
2431
2432
/** doq find a timer that timeouted and return the conn, locked. */
2433
static struct doq_conn*
2434
doq_timer_timeout_conn(struct doq_server_socket* doq_socket)
2435
{
2436
  struct doq_conn* conn = NULL;
2437
  struct rbnode_type* node;
2438
  lock_rw_wrlock(&doq_socket->table->lock);
2439
  node = rbtree_first(doq_socket->table->timer_tree);
2440
  if(node && node != RBTREE_NULL) {
2441
    struct doq_timer* t = (struct doq_timer*)node;
2442
    conn = t->conn;
2443
2444
    /* If now < timer then no further timeouts in tree. */
2445
    if(timeval_smaller(doq_socket->now_tv, &t->time)) {
2446
      lock_rw_unlock(&doq_socket->table->lock);
2447
      return NULL;
2448
    }
2449
2450
    lock_basic_lock(&conn->lock);
2451
    conn->doq_socket = doq_socket;
2452
2453
    /* Now that the timer is fired, remove it. */
2454
    doq_timer_unset(doq_socket->table, t);
2455
    lock_rw_unlock(&doq_socket->table->lock);
2456
    return conn;
2457
  }
2458
  lock_rw_unlock(&doq_socket->table->lock);
2459
  return NULL;
2460
}
2461
2462
/** doq timer erase the marker that said which timer the worker uses. */
2463
static void
2464
doq_timer_erase_marker(struct doq_server_socket* doq_socket)
2465
{
2466
  struct doq_timer* t;
2467
  lock_rw_wrlock(&doq_socket->table->lock);
2468
  t = doq_timer_find_time(doq_socket->table, &doq_socket->marked_time);
2469
  if(t && t->worker_doq_socket == doq_socket)
2470
    t->worker_doq_socket = NULL;
2471
  lock_rw_unlock(&doq_socket->table->lock);
2472
  memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2473
}
2474
2475
void
2476
doq_timer_cb(void* arg)
2477
{
2478
  struct doq_server_socket* doq_socket = (struct doq_server_socket*)arg;
2479
  struct doq_conn* conn;
2480
  verbose(VERB_ALGO, "doq timer callback");
2481
2482
  doq_timer_erase_marker(doq_socket);
2483
2484
  while((conn = doq_timer_timeout_conn(doq_socket)) != NULL) {
2485
    if(conn->is_deleted ||
2486
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2487
      ngtcp2_conn_in_closing_period(conn->conn) ||
2488
#else
2489
      ngtcp2_conn_is_in_closing_period(conn->conn) ||
2490
#endif
2491
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2492
      ngtcp2_conn_in_draining_period(conn->conn)
2493
#else
2494
      ngtcp2_conn_is_in_draining_period(conn->conn)
2495
#endif
2496
      ) {
2497
      if(verbosity >= VERB_ALGO) {
2498
        char remotestr[256];
2499
        addr_to_str((void*)&conn->key.paddr.addr,
2500
          conn->key.paddr.addrlen, remotestr,
2501
          sizeof(remotestr));
2502
        verbose(VERB_ALGO, "doq conn %s is deleted "
2503
          "after timeout", remotestr);
2504
      }
2505
      doq_delete_connection(doq_socket->cp, conn);
2506
      continue;
2507
    }
2508
    if(!doq_conn_handle_timeout(conn))
2509
      doq_delete_connection(doq_socket->cp, conn);
2510
    else doq_done_setup_timer_and_write(doq_socket->cp, conn);
2511
  }
2512
2513
  if(doq_socket_want_write(doq_socket->cp))
2514
    doq_socket_write_enable(doq_socket->cp);
2515
  else doq_socket_write_disable(doq_socket->cp);
2516
  doq_pickup_timer(doq_socket->cp);
2517
}
2518
2519
void
2520
comm_point_doq_callback(int fd, short event, void* arg)
2521
{
2522
  struct comm_point* c;
2523
  struct doq_pkt_addr paddr;
2524
  int i, pkt_continue, err_drop;
2525
  struct doq_conn* conn;
2526
  struct ngtcp2_pkt_info pi;
2527
  size_t count, num_len;
2528
2529
  c = (struct comm_point*)arg;
2530
  log_assert(c->type == comm_doq);
2531
2532
  log_assert(c && c->doq_socket->pkt_buf && c->fd == fd);
2533
  ub_comm_base_now(c->ev->base);
2534
2535
  /* see if there is a blocked packet, and send that if possible.
2536
   * do not attempt to read yet, even if possible, that would just
2537
   * push more answers in reply to those read packets onto the list
2538
   * of written replies. First attempt to clear the write content out.
2539
   * That keeps the memory usage from bloating up. */
2540
  if(c->doq_socket->have_blocked_pkt) {
2541
    if(!doq_write_blocked_pkt(c)) {
2542
      /* this write has also blocked, attempt to write
2543
       * later. Make sure the event listens to write
2544
       * events. */
2545
      if(!c->doq_socket->event_has_write)
2546
        doq_socket_write_enable(c);
2547
      doq_pickup_timer(c);
2548
      return;
2549
    }
2550
  }
2551
2552
  /* see if there is write interest */
2553
  count = 0;
2554
  num_len = doq_write_list_length(c);
2555
  while((conn = doq_pop_write_conn(c)) != NULL) {
2556
    if(conn->is_deleted ||
2557
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2558
      ngtcp2_conn_in_closing_period(conn->conn) ||
2559
#else
2560
      ngtcp2_conn_is_in_closing_period(conn->conn) ||
2561
#endif
2562
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2563
      ngtcp2_conn_in_draining_period(conn->conn)
2564
#else
2565
      ngtcp2_conn_is_in_draining_period(conn->conn)
2566
#endif
2567
      ) {
2568
      conn->doq_socket = NULL;
2569
      lock_basic_unlock(&conn->lock);
2570
      if(c->doq_socket->have_blocked_pkt) {
2571
        if(!c->doq_socket->event_has_write)
2572
          doq_socket_write_enable(c);
2573
        doq_pickup_timer(c);
2574
        return;
2575
      }
2576
      if(++count > num_len*2)
2577
        break;
2578
      continue;
2579
    }
2580
    if(verbosity >= VERB_ALGO) {
2581
      char remotestr[256];
2582
      addr_to_str((void*)&conn->key.paddr.addr,
2583
        conn->key.paddr.addrlen, remotestr,
2584
        sizeof(remotestr));
2585
      verbose(VERB_ALGO, "doq write connection %s %d",
2586
        remotestr, doq_sockaddr_get_port(
2587
        &conn->key.paddr.addr));
2588
    }
2589
    if(doq_conn_write_streams(c, conn, &err_drop))
2590
      err_drop = 0;
2591
    doq_done_with_write_cb(c, conn, err_drop);
2592
    if(c->doq_socket->have_blocked_pkt) {
2593
      if(!c->doq_socket->event_has_write)
2594
        doq_socket_write_enable(c);
2595
      doq_pickup_timer(c);
2596
      return;
2597
    }
2598
    /* Stop overly long write lists that are created
2599
     * while we are processing. Do those next time there
2600
     * is a write callback. Stops long loops, and keeps
2601
     * fair for other events. */
2602
    if(++count > num_len*2)
2603
      break;
2604
  }
2605
2606
  /* check for data to read */
2607
  if((event&UB_EV_READ)!=0)
2608
    for(i=0; i<NUM_UDP_PER_SELECT; i++) {
2609
    /* there may be a blocked write packet and if so, stop
2610
     * reading because the reply cannot get written. The
2611
     * blocked packet could be written during the conn_recv
2612
     * handling of replies, or for a connection close. */
2613
    if(c->doq_socket->have_blocked_pkt) {
2614
      if(!c->doq_socket->event_has_write)
2615
        doq_socket_write_enable(c);
2616
      doq_pickup_timer(c);
2617
      return;
2618
    }
2619
    sldns_buffer_clear(c->doq_socket->pkt_buf);
2620
    doq_pkt_addr_init(&paddr);
2621
    log_assert(fd != -1);
2622
    log_assert(sldns_buffer_remaining(c->doq_socket->pkt_buf) > 0);
2623
    if(!doq_recv(c, &paddr, &pkt_continue, &pi)) {
2624
      if(pkt_continue)
2625
        continue;
2626
      break;
2627
    }
2628
2629
    /* handle incoming packet from remote addr to localaddr */
2630
    if(verbosity >= VERB_ALGO) {
2631
      char remotestr[256], localstr[256];
2632
      addr_to_str((void*)&paddr.addr, paddr.addrlen,
2633
        remotestr, sizeof(remotestr));
2634
      addr_to_str((void*)&paddr.localaddr,
2635
        paddr.localaddrlen, localstr,
2636
        sizeof(localstr));
2637
      log_info("incoming doq packet from %s port %d on "
2638
        "%s port %d ifindex %d",
2639
        remotestr, doq_sockaddr_get_port(&paddr.addr),
2640
        localstr,
2641
        doq_sockaddr_get_port(&paddr.localaddr),
2642
        paddr.ifindex);
2643
      log_info("doq_recv length %d ecn 0x%x",
2644
        (int)sldns_buffer_limit(c->doq_socket->pkt_buf),
2645
        (int)pi.ecn);
2646
    }
2647
2648
    if(sldns_buffer_limit(c->doq_socket->pkt_buf) == 0)
2649
      continue;
2650
2651
    conn = NULL;
2652
    if(!doq_decode_pkt_header_negotiate(c, &paddr, &conn))
2653
      continue;
2654
    if(!conn) {
2655
      if(!doq_accept(c, &paddr, &conn, &pi))
2656
        continue;
2657
      if(!doq_conn_write_streams(c, conn, NULL)) {
2658
        doq_delete_connection(c, conn);
2659
        continue;
2660
      }
2661
      doq_done_setup_timer_and_write(c, conn);
2662
      continue;
2663
    }
2664
    if(
2665
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2666
      ngtcp2_conn_in_closing_period(conn->conn)
2667
#else
2668
      ngtcp2_conn_is_in_closing_period(conn->conn)
2669
#endif
2670
      ) {
2671
      if(!doq_conn_send_close(c, conn)) {
2672
        doq_delete_connection(c, conn);
2673
      } else {
2674
        doq_done_setup_timer_and_write(c, conn);
2675
      }
2676
      continue;
2677
    }
2678
    if(
2679
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2680
      ngtcp2_conn_in_draining_period(conn->conn)
2681
#else
2682
      ngtcp2_conn_is_in_draining_period(conn->conn)
2683
#endif
2684
      ) {
2685
      doq_done_setup_timer_and_write(c, conn);
2686
      continue;
2687
    }
2688
    if(!doq_conn_recv(c, &paddr, conn, &pi, NULL, &err_drop)) {
2689
      /* The receive failed, and if it also failed to send
2690
       * a close, drop the connection. That means it is not
2691
       * in the closing period. */
2692
      if(err_drop) {
2693
        doq_delete_connection(c, conn);
2694
      } else {
2695
        doq_done_setup_timer_and_write(c, conn);
2696
      }
2697
      continue;
2698
    }
2699
    if(!doq_conn_write_streams(c, conn, &err_drop)) {
2700
      if(err_drop) {
2701
        doq_delete_connection(c, conn);
2702
      } else {
2703
        doq_done_setup_timer_and_write(c, conn);
2704
      }
2705
      continue;
2706
    }
2707
    doq_done_setup_timer_and_write(c, conn);
2708
  }
2709
2710
  /* see if we want to have more write events */
2711
  verbose(VERB_ALGO, "doq check write enable");
2712
  if(doq_socket_want_write(c))
2713
    doq_socket_write_enable(c);
2714
  else doq_socket_write_disable(c);
2715
  doq_pickup_timer(c);
2716
}
2717
2718
/** create new doq server socket structure */
2719
static struct doq_server_socket*
2720
doq_server_socket_create(struct doq_table* table, struct ub_randstate* rnd,
2721
  const void* quic_sslctx, struct comm_point* c, struct comm_base* base,
2722
  struct config_file* cfg)
2723
{
2724
  size_t doq_buffer_size = 4096; /* bytes buffer size, for one packet. */
2725
  struct doq_server_socket* doq_socket;
2726
  log_assert(table != NULL);
2727
  doq_socket = calloc(1, sizeof(*doq_socket));
2728
  if(!doq_socket) {
2729
    return NULL;
2730
  }
2731
  doq_socket->table = table;
2732
  doq_socket->rnd = rnd;
2733
  doq_socket->validate_addr = 1;
2734
  /* the doq_socket has its own copy of the static secret, as
2735
   * well as other config values, so that they do not need table.lock */
2736
  doq_socket->static_secret_len = table->static_secret_len;
2737
  doq_socket->static_secret = memdup(table->static_secret,
2738
    table->static_secret_len);
2739
  if(!doq_socket->static_secret) {
2740
    free(doq_socket);
2741
    return NULL;
2742
  }
2743
  doq_socket->ctx = (SSL_CTX*)quic_sslctx;
2744
  doq_socket->idle_timeout = table->idle_timeout;
2745
  doq_socket->sv_scidlen = table->sv_scidlen;
2746
  doq_socket->cp = c;
2747
  doq_socket->pkt_buf = sldns_buffer_new(doq_buffer_size);
2748
  if(!doq_socket->pkt_buf) {
2749
    free(doq_socket->static_secret);
2750
    free(doq_socket);
2751
    return NULL;
2752
  }
2753
  doq_socket->blocked_pkt = sldns_buffer_new(
2754
    sldns_buffer_capacity(doq_socket->pkt_buf));
2755
  if(!doq_socket->pkt_buf) {
2756
    free(doq_socket->static_secret);
2757
    sldns_buffer_free(doq_socket->pkt_buf);
2758
    free(doq_socket);
2759
    return NULL;
2760
  }
2761
  doq_socket->blocked_paddr = calloc(1,
2762
    sizeof(*doq_socket->blocked_paddr));
2763
  if(!doq_socket->blocked_paddr) {
2764
    free(doq_socket->static_secret);
2765
    sldns_buffer_free(doq_socket->pkt_buf);
2766
    sldns_buffer_free(doq_socket->blocked_pkt);
2767
    free(doq_socket);
2768
    return NULL;
2769
  }
2770
  doq_socket->timer = comm_timer_create(base, doq_timer_cb, doq_socket);
2771
  if(!doq_socket->timer) {
2772
    free(doq_socket->static_secret);
2773
    sldns_buffer_free(doq_socket->pkt_buf);
2774
    sldns_buffer_free(doq_socket->blocked_pkt);
2775
    free(doq_socket->blocked_paddr);
2776
    free(doq_socket);
2777
    return NULL;
2778
  }
2779
  memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2780
  comm_base_timept(base, &doq_socket->now_tt, &doq_socket->now_tv);
2781
  doq_socket->cfg = cfg;
2782
  return doq_socket;
2783
}
2784
2785
/** delete doq server socket structure */
2786
static void
2787
doq_server_socket_delete(struct doq_server_socket* doq_socket)
2788
{
2789
  if(!doq_socket)
2790
    return;
2791
  free(doq_socket->static_secret);
2792
#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
2793
  free(doq_socket->quic_method);
2794
#endif
2795
  sldns_buffer_free(doq_socket->pkt_buf);
2796
  sldns_buffer_free(doq_socket->blocked_pkt);
2797
  free(doq_socket->blocked_paddr);
2798
  comm_timer_delete(doq_socket->timer);
2799
  free(doq_socket);
2800
}
2801
2802
/** find repinfo in the doq table */
2803
static struct doq_conn*
2804
doq_lookup_repinfo(struct doq_table* table, struct comm_reply* repinfo)
2805
{
2806
  struct doq_conn* conn;
2807
  struct doq_conn_key key;
2808
  log_assert(table != NULL);
2809
  doq_conn_key_from_repinfo(&key, repinfo);
2810
  lock_rw_rdlock(&table->lock);
2811
  conn = doq_conn_find(table, &key.paddr.addr,
2812
    key.paddr.addrlen, &key.paddr.localaddr,
2813
    key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
2814
    key.dcidlen);
2815
  if(conn) {
2816
    lock_basic_lock(&conn->lock);
2817
    lock_rw_unlock(&table->lock);
2818
    return conn;
2819
  }
2820
  lock_rw_unlock(&table->lock);
2821
  return NULL;
2822
}
2823
2824
/** doq find connection and stream. From inside callbacks from worker. */
2825
static int
2826
doq_lookup_conn_stream(struct comm_reply* repinfo, struct comm_point* c,
2827
  struct doq_conn** conn, struct doq_stream** stream)
2828
{
2829
  log_assert(c->doq_socket);
2830
  if(c->doq_socket->current_conn) {
2831
    *conn = c->doq_socket->current_conn;
2832
  } else {
2833
    *conn = doq_lookup_repinfo(c->doq_socket->table, repinfo);
2834
    if((*conn) && (*conn)->is_deleted) {
2835
      lock_basic_unlock(&(*conn)->lock);
2836
      *conn = NULL;
2837
    }
2838
    if(*conn) {
2839
      (*conn)->doq_socket = c->doq_socket;
2840
    }
2841
  }
2842
  if(!*conn) {
2843
    *stream = NULL;
2844
    return 0;
2845
  }
2846
  *stream = doq_stream_find(*conn, repinfo->doq_streamid);
2847
  if(!*stream) {
2848
    if(!c->doq_socket->current_conn) {
2849
      /* Not inside callbacks, we have our own lock on conn.
2850
       * Release it. */
2851
      lock_basic_unlock(&(*conn)->lock);
2852
    }
2853
    return 0;
2854
  }
2855
  if((*stream)->is_closed) {
2856
    /* stream is closed, ignore reply or drop */
2857
    if(!c->doq_socket->current_conn) {
2858
      /* Not inside callbacks, we have our own lock on conn.
2859
       * Release it. */
2860
      lock_basic_unlock(&(*conn)->lock);
2861
    }
2862
    return 0;
2863
  }
2864
  return 1;
2865
}
2866
2867
/** doq send a reply from a comm reply */
2868
static void
2869
doq_socket_send_reply(struct comm_reply* repinfo)
2870
{
2871
  struct doq_conn* conn;
2872
  struct doq_stream* stream;
2873
  log_assert(repinfo->c->type == comm_doq);
2874
  if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2875
    verbose(VERB_ALGO, "doq: send_reply but %s is gone",
2876
      (conn?"stream":"connection"));
2877
    /* No stream, it may have been closed. */
2878
    /* Drop the reply, it cannot be sent. */
2879
    return;
2880
  }
2881
  if(!doq_stream_send_reply(conn, stream, repinfo->c->buffer))
2882
    doq_stream_close(conn, stream, 1);
2883
  if(!repinfo->c->doq_socket->current_conn) {
2884
    /* Not inside callbacks, we have our own lock on conn.
2885
     * Release it. */
2886
    doq_done_with_conn_cb(repinfo->c, conn);
2887
    /* since we sent a reply, or closed it, the assumption is
2888
     * that there is something to write, so enable write event.
2889
     * It waits until the write event happens to write the
2890
     * streams with answers, this allows some answers to be
2891
     * answered before the event loop reaches the doq fd, in
2892
     * repinfo->c->fd, and that collates answers. That would
2893
     * not happen if we write doq packets right now. */
2894
    doq_socket_write_enable(repinfo->c);
2895
  }
2896
}
2897
2898
/** doq drop a reply from a comm reply */
2899
static void
2900
doq_socket_drop_reply(struct comm_reply* repinfo)
2901
{
2902
  struct doq_conn* conn;
2903
  struct doq_stream* stream;
2904
  log_assert(repinfo->c->type == comm_doq);
2905
  if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2906
    verbose(VERB_ALGO, "doq: drop_reply but %s is gone",
2907
      (conn?"stream":"connection"));
2908
    /* The connection or stream is already gone. */
2909
    return;
2910
  }
2911
  doq_stream_close(conn, stream, 1);
2912
  if(!repinfo->c->doq_socket->current_conn) {
2913
    /* Not inside callbacks, we have our own lock on conn.
2914
     * Release it. */
2915
    doq_done_with_conn_cb(repinfo->c, conn);
2916
    doq_socket_write_enable(repinfo->c);
2917
  }
2918
}
2919
#endif /* HAVE_NGTCP2 */
2920
2921
int adjusted_tcp_timeout(struct comm_point* c)
2922
0
{
2923
0
  if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM)
2924
0
    return TCP_QUERY_TIMEOUT_MINIMUM;
2925
0
  return c->tcp_timeout_msec;
2926
0
}
2927
2928
/** Use a new tcp handler for new query fd, set to read query */
2929
static void
2930
setup_tcp_handler(struct comm_point* c, int fd, int cur, int max)
2931
0
{
2932
0
  int handler_usage;
2933
0
  log_assert(c->type == comm_tcp || c->type == comm_http);
2934
0
  log_assert(c->fd == -1);
2935
0
  sldns_buffer_clear(c->buffer);
2936
#ifdef USE_DNSCRYPT
2937
  if (c->dnscrypt)
2938
    sldns_buffer_clear(c->dnscrypt_buffer);
2939
#endif
2940
0
  c->tcp_is_reading = 1;
2941
0
  c->tcp_byte_count = 0;
2942
0
  c->tcp_keepalive = 0;
2943
  /* if more than half the tcp handlers are in use, use a shorter
2944
   * timeout for this TCP connection, we need to make space for
2945
   * other connections to be able to get attention */
2946
  /* If > 50% TCP handler structures in use, set timeout to 1/100th
2947
   *  configured value.
2948
   * If > 65%TCP handler structures in use, set to 1/500th configured
2949
   *  value.
2950
   * If > 80% TCP handler structures in use, set to 0.
2951
   *
2952
   * If the timeout to use falls below 200 milliseconds, an actual
2953
   * timeout of 200ms is used.
2954
   */
2955
0
  handler_usage = (cur * 100) / max;
2956
0
  if(handler_usage > 50 && handler_usage <= 65)
2957
0
    c->tcp_timeout_msec /= 100;
2958
0
  else if (handler_usage > 65 && handler_usage <= 80)
2959
0
    c->tcp_timeout_msec /= 500;
2960
0
  else if (handler_usage > 80)
2961
0
    c->tcp_timeout_msec = 0;
2962
0
  comm_point_start_listening(c, fd, adjusted_tcp_timeout(c));
2963
0
}
2964
2965
void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
2966
  short ATTR_UNUSED(event), void* arg)
2967
0
{
2968
0
  struct comm_base* b = (struct comm_base*)arg;
2969
  /* timeout for the slow accept, re-enable accepts again */
2970
0
  if(b->start_accept) {
2971
0
    verbose(VERB_ALGO, "wait is over, slow accept disabled");
2972
0
    fptr_ok(fptr_whitelist_start_accept(b->start_accept));
2973
0
    (*b->start_accept)(b->cb_arg);
2974
0
    b->eb->slow_accept_enabled = 0;
2975
0
  }
2976
0
}
2977
2978
int comm_point_perform_accept(struct comm_point* c,
2979
  struct sockaddr_storage* addr, socklen_t* addrlen)
2980
0
{
2981
0
  int new_fd;
2982
0
  *addrlen = (socklen_t)sizeof(*addr);
2983
#ifndef HAVE_ACCEPT4
2984
  new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
2985
#else
2986
  /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
2987
0
  new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
2988
0
#endif
2989
0
  if(new_fd == -1) {
2990
0
#ifndef USE_WINSOCK
2991
    /* EINTR is signal interrupt. others are closed connection. */
2992
0
    if( errno == EINTR || errno == EAGAIN
2993
0
#ifdef EWOULDBLOCK
2994
0
      || errno == EWOULDBLOCK
2995
0
#endif
2996
0
#ifdef ECONNABORTED
2997
0
      || errno == ECONNABORTED
2998
0
#endif
2999
0
#ifdef EPROTO
3000
0
      || errno == EPROTO
3001
0
#endif /* EPROTO */
3002
0
      )
3003
0
      return -1;
3004
0
#if defined(ENFILE) && defined(EMFILE)
3005
0
    if(errno == ENFILE || errno == EMFILE) {
3006
      /* out of file descriptors, likely outside of our
3007
       * control. stop accept() calls for some time */
3008
0
      if(c->ev->base->stop_accept) {
3009
0
        struct comm_base* b = c->ev->base;
3010
0
        struct timeval tv;
3011
0
        verbose(VERB_ALGO, "out of file descriptors: "
3012
0
          "slow accept");
3013
0
        ub_comm_base_now(b);
3014
0
        if(b->eb->last_slow_log+SLOW_LOG_TIME <=
3015
0
          b->eb->secs) {
3016
0
          b->eb->last_slow_log = b->eb->secs;
3017
0
          verbose(VERB_OPS, "accept failed, "
3018
0
            "slow down accept for %d "
3019
0
            "msec: %s",
3020
0
            NETEVENT_SLOW_ACCEPT_TIME,
3021
0
            sock_strerror(errno));
3022
0
        }
3023
0
        b->eb->slow_accept_enabled = 1;
3024
0
        fptr_ok(fptr_whitelist_stop_accept(
3025
0
          b->stop_accept));
3026
0
        (*b->stop_accept)(b->cb_arg);
3027
        /* set timeout, no mallocs */
3028
0
        tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
3029
0
        tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
3030
0
        b->eb->slow_accept = ub_event_new(b->eb->base,
3031
0
          -1, UB_EV_TIMEOUT,
3032
0
          comm_base_handle_slow_accept, b);
3033
0
        if(b->eb->slow_accept == NULL) {
3034
          /* we do not want to log here, because
3035
           * that would spam the logfiles.
3036
           * error: "event_base_set failed." */
3037
0
        }
3038
0
        else if(ub_event_add(b->eb->slow_accept, &tv)
3039
0
          != 0) {
3040
          /* we do not want to log here,
3041
           * error: "event_add failed." */
3042
0
        }
3043
0
      } else {
3044
0
        log_err("accept, with no slow down, "
3045
0
          "failed: %s", sock_strerror(errno));
3046
0
      }
3047
0
      return -1;
3048
0
    }
3049
0
#endif
3050
#else /* USE_WINSOCK */
3051
    if(WSAGetLastError() == WSAEINPROGRESS ||
3052
      WSAGetLastError() == WSAECONNRESET)
3053
      return -1;
3054
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
3055
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3056
      return -1;
3057
    }
3058
#endif
3059
0
    log_err_addr("accept failed", sock_strerror(errno), addr,
3060
0
      *addrlen);
3061
0
    return -1;
3062
0
  }
3063
0
  if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
3064
0
    c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
3065
0
    if(!tcl_new_connection(c->tcl_addr)) {
3066
0
      if(verbosity >= 3)
3067
0
        log_err_addr("accept rejected",
3068
0
        "connection limit exceeded", addr, *addrlen);
3069
0
      sock_close(new_fd);
3070
0
      return -1;
3071
0
    }
3072
0
  }
3073
#ifndef HAVE_ACCEPT4
3074
  fd_set_nonblock(new_fd);
3075
#endif
3076
0
  return new_fd;
3077
0
}
3078
3079
#ifdef USE_WINSOCK
3080
static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
3081
#ifdef HAVE_BIO_SET_CALLBACK_EX
3082
  size_t ATTR_UNUSED(len),
3083
#endif
3084
        int ATTR_UNUSED(argi), long argl,
3085
#ifndef HAVE_BIO_SET_CALLBACK_EX
3086
  long retvalue
3087
#else
3088
  int retvalue, size_t* ATTR_UNUSED(processed)
3089
#endif
3090
  )
3091
{
3092
  int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
3093
  verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
3094
    (oper&BIO_CB_RETURN)?"return":"before",
3095
    (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
3096
    wsa_err==WSAEWOULDBLOCK?"wsawb":"");
3097
  /* on windows, check if previous operation caused EWOULDBLOCK */
3098
  if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
3099
    (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
3100
    if(wsa_err == WSAEWOULDBLOCK)
3101
      ub_winsock_tcp_wouldblock((struct ub_event*)
3102
        BIO_get_callback_arg(b), UB_EV_READ);
3103
  }
3104
  if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
3105
    (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
3106
    if(wsa_err == WSAEWOULDBLOCK)
3107
      ub_winsock_tcp_wouldblock((struct ub_event*)
3108
        BIO_get_callback_arg(b), UB_EV_WRITE);
3109
  }
3110
  /* return original return value */
3111
  return retvalue;
3112
}
3113
3114
/** set win bio callbacks for nonblocking operations */
3115
void
3116
comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
3117
{
3118
  SSL* ssl = (SSL*)thessl;
3119
  /* set them both just in case, but usually they are the same BIO */
3120
#ifdef HAVE_BIO_SET_CALLBACK_EX
3121
  BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb);
3122
#else
3123
  BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
3124
#endif
3125
  BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
3126
#ifdef HAVE_BIO_SET_CALLBACK_EX
3127
  BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb);
3128
#else
3129
  BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
3130
#endif
3131
  BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
3132
}
3133
#endif
3134
3135
#ifdef HAVE_NGHTTP2
3136
/** Create http2 session server.  Per connection, after TCP accepted.*/
3137
static int http2_session_server_create(struct http2_session* h2_session)
3138
{
3139
  log_assert(h2_session->callbacks);
3140
  h2_session->is_drop = 0;
3141
  if(nghttp2_session_server_new(&h2_session->session,
3142
      h2_session->callbacks,
3143
    h2_session) == NGHTTP2_ERR_NOMEM) {
3144
    log_err("failed to create nghttp2 session server");
3145
    return 0;
3146
  }
3147
3148
  return 1;
3149
}
3150
3151
/** Submit http2 setting to session. Once per session. */
3152
static int http2_submit_settings(struct http2_session* h2_session)
3153
{
3154
  int ret;
3155
  nghttp2_settings_entry settings[1] = {
3156
    {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS,
3157
     h2_session->c->http2_max_streams}};
3158
3159
  ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE,
3160
    settings, 1);
3161
  if(ret) {
3162
    verbose(VERB_QUERY, "http2: submit_settings failed, "
3163
      "error: %s", nghttp2_strerror(ret));
3164
    return 0;
3165
  }
3166
  return 1;
3167
}
3168
#endif /* HAVE_NGHTTP2 */
3169
3170
#ifdef HAVE_NGHTTP2
3171
/** Delete http2 stream. After session delete or stream close callback */
3172
static void http2_stream_delete(struct http2_session* h2_session,
3173
  struct http2_stream* h2_stream)
3174
{
3175
  if(h2_stream->mesh_state) {
3176
    mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state,
3177
      h2_session->c);
3178
    h2_stream->mesh_state = NULL;
3179
  }
3180
  http2_req_stream_clear(h2_stream);
3181
  free(h2_stream);
3182
}
3183
#endif /* HAVE_NGHTTP2 */
3184
3185
/** delete http2 session server. After closing connection.
 * With nghttp2, the nghttp2 session and all streams on the stream list
 * are deleted, and the drop flags and the current stream of the comm
 * point are reset. Without nghttp2 this does nothing. */
static void http2_session_server_delete(struct http2_session* h2_session)
{
#ifdef HAVE_NGHTTP2
  struct http2_stream* cur, *following;
  nghttp2_session_del(h2_session->session); /* NULL input is fine */
  h2_session->session = NULL;
  cur = h2_session->first_stream;
  while(cur) {
    /* pick up the next pointer before the stream is freed */
    following = cur->next;
    http2_stream_delete(h2_session, cur);
    cur = following;
  }
  h2_session->first_stream = NULL;
  h2_session->is_drop = 0;
  h2_session->postpone_drop = 0;
  h2_session->c->h2_stream = NULL;
#endif
  (void)h2_session;
}
3204
3205
void
3206
comm_point_tcp_accept_callback(int fd, short event, void* arg)
3207
0
{
3208
0
  struct comm_point* c = (struct comm_point*)arg, *c_hdl;
3209
0
  int new_fd;
3210
0
  log_assert(c->type == comm_tcp_accept);
3211
0
  if(!(event & UB_EV_READ)) {
3212
0
    log_info("ignoring tcp accept event %d", (int)event);
3213
0
    return;
3214
0
  }
3215
0
  ub_comm_base_now(c->ev->base);
3216
  /* find free tcp handler. */
3217
0
  if(!c->tcp_free) {
3218
0
    log_warn("accepted too many tcp, connections full");
3219
0
    return;
3220
0
  }
3221
  /* accept incoming connection. */
3222
0
  c_hdl = c->tcp_free;
3223
  /* Should not happen: inconsistent tcp_free state in
3224
   * accept_callback. */
3225
0
  log_assert(c_hdl->is_in_tcp_free);
3226
  /* clear leftover flags from previous use, and then set the
3227
   * correct event base for the event structure for libevent */
3228
0
  ub_event_free(c_hdl->ev->ev);
3229
0
  c_hdl->ev->ev = NULL;
3230
0
  if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) ||
3231
0
    c_hdl->type == comm_local || c_hdl->type == comm_raw)
3232
0
    c_hdl->tcp_do_toggle_rw = 0;
3233
0
  else  c_hdl->tcp_do_toggle_rw = 1;
3234
3235
0
  if(c_hdl->type == comm_http) {
3236
#ifdef HAVE_NGHTTP2
3237
    if(!c_hdl->h2_session ||
3238
      !http2_session_server_create(c_hdl->h2_session)) {
3239
      log_warn("failed to create nghttp2");
3240
      return;
3241
    }
3242
    if(!c_hdl->h2_session ||
3243
      !http2_submit_settings(c_hdl->h2_session)) {
3244
      log_warn("failed to submit http2 settings");
3245
      if(c_hdl->h2_session)
3246
        http2_session_server_delete(c_hdl->h2_session);
3247
      return;
3248
    }
3249
    if(!c->ssl) {
3250
      c_hdl->tcp_do_toggle_rw = 0;
3251
      c_hdl->use_h2 = 1;
3252
    }
3253
#endif
3254
0
    c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3255
0
      UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3256
0
      comm_point_http_handle_callback, c_hdl);
3257
0
  } else {
3258
0
    c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3259
0
      UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3260
0
      comm_point_tcp_handle_callback, c_hdl);
3261
0
  }
3262
0
  if(!c_hdl->ev->ev) {
3263
0
    log_warn("could not ub_event_new, dropped tcp");
3264
#ifdef HAVE_NGHTTP2
3265
    if(c_hdl->type == comm_http && c_hdl->h2_session)
3266
      http2_session_server_delete(c_hdl->h2_session);
3267
#endif
3268
0
    return;
3269
0
  }
3270
0
  log_assert(fd != -1);
3271
0
  (void)fd;
3272
0
  new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.remote_addr,
3273
0
    &c_hdl->repinfo.remote_addrlen);
3274
0
  if(new_fd == -1) {
3275
#ifdef HAVE_NGHTTP2
3276
    if(c_hdl->type == comm_http && c_hdl->h2_session)
3277
      http2_session_server_delete(c_hdl->h2_session);
3278
#endif
3279
0
    return;
3280
0
  }
3281
  /* Copy remote_address to client_address.
3282
   * Simplest way/time for streams to do that. */
3283
0
  c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen;
3284
0
  memmove(&c_hdl->repinfo.client_addr,
3285
0
    &c_hdl->repinfo.remote_addr,
3286
0
    c_hdl->repinfo.remote_addrlen);
3287
0
  if(c->ssl) {
3288
0
    c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
3289
0
    if(!c_hdl->ssl) {
3290
0
      c_hdl->fd = new_fd;
3291
0
      comm_point_close(c_hdl);
3292
0
      return;
3293
0
    }
3294
0
    c_hdl->ssl_shake_state = comm_ssl_shake_read;
3295
#ifdef USE_WINSOCK
3296
    comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
3297
#endif
3298
0
  }
3299
3300
  /* Paranoia: Check that the state has not changed from above: */
3301
  /* Should not happen: tcp_free state changed within accept_callback. */
3302
0
  log_assert(c_hdl == c->tcp_free);
3303
0
  log_assert(c_hdl->is_in_tcp_free);
3304
  /* grab the tcp handler buffers */
3305
0
  c->cur_tcp_count++;
3306
0
  c->tcp_free = c_hdl->tcp_free;
3307
0
  c_hdl->tcp_free = NULL;
3308
0
  c_hdl->is_in_tcp_free = 0;
3309
0
  if(!c->tcp_free) {
3310
    /* stop accepting incoming queries for now. */
3311
0
    comm_point_stop_listening(c);
3312
0
  }
3313
0
  setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
3314
0
}
3315
3316
/** Make tcp handler free for next assignment */
3317
static void
3318
reclaim_tcp_handler(struct comm_point* c)
3319
0
{
3320
0
  log_assert(c->type == comm_tcp);
3321
0
  if(c->ssl) {
3322
0
#ifdef HAVE_SSL
3323
0
    SSL_shutdown(c->ssl);
3324
0
    SSL_free(c->ssl);
3325
0
    c->ssl = NULL;
3326
0
#endif
3327
0
  }
3328
0
  comm_point_close(c);
3329
0
  if(c->tcp_parent && !c->is_in_tcp_free) {
3330
    /* Should not happen: bad tcp_free state in reclaim_tcp. */
3331
0
    log_assert(c->tcp_free == NULL);
3332
0
    log_assert(c->tcp_parent->cur_tcp_count > 0);
3333
0
    c->tcp_parent->cur_tcp_count--;
3334
0
    c->tcp_free = c->tcp_parent->tcp_free;
3335
0
    c->tcp_parent->tcp_free = c;
3336
0
    c->is_in_tcp_free = 1;
3337
0
    if(!c->tcp_free) {
3338
      /* re-enable listening on accept socket */
3339
0
      comm_point_start_listening(c->tcp_parent, -1, -1);
3340
0
    }
3341
0
  }
3342
0
  c->tcp_more_read_again = NULL;
3343
0
  c->tcp_more_write_again = NULL;
3344
0
  c->tcp_byte_count = 0;
3345
0
  c->pp2_header_state = pp2_header_none;
3346
0
  sldns_buffer_clear(c->buffer);
3347
0
}
3348
3349
/** do the callback when writing is done */
3350
static void
3351
tcp_callback_writer(struct comm_point* c)
3352
0
{
3353
0
  log_assert(c->type == comm_tcp);
3354
0
  if(!c->tcp_write_and_read) {
3355
0
    sldns_buffer_clear(c->buffer);
3356
0
    c->tcp_byte_count = 0;
3357
0
  }
3358
0
  if(c->tcp_do_toggle_rw)
3359
0
    c->tcp_is_reading = 1;
3360
  /* switch from listening(write) to listening(read) */
3361
0
  if(c->tcp_req_info) {
3362
0
    tcp_req_info_handle_writedone(c->tcp_req_info);
3363
0
  } else {
3364
0
    comm_point_stop_listening(c);
3365
0
    if(c->tcp_write_and_read) {
3366
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
3367
0
      if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN,
3368
0
        &c->repinfo) ) {
3369
0
        comm_point_start_listening(c, -1,
3370
0
          adjusted_tcp_timeout(c));
3371
0
      }
3372
0
    } else {
3373
0
      comm_point_start_listening(c, -1,
3374
0
          adjusted_tcp_timeout(c));
3375
0
    }
3376
0
  }
3377
0
}
3378
3379
/** do the callback when reading is done */
3380
static void
3381
tcp_callback_reader(struct comm_point* c)
3382
0
{
3383
0
  log_assert(c->type == comm_tcp || c->type == comm_local);
3384
0
  sldns_buffer_flip(c->buffer);
3385
0
  if(c->tcp_do_toggle_rw)
3386
0
    c->tcp_is_reading = 0;
3387
0
  c->tcp_byte_count = 0;
3388
0
  if(c->tcp_req_info) {
3389
0
    tcp_req_info_handle_readdone(c->tcp_req_info);
3390
0
  } else {
3391
0
    if(c->type == comm_tcp)
3392
0
      comm_point_stop_listening(c);
3393
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
3394
0
    if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
3395
0
      comm_point_start_listening(c, -1,
3396
0
          adjusted_tcp_timeout(c));
3397
0
    }
3398
0
  }
3399
0
}
3400
3401
#ifdef HAVE_SSL
3402
/** true if the ssl handshake error has to be squelched from the logs */
3403
int
3404
squelch_err_ssl_handshake(unsigned long err)
3405
0
{
3406
0
  if(verbosity >= VERB_QUERY)
3407
0
    return 0; /* only squelch on low verbosity */
3408
0
  if(ERR_GET_LIB(err) == ERR_LIB_SSL &&
3409
0
    (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST ||
3410
0
     ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST ||
3411
0
     ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER ||
3412
0
     ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE
3413
0
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
3414
0
     || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER
3415
0
#endif
3416
0
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
3417
0
     || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL
3418
0
     || ERR_GET_REASON(err) == SSL_R_UNSUPPORTED_PROTOCOL
3419
0
#  ifdef SSL_R_VERSION_TOO_LOW
3420
0
     || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW
3421
0
#  endif
3422
0
#endif
3423
0
    ))
3424
0
    return 1;
3425
0
  return 0;
3426
0
}
3427
#endif /* HAVE_SSL */
3428
3429
/** continue ssl handshake.
 * @return 0 if the handshake failed or the connection is closed.
 *   1 if the handshake is done, or it has to wait for a read or write
 *   event; ssl_shake_state tells which of these it is. */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
  int r;
  switch(c->ssl_shake_state) {
  case comm_ssl_shake_hs_read:
    /* read condition satisfied back to writing */
    comm_point_listen_for_rw(c, 0, 1);
    c->ssl_shake_state = comm_ssl_shake_none;
    return 1;
  case comm_ssl_shake_hs_write:
    /* write condition satisfied, back to reading */
    comm_point_listen_for_rw(c, 1, 0);
    c->ssl_shake_state = comm_ssl_shake_none;
    return 1;
  default:
    break;
  }

  ERR_clear_error();
  r = SSL_do_handshake(c->ssl);
  if(r != 1) {
    int want = SSL_get_error(c->ssl, r);
    unsigned long err;
    if(want == SSL_ERROR_WANT_READ) {
      /* only change the events if not already waiting for read */
      if(c->ssl_shake_state != comm_ssl_shake_read) {
        c->ssl_shake_state = comm_ssl_shake_read;
        comm_point_listen_for_rw(c, 1, 0);
      }
      return 1;
    }
    if(want == SSL_ERROR_WANT_WRITE) {
      /* only change the events if not already waiting for write */
      if(c->ssl_shake_state != comm_ssl_shake_write) {
        c->ssl_shake_state = comm_ssl_shake_write;
        comm_point_listen_for_rw(c, 0, 1);
      }
      return 1;
    }
    if(r == 0)
      return 0; /* closed */
    if(want == SSL_ERROR_SYSCALL) {
      /* SYSCALL and errno==0 means closed uncleanly */
#ifdef EPIPE
      if(errno == EPIPE && verbosity < 2)
        return 0; /* silence 'broken pipe' */
#endif
#ifdef ECONNRESET
      if(errno == ECONNRESET && verbosity < 2)
        return 0; /* silence reset by peer */
#endif
      if(!tcp_connect_errno_needs_log(
        (struct sockaddr*)&c->repinfo.remote_addr,
        c->repinfo.remote_addrlen))
        return 0; /* silence connect failures that
        show up because after connect this is the
        first system call that accesses the socket */
      if(errno != 0)
        log_err("SSL_handshake syscall: %s",
          strerror(errno));
      return 0;
    }
    /* other failure, log it unless it has to be squelched */
    err = ERR_get_error();
    if(!squelch_err_ssl_handshake(err)) {
      long vr;
      log_crypto_err_io_code("ssl handshake failed",
        want, err);
      vr = SSL_get_verify_result(c->ssl);
      if(vr != 0)
        log_err("ssl handshake cert error: %s",
          X509_verify_cert_error_string(vr));
      log_addr(VERB_OPS, "ssl handshake failed",
        &c->repinfo.remote_addr,
        c->repinfo.remote_addrlen);
    }
    return 0;
  }
  /* this is where peer verification could take place */
  if(!(SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
    /* unauthenticated, the verify peer flag was not set
     * in c->ssl when the ssl object was created from ssl_ctx */
    log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr,
      c->repinfo.remote_addrlen);
  } else {
    /* verification */
    int verified = (SSL_get_verify_result(c->ssl) == X509_V_OK);
    X509* x;
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
    x = SSL_get1_peer_certificate(c->ssl);
#else
    x = SSL_get_peer_certificate(c->ssl);
#endif
    if(!verified) {
      if(x) {
        log_cert(VERB_ALGO, "peer certificate", x);
        X509_free(x);
      }
      log_addr(VERB_ALGO, "SSL connection failed: "
        "failed to authenticate",
        &c->repinfo.remote_addr,
        c->repinfo.remote_addrlen);
      return 0;
    }
    if(!x) {
      log_addr(VERB_ALGO, "SSL connection failed: "
        "no certificate",
        &c->repinfo.remote_addr,
        c->repinfo.remote_addrlen);
      return 0;
    }
    log_cert(VERB_ALGO, "peer certificate", x);
#ifdef HAVE_SSL_GET0_PEERNAME
    if(SSL_get0_peername(c->ssl)) {
      char buf[255];
      snprintf(buf, sizeof(buf), "SSL connection "
        "to %s authenticated",
        SSL_get0_peername(c->ssl));
      log_addr(VERB_ALGO, buf, &c->repinfo.remote_addr,
        c->repinfo.remote_addrlen);
    } else
#endif
    {
      log_addr(VERB_ALGO, "SSL connection "
        "authenticated", &c->repinfo.remote_addr,
        c->repinfo.remote_addrlen);
    }
    X509_free(x);
  }

#ifdef HAVE_SSL_GET0_ALPN_SELECTED
  /* check if http2 use is negotiated */
  if(c->type == comm_http && c->h2_session) {
    const unsigned char *alpn;
    unsigned int alpnlen = 0;
    SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen);
    if(alpnlen != 2 || memcmp("h2", alpn, 2) != 0) {
      verbose(VERB_ALGO, "client doesn't support HTTP/2");
      return 0;
    }
    /* connection upgraded to HTTP2 */
    c->tcp_do_toggle_rw = 0;
    c->use_h2 = 1;
  }
#endif

  /* setup listen rw correctly */
  if(!c->tcp_is_reading)
    comm_point_listen_for_rw(c, 0, 1);
  else if(c->ssl_shake_state != comm_ssl_shake_read)
    comm_point_listen_for_rw(c, 1, 0);
  c->ssl_shake_state = comm_ssl_shake_none;
  return 1;
}
#endif /* HAVE_SSL */
3588
3589
#ifdef HAVE_SSL
/**
 * Deal with an SSL_read() that returned <= 0 on a TCP comm point.
 * Every SSL_read in ssl_handle_read (PROXYv2 fixed part, PROXYv2 variable
 * part, DNS length prefix, DNS payload) reacts to failure in the same way,
 * so the reaction lives here once.
 * Call it directly after the failed SSL_read: it inspects the OpenSSL
 * error state and errno that the call left behind.
 * @param c: comm point whose c->ssl was read from.
 * @param r: the return value of the failed SSL_read (<= 0).
 * @return value for the read callback to return; 1 means wait for the
 *	next event on this connection, 0 means error or closed.
 */
static int
ssl_handle_read_failure(struct comm_point* c, int r)
{
  int want = SSL_get_error(c->ssl, r);
  if(want == SSL_ERROR_ZERO_RETURN) {
    /* the peer shut the TLS session down */
    if(c->tcp_req_info)
      return tcp_req_info_handle_read_close(c->tcp_req_info);
    return 0; /* shutdown, closed */
  } else if(want == SSL_ERROR_WANT_READ) {
#ifdef USE_WINSOCK
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
#endif
    return 1; /* read more later */
  } else if(want == SSL_ERROR_WANT_WRITE) {
    /* the read can only continue after a write event; remember
     * that so ssl_handle_it routes the write event back to the
     * read handler */
    c->ssl_shake_state = comm_ssl_shake_hs_write;
    comm_point_listen_for_rw(c, 0, 1);
    return 1;
  } else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
    if(errno == ECONNRESET && verbosity < 2)
      return 0; /* silence reset by peer */
#endif
    if(errno != 0)
      log_err("SSL_read syscall: %s",
        strerror(errno));
    return 0;
  }
  log_crypto_err_io("could not SSL_read", want);
  return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Continues a pending handshake first. Then, if PROXYv2 is enabled and its
 * header is not yet consumed, reads and consumes that header. Otherwise
 * reads the two byte DNS length prefix followed by the DNS message into
 * c->buffer, and calls tcp_callback_reader once the buffer is full.
 * Without HAVE_SSL this always fails.
 * @param c: comm point with the ssl connection to read from.
 * @return 0 on error or close, 1 if the connection stays open (possibly
 *	waiting for more data).
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
  int r;
  if(c->ssl_shake_state != comm_ssl_shake_none) {
    if(!ssl_handshake(c))
      return 0;
    /* handshake still in progress, wait for the next event */
    if(c->ssl_shake_state != comm_ssl_shake_none)
      return 1;
  }
  if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
    struct pp2_header* header = NULL;
    size_t want_read_size = 0;
    size_t current_read_size = 0;
    if(c->pp2_header_state == pp2_header_none) {
      /* stage 1: the fixed size part of the header */
      want_read_size = PP2_HEADER_SIZE;
      if(sldns_buffer_remaining(c->buffer)<want_read_size) {
        log_err_addr("proxy_protocol: not enough "
          "buffer size to read PROXYv2 header", "",
          &c->repinfo.remote_addr,
          c->repinfo.remote_addrlen);
        return 0;
      }
      verbose(VERB_ALGO, "proxy_protocol: reading fixed "
        "part of PROXYv2 header (len %lu)",
        (unsigned long)want_read_size);
      current_read_size = want_read_size;
      if(c->tcp_byte_count < current_read_size) {
        ERR_clear_error();
        r = SSL_read(c->ssl, (void*)sldns_buffer_at(
          c->buffer, c->tcp_byte_count),
          current_read_size -
          c->tcp_byte_count);
        if(r <= 0)
          return ssl_handle_read_failure(c, r);
        c->tcp_byte_count += r;
        sldns_buffer_skip(c->buffer, r);
        if(c->tcp_byte_count != current_read_size) return 1;
        c->pp2_header_state = pp2_header_init;
      }
    }
    if(c->pp2_header_state == pp2_header_init) {
      /* stage 2: validate the fixed part, then read the
       * variable part whose length it announces */
      int err;
      err = pp2_read_header(
        sldns_buffer_begin(c->buffer),
        sldns_buffer_limit(c->buffer));
      if(err) {
        log_err("proxy_protocol: could not parse "
          "PROXYv2 header (%s)",
          pp_lookup_error(err));
        return 0;
      }
      header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
      want_read_size = ntohs(header->len);
      if(sldns_buffer_limit(c->buffer) <
        PP2_HEADER_SIZE + want_read_size) {
        log_err_addr("proxy_protocol: not enough "
          "buffer size to read PROXYv2 header", "",
          &c->repinfo.remote_addr,
          c->repinfo.remote_addrlen);
        return 0;
      }
      verbose(VERB_ALGO, "proxy_protocol: reading variable "
        "part of PROXYv2 header (len %lu)",
        (unsigned long)want_read_size);
      current_read_size = PP2_HEADER_SIZE + want_read_size;
      if(want_read_size == 0) {
        /* nothing more to read; header is complete */
        c->pp2_header_state = pp2_header_done;
      } else if(c->tcp_byte_count < current_read_size) {
        ERR_clear_error();
        r = SSL_read(c->ssl, (void*)sldns_buffer_at(
          c->buffer, c->tcp_byte_count),
          current_read_size -
          c->tcp_byte_count);
        if(r <= 0)
          return ssl_handle_read_failure(c, r);
        c->tcp_byte_count += r;
        sldns_buffer_skip(c->buffer, r);
        if(c->tcp_byte_count != current_read_size) return 1;
        c->pp2_header_state = pp2_header_done;
      }
    }
    /* header is only set by stage 2, so this also rejects the case
     * where neither stage made progress */
    if(c->pp2_header_state != pp2_header_done || !header) {
      log_err_addr("proxy_protocol: wrong state for the "
        "PROXYv2 header", "", &c->repinfo.remote_addr,
        c->repinfo.remote_addrlen);
      return 0;
    }
    sldns_buffer_flip(c->buffer);
    if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
      log_err_addr("proxy_protocol: could not consume "
        "PROXYv2 header", "", &c->repinfo.remote_addr,
        c->repinfo.remote_addrlen);
      return 0;
    }
    verbose(VERB_ALGO, "proxy_protocol: successful read of "
      "PROXYv2 header");
    /* Clear and reset the buffer to read the following
     * DNS packet(s). */
    sldns_buffer_clear(c->buffer);
    c->tcp_byte_count = 0;
    return 1;
  }
  if(c->tcp_byte_count < sizeof(uint16_t)) {
    /* read length bytes */
    ERR_clear_error();
    r = SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
      c->tcp_byte_count), (int)(sizeof(uint16_t) -
      c->tcp_byte_count));
    if(r <= 0)
      return ssl_handle_read_failure(c, r);
    c->tcp_byte_count += r;
    if(c->tcp_byte_count < sizeof(uint16_t))
      return 1;
    if(sldns_buffer_read_u16_at(c->buffer, 0) >
      sldns_buffer_capacity(c->buffer)) {
      verbose(VERB_QUERY, "ssl: dropped larger than buffer");
      return 0;
    }
    /* the length prefix becomes the buffer limit, so that
     * remaining() counts the message bytes still to be read */
    sldns_buffer_set_limit(c->buffer,
      sldns_buffer_read_u16_at(c->buffer, 0));
    if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
      verbose(VERB_QUERY, "ssl: dropped bogus too short.");
      return 0;
    }
    sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
    verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
      (int)sldns_buffer_limit(c->buffer));
  }
  if(sldns_buffer_remaining(c->buffer) > 0) {
    ERR_clear_error();
    r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
      (int)sldns_buffer_remaining(c->buffer));
    if(r <= 0)
      return ssl_handle_read_failure(c, r);
    sldns_buffer_skip(c->buffer, (ssize_t)r);
  }
  if(sldns_buffer_remaining(c->buffer) <= 0) {
    /* complete message is in the buffer */
    tcp_callback_reader(c);
  }
  return 1;
#else
  (void)c;
  return 0;
#endif /* HAVE_SSL */
}
3841
3842
#ifdef HAVE_SSL
/**
 * Deal with an SSL_write() that returned <= 0 on a TCP comm point.
 * Both SSL_write sites in ssl_handle_write (length prefix and payload)
 * react to failure in the same way, so the reaction lives here once.
 * Call it directly after the failed SSL_write: it inspects the OpenSSL
 * error state and errno that the call left behind.
 * @param c: comm point whose c->ssl was written to.
 * @param r: the return value of the failed SSL_write (<= 0).
 * @return value for the write callback to return; 1 means wait for the
 *	next event on this connection, 0 means error or closed.
 */
static int
ssl_handle_write_failure(struct comm_point* c, int r)
{
  int want = SSL_get_error(c->ssl, r);
  if(want == SSL_ERROR_ZERO_RETURN) {
    return 0; /* closed */
  } else if(want == SSL_ERROR_WANT_READ) {
    /* the write can only continue after a read event; remember
     * that so ssl_handle_it routes the read event back to the
     * write handler */
    c->ssl_shake_state = comm_ssl_shake_hs_read;
    comm_point_listen_for_rw(c, 1, 0);
    return 1; /* wait for read condition */
  } else if(want == SSL_ERROR_WANT_WRITE) {
#ifdef USE_WINSOCK
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
#endif
    return 1; /* write more later */
  } else if(want == SSL_ERROR_SYSCALL) {
#ifdef EPIPE
    if(errno == EPIPE && verbosity < 2)
      return 0; /* silence 'broken pipe' */
#endif
    if(errno != 0)
      log_err("SSL_write syscall: %s",
        strerror(errno));
    return 0;
  }
  log_crypto_err_io("could not SSL_write", want);
  return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Continues a pending handshake first, then writes the two byte length
 * prefix and the message. The message comes from c->tcp_write_pkt when
 * c->tcp_write_and_read is set (progress in c->tcp_write_byte_count), and
 * from c->buffer otherwise (progress in c->tcp_byte_count and the buffer
 * position). Calls tcp_callback_writer once everything has been written.
 * Without HAVE_SSL this always fails.
 * @param c: comm point with the ssl connection to write to.
 * @return 0 on error or close, 1 if the connection stays open (possibly
 *	with data still to be written).
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
  int r;
  if(c->ssl_shake_state != comm_ssl_shake_none) {
    if(!ssl_handshake(c))
      return 0;
    /* handshake still in progress, wait for the next event */
    if(c->ssl_shake_state != comm_ssl_shake_none)
      return 1;
  }
  /* ignore return, if fails we may simply block */
  (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
  if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
    /* the length prefix is not (completely) written yet */
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer));
    ERR_clear_error();
    if(c->tcp_write_and_read) {
      if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) {
        /* combine the tcp length and the query for
         * write, this emulates writev */
        uint8_t buf[LDNS_RR_BUF_SIZE];
        memmove(buf, &len, sizeof(uint16_t));
        memmove(buf+sizeof(uint16_t),
          c->tcp_write_pkt,
          c->tcp_write_pkt_len);
        r = SSL_write(c->ssl,
          (void*)(buf+c->tcp_write_byte_count),
          c->tcp_write_pkt_len + 2 -
          c->tcp_write_byte_count);
      } else {
        /* too large to combine, send only the rest of
         * the length prefix now */
        r = SSL_write(c->ssl,
          (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
          (int)(sizeof(uint16_t)-c->tcp_write_byte_count));
      }
    } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
      LDNS_RR_BUF_SIZE) {
      /* combine the tcp length and the query for write,
       * this emulates writev */
      uint8_t buf[LDNS_RR_BUF_SIZE];
      memmove(buf, &len, sizeof(uint16_t));
      memmove(buf+sizeof(uint16_t),
        sldns_buffer_current(c->buffer),
        sldns_buffer_remaining(c->buffer));
      r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
        (int)(sizeof(uint16_t)+
        sldns_buffer_remaining(c->buffer)
        - c->tcp_byte_count));
    } else {
      /* too large to combine, send only the rest of the
       * length prefix now */
      r = SSL_write(c->ssl,
        (void*)(((uint8_t*)&len)+c->tcp_byte_count),
        (int)(sizeof(uint16_t)-c->tcp_byte_count));
    }
    if(r <= 0)
      return ssl_handle_write_failure(c, r);
    if(c->tcp_write_and_read) {
      c->tcp_write_byte_count += r;
      if(c->tcp_write_byte_count < sizeof(uint16_t))
        return 1;
    } else {
      c->tcp_byte_count += r;
      if(c->tcp_byte_count < sizeof(uint16_t))
        return 1;
      /* bytes beyond the prefix were message bytes; move the
       * buffer position past them */
      sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
        sizeof(uint16_t));
    }
    if((!c->tcp_write_and_read &&
      sldns_buffer_remaining(c->buffer) == 0) ||
      (c->tcp_write_and_read &&
      c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
      /* prefix and message went out in one go */
      tcp_callback_writer(c);
      return 1;
    }
  }
  log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0);
  log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
  ERR_clear_error();
  if(c->tcp_write_and_read) {
    /* tcp_write_byte_count includes the 2 prefix bytes */
    r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
      (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count));
  } else {
    r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
      (int)sldns_buffer_remaining(c->buffer));
  }
  if(r <= 0)
    return ssl_handle_write_failure(c, r);
  if(c->tcp_write_and_read) {
    c->tcp_write_byte_count += r;
  } else {
    sldns_buffer_skip(c->buffer, (ssize_t)r);
  }

  if((!c->tcp_write_and_read &&
    sldns_buffer_remaining(c->buffer) == 0) ||
    (c->tcp_write_and_read &&
    c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
    tcp_callback_writer(c);
  }
  return 1;
#else
  (void)c;
  return 0;
#endif /* HAVE_SSL */
}
3988
3989
/** handle ssl tcp connection with dns contents */
3990
static int
3991
ssl_handle_it(struct comm_point* c, int is_write)
3992
0
{
3993
  /* handle case where renegotiation wants read during write call
3994
   * or write during read calls */
3995
0
  if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write)
3996
0
    return ssl_handle_read(c);
3997
0
  else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read)
3998
0
    return ssl_handle_write(c);
3999
  /* handle read events for read operation and write events for a
4000
   * write operation */
4001
0
  else if(!is_write)
4002
0
    return ssl_handle_read(c);
4003
0
  return ssl_handle_write(c);
4004
0
}
4005
4006
/**
4007
 * Handle tcp reading callback.
4008
 * @param fd: file descriptor of socket.
4009
 * @param c: comm point to read from into buffer.
4010
 * @param short_ok: if true, very short packets are OK (for comm_local).
4011
 * @return: 0 on error
4012
 */
4013
static int
4014
comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
4015
0
{
4016
0
  ssize_t r;
4017
0
  int recv_initial = 0;
4018
0
  log_assert(c->type == comm_tcp || c->type == comm_local);
4019
0
  if(c->ssl)
4020
0
    return ssl_handle_it(c, 0);
4021
0
  if(!c->tcp_is_reading && !c->tcp_write_and_read)
4022
0
    return 0;
4023
4024
0
  log_assert(fd != -1);
4025
0
  if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
4026
0
    struct pp2_header* header = NULL;
4027
0
    size_t want_read_size = 0;
4028
0
    size_t current_read_size = 0;
4029
0
    if(c->pp2_header_state == pp2_header_none) {
4030
0
      want_read_size = PP2_HEADER_SIZE;
4031
0
      if(sldns_buffer_remaining(c->buffer)<want_read_size) {
4032
0
        log_err_addr("proxy_protocol: not enough "
4033
0
          "buffer size to read PROXYv2 header", "",
4034
0
          &c->repinfo.remote_addr,
4035
0
          c->repinfo.remote_addrlen);
4036
0
        return 0;
4037
0
      }
4038
0
      verbose(VERB_ALGO, "proxy_protocol: reading fixed "
4039
0
        "part of PROXYv2 header (len %lu)",
4040
0
        (unsigned long)want_read_size);
4041
0
      current_read_size = want_read_size;
4042
0
      if(c->tcp_byte_count < current_read_size) {
4043
0
        r = recv(fd, (void*)sldns_buffer_at(c->buffer,
4044
0
          c->tcp_byte_count),
4045
0
          current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
4046
0
        if(r == 0) {
4047
0
          if(c->tcp_req_info)
4048
0
            return tcp_req_info_handle_read_close(c->tcp_req_info);
4049
0
          return 0;
4050
0
        } else if(r == -1) {
4051
0
          goto recv_error_initial;
4052
0
        }
4053
0
        c->tcp_byte_count += r;
4054
0
        sldns_buffer_skip(c->buffer, r);
4055
0
        if(c->tcp_byte_count != current_read_size) return 1;
4056
0
        c->pp2_header_state = pp2_header_init;
4057
0
      }
4058
0
    }
4059
0
    if(c->pp2_header_state == pp2_header_init) {
4060
0
      int err;
4061
0
      err = pp2_read_header(
4062
0
        sldns_buffer_begin(c->buffer),
4063
0
        sldns_buffer_limit(c->buffer));
4064
0
      if(err) {
4065
0
        log_err("proxy_protocol: could not parse "
4066
0
          "PROXYv2 header (%s)",
4067
0
          pp_lookup_error(err));
4068
0
        return 0;
4069
0
      }
4070
0
      header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
4071
0
      want_read_size = ntohs(header->len);
4072
0
      if(sldns_buffer_limit(c->buffer) <
4073
0
        PP2_HEADER_SIZE + want_read_size) {
4074
0
        log_err_addr("proxy_protocol: not enough "
4075
0
          "buffer size to read PROXYv2 header", "",
4076
0
          &c->repinfo.remote_addr,
4077
0
          c->repinfo.remote_addrlen);
4078
0
        return 0;
4079
0
      }
4080
0
      verbose(VERB_ALGO, "proxy_protocol: reading variable "
4081
0
        "part of PROXYv2 header (len %lu)",
4082
0
        (unsigned long)want_read_size);
4083
0
      current_read_size = PP2_HEADER_SIZE + want_read_size;
4084
0
      if(want_read_size == 0) {
4085
        /* nothing more to read; header is complete */
4086
0
        c->pp2_header_state = pp2_header_done;
4087
0
      } else if(c->tcp_byte_count < current_read_size) {
4088
0
        r = recv(fd, (void*)sldns_buffer_at(c->buffer,
4089
0
          c->tcp_byte_count),
4090
0
          current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
4091
0
        if(r == 0) {
4092
0
          if(c->tcp_req_info)
4093
0
            return tcp_req_info_handle_read_close(c->tcp_req_info);
4094
0
          return 0;
4095
0
        } else if(r == -1) {
4096
0
          goto recv_error;
4097
0
        }
4098
0
        c->tcp_byte_count += r;
4099
0
        sldns_buffer_skip(c->buffer, r);
4100
0
        if(c->tcp_byte_count != current_read_size) return 1;
4101
0
        c->pp2_header_state = pp2_header_done;
4102
0
      }
4103
0
    }
4104
0
    if(c->pp2_header_state != pp2_header_done || !header) {
4105
0
      log_err_addr("proxy_protocol: wrong state for the "
4106
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
4107
0
        c->repinfo.remote_addrlen);
4108
0
      return 0;
4109
0
    }
4110
0
    sldns_buffer_flip(c->buffer);
4111
0
    if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
4112
0
      log_err_addr("proxy_protocol: could not consume "
4113
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
4114
0
        c->repinfo.remote_addrlen);
4115
0
      return 0;
4116
0
    }
4117
0
    verbose(VERB_ALGO, "proxy_protocol: successful read of "
4118
0
      "PROXYv2 header");
4119
    /* Clear and reset the buffer to read the following
4120
        * DNS packet(s). */
4121
0
    sldns_buffer_clear(c->buffer);
4122
0
    c->tcp_byte_count = 0;
4123
0
    return 1;
4124
0
  }
4125
4126
0
  if(c->tcp_byte_count < sizeof(uint16_t)) {
4127
    /* read length bytes */
4128
0
    r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
4129
0
      sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT);
4130
0
    if(r == 0) {
4131
0
      if(c->tcp_req_info)
4132
0
        return tcp_req_info_handle_read_close(c->tcp_req_info);
4133
0
      return 0;
4134
0
    } else if(r == -1) {
4135
0
      if(c->pp2_enabled) goto recv_error;
4136
0
      goto recv_error_initial;
4137
0
    }
4138
0
    c->tcp_byte_count += r;
4139
0
    if(c->tcp_byte_count != sizeof(uint16_t))
4140
0
      return 1;
4141
0
    if(sldns_buffer_read_u16_at(c->buffer, 0) >
4142
0
      sldns_buffer_capacity(c->buffer)) {
4143
0
      verbose(VERB_QUERY, "tcp: dropped larger than buffer");
4144
0
      return 0;
4145
0
    }
4146
0
    sldns_buffer_set_limit(c->buffer,
4147
0
      sldns_buffer_read_u16_at(c->buffer, 0));
4148
0
    if(!short_ok &&
4149
0
      sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
4150
0
      verbose(VERB_QUERY, "tcp: dropped bogus too short.");
4151
0
      return 0;
4152
0
    }
4153
0
    verbose(VERB_ALGO, "Reading tcp query of length %d",
4154
0
      (int)sldns_buffer_limit(c->buffer));
4155
0
  }
4156
4157
0
  if(sldns_buffer_remaining(c->buffer) == 0)
4158
0
    log_err("in comm_point_tcp_handle_read buffer_remaining is "
4159
0
      "not > 0 as expected, continuing with (harmless) 0 "
4160
0
      "length recv");
4161
0
  r = recv(fd, (void*)sldns_buffer_current(c->buffer),
4162
0
    sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
4163
0
  if(r == 0) {
4164
0
    if(c->tcp_req_info)
4165
0
      return tcp_req_info_handle_read_close(c->tcp_req_info);
4166
0
    return 0;
4167
0
  } else if(r == -1) {
4168
0
    goto recv_error;
4169
0
  }
4170
0
  sldns_buffer_skip(c->buffer, r);
4171
0
  if(sldns_buffer_remaining(c->buffer) <= 0) {
4172
0
    tcp_callback_reader(c);
4173
0
  }
4174
0
  return 1;
4175
4176
0
recv_error_initial:
4177
0
  recv_initial = 1;
4178
0
recv_error:
4179
0
#ifndef USE_WINSOCK
4180
0
  if(errno == EINTR || errno == EAGAIN)
4181
0
    return 1;
4182
0
#ifdef ECONNRESET
4183
0
  if(errno == ECONNRESET && verbosity < 2)
4184
0
    return 0; /* silence reset by peer */
4185
0
#endif
4186
0
  if(recv_initial) {
4187
0
#ifdef ECONNREFUSED
4188
0
    if(errno == ECONNREFUSED && verbosity < 2)
4189
0
      return 0; /* silence reset by peer */
4190
0
#endif
4191
0
#ifdef ENETUNREACH
4192
0
    if(errno == ENETUNREACH && verbosity < 2)
4193
0
      return 0; /* silence it */
4194
0
#endif
4195
0
#ifdef EHOSTDOWN
4196
0
    if(errno == EHOSTDOWN && verbosity < 2)
4197
0
      return 0; /* silence it */
4198
0
#endif
4199
0
#ifdef EHOSTUNREACH
4200
0
    if(errno == EHOSTUNREACH && verbosity < 2)
4201
0
      return 0; /* silence it */
4202
0
#endif
4203
0
#ifdef ENETDOWN
4204
0
    if(errno == ENETDOWN && verbosity < 2)
4205
0
      return 0; /* silence it */
4206
0
#endif
4207
0
#ifdef EACCES
4208
0
    if(errno == EACCES && verbosity < 2)
4209
0
      return 0; /* silence it */
4210
0
#endif
4211
0
#ifdef ENOTCONN
4212
0
    if(errno == ENOTCONN) {
4213
0
      log_err_addr("read (in tcp initial) failed and this "
4214
0
        "could be because TCP Fast Open is "
4215
0
        "enabled [--disable-tfo-client "
4216
0
        "--disable-tfo-server] but does not "
4217
0
        "work", sock_strerror(errno),
4218
0
        &c->repinfo.remote_addr,
4219
0
        c->repinfo.remote_addrlen);
4220
0
      return 0;
4221
0
    }
4222
0
#endif
4223
0
  }
4224
#else /* USE_WINSOCK */
4225
  if(recv_initial) {
4226
    if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
4227
      return 0;
4228
    if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
4229
      return 0;
4230
    if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
4231
      return 0;
4232
    if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
4233
      return 0;
4234
    if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
4235
      return 0;
4236
  }
4237
  if(WSAGetLastError() == WSAECONNRESET)
4238
    return 0;
4239
  if(WSAGetLastError() == WSAEINPROGRESS)
4240
    return 1;
4241
  if(WSAGetLastError() == WSAEWOULDBLOCK) {
4242
    ub_winsock_tcp_wouldblock(c->ev->ev,
4243
      UB_EV_READ);
4244
    return 1;
4245
  }
4246
#endif
4247
0
  log_err_addr((recv_initial?"read (in tcp initial)":"read (in tcp)"),
4248
0
    sock_strerror(errno), &c->repinfo.remote_addr,
4249
0
    c->repinfo.remote_addrlen);
4250
0
  return 0;
4251
0
}
4252
4253
/**
4254
 * Handle tcp writing callback.
4255
 * @param fd: file descriptor of socket.
4256
 * @param c: comm point to write buffer out of.
4257
 * @return: 0 on error
4258
 */
4259
static int
4260
comm_point_tcp_handle_write(int fd, struct comm_point* c)
4261
0
{
4262
0
  ssize_t r;
4263
0
  struct sldns_buffer *buffer;
4264
0
  log_assert(c->type == comm_tcp);
4265
#ifdef USE_DNSCRYPT
4266
  buffer = c->dnscrypt_buffer;
4267
#else
4268
0
  buffer = c->buffer;
4269
0
#endif
4270
0
  if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
4271
0
    return 0;
4272
0
  log_assert(fd != -1);
4273
0
  if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
4274
    /* check for pending error from nonblocking connect */
4275
    /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
4276
0
    int error = 0;
4277
0
    socklen_t len = (socklen_t)sizeof(error);
4278
0
    if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
4279
0
      &len) < 0){
4280
0
#ifndef USE_WINSOCK
4281
0
      error = errno; /* on solaris errno is error */
4282
#else /* USE_WINSOCK */
4283
      error = WSAGetLastError();
4284
#endif
4285
0
    }
4286
0
#ifndef USE_WINSOCK
4287
0
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
4288
0
    if(error == EINPROGRESS || error == EWOULDBLOCK)
4289
0
      return 1; /* try again later */
4290
0
    else
4291
0
#endif
4292
0
    if(error != 0 && verbosity < 2)
4293
0
      return 0; /* silence lots of chatter in the logs */
4294
0
                else if(error != 0) {
4295
0
      log_err_addr("tcp connect", strerror(error),
4296
0
        &c->repinfo.remote_addr,
4297
0
        c->repinfo.remote_addrlen);
4298
#else /* USE_WINSOCK */
4299
    /* examine error */
4300
    if(error == WSAEINPROGRESS)
4301
      return 1;
4302
    else if(error == WSAEWOULDBLOCK) {
4303
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
4304
      return 1;
4305
    } else if(error != 0 && verbosity < 2)
4306
      return 0;
4307
    else if(error != 0) {
4308
      log_err_addr("tcp connect", wsa_strerror(error),
4309
        &c->repinfo.remote_addr,
4310
        c->repinfo.remote_addrlen);
4311
#endif /* USE_WINSOCK */
4312
0
      return 0;
4313
0
    }
4314
0
  }
4315
0
  if(c->ssl)
4316
0
    return ssl_handle_it(c, 1);
4317
4318
#ifdef USE_MSG_FASTOPEN
4319
  /* Only try this on first use of a connection that uses tfo,
4320
     otherwise fall through to normal write */
4321
  /* Also, TFO support on WINDOWS not implemented at the moment */
4322
  if(c->tcp_do_fastopen == 1) {
4323
    /* this form of sendmsg() does both a connect() and send() so need to
4324
       look for various flavours of error*/
4325
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
4326
    struct msghdr msg;
4327
    struct iovec iov[2];
4328
    c->tcp_do_fastopen = 0;
4329
    memset(&msg, 0, sizeof(msg));
4330
    if(c->tcp_write_and_read) {
4331
      iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
4332
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
4333
      iov[1].iov_base = c->tcp_write_pkt;
4334
      iov[1].iov_len = c->tcp_write_pkt_len;
4335
    } else {
4336
      iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
4337
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
4338
      iov[1].iov_base = sldns_buffer_begin(buffer);
4339
      iov[1].iov_len = sldns_buffer_limit(buffer);
4340
    }
4341
    log_assert(iov[0].iov_len > 0);
4342
    msg.msg_name = &c->repinfo.remote_addr;
4343
    msg.msg_namelen = c->repinfo.remote_addrlen;
4344
    msg.msg_iov = iov;
4345
    msg.msg_iovlen = 2;
4346
    r = sendmsg(fd, &msg, MSG_FASTOPEN);
4347
    if (r == -1) {
4348
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
4349
      /* Handshake is underway, maybe because no TFO cookie available.
4350
         Come back to write the message*/
4351
      if(errno == EINPROGRESS || errno == EWOULDBLOCK)
4352
        return 1;
4353
#endif
4354
      if(errno == EINTR || errno == EAGAIN)
4355
        return 1;
4356
      /* Not handling EISCONN here as shouldn't ever hit that case.*/
4357
      if(errno != EPIPE
4358
#ifdef EOPNOTSUPP
4359
        /* if /proc/sys/net/ipv4/tcp_fastopen is
4360
         * disabled on Linux, sendmsg may return
4361
         * 'Operation not supported', if so
4362
         * fallthrough to ordinary connect. */
4363
        && errno != EOPNOTSUPP
4364
#endif
4365
        && errno != 0) {
4366
        if(verbosity < 2)
4367
          return 0; /* silence lots of chatter in the logs */
4368
        log_err_addr("tcp sendmsg", strerror(errno),
4369
          &c->repinfo.remote_addr,
4370
          c->repinfo.remote_addrlen);
4371
        return 0;
4372
      }
4373
      verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno));
4374
      /* fallthrough to nonFASTOPEN
4375
       * (MSG_FASTOPEN on Linux 3 produces EPIPE)
4376
       * we need to perform connect() */
4377
      if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr,
4378
        c->repinfo.remote_addrlen) == -1) {
4379
#ifdef EINPROGRESS
4380
        if(errno == EINPROGRESS)
4381
          return 1; /* wait until connect done*/
4382
#endif
4383
#ifdef USE_WINSOCK
4384
        if(WSAGetLastError() == WSAEINPROGRESS ||
4385
          WSAGetLastError() == WSAEWOULDBLOCK)
4386
          return 1; /* wait until connect done*/
4387
#endif
4388
        if(tcp_connect_errno_needs_log(
4389
          (struct sockaddr *)&c->repinfo.remote_addr,
4390
          c->repinfo.remote_addrlen)) {
4391
          log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
4392
            strerror(errno),
4393
            &c->repinfo.remote_addr,
4394
            c->repinfo.remote_addrlen);
4395
        }
4396
        return 0;
4397
      }
4398
4399
    } else {
4400
      if(c->tcp_write_and_read) {
4401
        c->tcp_write_byte_count += r;
4402
        if(c->tcp_write_byte_count < sizeof(uint16_t))
4403
          return 1;
4404
      } else {
4405
        c->tcp_byte_count += r;
4406
        if(c->tcp_byte_count < sizeof(uint16_t))
4407
          return 1;
4408
        sldns_buffer_set_position(buffer, c->tcp_byte_count -
4409
          sizeof(uint16_t));
4410
      }
4411
      if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4412
        tcp_callback_writer(c);
4413
        return 1;
4414
      }
4415
    }
4416
  }
4417
#endif /* USE_MSG_FASTOPEN */
4418
4419
0
  if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
4420
0
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
4421
0
#ifdef HAVE_WRITEV
4422
0
    struct iovec iov[2];
4423
0
    if(c->tcp_write_and_read) {
4424
0
      iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
4425
0
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
4426
0
      iov[1].iov_base = c->tcp_write_pkt;
4427
0
      iov[1].iov_len = c->tcp_write_pkt_len;
4428
0
    } else {
4429
0
      iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
4430
0
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
4431
0
      iov[1].iov_base = sldns_buffer_begin(buffer);
4432
0
      iov[1].iov_len = sldns_buffer_limit(buffer);
4433
0
    }
4434
0
    log_assert(iov[0].iov_len > 0);
4435
0
    r = writev(fd, iov, 2);
4436
#else /* HAVE_WRITEV */
4437
    if(c->tcp_write_and_read) {
4438
      r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
4439
        sizeof(uint16_t)-c->tcp_write_byte_count, 0);
4440
    } else {
4441
      r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
4442
        sizeof(uint16_t)-c->tcp_byte_count, 0);
4443
    }
4444
#endif /* HAVE_WRITEV */
4445
0
    if(r == -1) {
4446
0
#ifndef USE_WINSOCK
4447
0
#  ifdef EPIPE
4448
0
                  if(errno == EPIPE && verbosity < 2)
4449
0
                          return 0; /* silence 'broken pipe' */
4450
0
  #endif
4451
0
      if(errno == EINTR || errno == EAGAIN)
4452
0
        return 1;
4453
0
#ifdef ECONNRESET
4454
0
      if(errno == ECONNRESET && verbosity < 2)
4455
0
        return 0; /* silence reset by peer */
4456
0
#endif
4457
0
#  ifdef HAVE_WRITEV
4458
0
      log_err_addr("tcp writev", strerror(errno),
4459
0
        &c->repinfo.remote_addr,
4460
0
        c->repinfo.remote_addrlen);
4461
#  else /* HAVE_WRITEV */
4462
      log_err_addr("tcp send s", strerror(errno),
4463
        &c->repinfo.remote_addr,
4464
        c->repinfo.remote_addrlen);
4465
#  endif /* HAVE_WRITEV */
4466
#else
4467
      if(WSAGetLastError() == WSAENOTCONN)
4468
        return 1;
4469
      if(WSAGetLastError() == WSAEINPROGRESS)
4470
        return 1;
4471
      if(WSAGetLastError() == WSAEWOULDBLOCK) {
4472
        ub_winsock_tcp_wouldblock(c->ev->ev,
4473
          UB_EV_WRITE);
4474
        return 1;
4475
      }
4476
      if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
4477
        return 0; /* silence reset by peer */
4478
      log_err_addr("tcp send s",
4479
        wsa_strerror(WSAGetLastError()),
4480
        &c->repinfo.remote_addr,
4481
        c->repinfo.remote_addrlen);
4482
#endif
4483
0
      return 0;
4484
0
    }
4485
0
    if(c->tcp_write_and_read) {
4486
0
      c->tcp_write_byte_count += r;
4487
0
      if(c->tcp_write_byte_count < sizeof(uint16_t))
4488
0
        return 1;
4489
0
    } else {
4490
0
      c->tcp_byte_count += r;
4491
0
      if(c->tcp_byte_count < sizeof(uint16_t))
4492
0
        return 1;
4493
0
      sldns_buffer_set_position(buffer, c->tcp_byte_count -
4494
0
        sizeof(uint16_t));
4495
0
    }
4496
0
    if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4497
0
      tcp_callback_writer(c);
4498
0
      return 1;
4499
0
    }
4500
0
  }
4501
0
  log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
4502
0
  log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
4503
0
  if(c->tcp_write_and_read) {
4504
0
    r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
4505
0
      c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
4506
0
  } else {
4507
0
    r = send(fd, (void*)sldns_buffer_current(buffer),
4508
0
      sldns_buffer_remaining(buffer), 0);
4509
0
  }
4510
0
  if(r == -1) {
4511
0
#ifndef USE_WINSOCK
4512
0
    if(errno == EINTR || errno == EAGAIN)
4513
0
      return 1;
4514
0
#ifdef ECONNRESET
4515
0
    if(errno == ECONNRESET && verbosity < 2)
4516
0
      return 0; /* silence reset by peer */
4517
0
#endif
4518
#else
4519
    if(WSAGetLastError() == WSAEINPROGRESS)
4520
      return 1;
4521
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
4522
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
4523
      return 1;
4524
    }
4525
    if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
4526
      return 0; /* silence reset by peer */
4527
#endif
4528
0
    log_err_addr("tcp send r", sock_strerror(errno),
4529
0
      &c->repinfo.remote_addr,
4530
0
      c->repinfo.remote_addrlen);
4531
0
    return 0;
4532
0
  }
4533
0
  if(c->tcp_write_and_read) {
4534
0
    c->tcp_write_byte_count += r;
4535
0
  } else {
4536
0
    sldns_buffer_skip(buffer, r);
4537
0
  }
4538
4539
0
  if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4540
0
    tcp_callback_writer(c);
4541
0
  }
4542
4543
0
  return 1;
4544
0
}
4545
4546
/** read again to drain buffers when there could be more to read, returns 0
4547
 * on failure which means the comm point is closed. */
4548
static int
4549
tcp_req_info_read_again(int fd, struct comm_point* c)
4550
0
{
4551
0
  while(c->tcp_req_info->read_again) {
4552
0
    int r;
4553
0
    c->tcp_req_info->read_again = 0;
4554
0
    if(c->tcp_is_reading)
4555
0
      r = comm_point_tcp_handle_read(fd, c, 0);
4556
0
    else  r = comm_point_tcp_handle_write(fd, c);
4557
0
    if(!r) {
4558
0
      reclaim_tcp_handler(c);
4559
0
      if(!c->tcp_do_close) {
4560
0
        fptr_ok(fptr_whitelist_comm_point(
4561
0
          c->callback));
4562
0
        (void)(*c->callback)(c, c->cb_arg,
4563
0
          NETEVENT_CLOSED, NULL);
4564
0
      }
4565
0
      return 0;
4566
0
    }
4567
0
  }
4568
0
  return 1;
4569
0
}
4570
4571
/** read again to drain buffers when there could be more to read */
4572
static void
4573
tcp_more_read_again(int fd, struct comm_point* c)
4574
0
{
4575
  /* if the packet is done, but another one could be waiting on
4576
   * the connection, the callback signals this, and we try again */
4577
  /* this continues until the read routines get EAGAIN or so,
4578
   * and thus does not call the callback, and the bool is 0 */
4579
0
  int* moreread = c->tcp_more_read_again;
4580
0
  while(moreread && *moreread) {
4581
0
    *moreread = 0;
4582
0
    if(!comm_point_tcp_handle_read(fd, c, 0)) {
4583
0
      reclaim_tcp_handler(c);
4584
0
      if(!c->tcp_do_close) {
4585
0
        fptr_ok(fptr_whitelist_comm_point(
4586
0
          c->callback));
4587
0
        (void)(*c->callback)(c, c->cb_arg,
4588
0
          NETEVENT_CLOSED, NULL);
4589
0
      }
4590
0
      return;
4591
0
    }
4592
0
  }
4593
0
}
4594
4595
/** write again to fill up when there could be more to write */
4596
static void
4597
tcp_more_write_again(int fd, struct comm_point* c)
4598
0
{
4599
  /* if the packet is done, but another is waiting to be written,
4600
   * the callback signals it and we try again. */
4601
  /* this continues until the write routines get EAGAIN or so,
4602
   * and thus does not call the callback, and the bool is 0 */
4603
0
  int* morewrite = c->tcp_more_write_again;
4604
0
  while(morewrite && *morewrite) {
4605
0
    *morewrite = 0;
4606
0
    if(!comm_point_tcp_handle_write(fd, c)) {
4607
0
      reclaim_tcp_handler(c);
4608
0
      if(!c->tcp_do_close) {
4609
0
        fptr_ok(fptr_whitelist_comm_point(
4610
0
          c->callback));
4611
0
        (void)(*c->callback)(c, c->cb_arg,
4612
0
          NETEVENT_CLOSED, NULL);
4613
0
      }
4614
0
      return;
4615
0
    }
4616
0
  }
4617
0
}
4618
4619
void
4620
comm_point_tcp_handle_callback(int fd, short event, void* arg)
4621
0
{
4622
0
  struct comm_point* c = (struct comm_point*)arg;
4623
0
  log_assert(c->type == comm_tcp);
4624
0
  ub_comm_base_now(c->ev->base);
4625
4626
0
  if(c->fd == -1 || c->fd != fd)
4627
0
    return; /* duplicate event, but commpoint closed. */
4628
4629
#ifdef USE_DNSCRYPT
4630
  /* Initialize if this is a dnscrypt socket */
4631
  if(c->tcp_parent) {
4632
    c->dnscrypt = c->tcp_parent->dnscrypt;
4633
  }
4634
  if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
4635
    c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
4636
    if(!c->dnscrypt_buffer) {
4637
      log_err("Could not allocate dnscrypt buffer");
4638
      reclaim_tcp_handler(c);
4639
      if(!c->tcp_do_close) {
4640
        fptr_ok(fptr_whitelist_comm_point(
4641
          c->callback));
4642
        (void)(*c->callback)(c, c->cb_arg,
4643
          NETEVENT_CLOSED, NULL);
4644
      }
4645
      return;
4646
    }
4647
  }
4648
#endif
4649
4650
0
  if((event&UB_EV_TIMEOUT)) {
4651
0
    verbose(VERB_QUERY, "tcp took too long, dropped");
4652
0
    reclaim_tcp_handler(c);
4653
0
    if(!c->tcp_do_close) {
4654
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
4655
0
      (void)(*c->callback)(c, c->cb_arg,
4656
0
        NETEVENT_TIMEOUT, NULL);
4657
0
    }
4658
0
    return;
4659
0
  }
4660
0
  if((event&UB_EV_READ)
4661
#ifdef USE_MSG_FASTOPEN
4662
    && !(c->tcp_do_fastopen && (event&UB_EV_WRITE))
4663
#endif
4664
0
    ) {
4665
0
    int has_tcpq = (c->tcp_req_info != NULL);
4666
0
    int* moreread = c->tcp_more_read_again;
4667
0
    if(!comm_point_tcp_handle_read(fd, c, 0)) {
4668
0
      reclaim_tcp_handler(c);
4669
0
      if(!c->tcp_do_close) {
4670
0
        fptr_ok(fptr_whitelist_comm_point(
4671
0
          c->callback));
4672
0
        (void)(*c->callback)(c, c->cb_arg,
4673
0
          NETEVENT_CLOSED, NULL);
4674
0
      }
4675
0
      return;
4676
0
    }
4677
0
    if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4678
0
      if(!tcp_req_info_read_again(fd, c))
4679
0
        return;
4680
0
    }
4681
0
    if(moreread && *moreread)
4682
0
      tcp_more_read_again(fd, c);
4683
0
    return;
4684
0
  }
4685
0
  if((event&UB_EV_WRITE)) {
4686
0
    int has_tcpq = (c->tcp_req_info != NULL);
4687
0
    int* morewrite = c->tcp_more_write_again;
4688
0
    if(!comm_point_tcp_handle_write(fd, c)) {
4689
0
      reclaim_tcp_handler(c);
4690
0
      if(!c->tcp_do_close) {
4691
0
        fptr_ok(fptr_whitelist_comm_point(
4692
0
          c->callback));
4693
0
        (void)(*c->callback)(c, c->cb_arg,
4694
0
          NETEVENT_CLOSED, NULL);
4695
0
      }
4696
0
      return;
4697
0
    }
4698
0
    if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4699
0
      if(!tcp_req_info_read_again(fd, c))
4700
0
        return;
4701
0
    }
4702
0
    if(morewrite && *morewrite)
4703
0
      tcp_more_write_again(fd, c);
4704
0
    return;
4705
0
  }
4706
0
  log_err("Ignored event %d for tcphdl.", event);
4707
0
}
4708
4709
/** Make http handler free for next assignment */
4710
static void
4711
reclaim_http_handler(struct comm_point* c)
4712
0
{
4713
0
  log_assert(c->type == comm_http);
4714
0
  if(c->ssl) {
4715
0
#ifdef HAVE_SSL
4716
0
    SSL_shutdown(c->ssl);
4717
0
    SSL_free(c->ssl);
4718
0
    c->ssl = NULL;
4719
0
#endif
4720
0
  }
4721
0
  comm_point_close(c);
4722
0
  if(c->tcp_parent && !c->is_in_tcp_free) {
4723
    /* Should not happen: bad tcp_free state in reclaim_http. */
4724
0
    log_assert(c->tcp_free == NULL);
4725
0
    log_assert(c->tcp_parent->cur_tcp_count > 0);
4726
0
    c->tcp_parent->cur_tcp_count--;
4727
0
    c->tcp_free = c->tcp_parent->tcp_free;
4728
0
    c->tcp_parent->tcp_free = c;
4729
0
    c->is_in_tcp_free = 1;
4730
0
    if(!c->tcp_free) {
4731
      /* re-enable listening on accept socket */
4732
0
      comm_point_start_listening(c->tcp_parent, -1, -1);
4733
0
    }
4734
0
  }
4735
0
}
4736
4737
/**
 * Read more data for http over SSL into c->buffer at its current position.
 * @param c: comm point; the buffer must have space remaining.
 * @return 1 when data was read (buffer position advanced) or when the SSL
 *   layer wants a retry later; 0 on shutdown or error, and always 0 when
 *   built without HAVE_SSL.
 */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifndef HAVE_SSL
  (void)c;
  return 0;
#else
  int nread;
  int want;
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
  ERR_clear_error();
  nread = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
    (int)sldns_buffer_remaining(c->buffer));
  if(nread > 0) {
    verbose(VERB_ALGO, "ssl http read more skip to %d + %d",
      (int)sldns_buffer_position(c->buffer), (int)nread);
    sldns_buffer_skip(c->buffer, (ssize_t)nread);
    return 1;
  }
  want = SSL_get_error(c->ssl, nread);
  switch(want) {
  case SSL_ERROR_ZERO_RETURN:
    return 0; /* shutdown, closed */
  case SSL_ERROR_WANT_READ:
    return 1; /* read more later */
  case SSL_ERROR_WANT_WRITE:
    /* the SSL layer needs to write; listen for writability */
    c->ssl_shake_state = comm_ssl_shake_hs_write;
    comm_point_listen_for_rw(c, 0, 1);
    return 1;
  case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
    if(errno == ECONNRESET && verbosity < 2)
      return 0; /* silence reset by peer */
#endif
    if(errno != 0)
      log_err("SSL_read syscall: %s",
        strerror(errno));
    return 0;
  default:
    break;
  }
  log_crypto_err_io("could not SSL_read", want);
  return 0;
#endif /* HAVE_SSL */
}
4779
4780
/** read more data for http */
4781
static int
4782
http_read_more(int fd, struct comm_point* c)
4783
0
{
4784
0
  ssize_t r;
4785
0
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
4786
0
  r = recv(fd, (void*)sldns_buffer_current(c->buffer),
4787
0
    sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
4788
0
  if(r == 0) {
4789
0
    return 0;
4790
0
  } else if(r == -1) {
4791
0
#ifndef USE_WINSOCK
4792
0
    if(errno == EINTR || errno == EAGAIN)
4793
0
      return 1;
4794
#else /* USE_WINSOCK */
4795
    if(WSAGetLastError() == WSAECONNRESET)
4796
      return 0;
4797
    if(WSAGetLastError() == WSAEINPROGRESS)
4798
      return 1;
4799
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
4800
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
4801
      return 1;
4802
    }
4803
#endif
4804
0
    log_err_addr("read (in http r)", sock_strerror(errno),
4805
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
4806
0
    return 0;
4807
0
  }
4808
0
  verbose(VERB_ALGO, "http read more skip to %d + %d",
4809
0
    (int)sldns_buffer_position(c->buffer), (int)r);
4810
0
  sldns_buffer_skip(c->buffer, r);
4811
0
  return 1;
4812
0
}
4813
4814
/** return true if http header has been read (one line complete) */
4815
static int
4816
http_header_done(sldns_buffer* buf)
4817
0
{
4818
0
  size_t i;
4819
0
  for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4820
    /* there was a \r before the \n, but we ignore that */
4821
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
4822
0
      return 1;
4823
0
  }
4824
0
  return 0;
4825
0
}
4826
4827
/** return character string into buffer for header line, moves buffer
4828
 * past that line and puts zero terminator into linefeed-newline */
4829
static char*
4830
http_header_line(sldns_buffer* buf)
4831
0
{
4832
0
  char* result = (char*)sldns_buffer_current(buf);
4833
0
  size_t i;
4834
0
  for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4835
    /* terminate the string on the \r */
4836
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
4837
0
      sldns_buffer_write_u8_at(buf, i, 0);
4838
    /* terminate on the \n and skip past the it and done */
4839
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
4840
0
      sldns_buffer_write_u8_at(buf, i, 0);
4841
0
      sldns_buffer_set_position(buf, i+1);
4842
0
      return result;
4843
0
    }
4844
0
  }
4845
0
  return NULL;
4846
0
}
4847
4848
/** move unread buffer to start and clear rest for putting the rest into it */
4849
static void
4850
http_moveover_buffer(sldns_buffer* buf)
4851
0
{
4852
0
  size_t pos = sldns_buffer_position(buf);
4853
0
  size_t len = sldns_buffer_remaining(buf);
4854
0
  sldns_buffer_clear(buf);
4855
0
  memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
4856
0
  sldns_buffer_set_position(buf, len);
4857
0
}
4858
4859
/** a http header is complete, process it */
4860
static int
4861
http_process_initial_header(struct comm_point* c)
4862
0
{
4863
0
  char* line = http_header_line(c->buffer);
4864
0
  if(!line) return 1;
4865
0
  verbose(VERB_ALGO, "http header: %s", line);
4866
0
  if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
4867
    /* check returncode */
4868
0
    if(line[9] != '2') {
4869
0
      verbose(VERB_ALGO, "http bad status %s", line+9);
4870
0
      return 0;
4871
0
    }
4872
0
  } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
4873
0
    if(!c->http_is_chunked)
4874
0
      c->tcp_byte_count = (size_t)atoi(line+16);
4875
0
  } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
4876
0
    c->tcp_byte_count = 0;
4877
0
    c->http_is_chunked = 1;
4878
0
  } else if(line[0] == 0) {
4879
    /* end of initial headers */
4880
0
    c->http_in_headers = 0;
4881
0
    if(c->http_is_chunked)
4882
0
      c->http_in_chunk_headers = 1;
4883
    /* remove header text from front of buffer
4884
     * the buffer is going to be used to return the data segment
4885
     * itself and we don't want the header to get returned
4886
     * prepended with it */
4887
0
    http_moveover_buffer(c->buffer);
4888
0
    sldns_buffer_flip(c->buffer);
4889
0
    return 1;
4890
0
  }
4891
  /* ignore other headers */
4892
0
  return 1;
4893
0
}
4894
4895
/** a chunk header is complete, process it, return 0=fail, 1=continue next
4896
 * header line, 2=done with chunked transfer*/
4897
static int
4898
http_process_chunk_header(struct comm_point* c)
4899
0
{
4900
0
  char* line = http_header_line(c->buffer);
4901
0
  if(!line) return 1;
4902
0
  if(c->http_in_chunk_headers == 3) {
4903
0
    verbose(VERB_ALGO, "http chunk trailer: %s", line);
4904
    /* are we done ? */
4905
0
    if(line[0] == 0 && c->tcp_byte_count == 0) {
4906
      /* callback of http reader when NETEVENT_DONE,
4907
       * end of data, with no data in buffer */
4908
0
      sldns_buffer_set_position(c->buffer, 0);
4909
0
      sldns_buffer_set_limit(c->buffer, 0);
4910
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
4911
0
      (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4912
      /* return that we are done */
4913
0
      return 2;
4914
0
    }
4915
0
    if(line[0] == 0) {
4916
      /* continue with header of the next chunk */
4917
0
      c->http_in_chunk_headers = 1;
4918
      /* remove header text from front of buffer */
4919
0
      http_moveover_buffer(c->buffer);
4920
0
      sldns_buffer_flip(c->buffer);
4921
0
      return 1;
4922
0
    }
4923
    /* ignore further trail headers */
4924
0
    return 1;
4925
0
  }
4926
0
  verbose(VERB_ALGO, "http chunk header: %s", line);
4927
0
  if(c->http_in_chunk_headers == 1) {
4928
    /* read chunked start line */
4929
0
    char* end = NULL;
4930
0
    c->tcp_byte_count = (size_t)strtol(line, &end, 16);
4931
0
    if(end == line)
4932
0
      return 0;
4933
0
    c->http_in_chunk_headers = 0;
4934
    /* remove header text from front of buffer */
4935
0
    http_moveover_buffer(c->buffer);
4936
0
    sldns_buffer_flip(c->buffer);
4937
0
    if(c->tcp_byte_count == 0) {
4938
      /* done with chunks, process chunk_trailer lines */
4939
0
      c->http_in_chunk_headers = 3;
4940
0
    }
4941
0
    return 1;
4942
0
  }
4943
  /* ignore other headers */
4944
0
  return 1;
4945
0
}
4946
4947
/** handle nonchunked data segment, 0=fail, 1=wait */
4948
static int
4949
http_nonchunk_segment(struct comm_point* c)
4950
0
{
4951
  /* c->buffer at position..limit has new data we read in.
4952
   * the buffer itself is full of nonchunked data.
4953
   * we are looking to read tcp_byte_count more data
4954
   * and then the transfer is done. */
4955
0
  size_t remainbufferlen;
4956
0
  size_t got_now = sldns_buffer_limit(c->buffer);
4957
0
  if(c->tcp_byte_count <= got_now) {
4958
    /* done, this is the last data fragment */
4959
0
    c->http_stored = 0;
4960
0
    sldns_buffer_set_position(c->buffer, 0);
4961
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
4962
0
    (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4963
0
    return 1;
4964
0
  }
4965
  /* if we have the buffer space,
4966
   * read more data collected into the buffer */
4967
0
  remainbufferlen = sldns_buffer_capacity(c->buffer) -
4968
0
    sldns_buffer_limit(c->buffer);
4969
0
  if(remainbufferlen+got_now >= c->tcp_byte_count ||
4970
0
    remainbufferlen >= (size_t)(c->ssl?16384:2048)) {
4971
0
    size_t total = sldns_buffer_limit(c->buffer);
4972
0
    sldns_buffer_clear(c->buffer);
4973
0
    sldns_buffer_set_position(c->buffer, total);
4974
0
    c->http_stored = total;
4975
    /* return and wait to read more */
4976
0
    return 1;
4977
0
  }
4978
  /* call callback with this data amount, then
4979
   * wait for more */
4980
0
  c->tcp_byte_count -= got_now;
4981
0
  c->http_stored = 0;
4982
0
  sldns_buffer_set_position(c->buffer, 0);
4983
0
  fptr_ok(fptr_whitelist_comm_point(c->callback));
4984
0
  (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
4985
  /* c->callback has to buffer_clear(c->buffer). */
4986
  /* return and wait to read more */
4987
0
  return 1;
4988
0
}
4989
4990
/** handle chunked data segment, return 0=fail, 1=wait, 2=process more */
4991
static int
4992
http_chunked_segment(struct comm_point* c)
4993
0
{
4994
  /* the c->buffer has from position..limit new data we read. */
4995
  /* the current chunk has length tcp_byte_count.
4996
   * once we read that read more chunk headers.
4997
   */
4998
0
  size_t remainbufferlen;
4999
0
  size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
5000
0
  verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer));
5001
0
  if(c->tcp_byte_count <= got_now) {
5002
    /* the chunk has completed (with perhaps some extra data
5003
     * from next chunk header and next chunk) */
5004
    /* save too much info into temp buffer */
5005
0
    size_t fraglen;
5006
0
    struct comm_reply repinfo;
5007
0
    c->http_stored = 0;
5008
0
    sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
5009
0
    sldns_buffer_clear(c->http_temp);
5010
0
    sldns_buffer_write(c->http_temp,
5011
0
      sldns_buffer_current(c->buffer),
5012
0
      sldns_buffer_remaining(c->buffer));
5013
0
    sldns_buffer_flip(c->http_temp);
5014
5015
    /* callback with this fragment */
5016
0
    fraglen = sldns_buffer_position(c->buffer);
5017
0
    sldns_buffer_set_position(c->buffer, 0);
5018
0
    sldns_buffer_set_limit(c->buffer, fraglen);
5019
0
    repinfo = c->repinfo;
5020
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
5021
0
    (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
5022
    /* c->callback has to buffer_clear(). */
5023
5024
    /* is commpoint deleted? */
5025
0
    if(!repinfo.c) {
5026
0
      return 1;
5027
0
    }
5028
    /* copy waiting info */
5029
0
    sldns_buffer_clear(c->buffer);
5030
0
    sldns_buffer_write(c->buffer,
5031
0
      sldns_buffer_begin(c->http_temp),
5032
0
      sldns_buffer_remaining(c->http_temp));
5033
0
    sldns_buffer_flip(c->buffer);
5034
    /* process end of chunk trailer header lines, until
5035
     * an empty line */
5036
0
    c->http_in_chunk_headers = 3;
5037
    /* process more data in buffer (if any) */
5038
0
    return 2;
5039
0
  }
5040
0
  c->tcp_byte_count -= got_now;
5041
5042
  /* if we have the buffer space,
5043
   * read more data collected into the buffer */
5044
0
  remainbufferlen = sldns_buffer_capacity(c->buffer) -
5045
0
    sldns_buffer_limit(c->buffer);
5046
0
  if(remainbufferlen >= c->tcp_byte_count ||
5047
0
    remainbufferlen >= 2048) {
5048
0
    size_t total = sldns_buffer_limit(c->buffer);
5049
0
    sldns_buffer_clear(c->buffer);
5050
0
    sldns_buffer_set_position(c->buffer, total);
5051
0
    c->http_stored = total;
5052
    /* return and wait to read more */
5053
0
    return 1;
5054
0
  }
5055
5056
  /* callback of http reader for a new part of the data */
5057
0
  c->http_stored = 0;
5058
0
  sldns_buffer_set_position(c->buffer, 0);
5059
0
  fptr_ok(fptr_whitelist_comm_point(c->callback));
5060
0
  (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
5061
  /* c->callback has to buffer_clear(c->buffer). */
5062
  /* return and wait to read more */
5063
0
  return 1;
5064
0
}
5065
5066
#ifdef HAVE_NGHTTP2
5067
/** Create new http2 session. Called when creating handling comm point. */
5068
static struct http2_session* http2_session_create(struct comm_point* c)
5069
{
5070
  struct http2_session* session = calloc(1, sizeof(*session));
5071
  if(!session) {
5072
    log_err("malloc failure while creating http2 session");
5073
    return NULL;
5074
  }
5075
  session->c = c;
5076
5077
  return session;
5078
}
5079
#endif
5080
5081
/**
 * Delete http2 session. After closing connection or on error.
 * With HAVE_NGHTTP2 the nghttp2 callbacks object (if any) is deleted and
 * the session is freed; without it this function does nothing.
 * @param h2_session: session to delete; dereferenced without a NULL check
 *   in HAVE_NGHTTP2 builds.
 */
static void http2_session_delete(struct http2_session* h2_session)
{
#ifndef HAVE_NGHTTP2
  (void)h2_session;
#else
  if(h2_session->callbacks)
    nghttp2_session_callbacks_del(h2_session->callbacks);
  free(h2_session);
#endif
}
5092
5093
#ifdef HAVE_NGHTTP2
5094
struct http2_stream* http2_stream_create(int32_t stream_id)
5095
{
5096
  struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream));
5097
  if(!h2_stream) {
5098
    log_err("malloc failure while creating http2 stream");
5099
    return NULL;
5100
  }
5101
  h2_stream->stream_id = stream_id;
5102
  return h2_stream;
5103
}
5104
#endif
5105
5106
void http2_stream_add_meshstate(struct http2_stream* h2_stream,
5107
  struct mesh_area* mesh, struct mesh_state* m)
5108
0
{
5109
0
  h2_stream->mesh = mesh;
5110
0
  h2_stream->mesh_state = m;
5111
0
}
5112
5113
void http2_stream_remove_mesh_state(struct http2_stream* h2_stream)
5114
0
{
5115
0
  if(!h2_stream)
5116
0
    return;
5117
0
  h2_stream->mesh_state = NULL;
5118
0
}
5119
5120
#ifdef HAVE_NGHTTP2
5121
void http2_session_add_stream(struct http2_session* h2_session,
5122
  struct http2_stream* h2_stream)
5123
{
5124
  if(h2_session->first_stream)
5125
    h2_session->first_stream->prev = h2_stream;
5126
  h2_stream->next = h2_session->first_stream;
5127
  h2_session->first_stream = h2_stream;
5128
}
5129
5130
/** remove stream from session linked list. After stream close callback or
5131
 * closing connection */
5132
static void http2_session_remove_stream(struct http2_session* h2_session,
5133
  struct http2_stream* h2_stream)
5134
{
5135
  if(h2_stream->prev)
5136
    h2_stream->prev->next = h2_stream->next;
5137
  else
5138
    h2_session->first_stream = h2_stream->next;
5139
  if(h2_stream->next)
5140
    h2_stream->next->prev = h2_stream->prev;
5141
5142
}
5143
5144
/**
 * nghttp2 stream close callback.  Looks up the http2_stream stored as
 * user data for the stream id; if there is one it is unlinked from the
 * session list and deleted.
 * @param session: unused.
 * @param stream_id: id of the stream that closed.
 * @param error_code: unused.
 * @param cb_arg: the struct http2_session.
 * @return always 0.
 */
int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session),
  int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg)
{
  struct http2_session* h2_session = (struct http2_session*)cb_arg;
  struct http2_stream* h2_stream = nghttp2_session_get_stream_user_data(
    h2_session->session, stream_id);
  if(h2_stream) {
    http2_session_remove_stream(h2_session, h2_stream);
    http2_stream_delete(h2_session, h2_stream);
  }
  return 0;
}
5157
5158
/**
 * nghttp2 receive callback: read up to len bytes from the connection of
 * the http2 session into buf, via SSL when the comm point has an ssl
 * object and via a nonblocking recv otherwise.
 * The number of consecutive reads is capped at http2_max_streams; when
 * the cap is exceeded the counter is reset and NGHTTP2_ERR_WOULDBLOCK is
 * returned so that the caller stops reading for now.
 * @param session: unused.
 * @param buf: destination for the data.
 * @param len: capacity of buf.
 * @param flags: unused.
 * @param cb_arg: the struct http2_session.
 * @return number of bytes read, NGHTTP2_ERR_WOULDBLOCK to retry later,
 *   NGHTTP2_ERR_EOF when the peer closed, or
 *   NGHTTP2_ERR_CALLBACK_FAILURE on error.
 */
ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf,
  size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
  struct http2_session* h2_session = (struct http2_session*)cb_arg;
  ssize_t ret;

  log_assert(h2_session->c->type == comm_http);
  log_assert(h2_session->c->h2_session);
  if(++h2_session->reads_count > h2_session->c->http2_max_streams) {
    /* We are somewhat arbitrarily capping the amount of
     * consecutive reads on the HTTP2 session to the number of max
     * allowed streams.
     * When we reach the cap, error out with NGHTTP2_ERR_WOULDBLOCK
     * to signal nghttp2_session_recv() to stop reading for now. */
    h2_session->reads_count = 0;
    return NGHTTP2_ERR_WOULDBLOCK;
  }

#ifdef HAVE_SSL
  if(h2_session->c->ssl) {
    int r;
    ERR_clear_error();
    r = SSL_read(h2_session->c->ssl, buf, len);
    if(r <= 0) {
      int want = SSL_get_error(h2_session->c->ssl, r);
      if(want == SSL_ERROR_ZERO_RETURN) {
        return NGHTTP2_ERR_EOF;
      } else if(want == SSL_ERROR_WANT_READ) {
        return NGHTTP2_ERR_WOULDBLOCK;
      } else if(want == SSL_ERROR_WANT_WRITE) {
        h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write;
        comm_point_listen_for_rw(h2_session->c, 0, 1);
        return NGHTTP2_ERR_WOULDBLOCK;
      } else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
        if(errno == ECONNRESET && verbosity < 2)
          return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
        if(errno != 0)
          log_err("SSL_read syscall: %s",
            strerror(errno));
        return NGHTTP2_ERR_CALLBACK_FAILURE;
      }
      log_crypto_err_io("could not SSL_read", want);
      return NGHTTP2_ERR_CALLBACK_FAILURE;
    }
    return r;
  }
#endif /* HAVE_SSL */

  ret = recv(h2_session->c->fd, (void*)buf, len, MSG_DONTWAIT);
  if(ret == 0) {
    return NGHTTP2_ERR_EOF;
  } else if(ret < 0) {
#ifndef USE_WINSOCK
    if(errno == EINTR || errno == EAGAIN)
      return NGHTTP2_ERR_WOULDBLOCK;
#ifdef ECONNRESET
    if(errno == ECONNRESET && verbosity < 2)
      return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
    /* the first argument is a plain label like in the other
     * log_err_addr calls in this file, the error text is passed
     * separately; so no format specifier in it */
    log_err_addr("could not http2 recv", strerror(errno),
      &h2_session->c->repinfo.remote_addr,
      h2_session->c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
    if(WSAGetLastError() == WSAECONNRESET)
      return NGHTTP2_ERR_CALLBACK_FAILURE;
    if(WSAGetLastError() == WSAEINPROGRESS)
      return NGHTTP2_ERR_WOULDBLOCK;
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
      ub_winsock_tcp_wouldblock(h2_session->c->ev->ev,
        UB_EV_READ);
      return NGHTTP2_ERR_WOULDBLOCK;
    }
    log_err_addr("could not http2 recv",
      wsa_strerror(WSAGetLastError()),
      &h2_session->c->repinfo.remote_addr,
      h2_session->c->repinfo.remote_addrlen);
#endif
    return NGHTTP2_ERR_CALLBACK_FAILURE;
  }
  return ret;
}
5241
#endif /* HAVE_NGHTTP2 */
5242
5243
/**
 * Handle http2 read.
 * Lets nghttp2 read from the session and then decides what the comm point
 * listens for next. Without nghttp2 support this always returns 0.
 * @param c: comm point with the http2 session.
 * @return 0 when the read failed or the session wants neither to write nor
 *	to read (connection can be closed), 1 otherwise.
 */
static int
comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
  nghttp2_session* sess;
  int ret;
  log_assert(c->h2_session);
  sess = c->h2_session->session;

  /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */
  ret = nghttp2_session_recv(sess);
  if(ret) {
    /* EOF and callback failure are not reported here */
    if(!(ret == NGHTTP2_ERR_EOF ||
      ret == NGHTTP2_ERR_CALLBACK_FAILURE)) {
      char a[256];
      addr_to_str(&c->repinfo.remote_addr,
        c->repinfo.remote_addrlen, a, sizeof(a));
      verbose(VERB_QUERY,
        "http2: session_recv from %s failed, error: %s",
        a, nghttp2_strerror(ret));
    }
    return 0;
  }
  if(nghttp2_session_want_write(sess)) {
    /* switch the comm point over to writing */
    c->tcp_is_reading = 0;
    comm_point_stop_listening(c);
    comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
    return 1;
  }
  if(nghttp2_session_want_read(sess))
    return 1;
  return 0; /* connection can be closed */
#else
  (void)c;
  return 0;
#endif
}
5276
5277
/**
5278
 * Handle http reading callback.
5279
 * @param fd: file descriptor of socket.
5280
 * @param c: comm point to read from into buffer.
5281
 * @return: 0 on error
5282
 */
5283
static int
5284
comm_point_http_handle_read(int fd, struct comm_point* c)
5285
0
{
5286
0
  log_assert(c->type == comm_http);
5287
0
  log_assert(fd != -1);
5288
5289
  /* if we are in ssl handshake, handle SSL handshake */
5290
0
#ifdef HAVE_SSL
5291
0
  if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
5292
0
    if(!ssl_handshake(c))
5293
0
      return 0;
5294
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
5295
0
      return 1;
5296
0
  }
5297
0
#endif /* HAVE_SSL */
5298
5299
0
  if(!c->tcp_is_reading)
5300
0
    return 1;
5301
5302
0
  if(c->use_h2) {
5303
0
    return comm_point_http2_handle_read(fd, c);
5304
0
  }
5305
5306
  /* http version is <= http/1.1 */
5307
5308
0
  if(c->http_min_version >= http_version_2) {
5309
    /* HTTP/2 failed, not allowed to use lower version. */
5310
0
    return 0;
5311
0
  }
5312
5313
  /* read more data */
5314
0
  if(c->ssl) {
5315
0
    if(!ssl_http_read_more(c))
5316
0
      return 0;
5317
0
  } else {
5318
0
    if(!http_read_more(fd, c))
5319
0
      return 0;
5320
0
  }
5321
5322
0
  if(c->http_stored >= sldns_buffer_position(c->buffer)) {
5323
    /* read did not work but we wanted more data, there is
5324
     * no bytes to process now. */
5325
0
    return 1;
5326
0
  }
5327
0
  sldns_buffer_flip(c->buffer);
5328
  /* if we are partway in a segment of data, position us at the point
5329
   * where we left off previously */
5330
0
  if(c->http_stored < sldns_buffer_limit(c->buffer))
5331
0
    sldns_buffer_set_position(c->buffer, c->http_stored);
5332
0
  else  sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer));
5333
5334
0
  while(sldns_buffer_remaining(c->buffer) > 0) {
5335
    /* Handle HTTP/1.x data */
5336
    /* if we are reading headers, read more headers */
5337
0
    if(c->http_in_headers || c->http_in_chunk_headers) {
5338
      /* if header is done, process the header */
5339
0
      if(!http_header_done(c->buffer)) {
5340
        /* copy remaining data to front of buffer
5341
         * and set rest for writing into it */
5342
0
        http_moveover_buffer(c->buffer);
5343
        /* return and wait to read more */
5344
0
        return 1;
5345
0
      }
5346
0
      if(!c->http_in_chunk_headers) {
5347
        /* process initial headers */
5348
0
        if(!http_process_initial_header(c))
5349
0
          return 0;
5350
0
      } else {
5351
        /* process chunk headers */
5352
0
        int r = http_process_chunk_header(c);
5353
0
        if(r == 0) return 0;
5354
0
        if(r == 2) return 1; /* done */
5355
        /* r == 1, continue */
5356
0
      }
5357
      /* see if we have more to process */
5358
0
      continue;
5359
0
    }
5360
5361
0
    if(!c->http_is_chunked) {
5362
      /* if we are reading nonchunks, process that*/
5363
0
      return http_nonchunk_segment(c);
5364
0
    } else {
5365
      /* if we are reading chunks, read the chunk */
5366
0
      int r = http_chunked_segment(c);
5367
0
      if(r == 0) return 0;
5368
0
      if(r == 1) return 1;
5369
0
      continue;
5370
0
    }
5371
0
  }
5372
  /* broke out of the loop; could not process header instead need
5373
   * to read more */
5374
  /* moveover any remaining data and read more data */
5375
0
  http_moveover_buffer(c->buffer);
5376
  /* return and wait to read more */
5377
0
  return 1;
5378
0
}
5379
5380
/** check pending connect for http */
5381
static int
5382
http_check_connect(int fd, struct comm_point* c)
5383
0
{
5384
  /* check for pending error from nonblocking connect */
5385
  /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
5386
0
  int error = 0;
5387
0
  socklen_t len = (socklen_t)sizeof(error);
5388
0
  if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
5389
0
    &len) < 0){
5390
0
#ifndef USE_WINSOCK
5391
0
    error = errno; /* on solaris errno is error */
5392
#else /* USE_WINSOCK */
5393
    error = WSAGetLastError();
5394
#endif
5395
0
  }
5396
0
#ifndef USE_WINSOCK
5397
0
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
5398
0
  if(error == EINPROGRESS || error == EWOULDBLOCK)
5399
0
    return 1; /* try again later */
5400
0
  else
5401
0
#endif
5402
0
  if(error != 0 && verbosity < 2)
5403
0
    return 0; /* silence lots of chatter in the logs */
5404
0
  else if(error != 0) {
5405
0
    log_err_addr("http connect", strerror(error),
5406
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5407
#else /* USE_WINSOCK */
5408
  /* examine error */
5409
  if(error == WSAEINPROGRESS)
5410
    return 1;
5411
  else if(error == WSAEWOULDBLOCK) {
5412
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
5413
    return 1;
5414
  } else if(error != 0 && verbosity < 2)
5415
    return 0;
5416
  else if(error != 0) {
5417
    log_err_addr("http connect", wsa_strerror(error),
5418
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5419
#endif /* USE_WINSOCK */
5420
0
    return 0;
5421
0
  }
5422
  /* keep on processing this socket */
5423
0
  return 2;
5424
0
}
5425
5426
/**
 * Write more data for http (with ssl).
 * Writes what remains in the comm point buffer with SSL_write() and skips
 * the buffer over the bytes that were written. Without SSL support this
 * always returns 0.
 * @param c: comm point with the ssl object and the buffer.
 * @return 0 when the connection is closed or failed, 1 otherwise (also
 *	when the write has to be retried later).
 */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
  int r, want;
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
  ERR_clear_error();
  r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
    (int)sldns_buffer_remaining(c->buffer));
  if(r > 0) {
    sldns_buffer_skip(c->buffer, (ssize_t)r);
    return 1;
  }

  want = SSL_get_error(c->ssl, r);
  switch(want) {
  case SSL_ERROR_ZERO_RETURN:
    return 0; /* closed */
  case SSL_ERROR_WANT_READ:
    c->ssl_shake_state = comm_ssl_shake_hs_read;
    comm_point_listen_for_rw(c, 1, 0);
    return 1; /* wait for read condition */
  case SSL_ERROR_WANT_WRITE:
    return 1; /* write more later */
  case SSL_ERROR_SYSCALL:
#ifdef EPIPE
    if(errno == EPIPE && verbosity < 2)
      return 0; /* silence 'broken pipe' */
#endif
    if(errno != 0)
      log_err("SSL_write syscall: %s",
        strerror(errno));
    return 0;
  default:
    break;
  }
  log_crypto_err_io("could not SSL_write", want);
  return 0;
#else
  (void)c;
  return 0;
#endif /* HAVE_SSL */
}
5466
5467
/** write more data for http */
5468
static int
5469
http_write_more(int fd, struct comm_point* c)
5470
0
{
5471
0
  ssize_t r;
5472
0
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
5473
0
  r = send(fd, (void*)sldns_buffer_current(c->buffer),
5474
0
    sldns_buffer_remaining(c->buffer), 0);
5475
0
  if(r == -1) {
5476
0
#ifndef USE_WINSOCK
5477
0
    if(errno == EINTR || errno == EAGAIN)
5478
0
      return 1;
5479
#else
5480
    if(WSAGetLastError() == WSAEINPROGRESS)
5481
      return 1;
5482
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
5483
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
5484
      return 1;
5485
    }
5486
#endif
5487
0
    log_err_addr("http send r", sock_strerror(errno),
5488
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5489
0
    return 0;
5490
0
  }
5491
0
  sldns_buffer_skip(c->buffer, r);
5492
0
  return 1;
5493
0
}
5494
5495
#ifdef HAVE_NGHTTP2
5496
/**
 * Send callback for the http2 session.
 * Writes at most len bytes from buf, with SSL_write() when the comm point
 * has an ssl object and with send() on the comm point fd otherwise.
 * @param buf: bytes to write.
 * @param len: number of bytes in buf.
 * @param cb_arg: the struct http2_session for this connection.
 * @return number of bytes written, NGHTTP2_ERR_WOULDBLOCK when the write
 *	has to be retried later, or NGHTTP2_ERR_CALLBACK_FAILURE on error
 *	(this includes a send() result of 0).
 */
ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf,
  size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
  struct http2_session* h2_session = (struct http2_session*)cb_arg;
  struct comm_point* c = h2_session->c;
  ssize_t ret;
  log_assert(h2_session->c->type == comm_http);
  log_assert(h2_session->c->h2_session);

#ifdef HAVE_SSL
  if(c->ssl) {
    int r, want;
    ERR_clear_error();
    r = SSL_write(c->ssl, buf, len);
    if(r > 0)
      return r;
    want = SSL_get_error(c->ssl, r);
    switch(want) {
    case SSL_ERROR_ZERO_RETURN:
      return NGHTTP2_ERR_CALLBACK_FAILURE;
    case SSL_ERROR_WANT_READ:
      /* SSL needs to read first; listen for read */
      c->ssl_shake_state = comm_ssl_shake_hs_read;
      comm_point_listen_for_rw(c, 1, 0);
      return NGHTTP2_ERR_WOULDBLOCK;
    case SSL_ERROR_WANT_WRITE:
      return NGHTTP2_ERR_WOULDBLOCK;
    case SSL_ERROR_SYSCALL:
#ifdef EPIPE
      /* broken pipe is not logged at low verbosity */
      if(errno == EPIPE && verbosity < 2)
        return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
      if(errno != 0)
        log_err("SSL_write syscall: %s",
          strerror(errno));
      return NGHTTP2_ERR_CALLBACK_FAILURE;
    default:
      break;
    }
    log_crypto_err_io("could not SSL_write", want);
    return NGHTTP2_ERR_CALLBACK_FAILURE;
  }
#endif /* HAVE_SSL */

  ret = send(c->fd, (void*)buf, len, 0);
  if(ret > 0)
    return ret;
  if(ret == 0)
    return NGHTTP2_ERR_CALLBACK_FAILURE;

  /* ret < 0: inspect the socket error */
#ifndef USE_WINSOCK
  if(errno == EINTR || errno == EAGAIN)
    return NGHTTP2_ERR_WOULDBLOCK;
#ifdef EPIPE
  if(errno == EPIPE && verbosity < 2)
    return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
#ifdef ECONNRESET
  if(errno == ECONNRESET && verbosity < 2)
    return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
  log_err_addr("could not http2 write: %s", strerror(errno),
    &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
  if(WSAGetLastError() == WSAENOTCONN)
    return NGHTTP2_ERR_WOULDBLOCK;
  if(WSAGetLastError() == WSAEINPROGRESS)
    return NGHTTP2_ERR_WOULDBLOCK;
  if(WSAGetLastError() == WSAEWOULDBLOCK) {
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
    return NGHTTP2_ERR_WOULDBLOCK;
  }
  if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
    return NGHTTP2_ERR_CALLBACK_FAILURE;
  log_err_addr("could not http2 write: %s",
    wsa_strerror(WSAGetLastError()),
    &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#endif
  return NGHTTP2_ERR_CALLBACK_FAILURE;
}
5575
#endif /* HAVE_NGHTTP2 */
5576
5577
/**
 * Handle http2 writing.
 * Lets nghttp2 send on the session and then decides what the comm point
 * listens for next. Without nghttp2 support this always returns 0.
 * @param c: comm point with the http2 session.
 * @return 0 when the send failed or the session wants neither to read nor
 *	to write (connection can be closed), 1 otherwise.
 */
static int
comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
  nghttp2_session* sess;
  int ret;
  log_assert(c->h2_session);
  sess = c->h2_session->session;

  ret = nghttp2_session_send(sess);
  if(ret) {
    verbose(VERB_QUERY, "http2: session_send failed, error: %s",
      nghttp2_strerror(ret));
    return 0;
  }

  if(nghttp2_session_want_read(sess)) {
    /* switch the comm point over to reading */
    c->tcp_is_reading = 1;
    comm_point_stop_listening(c);
    comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
    return 1;
  }
  if(nghttp2_session_want_write(sess))
    return 1;
  return 0; /* connection can be closed */
#else
  (void)c;
  return 0;
#endif
}
5604
5605
/**
5606
 * Handle http writing callback.
5607
 * @param fd: file descriptor of socket.
5608
 * @param c: comm point to write buffer out of.
5609
 * @return: 0 on error
5610
 */
5611
static int
5612
comm_point_http_handle_write(int fd, struct comm_point* c)
5613
0
{
5614
0
  log_assert(c->type == comm_http);
5615
0
  log_assert(fd != -1);
5616
5617
  /* check pending connect errors, if that fails, we wait for more,
5618
   * or we can continue to write contents */
5619
0
  if(c->tcp_check_nb_connect) {
5620
0
    int r = http_check_connect(fd, c);
5621
0
    if(r == 0) return 0;
5622
0
    if(r == 1) return 1;
5623
0
    c->tcp_check_nb_connect = 0;
5624
0
  }
5625
  /* if we are in ssl handshake, handle SSL handshake */
5626
0
#ifdef HAVE_SSL
5627
0
  if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
5628
0
    if(!ssl_handshake(c))
5629
0
      return 0;
5630
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
5631
0
      return 1;
5632
0
  }
5633
0
#endif /* HAVE_SSL */
5634
0
  if(c->tcp_is_reading)
5635
0
    return 1;
5636
5637
0
  if(c->use_h2) {
5638
0
    return comm_point_http2_handle_write(fd, c);
5639
0
  }
5640
5641
  /* http version is <= http/1.1 */
5642
5643
0
  if(c->http_min_version >= http_version_2) {
5644
    /* HTTP/2 failed, not allowed to use lower version. */
5645
0
    return 0;
5646
0
  }
5647
5648
  /* if we are writing, write more */
5649
0
  if(c->ssl) {
5650
0
    if(!ssl_http_write_more(c))
5651
0
      return 0;
5652
0
  } else {
5653
0
    if(!http_write_more(fd, c))
5654
0
      return 0;
5655
0
  }
5656
5657
  /* we write a single buffer contents, that can contain
5658
   * the http request, and then flip to read the results */
5659
  /* see if write is done */
5660
0
  if(sldns_buffer_remaining(c->buffer) == 0) {
5661
0
    sldns_buffer_clear(c->buffer);
5662
0
    if(c->tcp_do_toggle_rw)
5663
0
      c->tcp_is_reading = 1;
5664
0
    c->tcp_byte_count = 0;
5665
    /* switch from listening(write) to listening(read) */
5666
0
    comm_point_stop_listening(c);
5667
0
    comm_point_start_listening(c, -1, -1);
5668
0
  }
5669
0
  return 1;
5670
0
}
5671
5672
void
5673
comm_point_http_handle_callback(int fd, short event, void* arg)
5674
0
{
5675
0
  struct comm_point* c = (struct comm_point*)arg;
5676
0
  log_assert(c->type == comm_http);
5677
0
  ub_comm_base_now(c->ev->base);
5678
5679
0
  if((event&UB_EV_TIMEOUT)) {
5680
0
    verbose(VERB_QUERY, "http took too long, dropped");
5681
0
    reclaim_http_handler(c);
5682
0
    if(!c->tcp_do_close) {
5683
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
5684
0
      (void)(*c->callback)(c, c->cb_arg,
5685
0
        NETEVENT_TIMEOUT, NULL);
5686
0
    }
5687
0
    return;
5688
0
  }
5689
0
  if((event&UB_EV_READ)) {
5690
0
    if(!comm_point_http_handle_read(fd, c)) {
5691
0
      reclaim_http_handler(c);
5692
0
      if(!c->tcp_do_close) {
5693
0
        fptr_ok(fptr_whitelist_comm_point(
5694
0
          c->callback));
5695
0
        (void)(*c->callback)(c, c->cb_arg,
5696
0
          NETEVENT_CLOSED, NULL);
5697
0
      }
5698
0
    }
5699
0
    return;
5700
0
  }
5701
0
  if((event&UB_EV_WRITE)) {
5702
0
    if(!comm_point_http_handle_write(fd, c)) {
5703
0
      reclaim_http_handler(c);
5704
0
      if(!c->tcp_do_close) {
5705
0
        fptr_ok(fptr_whitelist_comm_point(
5706
0
          c->callback));
5707
0
        (void)(*c->callback)(c, c->cb_arg,
5708
0
          NETEVENT_CLOSED, NULL);
5709
0
      }
5710
0
    }
5711
0
    return;
5712
0
  }
5713
0
  log_err("Ignored event %d for httphdl.", event);
5714
0
}
5715
5716
void comm_point_local_handle_callback(int fd, short event, void* arg)
5717
0
{
5718
0
  struct comm_point* c = (struct comm_point*)arg;
5719
0
  log_assert(c->type == comm_local);
5720
0
  ub_comm_base_now(c->ev->base);
5721
5722
0
  if((event&UB_EV_READ)) {
5723
0
    if(!comm_point_tcp_handle_read(fd, c, 1)) {
5724
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
5725
0
      (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
5726
0
        NULL);
5727
0
    }
5728
0
    return;
5729
0
  }
5730
0
  log_err("Ignored event %d for localhdl.", event);
5731
0
}
5732
5733
void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
5734
  short event, void* arg)
5735
0
{
5736
0
  struct comm_point* c = (struct comm_point*)arg;
5737
0
  int err = NETEVENT_NOERROR;
5738
0
  log_assert(c->type == comm_raw);
5739
0
  ub_comm_base_now(c->ev->base);
5740
5741
0
  if((event&UB_EV_TIMEOUT))
5742
0
    err = NETEVENT_TIMEOUT;
5743
0
  fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
5744
0
  (void)(*c->callback)(c, c->cb_arg, err, NULL);
5745
0
}
5746
5747
struct comm_point*
5748
comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
5749
  int pp2_enabled, comm_point_callback_type* callback,
5750
  void* callback_arg, struct unbound_socket* socket)
5751
0
{
5752
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5753
0
    sizeof(struct comm_point));
5754
0
  short evbits;
5755
0
  if(!c)
5756
0
    return NULL;
5757
0
  c->ev = (struct internal_event*)calloc(1,
5758
0
    sizeof(struct internal_event));
5759
0
  if(!c->ev) {
5760
0
    free(c);
5761
0
    return NULL;
5762
0
  }
5763
0
  c->ev->base = base;
5764
0
  c->fd = fd;
5765
0
  c->buffer = buffer;
5766
0
  c->timeout = NULL;
5767
0
  c->tcp_is_reading = 0;
5768
0
  c->tcp_byte_count = 0;
5769
0
  c->tcp_parent = NULL;
5770
0
  c->max_tcp_count = 0;
5771
0
  c->cur_tcp_count = 0;
5772
0
  c->tcp_handlers = NULL;
5773
0
  c->tcp_free = NULL;
5774
0
  c->is_in_tcp_free = 0;
5775
0
  c->type = comm_udp;
5776
0
  c->tcp_do_close = 0;
5777
0
  c->do_not_close = 0;
5778
0
  c->tcp_do_toggle_rw = 0;
5779
0
  c->tcp_check_nb_connect = 0;
5780
#ifdef USE_MSG_FASTOPEN
5781
  c->tcp_do_fastopen = 0;
5782
#endif
5783
#ifdef USE_DNSCRYPT
5784
  c->dnscrypt = 0;
5785
  c->dnscrypt_buffer = buffer;
5786
#endif
5787
0
  c->inuse = 0;
5788
0
  c->callback = callback;
5789
0
  c->cb_arg = callback_arg;
5790
0
  c->socket = socket;
5791
0
  c->pp2_enabled = pp2_enabled;
5792
0
  c->pp2_header_state = pp2_header_none;
5793
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
5794
  /* ub_event stuff */
5795
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5796
0
    comm_point_udp_callback, c);
5797
0
  if(c->ev->ev == NULL) {
5798
0
    log_err("could not baseset udp event");
5799
0
    comm_point_delete(c);
5800
0
    return NULL;
5801
0
  }
5802
0
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5803
0
    log_err("could not add udp event");
5804
0
    comm_point_delete(c);
5805
0
    return NULL;
5806
0
  }
5807
0
  c->event_added = 1;
5808
0
  return c;
5809
0
}
5810
5811
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
5812
struct comm_point*
5813
comm_point_create_udp_ancil(struct comm_base *base, int fd,
5814
  sldns_buffer* buffer, int pp2_enabled,
5815
  comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
5816
0
{
5817
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5818
0
    sizeof(struct comm_point));
5819
0
  short evbits;
5820
0
  if(!c)
5821
0
    return NULL;
5822
0
  c->ev = (struct internal_event*)calloc(1,
5823
0
    sizeof(struct internal_event));
5824
0
  if(!c->ev) {
5825
0
    free(c);
5826
0
    return NULL;
5827
0
  }
5828
0
  c->ev->base = base;
5829
0
  c->fd = fd;
5830
0
  c->buffer = buffer;
5831
0
  c->timeout = NULL;
5832
0
  c->tcp_is_reading = 0;
5833
0
  c->tcp_byte_count = 0;
5834
0
  c->tcp_parent = NULL;
5835
0
  c->max_tcp_count = 0;
5836
0
  c->cur_tcp_count = 0;
5837
0
  c->tcp_handlers = NULL;
5838
0
  c->tcp_free = NULL;
5839
0
  c->is_in_tcp_free = 0;
5840
0
  c->type = comm_udp;
5841
0
  c->tcp_do_close = 0;
5842
0
  c->do_not_close = 0;
5843
#ifdef USE_DNSCRYPT
5844
  c->dnscrypt = 0;
5845
  c->dnscrypt_buffer = buffer;
5846
#endif
5847
0
  c->inuse = 0;
5848
0
  c->tcp_do_toggle_rw = 0;
5849
0
  c->tcp_check_nb_connect = 0;
5850
#ifdef USE_MSG_FASTOPEN
5851
  c->tcp_do_fastopen = 0;
5852
#endif
5853
0
  c->callback = callback;
5854
0
  c->cb_arg = callback_arg;
5855
0
  c->socket = socket;
5856
0
  c->pp2_enabled = pp2_enabled;
5857
0
  c->pp2_header_state = pp2_header_none;
5858
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
5859
  /* ub_event stuff */
5860
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5861
0
    comm_point_udp_ancil_callback, c);
5862
0
  if(c->ev->ev == NULL) {
5863
0
    log_err("could not baseset udp event");
5864
0
    comm_point_delete(c);
5865
0
    return NULL;
5866
0
  }
5867
0
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5868
0
    log_err("could not add udp event");
5869
0
    comm_point_delete(c);
5870
0
    return NULL;
5871
0
  }
5872
0
  c->event_added = 1;
5873
0
  return c;
5874
0
}
5875
#endif
5876
5877
struct comm_point*
5878
comm_point_create_doq(struct comm_base *base, int fd, sldns_buffer* buffer,
5879
  comm_point_callback_type* callback, void* callback_arg,
5880
  struct unbound_socket* socket, struct doq_table* table,
5881
  struct ub_randstate* rnd, const void* quic_sslctx,
5882
  struct config_file* cfg)
5883
0
{
5884
#ifdef HAVE_NGTCP2
5885
  struct comm_point* c = (struct comm_point*)calloc(1,
5886
    sizeof(struct comm_point));
5887
  short evbits;
5888
  log_assert(table != NULL);
5889
  if(!c)
5890
    return NULL;
5891
  c->ev = (struct internal_event*)calloc(1,
5892
    sizeof(struct internal_event));
5893
  if(!c->ev) {
5894
    free(c);
5895
    return NULL;
5896
  }
5897
  c->ev->base = base;
5898
  c->fd = fd;
5899
  c->buffer = buffer;
5900
  c->timeout = NULL;
5901
  c->tcp_is_reading = 0;
5902
  c->tcp_byte_count = 0;
5903
  c->tcp_parent = NULL;
5904
  c->max_tcp_count = 0;
5905
  c->cur_tcp_count = 0;
5906
  c->tcp_handlers = NULL;
5907
  c->tcp_free = NULL;
5908
  c->is_in_tcp_free = 0;
5909
  c->type = comm_doq;
5910
  c->tcp_do_close = 0;
5911
  c->do_not_close = 0;
5912
  c->tcp_do_toggle_rw = 0;
5913
  c->tcp_check_nb_connect = 0;
5914
#ifdef USE_MSG_FASTOPEN
5915
  c->tcp_do_fastopen = 0;
5916
#endif
5917
#ifdef USE_DNSCRYPT
5918
  c->dnscrypt = 0;
5919
  c->dnscrypt_buffer = NULL;
5920
#endif
5921
  c->doq_socket = doq_server_socket_create(table, rnd, quic_sslctx, c,
5922
    base, cfg);
5923
  if(!c->doq_socket) {
5924
    log_err("could not create doq comm_point");
5925
    comm_point_delete(c);
5926
    return NULL;
5927
  }
5928
  c->inuse = 0;
5929
  c->callback = callback;
5930
  c->cb_arg = callback_arg;
5931
  c->socket = socket;
5932
  c->pp2_enabled = 0;
5933
  c->pp2_header_state = pp2_header_none;
5934
  evbits = UB_EV_READ | UB_EV_PERSIST;
5935
  /* ub_event stuff */
5936
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5937
    comm_point_doq_callback, c);
5938
  if(c->ev->ev == NULL) {
5939
    log_err("could not baseset udp event");
5940
    comm_point_delete(c);
5941
    return NULL;
5942
  }
5943
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5944
    log_err("could not add udp event");
5945
    comm_point_delete(c);
5946
    return NULL;
5947
  }
5948
  c->event_added = 1;
5949
  return c;
5950
#else
5951
  /* no libngtcp2, so no QUIC support */
5952
0
  (void)base;
5953
0
  (void)buffer;
5954
0
  (void)callback;
5955
0
  (void)callback_arg;
5956
0
  (void)socket;
5957
0
  (void)rnd;
5958
0
  (void)table;
5959
0
  (void)quic_sslctx;
5960
0
  (void)cfg;
5961
0
  sock_close(fd);
5962
0
  return NULL;
5963
0
#endif /* HAVE_NGTCP2 */
5964
0
}
5965
5966
static struct comm_point*
5967
comm_point_create_tcp_handler(struct comm_base *base,
5968
  struct comm_point* parent, size_t bufsize,
5969
  struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
5970
  void* callback_arg, struct unbound_socket* socket)
5971
0
{
5972
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5973
0
    sizeof(struct comm_point));
5974
0
  short evbits;
5975
0
  if(!c)
5976
0
    return NULL;
5977
0
  c->ev = (struct internal_event*)calloc(1,
5978
0
    sizeof(struct internal_event));
5979
0
  if(!c->ev) {
5980
0
    free(c);
5981
0
    return NULL;
5982
0
  }
5983
0
  c->ev->base = base;
5984
0
  c->fd = -1;
5985
0
  c->buffer = sldns_buffer_new(bufsize);
5986
0
  if(!c->buffer) {
5987
0
    free(c->ev);
5988
0
    free(c);
5989
0
    return NULL;
5990
0
  }
5991
0
  c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
5992
0
  if(!c->timeout) {
5993
0
    sldns_buffer_free(c->buffer);
5994
0
    free(c->ev);
5995
0
    free(c);
5996
0
    return NULL;
5997
0
  }
5998
0
  c->tcp_is_reading = 0;
5999
0
  c->tcp_byte_count = 0;
6000
0
  c->tcp_parent = parent;
6001
0
  c->tcp_timeout_msec = parent->tcp_timeout_msec;
6002
0
  c->tcp_conn_limit = parent->tcp_conn_limit;
6003
0
  c->tcl_addr = NULL;
6004
0
  c->tcp_keepalive = 0;
6005
0
  c->max_tcp_count = 0;
6006
0
  c->cur_tcp_count = 0;
6007
0
  c->tcp_handlers = NULL;
6008
0
  c->tcp_free = NULL;
6009
0
  c->is_in_tcp_free = 0;
6010
0
  c->type = comm_tcp;
6011
0
  c->tcp_do_close = 0;
6012
0
  c->do_not_close = 0;
6013
0
  c->tcp_do_toggle_rw = 1;
6014
0
  c->tcp_check_nb_connect = 0;
6015
#ifdef USE_MSG_FASTOPEN
6016
  c->tcp_do_fastopen = 0;
6017
#endif
6018
#ifdef USE_DNSCRYPT
6019
  c->dnscrypt = 0;
6020
  /* We don't know just yet if this is a dnscrypt channel. Allocation
6021
   * will be done when handling the callback. */
6022
  c->dnscrypt_buffer = c->buffer;
6023
#endif
6024
0
  c->repinfo.c = c;
6025
0
  c->callback = callback;
6026
0
  c->cb_arg = callback_arg;
6027
0
  c->socket = socket;
6028
0
  c->pp2_enabled = parent->pp2_enabled;
6029
0
  c->pp2_header_state = pp2_header_none;
6030
0
  if(spoolbuf) {
6031
0
    c->tcp_req_info = tcp_req_info_create(spoolbuf);
6032
0
    if(!c->tcp_req_info) {
6033
0
      log_err("could not create tcp commpoint");
6034
0
      sldns_buffer_free(c->buffer);
6035
0
      free(c->timeout);
6036
0
      free(c->ev);
6037
0
      free(c);
6038
0
      return NULL;
6039
0
    }
6040
0
    c->tcp_req_info->cp = c;
6041
0
    c->tcp_do_close = 1;
6042
0
    c->tcp_do_toggle_rw = 0;
6043
0
  }
6044
  /* add to parent free list */
6045
0
  c->tcp_free = parent->tcp_free;
6046
0
  parent->tcp_free = c;
6047
0
  c->is_in_tcp_free = 1;
6048
  /* ub_event stuff */
6049
0
  evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6050
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6051
0
    comm_point_tcp_handle_callback, c);
6052
0
  if(c->ev->ev == NULL)
6053
0
  {
6054
0
    log_err("could not basetset tcphdl event");
6055
0
    parent->tcp_free = c->tcp_free;
6056
0
    tcp_req_info_delete(c->tcp_req_info);
6057
0
    sldns_buffer_free(c->buffer);
6058
0
    free(c->timeout);
6059
0
    free(c->ev);
6060
0
    free(c);
6061
0
    return NULL;
6062
0
  }
6063
0
  return c;
6064
0
}
6065
6066
static struct comm_point*
6067
comm_point_create_http_handler(struct comm_base *base,
6068
  struct comm_point* parent, size_t bufsize, int harden_large_queries,
6069
  uint32_t http_max_streams, char* http_endpoint,
6070
  comm_point_callback_type* callback, void* callback_arg,
6071
  struct unbound_socket* socket)
6072
0
{
6073
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6074
0
    sizeof(struct comm_point));
6075
0
  short evbits;
6076
0
  if(!c)
6077
0
    return NULL;
6078
0
  c->ev = (struct internal_event*)calloc(1,
6079
0
    sizeof(struct internal_event));
6080
0
  if(!c->ev) {
6081
0
    free(c);
6082
0
    return NULL;
6083
0
  }
6084
0
  c->ev->base = base;
6085
0
  c->fd = -1;
6086
0
  c->buffer = sldns_buffer_new(bufsize);
6087
0
  if(!c->buffer) {
6088
0
    free(c->ev);
6089
0
    free(c);
6090
0
    return NULL;
6091
0
  }
6092
0
  c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
6093
0
  if(!c->timeout) {
6094
0
    sldns_buffer_free(c->buffer);
6095
0
    free(c->ev);
6096
0
    free(c);
6097
0
    return NULL;
6098
0
  }
6099
0
  c->tcp_is_reading = 0;
6100
0
  c->tcp_byte_count = 0;
6101
0
  c->tcp_parent = parent;
6102
0
  c->tcp_timeout_msec = parent->tcp_timeout_msec;
6103
0
  c->tcp_conn_limit = parent->tcp_conn_limit;
6104
0
  c->tcl_addr = NULL;
6105
0
  c->tcp_keepalive = 0;
6106
0
  c->max_tcp_count = 0;
6107
0
  c->cur_tcp_count = 0;
6108
0
  c->tcp_handlers = NULL;
6109
0
  c->tcp_free = NULL;
6110
0
  c->is_in_tcp_free = 0;
6111
0
  c->type = comm_http;
6112
0
  c->tcp_do_close = 1;
6113
0
  c->do_not_close = 0;
6114
0
  c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */
6115
0
  c->tcp_check_nb_connect = 0;
6116
#ifdef USE_MSG_FASTOPEN
6117
  c->tcp_do_fastopen = 0;
6118
#endif
6119
#ifdef USE_DNSCRYPT
6120
  c->dnscrypt = 0;
6121
  c->dnscrypt_buffer = NULL;
6122
#endif
6123
0
  c->repinfo.c = c;
6124
0
  c->callback = callback;
6125
0
  c->cb_arg = callback_arg;
6126
0
  c->socket = socket;
6127
0
  c->pp2_enabled = 0;
6128
0
  c->pp2_header_state = pp2_header_none;
6129
6130
0
  c->http_min_version = http_version_2;
6131
0
  c->http2_stream_max_qbuffer_size = bufsize;
6132
0
  if(harden_large_queries && bufsize > 512)
6133
0
    c->http2_stream_max_qbuffer_size = 512;
6134
0
  c->http2_max_streams = http_max_streams;
6135
0
  if(!(c->http_endpoint = strdup(http_endpoint))) {
6136
0
    log_err("could not strdup http_endpoint");
6137
0
    sldns_buffer_free(c->buffer);
6138
0
    free(c->timeout);
6139
0
    free(c->ev);
6140
0
    free(c);
6141
0
    return NULL;
6142
0
  }
6143
0
  c->use_h2 = 0;
6144
#ifdef HAVE_NGHTTP2
6145
  if(!(c->h2_session = http2_session_create(c))) {
6146
    log_err("could not create http2 session");
6147
    free(c->http_endpoint);
6148
    sldns_buffer_free(c->buffer);
6149
    free(c->timeout);
6150
    free(c->ev);
6151
    free(c);
6152
    return NULL;
6153
  }
6154
  if(!(c->h2_session->callbacks = http2_req_callbacks_create())) {
6155
    log_err("could not create http2 callbacks");
6156
    http2_session_delete(c->h2_session);
6157
    free(c->http_endpoint);
6158
    sldns_buffer_free(c->buffer);
6159
    free(c->timeout);
6160
    free(c->ev);
6161
    free(c);
6162
    return NULL;
6163
  }
6164
#endif
6165
6166
  /* add to parent free list */
6167
0
  c->tcp_free = parent->tcp_free;
6168
0
  parent->tcp_free = c;
6169
0
  c->is_in_tcp_free = 1;
6170
  /* ub_event stuff */
6171
0
  evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6172
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6173
0
    comm_point_http_handle_callback, c);
6174
0
  if(c->ev->ev == NULL)
6175
0
  {
6176
0
    log_err("could not set http handler event");
6177
0
    parent->tcp_free = c->tcp_free;
6178
0
    http2_session_delete(c->h2_session);
6179
0
    sldns_buffer_free(c->buffer);
6180
0
    free(c->timeout);
6181
0
    free(c->ev);
6182
0
    free(c);
6183
0
    return NULL;
6184
0
  }
6185
0
  return c;
6186
0
}
6187
6188
struct comm_point*
6189
comm_point_create_tcp(struct comm_base *base, int fd, int num,
6190
  int idle_timeout, int harden_large_queries,
6191
  uint32_t http_max_streams, char* http_endpoint,
6192
  struct tcl_list* tcp_conn_limit, size_t bufsize,
6193
  struct sldns_buffer* spoolbuf, enum listen_type port_type,
6194
  int pp2_enabled, comm_point_callback_type* callback,
6195
  void* callback_arg, struct unbound_socket* socket)
6196
0
{
6197
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6198
0
    sizeof(struct comm_point));
6199
0
  short evbits;
6200
0
  int i;
6201
  /* first allocate the TCP accept listener */
6202
0
  if(!c)
6203
0
    return NULL;
6204
0
  c->ev = (struct internal_event*)calloc(1,
6205
0
    sizeof(struct internal_event));
6206
0
  if(!c->ev) {
6207
0
    free(c);
6208
0
    return NULL;
6209
0
  }
6210
0
  c->ev->base = base;
6211
0
  c->fd = fd;
6212
0
  c->buffer = NULL;
6213
0
  c->timeout = NULL;
6214
0
  c->tcp_is_reading = 0;
6215
0
  c->tcp_byte_count = 0;
6216
0
  c->tcp_timeout_msec = idle_timeout;
6217
0
  c->tcp_conn_limit = tcp_conn_limit;
6218
0
  c->tcl_addr = NULL;
6219
0
  c->tcp_keepalive = 0;
6220
0
  c->tcp_parent = NULL;
6221
0
  c->max_tcp_count = num;
6222
0
  c->cur_tcp_count = 0;
6223
0
  c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
6224
0
    sizeof(struct comm_point*));
6225
0
  if(!c->tcp_handlers) {
6226
0
    free(c->ev);
6227
0
    free(c);
6228
0
    return NULL;
6229
0
  }
6230
0
  c->tcp_free = NULL;
6231
0
  c->is_in_tcp_free = 0;
6232
0
  c->type = comm_tcp_accept;
6233
0
  c->tcp_do_close = 0;
6234
0
  c->do_not_close = 0;
6235
0
  c->tcp_do_toggle_rw = 0;
6236
0
  c->tcp_check_nb_connect = 0;
6237
#ifdef USE_MSG_FASTOPEN
6238
  c->tcp_do_fastopen = 0;
6239
#endif
6240
#ifdef USE_DNSCRYPT
6241
  c->dnscrypt = 0;
6242
  c->dnscrypt_buffer = NULL;
6243
#endif
6244
0
  c->callback = NULL;
6245
0
  c->cb_arg = NULL;
6246
0
  c->socket = socket;
6247
0
  c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled);
6248
0
  c->pp2_header_state = pp2_header_none;
6249
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
6250
  /* ub_event stuff */
6251
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6252
0
    comm_point_tcp_accept_callback, c);
6253
0
  if(c->ev->ev == NULL) {
6254
0
    log_err("could not baseset tcpacc event");
6255
0
    comm_point_delete(c);
6256
0
    return NULL;
6257
0
  }
6258
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6259
0
    log_err("could not add tcpacc event");
6260
0
    comm_point_delete(c);
6261
0
    return NULL;
6262
0
  }
6263
0
  c->event_added = 1;
6264
  /* now prealloc the handlers */
6265
0
  for(i=0; i<num; i++) {
6266
0
    if(port_type == listen_type_tcp ||
6267
0
      port_type == listen_type_ssl ||
6268
0
      port_type == listen_type_tcp_dnscrypt) {
6269
0
      c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
6270
0
        c, bufsize, spoolbuf, callback, callback_arg, socket);
6271
0
    } else if(port_type == listen_type_http) {
6272
0
      c->tcp_handlers[i] = comm_point_create_http_handler(
6273
0
        base, c, bufsize, harden_large_queries,
6274
0
        http_max_streams, http_endpoint,
6275
0
        callback, callback_arg, socket);
6276
0
    }
6277
0
    else {
6278
0
      log_err("could not create tcp handler, unknown listen "
6279
0
        "type");
6280
0
      return NULL;
6281
0
    }
6282
0
    if(!c->tcp_handlers[i]) {
6283
0
      comm_point_delete(c);
6284
0
      return NULL;
6285
0
    }
6286
0
  }
6287
6288
0
  return c;
6289
0
}
6290
6291
struct comm_point*
6292
comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
6293
        comm_point_callback_type* callback, void* callback_arg)
6294
0
{
6295
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6296
0
    sizeof(struct comm_point));
6297
0
  short evbits;
6298
0
  if(!c)
6299
0
    return NULL;
6300
0
  c->ev = (struct internal_event*)calloc(1,
6301
0
    sizeof(struct internal_event));
6302
0
  if(!c->ev) {
6303
0
    free(c);
6304
0
    return NULL;
6305
0
  }
6306
0
  c->ev->base = base;
6307
0
  c->fd = -1;
6308
0
  c->buffer = sldns_buffer_new(bufsize);
6309
0
  if(!c->buffer) {
6310
0
    free(c->ev);
6311
0
    free(c);
6312
0
    return NULL;
6313
0
  }
6314
0
  c->timeout = NULL;
6315
0
  c->tcp_is_reading = 0;
6316
0
  c->tcp_byte_count = 0;
6317
0
  c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
6318
0
  c->tcp_conn_limit = NULL;
6319
0
  c->tcl_addr = NULL;
6320
0
  c->tcp_keepalive = 0;
6321
0
  c->tcp_parent = NULL;
6322
0
  c->max_tcp_count = 0;
6323
0
  c->cur_tcp_count = 0;
6324
0
  c->tcp_handlers = NULL;
6325
0
  c->tcp_free = NULL;
6326
0
  c->is_in_tcp_free = 0;
6327
0
  c->type = comm_tcp;
6328
0
  c->tcp_do_close = 0;
6329
0
  c->do_not_close = 0;
6330
0
  c->tcp_do_toggle_rw = 1;
6331
0
  c->tcp_check_nb_connect = 1;
6332
#ifdef USE_MSG_FASTOPEN
6333
  c->tcp_do_fastopen = 1;
6334
#endif
6335
#ifdef USE_DNSCRYPT
6336
  c->dnscrypt = 0;
6337
  c->dnscrypt_buffer = c->buffer;
6338
#endif
6339
0
  c->repinfo.c = c;
6340
0
  c->callback = callback;
6341
0
  c->cb_arg = callback_arg;
6342
0
  c->pp2_enabled = 0;
6343
0
  c->pp2_header_state = pp2_header_none;
6344
0
  evbits = UB_EV_PERSIST | UB_EV_WRITE;
6345
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6346
0
    comm_point_tcp_handle_callback, c);
6347
0
  if(c->ev->ev == NULL)
6348
0
  {
6349
0
    log_err("could not baseset tcpout event");
6350
0
    sldns_buffer_free(c->buffer);
6351
0
    free(c->ev);
6352
0
    free(c);
6353
0
    return NULL;
6354
0
  }
6355
6356
0
  return c;
6357
0
}
6358
6359
struct comm_point*
6360
comm_point_create_http_out(struct comm_base *base, size_t bufsize,
6361
        comm_point_callback_type* callback, void* callback_arg,
6362
  sldns_buffer* temp)
6363
0
{
6364
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6365
0
    sizeof(struct comm_point));
6366
0
  short evbits;
6367
0
  if(!c)
6368
0
    return NULL;
6369
0
  c->ev = (struct internal_event*)calloc(1,
6370
0
    sizeof(struct internal_event));
6371
0
  if(!c->ev) {
6372
0
    free(c);
6373
0
    return NULL;
6374
0
  }
6375
0
  c->ev->base = base;
6376
0
  c->fd = -1;
6377
0
  c->buffer = sldns_buffer_new(bufsize);
6378
0
  if(!c->buffer) {
6379
0
    free(c->ev);
6380
0
    free(c);
6381
0
    return NULL;
6382
0
  }
6383
0
  c->timeout = NULL;
6384
0
  c->tcp_is_reading = 0;
6385
0
  c->tcp_byte_count = 0;
6386
0
  c->tcp_parent = NULL;
6387
0
  c->max_tcp_count = 0;
6388
0
  c->cur_tcp_count = 0;
6389
0
  c->tcp_handlers = NULL;
6390
0
  c->tcp_free = NULL;
6391
0
  c->is_in_tcp_free = 0;
6392
0
  c->type = comm_http;
6393
0
  c->tcp_do_close = 0;
6394
0
  c->do_not_close = 0;
6395
0
  c->tcp_do_toggle_rw = 1;
6396
0
  c->tcp_check_nb_connect = 1;
6397
0
  c->http_in_headers = 1;
6398
0
  c->http_in_chunk_headers = 0;
6399
0
  c->http_is_chunked = 0;
6400
0
  c->http_temp = temp;
6401
#ifdef USE_MSG_FASTOPEN
6402
  c->tcp_do_fastopen = 1;
6403
#endif
6404
#ifdef USE_DNSCRYPT
6405
  c->dnscrypt = 0;
6406
  c->dnscrypt_buffer = c->buffer;
6407
#endif
6408
0
  c->repinfo.c = c;
6409
0
  c->callback = callback;
6410
0
  c->cb_arg = callback_arg;
6411
0
  c->pp2_enabled = 0;
6412
0
  c->pp2_header_state = pp2_header_none;
6413
0
  evbits = UB_EV_PERSIST | UB_EV_WRITE;
6414
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6415
0
    comm_point_http_handle_callback, c);
6416
0
  if(c->ev->ev == NULL)
6417
0
  {
6418
0
    log_err("could not baseset tcpout event");
6419
0
#ifdef HAVE_SSL
6420
0
    SSL_free(c->ssl);
6421
0
#endif
6422
0
    sldns_buffer_free(c->buffer);
6423
0
    free(c->ev);
6424
0
    free(c);
6425
0
    return NULL;
6426
0
  }
6427
6428
0
  return c;
6429
0
}
6430
6431
struct comm_point*
6432
comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
6433
        comm_point_callback_type* callback, void* callback_arg)
6434
0
{
6435
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6436
0
    sizeof(struct comm_point));
6437
0
  short evbits;
6438
0
  if(!c)
6439
0
    return NULL;
6440
0
  c->ev = (struct internal_event*)calloc(1,
6441
0
    sizeof(struct internal_event));
6442
0
  if(!c->ev) {
6443
0
    free(c);
6444
0
    return NULL;
6445
0
  }
6446
0
  c->ev->base = base;
6447
0
  c->fd = fd;
6448
0
  c->buffer = sldns_buffer_new(bufsize);
6449
0
  if(!c->buffer) {
6450
0
    free(c->ev);
6451
0
    free(c);
6452
0
    return NULL;
6453
0
  }
6454
0
  c->timeout = NULL;
6455
0
  c->tcp_is_reading = 1;
6456
0
  c->tcp_byte_count = 0;
6457
0
  c->tcp_parent = NULL;
6458
0
  c->max_tcp_count = 0;
6459
0
  c->cur_tcp_count = 0;
6460
0
  c->tcp_handlers = NULL;
6461
0
  c->tcp_free = NULL;
6462
0
  c->is_in_tcp_free = 0;
6463
0
  c->type = comm_local;
6464
0
  c->tcp_do_close = 0;
6465
0
  c->do_not_close = 1;
6466
0
  c->tcp_do_toggle_rw = 0;
6467
0
  c->tcp_check_nb_connect = 0;
6468
#ifdef USE_MSG_FASTOPEN
6469
  c->tcp_do_fastopen = 0;
6470
#endif
6471
#ifdef USE_DNSCRYPT
6472
  c->dnscrypt = 0;
6473
  c->dnscrypt_buffer = c->buffer;
6474
#endif
6475
0
  c->callback = callback;
6476
0
  c->cb_arg = callback_arg;
6477
0
  c->pp2_enabled = 0;
6478
0
  c->pp2_header_state = pp2_header_none;
6479
  /* ub_event stuff */
6480
0
  evbits = UB_EV_PERSIST | UB_EV_READ;
6481
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6482
0
    comm_point_local_handle_callback, c);
6483
0
  if(c->ev->ev == NULL) {
6484
0
    log_err("could not baseset localhdl event");
6485
0
    free(c->ev);
6486
0
    free(c);
6487
0
    return NULL;
6488
0
  }
6489
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6490
0
    log_err("could not add localhdl event");
6491
0
    ub_event_free(c->ev->ev);
6492
0
    free(c->ev);
6493
0
    free(c);
6494
0
    return NULL;
6495
0
  }
6496
0
  c->event_added = 1;
6497
0
  return c;
6498
0
}
6499
6500
struct comm_point*
6501
comm_point_create_raw(struct comm_base* base, int fd, int writing,
6502
  comm_point_callback_type* callback, void* callback_arg)
6503
0
{
6504
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6505
0
    sizeof(struct comm_point));
6506
0
  short evbits;
6507
0
  if(!c)
6508
0
    return NULL;
6509
0
  c->ev = (struct internal_event*)calloc(1,
6510
0
    sizeof(struct internal_event));
6511
0
  if(!c->ev) {
6512
0
    free(c);
6513
0
    return NULL;
6514
0
  }
6515
0
  c->ev->base = base;
6516
0
  c->fd = fd;
6517
0
  c->buffer = NULL;
6518
0
  c->timeout = NULL;
6519
0
  c->tcp_is_reading = 0;
6520
0
  c->tcp_byte_count = 0;
6521
0
  c->tcp_parent = NULL;
6522
0
  c->max_tcp_count = 0;
6523
0
  c->cur_tcp_count = 0;
6524
0
  c->tcp_handlers = NULL;
6525
0
  c->tcp_free = NULL;
6526
0
  c->is_in_tcp_free = 0;
6527
0
  c->type = comm_raw;
6528
0
  c->tcp_do_close = 0;
6529
0
  c->do_not_close = 1;
6530
0
  c->tcp_do_toggle_rw = 0;
6531
0
  c->tcp_check_nb_connect = 0;
6532
#ifdef USE_MSG_FASTOPEN
6533
  c->tcp_do_fastopen = 0;
6534
#endif
6535
#ifdef USE_DNSCRYPT
6536
  c->dnscrypt = 0;
6537
  c->dnscrypt_buffer = c->buffer;
6538
#endif
6539
0
  c->callback = callback;
6540
0
  c->cb_arg = callback_arg;
6541
0
  c->pp2_enabled = 0;
6542
0
  c->pp2_header_state = pp2_header_none;
6543
  /* ub_event stuff */
6544
0
  if(writing)
6545
0
    evbits = UB_EV_PERSIST | UB_EV_WRITE;
6546
0
  else  evbits = UB_EV_PERSIST | UB_EV_READ;
6547
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6548
0
    comm_point_raw_handle_callback, c);
6549
0
  if(c->ev->ev == NULL) {
6550
0
    log_err("could not baseset rawhdl event");
6551
0
    free(c->ev);
6552
0
    free(c);
6553
0
    return NULL;
6554
0
  }
6555
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6556
0
    log_err("could not add rawhdl event");
6557
0
    ub_event_free(c->ev->ev);
6558
0
    free(c->ev);
6559
0
    free(c);
6560
0
    return NULL;
6561
0
  }
6562
0
  c->event_added = 1;
6563
0
  return c;
6564
0
}
6565
6566
void
6567
comm_point_close(struct comm_point* c)
6568
0
{
6569
0
  if(!c)
6570
0
    return;
6571
0
  if(c->fd != -1) {
6572
0
    verbose(5, "comm_point_close of %d: event_del", c->fd);
6573
0
    if(c->event_added) {
6574
0
      if(ub_event_del(c->ev->ev) != 0) {
6575
0
        log_err("could not event_del on close");
6576
0
      }
6577
0
      c->event_added = 0;
6578
0
    }
6579
0
  }
6580
0
  tcl_close_connection(c->tcl_addr);
6581
0
  if(c->tcp_req_info)
6582
0
    tcp_req_info_clear(c->tcp_req_info);
6583
0
  if(c->h2_session)
6584
0
    http2_session_server_delete(c->h2_session);
6585
  /* stop the comm point from reading or writing after it is closed. */
6586
0
  if(c->tcp_more_read_again && *c->tcp_more_read_again)
6587
0
    *c->tcp_more_read_again = 0;
6588
0
  if(c->tcp_more_write_again && *c->tcp_more_write_again)
6589
0
    *c->tcp_more_write_again = 0;
6590
6591
  /* close fd after removing from event lists, or epoll.. is messed up */
6592
0
  if(c->fd != -1 && !c->do_not_close) {
6593
#ifdef USE_WINSOCK
6594
    if(c->type == comm_tcp || c->type == comm_http) {
6595
      /* delete sticky events for the fd, it gets closed */
6596
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
6597
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
6598
    }
6599
#endif
6600
0
    verbose(VERB_ALGO, "close fd %d", c->fd);
6601
0
    sock_close(c->fd);
6602
0
  }
6603
0
  c->fd = -1;
6604
0
}
6605
6606
void
6607
comm_point_delete(struct comm_point* c)
6608
0
{
6609
0
  if(!c)
6610
0
    return;
6611
0
  if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
6612
0
#ifdef HAVE_SSL
6613
0
    SSL_shutdown(c->ssl);
6614
0
    SSL_free(c->ssl);
6615
0
#endif
6616
0
  }
6617
0
  if(c->type == comm_http && c->http_endpoint) {
6618
0
    free(c->http_endpoint);
6619
0
    c->http_endpoint = NULL;
6620
0
  }
6621
0
  comm_point_close(c);
6622
0
  if(c->tcp_handlers) {
6623
0
    int i;
6624
0
    for(i=0; i<c->max_tcp_count; i++)
6625
0
      comm_point_delete(c->tcp_handlers[i]);
6626
0
    free(c->tcp_handlers);
6627
0
  }
6628
0
  free(c->timeout);
6629
0
  if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
6630
0
    sldns_buffer_free(c->buffer);
6631
#ifdef USE_DNSCRYPT
6632
    if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
6633
      sldns_buffer_free(c->dnscrypt_buffer);
6634
    }
6635
#endif
6636
0
    if(c->tcp_req_info) {
6637
0
      tcp_req_info_delete(c->tcp_req_info);
6638
0
    }
6639
0
    if(c->h2_session) {
6640
0
      http2_session_delete(c->h2_session);
6641
0
    }
6642
0
  }
6643
#ifdef HAVE_NGTCP2
6644
  if(c->doq_socket)
6645
    doq_server_socket_delete(c->doq_socket);
6646
#endif
6647
0
  ub_event_free(c->ev->ev);
6648
0
  free(c->ev);
6649
0
  free(c);
6650
0
}
6651
6652
#ifdef USE_DNSTAP
/**
 * Log the local and client addresses and hand a client response to
 * dnstap.
 *
 * @param dtenv: dnstap environment passed to dt_msg_send_client_response.
 * @param addr: local address; logged and passed on cast to
 *  struct sockaddr_storage*.
 * @param addrlen: length of addr, used for logging.
 * @param client_addr: client address; logged and passed on.
 * @param client_addrlen: length of client_addr, used for logging.
 * @param type: comm point type, passed on.
 * @param ssl: ssl pointer of the comm point, passed on.
 * @param buffer: buffer with the response, passed on.
 */
static void
send_reply_dnstap(struct dt_env* dtenv,
  struct sockaddr* addr, socklen_t addrlen,
  struct sockaddr_storage* client_addr, socklen_t client_addrlen,
  enum comm_point_type type, void* ssl, sldns_buffer* buffer)
{
  struct sockaddr_storage* local = (struct sockaddr_storage*)addr;

  log_addr(VERB_ALGO, "from local addr", (void*)addr, addrlen);
  log_addr(VERB_ALGO, "response to client", client_addr, client_addrlen);
  dt_msg_send_client_response(dtenv, client_addr, local, type, ssl,
    buffer);
}
#endif
6665
6666
void
6667
comm_point_send_reply(struct comm_reply *repinfo)
6668
0
{
6669
0
  struct sldns_buffer* buffer;
6670
0
  log_assert(repinfo && repinfo->c);
6671
#ifdef USE_DNSCRYPT
6672
  buffer = repinfo->c->dnscrypt_buffer;
6673
  if(!dnsc_handle_uncurved_request(repinfo)) {
6674
    return;
6675
  }
6676
#else
6677
0
  buffer = repinfo->c->buffer;
6678
0
#endif
6679
0
  if(repinfo->c->type == comm_udp) {
6680
0
    if(repinfo->srctype)
6681
0
      comm_point_send_udp_msg_if(repinfo->c, buffer,
6682
0
        (struct sockaddr*)&repinfo->remote_addr,
6683
0
        repinfo->remote_addrlen, repinfo);
6684
0
    else
6685
0
      comm_point_send_udp_msg(repinfo->c, buffer,
6686
0
        (struct sockaddr*)&repinfo->remote_addr,
6687
0
        repinfo->remote_addrlen, 0);
6688
#ifdef USE_DNSTAP
6689
    /*
6690
     * sending src (client)/dst (local service) addresses over
6691
     * DNSTAP from udp callback
6692
     */
6693
    if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) {
6694
      send_reply_dnstap(repinfo->c->dtenv,
6695
        repinfo->c->socket->addr,
6696
        repinfo->c->socket->addrlen,
6697
        &repinfo->client_addr, repinfo->client_addrlen,
6698
        repinfo->c->type, repinfo->c->ssl,
6699
        repinfo->c->buffer);
6700
    }
6701
#endif
6702
0
  } else {
6703
#ifdef USE_DNSTAP
6704
    struct dt_env* dtenv =
6705
#ifdef HAVE_NGTCP2
6706
      repinfo->c->doq_socket
6707
      ?repinfo->c->dtenv:
6708
#endif
6709
      repinfo->c->tcp_parent->dtenv;
6710
    struct sldns_buffer* dtbuffer = repinfo->c->tcp_req_info
6711
      ?repinfo->c->tcp_req_info->spool_buffer
6712
      :repinfo->c->buffer;
6713
#ifdef USE_DNSCRYPT
6714
    if(repinfo->c->dnscrypt && repinfo->is_dnscrypted)
6715
      dtbuffer = repinfo->c->buffer;
6716
#endif
6717
    /*
6718
     * sending src (client)/dst (local service) addresses over
6719
     * DNSTAP from other callbacks
6720
     */
6721
    if(dtenv != NULL && dtenv->log_client_response_messages) {
6722
      send_reply_dnstap(dtenv,
6723
        repinfo->c->socket->addr,
6724
        repinfo->c->socket->addrlen,
6725
        &repinfo->client_addr, repinfo->client_addrlen,
6726
        repinfo->c->type, repinfo->c->ssl,
6727
        dtbuffer);
6728
    }
6729
#endif
6730
0
    if(repinfo->c->tcp_req_info) {
6731
0
      tcp_req_info_send_reply(repinfo->c->tcp_req_info);
6732
0
    } else if(repinfo->c->use_h2) {
6733
0
      if(!http2_submit_dns_response(repinfo->c->h2_session)) {
6734
0
        return;
6735
0
      }
6736
0
      repinfo->c->h2_stream = NULL;
6737
0
      repinfo->c->tcp_is_reading = 0;
6738
0
      comm_point_stop_listening(repinfo->c);
6739
0
      comm_point_start_listening(repinfo->c, -1,
6740
0
        adjusted_tcp_timeout(repinfo->c));
6741
0
      return;
6742
#ifdef HAVE_NGTCP2
6743
    } else if(repinfo->c->doq_socket) {
6744
      doq_socket_send_reply(repinfo);
6745
#endif
6746
0
    } else {
6747
0
      comm_point_start_listening(repinfo->c, -1,
6748
0
        adjusted_tcp_timeout(repinfo->c));
6749
0
    }
6750
0
  }
6751
0
}
6752
6753
void
6754
comm_point_drop_reply(struct comm_reply* repinfo)
6755
0
{
6756
0
  if(!repinfo)
6757
0
    return;
6758
0
  log_assert(repinfo->c);
6759
0
  log_assert(repinfo->c->type != comm_tcp_accept);
6760
0
  if(repinfo->c->type == comm_udp)
6761
0
    return;
6762
0
  if(repinfo->c->tcp_req_info)
6763
0
    repinfo->c->tcp_req_info->is_drop = 1;
6764
0
  if(repinfo->c->type == comm_http) {
6765
0
    if(repinfo->c->h2_session) {
6766
0
      repinfo->c->h2_session->is_drop = 1;
6767
0
      if(!repinfo->c->h2_session->postpone_drop)
6768
0
        reclaim_http_handler(repinfo->c);
6769
0
      return;
6770
0
    }
6771
0
    reclaim_http_handler(repinfo->c);
6772
0
    return;
6773
#ifdef HAVE_NGTCP2
6774
  } else if(repinfo->c->doq_socket) {
6775
    doq_socket_drop_reply(repinfo);
6776
    return;
6777
#endif
6778
0
  }
6779
0
  reclaim_tcp_handler(repinfo->c);
6780
0
}
6781
6782
void
6783
comm_point_stop_listening(struct comm_point* c)
6784
0
{
6785
0
  verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
6786
0
  if(c->event_added) {
6787
0
    if(ub_event_del(c->ev->ev) != 0) {
6788
0
      log_err("event_del error to stoplisten");
6789
0
    }
6790
0
    c->event_added = 0;
6791
0
  }
6792
0
}
6793
6794
void
6795
comm_point_start_listening(struct comm_point* c, int newfd, int msec)
6796
0
{
6797
0
  verbose(VERB_ALGO, "comm point start listening %d (%d msec)",
6798
0
    c->fd==-1?newfd:c->fd, msec);
6799
0
  if(c->type == comm_tcp_accept && !c->tcp_free) {
6800
    /* no use to start listening no free slots. */
6801
0
    return;
6802
0
  }
6803
0
  if(c->event_added) {
6804
0
    if(ub_event_del(c->ev->ev) != 0) {
6805
0
      log_err("event_del error to startlisten");
6806
0
    }
6807
0
    c->event_added = 0;
6808
0
  }
6809
0
  if(msec != -1 && msec != 0) {
6810
0
    if(!c->timeout) {
6811
0
      c->timeout = (struct timeval*)malloc(sizeof(
6812
0
        struct timeval));
6813
0
      if(!c->timeout) {
6814
0
        log_err("cpsl: malloc failed. No net read.");
6815
0
        return;
6816
0
      }
6817
0
    }
6818
0
    ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
6819
0
#ifndef S_SPLINT_S /* splint fails on struct timeval. */
6820
0
    c->timeout->tv_sec = msec/1000;
6821
0
    c->timeout->tv_usec = (msec%1000)*1000;
6822
0
#endif /* S_SPLINT_S */
6823
0
  } else {
6824
0
    if(msec == 0 || !c->timeout) {
6825
0
      ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6826
0
    }
6827
0
  }
6828
0
  if(c->type == comm_tcp || c->type == comm_http) {
6829
0
    ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6830
0
    if(c->tcp_write_and_read) {
6831
0
      verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd));
6832
0
      ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6833
0
    } else if(c->tcp_is_reading) {
6834
0
      verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd));
6835
0
      ub_event_add_bits(c->ev->ev, UB_EV_READ);
6836
0
    } else  {
6837
0
      verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd));
6838
0
      ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6839
0
    }
6840
0
  }
6841
0
  if(newfd != -1) {
6842
0
    if(c->fd != -1 && c->fd != newfd) {
6843
0
      verbose(5, "cpsl close of fd %d for %d", c->fd, newfd);
6844
0
      sock_close(c->fd);
6845
0
    }
6846
0
    c->fd = newfd;
6847
0
    ub_event_set_fd(c->ev->ev, c->fd);
6848
0
  }
6849
0
  if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
6850
0
    log_err("event_add failed. in cpsl.");
6851
0
    return;
6852
0
  }
6853
0
  c->event_added = 1;
6854
0
}
6855
6856
void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
6857
0
{
6858
0
  verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
6859
0
  if(c->event_added) {
6860
0
    if(ub_event_del(c->ev->ev) != 0) {
6861
0
      log_err("event_del error to cplf");
6862
0
    }
6863
0
    c->event_added = 0;
6864
0
  }
6865
0
  if(!c->timeout) {
6866
0
    ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6867
0
  }
6868
0
  ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6869
0
  if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
6870
0
  if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6871
0
  if(ub_event_add(c->ev->ev, c->timeout) != 0) {
6872
0
    log_err("event_add failed. in cplf.");
6873
0
    return;
6874
0
  }
6875
0
  c->event_added = 1;
6876
0
}
6877
6878
size_t comm_point_get_mem(struct comm_point* c)
6879
0
{
6880
0
  size_t s;
6881
0
  if(!c)
6882
0
    return 0;
6883
0
  s = sizeof(*c) + sizeof(*c->ev);
6884
0
  if(c->timeout)
6885
0
    s += sizeof(*c->timeout);
6886
0
  if(c->type == comm_tcp || c->type == comm_local) {
6887
0
    s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
6888
#ifdef USE_DNSCRYPT
6889
    s += sizeof(*c->dnscrypt_buffer);
6890
    if(c->buffer != c->dnscrypt_buffer) {
6891
      s += sldns_buffer_capacity(c->dnscrypt_buffer);
6892
    }
6893
#endif
6894
0
  }
6895
0
  if(c->type == comm_tcp_accept) {
6896
0
    int i;
6897
0
    for(i=0; i<c->max_tcp_count; i++)
6898
0
      s += comm_point_get_mem(c->tcp_handlers[i]);
6899
0
  }
6900
0
  return s;
6901
0
}
6902
6903
struct comm_timer*
6904
comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
6905
0
{
6906
0
  struct internal_timer *tm = (struct internal_timer*)calloc(1,
6907
0
    sizeof(struct internal_timer));
6908
0
  if(!tm) {
6909
0
    log_err("malloc failed");
6910
0
    return NULL;
6911
0
  }
6912
0
  tm->super.ev_timer = tm;
6913
0
  tm->base = base;
6914
0
  tm->super.callback = cb;
6915
0
  tm->super.cb_arg = cb_arg;
6916
0
  tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT,
6917
0
    comm_timer_callback, &tm->super);
6918
0
  if(tm->ev == NULL) {
6919
0
    log_err("timer_create: event_base_set failed.");
6920
0
    free(tm);
6921
0
    return NULL;
6922
0
  }
6923
0
  return &tm->super;
6924
0
}
6925
6926
void
6927
comm_timer_disable(struct comm_timer* timer)
6928
0
{
6929
0
  if(!timer)
6930
0
    return;
6931
0
  ub_timer_del(timer->ev_timer->ev);
6932
0
  timer->ev_timer->enabled = 0;
6933
0
}
6934
6935
void
6936
comm_timer_set(struct comm_timer* timer, struct timeval* tv)
6937
0
{
6938
0
  log_assert(tv);
6939
0
  if(timer->ev_timer->enabled)
6940
0
    comm_timer_disable(timer);
6941
0
  if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
6942
0
    comm_timer_callback, timer, tv) != 0)
6943
0
    log_err("comm_timer_set: evtimer_add failed.");
6944
0
  timer->ev_timer->enabled = 1;
6945
0
}
6946
6947
void
6948
comm_timer_delete(struct comm_timer* timer)
6949
0
{
6950
0
  if(!timer)
6951
0
    return;
6952
0
  comm_timer_disable(timer);
6953
  /* Free the sub struct timer->ev_timer derived from the super struct timer.
6954
   * i.e. assert(timer == timer->ev_timer)
6955
   */
6956
0
  ub_event_free(timer->ev_timer->ev);
6957
0
  free(timer->ev_timer);
6958
0
}
6959
6960
void
6961
comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
6962
0
{
6963
0
  struct comm_timer* tm = (struct comm_timer*)arg;
6964
0
  if(!(event&UB_EV_TIMEOUT))
6965
0
    return;
6966
0
  ub_comm_base_now(tm->ev_timer->base);
6967
0
  tm->ev_timer->enabled = 0;
6968
0
  fptr_ok(fptr_whitelist_comm_timer(tm->callback));
6969
0
  (*tm->callback)(tm->cb_arg);
6970
0
}
6971
6972
int
6973
comm_timer_is_set(struct comm_timer* timer)
6974
0
{
6975
0
  return (int)timer->ev_timer->enabled;
6976
0
}
6977
6978
size_t
6979
comm_timer_get_mem(struct comm_timer* timer)
6980
0
{
6981
0
  if(!timer) return 0;
6982
0
  return sizeof(struct internal_timer);
6983
0
}
6984
6985
struct comm_signal*
6986
comm_signal_create(struct comm_base* base,
6987
        void (*callback)(int, void*), void* cb_arg)
6988
0
{
6989
0
  struct comm_signal* com = (struct comm_signal*)malloc(
6990
0
    sizeof(struct comm_signal));
6991
0
  if(!com) {
6992
0
    log_err("malloc failed");
6993
0
    return NULL;
6994
0
  }
6995
0
  com->base = base;
6996
0
  com->callback = callback;
6997
0
  com->cb_arg = cb_arg;
6998
0
  com->ev_signal = NULL;
6999
0
  return com;
7000
0
}
7001
7002
void
7003
comm_signal_callback(int sig, short event, void* arg)
7004
0
{
7005
0
  struct comm_signal* comsig = (struct comm_signal*)arg;
7006
0
  if(!(event & UB_EV_SIGNAL))
7007
0
    return;
7008
0
  ub_comm_base_now(comsig->base);
7009
0
  fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
7010
0
  (*comsig->callback)(sig, comsig->cb_arg);
7011
0
}
7012
7013
int
7014
comm_signal_bind(struct comm_signal* comsig, int sig)
7015
0
{
7016
0
  struct internal_signal* entry = (struct internal_signal*)calloc(1,
7017
0
    sizeof(struct internal_signal));
7018
0
  if(!entry) {
7019
0
    log_err("malloc failed");
7020
0
    return 0;
7021
0
  }
7022
0
  log_assert(comsig);
7023
  /* add signal event */
7024
0
  entry->ev = ub_signal_new(comsig->base->eb->base, sig,
7025
0
    comm_signal_callback, comsig);
7026
0
  if(entry->ev == NULL) {
7027
0
    log_err("Could not create signal event");
7028
0
    free(entry);
7029
0
    return 0;
7030
0
  }
7031
0
  if(ub_signal_add(entry->ev, NULL) != 0) {
7032
0
    log_err("Could not add signal handler");
7033
0
    ub_event_free(entry->ev);
7034
0
    free(entry);
7035
0
    return 0;
7036
0
  }
7037
  /* link into list */
7038
0
  entry->next = comsig->ev_signal;
7039
0
  comsig->ev_signal = entry;
7040
0
  return 1;
7041
0
}
7042
7043
void
7044
comm_signal_delete(struct comm_signal* comsig)
7045
0
{
7046
0
  struct internal_signal* p, *np;
7047
0
  if(!comsig)
7048
0
    return;
7049
0
  p=comsig->ev_signal;
7050
0
  while(p) {
7051
0
    np = p->next;
7052
0
    ub_signal_del(p->ev);
7053
0
    ub_event_free(p->ev);
7054
0
    free(p);
7055
0
    p = np;
7056
0
  }
7057
0
  free(comsig);
7058
0
}