Coverage Report

Created: 2025-07-11 06:09

/src/unbound/util/netevent.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * util/netevent.c - event notification
3
 *
4
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5
 *
6
 * This software is open source.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 *
12
 * Redistributions of source code must retain the above copyright notice,
13
 * this list of conditions and the following disclaimer.
14
 *
15
 * Redistributions in binary form must reproduce the above copyright notice,
16
 * this list of conditions and the following disclaimer in the documentation
17
 * and/or other materials provided with the distribution.
18
 *
19
 * Neither the name of the NLNET LABS nor the names of its contributors may
20
 * be used to endorse or promote products derived from this software without
21
 * specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
 */
35
36
/**
37
 * \file
38
 *
39
 * This file contains event notification functions.
40
 */
41
#include "config.h"
42
#include "util/netevent.h"
43
#include "util/ub_event.h"
44
#include "util/log.h"
45
#include "util/net_help.h"
46
#include "util/tcp_conn_limit.h"
47
#include "util/fptr_wlist.h"
48
#include "util/proxy_protocol.h"
49
#include "util/timeval_func.h"
50
#include "sldns/pkthdr.h"
51
#include "sldns/sbuffer.h"
52
#include "sldns/str2wire.h"
53
#include "dnstap/dnstap.h"
54
#include "dnscrypt/dnscrypt.h"
55
#include "services/listen_dnsport.h"
56
#include "util/random.h"
57
#ifdef HAVE_SYS_TYPES_H
58
#include <sys/types.h>
59
#endif
60
#ifdef HAVE_SYS_SOCKET_H
61
#include <sys/socket.h>
62
#endif
63
#ifdef HAVE_NETDB_H
64
#include <netdb.h>
65
#endif
66
#ifdef HAVE_POLL_H
67
#include <poll.h>
68
#endif
69
70
#ifdef HAVE_OPENSSL_SSL_H
71
#include <openssl/ssl.h>
72
#endif
73
#ifdef HAVE_OPENSSL_ERR_H
74
#include <openssl/err.h>
75
#endif
76
77
#ifdef HAVE_NGTCP2
78
#include <ngtcp2/ngtcp2.h>
79
#include <ngtcp2/ngtcp2_crypto.h>
80
#endif
81
82
#ifdef HAVE_LINUX_NET_TSTAMP_H
83
#include <linux/net_tstamp.h>
84
#endif
85
86
/* -------- Start of local definitions -------- */
87
/** if CMSG_ALIGN is not defined on this platform, a workaround */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
/* Test for the same (underscore prefixed) macro that is expanded here.
 * Testing the unprefixed name meant this branch was never taken on
 * platforms that only provide _CMSG_DATA_ALIGN. */
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  else
/* last resort: round up to a multiple of sizeof(long) */
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/** if CMSG_LEN is not defined on this platform, a workaround:
 * the aligned header size plus the unpadded payload length */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/** if CMSG_SPACE is not defined on this platform, a workaround:
 * the aligned payload length plus the aligned header size */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
111
112
/** Timeout for writing a TCP query, in milliseconds */
#define TCP_QUERY_TIMEOUT 120000
/** Smallest TCP timeout that is actually used, in msec, no matter
 * what timeout is advertised */
#define TCP_QUERY_TIMEOUT_MINIMUM 200

/** how many UDP reads are performed for one read indication from select */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif

/** msec to wait for a blocked write to unblock; after that the packet
 * is dropped */
#define SEND_BLOCKED_WAIT_TIMEOUT 200
/** number of times to wait for a blocked write to unblock; after that
 * the packet is dropped */
#define SEND_BLOCKED_MAX_RETRY 5

/** Fallback values for the SO_TIMESTAMP* socket options for when the
 * system headers do not define them, this keeps the timestamping code
 * free of further ifdefs. */
#if !defined(SO_TIMESTAMP)
#define SO_TIMESTAMP 29
#endif
#if !defined(SO_TIMESTAMPNS)
#define SO_TIMESTAMPNS 35
#endif
#if !defined(SO_TIMESTAMPING)
#define SO_TIMESTAMPING 37
#endif
140
/**
 * Event structure that is internal to this file, it keeps the ub_event
 * info for an event.
 * It may be part of other structures (list, tree).
 */
struct internal_event {
	/** comm base that the event belongs to */
	struct comm_base *base;
	/** the ub_event for this event */
	struct ub_event *ev;
};
150
151
/**
 * Base structure that is internal to this file; there is one for every
 * thread, so that each thread has its own events.
 */
struct internal_base {
	/** the ub_event event_base */
	struct ub_event_base *base;
	/** current time in seconds, the seconds time pointer points here */
	time_t secs;
	/** current time as a timeval */
	struct timeval now;
	/** event that is used for the slow_accept timeouts */
	struct ub_event *slow_accept;
	/** if true, slow_accept is enabled */
	int slow_accept_enabled;
	/** time of the last log message, for rate limited logging of
	 * file descriptor errors */
	time_t last_slow_log;
	/** time of the last log message, for rate limited logging of
	 * write wait failures */
	time_t last_writewait_log;
};
170
171
/**
172
 * Internal timer structure, to store timer event in.
173
 */
174
struct internal_timer {
175
  /** the super struct from which derived */
176
  struct comm_timer super;
177
  /** the comm base */
178
  struct comm_base* base;
179
  /** ub_event event type */
180
  struct ub_event* ev;
181
  /** is timer enabled */
182
  uint8_t enabled;
183
};
184
185
/**
 * Signal structure that is internal to this file, the signal event is
 * stored in it. The structures are linked into a list.
 */
struct internal_signal {
	/** the ub_event for the signal */
	struct ub_event *ev;
	/** next element in the signal list */
	struct internal_signal *next;
};
194
195
/** create a tcp handler with a parent */
196
static struct comm_point* comm_point_create_tcp_handler(
197
  struct comm_base *base, struct comm_point* parent, size_t bufsize,
198
  struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
199
  void* callback_arg, struct unbound_socket* socket);
200
201
/* -------- End of local definitions -------- */
202
203
struct comm_base*
204
comm_base_create(int sigs)
205
0
{
206
0
  struct comm_base* b = (struct comm_base*)calloc(1,
207
0
    sizeof(struct comm_base));
208
0
  const char *evnm="event", *evsys="", *evmethod="";
209
210
0
  if(!b)
211
0
    return NULL;
212
0
  b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
213
0
  if(!b->eb) {
214
0
    free(b);
215
0
    return NULL;
216
0
  }
217
0
  b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
218
0
  if(!b->eb->base) {
219
0
    free(b->eb);
220
0
    free(b);
221
0
    return NULL;
222
0
  }
223
0
  ub_comm_base_now(b);
224
0
  ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
225
0
  verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod);
226
0
  return b;
227
0
}
228
229
struct comm_base*
230
comm_base_create_event(struct ub_event_base* base)
231
0
{
232
0
  struct comm_base* b = (struct comm_base*)calloc(1,
233
0
    sizeof(struct comm_base));
234
0
  if(!b)
235
0
    return NULL;
236
0
  b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
237
0
  if(!b->eb) {
238
0
    free(b);
239
0
    return NULL;
240
0
  }
241
0
  b->eb->base = base;
242
0
  ub_comm_base_now(b);
243
0
  return b;
244
0
}
245
246
void
247
comm_base_delete(struct comm_base* b)
248
0
{
249
0
  if(!b)
250
0
    return;
251
0
  if(b->eb->slow_accept_enabled) {
252
0
    if(ub_event_del(b->eb->slow_accept) != 0) {
253
0
      log_err("could not event_del slow_accept");
254
0
    }
255
0
    ub_event_free(b->eb->slow_accept);
256
0
  }
257
0
  ub_event_base_free(b->eb->base);
258
0
  b->eb->base = NULL;
259
0
  free(b->eb);
260
0
  free(b);
261
0
}
262
263
void
264
comm_base_delete_no_base(struct comm_base* b)
265
0
{
266
0
  if(!b)
267
0
    return;
268
0
  if(b->eb->slow_accept_enabled) {
269
0
    if(ub_event_del(b->eb->slow_accept) != 0) {
270
0
      log_err("could not event_del slow_accept");
271
0
    }
272
0
    ub_event_free(b->eb->slow_accept);
273
0
  }
274
0
  b->eb->base = NULL;
275
0
  free(b->eb);
276
0
  free(b);
277
0
}
278
279
void
280
comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
281
0
{
282
0
  *tt = &b->eb->secs;
283
0
  *tv = &b->eb->now;
284
0
}
285
286
void
287
comm_base_dispatch(struct comm_base* b)
288
0
{
289
0
  int retval;
290
0
  retval = ub_event_base_dispatch(b->eb->base);
291
0
  if(retval < 0) {
292
0
    fatal_exit("event_dispatch returned error %d, "
293
0
      "errno is %s", retval, strerror(errno));
294
0
  }
295
0
}
296
297
void comm_base_exit(struct comm_base* b)
298
0
{
299
0
  if(ub_event_base_loopexit(b->eb->base) != 0) {
300
0
    log_err("Could not loopexit");
301
0
  }
302
0
}
303
304
void comm_base_set_slow_accept_handlers(struct comm_base* b,
305
  void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
306
0
{
307
0
  b->stop_accept = stop_acc;
308
0
  b->start_accept = start_acc;
309
0
  b->cb_arg = arg;
310
0
}
311
312
struct ub_event_base* comm_base_internal(struct comm_base* b)
313
0
{
314
0
  return b->eb->base;
315
0
}
316
317
struct ub_event* comm_point_internal(struct comm_point* c)
318
0
{
319
0
  return c->ev->ev;
320
0
}
321
322
/** see if errno for udp has to be logged or not uses globals */
323
static int
324
udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
325
0
{
326
  /* do not log transient errors (unless high verbosity) */
327
0
#if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
328
0
  switch(errno) {
329
0
#  ifdef ENETUNREACH
330
0
    case ENETUNREACH:
331
0
#  endif
332
0
#  ifdef EHOSTDOWN
333
0
    case EHOSTDOWN:
334
0
#  endif
335
0
#  ifdef EHOSTUNREACH
336
0
    case EHOSTUNREACH:
337
0
#  endif
338
0
#  ifdef ENETDOWN
339
0
    case ENETDOWN:
340
0
#  endif
341
0
    case EPERM:
342
0
    case EACCES:
343
0
      if(verbosity < VERB_ALGO)
344
0
        return 0;
345
0
      break;
346
0
    default:
347
0
      break;
348
0
  }
349
0
#endif
350
  /* permission denied is gotten for every send if the
351
   * network is disconnected (on some OS), squelch it */
352
0
  if( ((errno == EPERM)
353
0
#  ifdef EADDRNOTAVAIL
354
    /* 'Cannot assign requested address' also when disconnected */
355
0
    || (errno == EADDRNOTAVAIL)
356
0
#  endif
357
0
    ) && verbosity < VERB_ALGO)
358
0
    return 0;
359
0
#  ifdef EADDRINUSE
360
  /* If SO_REUSEADDR is set, we could try to connect to the same server
361
   * from the same source port twice. */
362
0
  if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
363
0
    return 0;
364
0
#  endif
365
  /* squelch errors where people deploy AAAA ::ffff:bla for
366
   * authority servers, which we try for intranets. */
367
0
  if(errno == EINVAL && addr_is_ip4mapped(
368
0
    (struct sockaddr_storage*)addr, addrlen) &&
369
0
    verbosity < VERB_DETAIL)
370
0
    return 0;
371
  /* SO_BROADCAST sockopt can give access to 255.255.255.255,
372
   * but a dns cache does not need it. */
373
0
  if(errno == EACCES && addr_is_broadcast(
374
0
    (struct sockaddr_storage*)addr, addrlen) &&
375
0
    verbosity < VERB_DETAIL)
376
0
    return 0;
377
0
#  ifdef ENOTCONN
378
  /* For 0.0.0.0, ::0 targets it can return that socket is not connected.
379
   * This can be ignored, and the address skipped. It remains
380
   * possible to send there for completeness in configuration. */
381
0
  if(errno == ENOTCONN && addr_is_any(
382
0
    (struct sockaddr_storage*)addr, addrlen) &&
383
0
    verbosity < VERB_DETAIL)
384
0
    return 0;
385
0
#  endif
386
0
  return 1;
387
0
}
388
389
/**
 * See if the errno of a failed TCP connect has to be logged.
 * The decision is the same as the one made for a failed UDP send.
 * @param addr: destination address.
 * @param addrlen: length of addr.
 * @return 0 if the error is not to be logged, 1 if it is.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);

	return needs_log;
}
393
394
/* send a UDP reply */
395
int
396
comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
397
  struct sockaddr* addr, socklen_t addrlen, int is_connected)
398
0
{
399
0
  ssize_t sent;
400
0
  log_assert(c->fd != -1);
401
#ifdef UNBOUND_DEBUG
402
  if(sldns_buffer_remaining(packet) == 0)
403
    log_err("error: send empty UDP packet");
404
#endif
405
0
  log_assert(addr && addrlen > 0);
406
0
  if(!is_connected) {
407
0
    sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
408
0
      sldns_buffer_remaining(packet), 0,
409
0
      addr, addrlen);
410
0
  } else {
411
0
    sent = send(c->fd, (void*)sldns_buffer_begin(packet),
412
0
      sldns_buffer_remaining(packet), 0);
413
0
  }
414
0
  if(sent == -1) {
415
    /* try again and block, waiting for IO to complete,
416
     * we want to send the answer, and we will wait for
417
     * the ethernet interface buffer to have space. */
418
0
#ifndef USE_WINSOCK
419
0
    if(errno == EAGAIN || errno == EINTR ||
420
0
#  ifdef EWOULDBLOCK
421
0
      errno == EWOULDBLOCK ||
422
0
#  endif
423
0
      errno == ENOBUFS) {
424
#else
425
    if(WSAGetLastError() == WSAEINPROGRESS ||
426
      WSAGetLastError() == WSAEINTR ||
427
      WSAGetLastError() == WSAENOBUFS ||
428
      WSAGetLastError() == WSAEWOULDBLOCK) {
429
#endif
430
0
      int retries = 0;
431
      /* if we set the fd blocking, other threads suddenly
432
       * have a blocking fd that they operate on */
433
0
      while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
434
0
#ifndef USE_WINSOCK
435
0
        errno == EAGAIN || errno == EINTR ||
436
0
#  ifdef EWOULDBLOCK
437
0
        errno == EWOULDBLOCK ||
438
0
#  endif
439
0
        errno == ENOBUFS
440
#else
441
        WSAGetLastError() == WSAEINPROGRESS ||
442
        WSAGetLastError() == WSAEINTR ||
443
        WSAGetLastError() == WSAENOBUFS ||
444
        WSAGetLastError() == WSAEWOULDBLOCK
445
#endif
446
0
      )) {
447
0
#if defined(HAVE_POLL) || defined(USE_WINSOCK)
448
0
        int send_nobufs = (
449
0
#ifndef USE_WINSOCK
450
0
          errno == ENOBUFS
451
#else
452
          WSAGetLastError() == WSAENOBUFS
453
#endif
454
0
        );
455
0
        struct pollfd p;
456
0
        int pret;
457
0
        memset(&p, 0, sizeof(p));
458
0
        p.fd = c->fd;
459
0
        p.events = POLLOUT
460
0
#ifndef USE_WINSOCK
461
0
          | POLLERR | POLLHUP
462
0
#endif
463
0
          ;
464
0
#  ifndef USE_WINSOCK
465
0
        pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
466
#  else
467
        pret = WSAPoll(&p, 1,
468
          SEND_BLOCKED_WAIT_TIMEOUT);
469
#  endif
470
0
        if(pret == 0) {
471
          /* timer expired */
472
0
          struct comm_base* b = c->ev->base;
473
0
          if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
474
0
            b->eb->secs) {
475
0
            b->eb->last_writewait_log = b->eb->secs;
476
0
            verbose(VERB_OPS, "send udp blocked "
477
0
              "for long, dropping packet.");
478
0
          }
479
0
          return 0;
480
0
        } else if(pret < 0 &&
481
0
#ifndef USE_WINSOCK
482
0
          errno != EAGAIN && errno != EINTR &&
483
0
#  ifdef EWOULDBLOCK
484
0
          errno != EWOULDBLOCK &&
485
0
#  endif
486
0
          errno != ENOMEM && errno != ENOBUFS
487
#else
488
          WSAGetLastError() != WSAEINPROGRESS &&
489
          WSAGetLastError() != WSAEINTR &&
490
          WSAGetLastError() != WSAENOBUFS &&
491
          WSAGetLastError() != WSAEWOULDBLOCK
492
#endif
493
0
          ) {
494
0
          log_err("poll udp out failed: %s",
495
0
            sock_strerror(errno));
496
0
          return 0;
497
0
        } else if((pret < 0 &&
498
0
#ifndef USE_WINSOCK
499
0
          ( errno == ENOBUFS  /* Maybe some systems */
500
0
          || errno == ENOMEM  /* Linux */
501
0
          || errno == EAGAIN)  /* Macos, solaris, openbsd */
502
#else
503
          WSAGetLastError() == WSAENOBUFS
504
#endif
505
0
          ) || (send_nobufs && retries > 0)) {
506
          /* ENOBUFS/ENOMEM/EAGAIN, and poll
507
           * returned without
508
           * a timeout. Or the retried send call
509
           * returned ENOBUFS/ENOMEM/EAGAIN.
510
           * It is good to wait a bit for the
511
           * error to clear. */
512
          /* The timeout is 20*(2^(retries+1)),
513
           * it increases exponentially, starting
514
           * at 40 msec. After 5 tries, 1240 msec
515
           * have passed in total, when poll
516
           * returned the error, and 1200 msec
517
           * when send returned the errors. */
518
0
#ifndef USE_WINSOCK
519
0
          pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
520
#else
521
          Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
522
          pret = 0;
523
#endif
524
0
          if(pret < 0
525
0
#ifndef USE_WINSOCK
526
0
            && errno != EAGAIN && errno != EINTR &&
527
0
#  ifdef EWOULDBLOCK
528
0
            errno != EWOULDBLOCK &&
529
0
#  endif
530
0
            errno != ENOMEM && errno != ENOBUFS
531
#else
532
            /* Sleep does not error */
533
#endif
534
0
          ) {
535
0
            log_err("poll udp out timer failed: %s",
536
0
              sock_strerror(errno));
537
0
          }
538
0
        }
539
0
#endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
540
0
        retries++;
541
0
        if (!is_connected) {
542
0
          sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
543
0
            sldns_buffer_remaining(packet), 0,
544
0
            addr, addrlen);
545
0
        } else {
546
0
          sent = send(c->fd, (void*)sldns_buffer_begin(packet),
547
0
            sldns_buffer_remaining(packet), 0);
548
0
        }
549
0
      }
550
0
    }
551
0
  }
552
0
  if(sent == -1) {
553
0
    if(!udp_send_errno_needs_log(addr, addrlen))
554
0
      return 0;
555
0
    if (!is_connected) {
556
0
      verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno));
557
0
    } else {
558
0
      verbose(VERB_OPS, "send failed: %s", sock_strerror(errno));
559
0
    }
560
0
    if(addr)
561
0
      log_addr(VERB_OPS, "remote address is",
562
0
        (struct sockaddr_storage*)addr, addrlen);
563
0
    return 0;
564
0
  } else if((size_t)sent != sldns_buffer_remaining(packet)) {
565
0
    log_err("sent %d in place of %d bytes",
566
0
      (int)sent, (int)sldns_buffer_remaining(packet));
567
0
    return 0;
568
0
  }
569
0
  return 1;
570
0
}
571
572
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Print debug ancillary info of a reply with log_info.
 * @param str: prefix for the log line.
 * @param r: reply; its srctype selects the printed packet info, 6 for
 *	the IPv6 and 4 for the IPv4 info. For another srctype a line
 *	about the unknown srctype is logged.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	if(r->srctype != 4 && r->srctype != 6) {
		log_info("%s: unknown srctype %d", str, r->srctype);
		return;
	}

	if(r->srctype == 6) {
#ifdef IPV6_PKTINFO
		char buf[1024];
		if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			buf, (socklen_t)sizeof(buf)) == 0) {
			(void)strlcpy(buf, "(inet_ntop error)", sizeof(buf));
		}
		buf[sizeof(buf)-1]=0;
		/* the interface index is an unsigned type on a number of
		 * platforms; cast it so that it matches the %d */
		log_info("%s: %s %d", str, buf,
			(int)r->pktinfo.v6info.ipi6_ifindex);
#endif
	} else {
		/* srctype is 4 here, because of the check at the start */
#ifdef IP_PKTINFO
		char buf1[1024], buf2[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
			buf1, (socklen_t)sizeof(buf1)) == 0) {
			(void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
		}
		buf1[sizeof(buf1)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			buf2, (socklen_t)sizeof(buf2)) == 0) {
			(void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2));
		}
		buf2[sizeof(buf2)-1]=0;
#else
		/* no ipi_spec_dst member, print an empty string for it */
		buf2[0]=0;
#endif
		/* cast for the %d, see the IPv6 case */
		log_info("%s: %d %s %s", str,
			(int)r->pktinfo.v4info.ipi_ifindex, buf1, buf2);
#elif defined(IP_RECVDSTADDR)
		char buf1[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
			buf1, (socklen_t)sizeof(buf1)) == 0) {
			(void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
		}
		buf1[sizeof(buf1)-1]=0;
		log_info("%s: %s", str, buf1);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
622
623
/** send a UDP reply over specified interface*/
624
static int
625
comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
626
  struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
627
0
{
628
0
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
629
0
  ssize_t sent;
630
0
  struct msghdr msg;
631
0
  struct iovec iov[1];
632
0
  union {
633
0
    struct cmsghdr hdr;
634
0
    char buf[256];
635
0
  } control;
636
0
#ifndef S_SPLINT_S
637
0
  struct cmsghdr *cmsg;
638
0
#endif /* S_SPLINT_S */
639
640
0
  log_assert(c->fd != -1);
641
#ifdef UNBOUND_DEBUG
642
  if(sldns_buffer_remaining(packet) == 0)
643
    log_err("error: send empty UDP packet");
644
#endif
645
0
  log_assert(addr && addrlen > 0);
646
647
0
  msg.msg_name = addr;
648
0
  msg.msg_namelen = addrlen;
649
0
  iov[0].iov_base = sldns_buffer_begin(packet);
650
0
  iov[0].iov_len = sldns_buffer_remaining(packet);
651
0
  msg.msg_iov = iov;
652
0
  msg.msg_iovlen = 1;
653
0
  msg.msg_control = control.buf;
654
0
#ifndef S_SPLINT_S
655
0
  msg.msg_controllen = sizeof(control.buf);
656
0
#endif /* S_SPLINT_S */
657
0
  msg.msg_flags = 0;
658
659
0
#ifndef S_SPLINT_S
660
0
  cmsg = CMSG_FIRSTHDR(&msg);
661
0
  if(r->srctype == 4) {
662
0
#ifdef IP_PKTINFO
663
0
    void* cmsg_data;
664
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
665
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
666
0
    cmsg->cmsg_level = IPPROTO_IP;
667
0
    cmsg->cmsg_type = IP_PKTINFO;
668
0
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
669
0
      sizeof(struct in_pktinfo));
670
    /* unset the ifindex to not bypass the routing tables */
671
0
    cmsg_data = CMSG_DATA(cmsg);
672
0
    ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
673
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
674
    /* zero the padding bytes inserted by the CMSG_LEN */
675
0
    if(sizeof(struct in_pktinfo) < cmsg->cmsg_len)
676
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
677
0
        sizeof(struct in_pktinfo), 0, cmsg->cmsg_len
678
0
        - sizeof(struct in_pktinfo));
679
#elif defined(IP_SENDSRCADDR)
680
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
681
    log_assert(msg.msg_controllen <= sizeof(control.buf));
682
    cmsg->cmsg_level = IPPROTO_IP;
683
    cmsg->cmsg_type = IP_SENDSRCADDR;
684
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
685
      sizeof(struct in_addr));
686
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
687
    /* zero the padding bytes inserted by the CMSG_LEN */
688
    if(sizeof(struct in_addr) < cmsg->cmsg_len)
689
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
690
        sizeof(struct in_addr), 0, cmsg->cmsg_len
691
        - sizeof(struct in_addr));
692
#else
693
    verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
694
    msg.msg_control = NULL;
695
#endif /* IP_PKTINFO or IP_SENDSRCADDR */
696
0
  } else if(r->srctype == 6) {
697
0
    void* cmsg_data;
698
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
699
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
700
0
    cmsg->cmsg_level = IPPROTO_IPV6;
701
0
    cmsg->cmsg_type = IPV6_PKTINFO;
702
0
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
703
0
      sizeof(struct in6_pktinfo));
704
    /* unset the ifindex to not bypass the routing tables */
705
0
    cmsg_data = CMSG_DATA(cmsg);
706
0
    ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
707
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
708
    /* zero the padding bytes inserted by the CMSG_LEN */
709
0
    if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
710
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
711
0
        sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
712
0
        - sizeof(struct in6_pktinfo));
713
0
  } else {
714
    /* try to pass all 0 to use default route */
715
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
716
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
717
0
    cmsg->cmsg_level = IPPROTO_IPV6;
718
0
    cmsg->cmsg_type = IPV6_PKTINFO;
719
0
    memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
720
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
721
    /* zero the padding bytes inserted by the CMSG_LEN */
722
0
    if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
723
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
724
0
        sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
725
0
        - sizeof(struct in6_pktinfo));
726
0
  }
727
0
#endif /* S_SPLINT_S */
728
0
  if(verbosity >= VERB_ALGO && r->srctype != 0)
729
0
    p_ancil("send_udp over interface", r);
730
0
  sent = sendmsg(c->fd, &msg, 0);
731
0
  if(sent == -1) {
732
    /* try again and block, waiting for IO to complete,
733
     * we want to send the answer, and we will wait for
734
     * the ethernet interface buffer to have space. */
735
0
#ifndef USE_WINSOCK
736
0
    if(errno == EAGAIN || errno == EINTR ||
737
0
#  ifdef EWOULDBLOCK
738
0
      errno == EWOULDBLOCK ||
739
0
#  endif
740
0
      errno == ENOBUFS) {
741
#else
742
    if(WSAGetLastError() == WSAEINPROGRESS ||
743
      WSAGetLastError() == WSAEINTR ||
744
      WSAGetLastError() == WSAENOBUFS ||
745
      WSAGetLastError() == WSAEWOULDBLOCK) {
746
#endif
747
0
      int retries = 0;
748
0
      while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
749
0
#ifndef USE_WINSOCK
750
0
        errno == EAGAIN || errno == EINTR ||
751
0
#  ifdef EWOULDBLOCK
752
0
        errno == EWOULDBLOCK ||
753
0
#  endif
754
0
        errno == ENOBUFS
755
#else
756
        WSAGetLastError() == WSAEINPROGRESS ||
757
        WSAGetLastError() == WSAEINTR ||
758
        WSAGetLastError() == WSAENOBUFS ||
759
        WSAGetLastError() == WSAEWOULDBLOCK
760
#endif
761
0
      )) {
762
0
#if defined(HAVE_POLL) || defined(USE_WINSOCK)
763
0
        int send_nobufs = (
764
0
#ifndef USE_WINSOCK
765
0
          errno == ENOBUFS
766
#else
767
          WSAGetLastError() == WSAENOBUFS
768
#endif
769
0
        );
770
0
        struct pollfd p;
771
0
        int pret;
772
0
        memset(&p, 0, sizeof(p));
773
0
        p.fd = c->fd;
774
0
        p.events = POLLOUT
775
0
#ifndef USE_WINSOCK
776
0
          | POLLERR | POLLHUP
777
0
#endif
778
0
          ;
779
0
#  ifndef USE_WINSOCK
780
0
        pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
781
#  else
782
        pret = WSAPoll(&p, 1,
783
          SEND_BLOCKED_WAIT_TIMEOUT);
784
#  endif
785
0
        if(pret == 0) {
786
          /* timer expired */
787
0
          struct comm_base* b = c->ev->base;
788
0
          if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
789
0
            b->eb->secs) {
790
0
            b->eb->last_writewait_log = b->eb->secs;
791
0
            verbose(VERB_OPS, "send udp blocked "
792
0
              "for long, dropping packet.");
793
0
          }
794
0
          return 0;
795
0
        } else if(pret < 0 &&
796
0
#ifndef USE_WINSOCK
797
0
          errno != EAGAIN && errno != EINTR &&
798
0
#  ifdef EWOULDBLOCK
799
0
          errno != EWOULDBLOCK &&
800
0
#  endif
801
0
          errno != ENOMEM && errno != ENOBUFS
802
#else
803
          WSAGetLastError() != WSAEINPROGRESS &&
804
          WSAGetLastError() != WSAEINTR &&
805
          WSAGetLastError() != WSAENOBUFS &&
806
          WSAGetLastError() != WSAEWOULDBLOCK
807
#endif
808
0
          ) {
809
0
          log_err("poll udp out failed: %s",
810
0
            sock_strerror(errno));
811
0
          return 0;
812
0
        } else if((pret < 0 &&
813
0
#ifndef USE_WINSOCK
814
0
          ( errno == ENOBUFS  /* Maybe some systems */
815
0
          || errno == ENOMEM  /* Linux */
816
0
          || errno == EAGAIN)  /* Macos, solaris, openbsd */
817
#else
818
          WSAGetLastError() == WSAENOBUFS
819
#endif
820
0
          ) || (send_nobufs && retries > 0)) {
821
          /* ENOBUFS/ENOMEM/EAGAIN, and poll
822
           * returned without
823
           * a timeout. Or the retried send call
824
           * returned ENOBUFS/ENOMEM/EAGAIN.
825
           * It is good to wait a bit for the
826
           * error to clear. */
827
          /* The timeout is 20*(2^(retries+1)),
828
           * it increases exponentially, starting
829
           * at 40 msec. After 5 tries, 1240 msec
830
           * have passed in total, when poll
831
           * returned the error, and 1200 msec
832
           * when send returned the errors. */
833
0
#ifndef USE_WINSOCK
834
0
          pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
835
#else
836
          Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
837
          pret = 0;
838
#endif
839
0
          if(pret < 0
840
0
#ifndef USE_WINSOCK
841
0
            && errno != EAGAIN && errno != EINTR &&
842
0
#  ifdef EWOULDBLOCK
843
0
            errno != EWOULDBLOCK &&
844
0
#  endif
845
0
            errno != ENOMEM && errno != ENOBUFS
846
#else  /* USE_WINSOCK */
847
            /* Sleep does not error */
848
#endif
849
0
          ) {
850
0
            log_err("poll udp out timer failed: %s",
851
0
              sock_strerror(errno));
852
0
          }
853
0
        }
854
0
#endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
855
0
        retries++;
856
0
        sent = sendmsg(c->fd, &msg, 0);
857
0
      }
858
0
    }
859
0
  }
860
0
  if(sent == -1) {
861
0
    if(!udp_send_errno_needs_log(addr, addrlen))
862
0
      return 0;
863
0
    verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
864
0
    log_addr(VERB_OPS, "remote address is",
865
0
      (struct sockaddr_storage*)addr, addrlen);
866
#ifdef __NetBSD__
867
    /* netbsd 7 has IP_PKTINFO for recv but not send */
868
    if(errno == EINVAL && r->srctype == 4)
869
      log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
870
        "Please disable interface-automatic");
871
#endif
872
0
    return 0;
873
0
  } else if((size_t)sent != sldns_buffer_remaining(packet)) {
874
0
    log_err("sent %d in place of %d bytes",
875
0
      (int)sent, (int)sldns_buffer_remaining(packet));
876
0
    return 0;
877
0
  }
878
0
  return 1;
879
#else
880
  (void)c;
881
  (void)packet;
882
  (void)addr;
883
  (void)addrlen;
884
  (void)r;
885
  log_err("sendmsg: IPV6_PKTINFO not supported");
886
  return 0;
887
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
888
0
}
889
890
/** return true is UDP receive error needs to be logged */
891
static int udp_recv_needs_log(int err)
892
0
{
893
0
  switch(err) {
894
0
  case EACCES: /* some hosts send ICMP 'Permission Denied' */
895
0
#ifndef USE_WINSOCK
896
0
  case ECONNREFUSED:
897
0
#  ifdef ENETUNREACH
898
0
  case ENETUNREACH:
899
0
#  endif
900
0
#  ifdef EHOSTDOWN
901
0
  case EHOSTDOWN:
902
0
#  endif
903
0
#  ifdef EHOSTUNREACH
904
0
  case EHOSTUNREACH:
905
0
#  endif
906
0
#  ifdef ENETDOWN
907
0
  case ENETDOWN:
908
0
#  endif
909
#else /* USE_WINSOCK */
910
  case WSAECONNREFUSED:
911
  case WSAENETUNREACH:
912
  case WSAEHOSTDOWN:
913
  case WSAEHOSTUNREACH:
914
  case WSAENETDOWN:
915
#endif
916
0
    if(verbosity >= VERB_ALGO)
917
0
      return 1;
918
0
    return 0;
919
0
  default:
920
0
    break;
921
0
  }
922
0
  return 1;
923
0
}
924
925
/** Parses the PROXYv2 header from buf and updates the comm_reply struct.
926
 *  Returns 1 on success, 0 on failure. */
927
static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep,
928
0
  int stream) {
929
0
  size_t size;
930
0
  struct pp2_header *header;
931
0
  int err = pp2_read_header(sldns_buffer_begin(buf),
932
0
    sldns_buffer_remaining(buf));
933
0
  if(err) return 0;
934
0
  header = (struct pp2_header*)sldns_buffer_begin(buf);
935
0
  size = PP2_HEADER_SIZE + ntohs(header->len);
936
0
  if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) {
937
    /* A connection from the proxy itself.
938
     * No need to do anything with addresses. */
939
0
    goto done;
940
0
  }
941
0
  if(header->fam_prot == PP2_UNSPEC_UNSPEC) {
942
    /* Unspecified family and protocol. This could be used for
943
     * health checks by proxies.
944
     * No need to do anything with addresses. */
945
0
    goto done;
946
0
  }
947
  /* Read the proxied address */
948
0
  switch(header->fam_prot) {
949
0
    case PP2_INET_STREAM:
950
0
    case PP2_INET_DGRAM:
951
0
      {
952
0
      struct sockaddr_in* addr =
953
0
        (struct sockaddr_in*)&rep->client_addr;
954
0
      addr->sin_family = AF_INET;
955
0
      addr->sin_addr.s_addr = header->addr.addr4.src_addr;
956
0
      addr->sin_port = header->addr.addr4.src_port;
957
0
      rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in);
958
0
      }
959
      /* Ignore the destination address; it should be us. */
960
0
      break;
961
0
    case PP2_INET6_STREAM:
962
0
    case PP2_INET6_DGRAM:
963
0
      {
964
0
      struct sockaddr_in6* addr =
965
0
        (struct sockaddr_in6*)&rep->client_addr;
966
0
      memset(addr, 0, sizeof(*addr));
967
0
      addr->sin6_family = AF_INET6;
968
0
      memcpy(&addr->sin6_addr,
969
0
        header->addr.addr6.src_addr, 16);
970
0
      addr->sin6_port = header->addr.addr6.src_port;
971
0
      rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6);
972
0
      }
973
      /* Ignore the destination address; it should be us. */
974
0
      break;
975
0
    default:
976
0
      log_err("proxy_protocol: unsupported family and "
977
0
        "protocol 0x%x", (int)header->fam_prot);
978
0
      return 0;
979
0
  }
980
0
  rep->is_proxied = 1;
981
0
done:
982
0
  if(!stream) {
983
    /* We are reading a whole packet;
984
     * Move the rest of the data to overwrite the PROXYv2 header */
985
    /* XXX can we do better to avoid memmove? */
986
0
    memmove(header, ((char*)header)+size,
987
0
      sldns_buffer_limit(buf)-size);
988
0
    sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size);
989
0
  }
990
0
  return 1;
991
0
}
992
993
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
/** Event callback for a UDP comm point that receives with recvmsg and
 * reads the ancillary data of every packet.
 * Handles at most NUM_UDP_PER_SELECT packets per invocation. For every
 * packet the destination info (IPV6_PKTINFO, IP_PKTINFO or IP_RECVDSTADDR)
 * is stored in the reply info and, when available, the receive timestamp
 * is stored in the comm point. If pp2_enabled is set on the comm point the
 * PROXYv2 header is consumed first. The comm point callback is then called
 * and, when it returns nonzero, the reply is sent immediately with
 * comm_point_send_udp_msg_if.
 * @param fd: the socket that has the event.
 * @param event: event bits; nothing is done unless UB_EV_READ is set.
 * @param arg: the comm point (struct comm_point*). */
void
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
  struct comm_reply rep;
  struct msghdr msg;
  struct iovec iov[1];
  ssize_t rcv;
  union {
    struct cmsghdr hdr;
    char buf[256];
  } ancil;
  int i;
#ifndef S_SPLINT_S
  struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */
#ifdef HAVE_LINUX_NET_TSTAMP_H
  struct timespec *ts;
#endif /* HAVE_LINUX_NET_TSTAMP_H */

  rep.c = (struct comm_point*)arg;
  log_assert(rep.c->type == comm_udp);

  if(!(event&UB_EV_READ))
    return;
  log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
  ub_comm_base_now(rep.c->ev->base);
  for(i=0; i<NUM_UDP_PER_SELECT; i++) {
    sldns_buffer_clear(rep.c->buffer);
    timeval_clear(&rep.c->recv_tv);
    rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
    log_assert(fd != -1);
    log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
    msg.msg_name = &rep.remote_addr;
    msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr);
    iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
    iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
    msg.msg_iov = iov;
    msg.msg_iovlen = 1;
    msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
    msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
    msg.msg_flags = 0;
    rcv = recvmsg(fd, &msg, MSG_DONTWAIT);
    if(rcv == -1) {
      if(errno != EAGAIN && errno != EINTR
        && udp_recv_needs_log(errno)) {
        log_err("recvmsg failed: %s", strerror(errno));
      }
      return;
    }
    rep.remote_addrlen = msg.msg_namelen;
    sldns_buffer_skip(rep.c->buffer, rcv);
    sldns_buffer_flip(rep.c->buffer);
    rep.srctype = 0;
    rep.is_proxied = 0;
#ifndef S_SPLINT_S
    /* Walk the ancillary data. The destination address entries end
     * the walk; the timestamp entries do not. */
    for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
      cmsg = CMSG_NXTHDR(&msg, cmsg)) {
      if( cmsg->cmsg_level == IPPROTO_IPV6 &&
        cmsg->cmsg_type == IPV6_PKTINFO) {
        rep.srctype = 6;
        memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
          sizeof(struct in6_pktinfo));
        break;
#ifdef IP_PKTINFO
      } else if( cmsg->cmsg_level == IPPROTO_IP &&
        cmsg->cmsg_type == IP_PKTINFO) {
        rep.srctype = 4;
        memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
          sizeof(struct in_pktinfo));
        break;
#elif defined(IP_RECVDSTADDR)
      } else if( cmsg->cmsg_level == IPPROTO_IP &&
        cmsg->cmsg_type == IP_RECVDSTADDR) {
        rep.srctype = 4;
        memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
          sizeof(struct in_addr));
        break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
#ifdef HAVE_LINUX_NET_TSTAMP_H
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
        cmsg->cmsg_type == SO_TIMESTAMPNS) {
        ts = (struct timespec *)CMSG_DATA(cmsg);
        TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
        cmsg->cmsg_type == SO_TIMESTAMPING) {
        ts = (struct timespec *)CMSG_DATA(cmsg);
        TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
        cmsg->cmsg_type == SO_TIMESTAMP) {
        memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
#elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP)
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
        cmsg->cmsg_type == SCM_TIMESTAMP) {
        /* FreeBSD and also Linux. */
        memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
#endif /* HAVE_LINUX_NET_TSTAMP_H */
      }
    }

    if(verbosity >= VERB_ALGO && rep.srctype != 0)
      p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */

    if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
      &rep, 0)) {
      log_err("proxy_protocol: could not consume PROXYv2 header");
      return;
    }
    if(!rep.is_proxied) {
      /* Not proxied: the client is the remote end of the socket. */
      rep.client_addrlen = rep.remote_addrlen;
      memmove(&rep.client_addr, &rep.remote_addr,
        rep.remote_addrlen);
    }

    fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
    if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
      /* send back immediate reply */
      struct sldns_buffer *buffer;
#ifdef USE_DNSCRYPT
      buffer = rep.c->dnscrypt_buffer;
#else
      buffer = rep.c->buffer;
#endif
      (void)comm_point_send_udp_msg_if(rep.c, buffer,
        (struct sockaddr*)&rep.remote_addr,
        rep.remote_addrlen, &rep);
    }
    if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused
    with another fd; stop reading from the stale descriptor, like
    comm_point_udp_callback does. */
      break;
  }
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
1128
1129
void
1130
comm_point_udp_callback(int fd, short event, void* arg)
1131
0
{
1132
0
  struct comm_reply rep;
1133
0
  ssize_t rcv;
1134
0
  int i;
1135
0
  struct sldns_buffer *buffer;
1136
1137
0
  rep.c = (struct comm_point*)arg;
1138
0
  log_assert(rep.c->type == comm_udp);
1139
1140
0
  if(!(event&UB_EV_READ))
1141
0
    return;
1142
0
  log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
1143
0
  ub_comm_base_now(rep.c->ev->base);
1144
0
  for(i=0; i<NUM_UDP_PER_SELECT; i++) {
1145
0
    sldns_buffer_clear(rep.c->buffer);
1146
0
    rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
1147
0
    log_assert(fd != -1);
1148
0
    log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
1149
0
    rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
1150
0
      sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT,
1151
0
      (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen);
1152
0
    if(rcv == -1) {
1153
0
#ifndef USE_WINSOCK
1154
0
      if(errno != EAGAIN && errno != EINTR
1155
0
        && udp_recv_needs_log(errno))
1156
0
        log_err("recvfrom %d failed: %s",
1157
0
          fd, strerror(errno));
1158
#else
1159
      if(WSAGetLastError() != WSAEINPROGRESS &&
1160
        WSAGetLastError() != WSAECONNRESET &&
1161
        WSAGetLastError()!= WSAEWOULDBLOCK &&
1162
        udp_recv_needs_log(WSAGetLastError()))
1163
        log_err("recvfrom failed: %s",
1164
          wsa_strerror(WSAGetLastError()));
1165
#endif
1166
0
      return;
1167
0
    }
1168
0
    sldns_buffer_skip(rep.c->buffer, rcv);
1169
0
    sldns_buffer_flip(rep.c->buffer);
1170
0
    rep.srctype = 0;
1171
0
    rep.is_proxied = 0;
1172
1173
0
    if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
1174
0
      &rep, 0)) {
1175
0
      log_err("proxy_protocol: could not consume PROXYv2 header");
1176
0
      return;
1177
0
    }
1178
0
    if(!rep.is_proxied) {
1179
0
      rep.client_addrlen = rep.remote_addrlen;
1180
0
      memmove(&rep.client_addr, &rep.remote_addr,
1181
0
        rep.remote_addrlen);
1182
0
    }
1183
1184
0
    fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
1185
0
    if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
1186
      /* send back immediate reply */
1187
#ifdef USE_DNSCRYPT
1188
      buffer = rep.c->dnscrypt_buffer;
1189
#else
1190
0
      buffer = rep.c->buffer;
1191
0
#endif
1192
0
      (void)comm_point_send_udp_msg(rep.c, buffer,
1193
0
        (struct sockaddr*)&rep.remote_addr,
1194
0
        rep.remote_addrlen, 0);
1195
0
    }
1196
0
    if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
1197
    another UDP port. Note rep.c cannot be reused with TCP fd. */
1198
0
      break;
1199
0
  }
1200
0
}
1201
1202
#ifdef HAVE_NGTCP2
1203
void
1204
doq_pkt_addr_init(struct doq_pkt_addr* paddr)
1205
{
1206
  paddr->addrlen = (socklen_t)sizeof(paddr->addr);
1207
  paddr->localaddrlen = (socklen_t)sizeof(paddr->localaddr);
1208
  paddr->ifindex = 0;
1209
}
1210
1211
/** Set the ecn on the transmission.
 * For AF_INET6 the value is written with the IPV6_TCLASS socket option,
 * for every other family with the IP_TOS socket option. A failure of
 * setsockopt is logged and otherwise ignored.
 * @param fd: the socket to set the option on.
 * @param family: address family that selects the option.
 * @param ecn: the value to store in the option. */
static void
doq_set_ecn(int fd, int family, uint32_t ecn)
{
  unsigned int optval = ecn;
  if(family != AF_INET6) {
    if(setsockopt(fd, IPPROTO_IP, IP_TOS, &optval,
      (socklen_t)sizeof(optval)) == -1) {
      log_err("setsockopt(.. IP_TOS ..): %s",
        strerror(errno));
    }
    return;
  }
  if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &optval,
    (socklen_t)sizeof(optval)) == -1) {
    log_err("setsockopt(.. IPV6_TCLASS ..): %s",
      strerror(errno));
  }
}
1230
1231
/** set the local address in the control ancillary data */
1232
static void
1233
doq_set_localaddr_cmsg(struct msghdr* msg, size_t control_size,
1234
  struct doq_addr_storage* localaddr, socklen_t localaddrlen,
1235
  int ifindex)
1236
{
1237
#ifndef S_SPLINT_S
1238
  struct cmsghdr* cmsg;
1239
#endif /* S_SPLINT_S */
1240
#ifndef S_SPLINT_S
1241
  cmsg = CMSG_FIRSTHDR(msg);
1242
  if(localaddr->sockaddr.in.sin_family == AF_INET) {
1243
#ifdef IP_PKTINFO
1244
    struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
1245
    struct in_pktinfo v4info;
1246
    log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1247
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
1248
    memset(msg->msg_control, 0, msg->msg_controllen);
1249
    log_assert(msg->msg_controllen <= control_size);
1250
    cmsg->cmsg_level = IPPROTO_IP;
1251
    cmsg->cmsg_type = IP_PKTINFO;
1252
    memset(&v4info, 0, sizeof(v4info));
1253
#  ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
1254
    memmove(&v4info.ipi_spec_dst, &sa->sin_addr,
1255
      sizeof(struct in_addr));
1256
#  else
1257
    memmove(&v4info.ipi_addr, &sa->sin_addr,
1258
      sizeof(struct in_addr));
1259
#  endif
1260
    v4info.ipi_ifindex = ifindex;
1261
    memmove(CMSG_DATA(cmsg), &v4info, sizeof(struct in_pktinfo));
1262
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
1263
#elif defined(IP_SENDSRCADDR)
1264
    struct sockaddr_in* sa= (struct sockaddr_in*)localaddr;
1265
    log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1266
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
1267
    memset(msg->msg_control, 0, msg->msg_controllen);
1268
    log_assert(msg->msg_controllen <= control_size);
1269
    cmsg->cmsg_level = IPPROTO_IP;
1270
    cmsg->cmsg_type = IP_SENDSRCADDR;
1271
    memmove(CMSG_DATA(cmsg),  &sa->sin_addr,
1272
      sizeof(struct in_addr));
1273
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1274
#endif
1275
  } else {
1276
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
1277
    struct in6_pktinfo v6info;
1278
    log_assert(localaddrlen >= sizeof(struct sockaddr_in6));
1279
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
1280
    memset(msg->msg_control, 0, msg->msg_controllen);
1281
    log_assert(msg->msg_controllen <= control_size);
1282
    cmsg->cmsg_level = IPPROTO_IPV6;
1283
    cmsg->cmsg_type = IPV6_PKTINFO;
1284
    memset(&v6info, 0, sizeof(v6info));
1285
    memmove(&v6info.ipi6_addr, &sa6->sin6_addr,
1286
      sizeof(struct in6_addr));
1287
    v6info.ipi6_ifindex = ifindex;
1288
    memmove(CMSG_DATA(cmsg), &v6info, sizeof(struct in6_pktinfo));
1289
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
1290
  }
1291
#endif /* S_SPLINT_S */
1292
  /* Ignore unused variables, if no assertions are compiled. */
1293
  (void)localaddrlen;
1294
  (void)control_size;
1295
}
1296
1297
/** write address and port into strings */
1298
static int
1299
doq_print_addr_port(struct doq_addr_storage* addr, socklen_t addrlen,
1300
  char* host, size_t hostlen, char* port, size_t portlen)
1301
{
1302
  if(addr->sockaddr.in.sin_family == AF_INET) {
1303
    struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1304
    log_assert(addrlen >= sizeof(*sa));
1305
    if(inet_ntop(sa->sin_family, &sa->sin_addr, host,
1306
      (socklen_t)hostlen) == 0) {
1307
      log_hex("inet_ntop error: address", &sa->sin_addr,
1308
        sizeof(sa->sin_addr));
1309
      return 0;
1310
    }
1311
    snprintf(port, portlen, "%u", (unsigned)ntohs(sa->sin_port));
1312
  } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1313
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1314
    log_assert(addrlen >= sizeof(*sa6));
1315
    if(inet_ntop(sa6->sin6_family, &sa6->sin6_addr, host,
1316
      (socklen_t)hostlen) == 0) {
1317
      log_hex("inet_ntop error: address", &sa6->sin6_addr,
1318
        sizeof(sa6->sin6_addr));
1319
      return 0;
1320
    }
1321
    snprintf(port, portlen, "%u", (unsigned)ntohs(sa6->sin6_port));
1322
  }
1323
  return 1;
1324
}
1325
1326
/** doq store the blocked packet when write has blocked */
1327
static void
1328
doq_store_blocked_pkt(struct comm_point* c, struct doq_pkt_addr* paddr,
1329
  uint32_t ecn)
1330
{
1331
  if(c->doq_socket->have_blocked_pkt)
1332
    return; /* should not happen that we write when there is
1333
    already a blocked write, but if so, drop it. */
1334
  if(sldns_buffer_limit(c->doq_socket->pkt_buf) >
1335
    sldns_buffer_capacity(c->doq_socket->blocked_pkt))
1336
    return; /* impossibly large, drop packet. impossible because
1337
    pkt_buf and blocked_pkt are the same size. */
1338
  c->doq_socket->have_blocked_pkt = 1;
1339
  c->doq_socket->blocked_pkt_pi.ecn = ecn;
1340
  memcpy(c->doq_socket->blocked_paddr, paddr,
1341
    sizeof(*c->doq_socket->blocked_paddr));
1342
  sldns_buffer_clear(c->doq_socket->blocked_pkt);
1343
  sldns_buffer_write(c->doq_socket->blocked_pkt,
1344
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1345
    sldns_buffer_limit(c->doq_socket->pkt_buf));
1346
  sldns_buffer_flip(c->doq_socket->blocked_pkt);
1347
}
1348
1349
void
1350
doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, uint32_t ecn)
1351
{
1352
  struct msghdr msg;
1353
  struct iovec iov[1];
1354
  union {
1355
    struct cmsghdr hdr;
1356
    char buf[256];
1357
  } control;
1358
  ssize_t ret;
1359
  iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1360
  iov[0].iov_len = sldns_buffer_limit(c->doq_socket->pkt_buf);
1361
  memset(&msg, 0, sizeof(msg));
1362
  msg.msg_name = (void*)&paddr->addr;
1363
  msg.msg_namelen = paddr->addrlen;
1364
  msg.msg_iov = iov;
1365
  msg.msg_iovlen = 1;
1366
  msg.msg_control = control.buf;
1367
#ifndef S_SPLINT_S
1368
  msg.msg_controllen = sizeof(control.buf);
1369
#endif /* S_SPLINT_S */
1370
  msg.msg_flags = 0;
1371
1372
  doq_set_localaddr_cmsg(&msg, sizeof(control.buf), &paddr->localaddr,
1373
    paddr->localaddrlen, paddr->ifindex);
1374
  doq_set_ecn(c->fd, paddr->addr.sockaddr.in.sin_family, ecn);
1375
1376
  for(;;) {
1377
    ret = sendmsg(c->fd, &msg, MSG_DONTWAIT);
1378
    if(ret == -1 && errno == EINTR)
1379
      continue;
1380
    break;
1381
  }
1382
  if(ret == -1) {
1383
#ifndef USE_WINSOCK
1384
    if(errno == EAGAIN ||
1385
#  ifdef EWOULDBLOCK
1386
      errno == EWOULDBLOCK ||
1387
#  endif
1388
      errno == ENOBUFS)
1389
#else
1390
    if(WSAGetLastError() == WSAEINPROGRESS ||
1391
      WSAGetLastError() == WSAENOBUFS ||
1392
      WSAGetLastError() == WSAEWOULDBLOCK)
1393
#endif
1394
    {
1395
      /* udp send has blocked */
1396
      doq_store_blocked_pkt(c, paddr, ecn);
1397
      return;
1398
    }
1399
    if(!udp_send_errno_needs_log((void*)&paddr->addr,
1400
      paddr->addrlen))
1401
      return;
1402
    if(verbosity >= VERB_OPS) {
1403
      char host[256], port[32];
1404
      if(doq_print_addr_port(&paddr->addr, paddr->addrlen,
1405
        host, sizeof(host), port, sizeof(port))) {
1406
        verbose(VERB_OPS, "doq sendmsg to %s %s "
1407
          "failed: %s", host, port,
1408
          strerror(errno));
1409
      } else {
1410
        verbose(VERB_OPS, "doq sendmsg failed: %s",
1411
          strerror(errno));
1412
      }
1413
    }
1414
    return;
1415
  } else if(ret != (ssize_t)sldns_buffer_limit(c->doq_socket->pkt_buf)) {
1416
    char host[256], port[32];
1417
    if(doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1418
      sizeof(host), port, sizeof(port))) {
1419
      log_err("doq sendmsg to %s %s failed: "
1420
        "sent %d in place of %d bytes", 
1421
        host, port, (int)ret,
1422
        (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1423
    } else {
1424
      log_err("doq sendmsg failed: "
1425
        "sent %d in place of %d bytes", 
1426
        (int)ret, (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1427
    }
1428
    return;
1429
  }
1430
}
1431
1432
/** fetch port number */
1433
static int
1434
doq_sockaddr_get_port(struct doq_addr_storage* addr)
1435
{
1436
  if(addr->sockaddr.in.sin_family == AF_INET) {
1437
    struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1438
    return ntohs(sa->sin_port);
1439
  } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1440
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1441
    return ntohs(sa6->sin6_port);
1442
  }
1443
  return 0;
1444
}
1445
1446
/** get local address from ancillary data headers */
1447
static int
1448
doq_get_localaddr_cmsg(struct comm_point* c, struct doq_pkt_addr* paddr,
1449
  int* pkt_continue, struct msghdr* msg)
1450
{
1451
#ifndef S_SPLINT_S
1452
  struct cmsghdr* cmsg;
1453
#endif /* S_SPLINT_S */
1454
1455
  memset(&paddr->localaddr, 0, sizeof(paddr->localaddr));
1456
#ifndef S_SPLINT_S
1457
  for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
1458
    cmsg = CMSG_NXTHDR(msg, cmsg)) {
1459
    if( cmsg->cmsg_level == IPPROTO_IPV6 &&
1460
      cmsg->cmsg_type == IPV6_PKTINFO) {
1461
      struct in6_pktinfo* v6info =
1462
        (struct in6_pktinfo*)CMSG_DATA(cmsg);
1463
      struct sockaddr_in6* sa= (struct sockaddr_in6*)
1464
        &paddr->localaddr;
1465
      struct sockaddr_in6* rema = (struct sockaddr_in6*)
1466
        &paddr->addr;
1467
      if(rema->sin6_family != AF_INET6) {
1468
        log_err("doq cmsg family mismatch cmsg is ip6");
1469
        *pkt_continue = 1;
1470
        return 0;
1471
      }
1472
      sa->sin6_family = AF_INET6;
1473
      sa->sin6_port = htons(doq_sockaddr_get_port(
1474
        (void*)c->socket->addr));
1475
      paddr->ifindex = v6info->ipi6_ifindex;
1476
      memmove(&sa->sin6_addr, &v6info->ipi6_addr,
1477
        sizeof(struct in6_addr));
1478
      paddr->localaddrlen = sizeof(struct sockaddr_in6);
1479
      break;
1480
#ifdef IP_PKTINFO
1481
    } else if( cmsg->cmsg_level == IPPROTO_IP &&
1482
      cmsg->cmsg_type == IP_PKTINFO) {
1483
      struct in_pktinfo* v4info =
1484
        (struct in_pktinfo*)CMSG_DATA(cmsg);
1485
      struct sockaddr_in* sa= (struct sockaddr_in*)
1486
        &paddr->localaddr;
1487
      struct sockaddr_in* rema = (struct sockaddr_in*)
1488
        &paddr->addr;
1489
      if(rema->sin_family != AF_INET) {
1490
        log_err("doq cmsg family mismatch cmsg is ip4");
1491
        *pkt_continue = 1;
1492
        return 0;
1493
      }
1494
      sa->sin_family = AF_INET;
1495
      sa->sin_port = htons(doq_sockaddr_get_port(
1496
        (void*)c->socket->addr));
1497
      paddr->ifindex = v4info->ipi_ifindex;
1498
      memmove(&sa->sin_addr, &v4info->ipi_addr,
1499
        sizeof(struct in_addr));
1500
      paddr->localaddrlen = sizeof(struct sockaddr_in);
1501
      break;
1502
#elif defined(IP_RECVDSTADDR)
1503
    } else if( cmsg->cmsg_level == IPPROTO_IP &&
1504
      cmsg->cmsg_type == IP_RECVDSTADDR) {
1505
      struct sockaddr_in* sa= (struct sockaddr_in*)
1506
        &paddr->localaddr;
1507
      struct sockaddr_in* rema = (struct sockaddr_in*)
1508
        &paddr->addr;
1509
      if(rema->sin_family != AF_INET) {
1510
        log_err("doq cmsg family mismatch cmsg is ip4");
1511
        *pkt_continue = 1;
1512
        return 0;
1513
      }
1514
      sa->sin_family = AF_INET;
1515
      sa->sin_port = htons(doq_sockaddr_get_port(
1516
        (void*)c->socket->addr));
1517
      paddr->ifindex = 0;
1518
      memmove(&sa.sin_addr, CMSG_DATA(cmsg),
1519
        sizeof(struct in_addr));
1520
      paddr->localaddrlen = sizeof(struct sockaddr_in);
1521
      break;
1522
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
1523
    }
1524
  }
1525
#endif /* S_SPLINT_S */
1526
1527
return 1;
1528
}
1529
1530
/** Get the packet ecn information from the ancillary data.
 * For AF_INET6 the first IPV6_TCLASS control message is used, for every
 * other family the first IP_TOS control message. Control messages with a
 * zero cmsg_len are skipped.
 * @param msg: the received message header with the ancillary data.
 * @param family: address family of the packet.
 * @return the first data byte of the matching control message, or 0 if
 *   there is no such control message. */
static uint32_t
msghdr_get_ecn(struct msghdr* msg, int family)
{
#ifndef S_SPLINT_S
  struct cmsghdr* hdr;
  int want_level = IPPROTO_IP;
  int want_type = IP_TOS;
  if(family == AF_INET6) {
    want_level = IPPROTO_IPV6;
    want_type = IPV6_TCLASS;
  }
  for(hdr = CMSG_FIRSTHDR(msg); hdr != NULL;
    hdr = CMSG_NXTHDR(msg, hdr)) {
    if(hdr->cmsg_level != want_level ||
      hdr->cmsg_type != want_type ||
      hdr->cmsg_len == 0)
      continue;
    return *(uint8_t*)CMSG_DATA(hdr);
  }
#endif /* S_SPLINT_S */
  return 0;
}
1560
1561
/** receive packet for DoQ on UDP. get ancillary data for addresses,
1562
 * return false if failed and the callback can stop receiving UDP packets
1563
 * if pkt_continue is false. */
1564
static int
1565
doq_recv(struct comm_point* c, struct doq_pkt_addr* paddr, int* pkt_continue,
1566
  struct ngtcp2_pkt_info* pi)
1567
{
1568
  struct msghdr msg;
1569
  struct iovec iov[1];
1570
  ssize_t rcv;
1571
  union {
1572
    struct cmsghdr hdr;
1573
    char buf[256];
1574
  } ancil;
1575
1576
  msg.msg_name = &paddr->addr;
1577
  msg.msg_namelen = (socklen_t)sizeof(paddr->addr);
1578
  iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1579
  iov[0].iov_len = sldns_buffer_remaining(c->doq_socket->pkt_buf);
1580
  msg.msg_iov = iov;
1581
  msg.msg_iovlen = 1;
1582
  msg.msg_control = ancil.buf;
1583
#ifndef S_SPLINT_S
1584
  msg.msg_controllen = sizeof(ancil.buf);
1585
#endif /* S_SPLINT_S */
1586
  msg.msg_flags = 0;
1587
1588
  rcv = recvmsg(c->fd, &msg, MSG_DONTWAIT);
1589
  if(rcv == -1) {
1590
    if(errno != EAGAIN && errno != EINTR
1591
      && udp_recv_needs_log(errno)) {
1592
      log_err("recvmsg failed for doq: %s", strerror(errno));
1593
    }
1594
    *pkt_continue = 0;
1595
    return 0;
1596
  }
1597
1598
  paddr->addrlen = msg.msg_namelen;
1599
  sldns_buffer_skip(c->doq_socket->pkt_buf, rcv);
1600
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1601
  if(!doq_get_localaddr_cmsg(c, paddr, pkt_continue, &msg))
1602
    return 0;
1603
  pi->ecn = msghdr_get_ecn(&msg, paddr->addr.sockaddr.in.sin_family);
1604
  return 1;
1605
}
1606
1607
/** send the version negotiation for doq. scid and dcid are flipped around
1608
 * to send back to the client. */
1609
static void
1610
doq_send_version_negotiation(struct comm_point* c, struct doq_pkt_addr* paddr,
1611
  const uint8_t* dcid, size_t dcidlen, const uint8_t* scid,
1612
  size_t scidlen)
1613
{
1614
  uint32_t versions[2];
1615
  size_t versions_len = 0;
1616
  ngtcp2_ssize ret;
1617
  uint8_t unused_random;
1618
1619
  /* fill the array with supported versions */
1620
  versions[0] = NGTCP2_PROTO_VER_V1;
1621
  versions_len = 1;
1622
  unused_random = ub_random_max(c->doq_socket->rnd, 256);
1623
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1624
  ret = ngtcp2_pkt_write_version_negotiation(
1625
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1626
    sldns_buffer_capacity(c->doq_socket->pkt_buf), unused_random,
1627
    dcid, dcidlen, scid, scidlen, versions, versions_len);
1628
  if(ret < 0) {
1629
    log_err("ngtcp2_pkt_write_version_negotiation failed: %s",
1630
      ngtcp2_strerror(ret));
1631
    return;
1632
  }
1633
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1634
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1635
  doq_send_pkt(c, paddr, 0);
1636
}
1637
1638
/** Find the doq_conn object by remote address and dcid */
1639
static struct doq_conn*
1640
doq_conn_find(struct doq_table* table, struct doq_addr_storage* addr,
1641
  socklen_t addrlen, struct doq_addr_storage* localaddr,
1642
  socklen_t localaddrlen, int ifindex, const uint8_t* dcid,
1643
  size_t dcidlen)
1644
{
1645
  struct rbnode_type* node;
1646
  struct doq_conn key;
1647
  memset(&key.node, 0, sizeof(key.node));
1648
  key.node.key = &key;
1649
  memmove(&key.key.paddr.addr, addr, addrlen);
1650
  key.key.paddr.addrlen = addrlen;
1651
  memmove(&key.key.paddr.localaddr, localaddr, localaddrlen);
1652
  key.key.paddr.localaddrlen = localaddrlen;
1653
  key.key.paddr.ifindex = ifindex;
1654
  key.key.dcid = (void*)dcid;
1655
  key.key.dcidlen = dcidlen;
1656
  node = rbtree_search(table->conn_tree, &key);
1657
  if(node)
1658
    return (struct doq_conn*)node->key;
1659
  return NULL;
1660
}
1661
1662
/** find the doq_con by the connection id */
1663
static struct doq_conn*
1664
doq_conn_find_by_id(struct doq_table* table, const uint8_t* dcid,
1665
  size_t dcidlen)
1666
{
1667
  struct doq_conid* conid;
1668
  lock_rw_rdlock(&table->conid_lock);
1669
  conid = doq_conid_find(table, dcid, dcidlen);
1670
  if(conid) {
1671
    /* make a copy of the key */
1672
    struct doq_conn* conn;
1673
    struct doq_conn_key key = conid->key;
1674
    uint8_t cid[NGTCP2_MAX_CIDLEN];
1675
    log_assert(conid->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1676
    memcpy(cid, conid->key.dcid, conid->key.dcidlen);
1677
    key.dcid = cid;
1678
    lock_rw_unlock(&table->conid_lock);
1679
1680
    /* now that the conid lock is released, look up the conn */
1681
    lock_rw_rdlock(&table->lock);
1682
    conn = doq_conn_find(table, &key.paddr.addr,
1683
      key.paddr.addrlen, &key.paddr.localaddr,
1684
      key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
1685
      key.dcidlen);
1686
    if(!conn) {
1687
      /* The connection got deleted between the conid lookup
1688
       * and the connection lock grab, it no longer exists,
1689
       * so return null. */
1690
      lock_rw_unlock(&table->lock);
1691
      return NULL;
1692
    }
1693
    lock_basic_lock(&conn->lock);
1694
    if(conn->is_deleted) {
1695
      lock_rw_unlock(&table->lock);
1696
      lock_basic_unlock(&conn->lock);
1697
      return NULL;
1698
    }
1699
    lock_rw_unlock(&table->lock);
1700
    return conn;
1701
  }
1702
  lock_rw_unlock(&table->conid_lock);
1703
  return NULL;
1704
}
1705
1706
/** Find the doq_conn, by addr or by connection id */
1707
static struct doq_conn*
1708
doq_conn_find_by_addr_or_cid(struct doq_table* table,
1709
  struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen)
1710
{
1711
  struct doq_conn* conn;
1712
  lock_rw_rdlock(&table->lock);
1713
  conn = doq_conn_find(table, &paddr->addr, paddr->addrlen,
1714
    &paddr->localaddr, paddr->localaddrlen, paddr->ifindex,
1715
    dcid, dcidlen);
1716
  if(conn && conn->is_deleted) {
1717
    conn = NULL;
1718
  }
1719
  if(conn) {
1720
    lock_basic_lock(&conn->lock);
1721
    lock_rw_unlock(&table->lock);
1722
    verbose(VERB_ALGO, "doq: found connection by address, dcid");
1723
  } else {
1724
    lock_rw_unlock(&table->lock);
1725
    conn = doq_conn_find_by_id(table, dcid, dcidlen);
1726
    if(conn) {
1727
      verbose(VERB_ALGO, "doq: found connection by dcid");
1728
    }
1729
  }
1730
  return conn;
1731
}
1732
1733
/** decode doq packet header, false on handled or failure, true to continue
1734
 * to process the packet */
1735
static int
1736
doq_decode_pkt_header_negotiate(struct comm_point* c,
1737
  struct doq_pkt_addr* paddr, struct doq_conn** conn)
1738
{
1739
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1740
  struct ngtcp2_version_cid vc;
1741
#else
1742
  uint32_t version;
1743
  const uint8_t *dcid, *scid;
1744
  size_t dcidlen, scidlen;
1745
#endif
1746
  int rv;
1747
1748
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1749
  rv = ngtcp2_pkt_decode_version_cid(&vc,
1750
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1751
    sldns_buffer_limit(c->doq_socket->pkt_buf),
1752
    c->doq_socket->sv_scidlen);
1753
#else
1754
  rv = ngtcp2_pkt_decode_version_cid(&version, &dcid, &dcidlen,
1755
    &scid, &scidlen, sldns_buffer_begin(c->doq_socket->pkt_buf),
1756
    sldns_buffer_limit(c->doq_socket->pkt_buf), c->doq_socket->sv_scidlen);
1757
#endif
1758
  if(rv != 0) {
1759
    if(rv == NGTCP2_ERR_VERSION_NEGOTIATION) {
1760
      /* send the version negotiation */
1761
      doq_send_version_negotiation(c, paddr,
1762
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1763
      vc.scid, vc.scidlen, vc.dcid, vc.dcidlen
1764
#else
1765
      scid, scidlen, dcid, dcidlen
1766
#endif
1767
      );
1768
      return 0;
1769
    }
1770
    verbose(VERB_ALGO, "doq: could not decode version "
1771
      "and CID from QUIC packet header: %s",
1772
      ngtcp2_strerror(rv));
1773
    return 0;
1774
  }
1775
1776
  if(verbosity >= VERB_ALGO) {
1777
    verbose(VERB_ALGO, "ngtcp2_pkt_decode_version_cid packet has "
1778
      "QUIC protocol version %u", (unsigned)
1779
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1780
      vc.
1781
#endif
1782
      version
1783
      );
1784
    log_hex("dcid",
1785
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1786
      (void*)vc.dcid, vc.dcidlen
1787
#else
1788
      (void*)dcid, dcidlen
1789
#endif
1790
      );
1791
    log_hex("scid",
1792
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1793
      (void*)vc.scid, vc.scidlen
1794
#else
1795
      (void*)scid, scidlen
1796
#endif
1797
      );
1798
  }
1799
  *conn = doq_conn_find_by_addr_or_cid(c->doq_socket->table, paddr,
1800
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1801
    vc.dcid, vc.dcidlen
1802
#else
1803
    dcid, dcidlen
1804
#endif
1805
    );
1806
  if(*conn)
1807
    (*conn)->doq_socket = c->doq_socket;
1808
  return 1;
1809
}
1810
1811
/** fill cid structure with random data */
1812
static void doq_cid_randfill(struct ngtcp2_cid* cid, size_t datalen,
1813
  struct ub_randstate* rnd)
1814
{
1815
  uint8_t buf[32];
1816
  if(datalen > sizeof(buf))
1817
    datalen = sizeof(buf);
1818
  doq_fill_rand(rnd, buf, datalen);
1819
  ngtcp2_cid_init(cid, buf, datalen);
1820
}
1821
1822
/** send retry packet for doq connection. */
1823
static void
1824
doq_send_retry(struct comm_point* c, struct doq_pkt_addr* paddr,
1825
  struct ngtcp2_pkt_hd* hd)
1826
{
1827
  char host[256], port[32];
1828
  struct ngtcp2_cid scid;
1829
  uint8_t token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN];
1830
  ngtcp2_tstamp ts;
1831
  ngtcp2_ssize tokenlen, ret;
1832
1833
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1834
    sizeof(host), port, sizeof(port))) {
1835
    log_err("doq_send_retry failed");
1836
    return;
1837
  }
1838
  verbose(VERB_ALGO, "doq: sending retry packet to %s %s", host, port);
1839
1840
  /* the server chosen source connection ID */
1841
  scid.datalen = c->doq_socket->sv_scidlen;
1842
  doq_cid_randfill(&scid, scid.datalen, c->doq_socket->rnd);
1843
1844
  ts = doq_get_timestamp_nanosec();
1845
1846
  tokenlen = ngtcp2_crypto_generate_retry_token(token,
1847
    c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1848
    hd->version, (void*)&paddr->addr, paddr->addrlen, &scid,
1849
    &hd->dcid, ts);
1850
  if(tokenlen < 0) {
1851
    log_err("ngtcp2_crypto_generate_retry_token failed: %s",
1852
      ngtcp2_strerror(tokenlen));
1853
    return;
1854
  }
1855
1856
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1857
  ret = ngtcp2_crypto_write_retry(sldns_buffer_begin(c->doq_socket->pkt_buf),
1858
    sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version,
1859
    &hd->scid, &scid, &hd->dcid, token, tokenlen);
1860
  if(ret < 0) {
1861
    log_err("ngtcp2_crypto_write_retry failed: %s",
1862
      ngtcp2_strerror(ret));
1863
    return;
1864
  }
1865
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1866
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1867
  doq_send_pkt(c, paddr, 0);
1868
}
1869
1870
/** doq send stateless connection close */
1871
static void
1872
doq_send_stateless_connection_close(struct comm_point* c,
1873
  struct doq_pkt_addr* paddr, struct ngtcp2_pkt_hd* hd,
1874
  uint64_t error_code)
1875
{
1876
  ngtcp2_ssize ret;
1877
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1878
  ret = ngtcp2_crypto_write_connection_close(
1879
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1880
    sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, &hd->scid,
1881
    &hd->dcid, error_code, NULL, 0);
1882
  if(ret < 0) {
1883
    log_err("ngtcp2_crypto_write_connection_close failed: %s",
1884
      ngtcp2_strerror(ret));
1885
    return;
1886
  }
1887
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1888
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1889
  doq_send_pkt(c, paddr, 0);
1890
}
1891
1892
/** doq verify retry token, false on failure */
1893
static int
1894
doq_verify_retry_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1895
  struct ngtcp2_cid* ocid, struct ngtcp2_pkt_hd* hd)
1896
{
1897
  char host[256], port[32];
1898
  ngtcp2_tstamp ts;
1899
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1900
    sizeof(host), port, sizeof(port))) {
1901
    log_err("doq_verify_retry_token failed");
1902
    return 0;
1903
  }
1904
  ts = doq_get_timestamp_nanosec();
1905
  verbose(VERB_ALGO, "doq: verifying retry token from %s %s", host,
1906
    port);
1907
  if(ngtcp2_crypto_verify_retry_token(ocid,
1908
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1909
    hd->token, hd->tokenlen,
1910
#else
1911
    hd->token.base, hd->token.len,
1912
#endif
1913
    c->doq_socket->static_secret,
1914
    c->doq_socket->static_secret_len, hd->version,
1915
    (void*)&paddr->addr, paddr->addrlen, &hd->dcid,
1916
    10*NGTCP2_SECONDS, ts) != 0) {
1917
    verbose(VERB_ALGO, "doq: could not verify retry token "
1918
      "from %s %s", host, port);
1919
    return 0;
1920
  }
1921
  verbose(VERB_ALGO, "doq: verified retry token from %s %s", host, port);
1922
  return 1;
1923
}
1924
1925
/** doq verify token, false on failure */
1926
static int
1927
doq_verify_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1928
  struct ngtcp2_pkt_hd* hd)
1929
{
1930
  char host[256], port[32];
1931
  ngtcp2_tstamp ts;
1932
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1933
    sizeof(host), port, sizeof(port))) {
1934
    log_err("doq_verify_token failed");
1935
    return 0;
1936
  }
1937
  ts = doq_get_timestamp_nanosec();
1938
  verbose(VERB_ALGO, "doq: verifying token from %s %s", host, port);
1939
  if(ngtcp2_crypto_verify_regular_token(
1940
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1941
    hd->token, hd->tokenlen,
1942
#else
1943
    hd->token.base, hd->token.len,
1944
#endif
1945
    c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1946
    (void*)&paddr->addr, paddr->addrlen, 3600*NGTCP2_SECONDS,
1947
    ts) != 0) {
1948
    verbose(VERB_ALGO, "doq: could not verify token from %s %s",
1949
      host, port);
1950
    return 0;
1951
  }
1952
  verbose(VERB_ALGO, "doq: verified token from %s %s", host, port);
1953
  return 1;
1954
}
1955
1956
/** delete and remove from the lookup tree the doq_conn connection */
1957
static void
1958
doq_delete_connection(struct comm_point* c, struct doq_conn* conn)
1959
{
1960
  struct doq_conn copy;
1961
  uint8_t cid[NGTCP2_MAX_CIDLEN];
1962
  rbnode_type* node;
1963
  if(!conn)
1964
    return;
1965
  /* Copy the key and set it deleted. */
1966
  conn->is_deleted = 1;
1967
  doq_conn_write_disable(conn);
1968
  copy.key = conn->key;
1969
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1970
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
1971
  copy.key.dcid = cid;
1972
  copy.node.key = &copy;
1973
  lock_basic_unlock(&conn->lock);
1974
1975
  /* Now get the table lock to delete it from the tree */
1976
  lock_rw_wrlock(&c->doq_socket->table->lock);
1977
  node = rbtree_delete(c->doq_socket->table->conn_tree, copy.node.key);
1978
  if(node) {
1979
    conn = (struct doq_conn*)node->key;
1980
    lock_basic_lock(&conn->lock);
1981
    doq_conn_write_list_remove(c->doq_socket->table, conn);
1982
    if(conn->timer.timer_in_list) {
1983
      /* Remove timer from list first, because finding the
1984
       * rbnode element of the setlist of same timeouts
1985
       * needs tree lookup. Edit the tree structure after
1986
       * that lookup. */
1987
      doq_timer_list_remove(c->doq_socket->table,
1988
        &conn->timer);
1989
    }
1990
    if(conn->timer.timer_in_tree)
1991
      doq_timer_tree_remove(c->doq_socket->table,
1992
        &conn->timer);
1993
  }
1994
  lock_rw_unlock(&c->doq_socket->table->lock);
1995
  if(node) {
1996
    lock_basic_unlock(&conn->lock);
1997
    doq_table_quic_size_subtract(c->doq_socket->table,
1998
      sizeof(*conn)+conn->key.dcidlen);
1999
    doq_conn_delete(conn, c->doq_socket->table);
2000
  }
2001
}
2002
2003
/** create and setup a new doq connection, to a new destination, or with
2004
 * a new dcid. It has a new set of streams. It is inserted in the lookup tree.
2005
 * Returns NULL on failure. */
2006
static struct doq_conn*
2007
doq_setup_new_conn(struct comm_point* c, struct doq_pkt_addr* paddr,
2008
  struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid)
2009
{
2010
  struct doq_conn* conn;
2011
  if(!doq_table_quic_size_available(c->doq_socket->table,
2012
    c->doq_socket->cfg, sizeof(*conn)+hd->dcid.datalen
2013
    + sizeof(struct doq_stream)
2014
    + 100 /* estimated input query */
2015
    + 1200 /* estimated output query */)) {
2016
    verbose(VERB_ALGO, "doq: no mem available for new connection");
2017
    doq_send_stateless_connection_close(c, paddr, hd,
2018
      NGTCP2_CONNECTION_REFUSED);
2019
    return NULL;
2020
  }
2021
  conn = doq_conn_create(c, paddr, hd->dcid.data, hd->dcid.datalen,
2022
    hd->version);
2023
  if(!conn) {
2024
    log_err("doq: could not allocate doq_conn");
2025
    return NULL;
2026
  }
2027
  lock_rw_wrlock(&c->doq_socket->table->lock);
2028
  lock_basic_lock(&conn->lock);
2029
  if(!rbtree_insert(c->doq_socket->table->conn_tree, &conn->node)) {
2030
    lock_rw_unlock(&c->doq_socket->table->lock);
2031
    log_err("doq: duplicate connection");
2032
    /* conn has no entry in writelist, and no timer yet. */
2033
    lock_basic_unlock(&conn->lock);
2034
    doq_conn_delete(conn, c->doq_socket->table);
2035
    return NULL;
2036
  }
2037
  lock_rw_unlock(&c->doq_socket->table->lock);
2038
  doq_table_quic_size_add(c->doq_socket->table,
2039
    sizeof(*conn)+conn->key.dcidlen);
2040
  verbose(VERB_ALGO, "doq: created new connection");
2041
2042
  /* the scid and dcid switch meaning from the accepted client
2043
   * connection to the server connection. The 'source' and 'destination'
2044
   * meaning is reversed. */
2045
  if(!doq_conn_setup(conn, hd->scid.data, hd->scid.datalen,
2046
    (ocid?ocid->data:NULL), (ocid?ocid->datalen:0),
2047
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2048
    hd->token, hd->tokenlen
2049
#else
2050
    hd->token.base, hd->token.len
2051
#endif
2052
    )) {
2053
    log_err("doq: could not set up connection");
2054
    doq_delete_connection(c, conn);
2055
    return NULL;
2056
  }
2057
  return conn;
2058
}
2059
2060
/** perform doq address validation */
2061
static int
2062
doq_address_validation(struct comm_point* c, struct doq_pkt_addr* paddr,
2063
  struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid,
2064
  struct ngtcp2_cid** pocid)
2065
{
2066
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2067
  const uint8_t* token = hd->token;
2068
  size_t tokenlen = hd->tokenlen;
2069
#else
2070
  const uint8_t* token = hd->token.base;
2071
  size_t tokenlen = hd->token.len;
2072
#endif
2073
  verbose(VERB_ALGO, "doq stateless address validation");
2074
2075
  if(tokenlen == 0 || token == NULL) {
2076
    doq_send_retry(c, paddr, hd);
2077
    return 0;
2078
  }
2079
  if(token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY &&
2080
    hd->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN) {
2081
    doq_send_stateless_connection_close(c, paddr, hd,
2082
      NGTCP2_INVALID_TOKEN);
2083
    return 0;
2084
  }
2085
  if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) {
2086
    if(!doq_verify_retry_token(c, paddr, ocid, hd)) {
2087
      doq_send_stateless_connection_close(c, paddr, hd,
2088
        NGTCP2_INVALID_TOKEN);
2089
      return 0;
2090
    }
2091
    *pocid = ocid;
2092
  } else if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) {
2093
    if(!doq_verify_token(c, paddr, hd)) {
2094
      doq_send_retry(c, paddr, hd);
2095
      return 0;
2096
    }
2097
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2098
    hd->token = NULL;
2099
    hd->tokenlen = 0;
2100
#else
2101
    hd->token.base = NULL;
2102
    hd->token.len = 0;
2103
#endif
2104
  } else {
2105
    verbose(VERB_ALGO, "doq address validation: unrecognised "
2106
      "token in hd.token.base with magic byte 0x%2.2x",
2107
      (int)token[0]);
2108
    if(c->doq_socket->validate_addr) {
2109
      doq_send_retry(c, paddr, hd);
2110
      return 0;
2111
    }
2112
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2113
    hd->token = NULL;
2114
    hd->tokenlen = 0;
2115
#else
2116
    hd->token.base = NULL;
2117
    hd->token.len = 0;
2118
#endif
2119
  }
2120
  return 1;
2121
}
2122
2123
/** the doq accept, returns false if no further processing of content */
2124
static int
2125
doq_accept(struct comm_point* c, struct doq_pkt_addr* paddr,
2126
  struct doq_conn** conn, struct ngtcp2_pkt_info* pi)
2127
{
2128
  int rv;
2129
  struct ngtcp2_pkt_hd hd;
2130
  struct ngtcp2_cid ocid, *pocid=NULL;
2131
  int err_retry;
2132
  memset(&hd, 0, sizeof(hd));
2133
  rv = ngtcp2_accept(&hd, sldns_buffer_begin(c->doq_socket->pkt_buf),
2134
    sldns_buffer_limit(c->doq_socket->pkt_buf));
2135
  if(rv != 0) {
2136
    if(rv == NGTCP2_ERR_RETRY) {
2137
      doq_send_retry(c, paddr, &hd);
2138
      return 0;
2139
    }
2140
    log_err("doq: initial packet failed, ngtcp2_accept failed: %s",
2141
      ngtcp2_strerror(rv));
2142
    return 0;
2143
  }
2144
  if(c->doq_socket->validate_addr ||
2145
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2146
    hd.tokenlen
2147
#else
2148
    hd.token.len
2149
#endif
2150
    ) {
2151
    if(!doq_address_validation(c, paddr, &hd, &ocid, &pocid))
2152
      return 0;
2153
  }
2154
  *conn = doq_setup_new_conn(c, paddr, &hd, pocid);
2155
  if(!*conn)
2156
    return 0;
2157
  (*conn)->doq_socket = c->doq_socket;
2158
  if(!doq_conn_recv(c, paddr, *conn, pi, &err_retry, NULL)) {
2159
    if(err_retry)
2160
      doq_send_retry(c, paddr, &hd);
2161
    doq_delete_connection(c, *conn);
2162
    *conn = NULL;
2163
    return 0;
2164
  }
2165
  return 1;
2166
}
2167
2168
/** doq pickup a timer to wait for for the worker. If any timer exists. */
2169
static void
2170
doq_pickup_timer(struct comm_point* c)
2171
{
2172
  struct doq_timer* t;
2173
  struct timeval tv;
2174
  int have_time = 0;
2175
  memset(&tv, 0, sizeof(tv));
2176
2177
  lock_rw_wrlock(&c->doq_socket->table->lock);
2178
  RBTREE_FOR(t, struct doq_timer*, c->doq_socket->table->timer_tree) {
2179
    if(t->worker_doq_socket == NULL ||
2180
      t->worker_doq_socket == c->doq_socket) {
2181
      /* pick up this element */
2182
      t->worker_doq_socket = c->doq_socket;
2183
      have_time = 1;
2184
      memcpy(&tv, &t->time, sizeof(tv));
2185
      break;
2186
    }
2187
  }
2188
  lock_rw_unlock(&c->doq_socket->table->lock);
2189
2190
  if(have_time) {
2191
    struct timeval rel;
2192
    timeval_subtract(&rel, &tv, c->doq_socket->now_tv);
2193
    comm_timer_set(c->doq_socket->timer, &rel);
2194
    memcpy(&c->doq_socket->marked_time, &tv,
2195
      sizeof(c->doq_socket->marked_time));
2196
    verbose(VERB_ALGO, "doq pickup timer at %d.%6.6d in %d.%6.6d",
2197
      (int)tv.tv_sec, (int)tv.tv_usec, (int)rel.tv_sec,
2198
      (int)rel.tv_usec);
2199
  } else {
2200
    if(comm_timer_is_set(c->doq_socket->timer))
2201
      comm_timer_disable(c->doq_socket->timer);
2202
    memset(&c->doq_socket->marked_time, 0,
2203
      sizeof(c->doq_socket->marked_time));
2204
    verbose(VERB_ALGO, "doq timer disabled");
2205
  }
2206
}
2207
2208
/** doq done with connection, release locks and setup timer and write */
2209
static void
2210
doq_done_setup_timer_and_write(struct comm_point* c, struct doq_conn* conn)
2211
{
2212
  struct doq_conn copy;
2213
  uint8_t cid[NGTCP2_MAX_CIDLEN];
2214
  rbnode_type* node;
2215
  struct timeval new_tv;
2216
  int write_change = 0, timer_change = 0;
2217
2218
  /* No longer in callbacks, so the pointer to doq_socket is back
2219
   * to NULL. */
2220
  conn->doq_socket = NULL;
2221
2222
  if(doq_conn_check_timer(conn, &new_tv))
2223
    timer_change = 1;
2224
  if( (conn->write_interest && !conn->on_write_list) ||
2225
    (!conn->write_interest && conn->on_write_list))
2226
    write_change = 1;
2227
2228
  if(!timer_change && !write_change) {
2229
    /* Nothing to do. */
2230
    lock_basic_unlock(&conn->lock);
2231
    return;
2232
  }
2233
2234
  /* The table lock is needed to change the write list and timer tree.
2235
   * So the connection lock is release and then the connection is
2236
   * looked up again. */
2237
  copy.key = conn->key;
2238
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2239
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2240
  copy.key.dcid = cid;
2241
  copy.node.key = &copy;
2242
  lock_basic_unlock(&conn->lock);
2243
2244
  lock_rw_wrlock(&c->doq_socket->table->lock);
2245
  node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2246
  if(!node) {
2247
    lock_rw_unlock(&c->doq_socket->table->lock);
2248
    /* Must have been deleted in the mean time. */
2249
    return;
2250
  }
2251
  conn = (struct doq_conn*)node->key;
2252
  lock_basic_lock(&conn->lock);
2253
  if(conn->is_deleted) {
2254
    /* It is deleted now. */
2255
    lock_rw_unlock(&c->doq_socket->table->lock);
2256
    lock_basic_unlock(&conn->lock);
2257
    return;
2258
  }
2259
2260
  if(write_change) {
2261
    /* Edit the write lists, we are holding the table.lock and can
2262
     * edit the list first,last and also prev,next and on_list
2263
     * elements in the doq_conn structures. */
2264
    doq_conn_set_write_list(c->doq_socket->table, conn);
2265
  }
2266
  if(timer_change) {
2267
    doq_timer_set(c->doq_socket->table, &conn->timer,
2268
      c->doq_socket, &new_tv);
2269
  }
2270
  lock_rw_unlock(&c->doq_socket->table->lock);
2271
  lock_basic_unlock(&conn->lock);
2272
}
2273
2274
/** doq done with connection callbacks, release locks and setup write */
2275
static void
2276
doq_done_with_conn_cb(struct comm_point* c, struct doq_conn* conn)
2277
{
2278
  struct doq_conn copy;
2279
  uint8_t cid[NGTCP2_MAX_CIDLEN];
2280
  rbnode_type* node;
2281
2282
  /* no longer in callbacks, so the pointer to doq_socket is back
2283
   * to NULL. */
2284
  conn->doq_socket = NULL;
2285
2286
  if( (conn->write_interest && conn->on_write_list) ||
2287
    (!conn->write_interest && !conn->on_write_list)) {
2288
    /* The connection already has the required write list
2289
     * status. */
2290
    lock_basic_unlock(&conn->lock);
2291
    return;
2292
  }
2293
2294
  /* To edit the write list of connections we have to hold the table
2295
   * lock, so we release the connection and then look it up again. */
2296
  copy.key = conn->key;
2297
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2298
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2299
  copy.key.dcid = cid;
2300
  copy.node.key = &copy;
2301
  lock_basic_unlock(&conn->lock);
2302
2303
  lock_rw_wrlock(&c->doq_socket->table->lock);
2304
  node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2305
  if(!node) {
2306
    lock_rw_unlock(&c->doq_socket->table->lock);
2307
    /* must have been deleted in the mean time */
2308
    return;
2309
  }
2310
  conn = (struct doq_conn*)node->key;
2311
  lock_basic_lock(&conn->lock);
2312
  if(conn->is_deleted) {
2313
    /* it is deleted now. */
2314
    lock_rw_unlock(&c->doq_socket->table->lock);
2315
    lock_basic_unlock(&conn->lock);
2316
    return;
2317
  }
2318
2319
  /* edit the write lists, we are holding the table.lock and can
2320
   * edit the list first,last and also prev,next and on_list elements
2321
   * in the doq_conn structures. */
2322
  doq_conn_set_write_list(c->doq_socket->table, conn);
2323
  lock_rw_unlock(&c->doq_socket->table->lock);
2324
  lock_basic_unlock(&conn->lock);
2325
}
2326
2327
/** doq count the length of the write list */
2328
static size_t
2329
doq_write_list_length(struct comm_point* c)
2330
{
2331
  size_t count = 0;
2332
  struct doq_conn* conn;
2333
  lock_rw_rdlock(&c->doq_socket->table->lock);
2334
  conn = c->doq_socket->table->write_list_first;
2335
  while(conn) {
2336
    count++;
2337
    conn = conn->write_next;
2338
  }
2339
  lock_rw_unlock(&c->doq_socket->table->lock);
2340
  return count;
2341
}
2342
2343
/** doq pop the first element from the write list to have write events */
2344
static struct doq_conn*
2345
doq_pop_write_conn(struct comm_point* c)
2346
{
2347
  struct doq_conn* conn;
2348
  lock_rw_wrlock(&c->doq_socket->table->lock);
2349
  conn = doq_table_pop_first(c->doq_socket->table);
2350
  while(conn && conn->is_deleted) {
2351
    lock_basic_unlock(&conn->lock);
2352
    conn = doq_table_pop_first(c->doq_socket->table);
2353
  }
2354
  lock_rw_unlock(&c->doq_socket->table->lock);
2355
  if(conn)
2356
    conn->doq_socket = c->doq_socket;
2357
  return conn;
2358
}
2359
2360
/** The connection is done with write callbacks, release it.
 * @param c: the doq comm point.
 * @param conn: the connection; both routines that are called from here
 *   release its lock.
 * @param delete_it: if true the connection is deleted, otherwise its
 *   timer and write list status are set up. */
static void
doq_done_with_write_cb(struct comm_point* c, struct doq_conn* conn,
  int delete_it)
{
  if(!delete_it)
    doq_done_setup_timer_and_write(c, conn);
  else
    doq_delete_connection(c, conn);
}
2371
2372
/** see if the doq socket wants to write packets */
2373
static int
2374
doq_socket_want_write(struct comm_point* c)
2375
{
2376
  int want_write = 0;
2377
  if(c->doq_socket->have_blocked_pkt)
2378
    return 1;
2379
  lock_rw_rdlock(&c->doq_socket->table->lock);
2380
  if(c->doq_socket->table->write_list_first)
2381
    want_write = 1;
2382
  lock_rw_unlock(&c->doq_socket->table->lock);
2383
  return want_write;
2384
}
2385
2386
/** enable write event for the doq server socket fd */
2387
static void
2388
doq_socket_write_enable(struct comm_point* c)
2389
{
2390
  verbose(VERB_ALGO, "doq socket want write");
2391
  if(c->doq_socket->event_has_write)
2392
    return;
2393
  comm_point_listen_for_rw(c, 1, 1);
2394
  c->doq_socket->event_has_write = 1;
2395
}
2396
2397
/** disable write event for the doq server socket fd */
2398
static void
2399
doq_socket_write_disable(struct comm_point* c)
2400
{
2401
  verbose(VERB_ALGO, "doq socket want no write");
2402
  if(!c->doq_socket->event_has_write)
2403
    return;
2404
  comm_point_listen_for_rw(c, 1, 0);
2405
  c->doq_socket->event_has_write = 0;
2406
}
2407
2408
/** write blocked packet, if possible. returns false if failed, again. */
2409
static int
2410
doq_write_blocked_pkt(struct comm_point* c)
2411
{
2412
  struct doq_pkt_addr paddr;
2413
  if(!c->doq_socket->have_blocked_pkt)
2414
    return 1;
2415
  c->doq_socket->have_blocked_pkt = 0;
2416
  if(sldns_buffer_limit(c->doq_socket->blocked_pkt) >
2417
    sldns_buffer_remaining(c->doq_socket->pkt_buf))
2418
    return 1; /* impossibly large, drop it.
2419
    impossible since pkt_buf is same size as blocked_pkt buf. */
2420
  sldns_buffer_clear(c->doq_socket->pkt_buf);
2421
  sldns_buffer_write(c->doq_socket->pkt_buf,
2422
    sldns_buffer_begin(c->doq_socket->blocked_pkt),
2423
    sldns_buffer_limit(c->doq_socket->blocked_pkt));
2424
  sldns_buffer_flip(c->doq_socket->pkt_buf);
2425
  memcpy(&paddr, c->doq_socket->blocked_paddr, sizeof(paddr));
2426
  doq_send_pkt(c, &paddr, c->doq_socket->blocked_pkt_pi.ecn);
2427
  if(c->doq_socket->have_blocked_pkt)
2428
    return 0;
2429
  return 1;
2430
}
2431
2432
/** doq find a timer that timeouted and return the conn, locked. */
2433
static struct doq_conn*
2434
doq_timer_timeout_conn(struct doq_server_socket* doq_socket)
2435
{
2436
  struct doq_conn* conn = NULL;
2437
  struct rbnode_type* node;
2438
  lock_rw_wrlock(&doq_socket->table->lock);
2439
  node = rbtree_first(doq_socket->table->timer_tree);
2440
  if(node && node != RBTREE_NULL) {
2441
    struct doq_timer* t = (struct doq_timer*)node;
2442
    conn = t->conn;
2443
2444
    /* If now < timer then no further timeouts in tree. */
2445
    if(timeval_smaller(doq_socket->now_tv, &t->time)) {
2446
      lock_rw_unlock(&doq_socket->table->lock);
2447
      return NULL;
2448
    }
2449
2450
    lock_basic_lock(&conn->lock);
2451
    conn->doq_socket = doq_socket;
2452
2453
    /* Now that the timer is fired, remove it. */
2454
    doq_timer_unset(doq_socket->table, t);
2455
    lock_rw_unlock(&doq_socket->table->lock);
2456
    return conn;
2457
  }
2458
  lock_rw_unlock(&doq_socket->table->lock);
2459
  return NULL;
2460
}
2461
2462
/** doq timer erase the marker that said which timer the worker uses. */
2463
static void
2464
doq_timer_erase_marker(struct doq_server_socket* doq_socket)
2465
{
2466
  struct doq_timer* t;
2467
  lock_rw_wrlock(&doq_socket->table->lock);
2468
  t = doq_timer_find_time(doq_socket->table, &doq_socket->marked_time);
2469
  if(t && t->worker_doq_socket == doq_socket)
2470
    t->worker_doq_socket = NULL;
2471
  lock_rw_unlock(&doq_socket->table->lock);
2472
  memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2473
}
2474
2475
void
2476
doq_timer_cb(void* arg)
2477
{
2478
  struct doq_server_socket* doq_socket = (struct doq_server_socket*)arg;
2479
  struct doq_conn* conn;
2480
  verbose(VERB_ALGO, "doq timer callback");
2481
2482
  doq_timer_erase_marker(doq_socket);
2483
2484
  while((conn = doq_timer_timeout_conn(doq_socket)) != NULL) {
2485
    if(conn->is_deleted ||
2486
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2487
      ngtcp2_conn_in_closing_period(conn->conn) ||
2488
#else
2489
      ngtcp2_conn_is_in_closing_period(conn->conn) ||
2490
#endif
2491
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2492
      ngtcp2_conn_in_draining_period(conn->conn)
2493
#else
2494
      ngtcp2_conn_is_in_draining_period(conn->conn)
2495
#endif
2496
      ) {
2497
      if(verbosity >= VERB_ALGO) {
2498
        char remotestr[256];
2499
        addr_to_str((void*)&conn->key.paddr.addr,
2500
          conn->key.paddr.addrlen, remotestr,
2501
          sizeof(remotestr));
2502
        verbose(VERB_ALGO, "doq conn %s is deleted "
2503
          "after timeout", remotestr);
2504
      }
2505
      doq_delete_connection(doq_socket->cp, conn);
2506
      continue;
2507
    }
2508
    if(!doq_conn_handle_timeout(conn))
2509
      doq_delete_connection(doq_socket->cp, conn);
2510
    else doq_done_setup_timer_and_write(doq_socket->cp, conn);
2511
  }
2512
2513
  if(doq_socket_want_write(doq_socket->cp))
2514
    doq_socket_write_enable(doq_socket->cp);
2515
  else doq_socket_write_disable(doq_socket->cp);
2516
  doq_pickup_timer(doq_socket->cp);
2517
}
2518
2519
void
2520
comm_point_doq_callback(int fd, short event, void* arg)
2521
{
2522
  struct comm_point* c;
2523
  struct doq_pkt_addr paddr;
2524
  int i, pkt_continue, err_drop;
2525
  struct doq_conn* conn;
2526
  struct ngtcp2_pkt_info pi;
2527
  size_t count, num_len;
2528
2529
  c = (struct comm_point*)arg;
2530
  log_assert(c->type == comm_doq);
2531
2532
  log_assert(c && c->doq_socket->pkt_buf && c->fd == fd);
2533
  ub_comm_base_now(c->ev->base);
2534
2535
  /* see if there is a blocked packet, and send that if possible.
2536
   * do not attempt to read yet, even if possible, that would just
2537
   * push more answers in reply to those read packets onto the list
2538
   * of written replies. First attempt to clear the write content out.
2539
   * That keeps the memory usage from bloating up. */
2540
  if(c->doq_socket->have_blocked_pkt) {
2541
    if(!doq_write_blocked_pkt(c)) {
2542
      /* this write has also blocked, attempt to write
2543
       * later. Make sure the event listens to write
2544
       * events. */
2545
      if(!c->doq_socket->event_has_write)
2546
        doq_socket_write_enable(c);
2547
      doq_pickup_timer(c);
2548
      return;
2549
    }
2550
  }
2551
2552
  /* see if there is write interest */
2553
  count = 0;
2554
  num_len = doq_write_list_length(c);
2555
  while((conn = doq_pop_write_conn(c)) != NULL) {
2556
    if(conn->is_deleted ||
2557
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2558
      ngtcp2_conn_in_closing_period(conn->conn) ||
2559
#else
2560
      ngtcp2_conn_is_in_closing_period(conn->conn) ||
2561
#endif
2562
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2563
      ngtcp2_conn_in_draining_period(conn->conn)
2564
#else
2565
      ngtcp2_conn_is_in_draining_period(conn->conn)
2566
#endif
2567
      ) {
2568
      conn->doq_socket = NULL;
2569
      lock_basic_unlock(&conn->lock);
2570
      if(c->doq_socket->have_blocked_pkt) {
2571
        if(!c->doq_socket->event_has_write)
2572
          doq_socket_write_enable(c);
2573
        doq_pickup_timer(c);
2574
        return;
2575
      }
2576
      if(++count > num_len*2)
2577
        break;
2578
      continue;
2579
    }
2580
    if(verbosity >= VERB_ALGO) {
2581
      char remotestr[256];
2582
      addr_to_str((void*)&conn->key.paddr.addr,
2583
        conn->key.paddr.addrlen, remotestr,
2584
        sizeof(remotestr));
2585
      verbose(VERB_ALGO, "doq write connection %s %d",
2586
        remotestr, doq_sockaddr_get_port(
2587
        &conn->key.paddr.addr));
2588
    }
2589
    if(doq_conn_write_streams(c, conn, &err_drop))
2590
      err_drop = 0;
2591
    doq_done_with_write_cb(c, conn, err_drop);
2592
    if(c->doq_socket->have_blocked_pkt) {
2593
      if(!c->doq_socket->event_has_write)
2594
        doq_socket_write_enable(c);
2595
      doq_pickup_timer(c);
2596
      return;
2597
    }
2598
    /* Stop overly long write lists that are created
2599
     * while we are processing. Do those next time there
2600
     * is a write callback. Stops long loops, and keeps
2601
     * fair for other events. */
2602
    if(++count > num_len*2)
2603
      break;
2604
  }
2605
2606
  /* check for data to read */
2607
  if((event&UB_EV_READ)!=0)
2608
    for(i=0; i<NUM_UDP_PER_SELECT; i++) {
2609
    /* there may be a blocked write packet and if so, stop
2610
     * reading because the reply cannot get written. The
2611
     * blocked packet could be written during the conn_recv
2612
     * handling of replies, or for a connection close. */
2613
    if(c->doq_socket->have_blocked_pkt) {
2614
      if(!c->doq_socket->event_has_write)
2615
        doq_socket_write_enable(c);
2616
      doq_pickup_timer(c);
2617
      return;
2618
    }
2619
    sldns_buffer_clear(c->doq_socket->pkt_buf);
2620
    doq_pkt_addr_init(&paddr);
2621
    log_assert(fd != -1);
2622
    log_assert(sldns_buffer_remaining(c->doq_socket->pkt_buf) > 0);
2623
    if(!doq_recv(c, &paddr, &pkt_continue, &pi)) {
2624
      if(pkt_continue)
2625
        continue;
2626
      break;
2627
    }
2628
2629
    /* handle incoming packet from remote addr to localaddr */
2630
    if(verbosity >= VERB_ALGO) {
2631
      char remotestr[256], localstr[256];
2632
      addr_to_str((void*)&paddr.addr, paddr.addrlen,
2633
        remotestr, sizeof(remotestr));
2634
      addr_to_str((void*)&paddr.localaddr,
2635
        paddr.localaddrlen, localstr,
2636
        sizeof(localstr));
2637
      log_info("incoming doq packet from %s port %d on "
2638
        "%s port %d ifindex %d",
2639
        remotestr, doq_sockaddr_get_port(&paddr.addr),
2640
        localstr,
2641
        doq_sockaddr_get_port(&paddr.localaddr),
2642
        paddr.ifindex);
2643
      log_info("doq_recv length %d ecn 0x%x",
2644
        (int)sldns_buffer_limit(c->doq_socket->pkt_buf),
2645
        (int)pi.ecn);
2646
    }
2647
2648
    if(sldns_buffer_limit(c->doq_socket->pkt_buf) == 0)
2649
      continue;
2650
2651
    conn = NULL;
2652
    if(!doq_decode_pkt_header_negotiate(c, &paddr, &conn))
2653
      continue;
2654
    if(!conn) {
2655
      if(!doq_accept(c, &paddr, &conn, &pi))
2656
        continue;
2657
      if(!doq_conn_write_streams(c, conn, NULL)) {
2658
        doq_delete_connection(c, conn);
2659
        continue;
2660
      }
2661
      doq_done_setup_timer_and_write(c, conn);
2662
      continue;
2663
    }
2664
    if(
2665
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2666
      ngtcp2_conn_in_closing_period(conn->conn)
2667
#else
2668
      ngtcp2_conn_is_in_closing_period(conn->conn)
2669
#endif
2670
      ) {
2671
      if(!doq_conn_send_close(c, conn)) {
2672
        doq_delete_connection(c, conn);
2673
      } else {
2674
        doq_done_setup_timer_and_write(c, conn);
2675
      }
2676
      continue;
2677
    }
2678
    if(
2679
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2680
      ngtcp2_conn_in_draining_period(conn->conn)
2681
#else
2682
      ngtcp2_conn_is_in_draining_period(conn->conn)
2683
#endif
2684
      ) {
2685
      doq_done_setup_timer_and_write(c, conn);
2686
      continue;
2687
    }
2688
    if(!doq_conn_recv(c, &paddr, conn, &pi, NULL, &err_drop)) {
2689
      /* The receive failed, and if it also failed to send
2690
       * a close, drop the connection. That means it is not
2691
       * in the closing period. */
2692
      if(err_drop) {
2693
        doq_delete_connection(c, conn);
2694
      } else {
2695
        doq_done_setup_timer_and_write(c, conn);
2696
      }
2697
      continue;
2698
    }
2699
    if(!doq_conn_write_streams(c, conn, &err_drop)) {
2700
      if(err_drop) {
2701
        doq_delete_connection(c, conn);
2702
      } else {
2703
        doq_done_setup_timer_and_write(c, conn);
2704
      }
2705
      continue;
2706
    }
2707
    doq_done_setup_timer_and_write(c, conn);
2708
  }
2709
2710
  /* see if we want to have more write events */
2711
  verbose(VERB_ALGO, "doq check write enable");
2712
  if(doq_socket_want_write(c))
2713
    doq_socket_write_enable(c);
2714
  else doq_socket_write_disable(c);
2715
  doq_pickup_timer(c);
2716
}
2717
2718
/** create new doq server socket structure */
2719
static struct doq_server_socket*
2720
doq_server_socket_create(struct doq_table* table, struct ub_randstate* rnd,
2721
  const void* quic_sslctx, struct comm_point* c, struct comm_base* base,
2722
  struct config_file* cfg)
2723
{
2724
  size_t doq_buffer_size = 4096; /* bytes buffer size, for one packet. */
2725
  struct doq_server_socket* doq_socket;
2726
  doq_socket = calloc(1, sizeof(*doq_socket));
2727
  if(!doq_socket) {
2728
    return NULL;
2729
  }
2730
  doq_socket->table = table;
2731
  doq_socket->rnd = rnd;
2732
  doq_socket->validate_addr = 1;
2733
  /* the doq_socket has its own copy of the static secret, as
2734
   * well as other config values, so that they do not need table.lock */
2735
  doq_socket->static_secret_len = table->static_secret_len;
2736
  doq_socket->static_secret = memdup(table->static_secret,
2737
    table->static_secret_len);
2738
  if(!doq_socket->static_secret) {
2739
    free(doq_socket);
2740
    return NULL;
2741
  }
2742
  doq_socket->ctx = (SSL_CTX*)quic_sslctx;
2743
  doq_socket->idle_timeout = table->idle_timeout;
2744
  doq_socket->sv_scidlen = table->sv_scidlen;
2745
  doq_socket->cp = c;
2746
  doq_socket->pkt_buf = sldns_buffer_new(doq_buffer_size);
2747
  if(!doq_socket->pkt_buf) {
2748
    free(doq_socket->static_secret);
2749
    free(doq_socket);
2750
    return NULL;
2751
  }
2752
  doq_socket->blocked_pkt = sldns_buffer_new(
2753
    sldns_buffer_capacity(doq_socket->pkt_buf));
2754
  if(!doq_socket->pkt_buf) {
2755
    free(doq_socket->static_secret);
2756
    sldns_buffer_free(doq_socket->pkt_buf);
2757
    free(doq_socket);
2758
    return NULL;
2759
  }
2760
  doq_socket->blocked_paddr = calloc(1,
2761
    sizeof(*doq_socket->blocked_paddr));
2762
  if(!doq_socket->blocked_paddr) {
2763
    free(doq_socket->static_secret);
2764
    sldns_buffer_free(doq_socket->pkt_buf);
2765
    sldns_buffer_free(doq_socket->blocked_pkt);
2766
    free(doq_socket);
2767
    return NULL;
2768
  }
2769
  doq_socket->timer = comm_timer_create(base, doq_timer_cb, doq_socket);
2770
  if(!doq_socket->timer) {
2771
    free(doq_socket->static_secret);
2772
    sldns_buffer_free(doq_socket->pkt_buf);
2773
    sldns_buffer_free(doq_socket->blocked_pkt);
2774
    free(doq_socket->blocked_paddr);
2775
    free(doq_socket);
2776
    return NULL;
2777
  }
2778
  memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2779
  comm_base_timept(base, &doq_socket->now_tt, &doq_socket->now_tv);
2780
  doq_socket->cfg = cfg;
2781
  return doq_socket;
2782
}
2783
2784
/** delete doq server socket structure */
2785
static void
2786
doq_server_socket_delete(struct doq_server_socket* doq_socket)
2787
{
2788
  if(!doq_socket)
2789
    return;
2790
  free(doq_socket->static_secret);
2791
#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
2792
  free(doq_socket->quic_method);
2793
#endif
2794
  sldns_buffer_free(doq_socket->pkt_buf);
2795
  sldns_buffer_free(doq_socket->blocked_pkt);
2796
  free(doq_socket->blocked_paddr);
2797
  comm_timer_delete(doq_socket->timer);
2798
  free(doq_socket);
2799
}
2800
2801
/** find repinfo in the doq table */
2802
static struct doq_conn*
2803
doq_lookup_repinfo(struct doq_table* table, struct comm_reply* repinfo)
2804
{
2805
  struct doq_conn* conn;
2806
  struct doq_conn_key key;
2807
  doq_conn_key_from_repinfo(&key, repinfo);
2808
  lock_rw_rdlock(&table->lock);
2809
  conn = doq_conn_find(table, &key.paddr.addr,
2810
    key.paddr.addrlen, &key.paddr.localaddr,
2811
    key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
2812
    key.dcidlen);
2813
  if(conn) {
2814
    lock_basic_lock(&conn->lock);
2815
    lock_rw_unlock(&table->lock);
2816
    return conn;
2817
  }
2818
  lock_rw_unlock(&table->lock);
2819
  return NULL;
2820
}
2821
2822
/** doq find connection and stream. From inside callbacks from worker. */
2823
static int
2824
doq_lookup_conn_stream(struct comm_reply* repinfo, struct comm_point* c,
2825
  struct doq_conn** conn, struct doq_stream** stream)
2826
{
2827
  log_assert(c->doq_socket);
2828
  if(c->doq_socket->current_conn) {
2829
    *conn = c->doq_socket->current_conn;
2830
  } else {
2831
    *conn = doq_lookup_repinfo(c->doq_socket->table, repinfo);
2832
    if((*conn) && (*conn)->is_deleted) {
2833
      lock_basic_unlock(&(*conn)->lock);
2834
      *conn = NULL;
2835
    }
2836
    if(*conn) {
2837
      (*conn)->doq_socket = c->doq_socket;
2838
    }
2839
  }
2840
  if(!*conn) {
2841
    *stream = NULL;
2842
    return 0;
2843
  }
2844
  *stream = doq_stream_find(*conn, repinfo->doq_streamid);
2845
  if(!*stream) {
2846
    if(!c->doq_socket->current_conn) {
2847
      /* Not inside callbacks, we have our own lock on conn.
2848
       * Release it. */
2849
      lock_basic_unlock(&(*conn)->lock);
2850
    }
2851
    return 0;
2852
  }
2853
  if((*stream)->is_closed) {
2854
    /* stream is closed, ignore reply or drop */
2855
    if(!c->doq_socket->current_conn) {
2856
      /* Not inside callbacks, we have our own lock on conn.
2857
       * Release it. */
2858
      lock_basic_unlock(&(*conn)->lock);
2859
    }
2860
    return 0;
2861
  }
2862
  return 1;
2863
}
2864
2865
/** doq send a reply from a comm reply */
2866
static void
2867
doq_socket_send_reply(struct comm_reply* repinfo)
2868
{
2869
  struct doq_conn* conn;
2870
  struct doq_stream* stream;
2871
  log_assert(repinfo->c->type == comm_doq);
2872
  if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2873
    verbose(VERB_ALGO, "doq: send_reply but %s is gone",
2874
      (conn?"stream":"connection"));
2875
    /* No stream, it may have been closed. */
2876
    /* Drop the reply, it cannot be sent. */
2877
    return;
2878
  }
2879
  if(!doq_stream_send_reply(conn, stream, repinfo->c->buffer))
2880
    doq_stream_close(conn, stream, 1);
2881
  if(!repinfo->c->doq_socket->current_conn) {
2882
    /* Not inside callbacks, we have our own lock on conn.
2883
     * Release it. */
2884
    doq_done_with_conn_cb(repinfo->c, conn);
2885
    /* since we sent a reply, or closed it, the assumption is
2886
     * that there is something to write, so enable write event.
2887
     * It waits until the write event happens to write the
2888
     * streams with answers, this allows some answers to be
2889
     * answered before the event loop reaches the doq fd, in
2890
     * repinfo->c->fd, and that collates answers. That would
2891
     * not happen if we write doq packets right now. */
2892
    doq_socket_write_enable(repinfo->c);
2893
  }
2894
}
2895
2896
/** doq drop a reply from a comm reply */
2897
static void
2898
doq_socket_drop_reply(struct comm_reply* repinfo)
2899
{
2900
  struct doq_conn* conn;
2901
  struct doq_stream* stream;
2902
  log_assert(repinfo->c->type == comm_doq);
2903
  if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2904
    verbose(VERB_ALGO, "doq: drop_reply but %s is gone",
2905
      (conn?"stream":"connection"));
2906
    /* The connection or stream is already gone. */
2907
    return;
2908
  }
2909
  doq_stream_close(conn, stream, 1);
2910
  if(!repinfo->c->doq_socket->current_conn) {
2911
    /* Not inside callbacks, we have our own lock on conn.
2912
     * Release it. */
2913
    doq_done_with_conn_cb(repinfo->c, conn);
2914
    doq_socket_write_enable(repinfo->c);
2915
  }
2916
}
2917
#endif /* HAVE_NGTCP2 */
2918
2919
int adjusted_tcp_timeout(struct comm_point* c)
2920
0
{
2921
0
  if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM)
2922
0
    return TCP_QUERY_TIMEOUT_MINIMUM;
2923
0
  return c->tcp_timeout_msec;
2924
0
}
2925
2926
/** Use a new tcp handler for new query fd, set to read query */
2927
static void
2928
setup_tcp_handler(struct comm_point* c, int fd, int cur, int max)
2929
0
{
2930
0
  int handler_usage;
2931
0
  log_assert(c->type == comm_tcp || c->type == comm_http);
2932
0
  log_assert(c->fd == -1);
2933
0
  sldns_buffer_clear(c->buffer);
2934
#ifdef USE_DNSCRYPT
2935
  if (c->dnscrypt)
2936
    sldns_buffer_clear(c->dnscrypt_buffer);
2937
#endif
2938
0
  c->tcp_is_reading = 1;
2939
0
  c->tcp_byte_count = 0;
2940
0
  c->tcp_keepalive = 0;
2941
  /* if more than half the tcp handlers are in use, use a shorter
2942
   * timeout for this TCP connection, we need to make space for
2943
   * other connections to be able to get attention */
2944
  /* If > 50% TCP handler structures in use, set timeout to 1/100th
2945
   *  configured value.
2946
   * If > 65%TCP handler structures in use, set to 1/500th configured
2947
   *  value.
2948
   * If > 80% TCP handler structures in use, set to 0.
2949
   *
2950
   * If the timeout to use falls below 200 milliseconds, an actual
2951
   * timeout of 200ms is used.
2952
   */
2953
0
  handler_usage = (cur * 100) / max;
2954
0
  if(handler_usage > 50 && handler_usage <= 65)
2955
0
    c->tcp_timeout_msec /= 100;
2956
0
  else if (handler_usage > 65 && handler_usage <= 80)
2957
0
    c->tcp_timeout_msec /= 500;
2958
0
  else if (handler_usage > 80)
2959
0
    c->tcp_timeout_msec = 0;
2960
0
  comm_point_start_listening(c, fd, adjusted_tcp_timeout(c));
2961
0
}
2962
2963
void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
2964
  short ATTR_UNUSED(event), void* arg)
2965
0
{
2966
0
  struct comm_base* b = (struct comm_base*)arg;
2967
  /* timeout for the slow accept, re-enable accepts again */
2968
0
  if(b->start_accept) {
2969
0
    verbose(VERB_ALGO, "wait is over, slow accept disabled");
2970
0
    fptr_ok(fptr_whitelist_start_accept(b->start_accept));
2971
0
    (*b->start_accept)(b->cb_arg);
2972
0
    b->eb->slow_accept_enabled = 0;
2973
0
  }
2974
0
}
2975
2976
/*
 * Accept a new connection on the listening comm point.
 * c: comm point with the fd to accept on.
 * addr: returns the address of the peer.
 * addrlen: returns the length of addr.
 * returns the new fd, set nonblocking, or -1 on failure. Failures that
 * are expected for a nonblocking accept are not logged. When the system
 * is out of file descriptors and the comm base has a stop_accept
 * callback, accepting is stopped and a timer is set up to enable it
 * again after NETEVENT_SLOW_ACCEPT_TIME msec. If the connection limit
 * for the address is exceeded, the new fd is closed and -1 is returned.
 */
int comm_point_perform_accept(struct comm_point* c,
  struct sockaddr_storage* addr, socklen_t* addrlen)
{
  int new_fd;
  *addrlen = (socklen_t)sizeof(*addr);
#ifndef HAVE_ACCEPT4
  new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
#else
  /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
  new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
#endif
  if(new_fd == -1) {
#ifndef USE_WINSOCK
    /* EINTR is signal interrupt. others are closed connection. */
    if( errno == EINTR || errno == EAGAIN
#ifdef EWOULDBLOCK
      || errno == EWOULDBLOCK
#endif
#ifdef ECONNABORTED
      || errno == ECONNABORTED
#endif
#ifdef EPROTO
      || errno == EPROTO
#endif /* EPROTO */
      )
      return -1;
#if defined(ENFILE) && defined(EMFILE)
    if(errno == ENFILE || errno == EMFILE) {
      /* Keep the error of accept(); the logging and time
       * calls below may overwrite errno before it is
       * printed. */
      int accept_errno = errno;
      /* out of file descriptors, likely outside of our
       * control. stop accept() calls for some time */
      if(c->ev->base->stop_accept) {
        struct comm_base* b = c->ev->base;
        struct timeval tv;
        verbose(VERB_ALGO, "out of file descriptors: "
          "slow accept");
        ub_comm_base_now(b);
        /* limit how often this is logged */
        if(b->eb->last_slow_log+SLOW_LOG_TIME <=
          b->eb->secs) {
          b->eb->last_slow_log = b->eb->secs;
          verbose(VERB_OPS, "accept failed, "
            "slow down accept for %d "
            "msec: %s",
            NETEVENT_SLOW_ACCEPT_TIME,
            sock_strerror(accept_errno));
        }
        b->eb->slow_accept_enabled = 1;
        fptr_ok(fptr_whitelist_stop_accept(
          b->stop_accept));
        (*b->stop_accept)(b->cb_arg);
        /* set timeout, no mallocs */
        tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
        tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
        b->eb->slow_accept = ub_event_new(b->eb->base,
          -1, UB_EV_TIMEOUT,
          comm_base_handle_slow_accept, b);
        if(b->eb->slow_accept == NULL) {
          /* we do not want to log here, because
           * that would spam the logfiles.
           * error: "event_base_set failed." */
        }
        else if(ub_event_add(b->eb->slow_accept, &tv)
          != 0) {
          /* we do not want to log here,
           * error: "event_add failed." */
        }
      } else {
        log_err("accept, with no slow down, "
          "failed: %s", sock_strerror(accept_errno));
      }
      return -1;
    }
#endif
#else /* USE_WINSOCK */
    if(WSAGetLastError() == WSAEINPROGRESS ||
      WSAGetLastError() == WSAECONNRESET)
      return -1;
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
      return -1;
    }
#endif
    log_err_addr("accept failed", sock_strerror(errno), addr,
      *addrlen);
    return -1;
  }
  if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
    /* count the connection for the address, reject if over limit */
    c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
    if(!tcl_new_connection(c->tcl_addr)) {
      if(verbosity >= 3)
        log_err_addr("accept rejected",
        "connection limit exceeded", addr, *addrlen);
      sock_close(new_fd);
      return -1;
    }
  }
#ifndef HAVE_ACCEPT4
  fd_set_nonblock(new_fd);
#endif
  return new_fd;
}
3076
3077
#ifdef USE_WINSOCK
3078
static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
3079
#ifdef HAVE_BIO_SET_CALLBACK_EX
3080
  size_t ATTR_UNUSED(len),
3081
#endif
3082
        int ATTR_UNUSED(argi), long argl,
3083
#ifndef HAVE_BIO_SET_CALLBACK_EX
3084
  long retvalue
3085
#else
3086
  int retvalue, size_t* ATTR_UNUSED(processed)
3087
#endif
3088
  )
3089
{
3090
  int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
3091
  verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
3092
    (oper&BIO_CB_RETURN)?"return":"before",
3093
    (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
3094
    wsa_err==WSAEWOULDBLOCK?"wsawb":"");
3095
  /* on windows, check if previous operation caused EWOULDBLOCK */
3096
  if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
3097
    (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
3098
    if(wsa_err == WSAEWOULDBLOCK)
3099
      ub_winsock_tcp_wouldblock((struct ub_event*)
3100
        BIO_get_callback_arg(b), UB_EV_READ);
3101
  }
3102
  if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
3103
    (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
3104
    if(wsa_err == WSAEWOULDBLOCK)
3105
      ub_winsock_tcp_wouldblock((struct ub_event*)
3106
        BIO_get_callback_arg(b), UB_EV_WRITE);
3107
  }
3108
  /* return original return value */
3109
  return retvalue;
3110
}
3111
3112
/** set win bio callbacks for nonblocking operations */
3113
void
3114
comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
3115
{
3116
  SSL* ssl = (SSL*)thessl;
3117
  /* set them both just in case, but usually they are the same BIO */
3118
#ifdef HAVE_BIO_SET_CALLBACK_EX
3119
  BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb);
3120
#else
3121
  BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
3122
#endif
3123
  BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
3124
#ifdef HAVE_BIO_SET_CALLBACK_EX
3125
  BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb);
3126
#else
3127
  BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
3128
#endif
3129
  BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
3130
}
3131
#endif
3132
3133
#ifdef HAVE_NGHTTP2
3134
/** Create http2 session server.  Per connection, after TCP accepted.*/
3135
static int http2_session_server_create(struct http2_session* h2_session)
3136
{
3137
  log_assert(h2_session->callbacks);
3138
  h2_session->is_drop = 0;
3139
  if(nghttp2_session_server_new(&h2_session->session,
3140
      h2_session->callbacks,
3141
    h2_session) == NGHTTP2_ERR_NOMEM) {
3142
    log_err("failed to create nghttp2 session server");
3143
    return 0;
3144
  }
3145
3146
  return 1;
3147
}
3148
3149
/** Submit http2 setting to session. Once per session. */
3150
static int http2_submit_settings(struct http2_session* h2_session)
3151
{
3152
  int ret;
3153
  nghttp2_settings_entry settings[1] = {
3154
    {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS,
3155
     h2_session->c->http2_max_streams}};
3156
3157
  ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE,
3158
    settings, 1);
3159
  if(ret) {
3160
    verbose(VERB_QUERY, "http2: submit_settings failed, "
3161
      "error: %s", nghttp2_strerror(ret));
3162
    return 0;
3163
  }
3164
  return 1;
3165
}
3166
#endif /* HAVE_NGHTTP2 */
3167
3168
#ifdef HAVE_NGHTTP2
3169
/** Delete http2 stream. After session delete or stream close callback */
3170
static void http2_stream_delete(struct http2_session* h2_session,
3171
  struct http2_stream* h2_stream)
3172
{
3173
  if(h2_stream->mesh_state) {
3174
    mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state,
3175
      h2_session->c);
3176
    h2_stream->mesh_state = NULL;
3177
  }
3178
  http2_req_stream_clear(h2_stream);
3179
  free(h2_stream);
3180
}
3181
#endif /* HAVE_NGHTTP2 */
3182
3183
/**
 * Delete http2 session server. After closing connection.
 * Deletes the nghttp2 session and all the streams in the stream list,
 * and resets the drop flags and the current stream of the comm point.
 * Without nghttp2 support this does nothing.
 * @param h2_session: the http2 session; the structure itself is kept.
 */
static void http2_session_server_delete(struct http2_session* h2_session)
{
#ifdef HAVE_NGHTTP2
  struct http2_stream* h2_stream, *next;
  nghttp2_session_del(h2_session->session); /* NULL input is fine */
  h2_session->session = NULL;
  h2_stream = h2_session->first_stream;
  while(h2_stream) {
    /* fetch next first, the delete frees the stream */
    next = h2_stream->next;
    http2_stream_delete(h2_session, h2_stream);
    h2_stream = next;
  }
  h2_session->first_stream = NULL;
  h2_session->is_drop = 0;
  h2_session->postpone_drop = 0;
  h2_session->c->h2_stream = NULL;
#endif
  (void)h2_session;
}
3202
3203
void
3204
comm_point_tcp_accept_callback(int fd, short event, void* arg)
3205
0
{
3206
0
  struct comm_point* c = (struct comm_point*)arg, *c_hdl;
3207
0
  int new_fd;
3208
0
  log_assert(c->type == comm_tcp_accept);
3209
0
  if(!(event & UB_EV_READ)) {
3210
0
    log_info("ignoring tcp accept event %d", (int)event);
3211
0
    return;
3212
0
  }
3213
0
  ub_comm_base_now(c->ev->base);
3214
  /* find free tcp handler. */
3215
0
  if(!c->tcp_free) {
3216
0
    log_warn("accepted too many tcp, connections full");
3217
0
    return;
3218
0
  }
3219
  /* accept incoming connection. */
3220
0
  c_hdl = c->tcp_free;
3221
  /* clear leftover flags from previous use, and then set the
3222
   * correct event base for the event structure for libevent */
3223
0
  ub_event_free(c_hdl->ev->ev);
3224
0
  c_hdl->ev->ev = NULL;
3225
0
  if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) ||
3226
0
    c_hdl->type == comm_local || c_hdl->type == comm_raw)
3227
0
    c_hdl->tcp_do_toggle_rw = 0;
3228
0
  else  c_hdl->tcp_do_toggle_rw = 1;
3229
3230
0
  if(c_hdl->type == comm_http) {
3231
#ifdef HAVE_NGHTTP2
3232
    if(!c_hdl->h2_session ||
3233
      !http2_session_server_create(c_hdl->h2_session)) {
3234
      log_warn("failed to create nghttp2");
3235
      return;
3236
    }
3237
    if(!c_hdl->h2_session ||
3238
      !http2_submit_settings(c_hdl->h2_session)) {
3239
      log_warn("failed to submit http2 settings");
3240
      if(c_hdl->h2_session)
3241
        http2_session_server_delete(c_hdl->h2_session);
3242
      return;
3243
    }
3244
    if(!c->ssl) {
3245
      c_hdl->tcp_do_toggle_rw = 0;
3246
      c_hdl->use_h2 = 1;
3247
    }
3248
#endif
3249
0
    c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3250
0
      UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3251
0
      comm_point_http_handle_callback, c_hdl);
3252
0
  } else {
3253
0
    c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3254
0
      UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3255
0
      comm_point_tcp_handle_callback, c_hdl);
3256
0
  }
3257
0
  if(!c_hdl->ev->ev) {
3258
0
    log_warn("could not ub_event_new, dropped tcp");
3259
#ifdef HAVE_NGHTTP2
3260
    if(c_hdl->type == comm_http && c_hdl->h2_session)
3261
      http2_session_server_delete(c_hdl->h2_session);
3262
#endif
3263
0
    return;
3264
0
  }
3265
0
  log_assert(fd != -1);
3266
0
  (void)fd;
3267
0
  new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.remote_addr,
3268
0
    &c_hdl->repinfo.remote_addrlen);
3269
0
  if(new_fd == -1) {
3270
#ifdef HAVE_NGHTTP2
3271
    if(c_hdl->type == comm_http && c_hdl->h2_session)
3272
      http2_session_server_delete(c_hdl->h2_session);
3273
#endif
3274
0
    return;
3275
0
  }
3276
  /* Copy remote_address to client_address.
3277
   * Simplest way/time for streams to do that. */
3278
0
  c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen;
3279
0
  memmove(&c_hdl->repinfo.client_addr,
3280
0
    &c_hdl->repinfo.remote_addr,
3281
0
    c_hdl->repinfo.remote_addrlen);
3282
0
  if(c->ssl) {
3283
0
    c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
3284
0
    if(!c_hdl->ssl) {
3285
0
      c_hdl->fd = new_fd;
3286
0
      comm_point_close(c_hdl);
3287
0
      return;
3288
0
    }
3289
0
    c_hdl->ssl_shake_state = comm_ssl_shake_read;
3290
#ifdef USE_WINSOCK
3291
    comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
3292
#endif
3293
0
  }
3294
3295
  /* grab the tcp handler buffers */
3296
0
  c->cur_tcp_count++;
3297
0
  c->tcp_free = c_hdl->tcp_free;
3298
0
  c_hdl->tcp_free = NULL;
3299
0
  if(!c->tcp_free) {
3300
    /* stop accepting incoming queries for now. */
3301
0
    comm_point_stop_listening(c);
3302
0
  }
3303
0
  setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
3304
0
}
3305
3306
/** Make tcp handler free for next assignment */
3307
static void
3308
reclaim_tcp_handler(struct comm_point* c)
3309
0
{
3310
0
  log_assert(c->type == comm_tcp);
3311
0
  if(c->ssl) {
3312
0
#ifdef HAVE_SSL
3313
0
    SSL_shutdown(c->ssl);
3314
0
    SSL_free(c->ssl);
3315
0
    c->ssl = NULL;
3316
0
#endif
3317
0
  }
3318
0
  comm_point_close(c);
3319
0
  if(c->tcp_parent) {
3320
0
    if(c != c->tcp_parent->tcp_free) {
3321
0
      c->tcp_parent->cur_tcp_count--;
3322
0
      c->tcp_free = c->tcp_parent->tcp_free;
3323
0
      c->tcp_parent->tcp_free = c;
3324
0
    }
3325
0
    if(!c->tcp_free) {
3326
      /* re-enable listening on accept socket */
3327
0
      comm_point_start_listening(c->tcp_parent, -1, -1);
3328
0
    }
3329
0
  }
3330
0
  c->tcp_more_read_again = NULL;
3331
0
  c->tcp_more_write_again = NULL;
3332
0
  c->tcp_byte_count = 0;
3333
0
  c->pp2_header_state = pp2_header_none;
3334
0
  sldns_buffer_clear(c->buffer);
3335
0
}
3336
3337
/** do the callback when writing is done */
3338
static void
3339
tcp_callback_writer(struct comm_point* c)
3340
0
{
3341
0
  log_assert(c->type == comm_tcp);
3342
0
  if(!c->tcp_write_and_read) {
3343
0
    sldns_buffer_clear(c->buffer);
3344
0
    c->tcp_byte_count = 0;
3345
0
  }
3346
0
  if(c->tcp_do_toggle_rw)
3347
0
    c->tcp_is_reading = 1;
3348
  /* switch from listening(write) to listening(read) */
3349
0
  if(c->tcp_req_info) {
3350
0
    tcp_req_info_handle_writedone(c->tcp_req_info);
3351
0
  } else {
3352
0
    comm_point_stop_listening(c);
3353
0
    if(c->tcp_write_and_read) {
3354
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
3355
0
      if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN,
3356
0
        &c->repinfo) ) {
3357
0
        comm_point_start_listening(c, -1,
3358
0
          adjusted_tcp_timeout(c));
3359
0
      }
3360
0
    } else {
3361
0
      comm_point_start_listening(c, -1,
3362
0
          adjusted_tcp_timeout(c));
3363
0
    }
3364
0
  }
3365
0
}
3366
3367
/** do the callback when reading is done */
3368
static void
3369
tcp_callback_reader(struct comm_point* c)
3370
0
{
3371
0
  log_assert(c->type == comm_tcp || c->type == comm_local);
3372
0
  sldns_buffer_flip(c->buffer);
3373
0
  if(c->tcp_do_toggle_rw)
3374
0
    c->tcp_is_reading = 0;
3375
0
  c->tcp_byte_count = 0;
3376
0
  if(c->tcp_req_info) {
3377
0
    tcp_req_info_handle_readdone(c->tcp_req_info);
3378
0
  } else {
3379
0
    if(c->type == comm_tcp)
3380
0
      comm_point_stop_listening(c);
3381
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
3382
0
    if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
3383
0
      comm_point_start_listening(c, -1,
3384
0
          adjusted_tcp_timeout(c));
3385
0
    }
3386
0
  }
3387
0
}
3388
3389
#ifdef HAVE_SSL
3390
/** true if the ssl handshake error has to be squelched from the logs */
3391
int
3392
squelch_err_ssl_handshake(unsigned long err)
3393
0
{
3394
0
  if(verbosity >= VERB_QUERY)
3395
0
    return 0; /* only squelch on low verbosity */
3396
0
  if(ERR_GET_LIB(err) == ERR_LIB_SSL &&
3397
0
    (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST ||
3398
0
     ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST ||
3399
0
     ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER ||
3400
0
     ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE
3401
0
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
3402
0
     || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER
3403
0
#endif
3404
0
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
3405
0
     || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL
3406
0
     || ERR_GET_REASON(err) == SSL_R_UNSUPPORTED_PROTOCOL
3407
0
#  ifdef SSL_R_VERSION_TOO_LOW
3408
0
     || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW
3409
0
#  endif
3410
0
#endif
3411
0
    ))
3412
0
    return 1;
3413
0
  return 0;
3414
0
}
3415
#endif /* HAVE_SSL */
3416
3417
/** continue ssl handshake */
3418
#ifdef HAVE_SSL
3419
static int
3420
ssl_handshake(struct comm_point* c)
3421
0
{
3422
0
  int r;
3423
0
  if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
3424
    /* read condition satisfied back to writing */
3425
0
    comm_point_listen_for_rw(c, 0, 1);
3426
0
    c->ssl_shake_state = comm_ssl_shake_none;
3427
0
    return 1;
3428
0
  }
3429
0
  if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
3430
    /* write condition satisfied, back to reading */
3431
0
    comm_point_listen_for_rw(c, 1, 0);
3432
0
    c->ssl_shake_state = comm_ssl_shake_none;
3433
0
    return 1;
3434
0
  }
3435
3436
0
  ERR_clear_error();
3437
0
  r = SSL_do_handshake(c->ssl);
3438
0
  if(r != 1) {
3439
0
    int want = SSL_get_error(c->ssl, r);
3440
0
    if(want == SSL_ERROR_WANT_READ) {
3441
0
      if(c->ssl_shake_state == comm_ssl_shake_read)
3442
0
        return 1;
3443
0
      c->ssl_shake_state = comm_ssl_shake_read;
3444
0
      comm_point_listen_for_rw(c, 1, 0);
3445
0
      return 1;
3446
0
    } else if(want == SSL_ERROR_WANT_WRITE) {
3447
0
      if(c->ssl_shake_state == comm_ssl_shake_write)
3448
0
        return 1;
3449
0
      c->ssl_shake_state = comm_ssl_shake_write;
3450
0
      comm_point_listen_for_rw(c, 0, 1);
3451
0
      return 1;
3452
0
    } else if(r == 0) {
3453
0
      return 0; /* closed */
3454
0
    } else if(want == SSL_ERROR_SYSCALL) {
3455
      /* SYSCALL and errno==0 means closed uncleanly */
3456
0
#ifdef EPIPE
3457
0
      if(errno == EPIPE && verbosity < 2)
3458
0
        return 0; /* silence 'broken pipe' */
3459
0
#endif
3460
0
#ifdef ECONNRESET
3461
0
      if(errno == ECONNRESET && verbosity < 2)
3462
0
        return 0; /* silence reset by peer */
3463
0
#endif
3464
0
      if(!tcp_connect_errno_needs_log(
3465
0
        (struct sockaddr*)&c->repinfo.remote_addr,
3466
0
        c->repinfo.remote_addrlen))
3467
0
        return 0; /* silence connect failures that
3468
        show up because after connect this is the
3469
        first system call that accesses the socket */
3470
0
      if(errno != 0)
3471
0
        log_err("SSL_handshake syscall: %s",
3472
0
          strerror(errno));
3473
0
      return 0;
3474
0
    } else {
3475
0
      unsigned long err = ERR_get_error();
3476
0
      if(!squelch_err_ssl_handshake(err)) {
3477
0
        long vr;
3478
0
        log_crypto_err_io_code("ssl handshake failed",
3479
0
          want, err);
3480
0
        if((vr=SSL_get_verify_result(c->ssl)) != 0)
3481
0
          log_err("ssl handshake cert error: %s",
3482
0
            X509_verify_cert_error_string(
3483
0
            vr));
3484
0
        log_addr(VERB_OPS, "ssl handshake failed",
3485
0
          &c->repinfo.remote_addr,
3486
0
          c->repinfo.remote_addrlen);
3487
0
      }
3488
0
      return 0;
3489
0
    }
3490
0
  }
3491
  /* this is where peer verification could take place */
3492
0
  if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
3493
    /* verification */
3494
0
    if(SSL_get_verify_result(c->ssl) == X509_V_OK) {
3495
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
3496
      X509* x = SSL_get1_peer_certificate(c->ssl);
3497
#else
3498
0
      X509* x = SSL_get_peer_certificate(c->ssl);
3499
0
#endif
3500
0
      if(!x) {
3501
0
        log_addr(VERB_ALGO, "SSL connection failed: "
3502
0
          "no certificate",
3503
0
          &c->repinfo.remote_addr,
3504
0
          c->repinfo.remote_addrlen);
3505
0
        return 0;
3506
0
      }
3507
0
      log_cert(VERB_ALGO, "peer certificate", x);
3508
0
#ifdef HAVE_SSL_GET0_PEERNAME
3509
0
      if(SSL_get0_peername(c->ssl)) {
3510
0
        char buf[255];
3511
0
        snprintf(buf, sizeof(buf), "SSL connection "
3512
0
          "to %s authenticated",
3513
0
          SSL_get0_peername(c->ssl));
3514
0
        log_addr(VERB_ALGO, buf, &c->repinfo.remote_addr,
3515
0
          c->repinfo.remote_addrlen);
3516
0
      } else {
3517
0
#endif
3518
0
        log_addr(VERB_ALGO, "SSL connection "
3519
0
          "authenticated", &c->repinfo.remote_addr,
3520
0
          c->repinfo.remote_addrlen);
3521
0
#ifdef HAVE_SSL_GET0_PEERNAME
3522
0
      }
3523
0
#endif
3524
0
      X509_free(x);
3525
0
    } else {
3526
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
3527
      X509* x = SSL_get1_peer_certificate(c->ssl);
3528
#else
3529
0
      X509* x = SSL_get_peer_certificate(c->ssl);
3530
0
#endif
3531
0
      if(x) {
3532
0
        log_cert(VERB_ALGO, "peer certificate", x);
3533
0
        X509_free(x);
3534
0
      }
3535
0
      log_addr(VERB_ALGO, "SSL connection failed: "
3536
0
        "failed to authenticate",
3537
0
        &c->repinfo.remote_addr,
3538
0
        c->repinfo.remote_addrlen);
3539
0
      return 0;
3540
0
    }
3541
0
  } else {
3542
    /* unauthenticated, the verify peer flag was not set
3543
     * in c->ssl when the ssl object was created from ssl_ctx */
3544
0
    log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr,
3545
0
      c->repinfo.remote_addrlen);
3546
0
  }
3547
3548
0
#ifdef HAVE_SSL_GET0_ALPN_SELECTED
3549
  /* check if http2 use is negotiated */
3550
0
  if(c->type == comm_http && c->h2_session) {
3551
0
    const unsigned char *alpn;
3552
0
    unsigned int alpnlen = 0;
3553
0
    SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen);
3554
0
    if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) {
3555
      /* connection upgraded to HTTP2 */
3556
0
      c->tcp_do_toggle_rw = 0;
3557
0
      c->use_h2 = 1;
3558
0
    } else {
3559
0
      verbose(VERB_ALGO, "client doesn't support HTTP/2");
3560
0
      return 0;
3561
0
    }
3562
0
  }
3563
0
#endif
3564
3565
  /* setup listen rw correctly */
3566
0
  if(c->tcp_is_reading) {
3567
0
    if(c->ssl_shake_state != comm_ssl_shake_read)
3568
0
      comm_point_listen_for_rw(c, 1, 0);
3569
0
  } else {
3570
0
    comm_point_listen_for_rw(c, 0, 1);
3571
0
  }
3572
0
  c->ssl_shake_state = comm_ssl_shake_none;
3573
0
  return 1;
3574
0
}
3575
#endif /* HAVE_SSL */
3576
3577
#ifdef HAVE_SSL
/**
 * Work out what the TCP read callback must return after SSL_read()
 * returned r <= 0.  This is the single place where SSL_read failures
 * are classified, it is shared by every read step in ssl_handle_read.
 * @param c: comm point whose c->ssl was read from.
 * @param r: the (<= 0) return value of SSL_read.
 * @return: value for the read callback to return; 1 to keep the
 *	connection and wait for more events, 0 on close or error (or
 *	whatever tcp_req_info_handle_read_close returns on a clean
 *	shutdown when c->tcp_req_info is set).
 */
static int
ssl_handle_read_failure(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0; /* shutdown, closed */
	} else if(want == SSL_ERROR_WANT_READ) {
#ifdef USE_WINSOCK
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
#endif
		return 1; /* read more later */
	} else if(want == SSL_ERROR_WANT_WRITE) {
		/* the read needs a write first; remember that and
		 * listen for the write event */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	} else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err_io("could not SSL_read", want);
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Continues a pending handshake, then reads (in this order) the
 * PROXYv2 header if enabled and not yet done, the two byte length
 * prefix, and the message itself into c->buffer.  Calls
 * tcp_callback_reader once nothing remains to be read in the buffer.
 * Without HAVE_SSL it always fails.
 * @param c: comm point with the ssl connection.
 * @return: 0 on error or close, 1 to continue.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake is not finished, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
		struct pp2_header* header = NULL;
		size_t want_read_size = 0;
		size_t current_read_size = 0;
		if(c->pp2_header_state == pp2_header_none) {
			want_read_size = PP2_HEADER_SIZE;
			if(sldns_buffer_remaining(c->buffer)<want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading fixed "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = want_read_size;
			if(c->tcp_byte_count < current_read_size) {
				ERR_clear_error();
				if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(
					c->buffer, c->tcp_byte_count),
					(int)(current_read_size -
					c->tcp_byte_count))) <= 0)
					return ssl_handle_read_failure(c, r);
				c->tcp_byte_count += r;
				sldns_buffer_skip(c->buffer, r);
				/* partial read, come back for the rest */
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_init;
			}
		}
		if(c->pp2_header_state == pp2_header_init) {
			int err;
			err = pp2_read_header(
				sldns_buffer_begin(c->buffer),
				sldns_buffer_limit(c->buffer));
			if(err) {
				log_err("proxy_protocol: could not parse "
					"PROXYv2 header (%s)",
					pp_lookup_error(err));
				return 0;
			}
			header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
			/* length of the variable part that follows the
			 * fixed part of the header */
			want_read_size = ntohs(header->len);
			if(sldns_buffer_limit(c->buffer) <
				PP2_HEADER_SIZE + want_read_size) {
				log_err_addr("proxy_protocol: not enough "
					"buffer size to read PROXYv2 header", "",
					&c->repinfo.remote_addr,
					c->repinfo.remote_addrlen);
				return 0;
			}
			verbose(VERB_ALGO, "proxy_protocol: reading variable "
				"part of PROXYv2 header (len %lu)",
				(unsigned long)want_read_size);
			current_read_size = PP2_HEADER_SIZE + want_read_size;
			if(want_read_size == 0) {
				/* nothing more to read; header is complete */
				c->pp2_header_state = pp2_header_done;
			} else if(c->tcp_byte_count < current_read_size) {
				ERR_clear_error();
				if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(
					c->buffer, c->tcp_byte_count),
					(int)(current_read_size -
					c->tcp_byte_count))) <= 0)
					return ssl_handle_read_failure(c, r);
				c->tcp_byte_count += r;
				sldns_buffer_skip(c->buffer, r);
				/* partial read, come back for the rest */
				if(c->tcp_byte_count != current_read_size) return 1;
				c->pp2_header_state = pp2_header_done;
			}
		}
		if(c->pp2_header_state != pp2_header_done || !header) {
			log_err_addr("proxy_protocol: wrong state for the "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		sldns_buffer_flip(c->buffer);
		if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
			log_err_addr("proxy_protocol: could not consume "
				"PROXYv2 header", "", &c->repinfo.remote_addr,
				c->repinfo.remote_addrlen);
			return 0;
		}
		verbose(VERB_ALGO, "proxy_protocol: successful read of "
			"PROXYv2 header");
		/* Clear and reset the buffer to read the following
		 * DNS packet(s). */
		sldns_buffer_clear(c->buffer);
		c->tcp_byte_count = 0;
		return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		ERR_clear_error();
		if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
			c->tcp_byte_count), (int)(sizeof(uint16_t) -
			c->tcp_byte_count))) <= 0)
			return ssl_handle_read_failure(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		/* the length prefix becomes the buffer limit */
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	if(sldns_buffer_remaining(c->buffer) > 0) {
		ERR_clear_error();
		r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
		if(r <= 0)
			return ssl_handle_read_failure(c, r);
		sldns_buffer_skip(c->buffer, (ssize_t)r);
	}
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
3829
3830
#ifdef HAVE_SSL
/**
 * Work out what the TCP write callback must return after SSL_write()
 * returned r <= 0.  Shared by both write steps in ssl_handle_write.
 * @param c: comm point whose c->ssl was written to.
 * @param r: the (<= 0) return value of SSL_write.
 * @return: value for the write callback to return; 1 to keep the
 *	connection and wait for more events, 0 on close or error.
 */
static int
ssl_handle_write_failure(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN) {
		return 0; /* closed */
	} else if(want == SSL_ERROR_WANT_READ) {
		/* the write needs a read first; remember that and
		 * listen for the read event */
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	} else if(want == SSL_ERROR_WANT_WRITE) {
#ifdef USE_WINSOCK
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
#endif
		return 1; /* write more later */
	} else if(want == SSL_ERROR_SYSCALL) {
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err_io("could not SSL_write", want);
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Continues a pending handshake, then writes the two byte length
 * prefix (combined with the message when that fits in a
 * LDNS_RR_BUF_SIZE stack buffer) and after that the remainder of the
 * message.  The data comes from c->tcp_write_pkt when
 * c->tcp_write_and_read is set, otherwise from c->buffer.  Calls
 * tcp_callback_writer once everything has been written.
 * Without HAVE_SSL it always fails.
 * @param c: comm point with the ssl connection.
 * @return: 0 on error or close, 1 to continue.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake is not finished, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
	if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
		/* the length prefix has not been (fully) written yet */
		uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		if(c->tcp_write_and_read) {
			if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) {
				/* combine the tcp length and the query for
				 * write, this emulates writev */
				uint8_t buf[LDNS_RR_BUF_SIZE];
				memmove(buf, &len, sizeof(uint16_t));
				memmove(buf+sizeof(uint16_t),
					c->tcp_write_pkt,
					c->tcp_write_pkt_len);
				r = SSL_write(c->ssl,
					(void*)(buf+c->tcp_write_byte_count),
					(int)(c->tcp_write_pkt_len + 2 -
					c->tcp_write_byte_count));
			} else {
				r = SSL_write(c->ssl,
					(void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
					(int)(sizeof(uint16_t)-c->tcp_write_byte_count));
			}
		} else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
			LDNS_RR_BUF_SIZE) {
			/* combine the tcp length and the query for write,
			 * this emulates writev */
			uint8_t buf[LDNS_RR_BUF_SIZE];
			memmove(buf, &len, sizeof(uint16_t));
			memmove(buf+sizeof(uint16_t),
				sldns_buffer_current(c->buffer),
				sldns_buffer_remaining(c->buffer));
			r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
				(int)(sizeof(uint16_t)+
				sldns_buffer_remaining(c->buffer)
				- c->tcp_byte_count));
		} else {
			r = SSL_write(c->ssl,
				(void*)(((uint8_t*)&len)+c->tcp_byte_count),
				(int)(sizeof(uint16_t)-c->tcp_byte_count));
		}
		if(r <= 0)
			return ssl_handle_write_failure(c, r);
		if(c->tcp_write_and_read) {
			c->tcp_write_byte_count += r;
			if(c->tcp_write_byte_count < sizeof(uint16_t))
				return 1;
		} else {
			c->tcp_byte_count += r;
			if(c->tcp_byte_count < sizeof(uint16_t))
				return 1;
			/* account for message bytes that went out together
			 * with the length prefix */
			sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
				sizeof(uint16_t));
		}
		if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0);
	log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
	ERR_clear_error();
	if(c->tcp_write_and_read) {
		/* tcp_write_byte_count includes the 2 length bytes */
		r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
			(int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count));
	} else {
		r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
	}
	if(r <= 0)
		return ssl_handle_write_failure(c, r);
	if(c->tcp_write_and_read) {
		c->tcp_write_byte_count += r;
	} else {
		sldns_buffer_skip(c->buffer, (ssize_t)r);
	}

	if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
3976
3977
/** handle ssl tcp connection with dns contents */
3978
static int
3979
ssl_handle_it(struct comm_point* c, int is_write)
3980
0
{
3981
  /* handle case where renegotiation wants read during write call
3982
   * or write during read calls */
3983
0
  if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write)
3984
0
    return ssl_handle_read(c);
3985
0
  else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read)
3986
0
    return ssl_handle_write(c);
3987
  /* handle read events for read operation and write events for a
3988
   * write operation */
3989
0
  else if(!is_write)
3990
0
    return ssl_handle_read(c);
3991
0
  return ssl_handle_write(c);
3992
0
}
3993
3994
/**
3995
 * Handle tcp reading callback.
3996
 * @param fd: file descriptor of socket.
3997
 * @param c: comm point to read from into buffer.
3998
 * @param short_ok: if true, very short packets are OK (for comm_local).
3999
 * @return: 0 on error
4000
 */
4001
static int
4002
comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
4003
0
{
4004
0
  ssize_t r;
4005
0
  int recv_initial = 0;
4006
0
  log_assert(c->type == comm_tcp || c->type == comm_local);
4007
0
  if(c->ssl)
4008
0
    return ssl_handle_it(c, 0);
4009
0
  if(!c->tcp_is_reading && !c->tcp_write_and_read)
4010
0
    return 0;
4011
4012
0
  log_assert(fd != -1);
4013
0
  if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
4014
0
    struct pp2_header* header = NULL;
4015
0
    size_t want_read_size = 0;
4016
0
    size_t current_read_size = 0;
4017
0
    if(c->pp2_header_state == pp2_header_none) {
4018
0
      want_read_size = PP2_HEADER_SIZE;
4019
0
      if(sldns_buffer_remaining(c->buffer)<want_read_size) {
4020
0
        log_err_addr("proxy_protocol: not enough "
4021
0
          "buffer size to read PROXYv2 header", "",
4022
0
          &c->repinfo.remote_addr,
4023
0
          c->repinfo.remote_addrlen);
4024
0
        return 0;
4025
0
      }
4026
0
      verbose(VERB_ALGO, "proxy_protocol: reading fixed "
4027
0
        "part of PROXYv2 header (len %lu)",
4028
0
        (unsigned long)want_read_size);
4029
0
      current_read_size = want_read_size;
4030
0
      if(c->tcp_byte_count < current_read_size) {
4031
0
        r = recv(fd, (void*)sldns_buffer_at(c->buffer,
4032
0
          c->tcp_byte_count),
4033
0
          current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
4034
0
        if(r == 0) {
4035
0
          if(c->tcp_req_info)
4036
0
            return tcp_req_info_handle_read_close(c->tcp_req_info);
4037
0
          return 0;
4038
0
        } else if(r == -1) {
4039
0
          goto recv_error_initial;
4040
0
        }
4041
0
        c->tcp_byte_count += r;
4042
0
        sldns_buffer_skip(c->buffer, r);
4043
0
        if(c->tcp_byte_count != current_read_size) return 1;
4044
0
        c->pp2_header_state = pp2_header_init;
4045
0
      }
4046
0
    }
4047
0
    if(c->pp2_header_state == pp2_header_init) {
4048
0
      int err;
4049
0
      err = pp2_read_header(
4050
0
        sldns_buffer_begin(c->buffer),
4051
0
        sldns_buffer_limit(c->buffer));
4052
0
      if(err) {
4053
0
        log_err("proxy_protocol: could not parse "
4054
0
          "PROXYv2 header (%s)",
4055
0
          pp_lookup_error(err));
4056
0
        return 0;
4057
0
      }
4058
0
      header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
4059
0
      want_read_size = ntohs(header->len);
4060
0
      if(sldns_buffer_limit(c->buffer) <
4061
0
        PP2_HEADER_SIZE + want_read_size) {
4062
0
        log_err_addr("proxy_protocol: not enough "
4063
0
          "buffer size to read PROXYv2 header", "",
4064
0
          &c->repinfo.remote_addr,
4065
0
          c->repinfo.remote_addrlen);
4066
0
        return 0;
4067
0
      }
4068
0
      verbose(VERB_ALGO, "proxy_protocol: reading variable "
4069
0
        "part of PROXYv2 header (len %lu)",
4070
0
        (unsigned long)want_read_size);
4071
0
      current_read_size = PP2_HEADER_SIZE + want_read_size;
4072
0
      if(want_read_size == 0) {
4073
        /* nothing more to read; header is complete */
4074
0
        c->pp2_header_state = pp2_header_done;
4075
0
      } else if(c->tcp_byte_count < current_read_size) {
4076
0
        r = recv(fd, (void*)sldns_buffer_at(c->buffer,
4077
0
          c->tcp_byte_count),
4078
0
          current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
4079
0
        if(r == 0) {
4080
0
          if(c->tcp_req_info)
4081
0
            return tcp_req_info_handle_read_close(c->tcp_req_info);
4082
0
          return 0;
4083
0
        } else if(r == -1) {
4084
0
          goto recv_error;
4085
0
        }
4086
0
        c->tcp_byte_count += r;
4087
0
        sldns_buffer_skip(c->buffer, r);
4088
0
        if(c->tcp_byte_count != current_read_size) return 1;
4089
0
        c->pp2_header_state = pp2_header_done;
4090
0
      }
4091
0
    }
4092
0
    if(c->pp2_header_state != pp2_header_done || !header) {
4093
0
      log_err_addr("proxy_protocol: wrong state for the "
4094
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
4095
0
        c->repinfo.remote_addrlen);
4096
0
      return 0;
4097
0
    }
4098
0
    sldns_buffer_flip(c->buffer);
4099
0
    if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
4100
0
      log_err_addr("proxy_protocol: could not consume "
4101
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
4102
0
        c->repinfo.remote_addrlen);
4103
0
      return 0;
4104
0
    }
4105
0
    verbose(VERB_ALGO, "proxy_protocol: successful read of "
4106
0
      "PROXYv2 header");
4107
    /* Clear and reset the buffer to read the following
4108
        * DNS packet(s). */
4109
0
    sldns_buffer_clear(c->buffer);
4110
0
    c->tcp_byte_count = 0;
4111
0
    return 1;
4112
0
  }
4113
4114
0
  if(c->tcp_byte_count < sizeof(uint16_t)) {
4115
    /* read length bytes */
4116
0
    r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
4117
0
      sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT);
4118
0
    if(r == 0) {
4119
0
      if(c->tcp_req_info)
4120
0
        return tcp_req_info_handle_read_close(c->tcp_req_info);
4121
0
      return 0;
4122
0
    } else if(r == -1) {
4123
0
      if(c->pp2_enabled) goto recv_error;
4124
0
      goto recv_error_initial;
4125
0
    }
4126
0
    c->tcp_byte_count += r;
4127
0
    if(c->tcp_byte_count != sizeof(uint16_t))
4128
0
      return 1;
4129
0
    if(sldns_buffer_read_u16_at(c->buffer, 0) >
4130
0
      sldns_buffer_capacity(c->buffer)) {
4131
0
      verbose(VERB_QUERY, "tcp: dropped larger than buffer");
4132
0
      return 0;
4133
0
    }
4134
0
    sldns_buffer_set_limit(c->buffer,
4135
0
      sldns_buffer_read_u16_at(c->buffer, 0));
4136
0
    if(!short_ok &&
4137
0
      sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
4138
0
      verbose(VERB_QUERY, "tcp: dropped bogus too short.");
4139
0
      return 0;
4140
0
    }
4141
0
    verbose(VERB_ALGO, "Reading tcp query of length %d",
4142
0
      (int)sldns_buffer_limit(c->buffer));
4143
0
  }
4144
4145
0
  if(sldns_buffer_remaining(c->buffer) == 0)
4146
0
    log_err("in comm_point_tcp_handle_read buffer_remaining is "
4147
0
      "not > 0 as expected, continuing with (harmless) 0 "
4148
0
      "length recv");
4149
0
  r = recv(fd, (void*)sldns_buffer_current(c->buffer),
4150
0
    sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
4151
0
  if(r == 0) {
4152
0
    if(c->tcp_req_info)
4153
0
      return tcp_req_info_handle_read_close(c->tcp_req_info);
4154
0
    return 0;
4155
0
  } else if(r == -1) {
4156
0
    goto recv_error;
4157
0
  }
4158
0
  sldns_buffer_skip(c->buffer, r);
4159
0
  if(sldns_buffer_remaining(c->buffer) <= 0) {
4160
0
    tcp_callback_reader(c);
4161
0
  }
4162
0
  return 1;
4163
4164
0
recv_error_initial:
4165
0
  recv_initial = 1;
4166
0
recv_error:
4167
0
#ifndef USE_WINSOCK
4168
0
  if(errno == EINTR || errno == EAGAIN)
4169
0
    return 1;
4170
0
#ifdef ECONNRESET
4171
0
  if(errno == ECONNRESET && verbosity < 2)
4172
0
    return 0; /* silence reset by peer */
4173
0
#endif
4174
0
  if(recv_initial) {
4175
0
#ifdef ECONNREFUSED
4176
0
    if(errno == ECONNREFUSED && verbosity < 2)
4177
0
      return 0; /* silence reset by peer */
4178
0
#endif
4179
0
#ifdef ENETUNREACH
4180
0
    if(errno == ENETUNREACH && verbosity < 2)
4181
0
      return 0; /* silence it */
4182
0
#endif
4183
0
#ifdef EHOSTDOWN
4184
0
    if(errno == EHOSTDOWN && verbosity < 2)
4185
0
      return 0; /* silence it */
4186
0
#endif
4187
0
#ifdef EHOSTUNREACH
4188
0
    if(errno == EHOSTUNREACH && verbosity < 2)
4189
0
      return 0; /* silence it */
4190
0
#endif
4191
0
#ifdef ENETDOWN
4192
0
    if(errno == ENETDOWN && verbosity < 2)
4193
0
      return 0; /* silence it */
4194
0
#endif
4195
0
#ifdef EACCES
4196
0
    if(errno == EACCES && verbosity < 2)
4197
0
      return 0; /* silence it */
4198
0
#endif
4199
0
#ifdef ENOTCONN
4200
0
    if(errno == ENOTCONN) {
4201
0
      log_err_addr("read (in tcp initial) failed and this "
4202
0
        "could be because TCP Fast Open is "
4203
0
        "enabled [--disable-tfo-client "
4204
0
        "--disable-tfo-server] but does not "
4205
0
        "work", sock_strerror(errno),
4206
0
        &c->repinfo.remote_addr,
4207
0
        c->repinfo.remote_addrlen);
4208
0
      return 0;
4209
0
    }
4210
0
#endif
4211
0
  }
4212
#else /* USE_WINSOCK */
4213
  if(recv_initial) {
4214
    if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
4215
      return 0;
4216
    if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
4217
      return 0;
4218
    if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
4219
      return 0;
4220
    if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
4221
      return 0;
4222
    if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
4223
      return 0;
4224
  }
4225
  if(WSAGetLastError() == WSAECONNRESET)
4226
    return 0;
4227
  if(WSAGetLastError() == WSAEINPROGRESS)
4228
    return 1;
4229
  if(WSAGetLastError() == WSAEWOULDBLOCK) {
4230
    ub_winsock_tcp_wouldblock(c->ev->ev,
4231
      UB_EV_READ);
4232
    return 1;
4233
  }
4234
#endif
4235
0
  log_err_addr((recv_initial?"read (in tcp initial)":"read (in tcp)"),
4236
0
    sock_strerror(errno), &c->repinfo.remote_addr,
4237
0
    c->repinfo.remote_addrlen);
4238
0
  return 0;
4239
0
}
4240
4241
/**
4242
 * Handle tcp writing callback.
4243
 * @param fd: file descriptor of socket.
4244
 * @param c: comm point to write buffer out of.
4245
 * @return: 0 on error
4246
 */
4247
static int
4248
comm_point_tcp_handle_write(int fd, struct comm_point* c)
4249
0
{
4250
0
  ssize_t r;
4251
0
  struct sldns_buffer *buffer;
4252
0
  log_assert(c->type == comm_tcp);
4253
#ifdef USE_DNSCRYPT
4254
  buffer = c->dnscrypt_buffer;
4255
#else
4256
0
  buffer = c->buffer;
4257
0
#endif
4258
0
  if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
4259
0
    return 0;
4260
0
  log_assert(fd != -1);
4261
0
  if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
4262
    /* check for pending error from nonblocking connect */
4263
    /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
4264
0
    int error = 0;
4265
0
    socklen_t len = (socklen_t)sizeof(error);
4266
0
    if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
4267
0
      &len) < 0){
4268
0
#ifndef USE_WINSOCK
4269
0
      error = errno; /* on solaris errno is error */
4270
#else /* USE_WINSOCK */
4271
      error = WSAGetLastError();
4272
#endif
4273
0
    }
4274
0
#ifndef USE_WINSOCK
4275
0
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
4276
0
    if(error == EINPROGRESS || error == EWOULDBLOCK)
4277
0
      return 1; /* try again later */
4278
0
    else
4279
0
#endif
4280
0
    if(error != 0 && verbosity < 2)
4281
0
      return 0; /* silence lots of chatter in the logs */
4282
0
                else if(error != 0) {
4283
0
      log_err_addr("tcp connect", strerror(error),
4284
0
        &c->repinfo.remote_addr,
4285
0
        c->repinfo.remote_addrlen);
4286
#else /* USE_WINSOCK */
4287
    /* examine error */
4288
    if(error == WSAEINPROGRESS)
4289
      return 1;
4290
    else if(error == WSAEWOULDBLOCK) {
4291
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
4292
      return 1;
4293
    } else if(error != 0 && verbosity < 2)
4294
      return 0;
4295
    else if(error != 0) {
4296
      log_err_addr("tcp connect", wsa_strerror(error),
4297
        &c->repinfo.remote_addr,
4298
        c->repinfo.remote_addrlen);
4299
#endif /* USE_WINSOCK */
4300
0
      return 0;
4301
0
    }
4302
0
  }
4303
0
  if(c->ssl)
4304
0
    return ssl_handle_it(c, 1);
4305
4306
#ifdef USE_MSG_FASTOPEN
4307
  /* Only try this on first use of a connection that uses tfo,
4308
     otherwise fall through to normal write */
4309
  /* Also, TFO support on WINDOWS not implemented at the moment */
4310
  if(c->tcp_do_fastopen == 1) {
4311
    /* this form of sendmsg() does both a connect() and send() so need to
4312
       look for various flavours of error*/
4313
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
4314
    struct msghdr msg;
4315
    struct iovec iov[2];
4316
    c->tcp_do_fastopen = 0;
4317
    memset(&msg, 0, sizeof(msg));
4318
    if(c->tcp_write_and_read) {
4319
      iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
4320
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
4321
      iov[1].iov_base = c->tcp_write_pkt;
4322
      iov[1].iov_len = c->tcp_write_pkt_len;
4323
    } else {
4324
      iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
4325
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
4326
      iov[1].iov_base = sldns_buffer_begin(buffer);
4327
      iov[1].iov_len = sldns_buffer_limit(buffer);
4328
    }
4329
    log_assert(iov[0].iov_len > 0);
4330
    msg.msg_name = &c->repinfo.remote_addr;
4331
    msg.msg_namelen = c->repinfo.remote_addrlen;
4332
    msg.msg_iov = iov;
4333
    msg.msg_iovlen = 2;
4334
    r = sendmsg(fd, &msg, MSG_FASTOPEN);
4335
    if (r == -1) {
4336
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
4337
      /* Handshake is underway, maybe because no TFO cookie available.
4338
         Come back to write the message*/
4339
      if(errno == EINPROGRESS || errno == EWOULDBLOCK)
4340
        return 1;
4341
#endif
4342
      if(errno == EINTR || errno == EAGAIN)
4343
        return 1;
4344
      /* Not handling EISCONN here as shouldn't ever hit that case.*/
4345
      if(errno != EPIPE
4346
#ifdef EOPNOTSUPP
4347
        /* if /proc/sys/net/ipv4/tcp_fastopen is
4348
         * disabled on Linux, sendmsg may return
4349
         * 'Operation not supported', if so
4350
         * fallthrough to ordinary connect. */
4351
        && errno != EOPNOTSUPP
4352
#endif
4353
        && errno != 0) {
4354
        if(verbosity < 2)
4355
          return 0; /* silence lots of chatter in the logs */
4356
        log_err_addr("tcp sendmsg", strerror(errno),
4357
          &c->repinfo.remote_addr,
4358
          c->repinfo.remote_addrlen);
4359
        return 0;
4360
      }
4361
      verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno));
4362
      /* fallthrough to nonFASTOPEN
4363
       * (MSG_FASTOPEN on Linux 3 produces EPIPE)
4364
       * we need to perform connect() */
4365
      if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr,
4366
        c->repinfo.remote_addrlen) == -1) {
4367
#ifdef EINPROGRESS
4368
        if(errno == EINPROGRESS)
4369
          return 1; /* wait until connect done*/
4370
#endif
4371
#ifdef USE_WINSOCK
4372
        if(WSAGetLastError() == WSAEINPROGRESS ||
4373
          WSAGetLastError() == WSAEWOULDBLOCK)
4374
          return 1; /* wait until connect done*/
4375
#endif
4376
        if(tcp_connect_errno_needs_log(
4377
          (struct sockaddr *)&c->repinfo.remote_addr,
4378
          c->repinfo.remote_addrlen)) {
4379
          log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
4380
            strerror(errno),
4381
            &c->repinfo.remote_addr,
4382
            c->repinfo.remote_addrlen);
4383
        }
4384
        return 0;
4385
      }
4386
4387
    } else {
4388
      if(c->tcp_write_and_read) {
4389
        c->tcp_write_byte_count += r;
4390
        if(c->tcp_write_byte_count < sizeof(uint16_t))
4391
          return 1;
4392
      } else {
4393
        c->tcp_byte_count += r;
4394
        if(c->tcp_byte_count < sizeof(uint16_t))
4395
          return 1;
4396
        sldns_buffer_set_position(buffer, c->tcp_byte_count -
4397
          sizeof(uint16_t));
4398
      }
4399
      if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4400
        tcp_callback_writer(c);
4401
        return 1;
4402
      }
4403
    }
4404
  }
4405
#endif /* USE_MSG_FASTOPEN */
4406
4407
0
  if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
4408
0
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
4409
0
#ifdef HAVE_WRITEV
4410
0
    struct iovec iov[2];
4411
0
    if(c->tcp_write_and_read) {
4412
0
      iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
4413
0
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
4414
0
      iov[1].iov_base = c->tcp_write_pkt;
4415
0
      iov[1].iov_len = c->tcp_write_pkt_len;
4416
0
    } else {
4417
0
      iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
4418
0
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
4419
0
      iov[1].iov_base = sldns_buffer_begin(buffer);
4420
0
      iov[1].iov_len = sldns_buffer_limit(buffer);
4421
0
    }
4422
0
    log_assert(iov[0].iov_len > 0);
4423
0
    r = writev(fd, iov, 2);
4424
#else /* HAVE_WRITEV */
4425
    if(c->tcp_write_and_read) {
4426
      r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
4427
        sizeof(uint16_t)-c->tcp_write_byte_count, 0);
4428
    } else {
4429
      r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
4430
        sizeof(uint16_t)-c->tcp_byte_count, 0);
4431
    }
4432
#endif /* HAVE_WRITEV */
4433
0
    if(r == -1) {
4434
0
#ifndef USE_WINSOCK
4435
0
#  ifdef EPIPE
4436
0
                  if(errno == EPIPE && verbosity < 2)
4437
0
                          return 0; /* silence 'broken pipe' */
4438
0
  #endif
4439
0
      if(errno == EINTR || errno == EAGAIN)
4440
0
        return 1;
4441
0
#ifdef ECONNRESET
4442
0
      if(errno == ECONNRESET && verbosity < 2)
4443
0
        return 0; /* silence reset by peer */
4444
0
#endif
4445
0
#  ifdef HAVE_WRITEV
4446
0
      log_err_addr("tcp writev", strerror(errno),
4447
0
        &c->repinfo.remote_addr,
4448
0
        c->repinfo.remote_addrlen);
4449
#  else /* HAVE_WRITEV */
4450
      log_err_addr("tcp send s", strerror(errno),
4451
        &c->repinfo.remote_addr,
4452
        c->repinfo.remote_addrlen);
4453
#  endif /* HAVE_WRITEV */
4454
#else
4455
      if(WSAGetLastError() == WSAENOTCONN)
4456
        return 1;
4457
      if(WSAGetLastError() == WSAEINPROGRESS)
4458
        return 1;
4459
      if(WSAGetLastError() == WSAEWOULDBLOCK) {
4460
        ub_winsock_tcp_wouldblock(c->ev->ev,
4461
          UB_EV_WRITE);
4462
        return 1;
4463
      }
4464
      if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
4465
        return 0; /* silence reset by peer */
4466
      log_err_addr("tcp send s",
4467
        wsa_strerror(WSAGetLastError()),
4468
        &c->repinfo.remote_addr,
4469
        c->repinfo.remote_addrlen);
4470
#endif
4471
0
      return 0;
4472
0
    }
4473
0
    if(c->tcp_write_and_read) {
4474
0
      c->tcp_write_byte_count += r;
4475
0
      if(c->tcp_write_byte_count < sizeof(uint16_t))
4476
0
        return 1;
4477
0
    } else {
4478
0
      c->tcp_byte_count += r;
4479
0
      if(c->tcp_byte_count < sizeof(uint16_t))
4480
0
        return 1;
4481
0
      sldns_buffer_set_position(buffer, c->tcp_byte_count -
4482
0
        sizeof(uint16_t));
4483
0
    }
4484
0
    if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4485
0
      tcp_callback_writer(c);
4486
0
      return 1;
4487
0
    }
4488
0
  }
4489
0
  log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
4490
0
  log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
4491
0
  if(c->tcp_write_and_read) {
4492
0
    r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
4493
0
      c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
4494
0
  } else {
4495
0
    r = send(fd, (void*)sldns_buffer_current(buffer),
4496
0
      sldns_buffer_remaining(buffer), 0);
4497
0
  }
4498
0
  if(r == -1) {
4499
0
#ifndef USE_WINSOCK
4500
0
    if(errno == EINTR || errno == EAGAIN)
4501
0
      return 1;
4502
0
#ifdef ECONNRESET
4503
0
    if(errno == ECONNRESET && verbosity < 2)
4504
0
      return 0; /* silence reset by peer */
4505
0
#endif
4506
#else
4507
    if(WSAGetLastError() == WSAEINPROGRESS)
4508
      return 1;
4509
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
4510
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
4511
      return 1;
4512
    }
4513
    if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
4514
      return 0; /* silence reset by peer */
4515
#endif
4516
0
    log_err_addr("tcp send r", sock_strerror(errno),
4517
0
      &c->repinfo.remote_addr,
4518
0
      c->repinfo.remote_addrlen);
4519
0
    return 0;
4520
0
  }
4521
0
  if(c->tcp_write_and_read) {
4522
0
    c->tcp_write_byte_count += r;
4523
0
  } else {
4524
0
    sldns_buffer_skip(buffer, r);
4525
0
  }
4526
4527
0
  if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4528
0
    tcp_callback_writer(c);
4529
0
  }
4530
4531
0
  return 1;
4532
0
}
4533
4534
/** read again to drain buffers when there could be more to read, returns 0
4535
 * on failure which means the comm point is closed. */
4536
static int
4537
tcp_req_info_read_again(int fd, struct comm_point* c)
4538
0
{
4539
0
  while(c->tcp_req_info->read_again) {
4540
0
    int r;
4541
0
    c->tcp_req_info->read_again = 0;
4542
0
    if(c->tcp_is_reading)
4543
0
      r = comm_point_tcp_handle_read(fd, c, 0);
4544
0
    else  r = comm_point_tcp_handle_write(fd, c);
4545
0
    if(!r) {
4546
0
      reclaim_tcp_handler(c);
4547
0
      if(!c->tcp_do_close) {
4548
0
        fptr_ok(fptr_whitelist_comm_point(
4549
0
          c->callback));
4550
0
        (void)(*c->callback)(c, c->cb_arg,
4551
0
          NETEVENT_CLOSED, NULL);
4552
0
      }
4553
0
      return 0;
4554
0
    }
4555
0
  }
4556
0
  return 1;
4557
0
}
4558
4559
/** read again to drain buffers when there could be more to read */
4560
static void
4561
tcp_more_read_again(int fd, struct comm_point* c)
4562
0
{
4563
  /* if the packet is done, but another one could be waiting on
4564
   * the connection, the callback signals this, and we try again */
4565
  /* this continues until the read routines get EAGAIN or so,
4566
   * and thus does not call the callback, and the bool is 0 */
4567
0
  int* moreread = c->tcp_more_read_again;
4568
0
  while(moreread && *moreread) {
4569
0
    *moreread = 0;
4570
0
    if(!comm_point_tcp_handle_read(fd, c, 0)) {
4571
0
      reclaim_tcp_handler(c);
4572
0
      if(!c->tcp_do_close) {
4573
0
        fptr_ok(fptr_whitelist_comm_point(
4574
0
          c->callback));
4575
0
        (void)(*c->callback)(c, c->cb_arg,
4576
0
          NETEVENT_CLOSED, NULL);
4577
0
      }
4578
0
      return;
4579
0
    }
4580
0
  }
4581
0
}
4582
4583
/** write again to fill up when there could be more to write */
4584
static void
4585
tcp_more_write_again(int fd, struct comm_point* c)
4586
0
{
4587
  /* if the packet is done, but another is waiting to be written,
4588
   * the callback signals it and we try again. */
4589
  /* this continues until the write routines get EAGAIN or so,
4590
   * and thus does not call the callback, and the bool is 0 */
4591
0
  int* morewrite = c->tcp_more_write_again;
4592
0
  while(morewrite && *morewrite) {
4593
0
    *morewrite = 0;
4594
0
    if(!comm_point_tcp_handle_write(fd, c)) {
4595
0
      reclaim_tcp_handler(c);
4596
0
      if(!c->tcp_do_close) {
4597
0
        fptr_ok(fptr_whitelist_comm_point(
4598
0
          c->callback));
4599
0
        (void)(*c->callback)(c, c->cb_arg,
4600
0
          NETEVENT_CLOSED, NULL);
4601
0
      }
4602
0
      return;
4603
0
    }
4604
0
  }
4605
0
}
4606
4607
void
4608
comm_point_tcp_handle_callback(int fd, short event, void* arg)
4609
0
{
4610
0
  struct comm_point* c = (struct comm_point*)arg;
4611
0
  log_assert(c->type == comm_tcp);
4612
0
  ub_comm_base_now(c->ev->base);
4613
4614
0
  if(c->fd == -1 || c->fd != fd)
4615
0
    return; /* duplicate event, but commpoint closed. */
4616
4617
#ifdef USE_DNSCRYPT
4618
  /* Initialize if this is a dnscrypt socket */
4619
  if(c->tcp_parent) {
4620
    c->dnscrypt = c->tcp_parent->dnscrypt;
4621
  }
4622
  if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
4623
    c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
4624
    if(!c->dnscrypt_buffer) {
4625
      log_err("Could not allocate dnscrypt buffer");
4626
      reclaim_tcp_handler(c);
4627
      if(!c->tcp_do_close) {
4628
        fptr_ok(fptr_whitelist_comm_point(
4629
          c->callback));
4630
        (void)(*c->callback)(c, c->cb_arg,
4631
          NETEVENT_CLOSED, NULL);
4632
      }
4633
      return;
4634
    }
4635
  }
4636
#endif
4637
4638
0
  if((event&UB_EV_TIMEOUT)) {
4639
0
    verbose(VERB_QUERY, "tcp took too long, dropped");
4640
0
    reclaim_tcp_handler(c);
4641
0
    if(!c->tcp_do_close) {
4642
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
4643
0
      (void)(*c->callback)(c, c->cb_arg,
4644
0
        NETEVENT_TIMEOUT, NULL);
4645
0
    }
4646
0
    return;
4647
0
  }
4648
0
  if((event&UB_EV_READ)
4649
#ifdef USE_MSG_FASTOPEN
4650
    && !(c->tcp_do_fastopen && (event&UB_EV_WRITE))
4651
#endif
4652
0
    ) {
4653
0
    int has_tcpq = (c->tcp_req_info != NULL);
4654
0
    int* moreread = c->tcp_more_read_again;
4655
0
    if(!comm_point_tcp_handle_read(fd, c, 0)) {
4656
0
      reclaim_tcp_handler(c);
4657
0
      if(!c->tcp_do_close) {
4658
0
        fptr_ok(fptr_whitelist_comm_point(
4659
0
          c->callback));
4660
0
        (void)(*c->callback)(c, c->cb_arg,
4661
0
          NETEVENT_CLOSED, NULL);
4662
0
      }
4663
0
      return;
4664
0
    }
4665
0
    if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4666
0
      if(!tcp_req_info_read_again(fd, c))
4667
0
        return;
4668
0
    }
4669
0
    if(moreread && *moreread)
4670
0
      tcp_more_read_again(fd, c);
4671
0
    return;
4672
0
  }
4673
0
  if((event&UB_EV_WRITE)) {
4674
0
    int has_tcpq = (c->tcp_req_info != NULL);
4675
0
    int* morewrite = c->tcp_more_write_again;
4676
0
    if(!comm_point_tcp_handle_write(fd, c)) {
4677
0
      reclaim_tcp_handler(c);
4678
0
      if(!c->tcp_do_close) {
4679
0
        fptr_ok(fptr_whitelist_comm_point(
4680
0
          c->callback));
4681
0
        (void)(*c->callback)(c, c->cb_arg,
4682
0
          NETEVENT_CLOSED, NULL);
4683
0
      }
4684
0
      return;
4685
0
    }
4686
0
    if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4687
0
      if(!tcp_req_info_read_again(fd, c))
4688
0
        return;
4689
0
    }
4690
0
    if(morewrite && *morewrite)
4691
0
      tcp_more_write_again(fd, c);
4692
0
    return;
4693
0
  }
4694
0
  log_err("Ignored event %d for tcphdl.", event);
4695
0
}
4696
4697
/** Make http handler free for next assignment */
4698
static void
4699
reclaim_http_handler(struct comm_point* c)
4700
0
{
4701
0
  log_assert(c->type == comm_http);
4702
0
  if(c->ssl) {
4703
0
#ifdef HAVE_SSL
4704
0
    SSL_shutdown(c->ssl);
4705
0
    SSL_free(c->ssl);
4706
0
    c->ssl = NULL;
4707
0
#endif
4708
0
  }
4709
0
  comm_point_close(c);
4710
0
  if(c->tcp_parent) {
4711
0
    if(c != c->tcp_parent->tcp_free) {
4712
0
      c->tcp_parent->cur_tcp_count--;
4713
0
      c->tcp_free = c->tcp_parent->tcp_free;
4714
0
      c->tcp_parent->tcp_free = c;
4715
0
    }
4716
0
    if(!c->tcp_free) {
4717
      /* re-enable listening on accept socket */
4718
0
      comm_point_start_listening(c->tcp_parent, -1, -1);
4719
0
    }
4720
0
  }
4721
0
}
4722
4723
/** Read more data for http (with ssl).
 * Performs one SSL_read into the free space of c->buffer and advances the
 * buffer position by the number of bytes read.
 * @param c: comm point; c->buffer must have space remaining.
 * @return 1 on progress or when the read has to be retried later, 0 on
 *   shutdown or error. Without HAVE_SSL it always returns 0. */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifndef HAVE_SSL
  (void)c;
  return 0;
#else
  int nread, sslerr;
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
  ERR_clear_error();
  nread = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
    (int)sldns_buffer_remaining(c->buffer));
  if(nread > 0) {
    verbose(VERB_ALGO, "ssl http read more skip to %d + %d",
      (int)sldns_buffer_position(c->buffer), (int)nread);
    sldns_buffer_skip(c->buffer, (ssize_t)nread);
    return 1;
  }

  sslerr = SSL_get_error(c->ssl, nread);
  switch(sslerr) {
  case SSL_ERROR_ZERO_RETURN:
    return 0; /* shutdown, closed */
  case SSL_ERROR_WANT_READ:
    return 1; /* read more later */
  case SSL_ERROR_WANT_WRITE:
    /* the ssl layer wants to write first; listen for writability */
    c->ssl_shake_state = comm_ssl_shake_hs_write;
    comm_point_listen_for_rw(c, 0, 1);
    return 1;
  case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
    if(errno == ECONNRESET && verbosity < 2)
      return 0; /* silence reset by peer */
#endif
    if(errno != 0)
      log_err("SSL_read syscall: %s",
        strerror(errno));
    return 0;
  default:
    break;
  }
  log_crypto_err_io("could not SSL_read", sslerr);
  return 0;
#endif /* HAVE_SSL */
}
4765
4766
/** read more data for http */
4767
static int
4768
http_read_more(int fd, struct comm_point* c)
4769
0
{
4770
0
  ssize_t r;
4771
0
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
4772
0
  r = recv(fd, (void*)sldns_buffer_current(c->buffer),
4773
0
    sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
4774
0
  if(r == 0) {
4775
0
    return 0;
4776
0
  } else if(r == -1) {
4777
0
#ifndef USE_WINSOCK
4778
0
    if(errno == EINTR || errno == EAGAIN)
4779
0
      return 1;
4780
#else /* USE_WINSOCK */
4781
    if(WSAGetLastError() == WSAECONNRESET)
4782
      return 0;
4783
    if(WSAGetLastError() == WSAEINPROGRESS)
4784
      return 1;
4785
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
4786
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
4787
      return 1;
4788
    }
4789
#endif
4790
0
    log_err_addr("read (in http r)", sock_strerror(errno),
4791
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
4792
0
    return 0;
4793
0
  }
4794
0
  verbose(VERB_ALGO, "http read more skip to %d + %d",
4795
0
    (int)sldns_buffer_position(c->buffer), (int)r);
4796
0
  sldns_buffer_skip(c->buffer, r);
4797
0
  return 1;
4798
0
}
4799
4800
/** return true if http header has been read (one line complete) */
4801
static int
4802
http_header_done(sldns_buffer* buf)
4803
0
{
4804
0
  size_t i;
4805
0
  for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4806
    /* there was a \r before the \n, but we ignore that */
4807
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
4808
0
      return 1;
4809
0
  }
4810
0
  return 0;
4811
0
}
4812
4813
/** return character string into buffer for header line, moves buffer
4814
 * past that line and puts zero terminator into linefeed-newline */
4815
static char*
4816
http_header_line(sldns_buffer* buf)
4817
0
{
4818
0
  char* result = (char*)sldns_buffer_current(buf);
4819
0
  size_t i;
4820
0
  for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4821
    /* terminate the string on the \r */
4822
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
4823
0
      sldns_buffer_write_u8_at(buf, i, 0);
4824
    /* terminate on the \n and skip past the it and done */
4825
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
4826
0
      sldns_buffer_write_u8_at(buf, i, 0);
4827
0
      sldns_buffer_set_position(buf, i+1);
4828
0
      return result;
4829
0
    }
4830
0
  }
4831
0
  return NULL;
4832
0
}
4833
4834
/** move unread buffer to start and clear rest for putting the rest into it */
4835
static void
4836
http_moveover_buffer(sldns_buffer* buf)
4837
0
{
4838
0
  size_t pos = sldns_buffer_position(buf);
4839
0
  size_t len = sldns_buffer_remaining(buf);
4840
0
  sldns_buffer_clear(buf);
4841
0
  memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
4842
0
  sldns_buffer_set_position(buf, len);
4843
0
}
4844
4845
/** a http header is complete, process it */
4846
static int
4847
http_process_initial_header(struct comm_point* c)
4848
0
{
4849
0
  char* line = http_header_line(c->buffer);
4850
0
  if(!line) return 1;
4851
0
  verbose(VERB_ALGO, "http header: %s", line);
4852
0
  if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
4853
    /* check returncode */
4854
0
    if(line[9] != '2') {
4855
0
      verbose(VERB_ALGO, "http bad status %s", line+9);
4856
0
      return 0;
4857
0
    }
4858
0
  } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
4859
0
    if(!c->http_is_chunked)
4860
0
      c->tcp_byte_count = (size_t)atoi(line+16);
4861
0
  } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
4862
0
    c->tcp_byte_count = 0;
4863
0
    c->http_is_chunked = 1;
4864
0
  } else if(line[0] == 0) {
4865
    /* end of initial headers */
4866
0
    c->http_in_headers = 0;
4867
0
    if(c->http_is_chunked)
4868
0
      c->http_in_chunk_headers = 1;
4869
    /* remove header text from front of buffer
4870
     * the buffer is going to be used to return the data segment
4871
     * itself and we don't want the header to get returned
4872
     * prepended with it */
4873
0
    http_moveover_buffer(c->buffer);
4874
0
    sldns_buffer_flip(c->buffer);
4875
0
    return 1;
4876
0
  }
4877
  /* ignore other headers */
4878
0
  return 1;
4879
0
}
4880
4881
/** a chunk header is complete, process it, return 0=fail, 1=continue next
4882
 * header line, 2=done with chunked transfer*/
4883
static int
4884
http_process_chunk_header(struct comm_point* c)
4885
0
{
4886
0
  char* line = http_header_line(c->buffer);
4887
0
  if(!line) return 1;
4888
0
  if(c->http_in_chunk_headers == 3) {
4889
0
    verbose(VERB_ALGO, "http chunk trailer: %s", line);
4890
    /* are we done ? */
4891
0
    if(line[0] == 0 && c->tcp_byte_count == 0) {
4892
      /* callback of http reader when NETEVENT_DONE,
4893
       * end of data, with no data in buffer */
4894
0
      sldns_buffer_set_position(c->buffer, 0);
4895
0
      sldns_buffer_set_limit(c->buffer, 0);
4896
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
4897
0
      (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4898
      /* return that we are done */
4899
0
      return 2;
4900
0
    }
4901
0
    if(line[0] == 0) {
4902
      /* continue with header of the next chunk */
4903
0
      c->http_in_chunk_headers = 1;
4904
      /* remove header text from front of buffer */
4905
0
      http_moveover_buffer(c->buffer);
4906
0
      sldns_buffer_flip(c->buffer);
4907
0
      return 1;
4908
0
    }
4909
    /* ignore further trail headers */
4910
0
    return 1;
4911
0
  }
4912
0
  verbose(VERB_ALGO, "http chunk header: %s", line);
4913
0
  if(c->http_in_chunk_headers == 1) {
4914
    /* read chunked start line */
4915
0
    char* end = NULL;
4916
0
    c->tcp_byte_count = (size_t)strtol(line, &end, 16);
4917
0
    if(end == line)
4918
0
      return 0;
4919
0
    c->http_in_chunk_headers = 0;
4920
    /* remove header text from front of buffer */
4921
0
    http_moveover_buffer(c->buffer);
4922
0
    sldns_buffer_flip(c->buffer);
4923
0
    if(c->tcp_byte_count == 0) {
4924
      /* done with chunks, process chunk_trailer lines */
4925
0
      c->http_in_chunk_headers = 3;
4926
0
    }
4927
0
    return 1;
4928
0
  }
4929
  /* ignore other headers */
4930
0
  return 1;
4931
0
}
4932
4933
/** handle nonchunked data segment, 0=fail, 1=wait */
4934
static int
4935
http_nonchunk_segment(struct comm_point* c)
4936
0
{
4937
  /* c->buffer at position..limit has new data we read in.
4938
   * the buffer itself is full of nonchunked data.
4939
   * we are looking to read tcp_byte_count more data
4940
   * and then the transfer is done. */
4941
0
  size_t remainbufferlen;
4942
0
  size_t got_now = sldns_buffer_limit(c->buffer);
4943
0
  if(c->tcp_byte_count <= got_now) {
4944
    /* done, this is the last data fragment */
4945
0
    c->http_stored = 0;
4946
0
    sldns_buffer_set_position(c->buffer, 0);
4947
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
4948
0
    (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4949
0
    return 1;
4950
0
  }
4951
  /* if we have the buffer space,
4952
   * read more data collected into the buffer */
4953
0
  remainbufferlen = sldns_buffer_capacity(c->buffer) -
4954
0
    sldns_buffer_limit(c->buffer);
4955
0
  if(remainbufferlen+got_now >= c->tcp_byte_count ||
4956
0
    remainbufferlen >= (size_t)(c->ssl?16384:2048)) {
4957
0
    size_t total = sldns_buffer_limit(c->buffer);
4958
0
    sldns_buffer_clear(c->buffer);
4959
0
    sldns_buffer_set_position(c->buffer, total);
4960
0
    c->http_stored = total;
4961
    /* return and wait to read more */
4962
0
    return 1;
4963
0
  }
4964
  /* call callback with this data amount, then
4965
   * wait for more */
4966
0
  c->tcp_byte_count -= got_now;
4967
0
  c->http_stored = 0;
4968
0
  sldns_buffer_set_position(c->buffer, 0);
4969
0
  fptr_ok(fptr_whitelist_comm_point(c->callback));
4970
0
  (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
4971
  /* c->callback has to buffer_clear(c->buffer). */
4972
  /* return and wait to read more */
4973
0
  return 1;
4974
0
}
4975
4976
/** handle chunked data segment, return 0=fail, 1=wait, 2=process more */
4977
static int
4978
http_chunked_segment(struct comm_point* c)
4979
0
{
4980
  /* the c->buffer has from position..limit new data we read. */
4981
  /* the current chunk has length tcp_byte_count.
4982
   * once we read that read more chunk headers.
4983
   */
4984
0
  size_t remainbufferlen;
4985
0
  size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
4986
0
  verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer));
4987
0
  if(c->tcp_byte_count <= got_now) {
4988
    /* the chunk has completed (with perhaps some extra data
4989
     * from next chunk header and next chunk) */
4990
    /* save too much info into temp buffer */
4991
0
    size_t fraglen;
4992
0
    struct comm_reply repinfo;
4993
0
    c->http_stored = 0;
4994
0
    sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
4995
0
    sldns_buffer_clear(c->http_temp);
4996
0
    sldns_buffer_write(c->http_temp,
4997
0
      sldns_buffer_current(c->buffer),
4998
0
      sldns_buffer_remaining(c->buffer));
4999
0
    sldns_buffer_flip(c->http_temp);
5000
5001
    /* callback with this fragment */
5002
0
    fraglen = sldns_buffer_position(c->buffer);
5003
0
    sldns_buffer_set_position(c->buffer, 0);
5004
0
    sldns_buffer_set_limit(c->buffer, fraglen);
5005
0
    repinfo = c->repinfo;
5006
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
5007
0
    (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
5008
    /* c->callback has to buffer_clear(). */
5009
5010
    /* is commpoint deleted? */
5011
0
    if(!repinfo.c) {
5012
0
      return 1;
5013
0
    }
5014
    /* copy waiting info */
5015
0
    sldns_buffer_clear(c->buffer);
5016
0
    sldns_buffer_write(c->buffer,
5017
0
      sldns_buffer_begin(c->http_temp),
5018
0
      sldns_buffer_remaining(c->http_temp));
5019
0
    sldns_buffer_flip(c->buffer);
5020
    /* process end of chunk trailer header lines, until
5021
     * an empty line */
5022
0
    c->http_in_chunk_headers = 3;
5023
    /* process more data in buffer (if any) */
5024
0
    return 2;
5025
0
  }
5026
0
  c->tcp_byte_count -= got_now;
5027
5028
  /* if we have the buffer space,
5029
   * read more data collected into the buffer */
5030
0
  remainbufferlen = sldns_buffer_capacity(c->buffer) -
5031
0
    sldns_buffer_limit(c->buffer);
5032
0
  if(remainbufferlen >= c->tcp_byte_count ||
5033
0
    remainbufferlen >= 2048) {
5034
0
    size_t total = sldns_buffer_limit(c->buffer);
5035
0
    sldns_buffer_clear(c->buffer);
5036
0
    sldns_buffer_set_position(c->buffer, total);
5037
0
    c->http_stored = total;
5038
    /* return and wait to read more */
5039
0
    return 1;
5040
0
  }
5041
5042
  /* callback of http reader for a new part of the data */
5043
0
  c->http_stored = 0;
5044
0
  sldns_buffer_set_position(c->buffer, 0);
5045
0
  fptr_ok(fptr_whitelist_comm_point(c->callback));
5046
0
  (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
5047
  /* c->callback has to buffer_clear(c->buffer). */
5048
  /* return and wait to read more */
5049
0
  return 1;
5050
0
}
5051
5052
#ifdef HAVE_NGHTTP2
5053
/** Create new http2 session. Called when creating handling comm point. */
5054
static struct http2_session* http2_session_create(struct comm_point* c)
5055
{
5056
  struct http2_session* session = calloc(1, sizeof(*session));
5057
  if(!session) {
5058
    log_err("malloc failure while creating http2 session");
5059
    return NULL;
5060
  }
5061
  session->c = c;
5062
5063
  return session;
5064
}
5065
#endif
5066
5067
/** Delete http2 session. After closing connection or on error.
 * With HAVE_NGHTTP2 the nghttp2 callbacks object (if any) is deleted and
 * the session structure is freed; without it this does nothing.
 * @param h2_session: session to delete; dereferenced without a NULL
 *   check when HAVE_NGHTTP2 is defined. */
static void http2_session_delete(struct http2_session* h2_session)
{
#ifndef HAVE_NGHTTP2
  (void)h2_session;
#else
  if(h2_session->callbacks != NULL)
    nghttp2_session_callbacks_del(h2_session->callbacks);
  free(h2_session);
#endif
}
5078
5079
#ifdef HAVE_NGHTTP2
5080
struct http2_stream* http2_stream_create(int32_t stream_id)
5081
{
5082
  struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream));
5083
  if(!h2_stream) {
5084
    log_err("malloc failure while creating http2 stream");
5085
    return NULL;
5086
  }
5087
  h2_stream->stream_id = stream_id;
5088
  return h2_stream;
5089
}
5090
#endif
5091
5092
void http2_stream_add_meshstate(struct http2_stream* h2_stream,
5093
  struct mesh_area* mesh, struct mesh_state* m)
5094
0
{
5095
0
  h2_stream->mesh = mesh;
5096
0
  h2_stream->mesh_state = m;
5097
0
}
5098
5099
void http2_stream_remove_mesh_state(struct http2_stream* h2_stream)
5100
0
{
5101
0
  if(!h2_stream)
5102
0
    return;
5103
0
  h2_stream->mesh_state = NULL;
5104
0
}
5105
5106
#ifdef HAVE_NGHTTP2
5107
void http2_session_add_stream(struct http2_session* h2_session,
5108
  struct http2_stream* h2_stream)
5109
{
5110
  if(h2_session->first_stream)
5111
    h2_session->first_stream->prev = h2_stream;
5112
  h2_stream->next = h2_session->first_stream;
5113
  h2_session->first_stream = h2_stream;
5114
}
5115
5116
/** remove stream from session linked list. After stream close callback or
5117
 * closing connection */
5118
static void http2_session_remove_stream(struct http2_session* h2_session,
5119
  struct http2_stream* h2_stream)
5120
{
5121
  if(h2_stream->prev)
5122
    h2_stream->prev->next = h2_stream->next;
5123
  else
5124
    h2_session->first_stream = h2_stream->next;
5125
  if(h2_stream->next)
5126
    h2_stream->next->prev = h2_stream->prev;
5127
5128
}
5129
5130
/** Stream close callback. Looks up the stream that is stored as nghttp2
 * stream user data for stream_id, and if there is one, unlinks it from the
 * session's stream list and deletes it.
 * @param session: unused.
 * @param stream_id: id of the stream that is closed.
 * @param error_code: unused.
 * @param cb_arg: the struct http2_session.
 * @return 0 always. */
int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session),
  int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg)
{
  struct http2_session* h2_session = (struct http2_session*)cb_arg;
  struct http2_stream* h2_stream = nghttp2_session_get_stream_user_data(
    h2_session->session, stream_id);
  if(h2_stream) {
    http2_session_remove_stream(h2_session, h2_stream);
    http2_stream_delete(h2_session, h2_stream);
  }
  return 0;
}
5143
5144
/** Receive callback for nghttp2. Reads up to len bytes for the session
 * into buf, with SSL_read when the comm point has an ssl object (and
 * HAVE_SSL is defined), otherwise with a non-blocking recv on the comm
 * point's fd.
 * @param session: unused.
 * @param buf: destination for the data.
 * @param len: size of buf.
 * @param flags: unused.
 * @param cb_arg: the struct http2_session.
 * @return the number of bytes read, or NGHTTP2_ERR_EOF when the peer
 *   closed, NGHTTP2_ERR_WOULDBLOCK when the read has to be retried later,
 *   NGHTTP2_ERR_CALLBACK_FAILURE on other errors. */
ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf,
  size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
  struct http2_session* h2_session = (struct http2_session*)cb_arg;
  ssize_t ret;

  log_assert(h2_session->c->type == comm_http);
  log_assert(h2_session->c->h2_session);

#ifdef HAVE_SSL
  if(h2_session->c->ssl) {
    int r;
    ERR_clear_error();
    r = SSL_read(h2_session->c->ssl, buf, len);
    if(r <= 0) {
      int want = SSL_get_error(h2_session->c->ssl, r);
      if(want == SSL_ERROR_ZERO_RETURN) {
        return NGHTTP2_ERR_EOF;
      } else if(want == SSL_ERROR_WANT_READ) {
        return NGHTTP2_ERR_WOULDBLOCK;
      } else if(want == SSL_ERROR_WANT_WRITE) {
        /* the ssl layer wants to write first; listen for
         * writability */
        h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write;
        comm_point_listen_for_rw(h2_session->c, 0, 1);
        return NGHTTP2_ERR_WOULDBLOCK;
      } else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
        /* silence reset by peer */
        if(errno == ECONNRESET && verbosity < 2)
          return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
        if(errno != 0)
          log_err("SSL_read syscall: %s",
            strerror(errno));
        return NGHTTP2_ERR_CALLBACK_FAILURE;
      }
      log_crypto_err_io("could not SSL_read", want);
      return NGHTTP2_ERR_CALLBACK_FAILURE;
    }
    return r;
  }
#endif /* HAVE_SSL */

  ret = recv(h2_session->c->fd, buf, len, MSG_DONTWAIT);
  if(ret == 0) {
    return NGHTTP2_ERR_EOF;
  } else if(ret < 0) {
#ifndef USE_WINSOCK
    if(errno == EINTR || errno == EAGAIN)
      return NGHTTP2_ERR_WOULDBLOCK;
#ifdef ECONNRESET
    /* silence reset by peer */
    if(errno == ECONNRESET && verbosity < 2)
      return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
    /* log_err_addr gets a plain label and the error text as separate
     * arguments, like its other callers; no format specifier in the
     * label */
    log_err_addr("could not http2 recv", strerror(errno),
      &h2_session->c->repinfo.remote_addr,
      h2_session->c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
    if(WSAGetLastError() == WSAECONNRESET)
      return NGHTTP2_ERR_CALLBACK_FAILURE;
    if(WSAGetLastError() == WSAEINPROGRESS)
      return NGHTTP2_ERR_WOULDBLOCK;
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
      ub_winsock_tcp_wouldblock(h2_session->c->ev->ev,
        UB_EV_READ);
      return NGHTTP2_ERR_WOULDBLOCK;
    }
    /* plain label here too, the error text is the second argument */
    log_err_addr("could not http2 recv",
      wsa_strerror(WSAGetLastError()),
      &h2_session->c->repinfo.remote_addr,
      h2_session->c->repinfo.remote_addrlen);
#endif
    return NGHTTP2_ERR_CALLBACK_FAILURE;
  }
  return ret;
}
5218
#endif /* HAVE_NGHTTP2 */
5219
5220
/**
 * Handle http2 read: let nghttp2 consume incoming data for the session.
 * @param c: comm point with the http2 session.
 * @return 0 if the connection is to be closed (receive error, or the
 *	session wants neither to write nor to read), 1 to keep it open.
 *	Always 0 when built without nghttp2.
 */
static int
comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
	int rc;
	log_assert(c->h2_session);

	/* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */
	rc = nghttp2_session_recv(c->h2_session->session);
	if(rc != 0) {
		/* EOF and callback failures are not logged here */
		int quiet = (rc == NGHTTP2_ERR_EOF ||
			rc == NGHTTP2_ERR_CALLBACK_FAILURE);
		if(!quiet) {
			char peer[256];
			addr_to_str(&c->repinfo.remote_addr,
				c->repinfo.remote_addrlen, peer, sizeof(peer));
			verbose(VERB_QUERY, "http2: session_recv from %s failed, "
				"error: %s", peer, nghttp2_strerror(rc));
		}
		return 0;
	}

	if(!nghttp2_session_want_write(c->h2_session->session)) {
		/* nothing to send; stay open only if more input is wanted */
		if(!nghttp2_session_want_read(c->h2_session->session))
			return 0; /* connection can be closed */
		return 1;
	}

	/* the session has data to send, switch the event to writing */
	c->tcp_is_reading = 0;
	comm_point_stop_listening(c);
	comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
	return 1;
#else
	(void)c;
	return 0;
#endif
}
5253
5254
/**
5255
 * Handle http reading callback.
5256
 * @param fd: file descriptor of socket.
5257
 * @param c: comm point to read from into buffer.
5258
 * @return: 0 on error
5259
 */
5260
static int
5261
comm_point_http_handle_read(int fd, struct comm_point* c)
5262
0
{
5263
0
  log_assert(c->type == comm_http);
5264
0
  log_assert(fd != -1);
5265
5266
  /* if we are in ssl handshake, handle SSL handshake */
5267
0
#ifdef HAVE_SSL
5268
0
  if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
5269
0
    if(!ssl_handshake(c))
5270
0
      return 0;
5271
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
5272
0
      return 1;
5273
0
  }
5274
0
#endif /* HAVE_SSL */
5275
5276
0
  if(!c->tcp_is_reading)
5277
0
    return 1;
5278
5279
0
  if(c->use_h2) {
5280
0
    return comm_point_http2_handle_read(fd, c);
5281
0
  }
5282
5283
  /* http version is <= http/1.1 */
5284
5285
0
  if(c->http_min_version >= http_version_2) {
5286
    /* HTTP/2 failed, not allowed to use lower version. */
5287
0
    return 0;
5288
0
  }
5289
5290
  /* read more data */
5291
0
  if(c->ssl) {
5292
0
    if(!ssl_http_read_more(c))
5293
0
      return 0;
5294
0
  } else {
5295
0
    if(!http_read_more(fd, c))
5296
0
      return 0;
5297
0
  }
5298
5299
0
  if(c->http_stored >= sldns_buffer_position(c->buffer)) {
5300
    /* read did not work but we wanted more data, there is
5301
     * no bytes to process now. */
5302
0
    return 1;
5303
0
  }
5304
0
  sldns_buffer_flip(c->buffer);
5305
  /* if we are partway in a segment of data, position us at the point
5306
   * where we left off previously */
5307
0
  if(c->http_stored < sldns_buffer_limit(c->buffer))
5308
0
    sldns_buffer_set_position(c->buffer, c->http_stored);
5309
0
  else  sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer));
5310
5311
0
  while(sldns_buffer_remaining(c->buffer) > 0) {
5312
    /* Handle HTTP/1.x data */
5313
    /* if we are reading headers, read more headers */
5314
0
    if(c->http_in_headers || c->http_in_chunk_headers) {
5315
      /* if header is done, process the header */
5316
0
      if(!http_header_done(c->buffer)) {
5317
        /* copy remaining data to front of buffer
5318
         * and set rest for writing into it */
5319
0
        http_moveover_buffer(c->buffer);
5320
        /* return and wait to read more */
5321
0
        return 1;
5322
0
      }
5323
0
      if(!c->http_in_chunk_headers) {
5324
        /* process initial headers */
5325
0
        if(!http_process_initial_header(c))
5326
0
          return 0;
5327
0
      } else {
5328
        /* process chunk headers */
5329
0
        int r = http_process_chunk_header(c);
5330
0
        if(r == 0) return 0;
5331
0
        if(r == 2) return 1; /* done */
5332
        /* r == 1, continue */
5333
0
      }
5334
      /* see if we have more to process */
5335
0
      continue;
5336
0
    }
5337
5338
0
    if(!c->http_is_chunked) {
5339
      /* if we are reading nonchunks, process that*/
5340
0
      return http_nonchunk_segment(c);
5341
0
    } else {
5342
      /* if we are reading chunks, read the chunk */
5343
0
      int r = http_chunked_segment(c);
5344
0
      if(r == 0) return 0;
5345
0
      if(r == 1) return 1;
5346
0
      continue;
5347
0
    }
5348
0
  }
5349
  /* broke out of the loop; could not process header instead need
5350
   * to read more */
5351
  /* moveover any remaining data and read more data */
5352
0
  http_moveover_buffer(c->buffer);
5353
  /* return and wait to read more */
5354
0
  return 1;
5355
0
}
5356
5357
/** check pending connect for http */
5358
static int
5359
http_check_connect(int fd, struct comm_point* c)
5360
0
{
5361
  /* check for pending error from nonblocking connect */
5362
  /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
5363
0
  int error = 0;
5364
0
  socklen_t len = (socklen_t)sizeof(error);
5365
0
  if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
5366
0
    &len) < 0){
5367
0
#ifndef USE_WINSOCK
5368
0
    error = errno; /* on solaris errno is error */
5369
#else /* USE_WINSOCK */
5370
    error = WSAGetLastError();
5371
#endif
5372
0
  }
5373
0
#ifndef USE_WINSOCK
5374
0
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
5375
0
  if(error == EINPROGRESS || error == EWOULDBLOCK)
5376
0
    return 1; /* try again later */
5377
0
  else
5378
0
#endif
5379
0
  if(error != 0 && verbosity < 2)
5380
0
    return 0; /* silence lots of chatter in the logs */
5381
0
  else if(error != 0) {
5382
0
    log_err_addr("http connect", strerror(error),
5383
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5384
#else /* USE_WINSOCK */
5385
  /* examine error */
5386
  if(error == WSAEINPROGRESS)
5387
    return 1;
5388
  else if(error == WSAEWOULDBLOCK) {
5389
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
5390
    return 1;
5391
  } else if(error != 0 && verbosity < 2)
5392
    return 0;
5393
  else if(error != 0) {
5394
    log_err_addr("http connect", wsa_strerror(error),
5395
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5396
#endif /* USE_WINSOCK */
5397
0
    return 0;
5398
0
  }
5399
  /* keep on processing this socket */
5400
0
  return 2;
5401
0
}
5402
5403
/**
 * Write more data for http (with ssl): SSL_write the remainder of the
 * comm point buffer and advance the buffer by the number of bytes written.
 * @param c: comm point with ssl and a buffer that has bytes remaining.
 * @return 0 on failure or closed connection, 1 otherwise (data written, or
 *	the write has to be retried later). Always 0 when built without SSL.
 */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int written;
	int want;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	written = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(written > 0) {
		sldns_buffer_skip(c->buffer, (ssize_t)written);
		return 1;
	}

	want = SSL_get_error(c->ssl, written);
	switch(want) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	case SSL_ERROR_WANT_WRITE:
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		log_crypto_err_io("could not SSL_write", want);
		return 0;
	}
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
5443
5444
/** write more data for http */
5445
static int
5446
http_write_more(int fd, struct comm_point* c)
5447
0
{
5448
0
  ssize_t r;
5449
0
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
5450
0
  r = send(fd, (void*)sldns_buffer_current(c->buffer),
5451
0
    sldns_buffer_remaining(c->buffer), 0);
5452
0
  if(r == -1) {
5453
0
#ifndef USE_WINSOCK
5454
0
    if(errno == EINTR || errno == EAGAIN)
5455
0
      return 1;
5456
#else
5457
    if(WSAGetLastError() == WSAEINPROGRESS)
5458
      return 1;
5459
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
5460
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
5461
      return 1;
5462
    }
5463
#endif
5464
0
    log_err_addr("http send r", sock_strerror(errno),
5465
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5466
0
    return 0;
5467
0
  }
5468
0
  sldns_buffer_skip(c->buffer, r);
5469
0
  return 1;
5470
0
}
5471
5472
#ifdef HAVE_NGHTTP2
5473
/**
 * nghttp2 send callback: write up to len bytes from buf to the connection.
 * Uses SSL_write when the comm point carries an SSL object, otherwise
 * send() on the comm point's fd.
 * @param buf: data to send.
 * @param len: number of bytes available in buf.
 * @param cb_arg: the struct http2_session of this connection.
 * @return the number of bytes written, NGHTTP2_ERR_WOULDBLOCK if the write
 *	must be retried later, or NGHTTP2_ERR_CALLBACK_FAILURE on error
 *	(including a write of zero bytes).
 */
ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf,
	size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	struct comm_point* c = h2_session->c;
	ssize_t sent;
	log_assert(c->type == comm_http);
	log_assert(c->h2_session);

#ifdef HAVE_SSL
	if(c->ssl) {
		int written;
		int want;
		ERR_clear_error();
		written = SSL_write(c->ssl, buf, len);
		if(written > 0)
			return written;

		want = SSL_get_error(c->ssl, written);
		switch(want) {
		case SSL_ERROR_ZERO_RETURN:
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		case SSL_ERROR_WANT_READ:
			/* SSL needs to read before it can write again;
			 * switch the event to reading */
			c->ssl_shake_state = comm_ssl_shake_hs_read;
			comm_point_listen_for_rw(c, 1, 0);
			return NGHTTP2_ERR_WOULDBLOCK;
		case SSL_ERROR_WANT_WRITE:
			return NGHTTP2_ERR_WOULDBLOCK;
		case SSL_ERROR_SYSCALL:
#ifdef EPIPE
			/* do not log broken pipes at low verbosity */
			if(errno == EPIPE && verbosity < 2)
				return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
			if(errno != 0)
				log_err("SSL_write syscall: %s",
					strerror(errno));
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		default:
			log_crypto_err_io("could not SSL_write", want);
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		}
	}
#endif /* HAVE_SSL */

	sent = send(c->fd, buf, len, 0);
	if(sent > 0)
		return sent;
	if(sent == 0)
		return NGHTTP2_ERR_CALLBACK_FAILURE;

	/* sent < 0: classify the socket error */
#ifndef USE_WINSOCK
	if(errno == EINTR || errno == EAGAIN)
		return NGHTTP2_ERR_WOULDBLOCK;
#ifdef EPIPE
	if(errno == EPIPE && verbosity < 2)
		return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
#ifdef ECONNRESET
	if(errno == ECONNRESET && verbosity < 2)
		return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
	log_err_addr("could not http2 write: %s", strerror(errno),
		&c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
	if(WSAGetLastError() == WSAENOTCONN)
		return NGHTTP2_ERR_WOULDBLOCK;
	if(WSAGetLastError() == WSAEINPROGRESS)
		return NGHTTP2_ERR_WOULDBLOCK;
	if(WSAGetLastError() == WSAEWOULDBLOCK) {
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
		return NGHTTP2_ERR_WOULDBLOCK;
	}
	if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	log_err_addr("could not http2 write: %s",
		wsa_strerror(WSAGetLastError()),
		&c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#endif
	return NGHTTP2_ERR_CALLBACK_FAILURE;
}
5552
#endif /* HAVE_NGHTTP2 */
5553
5554
/**
 * Handle http2 writing: let nghttp2 send pending data for the session.
 * @param c: comm point with the http2 session.
 * @return 0 if the connection is to be closed (send error, or the session
 *	wants neither to read nor to write), 1 to keep it open.
 *	Always 0 when built without nghttp2.
 */
static int
comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
	int rc;
	log_assert(c->h2_session);

	rc = nghttp2_session_send(c->h2_session->session);
	if(rc != 0) {
		verbose(VERB_QUERY, "http2: session_send failed, "
			"error: %s", nghttp2_strerror(rc));
		return 0;
	}

	if(!nghttp2_session_want_read(c->h2_session->session)) {
		/* no input expected; stay open only if more must be sent */
		if(!nghttp2_session_want_write(c->h2_session->session))
			return 0; /* connection can be closed */
		return 1;
	}

	/* the session expects input, switch the event to reading */
	c->tcp_is_reading = 1;
	comm_point_stop_listening(c);
	comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
	return 1;
#else
	(void)c;
	return 0;
#endif
}
5581
5582
/**
5583
 * Handle http writing callback.
5584
 * @param fd: file descriptor of socket.
5585
 * @param c: comm point to write buffer out of.
5586
 * @return: 0 on error
5587
 */
5588
static int
5589
comm_point_http_handle_write(int fd, struct comm_point* c)
5590
0
{
5591
0
  log_assert(c->type == comm_http);
5592
0
  log_assert(fd != -1);
5593
5594
  /* check pending connect errors, if that fails, we wait for more,
5595
   * or we can continue to write contents */
5596
0
  if(c->tcp_check_nb_connect) {
5597
0
    int r = http_check_connect(fd, c);
5598
0
    if(r == 0) return 0;
5599
0
    if(r == 1) return 1;
5600
0
    c->tcp_check_nb_connect = 0;
5601
0
  }
5602
  /* if we are in ssl handshake, handle SSL handshake */
5603
0
#ifdef HAVE_SSL
5604
0
  if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
5605
0
    if(!ssl_handshake(c))
5606
0
      return 0;
5607
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
5608
0
      return 1;
5609
0
  }
5610
0
#endif /* HAVE_SSL */
5611
0
  if(c->tcp_is_reading)
5612
0
    return 1;
5613
5614
0
  if(c->use_h2) {
5615
0
    return comm_point_http2_handle_write(fd, c);
5616
0
  }
5617
5618
  /* http version is <= http/1.1 */
5619
5620
0
  if(c->http_min_version >= http_version_2) {
5621
    /* HTTP/2 failed, not allowed to use lower version. */
5622
0
    return 0;
5623
0
  }
5624
5625
  /* if we are writing, write more */
5626
0
  if(c->ssl) {
5627
0
    if(!ssl_http_write_more(c))
5628
0
      return 0;
5629
0
  } else {
5630
0
    if(!http_write_more(fd, c))
5631
0
      return 0;
5632
0
  }
5633
5634
  /* we write a single buffer contents, that can contain
5635
   * the http request, and then flip to read the results */
5636
  /* see if write is done */
5637
0
  if(sldns_buffer_remaining(c->buffer) == 0) {
5638
0
    sldns_buffer_clear(c->buffer);
5639
0
    if(c->tcp_do_toggle_rw)
5640
0
      c->tcp_is_reading = 1;
5641
0
    c->tcp_byte_count = 0;
5642
    /* switch from listening(write) to listening(read) */
5643
0
    comm_point_stop_listening(c);
5644
0
    comm_point_start_listening(c, -1, -1);
5645
0
  }
5646
0
  return 1;
5647
0
}
5648
5649
void
5650
comm_point_http_handle_callback(int fd, short event, void* arg)
5651
0
{
5652
0
  struct comm_point* c = (struct comm_point*)arg;
5653
0
  log_assert(c->type == comm_http);
5654
0
  ub_comm_base_now(c->ev->base);
5655
5656
0
  if((event&UB_EV_TIMEOUT)) {
5657
0
    verbose(VERB_QUERY, "http took too long, dropped");
5658
0
    reclaim_http_handler(c);
5659
0
    if(!c->tcp_do_close) {
5660
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
5661
0
      (void)(*c->callback)(c, c->cb_arg,
5662
0
        NETEVENT_TIMEOUT, NULL);
5663
0
    }
5664
0
    return;
5665
0
  }
5666
0
  if((event&UB_EV_READ)) {
5667
0
    if(!comm_point_http_handle_read(fd, c)) {
5668
0
      reclaim_http_handler(c);
5669
0
      if(!c->tcp_do_close) {
5670
0
        fptr_ok(fptr_whitelist_comm_point(
5671
0
          c->callback));
5672
0
        (void)(*c->callback)(c, c->cb_arg,
5673
0
          NETEVENT_CLOSED, NULL);
5674
0
      }
5675
0
    }
5676
0
    return;
5677
0
  }
5678
0
  if((event&UB_EV_WRITE)) {
5679
0
    if(!comm_point_http_handle_write(fd, c)) {
5680
0
      reclaim_http_handler(c);
5681
0
      if(!c->tcp_do_close) {
5682
0
        fptr_ok(fptr_whitelist_comm_point(
5683
0
          c->callback));
5684
0
        (void)(*c->callback)(c, c->cb_arg,
5685
0
          NETEVENT_CLOSED, NULL);
5686
0
      }
5687
0
    }
5688
0
    return;
5689
0
  }
5690
0
  log_err("Ignored event %d for httphdl.", event);
5691
0
}
5692
5693
void comm_point_local_handle_callback(int fd, short event, void* arg)
5694
0
{
5695
0
  struct comm_point* c = (struct comm_point*)arg;
5696
0
  log_assert(c->type == comm_local);
5697
0
  ub_comm_base_now(c->ev->base);
5698
5699
0
  if((event&UB_EV_READ)) {
5700
0
    if(!comm_point_tcp_handle_read(fd, c, 1)) {
5701
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
5702
0
      (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
5703
0
        NULL);
5704
0
    }
5705
0
    return;
5706
0
  }
5707
0
  log_err("Ignored event %d for localhdl.", event);
5708
0
}
5709
5710
void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
5711
  short event, void* arg)
5712
0
{
5713
0
  struct comm_point* c = (struct comm_point*)arg;
5714
0
  int err = NETEVENT_NOERROR;
5715
0
  log_assert(c->type == comm_raw);
5716
0
  ub_comm_base_now(c->ev->base);
5717
5718
0
  if((event&UB_EV_TIMEOUT))
5719
0
    err = NETEVENT_TIMEOUT;
5720
0
  fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
5721
0
  (void)(*c->callback)(c, c->cb_arg, err, NULL);
5722
0
}
5723
5724
struct comm_point*
5725
comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
5726
  int pp2_enabled, comm_point_callback_type* callback,
5727
  void* callback_arg, struct unbound_socket* socket)
5728
0
{
5729
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5730
0
    sizeof(struct comm_point));
5731
0
  short evbits;
5732
0
  if(!c)
5733
0
    return NULL;
5734
0
  c->ev = (struct internal_event*)calloc(1,
5735
0
    sizeof(struct internal_event));
5736
0
  if(!c->ev) {
5737
0
    free(c);
5738
0
    return NULL;
5739
0
  }
5740
0
  c->ev->base = base;
5741
0
  c->fd = fd;
5742
0
  c->buffer = buffer;
5743
0
  c->timeout = NULL;
5744
0
  c->tcp_is_reading = 0;
5745
0
  c->tcp_byte_count = 0;
5746
0
  c->tcp_parent = NULL;
5747
0
  c->max_tcp_count = 0;
5748
0
  c->cur_tcp_count = 0;
5749
0
  c->tcp_handlers = NULL;
5750
0
  c->tcp_free = NULL;
5751
0
  c->type = comm_udp;
5752
0
  c->tcp_do_close = 0;
5753
0
  c->do_not_close = 0;
5754
0
  c->tcp_do_toggle_rw = 0;
5755
0
  c->tcp_check_nb_connect = 0;
5756
#ifdef USE_MSG_FASTOPEN
5757
  c->tcp_do_fastopen = 0;
5758
#endif
5759
#ifdef USE_DNSCRYPT
5760
  c->dnscrypt = 0;
5761
  c->dnscrypt_buffer = buffer;
5762
#endif
5763
0
  c->inuse = 0;
5764
0
  c->callback = callback;
5765
0
  c->cb_arg = callback_arg;
5766
0
  c->socket = socket;
5767
0
  c->pp2_enabled = pp2_enabled;
5768
0
  c->pp2_header_state = pp2_header_none;
5769
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
5770
  /* ub_event stuff */
5771
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5772
0
    comm_point_udp_callback, c);
5773
0
  if(c->ev->ev == NULL) {
5774
0
    log_err("could not baseset udp event");
5775
0
    comm_point_delete(c);
5776
0
    return NULL;
5777
0
  }
5778
0
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5779
0
    log_err("could not add udp event");
5780
0
    comm_point_delete(c);
5781
0
    return NULL;
5782
0
  }
5783
0
  c->event_added = 1;
5784
0
  return c;
5785
0
}
5786
5787
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
5788
struct comm_point*
5789
comm_point_create_udp_ancil(struct comm_base *base, int fd,
5790
  sldns_buffer* buffer, int pp2_enabled,
5791
  comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
5792
0
{
5793
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5794
0
    sizeof(struct comm_point));
5795
0
  short evbits;
5796
0
  if(!c)
5797
0
    return NULL;
5798
0
  c->ev = (struct internal_event*)calloc(1,
5799
0
    sizeof(struct internal_event));
5800
0
  if(!c->ev) {
5801
0
    free(c);
5802
0
    return NULL;
5803
0
  }
5804
0
  c->ev->base = base;
5805
0
  c->fd = fd;
5806
0
  c->buffer = buffer;
5807
0
  c->timeout = NULL;
5808
0
  c->tcp_is_reading = 0;
5809
0
  c->tcp_byte_count = 0;
5810
0
  c->tcp_parent = NULL;
5811
0
  c->max_tcp_count = 0;
5812
0
  c->cur_tcp_count = 0;
5813
0
  c->tcp_handlers = NULL;
5814
0
  c->tcp_free = NULL;
5815
0
  c->type = comm_udp;
5816
0
  c->tcp_do_close = 0;
5817
0
  c->do_not_close = 0;
5818
#ifdef USE_DNSCRYPT
5819
  c->dnscrypt = 0;
5820
  c->dnscrypt_buffer = buffer;
5821
#endif
5822
0
  c->inuse = 0;
5823
0
  c->tcp_do_toggle_rw = 0;
5824
0
  c->tcp_check_nb_connect = 0;
5825
#ifdef USE_MSG_FASTOPEN
5826
  c->tcp_do_fastopen = 0;
5827
#endif
5828
0
  c->callback = callback;
5829
0
  c->cb_arg = callback_arg;
5830
0
  c->socket = socket;
5831
0
  c->pp2_enabled = pp2_enabled;
5832
0
  c->pp2_header_state = pp2_header_none;
5833
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
5834
  /* ub_event stuff */
5835
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5836
0
    comm_point_udp_ancil_callback, c);
5837
0
  if(c->ev->ev == NULL) {
5838
0
    log_err("could not baseset udp event");
5839
0
    comm_point_delete(c);
5840
0
    return NULL;
5841
0
  }
5842
0
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5843
0
    log_err("could not add udp event");
5844
0
    comm_point_delete(c);
5845
0
    return NULL;
5846
0
  }
5847
0
  c->event_added = 1;
5848
0
  return c;
5849
0
}
5850
#endif
5851
5852
struct comm_point*
5853
comm_point_create_doq(struct comm_base *base, int fd, sldns_buffer* buffer,
5854
  comm_point_callback_type* callback, void* callback_arg,
5855
  struct unbound_socket* socket, struct doq_table* table,
5856
  struct ub_randstate* rnd, const void* quic_sslctx,
5857
  struct config_file* cfg)
5858
0
{
5859
#ifdef HAVE_NGTCP2
5860
  struct comm_point* c = (struct comm_point*)calloc(1,
5861
    sizeof(struct comm_point));
5862
  short evbits;
5863
  if(!c)
5864
    return NULL;
5865
  c->ev = (struct internal_event*)calloc(1,
5866
    sizeof(struct internal_event));
5867
  if(!c->ev) {
5868
    free(c);
5869
    return NULL;
5870
  }
5871
  c->ev->base = base;
5872
  c->fd = fd;
5873
  c->buffer = buffer;
5874
  c->timeout = NULL;
5875
  c->tcp_is_reading = 0;
5876
  c->tcp_byte_count = 0;
5877
  c->tcp_parent = NULL;
5878
  c->max_tcp_count = 0;
5879
  c->cur_tcp_count = 0;
5880
  c->tcp_handlers = NULL;
5881
  c->tcp_free = NULL;
5882
  c->type = comm_doq;
5883
  c->tcp_do_close = 0;
5884
  c->do_not_close = 0;
5885
  c->tcp_do_toggle_rw = 0;
5886
  c->tcp_check_nb_connect = 0;
5887
#ifdef USE_MSG_FASTOPEN
5888
  c->tcp_do_fastopen = 0;
5889
#endif
5890
#ifdef USE_DNSCRYPT
5891
  c->dnscrypt = 0;
5892
  c->dnscrypt_buffer = NULL;
5893
#endif
5894
  c->doq_socket = doq_server_socket_create(table, rnd, quic_sslctx, c,
5895
    base, cfg);
5896
  if(!c->doq_socket) {
5897
    log_err("could not create doq comm_point");
5898
    comm_point_delete(c);
5899
    return NULL;
5900
  }
5901
  c->inuse = 0;
5902
  c->callback = callback;
5903
  c->cb_arg = callback_arg;
5904
  c->socket = socket;
5905
  c->pp2_enabled = 0;
5906
  c->pp2_header_state = pp2_header_none;
5907
  evbits = UB_EV_READ | UB_EV_PERSIST;
5908
  /* ub_event stuff */
5909
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5910
    comm_point_doq_callback, c);
5911
  if(c->ev->ev == NULL) {
5912
    log_err("could not baseset udp event");
5913
    comm_point_delete(c);
5914
    return NULL;
5915
  }
5916
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5917
    log_err("could not add udp event");
5918
    comm_point_delete(c);
5919
    return NULL;
5920
  }
5921
  c->event_added = 1;
5922
  return c;
5923
#else
5924
  /* no libngtcp2, so no QUIC support */
5925
0
  (void)base;
5926
0
  (void)buffer;
5927
0
  (void)callback;
5928
0
  (void)callback_arg;
5929
0
  (void)socket;
5930
0
  (void)rnd;
5931
0
  (void)table;
5932
0
  (void)quic_sslctx;
5933
0
  (void)cfg;
5934
0
  sock_close(fd);
5935
0
  return NULL;
5936
0
#endif /* HAVE_NGTCP2 */
5937
0
}
5938
5939
static struct comm_point*
5940
comm_point_create_tcp_handler(struct comm_base *base,
5941
  struct comm_point* parent, size_t bufsize,
5942
  struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
5943
  void* callback_arg, struct unbound_socket* socket)
5944
0
{
5945
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5946
0
    sizeof(struct comm_point));
5947
0
  short evbits;
5948
0
  if(!c)
5949
0
    return NULL;
5950
0
  c->ev = (struct internal_event*)calloc(1,
5951
0
    sizeof(struct internal_event));
5952
0
  if(!c->ev) {
5953
0
    free(c);
5954
0
    return NULL;
5955
0
  }
5956
0
  c->ev->base = base;
5957
0
  c->fd = -1;
5958
0
  c->buffer = sldns_buffer_new(bufsize);
5959
0
  if(!c->buffer) {
5960
0
    free(c->ev);
5961
0
    free(c);
5962
0
    return NULL;
5963
0
  }
5964
0
  c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
5965
0
  if(!c->timeout) {
5966
0
    sldns_buffer_free(c->buffer);
5967
0
    free(c->ev);
5968
0
    free(c);
5969
0
    return NULL;
5970
0
  }
5971
0
  c->tcp_is_reading = 0;
5972
0
  c->tcp_byte_count = 0;
5973
0
  c->tcp_parent = parent;
5974
0
  c->tcp_timeout_msec = parent->tcp_timeout_msec;
5975
0
  c->tcp_conn_limit = parent->tcp_conn_limit;
5976
0
  c->tcl_addr = NULL;
5977
0
  c->tcp_keepalive = 0;
5978
0
  c->max_tcp_count = 0;
5979
0
  c->cur_tcp_count = 0;
5980
0
  c->tcp_handlers = NULL;
5981
0
  c->tcp_free = NULL;
5982
0
  c->type = comm_tcp;
5983
0
  c->tcp_do_close = 0;
5984
0
  c->do_not_close = 0;
5985
0
  c->tcp_do_toggle_rw = 1;
5986
0
  c->tcp_check_nb_connect = 0;
5987
#ifdef USE_MSG_FASTOPEN
5988
  c->tcp_do_fastopen = 0;
5989
#endif
5990
#ifdef USE_DNSCRYPT
5991
  c->dnscrypt = 0;
5992
  /* We don't know just yet if this is a dnscrypt channel. Allocation
5993
   * will be done when handling the callback. */
5994
  c->dnscrypt_buffer = c->buffer;
5995
#endif
5996
0
  c->repinfo.c = c;
5997
0
  c->callback = callback;
5998
0
  c->cb_arg = callback_arg;
5999
0
  c->socket = socket;
6000
0
  c->pp2_enabled = parent->pp2_enabled;
6001
0
  c->pp2_header_state = pp2_header_none;
6002
0
  if(spoolbuf) {
6003
0
    c->tcp_req_info = tcp_req_info_create(spoolbuf);
6004
0
    if(!c->tcp_req_info) {
6005
0
      log_err("could not create tcp commpoint");
6006
0
      sldns_buffer_free(c->buffer);
6007
0
      free(c->timeout);
6008
0
      free(c->ev);
6009
0
      free(c);
6010
0
      return NULL;
6011
0
    }
6012
0
    c->tcp_req_info->cp = c;
6013
0
    c->tcp_do_close = 1;
6014
0
    c->tcp_do_toggle_rw = 0;
6015
0
  }
6016
  /* add to parent free list */
6017
0
  c->tcp_free = parent->tcp_free;
6018
0
  parent->tcp_free = c;
6019
  /* ub_event stuff */
6020
0
  evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6021
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6022
0
    comm_point_tcp_handle_callback, c);
6023
0
  if(c->ev->ev == NULL)
6024
0
  {
6025
0
    log_err("could not basetset tcphdl event");
6026
0
    parent->tcp_free = c->tcp_free;
6027
0
    tcp_req_info_delete(c->tcp_req_info);
6028
0
    sldns_buffer_free(c->buffer);
6029
0
    free(c->timeout);
6030
0
    free(c->ev);
6031
0
    free(c);
6032
0
    return NULL;
6033
0
  }
6034
0
  return c;
6035
0
}
6036
6037
static struct comm_point*
6038
comm_point_create_http_handler(struct comm_base *base,
6039
  struct comm_point* parent, size_t bufsize, int harden_large_queries,
6040
  uint32_t http_max_streams, char* http_endpoint,
6041
  comm_point_callback_type* callback, void* callback_arg,
6042
  struct unbound_socket* socket)
6043
0
{
6044
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6045
0
    sizeof(struct comm_point));
6046
0
  short evbits;
6047
0
  if(!c)
6048
0
    return NULL;
6049
0
  c->ev = (struct internal_event*)calloc(1,
6050
0
    sizeof(struct internal_event));
6051
0
  if(!c->ev) {
6052
0
    free(c);
6053
0
    return NULL;
6054
0
  }
6055
0
  c->ev->base = base;
6056
0
  c->fd = -1;
6057
0
  c->buffer = sldns_buffer_new(bufsize);
6058
0
  if(!c->buffer) {
6059
0
    free(c->ev);
6060
0
    free(c);
6061
0
    return NULL;
6062
0
  }
6063
0
  c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
6064
0
  if(!c->timeout) {
6065
0
    sldns_buffer_free(c->buffer);
6066
0
    free(c->ev);
6067
0
    free(c);
6068
0
    return NULL;
6069
0
  }
6070
0
  c->tcp_is_reading = 0;
6071
0
  c->tcp_byte_count = 0;
6072
0
  c->tcp_parent = parent;
6073
0
  c->tcp_timeout_msec = parent->tcp_timeout_msec;
6074
0
  c->tcp_conn_limit = parent->tcp_conn_limit;
6075
0
  c->tcl_addr = NULL;
6076
0
  c->tcp_keepalive = 0;
6077
0
  c->max_tcp_count = 0;
6078
0
  c->cur_tcp_count = 0;
6079
0
  c->tcp_handlers = NULL;
6080
0
  c->tcp_free = NULL;
6081
0
  c->type = comm_http;
6082
0
  c->tcp_do_close = 1;
6083
0
  c->do_not_close = 0;
6084
0
  c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */
6085
0
  c->tcp_check_nb_connect = 0;
6086
#ifdef USE_MSG_FASTOPEN
6087
  c->tcp_do_fastopen = 0;
6088
#endif
6089
#ifdef USE_DNSCRYPT
6090
  c->dnscrypt = 0;
6091
  c->dnscrypt_buffer = NULL;
6092
#endif
6093
0
  c->repinfo.c = c;
6094
0
  c->callback = callback;
6095
0
  c->cb_arg = callback_arg;
6096
0
  c->socket = socket;
6097
0
  c->pp2_enabled = 0;
6098
0
  c->pp2_header_state = pp2_header_none;
6099
6100
0
  c->http_min_version = http_version_2;
6101
0
  c->http2_stream_max_qbuffer_size = bufsize;
6102
0
  if(harden_large_queries && bufsize > 512)
6103
0
    c->http2_stream_max_qbuffer_size = 512;
6104
0
  c->http2_max_streams = http_max_streams;
6105
0
  if(!(c->http_endpoint = strdup(http_endpoint))) {
6106
0
    log_err("could not strdup http_endpoint");
6107
0
    sldns_buffer_free(c->buffer);
6108
0
    free(c->timeout);
6109
0
    free(c->ev);
6110
0
    free(c);
6111
0
    return NULL;
6112
0
  }
6113
0
  c->use_h2 = 0;
6114
#ifdef HAVE_NGHTTP2
6115
  if(!(c->h2_session = http2_session_create(c))) {
6116
    log_err("could not create http2 session");
6117
    free(c->http_endpoint);
6118
    sldns_buffer_free(c->buffer);
6119
    free(c->timeout);
6120
    free(c->ev);
6121
    free(c);
6122
    return NULL;
6123
  }
6124
  if(!(c->h2_session->callbacks = http2_req_callbacks_create())) {
6125
    log_err("could not create http2 callbacks");
6126
    http2_session_delete(c->h2_session);
6127
    free(c->http_endpoint);
6128
    sldns_buffer_free(c->buffer);
6129
    free(c->timeout);
6130
    free(c->ev);
6131
    free(c);
6132
    return NULL;
6133
  }
6134
#endif
6135
6136
  /* add to parent free list */
6137
0
  c->tcp_free = parent->tcp_free;
6138
0
  parent->tcp_free = c;
6139
  /* ub_event stuff */
6140
0
  evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6141
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6142
0
    comm_point_http_handle_callback, c);
6143
0
  if(c->ev->ev == NULL)
6144
0
  {
6145
0
    log_err("could not set http handler event");
6146
0
    parent->tcp_free = c->tcp_free;
6147
0
    http2_session_delete(c->h2_session);
6148
0
    sldns_buffer_free(c->buffer);
6149
0
    free(c->timeout);
6150
0
    free(c->ev);
6151
0
    free(c);
6152
0
    return NULL;
6153
0
  }
6154
0
  return c;
6155
0
}
6156
6157
struct comm_point*
6158
comm_point_create_tcp(struct comm_base *base, int fd, int num,
6159
  int idle_timeout, int harden_large_queries,
6160
  uint32_t http_max_streams, char* http_endpoint,
6161
  struct tcl_list* tcp_conn_limit, size_t bufsize,
6162
  struct sldns_buffer* spoolbuf, enum listen_type port_type,
6163
  int pp2_enabled, comm_point_callback_type* callback,
6164
  void* callback_arg, struct unbound_socket* socket)
6165
0
{
6166
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6167
0
    sizeof(struct comm_point));
6168
0
  short evbits;
6169
0
  int i;
6170
  /* first allocate the TCP accept listener */
6171
0
  if(!c)
6172
0
    return NULL;
6173
0
  c->ev = (struct internal_event*)calloc(1,
6174
0
    sizeof(struct internal_event));
6175
0
  if(!c->ev) {
6176
0
    free(c);
6177
0
    return NULL;
6178
0
  }
6179
0
  c->ev->base = base;
6180
0
  c->fd = fd;
6181
0
  c->buffer = NULL;
6182
0
  c->timeout = NULL;
6183
0
  c->tcp_is_reading = 0;
6184
0
  c->tcp_byte_count = 0;
6185
0
  c->tcp_timeout_msec = idle_timeout;
6186
0
  c->tcp_conn_limit = tcp_conn_limit;
6187
0
  c->tcl_addr = NULL;
6188
0
  c->tcp_keepalive = 0;
6189
0
  c->tcp_parent = NULL;
6190
0
  c->max_tcp_count = num;
6191
0
  c->cur_tcp_count = 0;
6192
0
  c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
6193
0
    sizeof(struct comm_point*));
6194
0
  if(!c->tcp_handlers) {
6195
0
    free(c->ev);
6196
0
    free(c);
6197
0
    return NULL;
6198
0
  }
6199
0
  c->tcp_free = NULL;
6200
0
  c->type = comm_tcp_accept;
6201
0
  c->tcp_do_close = 0;
6202
0
  c->do_not_close = 0;
6203
0
  c->tcp_do_toggle_rw = 0;
6204
0
  c->tcp_check_nb_connect = 0;
6205
#ifdef USE_MSG_FASTOPEN
6206
  c->tcp_do_fastopen = 0;
6207
#endif
6208
#ifdef USE_DNSCRYPT
6209
  c->dnscrypt = 0;
6210
  c->dnscrypt_buffer = NULL;
6211
#endif
6212
0
  c->callback = NULL;
6213
0
  c->cb_arg = NULL;
6214
0
  c->socket = socket;
6215
0
  c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled);
6216
0
  c->pp2_header_state = pp2_header_none;
6217
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
6218
  /* ub_event stuff */
6219
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6220
0
    comm_point_tcp_accept_callback, c);
6221
0
  if(c->ev->ev == NULL) {
6222
0
    log_err("could not baseset tcpacc event");
6223
0
    comm_point_delete(c);
6224
0
    return NULL;
6225
0
  }
6226
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6227
0
    log_err("could not add tcpacc event");
6228
0
    comm_point_delete(c);
6229
0
    return NULL;
6230
0
  }
6231
0
  c->event_added = 1;
6232
  /* now prealloc the handlers */
6233
0
  for(i=0; i<num; i++) {
6234
0
    if(port_type == listen_type_tcp ||
6235
0
      port_type == listen_type_ssl ||
6236
0
      port_type == listen_type_tcp_dnscrypt) {
6237
0
      c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
6238
0
        c, bufsize, spoolbuf, callback, callback_arg, socket);
6239
0
    } else if(port_type == listen_type_http) {
6240
0
      c->tcp_handlers[i] = comm_point_create_http_handler(
6241
0
        base, c, bufsize, harden_large_queries,
6242
0
        http_max_streams, http_endpoint,
6243
0
        callback, callback_arg, socket);
6244
0
    }
6245
0
    else {
6246
0
      log_err("could not create tcp handler, unknown listen "
6247
0
        "type");
6248
0
      return NULL;
6249
0
    }
6250
0
    if(!c->tcp_handlers[i]) {
6251
0
      comm_point_delete(c);
6252
0
      return NULL;
6253
0
    }
6254
0
  }
6255
6256
0
  return c;
6257
0
}
6258
6259
struct comm_point*
6260
comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
6261
        comm_point_callback_type* callback, void* callback_arg)
6262
0
{
6263
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6264
0
    sizeof(struct comm_point));
6265
0
  short evbits;
6266
0
  if(!c)
6267
0
    return NULL;
6268
0
  c->ev = (struct internal_event*)calloc(1,
6269
0
    sizeof(struct internal_event));
6270
0
  if(!c->ev) {
6271
0
    free(c);
6272
0
    return NULL;
6273
0
  }
6274
0
  c->ev->base = base;
6275
0
  c->fd = -1;
6276
0
  c->buffer = sldns_buffer_new(bufsize);
6277
0
  if(!c->buffer) {
6278
0
    free(c->ev);
6279
0
    free(c);
6280
0
    return NULL;
6281
0
  }
6282
0
  c->timeout = NULL;
6283
0
  c->tcp_is_reading = 0;
6284
0
  c->tcp_byte_count = 0;
6285
0
  c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
6286
0
  c->tcp_conn_limit = NULL;
6287
0
  c->tcl_addr = NULL;
6288
0
  c->tcp_keepalive = 0;
6289
0
  c->tcp_parent = NULL;
6290
0
  c->max_tcp_count = 0;
6291
0
  c->cur_tcp_count = 0;
6292
0
  c->tcp_handlers = NULL;
6293
0
  c->tcp_free = NULL;
6294
0
  c->type = comm_tcp;
6295
0
  c->tcp_do_close = 0;
6296
0
  c->do_not_close = 0;
6297
0
  c->tcp_do_toggle_rw = 1;
6298
0
  c->tcp_check_nb_connect = 1;
6299
#ifdef USE_MSG_FASTOPEN
6300
  c->tcp_do_fastopen = 1;
6301
#endif
6302
#ifdef USE_DNSCRYPT
6303
  c->dnscrypt = 0;
6304
  c->dnscrypt_buffer = c->buffer;
6305
#endif
6306
0
  c->repinfo.c = c;
6307
0
  c->callback = callback;
6308
0
  c->cb_arg = callback_arg;
6309
0
  c->pp2_enabled = 0;
6310
0
  c->pp2_header_state = pp2_header_none;
6311
0
  evbits = UB_EV_PERSIST | UB_EV_WRITE;
6312
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6313
0
    comm_point_tcp_handle_callback, c);
6314
0
  if(c->ev->ev == NULL)
6315
0
  {
6316
0
    log_err("could not baseset tcpout event");
6317
0
    sldns_buffer_free(c->buffer);
6318
0
    free(c->ev);
6319
0
    free(c);
6320
0
    return NULL;
6321
0
  }
6322
6323
0
  return c;
6324
0
}
6325
6326
struct comm_point*
6327
comm_point_create_http_out(struct comm_base *base, size_t bufsize,
6328
        comm_point_callback_type* callback, void* callback_arg,
6329
  sldns_buffer* temp)
6330
0
{
6331
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6332
0
    sizeof(struct comm_point));
6333
0
  short evbits;
6334
0
  if(!c)
6335
0
    return NULL;
6336
0
  c->ev = (struct internal_event*)calloc(1,
6337
0
    sizeof(struct internal_event));
6338
0
  if(!c->ev) {
6339
0
    free(c);
6340
0
    return NULL;
6341
0
  }
6342
0
  c->ev->base = base;
6343
0
  c->fd = -1;
6344
0
  c->buffer = sldns_buffer_new(bufsize);
6345
0
  if(!c->buffer) {
6346
0
    free(c->ev);
6347
0
    free(c);
6348
0
    return NULL;
6349
0
  }
6350
0
  c->timeout = NULL;
6351
0
  c->tcp_is_reading = 0;
6352
0
  c->tcp_byte_count = 0;
6353
0
  c->tcp_parent = NULL;
6354
0
  c->max_tcp_count = 0;
6355
0
  c->cur_tcp_count = 0;
6356
0
  c->tcp_handlers = NULL;
6357
0
  c->tcp_free = NULL;
6358
0
  c->type = comm_http;
6359
0
  c->tcp_do_close = 0;
6360
0
  c->do_not_close = 0;
6361
0
  c->tcp_do_toggle_rw = 1;
6362
0
  c->tcp_check_nb_connect = 1;
6363
0
  c->http_in_headers = 1;
6364
0
  c->http_in_chunk_headers = 0;
6365
0
  c->http_is_chunked = 0;
6366
0
  c->http_temp = temp;
6367
#ifdef USE_MSG_FASTOPEN
6368
  c->tcp_do_fastopen = 1;
6369
#endif
6370
#ifdef USE_DNSCRYPT
6371
  c->dnscrypt = 0;
6372
  c->dnscrypt_buffer = c->buffer;
6373
#endif
6374
0
  c->repinfo.c = c;
6375
0
  c->callback = callback;
6376
0
  c->cb_arg = callback_arg;
6377
0
  c->pp2_enabled = 0;
6378
0
  c->pp2_header_state = pp2_header_none;
6379
0
  evbits = UB_EV_PERSIST | UB_EV_WRITE;
6380
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6381
0
    comm_point_http_handle_callback, c);
6382
0
  if(c->ev->ev == NULL)
6383
0
  {
6384
0
    log_err("could not baseset tcpout event");
6385
0
#ifdef HAVE_SSL
6386
0
    SSL_free(c->ssl);
6387
0
#endif
6388
0
    sldns_buffer_free(c->buffer);
6389
0
    free(c->ev);
6390
0
    free(c);
6391
0
    return NULL;
6392
0
  }
6393
6394
0
  return c;
6395
0
}
6396
6397
struct comm_point*
6398
comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
6399
        comm_point_callback_type* callback, void* callback_arg)
6400
0
{
6401
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6402
0
    sizeof(struct comm_point));
6403
0
  short evbits;
6404
0
  if(!c)
6405
0
    return NULL;
6406
0
  c->ev = (struct internal_event*)calloc(1,
6407
0
    sizeof(struct internal_event));
6408
0
  if(!c->ev) {
6409
0
    free(c);
6410
0
    return NULL;
6411
0
  }
6412
0
  c->ev->base = base;
6413
0
  c->fd = fd;
6414
0
  c->buffer = sldns_buffer_new(bufsize);
6415
0
  if(!c->buffer) {
6416
0
    free(c->ev);
6417
0
    free(c);
6418
0
    return NULL;
6419
0
  }
6420
0
  c->timeout = NULL;
6421
0
  c->tcp_is_reading = 1;
6422
0
  c->tcp_byte_count = 0;
6423
0
  c->tcp_parent = NULL;
6424
0
  c->max_tcp_count = 0;
6425
0
  c->cur_tcp_count = 0;
6426
0
  c->tcp_handlers = NULL;
6427
0
  c->tcp_free = NULL;
6428
0
  c->type = comm_local;
6429
0
  c->tcp_do_close = 0;
6430
0
  c->do_not_close = 1;
6431
0
  c->tcp_do_toggle_rw = 0;
6432
0
  c->tcp_check_nb_connect = 0;
6433
#ifdef USE_MSG_FASTOPEN
6434
  c->tcp_do_fastopen = 0;
6435
#endif
6436
#ifdef USE_DNSCRYPT
6437
  c->dnscrypt = 0;
6438
  c->dnscrypt_buffer = c->buffer;
6439
#endif
6440
0
  c->callback = callback;
6441
0
  c->cb_arg = callback_arg;
6442
0
  c->pp2_enabled = 0;
6443
0
  c->pp2_header_state = pp2_header_none;
6444
  /* ub_event stuff */
6445
0
  evbits = UB_EV_PERSIST | UB_EV_READ;
6446
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6447
0
    comm_point_local_handle_callback, c);
6448
0
  if(c->ev->ev == NULL) {
6449
0
    log_err("could not baseset localhdl event");
6450
0
    free(c->ev);
6451
0
    free(c);
6452
0
    return NULL;
6453
0
  }
6454
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6455
0
    log_err("could not add localhdl event");
6456
0
    ub_event_free(c->ev->ev);
6457
0
    free(c->ev);
6458
0
    free(c);
6459
0
    return NULL;
6460
0
  }
6461
0
  c->event_added = 1;
6462
0
  return c;
6463
0
}
6464
6465
struct comm_point*
6466
comm_point_create_raw(struct comm_base* base, int fd, int writing,
6467
  comm_point_callback_type* callback, void* callback_arg)
6468
0
{
6469
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6470
0
    sizeof(struct comm_point));
6471
0
  short evbits;
6472
0
  if(!c)
6473
0
    return NULL;
6474
0
  c->ev = (struct internal_event*)calloc(1,
6475
0
    sizeof(struct internal_event));
6476
0
  if(!c->ev) {
6477
0
    free(c);
6478
0
    return NULL;
6479
0
  }
6480
0
  c->ev->base = base;
6481
0
  c->fd = fd;
6482
0
  c->buffer = NULL;
6483
0
  c->timeout = NULL;
6484
0
  c->tcp_is_reading = 0;
6485
0
  c->tcp_byte_count = 0;
6486
0
  c->tcp_parent = NULL;
6487
0
  c->max_tcp_count = 0;
6488
0
  c->cur_tcp_count = 0;
6489
0
  c->tcp_handlers = NULL;
6490
0
  c->tcp_free = NULL;
6491
0
  c->type = comm_raw;
6492
0
  c->tcp_do_close = 0;
6493
0
  c->do_not_close = 1;
6494
0
  c->tcp_do_toggle_rw = 0;
6495
0
  c->tcp_check_nb_connect = 0;
6496
#ifdef USE_MSG_FASTOPEN
6497
  c->tcp_do_fastopen = 0;
6498
#endif
6499
#ifdef USE_DNSCRYPT
6500
  c->dnscrypt = 0;
6501
  c->dnscrypt_buffer = c->buffer;
6502
#endif
6503
0
  c->callback = callback;
6504
0
  c->cb_arg = callback_arg;
6505
0
  c->pp2_enabled = 0;
6506
0
  c->pp2_header_state = pp2_header_none;
6507
  /* ub_event stuff */
6508
0
  if(writing)
6509
0
    evbits = UB_EV_PERSIST | UB_EV_WRITE;
6510
0
  else  evbits = UB_EV_PERSIST | UB_EV_READ;
6511
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6512
0
    comm_point_raw_handle_callback, c);
6513
0
  if(c->ev->ev == NULL) {
6514
0
    log_err("could not baseset rawhdl event");
6515
0
    free(c->ev);
6516
0
    free(c);
6517
0
    return NULL;
6518
0
  }
6519
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6520
0
    log_err("could not add rawhdl event");
6521
0
    ub_event_free(c->ev->ev);
6522
0
    free(c->ev);
6523
0
    free(c);
6524
0
    return NULL;
6525
0
  }
6526
0
  c->event_added = 1;
6527
0
  return c;
6528
0
}
6529
6530
void
6531
comm_point_close(struct comm_point* c)
6532
0
{
6533
0
  if(!c)
6534
0
    return;
6535
0
  if(c->fd != -1) {
6536
0
    verbose(5, "comm_point_close of %d: event_del", c->fd);
6537
0
    if(c->event_added) {
6538
0
      if(ub_event_del(c->ev->ev) != 0) {
6539
0
        log_err("could not event_del on close");
6540
0
      }
6541
0
      c->event_added = 0;
6542
0
    }
6543
0
  }
6544
0
  tcl_close_connection(c->tcl_addr);
6545
0
  if(c->tcp_req_info)
6546
0
    tcp_req_info_clear(c->tcp_req_info);
6547
0
  if(c->h2_session)
6548
0
    http2_session_server_delete(c->h2_session);
6549
  /* stop the comm point from reading or writing after it is closed. */
6550
0
  if(c->tcp_more_read_again && *c->tcp_more_read_again)
6551
0
    *c->tcp_more_read_again = 0;
6552
0
  if(c->tcp_more_write_again && *c->tcp_more_write_again)
6553
0
    *c->tcp_more_write_again = 0;
6554
6555
  /* close fd after removing from event lists, or epoll.. is messed up */
6556
0
  if(c->fd != -1 && !c->do_not_close) {
6557
#ifdef USE_WINSOCK
6558
    if(c->type == comm_tcp || c->type == comm_http) {
6559
      /* delete sticky events for the fd, it gets closed */
6560
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
6561
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
6562
    }
6563
#endif
6564
0
    verbose(VERB_ALGO, "close fd %d", c->fd);
6565
0
    sock_close(c->fd);
6566
0
  }
6567
0
  c->fd = -1;
6568
0
}
6569
6570
void
6571
comm_point_delete(struct comm_point* c)
6572
0
{
6573
0
  if(!c)
6574
0
    return;
6575
0
  if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
6576
0
#ifdef HAVE_SSL
6577
0
    SSL_shutdown(c->ssl);
6578
0
    SSL_free(c->ssl);
6579
0
#endif
6580
0
  }
6581
0
  if(c->type == comm_http && c->http_endpoint) {
6582
0
    free(c->http_endpoint);
6583
0
    c->http_endpoint = NULL;
6584
0
  }
6585
0
  comm_point_close(c);
6586
0
  if(c->tcp_handlers) {
6587
0
    int i;
6588
0
    for(i=0; i<c->max_tcp_count; i++)
6589
0
      comm_point_delete(c->tcp_handlers[i]);
6590
0
    free(c->tcp_handlers);
6591
0
  }
6592
0
  free(c->timeout);
6593
0
  if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
6594
0
    sldns_buffer_free(c->buffer);
6595
#ifdef USE_DNSCRYPT
6596
    if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
6597
      sldns_buffer_free(c->dnscrypt_buffer);
6598
    }
6599
#endif
6600
0
    if(c->tcp_req_info) {
6601
0
      tcp_req_info_delete(c->tcp_req_info);
6602
0
    }
6603
0
    if(c->h2_session) {
6604
0
      http2_session_delete(c->h2_session);
6605
0
    }
6606
0
  }
6607
#ifdef HAVE_NGTCP2
6608
  if(c->doq_socket)
6609
    doq_server_socket_delete(c->doq_socket);
6610
#endif
6611
0
  ub_event_free(c->ev->ev);
6612
0
  free(c->ev);
6613
0
  free(c);
6614
0
}
6615
6616
#ifdef USE_DNSTAP
6617
static void
6618
send_reply_dnstap(struct dt_env* dtenv,
6619
  struct sockaddr* addr, socklen_t addrlen,
6620
  struct sockaddr_storage* client_addr, socklen_t client_addrlen,
6621
  enum comm_point_type type, void* ssl, sldns_buffer* buffer)
6622
{
6623
  log_addr(VERB_ALGO, "from local addr", (void*)addr, addrlen);
6624
  log_addr(VERB_ALGO, "response to client", client_addr, client_addrlen);
6625
  dt_msg_send_client_response(dtenv, client_addr,
6626
    (struct sockaddr_storage*)addr, type, ssl, buffer);
6627
}
6628
#endif
6629
6630
void
6631
comm_point_send_reply(struct comm_reply *repinfo)
6632
0
{
6633
0
  struct sldns_buffer* buffer;
6634
0
  log_assert(repinfo && repinfo->c);
6635
#ifdef USE_DNSCRYPT
6636
  buffer = repinfo->c->dnscrypt_buffer;
6637
  if(!dnsc_handle_uncurved_request(repinfo)) {
6638
    return;
6639
  }
6640
#else
6641
0
  buffer = repinfo->c->buffer;
6642
0
#endif
6643
0
  if(repinfo->c->type == comm_udp) {
6644
0
    if(repinfo->srctype)
6645
0
      comm_point_send_udp_msg_if(repinfo->c, buffer,
6646
0
        (struct sockaddr*)&repinfo->remote_addr,
6647
0
        repinfo->remote_addrlen, repinfo);
6648
0
    else
6649
0
      comm_point_send_udp_msg(repinfo->c, buffer,
6650
0
        (struct sockaddr*)&repinfo->remote_addr,
6651
0
        repinfo->remote_addrlen, 0);
6652
#ifdef USE_DNSTAP
6653
    /*
6654
     * sending src (client)/dst (local service) addresses over
6655
     * DNSTAP from udp callback
6656
     */
6657
    if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) {
6658
      send_reply_dnstap(repinfo->c->dtenv,
6659
        repinfo->c->socket->addr,
6660
        repinfo->c->socket->addrlen,
6661
        &repinfo->client_addr, repinfo->client_addrlen,
6662
        repinfo->c->type, repinfo->c->ssl,
6663
        repinfo->c->buffer);
6664
    }
6665
#endif
6666
0
  } else {
6667
#ifdef USE_DNSTAP
6668
    struct dt_env* dtenv =
6669
#ifdef HAVE_NGTCP2
6670
      repinfo->c->doq_socket
6671
      ?repinfo->c->dtenv:
6672
#endif
6673
      repinfo->c->tcp_parent->dtenv;
6674
    struct sldns_buffer* dtbuffer = repinfo->c->tcp_req_info
6675
      ?repinfo->c->tcp_req_info->spool_buffer
6676
      :repinfo->c->buffer;
6677
#ifdef USE_DNSCRYPT
6678
    if(repinfo->c->dnscrypt && repinfo->is_dnscrypted)
6679
      dtbuffer = repinfo->c->buffer;
6680
#endif
6681
    /*
6682
     * sending src (client)/dst (local service) addresses over
6683
     * DNSTAP from other callbacks
6684
     */
6685
    if(dtenv != NULL && dtenv->log_client_response_messages) {
6686
      send_reply_dnstap(dtenv,
6687
        repinfo->c->socket->addr,
6688
        repinfo->c->socket->addrlen,
6689
        &repinfo->client_addr, repinfo->client_addrlen,
6690
        repinfo->c->type, repinfo->c->ssl,
6691
        dtbuffer);
6692
    }
6693
#endif
6694
0
    if(repinfo->c->tcp_req_info) {
6695
0
      tcp_req_info_send_reply(repinfo->c->tcp_req_info);
6696
0
    } else if(repinfo->c->use_h2) {
6697
0
      if(!http2_submit_dns_response(repinfo->c->h2_session)) {
6698
0
        comm_point_drop_reply(repinfo);
6699
0
        return;
6700
0
      }
6701
0
      repinfo->c->h2_stream = NULL;
6702
0
      repinfo->c->tcp_is_reading = 0;
6703
0
      comm_point_stop_listening(repinfo->c);
6704
0
      comm_point_start_listening(repinfo->c, -1,
6705
0
        adjusted_tcp_timeout(repinfo->c));
6706
0
      return;
6707
#ifdef HAVE_NGTCP2
6708
    } else if(repinfo->c->doq_socket) {
6709
      doq_socket_send_reply(repinfo);
6710
#endif
6711
0
    } else {
6712
0
      comm_point_start_listening(repinfo->c, -1,
6713
0
        adjusted_tcp_timeout(repinfo->c));
6714
0
    }
6715
0
  }
6716
0
}
6717
6718
void
6719
comm_point_drop_reply(struct comm_reply* repinfo)
6720
0
{
6721
0
  if(!repinfo)
6722
0
    return;
6723
0
  log_assert(repinfo->c);
6724
0
  log_assert(repinfo->c->type != comm_tcp_accept);
6725
0
  if(repinfo->c->type == comm_udp)
6726
0
    return;
6727
0
  if(repinfo->c->tcp_req_info)
6728
0
    repinfo->c->tcp_req_info->is_drop = 1;
6729
0
  if(repinfo->c->type == comm_http) {
6730
0
    if(repinfo->c->h2_session) {
6731
0
      repinfo->c->h2_session->is_drop = 1;
6732
0
      if(!repinfo->c->h2_session->postpone_drop)
6733
0
        reclaim_http_handler(repinfo->c);
6734
0
      return;
6735
0
    }
6736
0
    reclaim_http_handler(repinfo->c);
6737
0
    return;
6738
#ifdef HAVE_NGTCP2
6739
  } else if(repinfo->c->doq_socket) {
6740
    doq_socket_drop_reply(repinfo);
6741
    return;
6742
#endif
6743
0
  }
6744
0
  reclaim_tcp_handler(repinfo->c);
6745
0
}
6746
6747
void
6748
comm_point_stop_listening(struct comm_point* c)
6749
0
{
6750
0
  verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
6751
0
  if(c->event_added) {
6752
0
    if(ub_event_del(c->ev->ev) != 0) {
6753
0
      log_err("event_del error to stoplisten");
6754
0
    }
6755
0
    c->event_added = 0;
6756
0
  }
6757
0
}
6758
6759
void
6760
comm_point_start_listening(struct comm_point* c, int newfd, int msec)
6761
0
{
6762
0
  verbose(VERB_ALGO, "comm point start listening %d (%d msec)",
6763
0
    c->fd==-1?newfd:c->fd, msec);
6764
0
  if(c->type == comm_tcp_accept && !c->tcp_free) {
6765
    /* no use to start listening no free slots. */
6766
0
    return;
6767
0
  }
6768
0
  if(c->event_added) {
6769
0
    if(ub_event_del(c->ev->ev) != 0) {
6770
0
      log_err("event_del error to startlisten");
6771
0
    }
6772
0
    c->event_added = 0;
6773
0
  }
6774
0
  if(msec != -1 && msec != 0) {
6775
0
    if(!c->timeout) {
6776
0
      c->timeout = (struct timeval*)malloc(sizeof(
6777
0
        struct timeval));
6778
0
      if(!c->timeout) {
6779
0
        log_err("cpsl: malloc failed. No net read.");
6780
0
        return;
6781
0
      }
6782
0
    }
6783
0
    ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
6784
0
#ifndef S_SPLINT_S /* splint fails on struct timeval. */
6785
0
    c->timeout->tv_sec = msec/1000;
6786
0
    c->timeout->tv_usec = (msec%1000)*1000;
6787
0
#endif /* S_SPLINT_S */
6788
0
  } else {
6789
0
    if(msec == 0 || !c->timeout) {
6790
0
      ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6791
0
    }
6792
0
  }
6793
0
  if(c->type == comm_tcp || c->type == comm_http) {
6794
0
    ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6795
0
    if(c->tcp_write_and_read) {
6796
0
      verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd));
6797
0
      ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6798
0
    } else if(c->tcp_is_reading) {
6799
0
      verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd));
6800
0
      ub_event_add_bits(c->ev->ev, UB_EV_READ);
6801
0
    } else  {
6802
0
      verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd));
6803
0
      ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6804
0
    }
6805
0
  }
6806
0
  if(newfd != -1) {
6807
0
    if(c->fd != -1 && c->fd != newfd) {
6808
0
      verbose(5, "cpsl close of fd %d for %d", c->fd, newfd);
6809
0
      sock_close(c->fd);
6810
0
    }
6811
0
    c->fd = newfd;
6812
0
    ub_event_set_fd(c->ev->ev, c->fd);
6813
0
  }
6814
0
  if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
6815
0
    log_err("event_add failed. in cpsl.");
6816
0
    return;
6817
0
  }
6818
0
  c->event_added = 1;
6819
0
}
6820
6821
void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
6822
0
{
6823
0
  verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
6824
0
  if(c->event_added) {
6825
0
    if(ub_event_del(c->ev->ev) != 0) {
6826
0
      log_err("event_del error to cplf");
6827
0
    }
6828
0
    c->event_added = 0;
6829
0
  }
6830
0
  if(!c->timeout) {
6831
0
    ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6832
0
  }
6833
0
  ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6834
0
  if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
6835
0
  if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6836
0
  if(ub_event_add(c->ev->ev, c->timeout) != 0) {
6837
0
    log_err("event_add failed. in cplf.");
6838
0
    return;
6839
0
  }
6840
0
  c->event_added = 1;
6841
0
}
6842
6843
size_t comm_point_get_mem(struct comm_point* c)
6844
0
{
6845
0
  size_t s;
6846
0
  if(!c)
6847
0
    return 0;
6848
0
  s = sizeof(*c) + sizeof(*c->ev);
6849
0
  if(c->timeout)
6850
0
    s += sizeof(*c->timeout);
6851
0
  if(c->type == comm_tcp || c->type == comm_local) {
6852
0
    s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
6853
#ifdef USE_DNSCRYPT
6854
    s += sizeof(*c->dnscrypt_buffer);
6855
    if(c->buffer != c->dnscrypt_buffer) {
6856
      s += sldns_buffer_capacity(c->dnscrypt_buffer);
6857
    }
6858
#endif
6859
0
  }
6860
0
  if(c->type == comm_tcp_accept) {
6861
0
    int i;
6862
0
    for(i=0; i<c->max_tcp_count; i++)
6863
0
      s += comm_point_get_mem(c->tcp_handlers[i]);
6864
0
  }
6865
0
  return s;
6866
0
}
6867
6868
struct comm_timer*
6869
comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
6870
0
{
6871
0
  struct internal_timer *tm = (struct internal_timer*)calloc(1,
6872
0
    sizeof(struct internal_timer));
6873
0
  if(!tm) {
6874
0
    log_err("malloc failed");
6875
0
    return NULL;
6876
0
  }
6877
0
  tm->super.ev_timer = tm;
6878
0
  tm->base = base;
6879
0
  tm->super.callback = cb;
6880
0
  tm->super.cb_arg = cb_arg;
6881
0
  tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT,
6882
0
    comm_timer_callback, &tm->super);
6883
0
  if(tm->ev == NULL) {
6884
0
    log_err("timer_create: event_base_set failed.");
6885
0
    free(tm);
6886
0
    return NULL;
6887
0
  }
6888
0
  return &tm->super;
6889
0
}
6890
6891
void
6892
comm_timer_disable(struct comm_timer* timer)
6893
0
{
6894
0
  if(!timer)
6895
0
    return;
6896
0
  ub_timer_del(timer->ev_timer->ev);
6897
0
  timer->ev_timer->enabled = 0;
6898
0
}
6899
6900
void
6901
comm_timer_set(struct comm_timer* timer, struct timeval* tv)
6902
0
{
6903
0
  log_assert(tv);
6904
0
  if(timer->ev_timer->enabled)
6905
0
    comm_timer_disable(timer);
6906
0
  if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
6907
0
    comm_timer_callback, timer, tv) != 0)
6908
0
    log_err("comm_timer_set: evtimer_add failed.");
6909
0
  timer->ev_timer->enabled = 1;
6910
0
}
6911
6912
void
6913
comm_timer_delete(struct comm_timer* timer)
6914
0
{
6915
0
  if(!timer)
6916
0
    return;
6917
0
  comm_timer_disable(timer);
6918
  /* Free the sub struct timer->ev_timer derived from the super struct timer.
6919
   * i.e. assert(timer == timer->ev_timer)
6920
   */
6921
0
  ub_event_free(timer->ev_timer->ev);
6922
0
  free(timer->ev_timer);
6923
0
}
6924
6925
void
6926
comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
6927
0
{
6928
0
  struct comm_timer* tm = (struct comm_timer*)arg;
6929
0
  if(!(event&UB_EV_TIMEOUT))
6930
0
    return;
6931
0
  ub_comm_base_now(tm->ev_timer->base);
6932
0
  tm->ev_timer->enabled = 0;
6933
0
  fptr_ok(fptr_whitelist_comm_timer(tm->callback));
6934
0
  (*tm->callback)(tm->cb_arg);
6935
0
}
6936
6937
int
6938
comm_timer_is_set(struct comm_timer* timer)
6939
0
{
6940
0
  return (int)timer->ev_timer->enabled;
6941
0
}
6942
6943
size_t
6944
comm_timer_get_mem(struct comm_timer* timer)
6945
0
{
6946
0
  if(!timer) return 0;
6947
0
  return sizeof(struct internal_timer);
6948
0
}
6949
6950
struct comm_signal*
6951
comm_signal_create(struct comm_base* base,
6952
        void (*callback)(int, void*), void* cb_arg)
6953
0
{
6954
0
  struct comm_signal* com = (struct comm_signal*)malloc(
6955
0
    sizeof(struct comm_signal));
6956
0
  if(!com) {
6957
0
    log_err("malloc failed");
6958
0
    return NULL;
6959
0
  }
6960
0
  com->base = base;
6961
0
  com->callback = callback;
6962
0
  com->cb_arg = cb_arg;
6963
0
  com->ev_signal = NULL;
6964
0
  return com;
6965
0
}
6966
6967
void
6968
comm_signal_callback(int sig, short event, void* arg)
6969
0
{
6970
0
  struct comm_signal* comsig = (struct comm_signal*)arg;
6971
0
  if(!(event & UB_EV_SIGNAL))
6972
0
    return;
6973
0
  ub_comm_base_now(comsig->base);
6974
0
  fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
6975
0
  (*comsig->callback)(sig, comsig->cb_arg);
6976
0
}
6977
6978
int
6979
comm_signal_bind(struct comm_signal* comsig, int sig)
6980
0
{
6981
0
  struct internal_signal* entry = (struct internal_signal*)calloc(1,
6982
0
    sizeof(struct internal_signal));
6983
0
  if(!entry) {
6984
0
    log_err("malloc failed");
6985
0
    return 0;
6986
0
  }
6987
0
  log_assert(comsig);
6988
  /* add signal event */
6989
0
  entry->ev = ub_signal_new(comsig->base->eb->base, sig,
6990
0
    comm_signal_callback, comsig);
6991
0
  if(entry->ev == NULL) {
6992
0
    log_err("Could not create signal event");
6993
0
    free(entry);
6994
0
    return 0;
6995
0
  }
6996
0
  if(ub_signal_add(entry->ev, NULL) != 0) {
6997
0
    log_err("Could not add signal handler");
6998
0
    ub_event_free(entry->ev);
6999
0
    free(entry);
7000
0
    return 0;
7001
0
  }
7002
  /* link into list */
7003
0
  entry->next = comsig->ev_signal;
7004
0
  comsig->ev_signal = entry;
7005
0
  return 1;
7006
0
}
7007
7008
void
7009
comm_signal_delete(struct comm_signal* comsig)
7010
0
{
7011
0
  struct internal_signal* p, *np;
7012
0
  if(!comsig)
7013
0
    return;
7014
0
  p=comsig->ev_signal;
7015
0
  while(p) {
7016
0
    np = p->next;
7017
0
    ub_signal_del(p->ev);
7018
0
    ub_event_free(p->ev);
7019
0
    free(p);
7020
0
    p = np;
7021
0
  }
7022
0
  free(comsig);
7023
0
}