Coverage Report

Created: 2025-08-26 06:34

/src/unbound/util/netevent.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * util/netevent.c - event notification
3
 *
4
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5
 *
6
 * This software is open source.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 *
12
 * Redistributions of source code must retain the above copyright notice,
13
 * this list of conditions and the following disclaimer.
14
 *
15
 * Redistributions in binary form must reproduce the above copyright notice,
16
 * this list of conditions and the following disclaimer in the documentation
17
 * and/or other materials provided with the distribution.
18
 *
19
 * Neither the name of the NLNET LABS nor the names of its contributors may
20
 * be used to endorse or promote products derived from this software without
21
 * specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
 */
35
36
/**
37
 * \file
38
 *
39
 * This file contains event notification functions.
40
 */
41
#include "config.h"
42
#include "util/netevent.h"
43
#include "util/ub_event.h"
44
#include "util/log.h"
45
#include "util/net_help.h"
46
#include "util/tcp_conn_limit.h"
47
#include "util/fptr_wlist.h"
48
#include "util/proxy_protocol.h"
49
#include "util/timeval_func.h"
50
#include "sldns/pkthdr.h"
51
#include "sldns/sbuffer.h"
52
#include "sldns/str2wire.h"
53
#include "dnstap/dnstap.h"
54
#include "dnscrypt/dnscrypt.h"
55
#include "services/listen_dnsport.h"
56
#include "util/random.h"
57
#ifdef HAVE_SYS_TYPES_H
58
#include <sys/types.h>
59
#endif
60
#ifdef HAVE_SYS_SOCKET_H
61
#include <sys/socket.h>
62
#endif
63
#ifdef HAVE_NETDB_H
64
#include <netdb.h>
65
#endif
66
#ifdef HAVE_POLL_H
67
#include <poll.h>
68
#endif
69
70
#ifdef HAVE_OPENSSL_SSL_H
71
#include <openssl/ssl.h>
72
#endif
73
#ifdef HAVE_OPENSSL_ERR_H
74
#include <openssl/err.h>
75
#endif
76
77
#ifdef HAVE_NGTCP2
78
#include <ngtcp2/ngtcp2.h>
79
#include <ngtcp2/ngtcp2_crypto.h>
80
#endif
81
82
#ifdef HAVE_LINUX_NET_TSTAMP_H
83
#include <linux/net_tstamp.h>
84
#endif
85
86
/* -------- Start of local definitions -------- */
87
/*
 * Fallback definitions for the ancillary data (cmsg) helper macros.
 * Each macro is only defined here when the system headers did not
 * already provide it.
 */

/** if CMSG_ALIGN is not defined on this platform, a workaround */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
     /* test the same name that is used in the definition; the old
      * test looked at CMSG_DATA_ALIGN, without the underscore */
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  else
     /* round up to a multiple of sizeof(long) */
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/** if CMSG_LEN is not defined on this platform, a workaround:
 * the aligned header size plus the unpadded payload length */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/** if CMSG_SPACE is not defined on this platform, a workaround:
 * the aligned payload length plus the aligned header size */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
111
112
/** The TCP writing query timeout, in milliseconds */
113
0
#define TCP_QUERY_TIMEOUT 120000
114
/** The minimum actual TCP timeout to use, regardless of what we advertise,
115
 * in milliseconds */
116
0
#define TCP_QUERY_TIMEOUT_MINIMUM 200
117
118
#ifndef NONBLOCKING_IS_BROKEN
119
/** number of UDP reads to perform per read indication from select */
120
0
#define NUM_UDP_PER_SELECT 100
121
#else
122
#define NUM_UDP_PER_SELECT 1 /* a single read when NONBLOCKING_IS_BROKEN is defined */
123
#endif
124
125
/** timeout in milliseconds for one poll() wait for a blocked UDP send to
 * become writable; when the wait times out the packet is dropped. */
126
0
#define SEND_BLOCKED_WAIT_TIMEOUT 200
127
/** maximum number of times a blocked UDP send is retried; after that the
 * send is reported as failed. */
128
0
#define SEND_BLOCKED_MAX_RETRY 5
129
130
/** Fallback values for the SO_TIMESTAMP* socket options, defined only when
 * the system headers do not provide them, so the timestamping code does
 * not need its own ifdefs. */
131
#ifndef SO_TIMESTAMP
132
#define SO_TIMESTAMP 29
133
#endif
134
#ifndef SO_TIMESTAMPNS
135
#define SO_TIMESTAMPNS 35
136
#endif
137
#ifndef SO_TIMESTAMPING
138
#define SO_TIMESTAMPING 37
139
#endif
140
/**
141
 * The internal event structure for keeping ub_event info for the event.
142
 * Possibly other structures (list, tree) this is part of.
143
 * A comm_point refers to one of these through its ev member.
 */
144
struct internal_event {
145
  /** the comm base; the send routines reach the base's log timers
   * through this pointer */
146
  struct comm_base* base;
147
  /** the ub_event for this comm point, returned by comm_point_internal() */
148
  struct ub_event* ev;
149
};
150
151
/**
152
 * Internal base structure, so that every thread has its own events.
153
 * Allocated zeroed by comm_base_create() and comm_base_create_event().
 */
154
struct internal_base {
155
  /** the ub_event event_base that is dispatched by comm_base_dispatch(). */
156
  struct ub_event_base* base;
157
  /** seconds time; its address is handed out by comm_base_timept() */
158
  time_t secs;
159
  /** timeval with current time; its address is handed out by
   * comm_base_timept() */
160
  struct timeval now;
161
  /** the event used for slow_accept timeouts */
162
  struct ub_event* slow_accept;
163
  /** true if slow_accept is enabled; the slow_accept event is only
   * deleted and freed on teardown when this is set */
164
  int slow_accept_enabled;
165
  /** last log time for slow logging of file descriptor errors */
166
  time_t last_slow_log;
167
  /** last log time for slow logging of write wait failures; compared
   * against secs and SLOW_LOG_TIME in the UDP send routines */
168
  time_t last_writewait_log;
169
};
170
171
/**
172
 * Internal timer structure, to store timer event in.
173
 * The public comm_timer is embedded as the first member.
 */
174
struct internal_timer {
175
  /** the super struct from which derived; kept as the first member */
176
  struct comm_timer super;
177
  /** the comm base */
178
  struct comm_base* base;
179
  /** the ub_event for the timer */
180
  struct ub_event* ev;
181
  /** is timer enabled (nonzero) */
182
  uint8_t enabled;
183
};
184
185
/**
186
 * Internal signal structure, to store signal event in.
187
 * Entries are chained into a singly linked list through next.
 */
188
struct internal_signal {
189
  /** the ub_event for the signal */
190
  struct ub_event* ev;
191
  /** next in signal list */
192
  struct internal_signal* next;
193
};
194
195
/** create a tcp handler with a parent.
 * Forward declaration of a file-local (static) function, so that code
 * placed before its definition can call it. */
196
static struct comm_point* comm_point_create_tcp_handler(
197
  struct comm_base *base, struct comm_point* parent, size_t bufsize,
198
  struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
199
  void* callback_arg, struct unbound_socket* socket);
200
201
/* -------- End of local definitions -------- */
202
203
struct comm_base*
204
comm_base_create(int sigs)
205
4.31k
{
206
4.31k
  struct comm_base* b = (struct comm_base*)calloc(1,
207
4.31k
    sizeof(struct comm_base));
208
4.31k
  const char *evnm="event", *evsys="", *evmethod="";
209
210
4.31k
  if(!b)
211
0
    return NULL;
212
4.31k
  b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
213
4.31k
  if(!b->eb) {
214
0
    free(b);
215
0
    return NULL;
216
0
  }
217
4.31k
  b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
218
4.31k
  if(!b->eb->base) {
219
0
    free(b->eb);
220
0
    free(b);
221
0
    return NULL;
222
0
  }
223
4.31k
  ub_comm_base_now(b);
224
4.31k
  ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
225
4.31k
  verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod);
226
4.31k
  return b;
227
4.31k
}
228
229
struct comm_base*
230
comm_base_create_event(struct ub_event_base* base)
231
0
{
232
0
  struct comm_base* b = (struct comm_base*)calloc(1,
233
0
    sizeof(struct comm_base));
234
0
  if(!b)
235
0
    return NULL;
236
0
  b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
237
0
  if(!b->eb) {
238
0
    free(b);
239
0
    return NULL;
240
0
  }
241
0
  b->eb->base = base;
242
0
  ub_comm_base_now(b);
243
0
  return b;
244
0
}
245
246
void
247
comm_base_delete(struct comm_base* b)
248
4.31k
{
249
4.31k
  if(!b)
250
0
    return;
251
4.31k
  if(b->eb->slow_accept_enabled) {
252
0
    if(ub_event_del(b->eb->slow_accept) != 0) {
253
0
      log_err("could not event_del slow_accept");
254
0
    }
255
0
    ub_event_free(b->eb->slow_accept);
256
0
  }
257
4.31k
  ub_event_base_free(b->eb->base);
258
4.31k
  b->eb->base = NULL;
259
4.31k
  free(b->eb);
260
4.31k
  free(b);
261
4.31k
}
262
263
void
264
comm_base_delete_no_base(struct comm_base* b)
265
0
{
266
0
  if(!b)
267
0
    return;
268
0
  if(b->eb->slow_accept_enabled) {
269
0
    if(ub_event_del(b->eb->slow_accept) != 0) {
270
0
      log_err("could not event_del slow_accept");
271
0
    }
272
0
    ub_event_free(b->eb->slow_accept);
273
0
  }
274
0
  b->eb->base = NULL;
275
0
  free(b->eb);
276
0
  free(b);
277
0
}
278
279
void
280
comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
281
4.31k
{
282
4.31k
  *tt = &b->eb->secs;
283
4.31k
  *tv = &b->eb->now;
284
4.31k
}
285
286
void
287
comm_base_dispatch(struct comm_base* b)
288
0
{
289
0
  int retval;
290
0
  retval = ub_event_base_dispatch(b->eb->base);
291
0
  if(retval < 0) {
292
0
    fatal_exit("event_dispatch returned error %d, "
293
0
      "errno is %s", retval, strerror(errno));
294
0
  }
295
0
}
296
297
void comm_base_exit(struct comm_base* b)
298
0
{
299
0
  if(ub_event_base_loopexit(b->eb->base) != 0) {
300
0
    log_err("Could not loopexit");
301
0
  }
302
0
}
303
304
void comm_base_set_slow_accept_handlers(struct comm_base* b,
305
  void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
306
0
{
307
0
  b->stop_accept = stop_acc;
308
0
  b->start_accept = start_acc;
309
0
  b->cb_arg = arg;
310
0
}
311
312
struct ub_event_base* comm_base_internal(struct comm_base* b)
313
0
{
314
0
  return b->eb->base;
315
0
}
316
317
struct ub_event* comm_point_internal(struct comm_point* c)
318
0
{
319
0
  return c->ev->ev;
320
0
}
321
322
/** see if errno for udp has to be logged or not uses globals */
323
static int
324
udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
325
0
{
326
  /* do not log transient errors (unless high verbosity) */
327
0
#if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
328
0
  switch(errno) {
329
0
#  ifdef ENETUNREACH
330
0
    case ENETUNREACH:
331
0
#  endif
332
0
#  ifdef EHOSTDOWN
333
0
    case EHOSTDOWN:
334
0
#  endif
335
0
#  ifdef EHOSTUNREACH
336
0
    case EHOSTUNREACH:
337
0
#  endif
338
0
#  ifdef ENETDOWN
339
0
    case ENETDOWN:
340
0
#  endif
341
0
    case EPERM:
342
0
    case EACCES:
343
0
      if(verbosity < VERB_ALGO)
344
0
        return 0;
345
0
      break;
346
0
    default:
347
0
      break;
348
0
  }
349
0
#endif
350
  /* permission denied is gotten for every send if the
351
   * network is disconnected (on some OS), squelch it */
352
0
  if( ((errno == EPERM)
353
0
#  ifdef EADDRNOTAVAIL
354
    /* 'Cannot assign requested address' also when disconnected */
355
0
    || (errno == EADDRNOTAVAIL)
356
0
#  endif
357
0
    ) && verbosity < VERB_ALGO)
358
0
    return 0;
359
0
#  ifdef EADDRINUSE
360
  /* If SO_REUSEADDR is set, we could try to connect to the same server
361
   * from the same source port twice. */
362
0
  if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
363
0
    return 0;
364
0
#  endif
365
  /* squelch errors where people deploy AAAA ::ffff:bla for
366
   * authority servers, which we try for intranets. */
367
0
  if(errno == EINVAL && addr_is_ip4mapped(
368
0
    (struct sockaddr_storage*)addr, addrlen) &&
369
0
    verbosity < VERB_DETAIL)
370
0
    return 0;
371
  /* SO_BROADCAST sockopt can give access to 255.255.255.255,
372
   * but a dns cache does not need it. */
373
0
  if(errno == EACCES && addr_is_broadcast(
374
0
    (struct sockaddr_storage*)addr, addrlen) &&
375
0
    verbosity < VERB_DETAIL)
376
0
    return 0;
377
0
#  ifdef ENOTCONN
378
  /* For 0.0.0.0, ::0 targets it can return that socket is not connected.
379
   * This can be ignored, and the address skipped. It remains
380
   * possible to send there for completeness in configuration. */
381
0
  if(errno == ENOTCONN && addr_is_any(
382
0
    (struct sockaddr_storage*)addr, addrlen) &&
383
0
    verbosity < VERB_DETAIL)
384
0
    return 0;
385
0
#  endif
386
0
  return 1;
387
0
}
388
389
/** Decide whether the errno of a failed TCP connect should be logged;
 * the same rules as for a failed UDP send are applied. */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
  const int needs_log = udp_send_errno_needs_log(addr, addrlen);

  return needs_log;
}
393
394
/* send a UDP reply.
 * The bytes from sldns_buffer_begin(packet), sldns_buffer_remaining(packet)
 * of them, are sent on c->fd: with sendto() to addr when is_connected is
 * zero, otherwise with send().
 * When the send fails with EAGAIN, EINTR, EWOULDBLOCK or ENOBUFS (or the
 * winsock counterparts) it is retried at most SEND_BLOCKED_MAX_RETRY times;
 * before each retry poll() waits up to SEND_BLOCKED_WAIT_TIMEOUT msec for
 * the socket to become writable.
 * Returns 1 if the whole packet was sent. Returns 0 if the wait timed out,
 * poll failed, the send failed or fewer bytes than requested were sent. */
395
int
396
comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
397
  struct sockaddr* addr, socklen_t addrlen, int is_connected)
398
0
{
399
0
  ssize_t sent;
400
0
  log_assert(c->fd != -1);
401
#ifdef UNBOUND_DEBUG
402
  if(sldns_buffer_remaining(packet) == 0)
403
    log_err("error: send empty UDP packet");
404
#endif
405
0
  log_assert(addr && addrlen > 0);
406
0
  if(!is_connected) {
407
0
    sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
408
0
      sldns_buffer_remaining(packet), 0,
409
0
      addr, addrlen);
410
0
  } else {
411
0
    sent = send(c->fd, (void*)sldns_buffer_begin(packet),
412
0
      sldns_buffer_remaining(packet), 0);
413
0
  }
414
0
  if(sent == -1) {
415
    /* try again and block, waiting for IO to complete,
416
     * we want to send the answer, and we will wait for
417
     * the ethernet interface buffer to have space. */
418
0
#ifndef USE_WINSOCK
419
0
    if(errno == EAGAIN || errno == EINTR ||
420
0
#  ifdef EWOULDBLOCK
421
0
      errno == EWOULDBLOCK ||
422
0
#  endif
423
0
      errno == ENOBUFS) {
424
#else
425
    if(WSAGetLastError() == WSAEINPROGRESS ||
426
      WSAGetLastError() == WSAEINTR ||
427
      WSAGetLastError() == WSAENOBUFS ||
428
      WSAGetLastError() == WSAEWOULDBLOCK) {
429
#endif
430
0
      int retries = 0;
431
      /* if we set the fd blocking, other threads suddenly
432
       * have a blocking fd that they operate on */
433
0
      while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
434
0
#ifndef USE_WINSOCK
435
0
        errno == EAGAIN || errno == EINTR ||
436
0
#  ifdef EWOULDBLOCK
437
0
        errno == EWOULDBLOCK ||
438
0
#  endif
439
0
        errno == ENOBUFS
440
#else
441
        WSAGetLastError() == WSAEINPROGRESS ||
442
        WSAGetLastError() == WSAEINTR ||
443
        WSAGetLastError() == WSAENOBUFS ||
444
        WSAGetLastError() == WSAEWOULDBLOCK
445
#endif
446
0
      )) {
447
0
#if defined(HAVE_POLL) || defined(USE_WINSOCK)
448
0
        int send_nobufs = (
449
0
#ifndef USE_WINSOCK
450
0
          errno == ENOBUFS
451
#else
452
          WSAGetLastError() == WSAENOBUFS
453
#endif
454
0
        );
455
0
        struct pollfd p;
456
0
        int pret;
457
0
        memset(&p, 0, sizeof(p));
458
0
        p.fd = c->fd;
459
0
        p.events = POLLOUT
460
0
#ifndef USE_WINSOCK
461
0
          | POLLERR | POLLHUP
462
0
#endif
463
0
          ;
464
0
#  ifndef USE_WINSOCK
465
0
        pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
466
#  else
467
        pret = WSAPoll(&p, 1,
468
          SEND_BLOCKED_WAIT_TIMEOUT);
469
#  endif
470
0
        if(pret == 0) {
471
          /* timer expired; the log line is rate limited with
           * last_writewait_log and SLOW_LOG_TIME */
472
0
          struct comm_base* b = c->ev->base;
473
0
          if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
474
0
            b->eb->secs) {
475
0
            b->eb->last_writewait_log = b->eb->secs;
476
0
            verbose(VERB_OPS, "send udp blocked "
477
0
              "for long, dropping packet.");
478
0
          }
479
0
          return 0;
480
0
        } else if(pret < 0 &&
481
0
#ifndef USE_WINSOCK
482
0
          errno != EAGAIN && errno != EINTR &&
483
0
#  ifdef EWOULDBLOCK
484
0
          errno != EWOULDBLOCK &&
485
0
#  endif
486
0
          errno != ENOMEM && errno != ENOBUFS
487
#else
488
          WSAGetLastError() != WSAEINPROGRESS &&
489
          WSAGetLastError() != WSAEINTR &&
490
          WSAGetLastError() != WSAENOBUFS &&
491
          WSAGetLastError() != WSAEWOULDBLOCK
492
#endif
493
0
          ) {
494
0
          log_err("poll udp out failed: %s",
495
0
            sock_strerror(errno));
496
0
          return 0;
497
0
        } else if((pret < 0 &&
498
0
#ifndef USE_WINSOCK
499
0
          ( errno == ENOBUFS  /* Maybe some systems */
500
0
          || errno == ENOMEM  /* Linux */
501
0
          || errno == EAGAIN)  /* Macos, solaris, openbsd */
502
#else
503
          WSAGetLastError() == WSAENOBUFS
504
#endif
505
0
          ) || (send_nobufs && retries > 0)) {
506
          /* ENOBUFS/ENOMEM/EAGAIN, and poll
507
           * returned without
508
           * a timeout. Or the retried send call
509
           * returned ENOBUFS/ENOMEM/EAGAIN.
510
           * It is good to wait a bit for the
511
           * error to clear. */
512
          /* The timeout is 20*(2^(retries+1)),
513
           * it increases exponentially, starting
514
           * at 40 msec. After 5 tries, 1240 msec
515
           * have passed in total, when poll
516
           * returned the error, and 1200 msec
517
           * when send returned the errors. */
518
0
#ifndef USE_WINSOCK
519
0
          pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
520
#else
521
          Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
522
          pret = 0;
523
#endif
524
0
          if(pret < 0
525
0
#ifndef USE_WINSOCK
526
0
            && errno != EAGAIN && errno != EINTR &&
527
0
#  ifdef EWOULDBLOCK
528
0
            errno != EWOULDBLOCK &&
529
0
#  endif
530
0
            errno != ENOMEM && errno != ENOBUFS
531
#else
532
            /* Sleep does not error */
533
#endif
534
0
          ) {
535
0
            log_err("poll udp out timer failed: %s",
536
0
              sock_strerror(errno));
537
0
          }
538
0
        }
539
0
#endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
540
0
        retries++;
541
0
        if (!is_connected) {
542
0
          sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
543
0
            sldns_buffer_remaining(packet), 0,
544
0
            addr, addrlen);
545
0
        } else {
546
0
          sent = send(c->fd, (void*)sldns_buffer_begin(packet),
547
0
            sldns_buffer_remaining(packet), 0);
548
0
        }
549
0
      }
550
0
    }
551
0
  }
552
0
  /* still failed: either not a retryable error or the retries ran out */
  if(sent == -1) {
553
0
    if(!udp_send_errno_needs_log(addr, addrlen))
554
0
      return 0;
555
0
    if (!is_connected) {
556
0
      verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno));
557
0
    } else {
558
0
      verbose(VERB_OPS, "send failed: %s", sock_strerror(errno));
559
0
    }
560
0
    if(addr)
561
0
      log_addr(VERB_OPS, "remote address is",
562
0
        (struct sockaddr_storage*)addr, addrlen);
563
0
    return 0;
564
0
  } else if((size_t)sent != sldns_buffer_remaining(packet)) {
565
0
    log_err("sent %d in place of %d bytes",
566
0
      (int)sent, (int)sldns_buffer_remaining(packet));
567
0
    return 0;
568
0
  }
569
0
  return 1;
570
0
}
571
572
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Log the ancillary (packet info) data of a reply, for debugging.
 * @param str: prefix for the log line.
 * @param r: the reply; srctype selects the IPv4 (4) or IPv6 (6) data,
 *   any other value is logged as unknown.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
  switch(r->srctype) {
  case 6: {
#ifdef IPV6_PKTINFO
    char text[1024];
    if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
      text, (socklen_t)sizeof(text)) == NULL) {
      (void)strlcpy(text, "(inet_ntop error)", sizeof(text));
    }
    text[sizeof(text)-1]=0;
    log_info("%s: %s %d", str, text, r->pktinfo.v6info.ipi6_ifindex);
#endif
    break;
  }
  case 4: {
#ifdef IP_PKTINFO
    char addr_text[1024], spec_text[1024];
    if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
      addr_text, (socklen_t)sizeof(addr_text)) == NULL) {
      (void)strlcpy(addr_text, "(inet_ntop error)",
        sizeof(addr_text));
    }
    addr_text[sizeof(addr_text)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
    if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
      spec_text, (socklen_t)sizeof(spec_text)) == NULL) {
      (void)strlcpy(spec_text, "(inet_ntop error)",
        sizeof(spec_text));
    }
    spec_text[sizeof(spec_text)-1]=0;
#else
    /* no ipi_spec_dst member, print an empty string for it */
    spec_text[0]=0;
#endif
    log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
      addr_text, spec_text);
#elif defined(IP_RECVDSTADDR)
    char addr_text[1024];
    if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
      addr_text, (socklen_t)sizeof(addr_text)) == NULL) {
      (void)strlcpy(addr_text, "(inet_ntop error)",
        sizeof(addr_text));
    }
    addr_text[sizeof(addr_text)-1]=0;
    log_info("%s: %s", str, addr_text);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
    break;
  }
  default:
    log_info("%s: unknown srctype %d", str, r->srctype);
    break;
  }
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
622
623
/** send a UDP reply over specified interface*/
624
static int
625
comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
626
  struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
627
0
{
628
0
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
629
0
  ssize_t sent;
630
0
  struct msghdr msg;
631
0
  struct iovec iov[1];
632
0
  union {
633
0
    struct cmsghdr hdr;
634
0
    char buf[256];
635
0
  } control;
636
0
#ifndef S_SPLINT_S
637
0
  struct cmsghdr *cmsg;
638
0
#endif /* S_SPLINT_S */
639
640
0
  log_assert(c->fd != -1);
641
#ifdef UNBOUND_DEBUG
642
  if(sldns_buffer_remaining(packet) == 0)
643
    log_err("error: send empty UDP packet");
644
#endif
645
0
  log_assert(addr && addrlen > 0);
646
647
0
  msg.msg_name = addr;
648
0
  msg.msg_namelen = addrlen;
649
0
  iov[0].iov_base = sldns_buffer_begin(packet);
650
0
  iov[0].iov_len = sldns_buffer_remaining(packet);
651
0
  msg.msg_iov = iov;
652
0
  msg.msg_iovlen = 1;
653
0
  msg.msg_control = control.buf;
654
0
#ifndef S_SPLINT_S
655
0
  msg.msg_controllen = sizeof(control.buf);
656
0
#endif /* S_SPLINT_S */
657
0
  msg.msg_flags = 0;
658
659
0
#ifndef S_SPLINT_S
660
0
  cmsg = CMSG_FIRSTHDR(&msg);
661
0
  if(r->srctype == 4) {
662
0
#ifdef IP_PKTINFO
663
0
    void* cmsg_data;
664
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
665
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
666
0
    cmsg->cmsg_level = IPPROTO_IP;
667
0
    cmsg->cmsg_type = IP_PKTINFO;
668
0
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
669
0
      sizeof(struct in_pktinfo));
670
    /* unset the ifindex to not bypass the routing tables */
671
0
    cmsg_data = CMSG_DATA(cmsg);
672
0
    ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
673
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
674
    /* zero the padding bytes inserted by the CMSG_LEN */
675
0
    if(sizeof(struct in_pktinfo) < cmsg->cmsg_len)
676
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
677
0
        sizeof(struct in_pktinfo), 0, cmsg->cmsg_len
678
0
        - sizeof(struct in_pktinfo));
679
#elif defined(IP_SENDSRCADDR)
680
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
681
    log_assert(msg.msg_controllen <= sizeof(control.buf));
682
    cmsg->cmsg_level = IPPROTO_IP;
683
    cmsg->cmsg_type = IP_SENDSRCADDR;
684
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
685
      sizeof(struct in_addr));
686
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
687
    /* zero the padding bytes inserted by the CMSG_LEN */
688
    if(sizeof(struct in_addr) < cmsg->cmsg_len)
689
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
690
        sizeof(struct in_addr), 0, cmsg->cmsg_len
691
        - sizeof(struct in_addr));
692
#else
693
    verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
694
    msg.msg_control = NULL;
695
#endif /* IP_PKTINFO or IP_SENDSRCADDR */
696
0
  } else if(r->srctype == 6) {
697
0
    void* cmsg_data;
698
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
699
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
700
0
    cmsg->cmsg_level = IPPROTO_IPV6;
701
0
    cmsg->cmsg_type = IPV6_PKTINFO;
702
0
    memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
703
0
      sizeof(struct in6_pktinfo));
704
    /* unset the ifindex to not bypass the routing tables */
705
0
    cmsg_data = CMSG_DATA(cmsg);
706
0
    ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
707
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
708
    /* zero the padding bytes inserted by the CMSG_LEN */
709
0
    if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
710
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
711
0
        sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
712
0
        - sizeof(struct in6_pktinfo));
713
0
  } else {
714
    /* try to pass all 0 to use default route */
715
0
    msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
716
0
    log_assert(msg.msg_controllen <= sizeof(control.buf));
717
0
    cmsg->cmsg_level = IPPROTO_IPV6;
718
0
    cmsg->cmsg_type = IPV6_PKTINFO;
719
0
    memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
720
0
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
721
    /* zero the padding bytes inserted by the CMSG_LEN */
722
0
    if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len)
723
0
      memset(((uint8_t*)(CMSG_DATA(cmsg))) +
724
0
        sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len
725
0
        - sizeof(struct in6_pktinfo));
726
0
  }
727
0
#endif /* S_SPLINT_S */
728
0
  if(verbosity >= VERB_ALGO && r->srctype != 0)
729
0
    p_ancil("send_udp over interface", r);
730
0
  sent = sendmsg(c->fd, &msg, 0);
731
0
  if(sent == -1) {
732
    /* try again and block, waiting for IO to complete,
733
     * we want to send the answer, and we will wait for
734
     * the ethernet interface buffer to have space. */
735
0
#ifndef USE_WINSOCK
736
0
    if(errno == EAGAIN || errno == EINTR ||
737
0
#  ifdef EWOULDBLOCK
738
0
      errno == EWOULDBLOCK ||
739
0
#  endif
740
0
      errno == ENOBUFS) {
741
#else
742
    if(WSAGetLastError() == WSAEINPROGRESS ||
743
      WSAGetLastError() == WSAEINTR ||
744
      WSAGetLastError() == WSAENOBUFS ||
745
      WSAGetLastError() == WSAEWOULDBLOCK) {
746
#endif
747
0
      int retries = 0;
748
0
      while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && (
749
0
#ifndef USE_WINSOCK
750
0
        errno == EAGAIN || errno == EINTR ||
751
0
#  ifdef EWOULDBLOCK
752
0
        errno == EWOULDBLOCK ||
753
0
#  endif
754
0
        errno == ENOBUFS
755
#else
756
        WSAGetLastError() == WSAEINPROGRESS ||
757
        WSAGetLastError() == WSAEINTR ||
758
        WSAGetLastError() == WSAENOBUFS ||
759
        WSAGetLastError() == WSAEWOULDBLOCK
760
#endif
761
0
      )) {
762
0
#if defined(HAVE_POLL) || defined(USE_WINSOCK)
763
0
        int send_nobufs = (
764
0
#ifndef USE_WINSOCK
765
0
          errno == ENOBUFS
766
#else
767
          WSAGetLastError() == WSAENOBUFS
768
#endif
769
0
        );
770
0
        struct pollfd p;
771
0
        int pret;
772
0
        memset(&p, 0, sizeof(p));
773
0
        p.fd = c->fd;
774
0
        p.events = POLLOUT
775
0
#ifndef USE_WINSOCK
776
0
          | POLLERR | POLLHUP
777
0
#endif
778
0
          ;
779
0
#  ifndef USE_WINSOCK
780
0
        pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT);
781
#  else
782
        pret = WSAPoll(&p, 1,
783
          SEND_BLOCKED_WAIT_TIMEOUT);
784
#  endif
785
0
        if(pret == 0) {
786
          /* timer expired */
787
0
          struct comm_base* b = c->ev->base;
788
0
          if(b->eb->last_writewait_log+SLOW_LOG_TIME <=
789
0
            b->eb->secs) {
790
0
            b->eb->last_writewait_log = b->eb->secs;
791
0
            verbose(VERB_OPS, "send udp blocked "
792
0
              "for long, dropping packet.");
793
0
          }
794
0
          return 0;
795
0
        } else if(pret < 0 &&
796
0
#ifndef USE_WINSOCK
797
0
          errno != EAGAIN && errno != EINTR &&
798
0
#  ifdef EWOULDBLOCK
799
0
          errno != EWOULDBLOCK &&
800
0
#  endif
801
0
          errno != ENOMEM && errno != ENOBUFS
802
#else
803
          WSAGetLastError() != WSAEINPROGRESS &&
804
          WSAGetLastError() != WSAEINTR &&
805
          WSAGetLastError() != WSAENOBUFS &&
806
          WSAGetLastError() != WSAEWOULDBLOCK
807
#endif
808
0
          ) {
809
0
          log_err("poll udp out failed: %s",
810
0
            sock_strerror(errno));
811
0
          return 0;
812
0
        } else if((pret < 0 &&
813
0
#ifndef USE_WINSOCK
814
0
          ( errno == ENOBUFS  /* Maybe some systems */
815
0
          || errno == ENOMEM  /* Linux */
816
0
          || errno == EAGAIN)  /* Macos, solaris, openbsd */
817
#else
818
          WSAGetLastError() == WSAENOBUFS
819
#endif
820
0
          ) || (send_nobufs && retries > 0)) {
821
          /* ENOBUFS/ENOMEM/EAGAIN, and poll
822
           * returned without
823
           * a timeout. Or the retried send call
824
           * returned ENOBUFS/ENOMEM/EAGAIN.
825
           * It is good to wait a bit for the
826
           * error to clear. */
827
          /* The timeout is 20*(2^(retries+1)),
828
           * it increases exponentially, starting
829
           * at 40 msec. After 5 tries, 1240 msec
830
           * have passed in total, when poll
831
           * returned the error, and 1200 msec
832
           * when send returned the errors. */
833
0
#ifndef USE_WINSOCK
834
0
          pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
835
#else
836
          Sleep((SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1));
837
          pret = 0;
838
#endif
839
0
          if(pret < 0
840
0
#ifndef USE_WINSOCK
841
0
            && errno != EAGAIN && errno != EINTR &&
842
0
#  ifdef EWOULDBLOCK
843
0
            errno != EWOULDBLOCK &&
844
0
#  endif
845
0
            errno != ENOMEM && errno != ENOBUFS
846
#else  /* USE_WINSOCK */
847
            /* Sleep does not error */
848
#endif
849
0
          ) {
850
0
            log_err("poll udp out timer failed: %s",
851
0
              sock_strerror(errno));
852
0
          }
853
0
        }
854
0
#endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */
855
0
        retries++;
856
0
        sent = sendmsg(c->fd, &msg, 0);
857
0
      }
858
0
    }
859
0
  }
860
0
  if(sent == -1) {
861
0
    if(!udp_send_errno_needs_log(addr, addrlen))
862
0
      return 0;
863
0
    verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
864
0
    log_addr(VERB_OPS, "remote address is",
865
0
      (struct sockaddr_storage*)addr, addrlen);
866
#ifdef __NetBSD__
867
    /* netbsd 7 has IP_PKTINFO for recv but not send */
868
    if(errno == EINVAL && r->srctype == 4)
869
      log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
870
        "Please disable interface-automatic");
871
#endif
872
0
    return 0;
873
0
  } else if((size_t)sent != sldns_buffer_remaining(packet)) {
874
0
    log_err("sent %d in place of %d bytes",
875
0
      (int)sent, (int)sldns_buffer_remaining(packet));
876
0
    return 0;
877
0
  }
878
0
  return 1;
879
#else
880
  (void)c;
881
  (void)packet;
882
  (void)addr;
883
  (void)addrlen;
884
  (void)r;
885
  log_err("sendmsg: IPV6_PKTINFO not supported");
886
  return 0;
887
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
888
0
}
889
890
/** return true is UDP receive error needs to be logged */
891
static int udp_recv_needs_log(int err)
892
0
{
893
0
  switch(err) {
894
0
  case EACCES: /* some hosts send ICMP 'Permission Denied' */
895
0
#ifndef USE_WINSOCK
896
0
  case ECONNREFUSED:
897
0
#  ifdef ENETUNREACH
898
0
  case ENETUNREACH:
899
0
#  endif
900
0
#  ifdef EHOSTDOWN
901
0
  case EHOSTDOWN:
902
0
#  endif
903
0
#  ifdef EHOSTUNREACH
904
0
  case EHOSTUNREACH:
905
0
#  endif
906
0
#  ifdef ENETDOWN
907
0
  case ENETDOWN:
908
0
#  endif
909
#else /* USE_WINSOCK */
910
  case WSAECONNREFUSED:
911
  case WSAENETUNREACH:
912
  case WSAEHOSTDOWN:
913
  case WSAEHOSTUNREACH:
914
  case WSAENETDOWN:
915
#endif
916
0
    if(verbosity >= VERB_ALGO)
917
0
      return 1;
918
0
    return 0;
919
0
  default:
920
0
    break;
921
0
  }
922
0
  return 1;
923
0
}
924
925
/** Parses the PROXYv2 header from buf and updates the comm_reply struct.
926
 *  Returns 1 on success, 0 on failure. */
927
static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep,
928
0
  int stream) {
929
0
  size_t size;
930
0
  struct pp2_header *header;
931
0
  int err = pp2_read_header(sldns_buffer_begin(buf),
932
0
    sldns_buffer_remaining(buf));
933
0
  if(err) return 0;
934
0
  header = (struct pp2_header*)sldns_buffer_begin(buf);
935
0
  size = PP2_HEADER_SIZE + ntohs(header->len);
936
0
  if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) {
937
    /* A connection from the proxy itself.
938
     * No need to do anything with addresses. */
939
0
    goto done;
940
0
  }
941
0
  if(header->fam_prot == PP2_UNSPEC_UNSPEC) {
942
    /* Unspecified family and protocol. This could be used for
943
     * health checks by proxies.
944
     * No need to do anything with addresses. */
945
0
    goto done;
946
0
  }
947
  /* Read the proxied address */
948
0
  switch(header->fam_prot) {
949
0
    case PP2_INET_STREAM:
950
0
    case PP2_INET_DGRAM:
951
0
      {
952
0
      struct sockaddr_in* addr =
953
0
        (struct sockaddr_in*)&rep->client_addr;
954
0
      addr->sin_family = AF_INET;
955
0
      addr->sin_addr.s_addr = header->addr.addr4.src_addr;
956
0
      addr->sin_port = header->addr.addr4.src_port;
957
0
      rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in);
958
0
      }
959
      /* Ignore the destination address; it should be us. */
960
0
      break;
961
0
    case PP2_INET6_STREAM:
962
0
    case PP2_INET6_DGRAM:
963
0
      {
964
0
      struct sockaddr_in6* addr =
965
0
        (struct sockaddr_in6*)&rep->client_addr;
966
0
      memset(addr, 0, sizeof(*addr));
967
0
      addr->sin6_family = AF_INET6;
968
0
      memcpy(&addr->sin6_addr,
969
0
        header->addr.addr6.src_addr, 16);
970
0
      addr->sin6_port = header->addr.addr6.src_port;
971
0
      rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6);
972
0
      }
973
      /* Ignore the destination address; it should be us. */
974
0
      break;
975
0
    default:
976
0
      log_err("proxy_protocol: unsupported family and "
977
0
        "protocol 0x%x", (int)header->fam_prot);
978
0
      return 0;
979
0
  }
980
0
  rep->is_proxied = 1;
981
0
done:
982
0
  if(!stream) {
983
    /* We are reading a whole packet;
984
     * Move the rest of the data to overwrite the PROXYv2 header */
985
    /* XXX can we do better to avoid memmove? */
986
0
    memmove(header, ((char*)header)+size,
987
0
      sldns_buffer_limit(buf)-size);
988
0
    sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size);
989
0
  }
990
0
  return 1;
991
0
}
992
993
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
994
void
995
comm_point_udp_ancil_callback(int fd, short event, void* arg)
996
0
{
997
0
  struct comm_reply rep;
998
0
  struct msghdr msg;
999
0
  struct iovec iov[1];
1000
0
  ssize_t rcv;
1001
0
  union {
1002
0
    struct cmsghdr hdr;
1003
0
    char buf[256];
1004
0
  } ancil;
1005
0
  int i;
1006
0
#ifndef S_SPLINT_S
1007
0
  struct cmsghdr* cmsg;
1008
0
#endif /* S_SPLINT_S */
1009
0
#ifdef HAVE_LINUX_NET_TSTAMP_H
1010
0
  struct timespec *ts;
1011
0
#endif /* HAVE_LINUX_NET_TSTAMP_H */
1012
1013
0
  rep.c = (struct comm_point*)arg;
1014
0
  log_assert(rep.c->type == comm_udp);
1015
1016
0
  if(!(event&UB_EV_READ))
1017
0
    return;
1018
0
  log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
1019
0
  ub_comm_base_now(rep.c->ev->base);
1020
0
  for(i=0; i<NUM_UDP_PER_SELECT; i++) {
1021
0
    sldns_buffer_clear(rep.c->buffer);
1022
0
    timeval_clear(&rep.c->recv_tv);
1023
0
    rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
1024
0
    log_assert(fd != -1);
1025
0
    log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
1026
0
    msg.msg_name = &rep.remote_addr;
1027
0
    msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr);
1028
0
    iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
1029
0
    iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
1030
0
    msg.msg_iov = iov;
1031
0
    msg.msg_iovlen = 1;
1032
0
    msg.msg_control = ancil.buf;
1033
0
#ifndef S_SPLINT_S
1034
0
    msg.msg_controllen = sizeof(ancil.buf);
1035
0
#endif /* S_SPLINT_S */
1036
0
    msg.msg_flags = 0;
1037
0
    rcv = recvmsg(fd, &msg, MSG_DONTWAIT);
1038
0
    if(rcv == -1) {
1039
0
      if(errno != EAGAIN && errno != EINTR
1040
0
        && udp_recv_needs_log(errno)) {
1041
0
        log_err("recvmsg failed: %s", strerror(errno));
1042
0
      }
1043
0
      return;
1044
0
    }
1045
0
    rep.remote_addrlen = msg.msg_namelen;
1046
0
    sldns_buffer_skip(rep.c->buffer, rcv);
1047
0
    sldns_buffer_flip(rep.c->buffer);
1048
0
    rep.srctype = 0;
1049
0
    rep.is_proxied = 0;
1050
0
#ifndef S_SPLINT_S
1051
0
    for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
1052
0
      cmsg = CMSG_NXTHDR(&msg, cmsg)) {
1053
0
      if( cmsg->cmsg_level == IPPROTO_IPV6 &&
1054
0
        cmsg->cmsg_type == IPV6_PKTINFO) {
1055
0
        rep.srctype = 6;
1056
0
        memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
1057
0
          sizeof(struct in6_pktinfo));
1058
0
        break;
1059
0
#ifdef IP_PKTINFO
1060
0
      } else if( cmsg->cmsg_level == IPPROTO_IP &&
1061
0
        cmsg->cmsg_type == IP_PKTINFO) {
1062
0
        rep.srctype = 4;
1063
0
        memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
1064
0
          sizeof(struct in_pktinfo));
1065
0
        break;
1066
#elif defined(IP_RECVDSTADDR)
1067
      } else if( cmsg->cmsg_level == IPPROTO_IP &&
1068
        cmsg->cmsg_type == IP_RECVDSTADDR) {
1069
        rep.srctype = 4;
1070
        memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
1071
          sizeof(struct in_addr));
1072
        break;
1073
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
1074
0
#ifdef HAVE_LINUX_NET_TSTAMP_H
1075
0
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1076
0
        cmsg->cmsg_type == SO_TIMESTAMPNS) {
1077
0
        ts = (struct timespec *)CMSG_DATA(cmsg);
1078
0
        TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
1079
0
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1080
0
        cmsg->cmsg_type == SO_TIMESTAMPING) {
1081
0
        ts = (struct timespec *)CMSG_DATA(cmsg);
1082
0
        TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
1083
0
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1084
0
        cmsg->cmsg_type == SO_TIMESTAMP) {
1085
0
        memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
1086
#elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP)
1087
      } else if( cmsg->cmsg_level == SOL_SOCKET &&
1088
        cmsg->cmsg_type == SCM_TIMESTAMP) {
1089
        /* FreeBSD and also Linux. */
1090
        memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
1091
#endif /* HAVE_LINUX_NET_TSTAMP_H */
1092
0
      }
1093
0
    }
1094
1095
0
    if(verbosity >= VERB_ALGO && rep.srctype != 0)
1096
0
      p_ancil("receive_udp on interface", &rep);
1097
0
#endif /* S_SPLINT_S */
1098
1099
0
    if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
1100
0
      &rep, 0)) {
1101
0
      log_err("proxy_protocol: could not consume PROXYv2 header");
1102
0
      return;
1103
0
    }
1104
0
    if(!rep.is_proxied) {
1105
0
      rep.client_addrlen = rep.remote_addrlen;
1106
0
      memmove(&rep.client_addr, &rep.remote_addr,
1107
0
        rep.remote_addrlen);
1108
0
    }
1109
1110
0
    fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
1111
0
    if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
1112
      /* send back immediate reply */
1113
0
      struct sldns_buffer *buffer;
1114
#ifdef USE_DNSCRYPT
1115
      buffer = rep.c->dnscrypt_buffer;
1116
#else
1117
0
      buffer = rep.c->buffer;
1118
0
#endif
1119
0
      (void)comm_point_send_udp_msg_if(rep.c, buffer,
1120
0
        (struct sockaddr*)&rep.remote_addr,
1121
0
        rep.remote_addrlen, &rep);
1122
0
    }
1123
0
    if(!rep.c || rep.c->fd == -1) /* commpoint closed */
1124
0
      break;
1125
0
  }
1126
0
}
1127
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
1128
1129
void
1130
comm_point_udp_callback(int fd, short event, void* arg)
1131
0
{
1132
0
  struct comm_reply rep;
1133
0
  ssize_t rcv;
1134
0
  int i;
1135
0
  struct sldns_buffer *buffer;
1136
1137
0
  rep.c = (struct comm_point*)arg;
1138
0
  log_assert(rep.c->type == comm_udp);
1139
1140
0
  if(!(event&UB_EV_READ))
1141
0
    return;
1142
0
  log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
1143
0
  ub_comm_base_now(rep.c->ev->base);
1144
0
  for(i=0; i<NUM_UDP_PER_SELECT; i++) {
1145
0
    sldns_buffer_clear(rep.c->buffer);
1146
0
    rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
1147
0
    log_assert(fd != -1);
1148
0
    log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
1149
0
    rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
1150
0
      sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT,
1151
0
      (struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen);
1152
0
    if(rcv == -1) {
1153
0
#ifndef USE_WINSOCK
1154
0
      if(errno != EAGAIN && errno != EINTR
1155
0
        && udp_recv_needs_log(errno))
1156
0
        log_err("recvfrom %d failed: %s",
1157
0
          fd, strerror(errno));
1158
#else
1159
      if(WSAGetLastError() != WSAEINPROGRESS &&
1160
        WSAGetLastError() != WSAECONNRESET &&
1161
        WSAGetLastError()!= WSAEWOULDBLOCK &&
1162
        udp_recv_needs_log(WSAGetLastError()))
1163
        log_err("recvfrom failed: %s",
1164
          wsa_strerror(WSAGetLastError()));
1165
#endif
1166
0
      return;
1167
0
    }
1168
0
    sldns_buffer_skip(rep.c->buffer, rcv);
1169
0
    sldns_buffer_flip(rep.c->buffer);
1170
0
    rep.srctype = 0;
1171
0
    rep.is_proxied = 0;
1172
1173
0
    if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
1174
0
      &rep, 0)) {
1175
0
      log_err("proxy_protocol: could not consume PROXYv2 header");
1176
0
      return;
1177
0
    }
1178
0
    if(!rep.is_proxied) {
1179
0
      rep.client_addrlen = rep.remote_addrlen;
1180
0
      memmove(&rep.client_addr, &rep.remote_addr,
1181
0
        rep.remote_addrlen);
1182
0
    }
1183
1184
0
    fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
1185
0
    if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
1186
      /* send back immediate reply */
1187
#ifdef USE_DNSCRYPT
1188
      buffer = rep.c->dnscrypt_buffer;
1189
#else
1190
0
      buffer = rep.c->buffer;
1191
0
#endif
1192
0
      (void)comm_point_send_udp_msg(rep.c, buffer,
1193
0
        (struct sockaddr*)&rep.remote_addr,
1194
0
        rep.remote_addrlen, 0);
1195
0
    }
1196
0
    if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
1197
    another UDP port. Note rep.c cannot be reused with TCP fd. */
1198
0
      break;
1199
0
  }
1200
0
}
1201
1202
#ifdef HAVE_NGTCP2
1203
void
1204
doq_pkt_addr_init(struct doq_pkt_addr* paddr)
1205
{
1206
  paddr->addrlen = (socklen_t)sizeof(paddr->addr);
1207
  paddr->localaddrlen = (socklen_t)sizeof(paddr->localaddr);
1208
  paddr->ifindex = 0;
1209
}
1210
1211
/**
 * Set the ecn value on the socket for the transmission.
 * Uses the IPV6_TCLASS option for AF_INET6 and the IP_TOS option for any
 * other family. A setsockopt failure is logged and otherwise ignored.
 * @param fd: the socket.
 * @param family: address family that the packet is sent with.
 * @param ecn: value to set.
 */
static void
doq_set_ecn(int fd, int family, uint32_t ecn)
{
  unsigned int tos = ecn;
  if(family != AF_INET6) {
    if(setsockopt(fd, IPPROTO_IP, IP_TOS, &tos,
      (socklen_t)sizeof(tos)) == -1) {
      log_err("setsockopt(.. IP_TOS ..): %s",
        strerror(errno));
    }
    return;
  }
  if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &tos,
    (socklen_t)sizeof(tos)) == -1) {
    log_err("setsockopt(.. IPV6_TCLASS ..): %s",
      strerror(errno));
  }
}
1230
1231
/** set the local address in the control ancillary data */
1232
static void
1233
doq_set_localaddr_cmsg(struct msghdr* msg, size_t control_size,
1234
  struct doq_addr_storage* localaddr, socklen_t localaddrlen,
1235
  int ifindex)
1236
{
1237
#ifndef S_SPLINT_S
1238
  struct cmsghdr* cmsg;
1239
#endif /* S_SPLINT_S */
1240
#ifndef S_SPLINT_S
1241
  cmsg = CMSG_FIRSTHDR(msg);
1242
  if(localaddr->sockaddr.in.sin_family == AF_INET) {
1243
#ifdef IP_PKTINFO
1244
    struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
1245
    struct in_pktinfo v4info;
1246
    log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1247
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
1248
    memset(msg->msg_control, 0, msg->msg_controllen);
1249
    log_assert(msg->msg_controllen <= control_size);
1250
    cmsg->cmsg_level = IPPROTO_IP;
1251
    cmsg->cmsg_type = IP_PKTINFO;
1252
    memset(&v4info, 0, sizeof(v4info));
1253
#  ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
1254
    memmove(&v4info.ipi_spec_dst, &sa->sin_addr,
1255
      sizeof(struct in_addr));
1256
#  else
1257
    memmove(&v4info.ipi_addr, &sa->sin_addr,
1258
      sizeof(struct in_addr));
1259
#  endif
1260
    v4info.ipi_ifindex = ifindex;
1261
    memmove(CMSG_DATA(cmsg), &v4info, sizeof(struct in_pktinfo));
1262
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
1263
#elif defined(IP_SENDSRCADDR)
1264
    struct sockaddr_in* sa= (struct sockaddr_in*)localaddr;
1265
    log_assert(localaddrlen >= sizeof(struct sockaddr_in));
1266
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
1267
    memset(msg->msg_control, 0, msg->msg_controllen);
1268
    log_assert(msg->msg_controllen <= control_size);
1269
    cmsg->cmsg_level = IPPROTO_IP;
1270
    cmsg->cmsg_type = IP_SENDSRCADDR;
1271
    memmove(CMSG_DATA(cmsg),  &sa->sin_addr,
1272
      sizeof(struct in_addr));
1273
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1274
#endif
1275
  } else {
1276
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
1277
    struct in6_pktinfo v6info;
1278
    log_assert(localaddrlen >= sizeof(struct sockaddr_in6));
1279
    msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
1280
    memset(msg->msg_control, 0, msg->msg_controllen);
1281
    log_assert(msg->msg_controllen <= control_size);
1282
    cmsg->cmsg_level = IPPROTO_IPV6;
1283
    cmsg->cmsg_type = IPV6_PKTINFO;
1284
    memset(&v6info, 0, sizeof(v6info));
1285
    memmove(&v6info.ipi6_addr, &sa6->sin6_addr,
1286
      sizeof(struct in6_addr));
1287
    v6info.ipi6_ifindex = ifindex;
1288
    memmove(CMSG_DATA(cmsg), &v6info, sizeof(struct in6_pktinfo));
1289
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
1290
  }
1291
#endif /* S_SPLINT_S */
1292
  /* Ignore unused variables, if no assertions are compiled. */
1293
  (void)localaddrlen;
1294
  (void)control_size;
1295
}
1296
1297
/** write address and port into strings */
1298
static int
1299
doq_print_addr_port(struct doq_addr_storage* addr, socklen_t addrlen,
1300
  char* host, size_t hostlen, char* port, size_t portlen)
1301
{
1302
  if(addr->sockaddr.in.sin_family == AF_INET) {
1303
    struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1304
    log_assert(addrlen >= sizeof(*sa));
1305
    if(inet_ntop(sa->sin_family, &sa->sin_addr, host,
1306
      (socklen_t)hostlen) == 0) {
1307
      log_hex("inet_ntop error: address", &sa->sin_addr,
1308
        sizeof(sa->sin_addr));
1309
      return 0;
1310
    }
1311
    snprintf(port, portlen, "%u", (unsigned)ntohs(sa->sin_port));
1312
  } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1313
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1314
    log_assert(addrlen >= sizeof(*sa6));
1315
    if(inet_ntop(sa6->sin6_family, &sa6->sin6_addr, host,
1316
      (socklen_t)hostlen) == 0) {
1317
      log_hex("inet_ntop error: address", &sa6->sin6_addr,
1318
        sizeof(sa6->sin6_addr));
1319
      return 0;
1320
    }
1321
    snprintf(port, portlen, "%u", (unsigned)ntohs(sa6->sin6_port));
1322
  }
1323
  return 1;
1324
}
1325
1326
/** doq store the blocked packet when write has blocked */
1327
static void
1328
doq_store_blocked_pkt(struct comm_point* c, struct doq_pkt_addr* paddr,
1329
  uint32_t ecn)
1330
{
1331
  if(c->doq_socket->have_blocked_pkt)
1332
    return; /* should not happen that we write when there is
1333
    already a blocked write, but if so, drop it. */
1334
  if(sldns_buffer_limit(c->doq_socket->pkt_buf) >
1335
    sldns_buffer_capacity(c->doq_socket->blocked_pkt))
1336
    return; /* impossibly large, drop packet. impossible because
1337
    pkt_buf and blocked_pkt are the same size. */
1338
  c->doq_socket->have_blocked_pkt = 1;
1339
  c->doq_socket->blocked_pkt_pi.ecn = ecn;
1340
  memcpy(c->doq_socket->blocked_paddr, paddr,
1341
    sizeof(*c->doq_socket->blocked_paddr));
1342
  sldns_buffer_clear(c->doq_socket->blocked_pkt);
1343
  sldns_buffer_write(c->doq_socket->blocked_pkt,
1344
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1345
    sldns_buffer_limit(c->doq_socket->pkt_buf));
1346
  sldns_buffer_flip(c->doq_socket->blocked_pkt);
1347
}
1348
1349
void
1350
doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, uint32_t ecn)
1351
{
1352
  struct msghdr msg;
1353
  struct iovec iov[1];
1354
  union {
1355
    struct cmsghdr hdr;
1356
    char buf[256];
1357
  } control;
1358
  ssize_t ret;
1359
  iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1360
  iov[0].iov_len = sldns_buffer_limit(c->doq_socket->pkt_buf);
1361
  memset(&msg, 0, sizeof(msg));
1362
  msg.msg_name = (void*)&paddr->addr;
1363
  msg.msg_namelen = paddr->addrlen;
1364
  msg.msg_iov = iov;
1365
  msg.msg_iovlen = 1;
1366
  msg.msg_control = control.buf;
1367
#ifndef S_SPLINT_S
1368
  msg.msg_controllen = sizeof(control.buf);
1369
#endif /* S_SPLINT_S */
1370
  msg.msg_flags = 0;
1371
1372
  doq_set_localaddr_cmsg(&msg, sizeof(control.buf), &paddr->localaddr,
1373
    paddr->localaddrlen, paddr->ifindex);
1374
  doq_set_ecn(c->fd, paddr->addr.sockaddr.in.sin_family, ecn);
1375
1376
  for(;;) {
1377
    ret = sendmsg(c->fd, &msg, MSG_DONTWAIT);
1378
    if(ret == -1 && errno == EINTR)
1379
      continue;
1380
    break;
1381
  }
1382
  if(ret == -1) {
1383
#ifndef USE_WINSOCK
1384
    if(errno == EAGAIN ||
1385
#  ifdef EWOULDBLOCK
1386
      errno == EWOULDBLOCK ||
1387
#  endif
1388
      errno == ENOBUFS)
1389
#else
1390
    if(WSAGetLastError() == WSAEINPROGRESS ||
1391
      WSAGetLastError() == WSAENOBUFS ||
1392
      WSAGetLastError() == WSAEWOULDBLOCK)
1393
#endif
1394
    {
1395
      /* udp send has blocked */
1396
      doq_store_blocked_pkt(c, paddr, ecn);
1397
      return;
1398
    }
1399
    if(!udp_send_errno_needs_log((void*)&paddr->addr,
1400
      paddr->addrlen))
1401
      return;
1402
    if(verbosity >= VERB_OPS) {
1403
      char host[256], port[32];
1404
      if(doq_print_addr_port(&paddr->addr, paddr->addrlen,
1405
        host, sizeof(host), port, sizeof(port))) {
1406
        verbose(VERB_OPS, "doq sendmsg to %s %s "
1407
          "failed: %s", host, port,
1408
          strerror(errno));
1409
      } else {
1410
        verbose(VERB_OPS, "doq sendmsg failed: %s",
1411
          strerror(errno));
1412
      }
1413
    }
1414
    return;
1415
  } else if(ret != (ssize_t)sldns_buffer_limit(c->doq_socket->pkt_buf)) {
1416
    char host[256], port[32];
1417
    if(doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1418
      sizeof(host), port, sizeof(port))) {
1419
      log_err("doq sendmsg to %s %s failed: "
1420
        "sent %d in place of %d bytes", 
1421
        host, port, (int)ret,
1422
        (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1423
    } else {
1424
      log_err("doq sendmsg failed: "
1425
        "sent %d in place of %d bytes", 
1426
        (int)ret, (int)sldns_buffer_limit(c->doq_socket->pkt_buf));
1427
    }
1428
    return;
1429
  }
1430
}
1431
1432
/** fetch port number */
1433
static int
1434
doq_sockaddr_get_port(struct doq_addr_storage* addr)
1435
{
1436
  if(addr->sockaddr.in.sin_family == AF_INET) {
1437
    struct sockaddr_in* sa = (struct sockaddr_in*)addr;
1438
    return ntohs(sa->sin_port);
1439
  } else if(addr->sockaddr.in.sin_family == AF_INET6) {
1440
    struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr;
1441
    return ntohs(sa6->sin6_port);
1442
  }
1443
  return 0;
1444
}
1445
1446
/** get local address from ancillary data headers */
1447
static int
1448
doq_get_localaddr_cmsg(struct comm_point* c, struct doq_pkt_addr* paddr,
1449
  int* pkt_continue, struct msghdr* msg)
1450
{
1451
#ifndef S_SPLINT_S
1452
  struct cmsghdr* cmsg;
1453
#endif /* S_SPLINT_S */
1454
1455
  memset(&paddr->localaddr, 0, sizeof(paddr->localaddr));
1456
#ifndef S_SPLINT_S
1457
  for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
1458
    cmsg = CMSG_NXTHDR(msg, cmsg)) {
1459
    if( cmsg->cmsg_level == IPPROTO_IPV6 &&
1460
      cmsg->cmsg_type == IPV6_PKTINFO) {
1461
      struct in6_pktinfo* v6info =
1462
        (struct in6_pktinfo*)CMSG_DATA(cmsg);
1463
      struct sockaddr_in6* sa= (struct sockaddr_in6*)
1464
        &paddr->localaddr;
1465
      struct sockaddr_in6* rema = (struct sockaddr_in6*)
1466
        &paddr->addr;
1467
      if(rema->sin6_family != AF_INET6) {
1468
        log_err("doq cmsg family mismatch cmsg is ip6");
1469
        *pkt_continue = 1;
1470
        return 0;
1471
      }
1472
      sa->sin6_family = AF_INET6;
1473
      sa->sin6_port = htons(doq_sockaddr_get_port(
1474
        (void*)c->socket->addr));
1475
      paddr->ifindex = v6info->ipi6_ifindex;
1476
      memmove(&sa->sin6_addr, &v6info->ipi6_addr,
1477
        sizeof(struct in6_addr));
1478
      paddr->localaddrlen = sizeof(struct sockaddr_in6);
1479
      break;
1480
#ifdef IP_PKTINFO
1481
    } else if( cmsg->cmsg_level == IPPROTO_IP &&
1482
      cmsg->cmsg_type == IP_PKTINFO) {
1483
      struct in_pktinfo* v4info =
1484
        (struct in_pktinfo*)CMSG_DATA(cmsg);
1485
      struct sockaddr_in* sa= (struct sockaddr_in*)
1486
        &paddr->localaddr;
1487
      struct sockaddr_in* rema = (struct sockaddr_in*)
1488
        &paddr->addr;
1489
      if(rema->sin_family != AF_INET) {
1490
        log_err("doq cmsg family mismatch cmsg is ip4");
1491
        *pkt_continue = 1;
1492
        return 0;
1493
      }
1494
      sa->sin_family = AF_INET;
1495
      sa->sin_port = htons(doq_sockaddr_get_port(
1496
        (void*)c->socket->addr));
1497
      paddr->ifindex = v4info->ipi_ifindex;
1498
      memmove(&sa->sin_addr, &v4info->ipi_addr,
1499
        sizeof(struct in_addr));
1500
      paddr->localaddrlen = sizeof(struct sockaddr_in);
1501
      break;
1502
#elif defined(IP_RECVDSTADDR)
1503
    } else if( cmsg->cmsg_level == IPPROTO_IP &&
1504
      cmsg->cmsg_type == IP_RECVDSTADDR) {
1505
      struct sockaddr_in* sa= (struct sockaddr_in*)
1506
        &paddr->localaddr;
1507
      struct sockaddr_in* rema = (struct sockaddr_in*)
1508
        &paddr->addr;
1509
      if(rema->sin_family != AF_INET) {
1510
        log_err("doq cmsg family mismatch cmsg is ip4");
1511
        *pkt_continue = 1;
1512
        return 0;
1513
      }
1514
      sa->sin_family = AF_INET;
1515
      sa->sin_port = htons(doq_sockaddr_get_port(
1516
        (void*)c->socket->addr));
1517
      paddr->ifindex = 0;
1518
      memmove(&sa.sin_addr, CMSG_DATA(cmsg),
1519
        sizeof(struct in_addr));
1520
      paddr->localaddrlen = sizeof(struct sockaddr_in);
1521
      break;
1522
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
1523
    }
1524
  }
1525
#endif /* S_SPLINT_S */
1526
1527
return 1;
1528
}
1529
1530
/**
 * Get the packet ecn information from the ancillary data of a received
 * message. For AF_INET6 the IPV6_TCLASS control message is used, for any
 * other family the IP_TOS control message.
 * @param msg: the received message with the control data.
 * @param family: address family of the packet.
 * @return the value from the control message, or 0 if there is none.
 */
static uint32_t
msghdr_get_ecn(struct msghdr* msg, int family)
{
#ifndef S_SPLINT_S
  struct cmsghdr* cmsg;
  if(family == AF_INET6) {
    for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
      cmsg = CMSG_NXTHDR(msg, cmsg)) {
      if(cmsg->cmsg_level != IPPROTO_IPV6 ||
        cmsg->cmsg_type != IPV6_TCLASS)
        continue;
      if(cmsg->cmsg_len >= CMSG_LEN(sizeof(int))) {
        /* The traffic class is passed as an int. Read
         * all of it; the first byte alone is only the
         * value on little endian hosts. */
        int tclass = 0;
        memmove(&tclass, CMSG_DATA(cmsg),
          sizeof(tclass));
        return (uint32_t)tclass;
      }
      if(cmsg->cmsg_len != 0) {
        /* shorter payload, use its first byte */
        uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg);
        return *ecn;
      }
    }
    return 0;
  }
  for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
    cmsg = CMSG_NXTHDR(msg, cmsg)) {
    if(cmsg->cmsg_level == IPPROTO_IP &&
      cmsg->cmsg_type == IP_TOS &&
      cmsg->cmsg_len != 0) {
      uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg);
      return *ecn;
    }
  }
#endif /* S_SPLINT_S */
  return 0;
}
1560
1561
/** receive packet for DoQ on UDP. get ancillary data for addresses,
1562
 * return false if failed and the callback can stop receiving UDP packets
1563
 * if pkt_continue is false. */
1564
static int
1565
doq_recv(struct comm_point* c, struct doq_pkt_addr* paddr, int* pkt_continue,
1566
  struct ngtcp2_pkt_info* pi)
1567
{
1568
  struct msghdr msg;
1569
  struct iovec iov[1];
1570
  ssize_t rcv;
1571
  union {
1572
    struct cmsghdr hdr;
1573
    char buf[256];
1574
  } ancil;
1575
1576
  msg.msg_name = &paddr->addr;
1577
  msg.msg_namelen = (socklen_t)sizeof(paddr->addr);
1578
  iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf);
1579
  iov[0].iov_len = sldns_buffer_remaining(c->doq_socket->pkt_buf);
1580
  msg.msg_iov = iov;
1581
  msg.msg_iovlen = 1;
1582
  msg.msg_control = ancil.buf;
1583
#ifndef S_SPLINT_S
1584
  msg.msg_controllen = sizeof(ancil.buf);
1585
#endif /* S_SPLINT_S */
1586
  msg.msg_flags = 0;
1587
1588
  rcv = recvmsg(c->fd, &msg, MSG_DONTWAIT);
1589
  if(rcv == -1) {
1590
    if(errno != EAGAIN && errno != EINTR
1591
      && udp_recv_needs_log(errno)) {
1592
      log_err("recvmsg failed for doq: %s", strerror(errno));
1593
    }
1594
    *pkt_continue = 0;
1595
    return 0;
1596
  }
1597
1598
  paddr->addrlen = msg.msg_namelen;
1599
  sldns_buffer_skip(c->doq_socket->pkt_buf, rcv);
1600
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1601
  if(!doq_get_localaddr_cmsg(c, paddr, pkt_continue, &msg))
1602
    return 0;
1603
  pi->ecn = msghdr_get_ecn(&msg, paddr->addr.sockaddr.in.sin_family);
1604
  return 1;
1605
}
1606
1607
/** send the version negotiation for doq. scid and dcid are flipped around
1608
 * to send back to the client. */
1609
static void
1610
doq_send_version_negotiation(struct comm_point* c, struct doq_pkt_addr* paddr,
1611
  const uint8_t* dcid, size_t dcidlen, const uint8_t* scid,
1612
  size_t scidlen)
1613
{
1614
  uint32_t versions[2];
1615
  size_t versions_len = 0;
1616
  ngtcp2_ssize ret;
1617
  uint8_t unused_random;
1618
1619
  /* fill the array with supported versions */
1620
  versions[0] = NGTCP2_PROTO_VER_V1;
1621
  versions_len = 1;
1622
  unused_random = ub_random_max(c->doq_socket->rnd, 256);
1623
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1624
  ret = ngtcp2_pkt_write_version_negotiation(
1625
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1626
    sldns_buffer_capacity(c->doq_socket->pkt_buf), unused_random,
1627
    dcid, dcidlen, scid, scidlen, versions, versions_len);
1628
  if(ret < 0) {
1629
    log_err("ngtcp2_pkt_write_version_negotiation failed: %s",
1630
      ngtcp2_strerror(ret));
1631
    return;
1632
  }
1633
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1634
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1635
  doq_send_pkt(c, paddr, 0);
1636
}
1637
1638
/** Find the doq_conn object by remote address and dcid */
1639
static struct doq_conn*
1640
doq_conn_find(struct doq_table* table, struct doq_addr_storage* addr,
1641
  socklen_t addrlen, struct doq_addr_storage* localaddr,
1642
  socklen_t localaddrlen, int ifindex, const uint8_t* dcid,
1643
  size_t dcidlen)
1644
{
1645
  struct rbnode_type* node;
1646
  struct doq_conn key;
1647
  memset(&key.node, 0, sizeof(key.node));
1648
  key.node.key = &key;
1649
  memmove(&key.key.paddr.addr, addr, addrlen);
1650
  key.key.paddr.addrlen = addrlen;
1651
  memmove(&key.key.paddr.localaddr, localaddr, localaddrlen);
1652
  key.key.paddr.localaddrlen = localaddrlen;
1653
  key.key.paddr.ifindex = ifindex;
1654
  key.key.dcid = (void*)dcid;
1655
  key.key.dcidlen = dcidlen;
1656
  node = rbtree_search(table->conn_tree, &key);
1657
  if(node)
1658
    return (struct doq_conn*)node->key;
1659
  return NULL;
1660
}
1661
1662
/** find the doq_con by the connection id */
1663
static struct doq_conn*
1664
doq_conn_find_by_id(struct doq_table* table, const uint8_t* dcid,
1665
  size_t dcidlen)
1666
{
1667
  struct doq_conid* conid;
1668
  lock_rw_rdlock(&table->conid_lock);
1669
  conid = doq_conid_find(table, dcid, dcidlen);
1670
  if(conid) {
1671
    /* make a copy of the key */
1672
    struct doq_conn* conn;
1673
    struct doq_conn_key key = conid->key;
1674
    uint8_t cid[NGTCP2_MAX_CIDLEN];
1675
    log_assert(conid->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1676
    memcpy(cid, conid->key.dcid, conid->key.dcidlen);
1677
    key.dcid = cid;
1678
    lock_rw_unlock(&table->conid_lock);
1679
1680
    /* now that the conid lock is released, look up the conn */
1681
    lock_rw_rdlock(&table->lock);
1682
    conn = doq_conn_find(table, &key.paddr.addr,
1683
      key.paddr.addrlen, &key.paddr.localaddr,
1684
      key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
1685
      key.dcidlen);
1686
    if(!conn) {
1687
      /* The connection got deleted between the conid lookup
1688
       * and the connection lock grab, it no longer exists,
1689
       * so return null. */
1690
      lock_rw_unlock(&table->lock);
1691
      return NULL;
1692
    }
1693
    lock_basic_lock(&conn->lock);
1694
    if(conn->is_deleted) {
1695
      lock_rw_unlock(&table->lock);
1696
      lock_basic_unlock(&conn->lock);
1697
      return NULL;
1698
    }
1699
    lock_rw_unlock(&table->lock);
1700
    return conn;
1701
  }
1702
  lock_rw_unlock(&table->conid_lock);
1703
  return NULL;
1704
}
1705
1706
/** Find the doq_conn, by addr or by connection id */
1707
static struct doq_conn*
1708
doq_conn_find_by_addr_or_cid(struct doq_table* table,
1709
  struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen)
1710
{
1711
  struct doq_conn* conn;
1712
  lock_rw_rdlock(&table->lock);
1713
  conn = doq_conn_find(table, &paddr->addr, paddr->addrlen,
1714
    &paddr->localaddr, paddr->localaddrlen, paddr->ifindex,
1715
    dcid, dcidlen);
1716
  if(conn && conn->is_deleted) {
1717
    conn = NULL;
1718
  }
1719
  if(conn) {
1720
    lock_basic_lock(&conn->lock);
1721
    lock_rw_unlock(&table->lock);
1722
    verbose(VERB_ALGO, "doq: found connection by address, dcid");
1723
  } else {
1724
    lock_rw_unlock(&table->lock);
1725
    conn = doq_conn_find_by_id(table, dcid, dcidlen);
1726
    if(conn) {
1727
      verbose(VERB_ALGO, "doq: found connection by dcid");
1728
    }
1729
  }
1730
  return conn;
1731
}
1732
1733
/** decode doq packet header, false on handled or failure, true to continue
1734
 * to process the packet */
1735
static int
1736
doq_decode_pkt_header_negotiate(struct comm_point* c,
1737
  struct doq_pkt_addr* paddr, struct doq_conn** conn)
1738
{
1739
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1740
  struct ngtcp2_version_cid vc;
1741
#else
1742
  uint32_t version;
1743
  const uint8_t *dcid, *scid;
1744
  size_t dcidlen, scidlen;
1745
#endif
1746
  int rv;
1747
1748
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1749
  rv = ngtcp2_pkt_decode_version_cid(&vc,
1750
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1751
    sldns_buffer_limit(c->doq_socket->pkt_buf),
1752
    c->doq_socket->sv_scidlen);
1753
#else
1754
  rv = ngtcp2_pkt_decode_version_cid(&version, &dcid, &dcidlen,
1755
    &scid, &scidlen, sldns_buffer_begin(c->doq_socket->pkt_buf),
1756
    sldns_buffer_limit(c->doq_socket->pkt_buf), c->doq_socket->sv_scidlen);
1757
#endif
1758
  if(rv != 0) {
1759
    if(rv == NGTCP2_ERR_VERSION_NEGOTIATION) {
1760
      /* send the version negotiation */
1761
      doq_send_version_negotiation(c, paddr,
1762
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1763
      vc.scid, vc.scidlen, vc.dcid, vc.dcidlen
1764
#else
1765
      scid, scidlen, dcid, dcidlen
1766
#endif
1767
      );
1768
      return 0;
1769
    }
1770
    verbose(VERB_ALGO, "doq: could not decode version "
1771
      "and CID from QUIC packet header: %s",
1772
      ngtcp2_strerror(rv));
1773
    return 0;
1774
  }
1775
1776
  if(verbosity >= VERB_ALGO) {
1777
    verbose(VERB_ALGO, "ngtcp2_pkt_decode_version_cid packet has "
1778
      "QUIC protocol version %u", (unsigned)
1779
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1780
      vc.
1781
#endif
1782
      version
1783
      );
1784
    log_hex("dcid",
1785
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1786
      (void*)vc.dcid, vc.dcidlen
1787
#else
1788
      (void*)dcid, dcidlen
1789
#endif
1790
      );
1791
    log_hex("scid",
1792
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1793
      (void*)vc.scid, vc.scidlen
1794
#else
1795
      (void*)scid, scidlen
1796
#endif
1797
      );
1798
  }
1799
  *conn = doq_conn_find_by_addr_or_cid(c->doq_socket->table, paddr,
1800
#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID
1801
    vc.dcid, vc.dcidlen
1802
#else
1803
    dcid, dcidlen
1804
#endif
1805
    );
1806
  if(*conn)
1807
    (*conn)->doq_socket = c->doq_socket;
1808
  return 1;
1809
}
1810
1811
/** fill cid structure with random data */
1812
static void doq_cid_randfill(struct ngtcp2_cid* cid, size_t datalen,
1813
  struct ub_randstate* rnd)
1814
{
1815
  uint8_t buf[32];
1816
  if(datalen > sizeof(buf))
1817
    datalen = sizeof(buf);
1818
  doq_fill_rand(rnd, buf, datalen);
1819
  ngtcp2_cid_init(cid, buf, datalen);
1820
}
1821
1822
/** send retry packet for doq connection. */
1823
static void
1824
doq_send_retry(struct comm_point* c, struct doq_pkt_addr* paddr,
1825
  struct ngtcp2_pkt_hd* hd)
1826
{
1827
  char host[256], port[32];
1828
  struct ngtcp2_cid scid;
1829
  uint8_t token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN];
1830
  ngtcp2_tstamp ts;
1831
  ngtcp2_ssize tokenlen, ret;
1832
1833
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1834
    sizeof(host), port, sizeof(port))) {
1835
    log_err("doq_send_retry failed");
1836
    return;
1837
  }
1838
  verbose(VERB_ALGO, "doq: sending retry packet to %s %s", host, port);
1839
1840
  /* the server chosen source connection ID */
1841
  scid.datalen = c->doq_socket->sv_scidlen;
1842
  doq_cid_randfill(&scid, scid.datalen, c->doq_socket->rnd);
1843
1844
  ts = doq_get_timestamp_nanosec();
1845
1846
  tokenlen = ngtcp2_crypto_generate_retry_token(token,
1847
    c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1848
    hd->version, (void*)&paddr->addr, paddr->addrlen, &scid,
1849
    &hd->dcid, ts);
1850
  if(tokenlen < 0) {
1851
    log_err("ngtcp2_crypto_generate_retry_token failed: %s",
1852
      ngtcp2_strerror(tokenlen));
1853
    return;
1854
  }
1855
1856
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1857
  ret = ngtcp2_crypto_write_retry(sldns_buffer_begin(c->doq_socket->pkt_buf),
1858
    sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version,
1859
    &hd->scid, &scid, &hd->dcid, token, tokenlen);
1860
  if(ret < 0) {
1861
    log_err("ngtcp2_crypto_write_retry failed: %s",
1862
      ngtcp2_strerror(ret));
1863
    return;
1864
  }
1865
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1866
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1867
  doq_send_pkt(c, paddr, 0);
1868
}
1869
1870
/** doq send stateless connection close */
1871
static void
1872
doq_send_stateless_connection_close(struct comm_point* c,
1873
  struct doq_pkt_addr* paddr, struct ngtcp2_pkt_hd* hd,
1874
  uint64_t error_code)
1875
{
1876
  ngtcp2_ssize ret;
1877
  sldns_buffer_clear(c->doq_socket->pkt_buf);
1878
  ret = ngtcp2_crypto_write_connection_close(
1879
    sldns_buffer_begin(c->doq_socket->pkt_buf),
1880
    sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, &hd->scid,
1881
    &hd->dcid, error_code, NULL, 0);
1882
  if(ret < 0) {
1883
    log_err("ngtcp2_crypto_write_connection_close failed: %s",
1884
      ngtcp2_strerror(ret));
1885
    return;
1886
  }
1887
  sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
1888
  sldns_buffer_flip(c->doq_socket->pkt_buf);
1889
  doq_send_pkt(c, paddr, 0);
1890
}
1891
1892
/** doq verify retry token, false on failure */
1893
static int
1894
doq_verify_retry_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1895
  struct ngtcp2_cid* ocid, struct ngtcp2_pkt_hd* hd)
1896
{
1897
  char host[256], port[32];
1898
  ngtcp2_tstamp ts;
1899
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1900
    sizeof(host), port, sizeof(port))) {
1901
    log_err("doq_verify_retry_token failed");
1902
    return 0;
1903
  }
1904
  ts = doq_get_timestamp_nanosec();
1905
  verbose(VERB_ALGO, "doq: verifying retry token from %s %s", host,
1906
    port);
1907
  if(ngtcp2_crypto_verify_retry_token(ocid,
1908
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1909
    hd->token, hd->tokenlen,
1910
#else
1911
    hd->token.base, hd->token.len,
1912
#endif
1913
    c->doq_socket->static_secret,
1914
    c->doq_socket->static_secret_len, hd->version,
1915
    (void*)&paddr->addr, paddr->addrlen, &hd->dcid,
1916
    10*NGTCP2_SECONDS, ts) != 0) {
1917
    verbose(VERB_ALGO, "doq: could not verify retry token "
1918
      "from %s %s", host, port);
1919
    return 0;
1920
  }
1921
  verbose(VERB_ALGO, "doq: verified retry token from %s %s", host, port);
1922
  return 1;
1923
}
1924
1925
/** doq verify token, false on failure */
1926
static int
1927
doq_verify_token(struct comm_point* c, struct doq_pkt_addr* paddr,
1928
  struct ngtcp2_pkt_hd* hd)
1929
{
1930
  char host[256], port[32];
1931
  ngtcp2_tstamp ts;
1932
  if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host,
1933
    sizeof(host), port, sizeof(port))) {
1934
    log_err("doq_verify_token failed");
1935
    return 0;
1936
  }
1937
  ts = doq_get_timestamp_nanosec();
1938
  verbose(VERB_ALGO, "doq: verifying token from %s %s", host, port);
1939
  if(ngtcp2_crypto_verify_regular_token(
1940
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
1941
    hd->token, hd->tokenlen,
1942
#else
1943
    hd->token.base, hd->token.len,
1944
#endif
1945
    c->doq_socket->static_secret, c->doq_socket->static_secret_len,
1946
    (void*)&paddr->addr, paddr->addrlen, 3600*NGTCP2_SECONDS,
1947
    ts) != 0) {
1948
    verbose(VERB_ALGO, "doq: could not verify token from %s %s",
1949
      host, port);
1950
    return 0;
1951
  }
1952
  verbose(VERB_ALGO, "doq: verified token from %s %s", host, port);
1953
  return 1;
1954
}
1955
1956
/** delete and remove from the lookup tree the doq_conn connection */
1957
static void
1958
doq_delete_connection(struct comm_point* c, struct doq_conn* conn)
1959
{
1960
  struct doq_conn copy;
1961
  uint8_t cid[NGTCP2_MAX_CIDLEN];
1962
  rbnode_type* node;
1963
  if(!conn)
1964
    return;
1965
  /* Copy the key and set it deleted. */
1966
  conn->is_deleted = 1;
1967
  doq_conn_write_disable(conn);
1968
  copy.key = conn->key;
1969
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
1970
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
1971
  copy.key.dcid = cid;
1972
  copy.node.key = &copy;
1973
  lock_basic_unlock(&conn->lock);
1974
1975
  /* Now get the table lock to delete it from the tree */
1976
  lock_rw_wrlock(&c->doq_socket->table->lock);
1977
  node = rbtree_delete(c->doq_socket->table->conn_tree, copy.node.key);
1978
  if(node) {
1979
    conn = (struct doq_conn*)node->key;
1980
    lock_basic_lock(&conn->lock);
1981
    doq_conn_write_list_remove(c->doq_socket->table, conn);
1982
    if(conn->timer.timer_in_list) {
1983
      /* Remove timer from list first, because finding the
1984
       * rbnode element of the setlist of same timeouts
1985
       * needs tree lookup. Edit the tree structure after
1986
       * that lookup. */
1987
      doq_timer_list_remove(c->doq_socket->table,
1988
        &conn->timer);
1989
    }
1990
    if(conn->timer.timer_in_tree)
1991
      doq_timer_tree_remove(c->doq_socket->table,
1992
        &conn->timer);
1993
  }
1994
  lock_rw_unlock(&c->doq_socket->table->lock);
1995
  if(node) {
1996
    lock_basic_unlock(&conn->lock);
1997
    doq_table_quic_size_subtract(c->doq_socket->table,
1998
      sizeof(*conn)+conn->key.dcidlen);
1999
    doq_conn_delete(conn, c->doq_socket->table);
2000
  }
2001
}
2002
2003
/** create and setup a new doq connection, to a new destination, or with
2004
 * a new dcid. It has a new set of streams. It is inserted in the lookup tree.
2005
 * Returns NULL on failure. */
2006
static struct doq_conn*
2007
doq_setup_new_conn(struct comm_point* c, struct doq_pkt_addr* paddr,
2008
  struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid)
2009
{
2010
  struct doq_conn* conn;
2011
  if(!doq_table_quic_size_available(c->doq_socket->table,
2012
    c->doq_socket->cfg, sizeof(*conn)+hd->dcid.datalen
2013
    + sizeof(struct doq_stream)
2014
    + 100 /* estimated input query */
2015
    + 1200 /* estimated output query */)) {
2016
    verbose(VERB_ALGO, "doq: no mem available for new connection");
2017
    doq_send_stateless_connection_close(c, paddr, hd,
2018
      NGTCP2_CONNECTION_REFUSED);
2019
    return NULL;
2020
  }
2021
  conn = doq_conn_create(c, paddr, hd->dcid.data, hd->dcid.datalen,
2022
    hd->version);
2023
  if(!conn) {
2024
    log_err("doq: could not allocate doq_conn");
2025
    return NULL;
2026
  }
2027
  lock_rw_wrlock(&c->doq_socket->table->lock);
2028
  lock_basic_lock(&conn->lock);
2029
  if(!rbtree_insert(c->doq_socket->table->conn_tree, &conn->node)) {
2030
    lock_rw_unlock(&c->doq_socket->table->lock);
2031
    log_err("doq: duplicate connection");
2032
    /* conn has no entry in writelist, and no timer yet. */
2033
    lock_basic_unlock(&conn->lock);
2034
    doq_conn_delete(conn, c->doq_socket->table);
2035
    return NULL;
2036
  }
2037
  lock_rw_unlock(&c->doq_socket->table->lock);
2038
  doq_table_quic_size_add(c->doq_socket->table,
2039
    sizeof(*conn)+conn->key.dcidlen);
2040
  verbose(VERB_ALGO, "doq: created new connection");
2041
2042
  /* the scid and dcid switch meaning from the accepted client
2043
   * connection to the server connection. The 'source' and 'destination'
2044
   * meaning is reversed. */
2045
  if(!doq_conn_setup(conn, hd->scid.data, hd->scid.datalen,
2046
    (ocid?ocid->data:NULL), (ocid?ocid->datalen:0),
2047
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2048
    hd->token, hd->tokenlen
2049
#else
2050
    hd->token.base, hd->token.len
2051
#endif
2052
    )) {
2053
    log_err("doq: could not set up connection");
2054
    doq_delete_connection(c, conn);
2055
    return NULL;
2056
  }
2057
  return conn;
2058
}
2059
2060
/** perform doq address validation */
2061
static int
2062
doq_address_validation(struct comm_point* c, struct doq_pkt_addr* paddr,
2063
  struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid,
2064
  struct ngtcp2_cid** pocid)
2065
{
2066
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2067
  const uint8_t* token = hd->token;
2068
  size_t tokenlen = hd->tokenlen;
2069
#else
2070
  const uint8_t* token = hd->token.base;
2071
  size_t tokenlen = hd->token.len;
2072
#endif
2073
  verbose(VERB_ALGO, "doq stateless address validation");
2074
2075
  if(tokenlen == 0 || token == NULL) {
2076
    doq_send_retry(c, paddr, hd);
2077
    return 0;
2078
  }
2079
  if(token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY &&
2080
    hd->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN) {
2081
    doq_send_stateless_connection_close(c, paddr, hd,
2082
      NGTCP2_INVALID_TOKEN);
2083
    return 0;
2084
  }
2085
  if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) {
2086
    if(!doq_verify_retry_token(c, paddr, ocid, hd)) {
2087
      doq_send_stateless_connection_close(c, paddr, hd,
2088
        NGTCP2_INVALID_TOKEN);
2089
      return 0;
2090
    }
2091
    *pocid = ocid;
2092
  } else if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) {
2093
    if(!doq_verify_token(c, paddr, hd)) {
2094
      doq_send_retry(c, paddr, hd);
2095
      return 0;
2096
    }
2097
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2098
    hd->token = NULL;
2099
    hd->tokenlen = 0;
2100
#else
2101
    hd->token.base = NULL;
2102
    hd->token.len = 0;
2103
#endif
2104
  } else {
2105
    verbose(VERB_ALGO, "doq address validation: unrecognised "
2106
      "token in hd.token.base with magic byte 0x%2.2x",
2107
      (int)token[0]);
2108
    if(c->doq_socket->validate_addr) {
2109
      doq_send_retry(c, paddr, hd);
2110
      return 0;
2111
    }
2112
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2113
    hd->token = NULL;
2114
    hd->tokenlen = 0;
2115
#else
2116
    hd->token.base = NULL;
2117
    hd->token.len = 0;
2118
#endif
2119
  }
2120
  return 1;
2121
}
2122
2123
/** the doq accept, returns false if no further processing of content */
2124
static int
2125
doq_accept(struct comm_point* c, struct doq_pkt_addr* paddr,
2126
  struct doq_conn** conn, struct ngtcp2_pkt_info* pi)
2127
{
2128
  int rv;
2129
  struct ngtcp2_pkt_hd hd;
2130
  struct ngtcp2_cid ocid, *pocid=NULL;
2131
  int err_retry;
2132
  memset(&hd, 0, sizeof(hd));
2133
  rv = ngtcp2_accept(&hd, sldns_buffer_begin(c->doq_socket->pkt_buf),
2134
    sldns_buffer_limit(c->doq_socket->pkt_buf));
2135
  if(rv != 0) {
2136
    if(rv == NGTCP2_ERR_RETRY) {
2137
      doq_send_retry(c, paddr, &hd);
2138
      return 0;
2139
    }
2140
    log_err("doq: initial packet failed, ngtcp2_accept failed: %s",
2141
      ngtcp2_strerror(rv));
2142
    return 0;
2143
  }
2144
  if(c->doq_socket->validate_addr ||
2145
#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN
2146
    hd.tokenlen
2147
#else
2148
    hd.token.len
2149
#endif
2150
    ) {
2151
    if(!doq_address_validation(c, paddr, &hd, &ocid, &pocid))
2152
      return 0;
2153
  }
2154
  *conn = doq_setup_new_conn(c, paddr, &hd, pocid);
2155
  if(!*conn)
2156
    return 0;
2157
  (*conn)->doq_socket = c->doq_socket;
2158
  if(!doq_conn_recv(c, paddr, *conn, pi, &err_retry, NULL)) {
2159
    if(err_retry)
2160
      doq_send_retry(c, paddr, &hd);
2161
    doq_delete_connection(c, *conn);
2162
    *conn = NULL;
2163
    return 0;
2164
  }
2165
  return 1;
2166
}
2167
2168
/** doq pickup a timer to wait for for the worker. If any timer exists. */
2169
static void
2170
doq_pickup_timer(struct comm_point* c)
2171
{
2172
  struct doq_timer* t;
2173
  struct timeval tv;
2174
  int have_time = 0;
2175
  memset(&tv, 0, sizeof(tv));
2176
2177
  lock_rw_wrlock(&c->doq_socket->table->lock);
2178
  RBTREE_FOR(t, struct doq_timer*, c->doq_socket->table->timer_tree) {
2179
    if(t->worker_doq_socket == NULL ||
2180
      t->worker_doq_socket == c->doq_socket) {
2181
      /* pick up this element */
2182
      t->worker_doq_socket = c->doq_socket;
2183
      have_time = 1;
2184
      memcpy(&tv, &t->time, sizeof(tv));
2185
      break;
2186
    }
2187
  }
2188
  lock_rw_unlock(&c->doq_socket->table->lock);
2189
2190
  if(have_time) {
2191
    struct timeval rel;
2192
    timeval_subtract(&rel, &tv, c->doq_socket->now_tv);
2193
    comm_timer_set(c->doq_socket->timer, &rel);
2194
    memcpy(&c->doq_socket->marked_time, &tv,
2195
      sizeof(c->doq_socket->marked_time));
2196
    verbose(VERB_ALGO, "doq pickup timer at %d.%6.6d in %d.%6.6d",
2197
      (int)tv.tv_sec, (int)tv.tv_usec, (int)rel.tv_sec,
2198
      (int)rel.tv_usec);
2199
  } else {
2200
    if(comm_timer_is_set(c->doq_socket->timer))
2201
      comm_timer_disable(c->doq_socket->timer);
2202
    memset(&c->doq_socket->marked_time, 0,
2203
      sizeof(c->doq_socket->marked_time));
2204
    verbose(VERB_ALGO, "doq timer disabled");
2205
  }
2206
}
2207
2208
/** doq done with connection, release locks and setup timer and write */
2209
static void
2210
doq_done_setup_timer_and_write(struct comm_point* c, struct doq_conn* conn)
2211
{
2212
  struct doq_conn copy;
2213
  uint8_t cid[NGTCP2_MAX_CIDLEN];
2214
  rbnode_type* node;
2215
  struct timeval new_tv;
2216
  int write_change = 0, timer_change = 0;
2217
2218
  /* No longer in callbacks, so the pointer to doq_socket is back
2219
   * to NULL. */
2220
  conn->doq_socket = NULL;
2221
2222
  if(doq_conn_check_timer(conn, &new_tv))
2223
    timer_change = 1;
2224
  if( (conn->write_interest && !conn->on_write_list) ||
2225
    (!conn->write_interest && conn->on_write_list))
2226
    write_change = 1;
2227
2228
  if(!timer_change && !write_change) {
2229
    /* Nothing to do. */
2230
    lock_basic_unlock(&conn->lock);
2231
    return;
2232
  }
2233
2234
  /* The table lock is needed to change the write list and timer tree.
2235
   * So the connection lock is release and then the connection is
2236
   * looked up again. */
2237
  copy.key = conn->key;
2238
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2239
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2240
  copy.key.dcid = cid;
2241
  copy.node.key = &copy;
2242
  lock_basic_unlock(&conn->lock);
2243
2244
  lock_rw_wrlock(&c->doq_socket->table->lock);
2245
  node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2246
  if(!node) {
2247
    lock_rw_unlock(&c->doq_socket->table->lock);
2248
    /* Must have been deleted in the mean time. */
2249
    return;
2250
  }
2251
  conn = (struct doq_conn*)node->key;
2252
  lock_basic_lock(&conn->lock);
2253
  if(conn->is_deleted) {
2254
    /* It is deleted now. */
2255
    lock_rw_unlock(&c->doq_socket->table->lock);
2256
    lock_basic_unlock(&conn->lock);
2257
    return;
2258
  }
2259
2260
  if(write_change) {
2261
    /* Edit the write lists, we are holding the table.lock and can
2262
     * edit the list first,last and also prev,next and on_list
2263
     * elements in the doq_conn structures. */
2264
    doq_conn_set_write_list(c->doq_socket->table, conn);
2265
  }
2266
  if(timer_change) {
2267
    doq_timer_set(c->doq_socket->table, &conn->timer,
2268
      c->doq_socket, &new_tv);
2269
  }
2270
  lock_rw_unlock(&c->doq_socket->table->lock);
2271
  lock_basic_unlock(&conn->lock);
2272
}
2273
2274
/** doq done with connection callbacks, release locks and setup write */
2275
static void
2276
doq_done_with_conn_cb(struct comm_point* c, struct doq_conn* conn)
2277
{
2278
  struct doq_conn copy;
2279
  uint8_t cid[NGTCP2_MAX_CIDLEN];
2280
  rbnode_type* node;
2281
2282
  /* no longer in callbacks, so the pointer to doq_socket is back
2283
   * to NULL. */
2284
  conn->doq_socket = NULL;
2285
2286
  if( (conn->write_interest && conn->on_write_list) ||
2287
    (!conn->write_interest && !conn->on_write_list)) {
2288
    /* The connection already has the required write list
2289
     * status. */
2290
    lock_basic_unlock(&conn->lock);
2291
    return;
2292
  }
2293
2294
  /* To edit the write list of connections we have to hold the table
2295
   * lock, so we release the connection and then look it up again. */
2296
  copy.key = conn->key;
2297
  log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN);
2298
  memcpy(cid, conn->key.dcid, conn->key.dcidlen);
2299
  copy.key.dcid = cid;
2300
  copy.node.key = &copy;
2301
  lock_basic_unlock(&conn->lock);
2302
2303
  lock_rw_wrlock(&c->doq_socket->table->lock);
2304
  node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key);
2305
  if(!node) {
2306
    lock_rw_unlock(&c->doq_socket->table->lock);
2307
    /* must have been deleted in the mean time */
2308
    return;
2309
  }
2310
  conn = (struct doq_conn*)node->key;
2311
  lock_basic_lock(&conn->lock);
2312
  if(conn->is_deleted) {
2313
    /* it is deleted now. */
2314
    lock_rw_unlock(&c->doq_socket->table->lock);
2315
    lock_basic_unlock(&conn->lock);
2316
    return;
2317
  }
2318
2319
  /* edit the write lists, we are holding the table.lock and can
2320
   * edit the list first,last and also prev,next and on_list elements
2321
   * in the doq_conn structures. */
2322
  doq_conn_set_write_list(c->doq_socket->table, conn);
2323
  lock_rw_unlock(&c->doq_socket->table->lock);
2324
  lock_basic_unlock(&conn->lock);
2325
}
2326
2327
/** doq count the length of the write list */
2328
static size_t
2329
doq_write_list_length(struct comm_point* c)
2330
{
2331
  size_t count = 0;
2332
  struct doq_conn* conn;
2333
  lock_rw_rdlock(&c->doq_socket->table->lock);
2334
  conn = c->doq_socket->table->write_list_first;
2335
  while(conn) {
2336
    count++;
2337
    conn = conn->write_next;
2338
  }
2339
  lock_rw_unlock(&c->doq_socket->table->lock);
2340
  return count;
2341
}
2342
2343
/** doq pop the first element from the write list to have write events */
2344
static struct doq_conn*
2345
doq_pop_write_conn(struct comm_point* c)
2346
{
2347
  struct doq_conn* conn;
2348
  lock_rw_wrlock(&c->doq_socket->table->lock);
2349
  conn = doq_table_pop_first(c->doq_socket->table);
2350
  while(conn && conn->is_deleted) {
2351
    lock_basic_unlock(&conn->lock);
2352
    conn = doq_table_pop_first(c->doq_socket->table);
2353
  }
2354
  lock_rw_unlock(&c->doq_socket->table->lock);
2355
  if(conn)
2356
    conn->doq_socket = c->doq_socket;
2357
  return conn;
2358
}
2359
2360
/** doq the connection is done with write callbacks, release it.
 * @param c: comm point with the doq socket.
 * @param conn: the connection that was written for.
 * @param delete_it: if true the connection is deleted, otherwise its
 *   timer and write list status are set up. */
static void
doq_done_with_write_cb(struct comm_point* c, struct doq_conn* conn,
  int delete_it)
{
  if(delete_it)
    doq_delete_connection(c, conn);
  else
    doq_done_setup_timer_and_write(c, conn);
}
2371
2372
/** see if the doq socket wants to write packets */
2373
static int
2374
doq_socket_want_write(struct comm_point* c)
2375
{
2376
  int want_write = 0;
2377
  if(c->doq_socket->have_blocked_pkt)
2378
    return 1;
2379
  lock_rw_rdlock(&c->doq_socket->table->lock);
2380
  if(c->doq_socket->table->write_list_first)
2381
    want_write = 1;
2382
  lock_rw_unlock(&c->doq_socket->table->lock);
2383
  return want_write;
2384
}
2385
2386
/** enable write event for the doq server socket fd */
2387
static void
2388
doq_socket_write_enable(struct comm_point* c)
2389
{
2390
  verbose(VERB_ALGO, "doq socket want write");
2391
  if(c->doq_socket->event_has_write)
2392
    return;
2393
  comm_point_listen_for_rw(c, 1, 1);
2394
  c->doq_socket->event_has_write = 1;
2395
}
2396
2397
/** disable write event for the doq server socket fd */
2398
static void
2399
doq_socket_write_disable(struct comm_point* c)
2400
{
2401
  verbose(VERB_ALGO, "doq socket want no write");
2402
  if(!c->doq_socket->event_has_write)
2403
    return;
2404
  comm_point_listen_for_rw(c, 1, 0);
2405
  c->doq_socket->event_has_write = 0;
2406
}
2407
2408
/** write blocked packet, if possible. returns false if failed, again. */
2409
static int
2410
doq_write_blocked_pkt(struct comm_point* c)
2411
{
2412
  struct doq_pkt_addr paddr;
2413
  if(!c->doq_socket->have_blocked_pkt)
2414
    return 1;
2415
  c->doq_socket->have_blocked_pkt = 0;
2416
  if(sldns_buffer_limit(c->doq_socket->blocked_pkt) >
2417
    sldns_buffer_remaining(c->doq_socket->pkt_buf))
2418
    return 1; /* impossibly large, drop it.
2419
    impossible since pkt_buf is same size as blocked_pkt buf. */
2420
  sldns_buffer_clear(c->doq_socket->pkt_buf);
2421
  sldns_buffer_write(c->doq_socket->pkt_buf,
2422
    sldns_buffer_begin(c->doq_socket->blocked_pkt),
2423
    sldns_buffer_limit(c->doq_socket->blocked_pkt));
2424
  sldns_buffer_flip(c->doq_socket->pkt_buf);
2425
  memcpy(&paddr, c->doq_socket->blocked_paddr, sizeof(paddr));
2426
  doq_send_pkt(c, &paddr, c->doq_socket->blocked_pkt_pi.ecn);
2427
  if(c->doq_socket->have_blocked_pkt)
2428
    return 0;
2429
  return 1;
2430
}
2431
2432
/** doq find a timer that timeouted and return the conn, locked. */
2433
static struct doq_conn*
2434
doq_timer_timeout_conn(struct doq_server_socket* doq_socket)
2435
{
2436
  struct doq_conn* conn = NULL;
2437
  struct rbnode_type* node;
2438
  lock_rw_wrlock(&doq_socket->table->lock);
2439
  node = rbtree_first(doq_socket->table->timer_tree);
2440
  if(node && node != RBTREE_NULL) {
2441
    struct doq_timer* t = (struct doq_timer*)node;
2442
    conn = t->conn;
2443
2444
    /* If now < timer then no further timeouts in tree. */
2445
    if(timeval_smaller(doq_socket->now_tv, &t->time)) {
2446
      lock_rw_unlock(&doq_socket->table->lock);
2447
      return NULL;
2448
    }
2449
2450
    lock_basic_lock(&conn->lock);
2451
    conn->doq_socket = doq_socket;
2452
2453
    /* Now that the timer is fired, remove it. */
2454
    doq_timer_unset(doq_socket->table, t);
2455
    lock_rw_unlock(&doq_socket->table->lock);
2456
    return conn;
2457
  }
2458
  lock_rw_unlock(&doq_socket->table->lock);
2459
  return NULL;
2460
}
2461
2462
/** doq timer erase the marker that said which timer the worker uses. */
2463
static void
2464
doq_timer_erase_marker(struct doq_server_socket* doq_socket)
2465
{
2466
  struct doq_timer* t;
2467
  lock_rw_wrlock(&doq_socket->table->lock);
2468
  t = doq_timer_find_time(doq_socket->table, &doq_socket->marked_time);
2469
  if(t && t->worker_doq_socket == doq_socket)
2470
    t->worker_doq_socket = NULL;
2471
  lock_rw_unlock(&doq_socket->table->lock);
2472
  memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2473
}
2474
2475
void
2476
doq_timer_cb(void* arg)
2477
{
2478
  struct doq_server_socket* doq_socket = (struct doq_server_socket*)arg;
2479
  struct doq_conn* conn;
2480
  verbose(VERB_ALGO, "doq timer callback");
2481
2482
  doq_timer_erase_marker(doq_socket);
2483
2484
  while((conn = doq_timer_timeout_conn(doq_socket)) != NULL) {
2485
    if(conn->is_deleted ||
2486
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2487
      ngtcp2_conn_in_closing_period(conn->conn) ||
2488
#else
2489
      ngtcp2_conn_is_in_closing_period(conn->conn) ||
2490
#endif
2491
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2492
      ngtcp2_conn_in_draining_period(conn->conn)
2493
#else
2494
      ngtcp2_conn_is_in_draining_period(conn->conn)
2495
#endif
2496
      ) {
2497
      if(verbosity >= VERB_ALGO) {
2498
        char remotestr[256];
2499
        addr_to_str((void*)&conn->key.paddr.addr,
2500
          conn->key.paddr.addrlen, remotestr,
2501
          sizeof(remotestr));
2502
        verbose(VERB_ALGO, "doq conn %s is deleted "
2503
          "after timeout", remotestr);
2504
      }
2505
      doq_delete_connection(doq_socket->cp, conn);
2506
      continue;
2507
    }
2508
    if(!doq_conn_handle_timeout(conn))
2509
      doq_delete_connection(doq_socket->cp, conn);
2510
    else doq_done_setup_timer_and_write(doq_socket->cp, conn);
2511
  }
2512
2513
  if(doq_socket_want_write(doq_socket->cp))
2514
    doq_socket_write_enable(doq_socket->cp);
2515
  else doq_socket_write_disable(doq_socket->cp);
2516
  doq_pickup_timer(doq_socket->cp);
2517
}
2518
2519
void
2520
comm_point_doq_callback(int fd, short event, void* arg)
2521
{
2522
  struct comm_point* c;
2523
  struct doq_pkt_addr paddr;
2524
  int i, pkt_continue, err_drop;
2525
  struct doq_conn* conn;
2526
  struct ngtcp2_pkt_info pi;
2527
  size_t count, num_len;
2528
2529
  c = (struct comm_point*)arg;
2530
  log_assert(c->type == comm_doq);
2531
2532
  log_assert(c && c->doq_socket->pkt_buf && c->fd == fd);
2533
  ub_comm_base_now(c->ev->base);
2534
2535
  /* see if there is a blocked packet, and send that if possible.
2536
   * do not attempt to read yet, even if possible, that would just
2537
   * push more answers in reply to those read packets onto the list
2538
   * of written replies. First attempt to clear the write content out.
2539
   * That keeps the memory usage from bloating up. */
2540
  if(c->doq_socket->have_blocked_pkt) {
2541
    if(!doq_write_blocked_pkt(c)) {
2542
      /* this write has also blocked, attempt to write
2543
       * later. Make sure the event listens to write
2544
       * events. */
2545
      if(!c->doq_socket->event_has_write)
2546
        doq_socket_write_enable(c);
2547
      doq_pickup_timer(c);
2548
      return;
2549
    }
2550
  }
2551
2552
  /* see if there is write interest */
2553
  count = 0;
2554
  num_len = doq_write_list_length(c);
2555
  while((conn = doq_pop_write_conn(c)) != NULL) {
2556
    if(conn->is_deleted ||
2557
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2558
      ngtcp2_conn_in_closing_period(conn->conn) ||
2559
#else
2560
      ngtcp2_conn_is_in_closing_period(conn->conn) ||
2561
#endif
2562
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2563
      ngtcp2_conn_in_draining_period(conn->conn)
2564
#else
2565
      ngtcp2_conn_is_in_draining_period(conn->conn)
2566
#endif
2567
      ) {
2568
      conn->doq_socket = NULL;
2569
      lock_basic_unlock(&conn->lock);
2570
      if(c->doq_socket->have_blocked_pkt) {
2571
        if(!c->doq_socket->event_has_write)
2572
          doq_socket_write_enable(c);
2573
        doq_pickup_timer(c);
2574
        return;
2575
      }
2576
      if(++count > num_len*2)
2577
        break;
2578
      continue;
2579
    }
2580
    if(verbosity >= VERB_ALGO) {
2581
      char remotestr[256];
2582
      addr_to_str((void*)&conn->key.paddr.addr,
2583
        conn->key.paddr.addrlen, remotestr,
2584
        sizeof(remotestr));
2585
      verbose(VERB_ALGO, "doq write connection %s %d",
2586
        remotestr, doq_sockaddr_get_port(
2587
        &conn->key.paddr.addr));
2588
    }
2589
    if(doq_conn_write_streams(c, conn, &err_drop))
2590
      err_drop = 0;
2591
    doq_done_with_write_cb(c, conn, err_drop);
2592
    if(c->doq_socket->have_blocked_pkt) {
2593
      if(!c->doq_socket->event_has_write)
2594
        doq_socket_write_enable(c);
2595
      doq_pickup_timer(c);
2596
      return;
2597
    }
2598
    /* Stop overly long write lists that are created
2599
     * while we are processing. Do those next time there
2600
     * is a write callback. Stops long loops, and keeps
2601
     * fair for other events. */
2602
    if(++count > num_len*2)
2603
      break;
2604
  }
2605
2606
  /* check for data to read */
2607
  if((event&UB_EV_READ)!=0)
2608
    for(i=0; i<NUM_UDP_PER_SELECT; i++) {
2609
    /* there may be a blocked write packet and if so, stop
2610
     * reading because the reply cannot get written. The
2611
     * blocked packet could be written during the conn_recv
2612
     * handling of replies, or for a connection close. */
2613
    if(c->doq_socket->have_blocked_pkt) {
2614
      if(!c->doq_socket->event_has_write)
2615
        doq_socket_write_enable(c);
2616
      doq_pickup_timer(c);
2617
      return;
2618
    }
2619
    sldns_buffer_clear(c->doq_socket->pkt_buf);
2620
    doq_pkt_addr_init(&paddr);
2621
    log_assert(fd != -1);
2622
    log_assert(sldns_buffer_remaining(c->doq_socket->pkt_buf) > 0);
2623
    if(!doq_recv(c, &paddr, &pkt_continue, &pi)) {
2624
      if(pkt_continue)
2625
        continue;
2626
      break;
2627
    }
2628
2629
    /* handle incoming packet from remote addr to localaddr */
2630
    if(verbosity >= VERB_ALGO) {
2631
      char remotestr[256], localstr[256];
2632
      addr_to_str((void*)&paddr.addr, paddr.addrlen,
2633
        remotestr, sizeof(remotestr));
2634
      addr_to_str((void*)&paddr.localaddr,
2635
        paddr.localaddrlen, localstr,
2636
        sizeof(localstr));
2637
      log_info("incoming doq packet from %s port %d on "
2638
        "%s port %d ifindex %d",
2639
        remotestr, doq_sockaddr_get_port(&paddr.addr),
2640
        localstr,
2641
        doq_sockaddr_get_port(&paddr.localaddr),
2642
        paddr.ifindex);
2643
      log_info("doq_recv length %d ecn 0x%x",
2644
        (int)sldns_buffer_limit(c->doq_socket->pkt_buf),
2645
        (int)pi.ecn);
2646
    }
2647
2648
    if(sldns_buffer_limit(c->doq_socket->pkt_buf) == 0)
2649
      continue;
2650
2651
    conn = NULL;
2652
    if(!doq_decode_pkt_header_negotiate(c, &paddr, &conn))
2653
      continue;
2654
    if(!conn) {
2655
      if(!doq_accept(c, &paddr, &conn, &pi))
2656
        continue;
2657
      if(!doq_conn_write_streams(c, conn, NULL)) {
2658
        doq_delete_connection(c, conn);
2659
        continue;
2660
      }
2661
      doq_done_setup_timer_and_write(c, conn);
2662
      continue;
2663
    }
2664
    if(
2665
#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
2666
      ngtcp2_conn_in_closing_period(conn->conn)
2667
#else
2668
      ngtcp2_conn_is_in_closing_period(conn->conn)
2669
#endif
2670
      ) {
2671
      if(!doq_conn_send_close(c, conn)) {
2672
        doq_delete_connection(c, conn);
2673
      } else {
2674
        doq_done_setup_timer_and_write(c, conn);
2675
      }
2676
      continue;
2677
    }
2678
    if(
2679
#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
2680
      ngtcp2_conn_in_draining_period(conn->conn)
2681
#else
2682
      ngtcp2_conn_is_in_draining_period(conn->conn)
2683
#endif
2684
      ) {
2685
      doq_done_setup_timer_and_write(c, conn);
2686
      continue;
2687
    }
2688
    if(!doq_conn_recv(c, &paddr, conn, &pi, NULL, &err_drop)) {
2689
      /* The receive failed, and if it also failed to send
2690
       * a close, drop the connection. That means it is not
2691
       * in the closing period. */
2692
      if(err_drop) {
2693
        doq_delete_connection(c, conn);
2694
      } else {
2695
        doq_done_setup_timer_and_write(c, conn);
2696
      }
2697
      continue;
2698
    }
2699
    if(!doq_conn_write_streams(c, conn, &err_drop)) {
2700
      if(err_drop) {
2701
        doq_delete_connection(c, conn);
2702
      } else {
2703
        doq_done_setup_timer_and_write(c, conn);
2704
      }
2705
      continue;
2706
    }
2707
    doq_done_setup_timer_and_write(c, conn);
2708
  }
2709
2710
  /* see if we want to have more write events */
2711
  verbose(VERB_ALGO, "doq check write enable");
2712
  if(doq_socket_want_write(c))
2713
    doq_socket_write_enable(c);
2714
  else doq_socket_write_disable(c);
2715
  doq_pickup_timer(c);
2716
}
2717
2718
/** create new doq server socket structure */
2719
static struct doq_server_socket*
2720
doq_server_socket_create(struct doq_table* table, struct ub_randstate* rnd,
2721
  const void* quic_sslctx, struct comm_point* c, struct comm_base* base,
2722
  struct config_file* cfg)
2723
{
2724
  size_t doq_buffer_size = 4096; /* bytes buffer size, for one packet. */
2725
  struct doq_server_socket* doq_socket;
2726
  doq_socket = calloc(1, sizeof(*doq_socket));
2727
  if(!doq_socket) {
2728
    return NULL;
2729
  }
2730
  doq_socket->table = table;
2731
  doq_socket->rnd = rnd;
2732
  doq_socket->validate_addr = 1;
2733
  /* the doq_socket has its own copy of the static secret, as
2734
   * well as other config values, so that they do not need table.lock */
2735
  doq_socket->static_secret_len = table->static_secret_len;
2736
  doq_socket->static_secret = memdup(table->static_secret,
2737
    table->static_secret_len);
2738
  if(!doq_socket->static_secret) {
2739
    free(doq_socket);
2740
    return NULL;
2741
  }
2742
  doq_socket->ctx = (SSL_CTX*)quic_sslctx;
2743
  doq_socket->idle_timeout = table->idle_timeout;
2744
  doq_socket->sv_scidlen = table->sv_scidlen;
2745
  doq_socket->cp = c;
2746
  doq_socket->pkt_buf = sldns_buffer_new(doq_buffer_size);
2747
  if(!doq_socket->pkt_buf) {
2748
    free(doq_socket->static_secret);
2749
    free(doq_socket);
2750
    return NULL;
2751
  }
2752
  doq_socket->blocked_pkt = sldns_buffer_new(
2753
    sldns_buffer_capacity(doq_socket->pkt_buf));
2754
  if(!doq_socket->pkt_buf) {
2755
    free(doq_socket->static_secret);
2756
    sldns_buffer_free(doq_socket->pkt_buf);
2757
    free(doq_socket);
2758
    return NULL;
2759
  }
2760
  doq_socket->blocked_paddr = calloc(1,
2761
    sizeof(*doq_socket->blocked_paddr));
2762
  if(!doq_socket->blocked_paddr) {
2763
    free(doq_socket->static_secret);
2764
    sldns_buffer_free(doq_socket->pkt_buf);
2765
    sldns_buffer_free(doq_socket->blocked_pkt);
2766
    free(doq_socket);
2767
    return NULL;
2768
  }
2769
  doq_socket->timer = comm_timer_create(base, doq_timer_cb, doq_socket);
2770
  if(!doq_socket->timer) {
2771
    free(doq_socket->static_secret);
2772
    sldns_buffer_free(doq_socket->pkt_buf);
2773
    sldns_buffer_free(doq_socket->blocked_pkt);
2774
    free(doq_socket->blocked_paddr);
2775
    free(doq_socket);
2776
    return NULL;
2777
  }
2778
  memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time));
2779
  comm_base_timept(base, &doq_socket->now_tt, &doq_socket->now_tv);
2780
  doq_socket->cfg = cfg;
2781
  return doq_socket;
2782
}
2783
2784
/** delete doq server socket structure */
2785
static void
2786
doq_server_socket_delete(struct doq_server_socket* doq_socket)
2787
{
2788
  if(!doq_socket)
2789
    return;
2790
  free(doq_socket->static_secret);
2791
#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
2792
  free(doq_socket->quic_method);
2793
#endif
2794
  sldns_buffer_free(doq_socket->pkt_buf);
2795
  sldns_buffer_free(doq_socket->blocked_pkt);
2796
  free(doq_socket->blocked_paddr);
2797
  comm_timer_delete(doq_socket->timer);
2798
  free(doq_socket);
2799
}
2800
2801
/** find repinfo in the doq table */
2802
static struct doq_conn*
2803
doq_lookup_repinfo(struct doq_table* table, struct comm_reply* repinfo)
2804
{
2805
  struct doq_conn* conn;
2806
  struct doq_conn_key key;
2807
  doq_conn_key_from_repinfo(&key, repinfo);
2808
  lock_rw_rdlock(&table->lock);
2809
  conn = doq_conn_find(table, &key.paddr.addr,
2810
    key.paddr.addrlen, &key.paddr.localaddr,
2811
    key.paddr.localaddrlen, key.paddr.ifindex, key.dcid,
2812
    key.dcidlen);
2813
  if(conn) {
2814
    lock_basic_lock(&conn->lock);
2815
    lock_rw_unlock(&table->lock);
2816
    return conn;
2817
  }
2818
  lock_rw_unlock(&table->lock);
2819
  return NULL;
2820
}
2821
2822
/** doq find connection and stream. From inside callbacks from worker. */
2823
static int
2824
doq_lookup_conn_stream(struct comm_reply* repinfo, struct comm_point* c,
2825
  struct doq_conn** conn, struct doq_stream** stream)
2826
{
2827
  log_assert(c->doq_socket);
2828
  if(c->doq_socket->current_conn) {
2829
    *conn = c->doq_socket->current_conn;
2830
  } else {
2831
    *conn = doq_lookup_repinfo(c->doq_socket->table, repinfo);
2832
    if((*conn) && (*conn)->is_deleted) {
2833
      lock_basic_unlock(&(*conn)->lock);
2834
      *conn = NULL;
2835
    }
2836
    if(*conn) {
2837
      (*conn)->doq_socket = c->doq_socket;
2838
    }
2839
  }
2840
  if(!*conn) {
2841
    *stream = NULL;
2842
    return 0;
2843
  }
2844
  *stream = doq_stream_find(*conn, repinfo->doq_streamid);
2845
  if(!*stream) {
2846
    if(!c->doq_socket->current_conn) {
2847
      /* Not inside callbacks, we have our own lock on conn.
2848
       * Release it. */
2849
      lock_basic_unlock(&(*conn)->lock);
2850
    }
2851
    return 0;
2852
  }
2853
  if((*stream)->is_closed) {
2854
    /* stream is closed, ignore reply or drop */
2855
    if(!c->doq_socket->current_conn) {
2856
      /* Not inside callbacks, we have our own lock on conn.
2857
       * Release it. */
2858
      lock_basic_unlock(&(*conn)->lock);
2859
    }
2860
    return 0;
2861
  }
2862
  return 1;
2863
}
2864
2865
/** doq send a reply from a comm reply */
2866
static void
2867
doq_socket_send_reply(struct comm_reply* repinfo)
2868
{
2869
  struct doq_conn* conn;
2870
  struct doq_stream* stream;
2871
  log_assert(repinfo->c->type == comm_doq);
2872
  if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2873
    verbose(VERB_ALGO, "doq: send_reply but %s is gone",
2874
      (conn?"stream":"connection"));
2875
    /* No stream, it may have been closed. */
2876
    /* Drop the reply, it cannot be sent. */
2877
    return;
2878
  }
2879
  if(!doq_stream_send_reply(conn, stream, repinfo->c->buffer))
2880
    doq_stream_close(conn, stream, 1);
2881
  if(!repinfo->c->doq_socket->current_conn) {
2882
    /* Not inside callbacks, we have our own lock on conn.
2883
     * Release it. */
2884
    doq_done_with_conn_cb(repinfo->c, conn);
2885
    /* since we sent a reply, or closed it, the assumption is
2886
     * that there is something to write, so enable write event.
2887
     * It waits until the write event happens to write the
2888
     * streams with answers, this allows some answers to be
2889
     * answered before the event loop reaches the doq fd, in
2890
     * repinfo->c->fd, and that collates answers. That would
2891
     * not happen if we write doq packets right now. */
2892
    doq_socket_write_enable(repinfo->c);
2893
  }
2894
}
2895
2896
/** doq drop a reply from a comm reply */
2897
static void
2898
doq_socket_drop_reply(struct comm_reply* repinfo)
2899
{
2900
  struct doq_conn* conn;
2901
  struct doq_stream* stream;
2902
  log_assert(repinfo->c->type == comm_doq);
2903
  if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) {
2904
    verbose(VERB_ALGO, "doq: drop_reply but %s is gone",
2905
      (conn?"stream":"connection"));
2906
    /* The connection or stream is already gone. */
2907
    return;
2908
  }
2909
  doq_stream_close(conn, stream, 1);
2910
  if(!repinfo->c->doq_socket->current_conn) {
2911
    /* Not inside callbacks, we have our own lock on conn.
2912
     * Release it. */
2913
    doq_done_with_conn_cb(repinfo->c, conn);
2914
    doq_socket_write_enable(repinfo->c);
2915
  }
2916
}
2917
#endif /* HAVE_NGTCP2 */
2918
2919
int adjusted_tcp_timeout(struct comm_point* c)
2920
0
{
2921
0
  if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM)
2922
0
    return TCP_QUERY_TIMEOUT_MINIMUM;
2923
0
  return c->tcp_timeout_msec;
2924
0
}
2925
2926
/** Use a new tcp handler for new query fd, set to read query */
2927
static void
2928
setup_tcp_handler(struct comm_point* c, int fd, int cur, int max)
2929
0
{
2930
0
  int handler_usage;
2931
0
  log_assert(c->type == comm_tcp || c->type == comm_http);
2932
0
  log_assert(c->fd == -1);
2933
0
  sldns_buffer_clear(c->buffer);
2934
#ifdef USE_DNSCRYPT
2935
  if (c->dnscrypt)
2936
    sldns_buffer_clear(c->dnscrypt_buffer);
2937
#endif
2938
0
  c->tcp_is_reading = 1;
2939
0
  c->tcp_byte_count = 0;
2940
0
  c->tcp_keepalive = 0;
2941
  /* if more than half the tcp handlers are in use, use a shorter
2942
   * timeout for this TCP connection, we need to make space for
2943
   * other connections to be able to get attention */
2944
  /* If > 50% TCP handler structures in use, set timeout to 1/100th
2945
   *  configured value.
2946
   * If > 65%TCP handler structures in use, set to 1/500th configured
2947
   *  value.
2948
   * If > 80% TCP handler structures in use, set to 0.
2949
   *
2950
   * If the timeout to use falls below 200 milliseconds, an actual
2951
   * timeout of 200ms is used.
2952
   */
2953
0
  handler_usage = (cur * 100) / max;
2954
0
  if(handler_usage > 50 && handler_usage <= 65)
2955
0
    c->tcp_timeout_msec /= 100;
2956
0
  else if (handler_usage > 65 && handler_usage <= 80)
2957
0
    c->tcp_timeout_msec /= 500;
2958
0
  else if (handler_usage > 80)
2959
0
    c->tcp_timeout_msec = 0;
2960
0
  comm_point_start_listening(c, fd, adjusted_tcp_timeout(c));
2961
0
}
2962
2963
void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
2964
  short ATTR_UNUSED(event), void* arg)
2965
0
{
2966
0
  struct comm_base* b = (struct comm_base*)arg;
2967
  /* timeout for the slow accept, re-enable accepts again */
2968
0
  if(b->start_accept) {
2969
0
    verbose(VERB_ALGO, "wait is over, slow accept disabled");
2970
0
    fptr_ok(fptr_whitelist_start_accept(b->start_accept));
2971
0
    (*b->start_accept)(b->cb_arg);
2972
0
    b->eb->slow_accept_enabled = 0;
2973
0
  }
2974
0
}
2975
2976
int comm_point_perform_accept(struct comm_point* c,
2977
  struct sockaddr_storage* addr, socklen_t* addrlen)
2978
0
{
2979
0
  int new_fd;
2980
0
  *addrlen = (socklen_t)sizeof(*addr);
2981
#ifndef HAVE_ACCEPT4
2982
  new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
2983
#else
2984
  /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
2985
0
  new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
2986
0
#endif
2987
0
  if(new_fd == -1) {
2988
0
#ifndef USE_WINSOCK
2989
    /* EINTR is signal interrupt. others are closed connection. */
2990
0
    if( errno == EINTR || errno == EAGAIN
2991
0
#ifdef EWOULDBLOCK
2992
0
      || errno == EWOULDBLOCK
2993
0
#endif
2994
0
#ifdef ECONNABORTED
2995
0
      || errno == ECONNABORTED
2996
0
#endif
2997
0
#ifdef EPROTO
2998
0
      || errno == EPROTO
2999
0
#endif /* EPROTO */
3000
0
      )
3001
0
      return -1;
3002
0
#if defined(ENFILE) && defined(EMFILE)
3003
0
    if(errno == ENFILE || errno == EMFILE) {
3004
      /* out of file descriptors, likely outside of our
3005
       * control. stop accept() calls for some time */
3006
0
      if(c->ev->base->stop_accept) {
3007
0
        struct comm_base* b = c->ev->base;
3008
0
        struct timeval tv;
3009
0
        verbose(VERB_ALGO, "out of file descriptors: "
3010
0
          "slow accept");
3011
0
        ub_comm_base_now(b);
3012
0
        if(b->eb->last_slow_log+SLOW_LOG_TIME <=
3013
0
          b->eb->secs) {
3014
0
          b->eb->last_slow_log = b->eb->secs;
3015
0
          verbose(VERB_OPS, "accept failed, "
3016
0
            "slow down accept for %d "
3017
0
            "msec: %s",
3018
0
            NETEVENT_SLOW_ACCEPT_TIME,
3019
0
            sock_strerror(errno));
3020
0
        }
3021
0
        b->eb->slow_accept_enabled = 1;
3022
0
        fptr_ok(fptr_whitelist_stop_accept(
3023
0
          b->stop_accept));
3024
0
        (*b->stop_accept)(b->cb_arg);
3025
        /* set timeout, no mallocs */
3026
0
        tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
3027
0
        tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
3028
0
        b->eb->slow_accept = ub_event_new(b->eb->base,
3029
0
          -1, UB_EV_TIMEOUT,
3030
0
          comm_base_handle_slow_accept, b);
3031
0
        if(b->eb->slow_accept == NULL) {
3032
          /* we do not want to log here, because
3033
           * that would spam the logfiles.
3034
           * error: "event_base_set failed." */
3035
0
        }
3036
0
        else if(ub_event_add(b->eb->slow_accept, &tv)
3037
0
          != 0) {
3038
          /* we do not want to log here,
3039
           * error: "event_add failed." */
3040
0
        }
3041
0
      } else {
3042
0
        log_err("accept, with no slow down, "
3043
0
          "failed: %s", sock_strerror(errno));
3044
0
      }
3045
0
      return -1;
3046
0
    }
3047
0
#endif
3048
#else /* USE_WINSOCK */
3049
    if(WSAGetLastError() == WSAEINPROGRESS ||
3050
      WSAGetLastError() == WSAECONNRESET)
3051
      return -1;
3052
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
3053
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3054
      return -1;
3055
    }
3056
#endif
3057
0
    log_err_addr("accept failed", sock_strerror(errno), addr,
3058
0
      *addrlen);
3059
0
    return -1;
3060
0
  }
3061
0
  if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
3062
0
    c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
3063
0
    if(!tcl_new_connection(c->tcl_addr)) {
3064
0
      if(verbosity >= 3)
3065
0
        log_err_addr("accept rejected",
3066
0
        "connection limit exceeded", addr, *addrlen);
3067
0
      sock_close(new_fd);
3068
0
      return -1;
3069
0
    }
3070
0
  }
3071
#ifndef HAVE_ACCEPT4
3072
  fd_set_nonblock(new_fd);
3073
#endif
3074
0
  return new_fd;
3075
0
}
3076
3077
#ifdef USE_WINSOCK
3078
static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
3079
#ifdef HAVE_BIO_SET_CALLBACK_EX
3080
  size_t ATTR_UNUSED(len),
3081
#endif
3082
        int ATTR_UNUSED(argi), long argl,
3083
#ifndef HAVE_BIO_SET_CALLBACK_EX
3084
  long retvalue
3085
#else
3086
  int retvalue, size_t* ATTR_UNUSED(processed)
3087
#endif
3088
  )
3089
{
3090
  int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
3091
  verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
3092
    (oper&BIO_CB_RETURN)?"return":"before",
3093
    (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
3094
    wsa_err==WSAEWOULDBLOCK?"wsawb":"");
3095
  /* on windows, check if previous operation caused EWOULDBLOCK */
3096
  if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
3097
    (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
3098
    if(wsa_err == WSAEWOULDBLOCK)
3099
      ub_winsock_tcp_wouldblock((struct ub_event*)
3100
        BIO_get_callback_arg(b), UB_EV_READ);
3101
  }
3102
  if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
3103
    (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
3104
    if(wsa_err == WSAEWOULDBLOCK)
3105
      ub_winsock_tcp_wouldblock((struct ub_event*)
3106
        BIO_get_callback_arg(b), UB_EV_WRITE);
3107
  }
3108
  /* return original return value */
3109
  return retvalue;
3110
}
3111
3112
/** set win bio callbacks for nonblocking operations */
3113
void
3114
comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
3115
{
3116
  SSL* ssl = (SSL*)thessl;
3117
  /* set them both just in case, but usually they are the same BIO */
3118
#ifdef HAVE_BIO_SET_CALLBACK_EX
3119
  BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb);
3120
#else
3121
  BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
3122
#endif
3123
  BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
3124
#ifdef HAVE_BIO_SET_CALLBACK_EX
3125
  BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb);
3126
#else
3127
  BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
3128
#endif
3129
  BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
3130
}
3131
#endif
3132
3133
#ifdef HAVE_NGHTTP2
3134
/** Create http2 session server.  Per connection, after TCP accepted.*/
3135
static int http2_session_server_create(struct http2_session* h2_session)
3136
{
3137
  log_assert(h2_session->callbacks);
3138
  h2_session->is_drop = 0;
3139
  if(nghttp2_session_server_new(&h2_session->session,
3140
      h2_session->callbacks,
3141
    h2_session) == NGHTTP2_ERR_NOMEM) {
3142
    log_err("failed to create nghttp2 session server");
3143
    return 0;
3144
  }
3145
3146
  return 1;
3147
}
3148
3149
/** Submit http2 setting to session. Once per session. */
3150
static int http2_submit_settings(struct http2_session* h2_session)
3151
{
3152
  int ret;
3153
  nghttp2_settings_entry settings[1] = {
3154
    {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS,
3155
     h2_session->c->http2_max_streams}};
3156
3157
  ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE,
3158
    settings, 1);
3159
  if(ret) {
3160
    verbose(VERB_QUERY, "http2: submit_settings failed, "
3161
      "error: %s", nghttp2_strerror(ret));
3162
    return 0;
3163
  }
3164
  return 1;
3165
}
3166
#endif /* HAVE_NGHTTP2 */
3167
3168
#ifdef HAVE_NGHTTP2
3169
/** Delete http2 stream. After session delete or stream close callback */
3170
static void http2_stream_delete(struct http2_session* h2_session,
3171
  struct http2_stream* h2_stream)
3172
{
3173
  if(h2_stream->mesh_state) {
3174
    mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state,
3175
      h2_session->c);
3176
    h2_stream->mesh_state = NULL;
3177
  }
3178
  http2_req_stream_clear(h2_stream);
3179
  free(h2_stream);
3180
}
3181
#endif /* HAVE_NGHTTP2 */
3182
3183
/**
 * Delete http2 session server. After closing connection.
 * Deletes the nghttp2 session and all streams in the stream list and
 * resets the drop flags. The h2_session structure itself is kept.
 * Does nothing when compiled without nghttp2.
 * @param h2_session: the http2 session to clean out.
 */
static void http2_session_server_delete(struct http2_session* h2_session)
{
#ifdef HAVE_NGHTTP2
  struct http2_stream* cur, *following;
  nghttp2_session_del(h2_session->session); /* NULL input is fine */
  h2_session->session = NULL;
  cur = h2_session->first_stream;
  while(cur) {
    /* pick up the next pointer before the stream is freed */
    following = cur->next;
    http2_stream_delete(h2_session, cur);
    cur = following;
  }
  h2_session->first_stream = NULL;
  h2_session->is_drop = 0;
  h2_session->postpone_drop = 0;
  h2_session->c->h2_stream = NULL;
#endif
  (void)h2_session;
}
3202
3203
void
3204
comm_point_tcp_accept_callback(int fd, short event, void* arg)
3205
0
{
3206
0
  struct comm_point* c = (struct comm_point*)arg, *c_hdl;
3207
0
  int new_fd;
3208
0
  log_assert(c->type == comm_tcp_accept);
3209
0
  if(!(event & UB_EV_READ)) {
3210
0
    log_info("ignoring tcp accept event %d", (int)event);
3211
0
    return;
3212
0
  }
3213
0
  ub_comm_base_now(c->ev->base);
3214
  /* find free tcp handler. */
3215
0
  if(!c->tcp_free) {
3216
0
    log_warn("accepted too many tcp, connections full");
3217
0
    return;
3218
0
  }
3219
  /* accept incoming connection. */
3220
0
  c_hdl = c->tcp_free;
3221
  /* Should not happen: inconsistent tcp_free state in
3222
   * accept_callback. */
3223
0
  log_assert(c_hdl->is_in_tcp_free);
3224
  /* clear leftover flags from previous use, and then set the
3225
   * correct event base for the event structure for libevent */
3226
0
  ub_event_free(c_hdl->ev->ev);
3227
0
  c_hdl->ev->ev = NULL;
3228
0
  if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) ||
3229
0
    c_hdl->type == comm_local || c_hdl->type == comm_raw)
3230
0
    c_hdl->tcp_do_toggle_rw = 0;
3231
0
  else  c_hdl->tcp_do_toggle_rw = 1;
3232
3233
0
  if(c_hdl->type == comm_http) {
3234
#ifdef HAVE_NGHTTP2
3235
    if(!c_hdl->h2_session ||
3236
      !http2_session_server_create(c_hdl->h2_session)) {
3237
      log_warn("failed to create nghttp2");
3238
      return;
3239
    }
3240
    if(!c_hdl->h2_session ||
3241
      !http2_submit_settings(c_hdl->h2_session)) {
3242
      log_warn("failed to submit http2 settings");
3243
      if(c_hdl->h2_session)
3244
        http2_session_server_delete(c_hdl->h2_session);
3245
      return;
3246
    }
3247
    if(!c->ssl) {
3248
      c_hdl->tcp_do_toggle_rw = 0;
3249
      c_hdl->use_h2 = 1;
3250
    }
3251
#endif
3252
0
    c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3253
0
      UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3254
0
      comm_point_http_handle_callback, c_hdl);
3255
0
  } else {
3256
0
    c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
3257
0
      UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
3258
0
      comm_point_tcp_handle_callback, c_hdl);
3259
0
  }
3260
0
  if(!c_hdl->ev->ev) {
3261
0
    log_warn("could not ub_event_new, dropped tcp");
3262
#ifdef HAVE_NGHTTP2
3263
    if(c_hdl->type == comm_http && c_hdl->h2_session)
3264
      http2_session_server_delete(c_hdl->h2_session);
3265
#endif
3266
0
    return;
3267
0
  }
3268
0
  log_assert(fd != -1);
3269
0
  (void)fd;
3270
0
  new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.remote_addr,
3271
0
    &c_hdl->repinfo.remote_addrlen);
3272
0
  if(new_fd == -1) {
3273
#ifdef HAVE_NGHTTP2
3274
    if(c_hdl->type == comm_http && c_hdl->h2_session)
3275
      http2_session_server_delete(c_hdl->h2_session);
3276
#endif
3277
0
    return;
3278
0
  }
3279
  /* Copy remote_address to client_address.
3280
   * Simplest way/time for streams to do that. */
3281
0
  c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen;
3282
0
  memmove(&c_hdl->repinfo.client_addr,
3283
0
    &c_hdl->repinfo.remote_addr,
3284
0
    c_hdl->repinfo.remote_addrlen);
3285
0
  if(c->ssl) {
3286
0
    c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
3287
0
    if(!c_hdl->ssl) {
3288
0
      c_hdl->fd = new_fd;
3289
0
      comm_point_close(c_hdl);
3290
0
      return;
3291
0
    }
3292
0
    c_hdl->ssl_shake_state = comm_ssl_shake_read;
3293
#ifdef USE_WINSOCK
3294
    comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
3295
#endif
3296
0
  }
3297
3298
  /* Paranoia: Check that the state has not changed from above: */
3299
  /* Should not happen: tcp_free state changed within accept_callback. */
3300
0
  log_assert(c_hdl == c->tcp_free);
3301
0
  log_assert(c_hdl->is_in_tcp_free);
3302
  /* grab the tcp handler buffers */
3303
0
  c->cur_tcp_count++;
3304
0
  c->tcp_free = c_hdl->tcp_free;
3305
0
  c_hdl->tcp_free = NULL;
3306
0
  c_hdl->is_in_tcp_free = 0;
3307
0
  if(!c->tcp_free) {
3308
    /* stop accepting incoming queries for now. */
3309
0
    comm_point_stop_listening(c);
3310
0
  }
3311
0
  setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
3312
0
}
3313
3314
/** Make tcp handler free for next assignment */
3315
static void
3316
reclaim_tcp_handler(struct comm_point* c)
3317
0
{
3318
0
  log_assert(c->type == comm_tcp);
3319
0
  if(c->ssl) {
3320
0
#ifdef HAVE_SSL
3321
0
    SSL_shutdown(c->ssl);
3322
0
    SSL_free(c->ssl);
3323
0
    c->ssl = NULL;
3324
0
#endif
3325
0
  }
3326
0
  comm_point_close(c);
3327
0
  if(c->tcp_parent && !c->is_in_tcp_free) {
3328
    /* Should not happen: bad tcp_free state in reclaim_tcp. */
3329
0
    log_assert(c->tcp_free == NULL);
3330
0
    log_assert(c->tcp_parent->cur_tcp_count > 0);
3331
0
    c->tcp_parent->cur_tcp_count--;
3332
0
    c->tcp_free = c->tcp_parent->tcp_free;
3333
0
    c->tcp_parent->tcp_free = c;
3334
0
    c->is_in_tcp_free = 1;
3335
0
    if(!c->tcp_free) {
3336
      /* re-enable listening on accept socket */
3337
0
      comm_point_start_listening(c->tcp_parent, -1, -1);
3338
0
    }
3339
0
  }
3340
0
  c->tcp_more_read_again = NULL;
3341
0
  c->tcp_more_write_again = NULL;
3342
0
  c->tcp_byte_count = 0;
3343
0
  c->pp2_header_state = pp2_header_none;
3344
0
  sldns_buffer_clear(c->buffer);
3345
0
}
3346
3347
/** do the callback when writing is done */
3348
static void
3349
tcp_callback_writer(struct comm_point* c)
3350
0
{
3351
0
  log_assert(c->type == comm_tcp);
3352
0
  if(!c->tcp_write_and_read) {
3353
0
    sldns_buffer_clear(c->buffer);
3354
0
    c->tcp_byte_count = 0;
3355
0
  }
3356
0
  if(c->tcp_do_toggle_rw)
3357
0
    c->tcp_is_reading = 1;
3358
  /* switch from listening(write) to listening(read) */
3359
0
  if(c->tcp_req_info) {
3360
0
    tcp_req_info_handle_writedone(c->tcp_req_info);
3361
0
  } else {
3362
0
    comm_point_stop_listening(c);
3363
0
    if(c->tcp_write_and_read) {
3364
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
3365
0
      if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN,
3366
0
        &c->repinfo) ) {
3367
0
        comm_point_start_listening(c, -1,
3368
0
          adjusted_tcp_timeout(c));
3369
0
      }
3370
0
    } else {
3371
0
      comm_point_start_listening(c, -1,
3372
0
          adjusted_tcp_timeout(c));
3373
0
    }
3374
0
  }
3375
0
}
3376
3377
/** do the callback when reading is done */
3378
static void
3379
tcp_callback_reader(struct comm_point* c)
3380
0
{
3381
0
  log_assert(c->type == comm_tcp || c->type == comm_local);
3382
0
  sldns_buffer_flip(c->buffer);
3383
0
  if(c->tcp_do_toggle_rw)
3384
0
    c->tcp_is_reading = 0;
3385
0
  c->tcp_byte_count = 0;
3386
0
  if(c->tcp_req_info) {
3387
0
    tcp_req_info_handle_readdone(c->tcp_req_info);
3388
0
  } else {
3389
0
    if(c->type == comm_tcp)
3390
0
      comm_point_stop_listening(c);
3391
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
3392
0
    if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
3393
0
      comm_point_start_listening(c, -1,
3394
0
          adjusted_tcp_timeout(c));
3395
0
    }
3396
0
  }
3397
0
}
3398
3399
#ifdef HAVE_SSL
3400
/** true if the ssl handshake error has to be squelched from the logs */
3401
int
3402
squelch_err_ssl_handshake(unsigned long err)
3403
0
{
3404
0
  if(verbosity >= VERB_QUERY)
3405
0
    return 0; /* only squelch on low verbosity */
3406
0
  if(ERR_GET_LIB(err) == ERR_LIB_SSL &&
3407
0
    (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST ||
3408
0
     ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST ||
3409
0
     ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER ||
3410
0
     ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE
3411
0
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
3412
0
     || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER
3413
0
#endif
3414
0
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
3415
0
     || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL
3416
0
     || ERR_GET_REASON(err) == SSL_R_UNSUPPORTED_PROTOCOL
3417
0
#  ifdef SSL_R_VERSION_TOO_LOW
3418
0
     || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW
3419
0
#  endif
3420
0
#endif
3421
0
    ))
3422
0
    return 1;
3423
0
  return 0;
3424
0
}
3425
#endif /* HAVE_SSL */
3426
3427
/** continue ssl handshake */
3428
#ifdef HAVE_SSL
3429
static int
3430
ssl_handshake(struct comm_point* c)
3431
0
{
3432
0
  int r;
3433
0
  if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
3434
    /* read condition satisfied back to writing */
3435
0
    comm_point_listen_for_rw(c, 0, 1);
3436
0
    c->ssl_shake_state = comm_ssl_shake_none;
3437
0
    return 1;
3438
0
  }
3439
0
  if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
3440
    /* write condition satisfied, back to reading */
3441
0
    comm_point_listen_for_rw(c, 1, 0);
3442
0
    c->ssl_shake_state = comm_ssl_shake_none;
3443
0
    return 1;
3444
0
  }
3445
3446
0
  ERR_clear_error();
3447
0
  r = SSL_do_handshake(c->ssl);
3448
0
  if(r != 1) {
3449
0
    int want = SSL_get_error(c->ssl, r);
3450
0
    if(want == SSL_ERROR_WANT_READ) {
3451
0
      if(c->ssl_shake_state == comm_ssl_shake_read)
3452
0
        return 1;
3453
0
      c->ssl_shake_state = comm_ssl_shake_read;
3454
0
      comm_point_listen_for_rw(c, 1, 0);
3455
0
      return 1;
3456
0
    } else if(want == SSL_ERROR_WANT_WRITE) {
3457
0
      if(c->ssl_shake_state == comm_ssl_shake_write)
3458
0
        return 1;
3459
0
      c->ssl_shake_state = comm_ssl_shake_write;
3460
0
      comm_point_listen_for_rw(c, 0, 1);
3461
0
      return 1;
3462
0
    } else if(r == 0) {
3463
0
      return 0; /* closed */
3464
0
    } else if(want == SSL_ERROR_SYSCALL) {
3465
      /* SYSCALL and errno==0 means closed uncleanly */
3466
0
#ifdef EPIPE
3467
0
      if(errno == EPIPE && verbosity < 2)
3468
0
        return 0; /* silence 'broken pipe' */
3469
0
#endif
3470
0
#ifdef ECONNRESET
3471
0
      if(errno == ECONNRESET && verbosity < 2)
3472
0
        return 0; /* silence reset by peer */
3473
0
#endif
3474
0
      if(!tcp_connect_errno_needs_log(
3475
0
        (struct sockaddr*)&c->repinfo.remote_addr,
3476
0
        c->repinfo.remote_addrlen))
3477
0
        return 0; /* silence connect failures that
3478
        show up because after connect this is the
3479
        first system call that accesses the socket */
3480
0
      if(errno != 0)
3481
0
        log_err("SSL_handshake syscall: %s",
3482
0
          strerror(errno));
3483
0
      return 0;
3484
0
    } else {
3485
0
      unsigned long err = ERR_get_error();
3486
0
      if(!squelch_err_ssl_handshake(err)) {
3487
0
        long vr;
3488
0
        log_crypto_err_io_code("ssl handshake failed",
3489
0
          want, err);
3490
0
        if((vr=SSL_get_verify_result(c->ssl)) != 0)
3491
0
          log_err("ssl handshake cert error: %s",
3492
0
            X509_verify_cert_error_string(
3493
0
            vr));
3494
0
        log_addr(VERB_OPS, "ssl handshake failed",
3495
0
          &c->repinfo.remote_addr,
3496
0
          c->repinfo.remote_addrlen);
3497
0
      }
3498
0
      return 0;
3499
0
    }
3500
0
  }
3501
  /* this is where peer verification could take place */
3502
0
  if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
3503
    /* verification */
3504
0
    if(SSL_get_verify_result(c->ssl) == X509_V_OK) {
3505
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
3506
      X509* x = SSL_get1_peer_certificate(c->ssl);
3507
#else
3508
0
      X509* x = SSL_get_peer_certificate(c->ssl);
3509
0
#endif
3510
0
      if(!x) {
3511
0
        log_addr(VERB_ALGO, "SSL connection failed: "
3512
0
          "no certificate",
3513
0
          &c->repinfo.remote_addr,
3514
0
          c->repinfo.remote_addrlen);
3515
0
        return 0;
3516
0
      }
3517
0
      log_cert(VERB_ALGO, "peer certificate", x);
3518
0
#ifdef HAVE_SSL_GET0_PEERNAME
3519
0
      if(SSL_get0_peername(c->ssl)) {
3520
0
        char buf[255];
3521
0
        snprintf(buf, sizeof(buf), "SSL connection "
3522
0
          "to %s authenticated",
3523
0
          SSL_get0_peername(c->ssl));
3524
0
        log_addr(VERB_ALGO, buf, &c->repinfo.remote_addr,
3525
0
          c->repinfo.remote_addrlen);
3526
0
      } else {
3527
0
#endif
3528
0
        log_addr(VERB_ALGO, "SSL connection "
3529
0
          "authenticated", &c->repinfo.remote_addr,
3530
0
          c->repinfo.remote_addrlen);
3531
0
#ifdef HAVE_SSL_GET0_PEERNAME
3532
0
      }
3533
0
#endif
3534
0
      X509_free(x);
3535
0
    } else {
3536
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
3537
      X509* x = SSL_get1_peer_certificate(c->ssl);
3538
#else
3539
0
      X509* x = SSL_get_peer_certificate(c->ssl);
3540
0
#endif
3541
0
      if(x) {
3542
0
        log_cert(VERB_ALGO, "peer certificate", x);
3543
0
        X509_free(x);
3544
0
      }
3545
0
      log_addr(VERB_ALGO, "SSL connection failed: "
3546
0
        "failed to authenticate",
3547
0
        &c->repinfo.remote_addr,
3548
0
        c->repinfo.remote_addrlen);
3549
0
      return 0;
3550
0
    }
3551
0
  } else {
3552
    /* unauthenticated, the verify peer flag was not set
3553
     * in c->ssl when the ssl object was created from ssl_ctx */
3554
0
    log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr,
3555
0
      c->repinfo.remote_addrlen);
3556
0
  }
3557
3558
0
#ifdef HAVE_SSL_GET0_ALPN_SELECTED
3559
  /* check if http2 use is negotiated */
3560
0
  if(c->type == comm_http && c->h2_session) {
3561
0
    const unsigned char *alpn;
3562
0
    unsigned int alpnlen = 0;
3563
0
    SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen);
3564
0
    if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) {
3565
      /* connection upgraded to HTTP2 */
3566
0
      c->tcp_do_toggle_rw = 0;
3567
0
      c->use_h2 = 1;
3568
0
    } else {
3569
0
      verbose(VERB_ALGO, "client doesn't support HTTP/2");
3570
0
      return 0;
3571
0
    }
3572
0
  }
3573
0
#endif
3574
3575
  /* setup listen rw correctly */
3576
0
  if(c->tcp_is_reading) {
3577
0
    if(c->ssl_shake_state != comm_ssl_shake_read)
3578
0
      comm_point_listen_for_rw(c, 1, 0);
3579
0
  } else {
3580
0
    comm_point_listen_for_rw(c, 0, 1);
3581
0
  }
3582
0
  c->ssl_shake_state = comm_ssl_shake_none;
3583
0
  return 1;
3584
0
}
3585
#endif /* HAVE_SSL */
3586
3587
#ifdef HAVE_SSL
/**
 * Handle an SSL_read() call that returned r <= 0 on a TCP comm point.
 * Every SSL_read() call site in ssl_handle_read() uses this routine, so
 * that the closed, retry and error paths are decided in one place.
 * Call it directly after the failed SSL_read(): it calls SSL_get_error()
 * and inspects errno.
 * @param c: comm point whose c->ssl was being read.
 * @param r: return value of SSL_read(), zero or negative.
 * @return the value that the read callback has to return:
 *	1 when the read must be retried on a later event,
 *	0 when the connection was closed or an error occurred,
 *	or the result of tcp_req_info_handle_read_close() when the peer
 *	closed the TLS stream and the comm point has a tcp_req_info.
 */
static int
ssl_handle_read_failure(struct comm_point* c, int r)
{
  int want = SSL_get_error(c->ssl, r);
  if(want == SSL_ERROR_ZERO_RETURN) {
    if(c->tcp_req_info)
      return tcp_req_info_handle_read_close(c->tcp_req_info);
    return 0; /* shutdown, closed */
  } else if(want == SSL_ERROR_WANT_READ) {
#ifdef USE_WINSOCK
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
#endif
    return 1; /* read more later */
  } else if(want == SSL_ERROR_WANT_WRITE) {
    /* the read needs a write first; remember that and
     * listen for the write event instead */
    c->ssl_shake_state = comm_ssl_shake_hs_write;
    comm_point_listen_for_rw(c, 0, 1);
    return 1;
  } else if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
    if(errno == ECONNRESET && verbosity < 2)
      return 0; /* silence reset by peer */
#endif
    if(errno != 0)
      log_err("SSL_read syscall: %s",
        strerror(errno));
    return 0;
  }
  log_crypto_err_io("could not SSL_read", want);
  return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Finishes a pending handshake first. Then, if PROXYv2 is enabled and the
 * header has not been consumed yet, reads the fixed and the variable part
 * of the PROXYv2 header. Otherwise reads the two length octets of a DNS
 * message followed by the message itself, and calls tcp_callback_reader()
 * once the buffer has no remaining space.
 * Without HAVE_SSL this function does nothing and returns 0.
 * @param c: comm point with c->ssl set, data is read into c->buffer.
 * @return 0 on error or when the connection is closed, 1 when done for
 *	now (also when more data has to be awaited).
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
  int r;
  if(c->ssl_shake_state != comm_ssl_shake_none) {
    if(!ssl_handshake(c))
      return 0;
    /* handshake still in progress, wait for the next event */
    if(c->ssl_shake_state != comm_ssl_shake_none)
      return 1;
  }
  if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
    struct pp2_header* header = NULL;
    size_t want_read_size = 0;
    size_t current_read_size = 0;
    if(c->pp2_header_state == pp2_header_none) {
      /* stage 1: the fixed size part of the header */
      want_read_size = PP2_HEADER_SIZE;
      if(sldns_buffer_remaining(c->buffer)<want_read_size) {
        log_err_addr("proxy_protocol: not enough "
          "buffer size to read PROXYv2 header", "",
          &c->repinfo.remote_addr,
          c->repinfo.remote_addrlen);
        return 0;
      }
      verbose(VERB_ALGO, "proxy_protocol: reading fixed "
        "part of PROXYv2 header (len %lu)",
        (unsigned long)want_read_size);
      current_read_size = want_read_size;
      if(c->tcp_byte_count < current_read_size) {
        ERR_clear_error();
        r = SSL_read(c->ssl, (void*)sldns_buffer_at(
          c->buffer, c->tcp_byte_count),
          (int)(current_read_size -
          c->tcp_byte_count));
        if(r <= 0)
          return ssl_handle_read_failure(c, r);
        c->tcp_byte_count += r;
        sldns_buffer_skip(c->buffer, r);
        /* partial read, continue on the next event */
        if(c->tcp_byte_count != current_read_size) return 1;
        c->pp2_header_state = pp2_header_init;
      }
    }
    if(c->pp2_header_state == pp2_header_init) {
      /* stage 2: the variable part, its length is in the
       * fixed part that is now in the buffer */
      int err;
      err = pp2_read_header(
        sldns_buffer_begin(c->buffer),
        sldns_buffer_limit(c->buffer));
      if(err) {
        log_err("proxy_protocol: could not parse "
          "PROXYv2 header (%s)",
          pp_lookup_error(err));
        return 0;
      }
      header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
      want_read_size = ntohs(header->len);
      if(sldns_buffer_limit(c->buffer) <
        PP2_HEADER_SIZE + want_read_size) {
        log_err_addr("proxy_protocol: not enough "
          "buffer size to read PROXYv2 header", "",
          &c->repinfo.remote_addr,
          c->repinfo.remote_addrlen);
        return 0;
      }
      verbose(VERB_ALGO, "proxy_protocol: reading variable "
        "part of PROXYv2 header (len %lu)",
        (unsigned long)want_read_size);
      current_read_size = PP2_HEADER_SIZE + want_read_size;
      if(want_read_size == 0) {
        /* nothing more to read; header is complete */
        c->pp2_header_state = pp2_header_done;
      } else if(c->tcp_byte_count < current_read_size) {
        ERR_clear_error();
        r = SSL_read(c->ssl, (void*)sldns_buffer_at(
          c->buffer, c->tcp_byte_count),
          (int)(current_read_size -
          c->tcp_byte_count));
        if(r <= 0)
          return ssl_handle_read_failure(c, r);
        c->tcp_byte_count += r;
        sldns_buffer_skip(c->buffer, r);
        /* partial read, continue on the next event */
        if(c->tcp_byte_count != current_read_size) return 1;
        c->pp2_header_state = pp2_header_done;
      }
    }
    if(c->pp2_header_state != pp2_header_done || !header) {
      log_err_addr("proxy_protocol: wrong state for the "
        "PROXYv2 header", "", &c->repinfo.remote_addr,
        c->repinfo.remote_addrlen);
      return 0;
    }
    sldns_buffer_flip(c->buffer);
    if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
      log_err_addr("proxy_protocol: could not consume "
        "PROXYv2 header", "", &c->repinfo.remote_addr,
        c->repinfo.remote_addrlen);
      return 0;
    }
    verbose(VERB_ALGO, "proxy_protocol: successful read of "
      "PROXYv2 header");
    /* Clear and reset the buffer to read the following
     * DNS packet(s). */
    sldns_buffer_clear(c->buffer);
    c->tcp_byte_count = 0;
    return 1;
  }
  if(c->tcp_byte_count < sizeof(uint16_t)) {
    /* read length bytes */
    ERR_clear_error();
    r = SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
      c->tcp_byte_count), (int)(sizeof(uint16_t) -
      c->tcp_byte_count));
    if(r <= 0)
      return ssl_handle_read_failure(c, r);
    c->tcp_byte_count += r;
    if(c->tcp_byte_count < sizeof(uint16_t))
      return 1;
    if(sldns_buffer_read_u16_at(c->buffer, 0) >
      sldns_buffer_capacity(c->buffer)) {
      verbose(VERB_QUERY, "ssl: dropped larger than buffer");
      return 0;
    }
    /* the length prefix becomes the limit of the buffer */
    sldns_buffer_set_limit(c->buffer,
      sldns_buffer_read_u16_at(c->buffer, 0));
    if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
      verbose(VERB_QUERY, "ssl: dropped bogus too short.");
      return 0;
    }
    sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
    verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
      (int)sldns_buffer_limit(c->buffer));
  }
  if(sldns_buffer_remaining(c->buffer) > 0) {
    /* read the message contents */
    ERR_clear_error();
    r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
      (int)sldns_buffer_remaining(c->buffer));
    if(r <= 0)
      return ssl_handle_read_failure(c, r);
    sldns_buffer_skip(c->buffer, (ssize_t)r);
  }
  if(sldns_buffer_remaining(c->buffer) <= 0) {
    /* the whole message is in the buffer */
    tcp_callback_reader(c);
  }
  return 1;
#else
  (void)c;
  return 0;
#endif /* HAVE_SSL */
}
3839
3840
#ifdef HAVE_SSL
/**
 * Handle an SSL_write() call that returned r <= 0 on a TCP comm point.
 * Both SSL_write() stages in ssl_handle_write() use this routine, so that
 * the closed, retry and error paths are decided in one place.
 * Call it directly after the failed SSL_write(): it calls SSL_get_error()
 * and inspects errno.
 * @param c: comm point whose c->ssl was being written.
 * @param r: return value of SSL_write(), zero or negative.
 * @return the value that the write callback has to return:
 *	1 when the write must be retried on a later event,
 *	0 when the connection was closed or an error occurred.
 */
static int
ssl_handle_write_failure(struct comm_point* c, int r)
{
  int want = SSL_get_error(c->ssl, r);
  if(want == SSL_ERROR_ZERO_RETURN) {
    return 0; /* closed */
  } else if(want == SSL_ERROR_WANT_READ) {
    /* the write needs a read first; remember that and
     * listen for the read event instead */
    c->ssl_shake_state = comm_ssl_shake_hs_read;
    comm_point_listen_for_rw(c, 1, 0);
    return 1; /* wait for read condition */
  } else if(want == SSL_ERROR_WANT_WRITE) {
#ifdef USE_WINSOCK
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
#endif
    return 1; /* write more later */
  } else if(want == SSL_ERROR_SYSCALL) {
#ifdef EPIPE
    if(errno == EPIPE && verbosity < 2)
      return 0; /* silence 'broken pipe' */
#endif
    if(errno != 0)
      log_err("SSL_write syscall: %s",
        strerror(errno));
    return 0;
  }
  log_crypto_err_io("could not SSL_write", want);
  return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Finishes a pending handshake first. Then writes the two octet length
 * prefix (together with the message when both fit in a buffer of
 * LDNS_RR_BUF_SIZE) and after that the remainder of the message. The data
 * comes from c->tcp_write_pkt when c->tcp_write_and_read is set, and from
 * c->buffer otherwise. Calls tcp_callback_writer() once everything has
 * been written.
 * Without HAVE_SSL this function does nothing and returns 0.
 * @param c: comm point with c->ssl set.
 * @return 0 on error or when the connection is closed, 1 when done for
 *	now (also when more has to be written later).
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
  int r;
  if(c->ssl_shake_state != comm_ssl_shake_none) {
    if(!ssl_handshake(c))
      return 0;
    /* handshake still in progress, wait for the next event */
    if(c->ssl_shake_state != comm_ssl_shake_none)
      return 1;
  }
  /* ignore return, if fails we may simply block */
  (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
  if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
    /* the length prefix has not been written completely yet */
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer));
    ERR_clear_error();
    if(c->tcp_write_and_read) {
      if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) {
        /* combine the tcp length and the query for
         * write, this emulates writev */
        uint8_t buf[LDNS_RR_BUF_SIZE];
        memmove(buf, &len, sizeof(uint16_t));
        memmove(buf+sizeof(uint16_t),
          c->tcp_write_pkt,
          c->tcp_write_pkt_len);
        r = SSL_write(c->ssl,
          (void*)(buf+c->tcp_write_byte_count),
          (int)(c->tcp_write_pkt_len + 2 -
          c->tcp_write_byte_count));
      } else {
        /* too large to combine, write the length only */
        r = SSL_write(c->ssl,
          (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
          (int)(sizeof(uint16_t)-c->tcp_write_byte_count));
      }
    } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
      LDNS_RR_BUF_SIZE) {
      /* combine the tcp length and the query for write,
       * this emulates writev */
      uint8_t buf[LDNS_RR_BUF_SIZE];
      memmove(buf, &len, sizeof(uint16_t));
      memmove(buf+sizeof(uint16_t),
        sldns_buffer_current(c->buffer),
        sldns_buffer_remaining(c->buffer));
      r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
        (int)(sizeof(uint16_t)+
        sldns_buffer_remaining(c->buffer)
        - c->tcp_byte_count));
    } else {
      /* too large to combine, write the length only */
      r = SSL_write(c->ssl,
        (void*)(((uint8_t*)&len)+c->tcp_byte_count),
        (int)(sizeof(uint16_t)-c->tcp_byte_count));
    }
    if(r <= 0)
      return ssl_handle_write_failure(c, r);
    if(c->tcp_write_and_read) {
      c->tcp_write_byte_count += r;
      if(c->tcp_write_byte_count < sizeof(uint16_t))
        return 1;
    } else {
      c->tcp_byte_count += r;
      if(c->tcp_byte_count < sizeof(uint16_t))
        return 1;
      /* the count includes the two length octets, the buffer
       * position does not */
      sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
        sizeof(uint16_t));
    }
    if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
      tcp_callback_writer(c);
      return 1;
    }
  }
  log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0);
  log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
  /* write the remainder of the message */
  ERR_clear_error();
  if(c->tcp_write_and_read) {
    /* tcp_write_byte_count includes the two length octets */
    r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
      (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count));
  } else {
    r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
      (int)sldns_buffer_remaining(c->buffer));
  }
  if(r <= 0)
    return ssl_handle_write_failure(c, r);
  if(c->tcp_write_and_read) {
    c->tcp_write_byte_count += r;
  } else {
    sldns_buffer_skip(c->buffer, (ssize_t)r);
  }

  if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
    tcp_callback_writer(c);
  }
  return 1;
#else
  (void)c;
  return 0;
#endif /* HAVE_SSL */
}
3986
3987
/** handle ssl tcp connection with dns contents */
3988
static int
3989
ssl_handle_it(struct comm_point* c, int is_write)
3990
0
{
3991
  /* handle case where renegotiation wants read during write call
3992
   * or write during read calls */
3993
0
  if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write)
3994
0
    return ssl_handle_read(c);
3995
0
  else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read)
3996
0
    return ssl_handle_write(c);
3997
  /* handle read events for read operation and write events for a
3998
   * write operation */
3999
0
  else if(!is_write)
4000
0
    return ssl_handle_read(c);
4001
0
  return ssl_handle_write(c);
4002
0
}
4003
4004
/**
4005
 * Handle tcp reading callback.
4006
 * @param fd: file descriptor of socket.
4007
 * @param c: comm point to read from into buffer.
4008
 * @param short_ok: if true, very short packets are OK (for comm_local).
4009
 * @return: 0 on error
4010
 */
4011
static int
4012
comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
4013
0
{
4014
0
  ssize_t r;
4015
0
  int recv_initial = 0;
4016
0
  log_assert(c->type == comm_tcp || c->type == comm_local);
4017
0
  if(c->ssl)
4018
0
    return ssl_handle_it(c, 0);
4019
0
  if(!c->tcp_is_reading && !c->tcp_write_and_read)
4020
0
    return 0;
4021
4022
0
  log_assert(fd != -1);
4023
0
  if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) {
4024
0
    struct pp2_header* header = NULL;
4025
0
    size_t want_read_size = 0;
4026
0
    size_t current_read_size = 0;
4027
0
    if(c->pp2_header_state == pp2_header_none) {
4028
0
      want_read_size = PP2_HEADER_SIZE;
4029
0
      if(sldns_buffer_remaining(c->buffer)<want_read_size) {
4030
0
        log_err_addr("proxy_protocol: not enough "
4031
0
          "buffer size to read PROXYv2 header", "",
4032
0
          &c->repinfo.remote_addr,
4033
0
          c->repinfo.remote_addrlen);
4034
0
        return 0;
4035
0
      }
4036
0
      verbose(VERB_ALGO, "proxy_protocol: reading fixed "
4037
0
        "part of PROXYv2 header (len %lu)",
4038
0
        (unsigned long)want_read_size);
4039
0
      current_read_size = want_read_size;
4040
0
      if(c->tcp_byte_count < current_read_size) {
4041
0
        r = recv(fd, (void*)sldns_buffer_at(c->buffer,
4042
0
          c->tcp_byte_count),
4043
0
          current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
4044
0
        if(r == 0) {
4045
0
          if(c->tcp_req_info)
4046
0
            return tcp_req_info_handle_read_close(c->tcp_req_info);
4047
0
          return 0;
4048
0
        } else if(r == -1) {
4049
0
          goto recv_error_initial;
4050
0
        }
4051
0
        c->tcp_byte_count += r;
4052
0
        sldns_buffer_skip(c->buffer, r);
4053
0
        if(c->tcp_byte_count != current_read_size) return 1;
4054
0
        c->pp2_header_state = pp2_header_init;
4055
0
      }
4056
0
    }
4057
0
    if(c->pp2_header_state == pp2_header_init) {
4058
0
      int err;
4059
0
      err = pp2_read_header(
4060
0
        sldns_buffer_begin(c->buffer),
4061
0
        sldns_buffer_limit(c->buffer));
4062
0
      if(err) {
4063
0
        log_err("proxy_protocol: could not parse "
4064
0
          "PROXYv2 header (%s)",
4065
0
          pp_lookup_error(err));
4066
0
        return 0;
4067
0
      }
4068
0
      header = (struct pp2_header*)sldns_buffer_begin(c->buffer);
4069
0
      want_read_size = ntohs(header->len);
4070
0
      if(sldns_buffer_limit(c->buffer) <
4071
0
        PP2_HEADER_SIZE + want_read_size) {
4072
0
        log_err_addr("proxy_protocol: not enough "
4073
0
          "buffer size to read PROXYv2 header", "",
4074
0
          &c->repinfo.remote_addr,
4075
0
          c->repinfo.remote_addrlen);
4076
0
        return 0;
4077
0
      }
4078
0
      verbose(VERB_ALGO, "proxy_protocol: reading variable "
4079
0
        "part of PROXYv2 header (len %lu)",
4080
0
        (unsigned long)want_read_size);
4081
0
      current_read_size = PP2_HEADER_SIZE + want_read_size;
4082
0
      if(want_read_size == 0) {
4083
        /* nothing more to read; header is complete */
4084
0
        c->pp2_header_state = pp2_header_done;
4085
0
      } else if(c->tcp_byte_count < current_read_size) {
4086
0
        r = recv(fd, (void*)sldns_buffer_at(c->buffer,
4087
0
          c->tcp_byte_count),
4088
0
          current_read_size-c->tcp_byte_count, MSG_DONTWAIT);
4089
0
        if(r == 0) {
4090
0
          if(c->tcp_req_info)
4091
0
            return tcp_req_info_handle_read_close(c->tcp_req_info);
4092
0
          return 0;
4093
0
        } else if(r == -1) {
4094
0
          goto recv_error;
4095
0
        }
4096
0
        c->tcp_byte_count += r;
4097
0
        sldns_buffer_skip(c->buffer, r);
4098
0
        if(c->tcp_byte_count != current_read_size) return 1;
4099
0
        c->pp2_header_state = pp2_header_done;
4100
0
      }
4101
0
    }
4102
0
    if(c->pp2_header_state != pp2_header_done || !header) {
4103
0
      log_err_addr("proxy_protocol: wrong state for the "
4104
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
4105
0
        c->repinfo.remote_addrlen);
4106
0
      return 0;
4107
0
    }
4108
0
    sldns_buffer_flip(c->buffer);
4109
0
    if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) {
4110
0
      log_err_addr("proxy_protocol: could not consume "
4111
0
        "PROXYv2 header", "", &c->repinfo.remote_addr,
4112
0
        c->repinfo.remote_addrlen);
4113
0
      return 0;
4114
0
    }
4115
0
    verbose(VERB_ALGO, "proxy_protocol: successful read of "
4116
0
      "PROXYv2 header");
4117
    /* Clear and reset the buffer to read the following
4118
        * DNS packet(s). */
4119
0
    sldns_buffer_clear(c->buffer);
4120
0
    c->tcp_byte_count = 0;
4121
0
    return 1;
4122
0
  }
4123
4124
0
  if(c->tcp_byte_count < sizeof(uint16_t)) {
4125
    /* read length bytes */
4126
0
    r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
4127
0
      sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT);
4128
0
    if(r == 0) {
4129
0
      if(c->tcp_req_info)
4130
0
        return tcp_req_info_handle_read_close(c->tcp_req_info);
4131
0
      return 0;
4132
0
    } else if(r == -1) {
4133
0
      if(c->pp2_enabled) goto recv_error;
4134
0
      goto recv_error_initial;
4135
0
    }
4136
0
    c->tcp_byte_count += r;
4137
0
    if(c->tcp_byte_count != sizeof(uint16_t))
4138
0
      return 1;
4139
0
    if(sldns_buffer_read_u16_at(c->buffer, 0) >
4140
0
      sldns_buffer_capacity(c->buffer)) {
4141
0
      verbose(VERB_QUERY, "tcp: dropped larger than buffer");
4142
0
      return 0;
4143
0
    }
4144
0
    sldns_buffer_set_limit(c->buffer,
4145
0
      sldns_buffer_read_u16_at(c->buffer, 0));
4146
0
    if(!short_ok &&
4147
0
      sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
4148
0
      verbose(VERB_QUERY, "tcp: dropped bogus too short.");
4149
0
      return 0;
4150
0
    }
4151
0
    verbose(VERB_ALGO, "Reading tcp query of length %d",
4152
0
      (int)sldns_buffer_limit(c->buffer));
4153
0
  }
4154
4155
0
  if(sldns_buffer_remaining(c->buffer) == 0)
4156
0
    log_err("in comm_point_tcp_handle_read buffer_remaining is "
4157
0
      "not > 0 as expected, continuing with (harmless) 0 "
4158
0
      "length recv");
4159
0
  r = recv(fd, (void*)sldns_buffer_current(c->buffer),
4160
0
    sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
4161
0
  if(r == 0) {
4162
0
    if(c->tcp_req_info)
4163
0
      return tcp_req_info_handle_read_close(c->tcp_req_info);
4164
0
    return 0;
4165
0
  } else if(r == -1) {
4166
0
    goto recv_error;
4167
0
  }
4168
0
  sldns_buffer_skip(c->buffer, r);
4169
0
  if(sldns_buffer_remaining(c->buffer) <= 0) {
4170
0
    tcp_callback_reader(c);
4171
0
  }
4172
0
  return 1;
4173
4174
0
recv_error_initial:
4175
0
  recv_initial = 1;
4176
0
recv_error:
4177
0
#ifndef USE_WINSOCK
4178
0
  if(errno == EINTR || errno == EAGAIN)
4179
0
    return 1;
4180
0
#ifdef ECONNRESET
4181
0
  if(errno == ECONNRESET && verbosity < 2)
4182
0
    return 0; /* silence reset by peer */
4183
0
#endif
4184
0
  if(recv_initial) {
4185
0
#ifdef ECONNREFUSED
4186
0
    if(errno == ECONNREFUSED && verbosity < 2)
4187
0
      return 0; /* silence reset by peer */
4188
0
#endif
4189
0
#ifdef ENETUNREACH
4190
0
    if(errno == ENETUNREACH && verbosity < 2)
4191
0
      return 0; /* silence it */
4192
0
#endif
4193
0
#ifdef EHOSTDOWN
4194
0
    if(errno == EHOSTDOWN && verbosity < 2)
4195
0
      return 0; /* silence it */
4196
0
#endif
4197
0
#ifdef EHOSTUNREACH
4198
0
    if(errno == EHOSTUNREACH && verbosity < 2)
4199
0
      return 0; /* silence it */
4200
0
#endif
4201
0
#ifdef ENETDOWN
4202
0
    if(errno == ENETDOWN && verbosity < 2)
4203
0
      return 0; /* silence it */
4204
0
#endif
4205
0
#ifdef EACCES
4206
0
    if(errno == EACCES && verbosity < 2)
4207
0
      return 0; /* silence it */
4208
0
#endif
4209
0
#ifdef ENOTCONN
4210
0
    if(errno == ENOTCONN) {
4211
0
      log_err_addr("read (in tcp initial) failed and this "
4212
0
        "could be because TCP Fast Open is "
4213
0
        "enabled [--disable-tfo-client "
4214
0
        "--disable-tfo-server] but does not "
4215
0
        "work", sock_strerror(errno),
4216
0
        &c->repinfo.remote_addr,
4217
0
        c->repinfo.remote_addrlen);
4218
0
      return 0;
4219
0
    }
4220
0
#endif
4221
0
  }
4222
#else /* USE_WINSOCK */
4223
  if(recv_initial) {
4224
    if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
4225
      return 0;
4226
    if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
4227
      return 0;
4228
    if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
4229
      return 0;
4230
    if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
4231
      return 0;
4232
    if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
4233
      return 0;
4234
  }
4235
  if(WSAGetLastError() == WSAECONNRESET)
4236
    return 0;
4237
  if(WSAGetLastError() == WSAEINPROGRESS)
4238
    return 1;
4239
  if(WSAGetLastError() == WSAEWOULDBLOCK) {
4240
    ub_winsock_tcp_wouldblock(c->ev->ev,
4241
      UB_EV_READ);
4242
    return 1;
4243
  }
4244
#endif
4245
0
  log_err_addr((recv_initial?"read (in tcp initial)":"read (in tcp)"),
4246
0
    sock_strerror(errno), &c->repinfo.remote_addr,
4247
0
    c->repinfo.remote_addrlen);
4248
0
  return 0;
4249
0
}
4250
4251
/**
4252
 * Handle tcp writing callback.
4253
 * @param fd: file descriptor of socket.
4254
 * @param c: comm point to write buffer out of.
4255
 * @return: 0 on error
4256
 */
4257
static int
4258
comm_point_tcp_handle_write(int fd, struct comm_point* c)
4259
0
{
4260
0
  ssize_t r;
4261
0
  struct sldns_buffer *buffer;
4262
0
  log_assert(c->type == comm_tcp);
4263
#ifdef USE_DNSCRYPT
4264
  buffer = c->dnscrypt_buffer;
4265
#else
4266
0
  buffer = c->buffer;
4267
0
#endif
4268
0
  if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
4269
0
    return 0;
4270
0
  log_assert(fd != -1);
4271
0
  if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
4272
    /* check for pending error from nonblocking connect */
4273
    /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
4274
0
    int error = 0;
4275
0
    socklen_t len = (socklen_t)sizeof(error);
4276
0
    if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
4277
0
      &len) < 0){
4278
0
#ifndef USE_WINSOCK
4279
0
      error = errno; /* on solaris errno is error */
4280
#else /* USE_WINSOCK */
4281
      error = WSAGetLastError();
4282
#endif
4283
0
    }
4284
0
#ifndef USE_WINSOCK
4285
0
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
4286
0
    if(error == EINPROGRESS || error == EWOULDBLOCK)
4287
0
      return 1; /* try again later */
4288
0
    else
4289
0
#endif
4290
0
    if(error != 0 && verbosity < 2)
4291
0
      return 0; /* silence lots of chatter in the logs */
4292
0
                else if(error != 0) {
4293
0
      log_err_addr("tcp connect", strerror(error),
4294
0
        &c->repinfo.remote_addr,
4295
0
        c->repinfo.remote_addrlen);
4296
#else /* USE_WINSOCK */
4297
    /* examine error */
4298
    if(error == WSAEINPROGRESS)
4299
      return 1;
4300
    else if(error == WSAEWOULDBLOCK) {
4301
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
4302
      return 1;
4303
    } else if(error != 0 && verbosity < 2)
4304
      return 0;
4305
    else if(error != 0) {
4306
      log_err_addr("tcp connect", wsa_strerror(error),
4307
        &c->repinfo.remote_addr,
4308
        c->repinfo.remote_addrlen);
4309
#endif /* USE_WINSOCK */
4310
0
      return 0;
4311
0
    }
4312
0
  }
4313
0
  if(c->ssl)
4314
0
    return ssl_handle_it(c, 1);
4315
4316
#ifdef USE_MSG_FASTOPEN
4317
  /* Only try this on first use of a connection that uses tfo,
4318
     otherwise fall through to normal write */
4319
  /* Also, TFO support on WINDOWS not implemented at the moment */
4320
  if(c->tcp_do_fastopen == 1) {
4321
    /* this form of sendmsg() does both a connect() and send() so need to
4322
       look for various flavours of error*/
4323
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
4324
    struct msghdr msg;
4325
    struct iovec iov[2];
4326
    c->tcp_do_fastopen = 0;
4327
    memset(&msg, 0, sizeof(msg));
4328
    if(c->tcp_write_and_read) {
4329
      iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
4330
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
4331
      iov[1].iov_base = c->tcp_write_pkt;
4332
      iov[1].iov_len = c->tcp_write_pkt_len;
4333
    } else {
4334
      iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
4335
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
4336
      iov[1].iov_base = sldns_buffer_begin(buffer);
4337
      iov[1].iov_len = sldns_buffer_limit(buffer);
4338
    }
4339
    log_assert(iov[0].iov_len > 0);
4340
    msg.msg_name = &c->repinfo.remote_addr;
4341
    msg.msg_namelen = c->repinfo.remote_addrlen;
4342
    msg.msg_iov = iov;
4343
    msg.msg_iovlen = 2;
4344
    r = sendmsg(fd, &msg, MSG_FASTOPEN);
4345
    if (r == -1) {
4346
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
4347
      /* Handshake is underway, maybe because no TFO cookie available.
4348
         Come back to write the message*/
4349
      if(errno == EINPROGRESS || errno == EWOULDBLOCK)
4350
        return 1;
4351
#endif
4352
      if(errno == EINTR || errno == EAGAIN)
4353
        return 1;
4354
      /* Not handling EISCONN here as shouldn't ever hit that case.*/
4355
      if(errno != EPIPE
4356
#ifdef EOPNOTSUPP
4357
        /* if /proc/sys/net/ipv4/tcp_fastopen is
4358
         * disabled on Linux, sendmsg may return
4359
         * 'Operation not supported', if so
4360
         * fallthrough to ordinary connect. */
4361
        && errno != EOPNOTSUPP
4362
#endif
4363
        && errno != 0) {
4364
        if(verbosity < 2)
4365
          return 0; /* silence lots of chatter in the logs */
4366
        log_err_addr("tcp sendmsg", strerror(errno),
4367
          &c->repinfo.remote_addr,
4368
          c->repinfo.remote_addrlen);
4369
        return 0;
4370
      }
4371
      verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno));
4372
      /* fallthrough to nonFASTOPEN
4373
       * (MSG_FASTOPEN on Linux 3 produces EPIPE)
4374
       * we need to perform connect() */
4375
      if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr,
4376
        c->repinfo.remote_addrlen) == -1) {
4377
#ifdef EINPROGRESS
4378
        if(errno == EINPROGRESS)
4379
          return 1; /* wait until connect done*/
4380
#endif
4381
#ifdef USE_WINSOCK
4382
        if(WSAGetLastError() == WSAEINPROGRESS ||
4383
          WSAGetLastError() == WSAEWOULDBLOCK)
4384
          return 1; /* wait until connect done*/
4385
#endif
4386
        if(tcp_connect_errno_needs_log(
4387
          (struct sockaddr *)&c->repinfo.remote_addr,
4388
          c->repinfo.remote_addrlen)) {
4389
          log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
4390
            strerror(errno),
4391
            &c->repinfo.remote_addr,
4392
            c->repinfo.remote_addrlen);
4393
        }
4394
        return 0;
4395
      }
4396
4397
    } else {
4398
      if(c->tcp_write_and_read) {
4399
        c->tcp_write_byte_count += r;
4400
        if(c->tcp_write_byte_count < sizeof(uint16_t))
4401
          return 1;
4402
      } else {
4403
        c->tcp_byte_count += r;
4404
        if(c->tcp_byte_count < sizeof(uint16_t))
4405
          return 1;
4406
        sldns_buffer_set_position(buffer, c->tcp_byte_count -
4407
          sizeof(uint16_t));
4408
      }
4409
      if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4410
        tcp_callback_writer(c);
4411
        return 1;
4412
      }
4413
    }
4414
  }
4415
#endif /* USE_MSG_FASTOPEN */
4416
4417
0
  if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
4418
0
    uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
4419
0
#ifdef HAVE_WRITEV
4420
0
    struct iovec iov[2];
4421
0
    if(c->tcp_write_and_read) {
4422
0
      iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
4423
0
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
4424
0
      iov[1].iov_base = c->tcp_write_pkt;
4425
0
      iov[1].iov_len = c->tcp_write_pkt_len;
4426
0
    } else {
4427
0
      iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
4428
0
      iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
4429
0
      iov[1].iov_base = sldns_buffer_begin(buffer);
4430
0
      iov[1].iov_len = sldns_buffer_limit(buffer);
4431
0
    }
4432
0
    log_assert(iov[0].iov_len > 0);
4433
0
    r = writev(fd, iov, 2);
4434
#else /* HAVE_WRITEV */
4435
    if(c->tcp_write_and_read) {
4436
      r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
4437
        sizeof(uint16_t)-c->tcp_write_byte_count, 0);
4438
    } else {
4439
      r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
4440
        sizeof(uint16_t)-c->tcp_byte_count, 0);
4441
    }
4442
#endif /* HAVE_WRITEV */
4443
0
    if(r == -1) {
4444
0
#ifndef USE_WINSOCK
4445
0
#  ifdef EPIPE
4446
0
                  if(errno == EPIPE && verbosity < 2)
4447
0
                          return 0; /* silence 'broken pipe' */
4448
0
  #endif
4449
0
      if(errno == EINTR || errno == EAGAIN)
4450
0
        return 1;
4451
0
#ifdef ECONNRESET
4452
0
      if(errno == ECONNRESET && verbosity < 2)
4453
0
        return 0; /* silence reset by peer */
4454
0
#endif
4455
0
#  ifdef HAVE_WRITEV
4456
0
      log_err_addr("tcp writev", strerror(errno),
4457
0
        &c->repinfo.remote_addr,
4458
0
        c->repinfo.remote_addrlen);
4459
#  else /* HAVE_WRITEV */
4460
      log_err_addr("tcp send s", strerror(errno),
4461
        &c->repinfo.remote_addr,
4462
        c->repinfo.remote_addrlen);
4463
#  endif /* HAVE_WRITEV */
4464
#else
4465
      if(WSAGetLastError() == WSAENOTCONN)
4466
        return 1;
4467
      if(WSAGetLastError() == WSAEINPROGRESS)
4468
        return 1;
4469
      if(WSAGetLastError() == WSAEWOULDBLOCK) {
4470
        ub_winsock_tcp_wouldblock(c->ev->ev,
4471
          UB_EV_WRITE);
4472
        return 1;
4473
      }
4474
      if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
4475
        return 0; /* silence reset by peer */
4476
      log_err_addr("tcp send s",
4477
        wsa_strerror(WSAGetLastError()),
4478
        &c->repinfo.remote_addr,
4479
        c->repinfo.remote_addrlen);
4480
#endif
4481
0
      return 0;
4482
0
    }
4483
0
    if(c->tcp_write_and_read) {
4484
0
      c->tcp_write_byte_count += r;
4485
0
      if(c->tcp_write_byte_count < sizeof(uint16_t))
4486
0
        return 1;
4487
0
    } else {
4488
0
      c->tcp_byte_count += r;
4489
0
      if(c->tcp_byte_count < sizeof(uint16_t))
4490
0
        return 1;
4491
0
      sldns_buffer_set_position(buffer, c->tcp_byte_count -
4492
0
        sizeof(uint16_t));
4493
0
    }
4494
0
    if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4495
0
      tcp_callback_writer(c);
4496
0
      return 1;
4497
0
    }
4498
0
  }
4499
0
  log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
4500
0
  log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
4501
0
  if(c->tcp_write_and_read) {
4502
0
    r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
4503
0
      c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
4504
0
  } else {
4505
0
    r = send(fd, (void*)sldns_buffer_current(buffer),
4506
0
      sldns_buffer_remaining(buffer), 0);
4507
0
  }
4508
0
  if(r == -1) {
4509
0
#ifndef USE_WINSOCK
4510
0
    if(errno == EINTR || errno == EAGAIN)
4511
0
      return 1;
4512
0
#ifdef ECONNRESET
4513
0
    if(errno == ECONNRESET && verbosity < 2)
4514
0
      return 0; /* silence reset by peer */
4515
0
#endif
4516
#else
4517
    if(WSAGetLastError() == WSAEINPROGRESS)
4518
      return 1;
4519
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
4520
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
4521
      return 1;
4522
    }
4523
    if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
4524
      return 0; /* silence reset by peer */
4525
#endif
4526
0
    log_err_addr("tcp send r", sock_strerror(errno),
4527
0
      &c->repinfo.remote_addr,
4528
0
      c->repinfo.remote_addrlen);
4529
0
    return 0;
4530
0
  }
4531
0
  if(c->tcp_write_and_read) {
4532
0
    c->tcp_write_byte_count += r;
4533
0
  } else {
4534
0
    sldns_buffer_skip(buffer, r);
4535
0
  }
4536
4537
0
  if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
4538
0
    tcp_callback_writer(c);
4539
0
  }
4540
4541
0
  return 1;
4542
0
}
4543
4544
/** read again to drain buffers when there could be more to read, returns 0
4545
 * on failure which means the comm point is closed. */
4546
static int
4547
tcp_req_info_read_again(int fd, struct comm_point* c)
4548
0
{
4549
0
  while(c->tcp_req_info->read_again) {
4550
0
    int r;
4551
0
    c->tcp_req_info->read_again = 0;
4552
0
    if(c->tcp_is_reading)
4553
0
      r = comm_point_tcp_handle_read(fd, c, 0);
4554
0
    else  r = comm_point_tcp_handle_write(fd, c);
4555
0
    if(!r) {
4556
0
      reclaim_tcp_handler(c);
4557
0
      if(!c->tcp_do_close) {
4558
0
        fptr_ok(fptr_whitelist_comm_point(
4559
0
          c->callback));
4560
0
        (void)(*c->callback)(c, c->cb_arg,
4561
0
          NETEVENT_CLOSED, NULL);
4562
0
      }
4563
0
      return 0;
4564
0
    }
4565
0
  }
4566
0
  return 1;
4567
0
}
4568
4569
/** read again to drain buffers when there could be more to read */
4570
static void
4571
tcp_more_read_again(int fd, struct comm_point* c)
4572
0
{
4573
  /* if the packet is done, but another one could be waiting on
4574
   * the connection, the callback signals this, and we try again */
4575
  /* this continues until the read routines get EAGAIN or so,
4576
   * and thus does not call the callback, and the bool is 0 */
4577
0
  int* moreread = c->tcp_more_read_again;
4578
0
  while(moreread && *moreread) {
4579
0
    *moreread = 0;
4580
0
    if(!comm_point_tcp_handle_read(fd, c, 0)) {
4581
0
      reclaim_tcp_handler(c);
4582
0
      if(!c->tcp_do_close) {
4583
0
        fptr_ok(fptr_whitelist_comm_point(
4584
0
          c->callback));
4585
0
        (void)(*c->callback)(c, c->cb_arg,
4586
0
          NETEVENT_CLOSED, NULL);
4587
0
      }
4588
0
      return;
4589
0
    }
4590
0
  }
4591
0
}
4592
4593
/** write again to fill up when there could be more to write */
4594
static void
4595
tcp_more_write_again(int fd, struct comm_point* c)
4596
0
{
4597
  /* if the packet is done, but another is waiting to be written,
4598
   * the callback signals it and we try again. */
4599
  /* this continues until the write routines get EAGAIN or so,
4600
   * and thus does not call the callback, and the bool is 0 */
4601
0
  int* morewrite = c->tcp_more_write_again;
4602
0
  while(morewrite && *morewrite) {
4603
0
    *morewrite = 0;
4604
0
    if(!comm_point_tcp_handle_write(fd, c)) {
4605
0
      reclaim_tcp_handler(c);
4606
0
      if(!c->tcp_do_close) {
4607
0
        fptr_ok(fptr_whitelist_comm_point(
4608
0
          c->callback));
4609
0
        (void)(*c->callback)(c, c->cb_arg,
4610
0
          NETEVENT_CLOSED, NULL);
4611
0
      }
4612
0
      return;
4613
0
    }
4614
0
  }
4615
0
}
4616
4617
void
4618
comm_point_tcp_handle_callback(int fd, short event, void* arg)
4619
0
{
4620
0
  struct comm_point* c = (struct comm_point*)arg;
4621
0
  log_assert(c->type == comm_tcp);
4622
0
  ub_comm_base_now(c->ev->base);
4623
4624
0
  if(c->fd == -1 || c->fd != fd)
4625
0
    return; /* duplicate event, but commpoint closed. */
4626
4627
#ifdef USE_DNSCRYPT
4628
  /* Initialize if this is a dnscrypt socket */
4629
  if(c->tcp_parent) {
4630
    c->dnscrypt = c->tcp_parent->dnscrypt;
4631
  }
4632
  if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
4633
    c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
4634
    if(!c->dnscrypt_buffer) {
4635
      log_err("Could not allocate dnscrypt buffer");
4636
      reclaim_tcp_handler(c);
4637
      if(!c->tcp_do_close) {
4638
        fptr_ok(fptr_whitelist_comm_point(
4639
          c->callback));
4640
        (void)(*c->callback)(c, c->cb_arg,
4641
          NETEVENT_CLOSED, NULL);
4642
      }
4643
      return;
4644
    }
4645
  }
4646
#endif
4647
4648
0
  if((event&UB_EV_TIMEOUT)) {
4649
0
    verbose(VERB_QUERY, "tcp took too long, dropped");
4650
0
    reclaim_tcp_handler(c);
4651
0
    if(!c->tcp_do_close) {
4652
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
4653
0
      (void)(*c->callback)(c, c->cb_arg,
4654
0
        NETEVENT_TIMEOUT, NULL);
4655
0
    }
4656
0
    return;
4657
0
  }
4658
0
  if((event&UB_EV_READ)
4659
#ifdef USE_MSG_FASTOPEN
4660
    && !(c->tcp_do_fastopen && (event&UB_EV_WRITE))
4661
#endif
4662
0
    ) {
4663
0
    int has_tcpq = (c->tcp_req_info != NULL);
4664
0
    int* moreread = c->tcp_more_read_again;
4665
0
    if(!comm_point_tcp_handle_read(fd, c, 0)) {
4666
0
      reclaim_tcp_handler(c);
4667
0
      if(!c->tcp_do_close) {
4668
0
        fptr_ok(fptr_whitelist_comm_point(
4669
0
          c->callback));
4670
0
        (void)(*c->callback)(c, c->cb_arg,
4671
0
          NETEVENT_CLOSED, NULL);
4672
0
      }
4673
0
      return;
4674
0
    }
4675
0
    if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4676
0
      if(!tcp_req_info_read_again(fd, c))
4677
0
        return;
4678
0
    }
4679
0
    if(moreread && *moreread)
4680
0
      tcp_more_read_again(fd, c);
4681
0
    return;
4682
0
  }
4683
0
  if((event&UB_EV_WRITE)) {
4684
0
    int has_tcpq = (c->tcp_req_info != NULL);
4685
0
    int* morewrite = c->tcp_more_write_again;
4686
0
    if(!comm_point_tcp_handle_write(fd, c)) {
4687
0
      reclaim_tcp_handler(c);
4688
0
      if(!c->tcp_do_close) {
4689
0
        fptr_ok(fptr_whitelist_comm_point(
4690
0
          c->callback));
4691
0
        (void)(*c->callback)(c, c->cb_arg,
4692
0
          NETEVENT_CLOSED, NULL);
4693
0
      }
4694
0
      return;
4695
0
    }
4696
0
    if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) {
4697
0
      if(!tcp_req_info_read_again(fd, c))
4698
0
        return;
4699
0
    }
4700
0
    if(morewrite && *morewrite)
4701
0
      tcp_more_write_again(fd, c);
4702
0
    return;
4703
0
  }
4704
0
  log_err("Ignored event %d for tcphdl.", event);
4705
0
}
4706
4707
/** Make http handler free for next assignment */
4708
static void
4709
reclaim_http_handler(struct comm_point* c)
4710
0
{
4711
0
  log_assert(c->type == comm_http);
4712
0
  if(c->ssl) {
4713
0
#ifdef HAVE_SSL
4714
0
    SSL_shutdown(c->ssl);
4715
0
    SSL_free(c->ssl);
4716
0
    c->ssl = NULL;
4717
0
#endif
4718
0
  }
4719
0
  comm_point_close(c);
4720
0
  if(c->tcp_parent && !c->is_in_tcp_free) {
4721
    /* Should not happen: bad tcp_free state in reclaim_http. */
4722
0
    log_assert(c->tcp_free == NULL);
4723
0
    log_assert(c->tcp_parent->cur_tcp_count > 0);
4724
0
    c->tcp_parent->cur_tcp_count--;
4725
0
    c->tcp_free = c->tcp_parent->tcp_free;
4726
0
    c->tcp_parent->tcp_free = c;
4727
0
    c->is_in_tcp_free = 1;
4728
0
    if(!c->tcp_free) {
4729
      /* re-enable listening on accept socket */
4730
0
      comm_point_start_listening(c->tcp_parent, -1, -1);
4731
0
    }
4732
0
  }
4733
0
}
4734
4735
/**
 * Read more data for http over ssl into c->buffer at its current
 * position, and advance the position by the number of bytes read.
 * @param c: comm point; c->buffer must have room remaining.
 * @return 1 when data was read or the read must be retried later
 *	(for SSL_ERROR_WANT_WRITE the comm point is switched to listen for
 *	write), 0 on shutdown or error. Without HAVE_SSL always 0.
 */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r, want;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r > 0) {
		verbose(VERB_ALGO, "ssl http read more skip to %d + %d",
			(int)sldns_buffer_position(c->buffer), (int)r);
		sldns_buffer_skip(c->buffer, (ssize_t)r);
		return 1;
	}

	want = SSL_get_error(c->ssl, r);
	switch(want) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err_io("could not SSL_read", want);
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
4777
4778
/** read more data for http */
4779
static int
4780
http_read_more(int fd, struct comm_point* c)
4781
0
{
4782
0
  ssize_t r;
4783
0
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
4784
0
  r = recv(fd, (void*)sldns_buffer_current(c->buffer),
4785
0
    sldns_buffer_remaining(c->buffer), MSG_DONTWAIT);
4786
0
  if(r == 0) {
4787
0
    return 0;
4788
0
  } else if(r == -1) {
4789
0
#ifndef USE_WINSOCK
4790
0
    if(errno == EINTR || errno == EAGAIN)
4791
0
      return 1;
4792
#else /* USE_WINSOCK */
4793
    if(WSAGetLastError() == WSAECONNRESET)
4794
      return 0;
4795
    if(WSAGetLastError() == WSAEINPROGRESS)
4796
      return 1;
4797
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
4798
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
4799
      return 1;
4800
    }
4801
#endif
4802
0
    log_err_addr("read (in http r)", sock_strerror(errno),
4803
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
4804
0
    return 0;
4805
0
  }
4806
0
  verbose(VERB_ALGO, "http read more skip to %d + %d",
4807
0
    (int)sldns_buffer_position(c->buffer), (int)r);
4808
0
  sldns_buffer_skip(c->buffer, r);
4809
0
  return 1;
4810
0
}
4811
4812
/** return true if http header has been read (one line complete) */
4813
static int
4814
http_header_done(sldns_buffer* buf)
4815
0
{
4816
0
  size_t i;
4817
0
  for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4818
    /* there was a \r before the \n, but we ignore that */
4819
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
4820
0
      return 1;
4821
0
  }
4822
0
  return 0;
4823
0
}
4824
4825
/** return character string into buffer for header line, moves buffer
4826
 * past that line and puts zero terminator into linefeed-newline */
4827
static char*
4828
http_header_line(sldns_buffer* buf)
4829
0
{
4830
0
  char* result = (char*)sldns_buffer_current(buf);
4831
0
  size_t i;
4832
0
  for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
4833
    /* terminate the string on the \r */
4834
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
4835
0
      sldns_buffer_write_u8_at(buf, i, 0);
4836
    /* terminate on the \n and skip past the it and done */
4837
0
    if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
4838
0
      sldns_buffer_write_u8_at(buf, i, 0);
4839
0
      sldns_buffer_set_position(buf, i+1);
4840
0
      return result;
4841
0
    }
4842
0
  }
4843
0
  return NULL;
4844
0
}
4845
4846
/** move unread buffer to start and clear rest for putting the rest into it */
4847
static void
4848
http_moveover_buffer(sldns_buffer* buf)
4849
0
{
4850
0
  size_t pos = sldns_buffer_position(buf);
4851
0
  size_t len = sldns_buffer_remaining(buf);
4852
0
  sldns_buffer_clear(buf);
4853
0
  memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
4854
0
  sldns_buffer_set_position(buf, len);
4855
0
}
4856
4857
/** a http header is complete, process it */
4858
static int
4859
http_process_initial_header(struct comm_point* c)
4860
0
{
4861
0
  char* line = http_header_line(c->buffer);
4862
0
  if(!line) return 1;
4863
0
  verbose(VERB_ALGO, "http header: %s", line);
4864
0
  if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
4865
    /* check returncode */
4866
0
    if(line[9] != '2') {
4867
0
      verbose(VERB_ALGO, "http bad status %s", line+9);
4868
0
      return 0;
4869
0
    }
4870
0
  } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
4871
0
    if(!c->http_is_chunked)
4872
0
      c->tcp_byte_count = (size_t)atoi(line+16);
4873
0
  } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
4874
0
    c->tcp_byte_count = 0;
4875
0
    c->http_is_chunked = 1;
4876
0
  } else if(line[0] == 0) {
4877
    /* end of initial headers */
4878
0
    c->http_in_headers = 0;
4879
0
    if(c->http_is_chunked)
4880
0
      c->http_in_chunk_headers = 1;
4881
    /* remove header text from front of buffer
4882
     * the buffer is going to be used to return the data segment
4883
     * itself and we don't want the header to get returned
4884
     * prepended with it */
4885
0
    http_moveover_buffer(c->buffer);
4886
0
    sldns_buffer_flip(c->buffer);
4887
0
    return 1;
4888
0
  }
4889
  /* ignore other headers */
4890
0
  return 1;
4891
0
}
4892
4893
/** a chunk header is complete, process it, return 0=fail, 1=continue next
4894
 * header line, 2=done with chunked transfer*/
4895
static int
4896
http_process_chunk_header(struct comm_point* c)
4897
0
{
4898
0
  char* line = http_header_line(c->buffer);
4899
0
  if(!line) return 1;
4900
0
  if(c->http_in_chunk_headers == 3) {
4901
0
    verbose(VERB_ALGO, "http chunk trailer: %s", line);
4902
    /* are we done ? */
4903
0
    if(line[0] == 0 && c->tcp_byte_count == 0) {
4904
      /* callback of http reader when NETEVENT_DONE,
4905
       * end of data, with no data in buffer */
4906
0
      sldns_buffer_set_position(c->buffer, 0);
4907
0
      sldns_buffer_set_limit(c->buffer, 0);
4908
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
4909
0
      (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4910
      /* return that we are done */
4911
0
      return 2;
4912
0
    }
4913
0
    if(line[0] == 0) {
4914
      /* continue with header of the next chunk */
4915
0
      c->http_in_chunk_headers = 1;
4916
      /* remove header text from front of buffer */
4917
0
      http_moveover_buffer(c->buffer);
4918
0
      sldns_buffer_flip(c->buffer);
4919
0
      return 1;
4920
0
    }
4921
    /* ignore further trail headers */
4922
0
    return 1;
4923
0
  }
4924
0
  verbose(VERB_ALGO, "http chunk header: %s", line);
4925
0
  if(c->http_in_chunk_headers == 1) {
4926
    /* read chunked start line */
4927
0
    char* end = NULL;
4928
0
    c->tcp_byte_count = (size_t)strtol(line, &end, 16);
4929
0
    if(end == line)
4930
0
      return 0;
4931
0
    c->http_in_chunk_headers = 0;
4932
    /* remove header text from front of buffer */
4933
0
    http_moveover_buffer(c->buffer);
4934
0
    sldns_buffer_flip(c->buffer);
4935
0
    if(c->tcp_byte_count == 0) {
4936
      /* done with chunks, process chunk_trailer lines */
4937
0
      c->http_in_chunk_headers = 3;
4938
0
    }
4939
0
    return 1;
4940
0
  }
4941
  /* ignore other headers */
4942
0
  return 1;
4943
0
}
4944
4945
/** handle nonchunked data segment, 0=fail, 1=wait */
4946
static int
4947
http_nonchunk_segment(struct comm_point* c)
4948
0
{
4949
  /* c->buffer at position..limit has new data we read in.
4950
   * the buffer itself is full of nonchunked data.
4951
   * we are looking to read tcp_byte_count more data
4952
   * and then the transfer is done. */
4953
0
  size_t remainbufferlen;
4954
0
  size_t got_now = sldns_buffer_limit(c->buffer);
4955
0
  if(c->tcp_byte_count <= got_now) {
4956
    /* done, this is the last data fragment */
4957
0
    c->http_stored = 0;
4958
0
    sldns_buffer_set_position(c->buffer, 0);
4959
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
4960
0
    (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
4961
0
    return 1;
4962
0
  }
4963
  /* if we have the buffer space,
4964
   * read more data collected into the buffer */
4965
0
  remainbufferlen = sldns_buffer_capacity(c->buffer) -
4966
0
    sldns_buffer_limit(c->buffer);
4967
0
  if(remainbufferlen+got_now >= c->tcp_byte_count ||
4968
0
    remainbufferlen >= (size_t)(c->ssl?16384:2048)) {
4969
0
    size_t total = sldns_buffer_limit(c->buffer);
4970
0
    sldns_buffer_clear(c->buffer);
4971
0
    sldns_buffer_set_position(c->buffer, total);
4972
0
    c->http_stored = total;
4973
    /* return and wait to read more */
4974
0
    return 1;
4975
0
  }
4976
  /* call callback with this data amount, then
4977
   * wait for more */
4978
0
  c->tcp_byte_count -= got_now;
4979
0
  c->http_stored = 0;
4980
0
  sldns_buffer_set_position(c->buffer, 0);
4981
0
  fptr_ok(fptr_whitelist_comm_point(c->callback));
4982
0
  (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
4983
  /* c->callback has to buffer_clear(c->buffer). */
4984
  /* return and wait to read more */
4985
0
  return 1;
4986
0
}
4987
4988
/** handle chunked data segment, return 0=fail, 1=wait, 2=process more */
4989
static int
4990
http_chunked_segment(struct comm_point* c)
4991
0
{
4992
  /* the c->buffer has from position..limit new data we read. */
4993
  /* the current chunk has length tcp_byte_count.
4994
   * once we read that read more chunk headers.
4995
   */
4996
0
  size_t remainbufferlen;
4997
0
  size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
4998
0
  verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer));
4999
0
  if(c->tcp_byte_count <= got_now) {
5000
    /* the chunk has completed (with perhaps some extra data
5001
     * from next chunk header and next chunk) */
5002
    /* save too much info into temp buffer */
5003
0
    size_t fraglen;
5004
0
    struct comm_reply repinfo;
5005
0
    c->http_stored = 0;
5006
0
    sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
5007
0
    sldns_buffer_clear(c->http_temp);
5008
0
    sldns_buffer_write(c->http_temp,
5009
0
      sldns_buffer_current(c->buffer),
5010
0
      sldns_buffer_remaining(c->buffer));
5011
0
    sldns_buffer_flip(c->http_temp);
5012
5013
    /* callback with this fragment */
5014
0
    fraglen = sldns_buffer_position(c->buffer);
5015
0
    sldns_buffer_set_position(c->buffer, 0);
5016
0
    sldns_buffer_set_limit(c->buffer, fraglen);
5017
0
    repinfo = c->repinfo;
5018
0
    fptr_ok(fptr_whitelist_comm_point(c->callback));
5019
0
    (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
5020
    /* c->callback has to buffer_clear(). */
5021
5022
    /* is commpoint deleted? */
5023
0
    if(!repinfo.c) {
5024
0
      return 1;
5025
0
    }
5026
    /* copy waiting info */
5027
0
    sldns_buffer_clear(c->buffer);
5028
0
    sldns_buffer_write(c->buffer,
5029
0
      sldns_buffer_begin(c->http_temp),
5030
0
      sldns_buffer_remaining(c->http_temp));
5031
0
    sldns_buffer_flip(c->buffer);
5032
    /* process end of chunk trailer header lines, until
5033
     * an empty line */
5034
0
    c->http_in_chunk_headers = 3;
5035
    /* process more data in buffer (if any) */
5036
0
    return 2;
5037
0
  }
5038
0
  c->tcp_byte_count -= got_now;
5039
5040
  /* if we have the buffer space,
5041
   * read more data collected into the buffer */
5042
0
  remainbufferlen = sldns_buffer_capacity(c->buffer) -
5043
0
    sldns_buffer_limit(c->buffer);
5044
0
  if(remainbufferlen >= c->tcp_byte_count ||
5045
0
    remainbufferlen >= 2048) {
5046
0
    size_t total = sldns_buffer_limit(c->buffer);
5047
0
    sldns_buffer_clear(c->buffer);
5048
0
    sldns_buffer_set_position(c->buffer, total);
5049
0
    c->http_stored = total;
5050
    /* return and wait to read more */
5051
0
    return 1;
5052
0
  }
5053
5054
  /* callback of http reader for a new part of the data */
5055
0
  c->http_stored = 0;
5056
0
  sldns_buffer_set_position(c->buffer, 0);
5057
0
  fptr_ok(fptr_whitelist_comm_point(c->callback));
5058
0
  (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
5059
  /* c->callback has to buffer_clear(c->buffer). */
5060
  /* return and wait to read more */
5061
0
  return 1;
5062
0
}
5063
5064
#ifdef HAVE_NGHTTP2
5065
/** Create new http2 session. Called when creating handling comm point. */
5066
static struct http2_session* http2_session_create(struct comm_point* c)
5067
{
5068
  struct http2_session* session = calloc(1, sizeof(*session));
5069
  if(!session) {
5070
    log_err("malloc failure while creating http2 session");
5071
    return NULL;
5072
  }
5073
  session->c = c;
5074
5075
  return session;
5076
}
5077
#endif
5078
5079
/**
 * Delete http2 session. After closing connection or on error.
 * With HAVE_NGHTTP2 the nghttp2 callbacks object (if any) is deleted and
 * the session is freed; without it this does nothing.
 * @param h2_session: session to delete; dereferenced without a NULL
 *	check when HAVE_NGHTTP2 is defined.
 */
static void http2_session_delete(struct http2_session* h2_session)
{
#ifndef HAVE_NGHTTP2
	(void)h2_session;
#else
	if(h2_session->callbacks) {
		nghttp2_session_callbacks_del(h2_session->callbacks);
	}
	free(h2_session);
#endif
}
5090
5091
#ifdef HAVE_NGHTTP2
5092
struct http2_stream* http2_stream_create(int32_t stream_id)
5093
{
5094
  struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream));
5095
  if(!h2_stream) {
5096
    log_err("malloc failure while creating http2 stream");
5097
    return NULL;
5098
  }
5099
  h2_stream->stream_id = stream_id;
5100
  return h2_stream;
5101
}
5102
#endif
5103
5104
void http2_stream_add_meshstate(struct http2_stream* h2_stream,
5105
  struct mesh_area* mesh, struct mesh_state* m)
5106
0
{
5107
0
  h2_stream->mesh = mesh;
5108
0
  h2_stream->mesh_state = m;
5109
0
}
5110
5111
void http2_stream_remove_mesh_state(struct http2_stream* h2_stream)
5112
0
{
5113
0
  if(!h2_stream)
5114
0
    return;
5115
0
  h2_stream->mesh_state = NULL;
5116
0
}
5117
5118
#ifdef HAVE_NGHTTP2
5119
void http2_session_add_stream(struct http2_session* h2_session,
5120
  struct http2_stream* h2_stream)
5121
{
5122
  if(h2_session->first_stream)
5123
    h2_session->first_stream->prev = h2_stream;
5124
  h2_stream->next = h2_session->first_stream;
5125
  h2_session->first_stream = h2_stream;
5126
}
5127
5128
/** remove stream from session linked list. After stream close callback or
5129
 * closing connection */
5130
static void http2_session_remove_stream(struct http2_session* h2_session,
5131
  struct http2_stream* h2_stream)
5132
{
5133
  if(h2_stream->prev)
5134
    h2_stream->prev->next = h2_stream->next;
5135
  else
5136
    h2_session->first_stream = h2_stream->next;
5137
  if(h2_stream->next)
5138
    h2_stream->next->prev = h2_stream->prev;
5139
5140
}
5141
5142
/**
 * Stream close callback: looks up the stream stored as user data for
 * stream_id, and if there is one, unlinks it from the session list and
 * deletes it.
 * @param session: unused.
 * @param stream_id: id of the stream that closed.
 * @param error_code: unused.
 * @param cb_arg: the struct http2_session.
 * @return always 0.
 */
int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session),
	int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg)
{
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	struct http2_stream* h2_stream = nghttp2_session_get_stream_user_data(
		h2_session->session, stream_id);
	if(h2_stream) {
		http2_session_remove_stream(h2_session, h2_stream);
		http2_stream_delete(h2_session, h2_stream);
	}
	return 0;
}
5155
5156
/** Receive callback: read up to len bytes from the connection into buf.
 * Uses SSL_read when the comm point has an ssl object, otherwise a
 * recv() with MSG_DONTWAIT on the socket.
 * @param buf: destination for the bytes read.
 * @param len: capacity of buf.
 * @param cb_arg: the http2_session.
 * @return number of bytes read, NGHTTP2_ERR_EOF when the peer closed,
 *   NGHTTP2_ERR_WOULDBLOCK when the read has to be retried later, or
 *   NGHTTP2_ERR_CALLBACK_FAILURE on other errors. */
ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf,
  size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
  struct http2_session* h2_session = (struct http2_session*)cb_arg;
  struct comm_point* c = h2_session->c;
  ssize_t ret;

  log_assert(h2_session->c->type == comm_http);
  log_assert(h2_session->c->h2_session);

#ifdef HAVE_SSL
  if(c->ssl) {
    int r, want;
    ERR_clear_error();
    r = SSL_read(c->ssl, buf, len);
    if(r > 0)
      return r;
    want = SSL_get_error(c->ssl, r);
    switch(want) {
    case SSL_ERROR_ZERO_RETURN:
      return NGHTTP2_ERR_EOF;
    case SSL_ERROR_WANT_READ:
      return NGHTTP2_ERR_WOULDBLOCK;
    case SSL_ERROR_WANT_WRITE:
      /* SSL needs to write first; listen for writability */
      c->ssl_shake_state = comm_ssl_shake_hs_write;
      comm_point_listen_for_rw(c, 0, 1);
      return NGHTTP2_ERR_WOULDBLOCK;
    case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
      /* connection resets are only logged at higher verbosity */
      if(errno == ECONNRESET && verbosity < 2)
        return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
      if(errno != 0)
        log_err("SSL_read syscall: %s",
          strerror(errno));
      return NGHTTP2_ERR_CALLBACK_FAILURE;
    default:
      break;
    }
    log_crypto_err_io("could not SSL_read", want);
    return NGHTTP2_ERR_CALLBACK_FAILURE;
  }
#endif /* HAVE_SSL */

  ret = recv(c->fd, (void*)buf, len, MSG_DONTWAIT);
  if(ret > 0)
    return ret;
  if(ret == 0)
    return NGHTTP2_ERR_EOF;

  /* ret < 0: classify the socket error */
#ifndef USE_WINSOCK
  if(errno == EINTR || errno == EAGAIN)
    return NGHTTP2_ERR_WOULDBLOCK;
#ifdef ECONNRESET
  if(errno == ECONNRESET && verbosity < 2)
    return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
  log_err_addr("could not http2 recv: %s", strerror(errno),
    &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
  if(WSAGetLastError() == WSAECONNRESET)
    return NGHTTP2_ERR_CALLBACK_FAILURE;
  if(WSAGetLastError() == WSAEINPROGRESS)
    return NGHTTP2_ERR_WOULDBLOCK;
  if(WSAGetLastError() == WSAEWOULDBLOCK) {
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
    return NGHTTP2_ERR_WOULDBLOCK;
  }
  log_err_addr("could not http2 recv: %s",
    wsa_strerror(WSAGetLastError()),
    &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#endif
  return NGHTTP2_ERR_CALLBACK_FAILURE;
}
5230
#endif /* HAVE_NGHTTP2 */
5231
5232
/** Handle http2 read: let nghttp2 receive data for the session of c.
 * @param c: comm point with an h2_session.
 * @return 0 when the connection can be closed (receive error, or the session
 *   wants neither read nor write), 1 to keep the connection.
 *   Without HAVE_NGHTTP2 this always returns 0. */
static int
comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
  int ret;
  log_assert(c->h2_session);

  /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */
  ret = nghttp2_session_recv(c->h2_session->session);
  if(ret != 0) {
    /* EOF and callback failure are not reported here */
    if(!(ret == NGHTTP2_ERR_EOF ||
      ret == NGHTTP2_ERR_CALLBACK_FAILURE)) {
      char a[256];
      addr_to_str(&c->repinfo.remote_addr,
        c->repinfo.remote_addrlen, a, sizeof(a));
      verbose(VERB_QUERY, "http2: session_recv from %s failed, "
        "error: %s", a, nghttp2_strerror(ret));
    }
    return 0;
  }
  if(nghttp2_session_want_write(c->h2_session->session)) {
    /* there is output pending, switch the comm point to writing */
    c->tcp_is_reading = 0;
    comm_point_stop_listening(c);
    comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
    return 1;
  }
  if(!nghttp2_session_want_read(c->h2_session->session))
    return 0; /* connection can be closed */
  return 1;
#else
  (void)c;
  return 0;
#endif
}
5265
5266
/**
5267
 * Handle http reading callback.
5268
 * @param fd: file descriptor of socket.
5269
 * @param c: comm point to read from into buffer.
5270
 * @return: 0 on error
5271
 */
5272
static int
5273
comm_point_http_handle_read(int fd, struct comm_point* c)
5274
0
{
5275
0
  log_assert(c->type == comm_http);
5276
0
  log_assert(fd != -1);
5277
5278
  /* if we are in ssl handshake, handle SSL handshake */
5279
0
#ifdef HAVE_SSL
5280
0
  if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
5281
0
    if(!ssl_handshake(c))
5282
0
      return 0;
5283
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
5284
0
      return 1;
5285
0
  }
5286
0
#endif /* HAVE_SSL */
5287
5288
0
  if(!c->tcp_is_reading)
5289
0
    return 1;
5290
5291
0
  if(c->use_h2) {
5292
0
    return comm_point_http2_handle_read(fd, c);
5293
0
  }
5294
5295
  /* http version is <= http/1.1 */
5296
5297
0
  if(c->http_min_version >= http_version_2) {
5298
    /* HTTP/2 failed, not allowed to use lower version. */
5299
0
    return 0;
5300
0
  }
5301
5302
  /* read more data */
5303
0
  if(c->ssl) {
5304
0
    if(!ssl_http_read_more(c))
5305
0
      return 0;
5306
0
  } else {
5307
0
    if(!http_read_more(fd, c))
5308
0
      return 0;
5309
0
  }
5310
5311
0
  if(c->http_stored >= sldns_buffer_position(c->buffer)) {
5312
    /* read did not work but we wanted more data, there is
5313
     * no bytes to process now. */
5314
0
    return 1;
5315
0
  }
5316
0
  sldns_buffer_flip(c->buffer);
5317
  /* if we are partway in a segment of data, position us at the point
5318
   * where we left off previously */
5319
0
  if(c->http_stored < sldns_buffer_limit(c->buffer))
5320
0
    sldns_buffer_set_position(c->buffer, c->http_stored);
5321
0
  else  sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer));
5322
5323
0
  while(sldns_buffer_remaining(c->buffer) > 0) {
5324
    /* Handle HTTP/1.x data */
5325
    /* if we are reading headers, read more headers */
5326
0
    if(c->http_in_headers || c->http_in_chunk_headers) {
5327
      /* if header is done, process the header */
5328
0
      if(!http_header_done(c->buffer)) {
5329
        /* copy remaining data to front of buffer
5330
         * and set rest for writing into it */
5331
0
        http_moveover_buffer(c->buffer);
5332
        /* return and wait to read more */
5333
0
        return 1;
5334
0
      }
5335
0
      if(!c->http_in_chunk_headers) {
5336
        /* process initial headers */
5337
0
        if(!http_process_initial_header(c))
5338
0
          return 0;
5339
0
      } else {
5340
        /* process chunk headers */
5341
0
        int r = http_process_chunk_header(c);
5342
0
        if(r == 0) return 0;
5343
0
        if(r == 2) return 1; /* done */
5344
        /* r == 1, continue */
5345
0
      }
5346
      /* see if we have more to process */
5347
0
      continue;
5348
0
    }
5349
5350
0
    if(!c->http_is_chunked) {
5351
      /* if we are reading nonchunks, process that*/
5352
0
      return http_nonchunk_segment(c);
5353
0
    } else {
5354
      /* if we are reading chunks, read the chunk */
5355
0
      int r = http_chunked_segment(c);
5356
0
      if(r == 0) return 0;
5357
0
      if(r == 1) return 1;
5358
0
      continue;
5359
0
    }
5360
0
  }
5361
  /* broke out of the loop; could not process header instead need
5362
   * to read more */
5363
  /* moveover any remaining data and read more data */
5364
0
  http_moveover_buffer(c->buffer);
5365
  /* return and wait to read more */
5366
0
  return 1;
5367
0
}
5368
5369
/** check pending connect for http */
5370
static int
5371
http_check_connect(int fd, struct comm_point* c)
5372
0
{
5373
  /* check for pending error from nonblocking connect */
5374
  /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
5375
0
  int error = 0;
5376
0
  socklen_t len = (socklen_t)sizeof(error);
5377
0
  if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
5378
0
    &len) < 0){
5379
0
#ifndef USE_WINSOCK
5380
0
    error = errno; /* on solaris errno is error */
5381
#else /* USE_WINSOCK */
5382
    error = WSAGetLastError();
5383
#endif
5384
0
  }
5385
0
#ifndef USE_WINSOCK
5386
0
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
5387
0
  if(error == EINPROGRESS || error == EWOULDBLOCK)
5388
0
    return 1; /* try again later */
5389
0
  else
5390
0
#endif
5391
0
  if(error != 0 && verbosity < 2)
5392
0
    return 0; /* silence lots of chatter in the logs */
5393
0
  else if(error != 0) {
5394
0
    log_err_addr("http connect", strerror(error),
5395
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5396
#else /* USE_WINSOCK */
5397
  /* examine error */
5398
  if(error == WSAEINPROGRESS)
5399
    return 1;
5400
  else if(error == WSAEWOULDBLOCK) {
5401
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
5402
    return 1;
5403
  } else if(error != 0 && verbosity < 2)
5404
    return 0;
5405
  else if(error != 0) {
5406
    log_err_addr("http connect", wsa_strerror(error),
5407
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5408
#endif /* USE_WINSOCK */
5409
0
    return 0;
5410
0
  }
5411
  /* keep on processing this socket */
5412
0
  return 2;
5413
0
}
5414
5415
/** write more data for http (with ssl)
 * Writes the remaining bytes of c->buffer with SSL_write and advances the
 * buffer by the amount written.
 * @param c: comm point with ssl object and buffer to write from.
 * @return 0 on error or closed connection, 1 when data was written or the
 *   write has to be retried later. Without HAVE_SSL this returns 0. */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
  int written, want;
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
  ERR_clear_error();
  written = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
    (int)sldns_buffer_remaining(c->buffer));
  if(written > 0) {
    sldns_buffer_skip(c->buffer, (ssize_t)written);
    return 1;
  }

  want = SSL_get_error(c->ssl, written);
  switch(want) {
  case SSL_ERROR_ZERO_RETURN:
    return 0; /* closed */
  case SSL_ERROR_WANT_READ:
    c->ssl_shake_state = comm_ssl_shake_hs_read;
    comm_point_listen_for_rw(c, 1, 0);
    return 1; /* wait for read condition */
  case SSL_ERROR_WANT_WRITE:
    return 1; /* write more later */
  case SSL_ERROR_SYSCALL:
#ifdef EPIPE
    if(errno == EPIPE && verbosity < 2)
      return 0; /* silence 'broken pipe' */
#endif
    if(errno != 0)
      log_err("SSL_write syscall: %s",
        strerror(errno));
    return 0;
  default:
    break;
  }
  log_crypto_err_io("could not SSL_write", want);
  return 0;
#else
  (void)c;
  return 0;
#endif /* HAVE_SSL */
}
5455
5456
/** write more data for http */
5457
static int
5458
http_write_more(int fd, struct comm_point* c)
5459
0
{
5460
0
  ssize_t r;
5461
0
  log_assert(sldns_buffer_remaining(c->buffer) > 0);
5462
0
  r = send(fd, (void*)sldns_buffer_current(c->buffer),
5463
0
    sldns_buffer_remaining(c->buffer), 0);
5464
0
  if(r == -1) {
5465
0
#ifndef USE_WINSOCK
5466
0
    if(errno == EINTR || errno == EAGAIN)
5467
0
      return 1;
5468
#else
5469
    if(WSAGetLastError() == WSAEINPROGRESS)
5470
      return 1;
5471
    if(WSAGetLastError() == WSAEWOULDBLOCK) {
5472
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
5473
      return 1;
5474
    }
5475
#endif
5476
0
    log_err_addr("http send r", sock_strerror(errno),
5477
0
      &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
5478
0
    return 0;
5479
0
  }
5480
0
  sldns_buffer_skip(c->buffer, r);
5481
0
  return 1;
5482
0
}
5483
5484
#ifdef HAVE_NGHTTP2
5485
/** Send callback: write up to len bytes from buf to the connection.
 * Uses SSL_write when the comm point has an ssl object, otherwise send()
 * on the socket.
 * @param buf: bytes to send.
 * @param len: number of bytes in buf.
 * @param cb_arg: the http2_session.
 * @return number of bytes written, NGHTTP2_ERR_WOULDBLOCK when the write
 *   has to be retried later, or NGHTTP2_ERR_CALLBACK_FAILURE on error. */
ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf,
  size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
  ssize_t ret;
  struct http2_session* h2_session = (struct http2_session*)cb_arg;
  struct comm_point* c = h2_session->c;
  log_assert(h2_session->c->type == comm_http);
  log_assert(h2_session->c->h2_session);

#ifdef HAVE_SSL
  if(c->ssl) {
    int r, want;
    ERR_clear_error();
    r = SSL_write(c->ssl, buf, len);
    if(r > 0)
      return r;
    want = SSL_get_error(c->ssl, r);
    switch(want) {
    case SSL_ERROR_ZERO_RETURN:
      return NGHTTP2_ERR_CALLBACK_FAILURE;
    case SSL_ERROR_WANT_READ:
      /* SSL needs to read first; listen for readability */
      c->ssl_shake_state = comm_ssl_shake_hs_read;
      comm_point_listen_for_rw(c, 1, 0);
      return NGHTTP2_ERR_WOULDBLOCK;
    case SSL_ERROR_WANT_WRITE:
      return NGHTTP2_ERR_WOULDBLOCK;
    case SSL_ERROR_SYSCALL:
#ifdef EPIPE
      /* broken pipe is only logged at higher verbosity */
      if(errno == EPIPE && verbosity < 2)
        return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
      if(errno != 0)
        log_err("SSL_write syscall: %s",
          strerror(errno));
      return NGHTTP2_ERR_CALLBACK_FAILURE;
    default:
      break;
    }
    log_crypto_err_io("could not SSL_write", want);
    return NGHTTP2_ERR_CALLBACK_FAILURE;
  }
#endif /* HAVE_SSL */

  ret = send(c->fd, (void*)buf, len, 0);
  if(ret > 0)
    return ret;
  if(ret == 0)
    return NGHTTP2_ERR_CALLBACK_FAILURE;

  /* ret < 0: classify the socket error */
#ifndef USE_WINSOCK
  if(errno == EINTR || errno == EAGAIN)
    return NGHTTP2_ERR_WOULDBLOCK;
#ifdef EPIPE
  if(errno == EPIPE && verbosity < 2)
    return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
#ifdef ECONNRESET
  if(errno == ECONNRESET && verbosity < 2)
    return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
  log_err_addr("could not http2 write: %s", strerror(errno),
    &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#else /* USE_WINSOCK */
  if(WSAGetLastError() == WSAENOTCONN)
    return NGHTTP2_ERR_WOULDBLOCK;
  if(WSAGetLastError() == WSAEINPROGRESS)
    return NGHTTP2_ERR_WOULDBLOCK;
  if(WSAGetLastError() == WSAEWOULDBLOCK) {
    ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
    return NGHTTP2_ERR_WOULDBLOCK;
  }
  if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
    return NGHTTP2_ERR_CALLBACK_FAILURE;
  log_err_addr("could not http2 write: %s",
    wsa_strerror(WSAGetLastError()),
    &c->repinfo.remote_addr, c->repinfo.remote_addrlen);
#endif
  return NGHTTP2_ERR_CALLBACK_FAILURE;
}
5564
#endif /* HAVE_NGHTTP2 */
5565
5566
/** Handle http2 writing: let nghttp2 send pending data for the session of c.
 * @param c: comm point with an h2_session.
 * @return 0 when the connection can be closed (send error, or the session
 *   wants neither read nor write), 1 to keep the connection.
 *   Without HAVE_NGHTTP2 this always returns 0. */
static int
comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
  int ret;
  log_assert(c->h2_session);

  ret = nghttp2_session_send(c->h2_session->session);
  if(ret != 0) {
    verbose(VERB_QUERY, "http2: session_send failed, "
      "error: %s", nghttp2_strerror(ret));
    return 0;
  }

  if(nghttp2_session_want_read(c->h2_session->session)) {
    /* the session wants input, switch the comm point to reading */
    c->tcp_is_reading = 1;
    comm_point_stop_listening(c);
    comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
    return 1;
  }
  if(!nghttp2_session_want_write(c->h2_session->session))
    return 0; /* connection can be closed */
  return 1;
#else
  (void)c;
  return 0;
#endif
}
5593
5594
/**
5595
 * Handle http writing callback.
5596
 * @param fd: file descriptor of socket.
5597
 * @param c: comm point to write buffer out of.
5598
 * @return: 0 on error
5599
 */
5600
static int
5601
comm_point_http_handle_write(int fd, struct comm_point* c)
5602
0
{
5603
0
  log_assert(c->type == comm_http);
5604
0
  log_assert(fd != -1);
5605
5606
  /* check pending connect errors, if that fails, we wait for more,
5607
   * or we can continue to write contents */
5608
0
  if(c->tcp_check_nb_connect) {
5609
0
    int r = http_check_connect(fd, c);
5610
0
    if(r == 0) return 0;
5611
0
    if(r == 1) return 1;
5612
0
    c->tcp_check_nb_connect = 0;
5613
0
  }
5614
  /* if we are in ssl handshake, handle SSL handshake */
5615
0
#ifdef HAVE_SSL
5616
0
  if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
5617
0
    if(!ssl_handshake(c))
5618
0
      return 0;
5619
0
    if(c->ssl_shake_state != comm_ssl_shake_none)
5620
0
      return 1;
5621
0
  }
5622
0
#endif /* HAVE_SSL */
5623
0
  if(c->tcp_is_reading)
5624
0
    return 1;
5625
5626
0
  if(c->use_h2) {
5627
0
    return comm_point_http2_handle_write(fd, c);
5628
0
  }
5629
5630
  /* http version is <= http/1.1 */
5631
5632
0
  if(c->http_min_version >= http_version_2) {
5633
    /* HTTP/2 failed, not allowed to use lower version. */
5634
0
    return 0;
5635
0
  }
5636
5637
  /* if we are writing, write more */
5638
0
  if(c->ssl) {
5639
0
    if(!ssl_http_write_more(c))
5640
0
      return 0;
5641
0
  } else {
5642
0
    if(!http_write_more(fd, c))
5643
0
      return 0;
5644
0
  }
5645
5646
  /* we write a single buffer contents, that can contain
5647
   * the http request, and then flip to read the results */
5648
  /* see if write is done */
5649
0
  if(sldns_buffer_remaining(c->buffer) == 0) {
5650
0
    sldns_buffer_clear(c->buffer);
5651
0
    if(c->tcp_do_toggle_rw)
5652
0
      c->tcp_is_reading = 1;
5653
0
    c->tcp_byte_count = 0;
5654
    /* switch from listening(write) to listening(read) */
5655
0
    comm_point_stop_listening(c);
5656
0
    comm_point_start_listening(c, -1, -1);
5657
0
  }
5658
0
  return 1;
5659
0
}
5660
5661
void
5662
comm_point_http_handle_callback(int fd, short event, void* arg)
5663
0
{
5664
0
  struct comm_point* c = (struct comm_point*)arg;
5665
0
  log_assert(c->type == comm_http);
5666
0
  ub_comm_base_now(c->ev->base);
5667
5668
0
  if((event&UB_EV_TIMEOUT)) {
5669
0
    verbose(VERB_QUERY, "http took too long, dropped");
5670
0
    reclaim_http_handler(c);
5671
0
    if(!c->tcp_do_close) {
5672
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
5673
0
      (void)(*c->callback)(c, c->cb_arg,
5674
0
        NETEVENT_TIMEOUT, NULL);
5675
0
    }
5676
0
    return;
5677
0
  }
5678
0
  if((event&UB_EV_READ)) {
5679
0
    if(!comm_point_http_handle_read(fd, c)) {
5680
0
      reclaim_http_handler(c);
5681
0
      if(!c->tcp_do_close) {
5682
0
        fptr_ok(fptr_whitelist_comm_point(
5683
0
          c->callback));
5684
0
        (void)(*c->callback)(c, c->cb_arg,
5685
0
          NETEVENT_CLOSED, NULL);
5686
0
      }
5687
0
    }
5688
0
    return;
5689
0
  }
5690
0
  if((event&UB_EV_WRITE)) {
5691
0
    if(!comm_point_http_handle_write(fd, c)) {
5692
0
      reclaim_http_handler(c);
5693
0
      if(!c->tcp_do_close) {
5694
0
        fptr_ok(fptr_whitelist_comm_point(
5695
0
          c->callback));
5696
0
        (void)(*c->callback)(c, c->cb_arg,
5697
0
          NETEVENT_CLOSED, NULL);
5698
0
      }
5699
0
    }
5700
0
    return;
5701
0
  }
5702
0
  log_err("Ignored event %d for httphdl.", event);
5703
0
}
5704
5705
void comm_point_local_handle_callback(int fd, short event, void* arg)
5706
0
{
5707
0
  struct comm_point* c = (struct comm_point*)arg;
5708
0
  log_assert(c->type == comm_local);
5709
0
  ub_comm_base_now(c->ev->base);
5710
5711
0
  if((event&UB_EV_READ)) {
5712
0
    if(!comm_point_tcp_handle_read(fd, c, 1)) {
5713
0
      fptr_ok(fptr_whitelist_comm_point(c->callback));
5714
0
      (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
5715
0
        NULL);
5716
0
    }
5717
0
    return;
5718
0
  }
5719
0
  log_err("Ignored event %d for localhdl.", event);
5720
0
}
5721
5722
void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
5723
  short event, void* arg)
5724
0
{
5725
0
  struct comm_point* c = (struct comm_point*)arg;
5726
0
  int err = NETEVENT_NOERROR;
5727
0
  log_assert(c->type == comm_raw);
5728
0
  ub_comm_base_now(c->ev->base);
5729
5730
0
  if((event&UB_EV_TIMEOUT))
5731
0
    err = NETEVENT_TIMEOUT;
5732
0
  fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
5733
0
  (void)(*c->callback)(c, c->cb_arg, err, NULL);
5734
0
}
5735
5736
struct comm_point*
5737
comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
5738
  int pp2_enabled, comm_point_callback_type* callback,
5739
  void* callback_arg, struct unbound_socket* socket)
5740
0
{
5741
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5742
0
    sizeof(struct comm_point));
5743
0
  short evbits;
5744
0
  if(!c)
5745
0
    return NULL;
5746
0
  c->ev = (struct internal_event*)calloc(1,
5747
0
    sizeof(struct internal_event));
5748
0
  if(!c->ev) {
5749
0
    free(c);
5750
0
    return NULL;
5751
0
  }
5752
0
  c->ev->base = base;
5753
0
  c->fd = fd;
5754
0
  c->buffer = buffer;
5755
0
  c->timeout = NULL;
5756
0
  c->tcp_is_reading = 0;
5757
0
  c->tcp_byte_count = 0;
5758
0
  c->tcp_parent = NULL;
5759
0
  c->max_tcp_count = 0;
5760
0
  c->cur_tcp_count = 0;
5761
0
  c->tcp_handlers = NULL;
5762
0
  c->tcp_free = NULL;
5763
0
  c->is_in_tcp_free = 0;
5764
0
  c->type = comm_udp;
5765
0
  c->tcp_do_close = 0;
5766
0
  c->do_not_close = 0;
5767
0
  c->tcp_do_toggle_rw = 0;
5768
0
  c->tcp_check_nb_connect = 0;
5769
#ifdef USE_MSG_FASTOPEN
5770
  c->tcp_do_fastopen = 0;
5771
#endif
5772
#ifdef USE_DNSCRYPT
5773
  c->dnscrypt = 0;
5774
  c->dnscrypt_buffer = buffer;
5775
#endif
5776
0
  c->inuse = 0;
5777
0
  c->callback = callback;
5778
0
  c->cb_arg = callback_arg;
5779
0
  c->socket = socket;
5780
0
  c->pp2_enabled = pp2_enabled;
5781
0
  c->pp2_header_state = pp2_header_none;
5782
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
5783
  /* ub_event stuff */
5784
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5785
0
    comm_point_udp_callback, c);
5786
0
  if(c->ev->ev == NULL) {
5787
0
    log_err("could not baseset udp event");
5788
0
    comm_point_delete(c);
5789
0
    return NULL;
5790
0
  }
5791
0
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5792
0
    log_err("could not add udp event");
5793
0
    comm_point_delete(c);
5794
0
    return NULL;
5795
0
  }
5796
0
  c->event_added = 1;
5797
0
  return c;
5798
0
}
5799
5800
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
5801
struct comm_point*
5802
comm_point_create_udp_ancil(struct comm_base *base, int fd,
5803
  sldns_buffer* buffer, int pp2_enabled,
5804
  comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
5805
0
{
5806
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5807
0
    sizeof(struct comm_point));
5808
0
  short evbits;
5809
0
  if(!c)
5810
0
    return NULL;
5811
0
  c->ev = (struct internal_event*)calloc(1,
5812
0
    sizeof(struct internal_event));
5813
0
  if(!c->ev) {
5814
0
    free(c);
5815
0
    return NULL;
5816
0
  }
5817
0
  c->ev->base = base;
5818
0
  c->fd = fd;
5819
0
  c->buffer = buffer;
5820
0
  c->timeout = NULL;
5821
0
  c->tcp_is_reading = 0;
5822
0
  c->tcp_byte_count = 0;
5823
0
  c->tcp_parent = NULL;
5824
0
  c->max_tcp_count = 0;
5825
0
  c->cur_tcp_count = 0;
5826
0
  c->tcp_handlers = NULL;
5827
0
  c->tcp_free = NULL;
5828
0
  c->is_in_tcp_free = 0;
5829
0
  c->type = comm_udp;
5830
0
  c->tcp_do_close = 0;
5831
0
  c->do_not_close = 0;
5832
#ifdef USE_DNSCRYPT
5833
  c->dnscrypt = 0;
5834
  c->dnscrypt_buffer = buffer;
5835
#endif
5836
0
  c->inuse = 0;
5837
0
  c->tcp_do_toggle_rw = 0;
5838
0
  c->tcp_check_nb_connect = 0;
5839
#ifdef USE_MSG_FASTOPEN
5840
  c->tcp_do_fastopen = 0;
5841
#endif
5842
0
  c->callback = callback;
5843
0
  c->cb_arg = callback_arg;
5844
0
  c->socket = socket;
5845
0
  c->pp2_enabled = pp2_enabled;
5846
0
  c->pp2_header_state = pp2_header_none;
5847
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
5848
  /* ub_event stuff */
5849
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5850
0
    comm_point_udp_ancil_callback, c);
5851
0
  if(c->ev->ev == NULL) {
5852
0
    log_err("could not baseset udp event");
5853
0
    comm_point_delete(c);
5854
0
    return NULL;
5855
0
  }
5856
0
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5857
0
    log_err("could not add udp event");
5858
0
    comm_point_delete(c);
5859
0
    return NULL;
5860
0
  }
5861
0
  c->event_added = 1;
5862
0
  return c;
5863
0
}
5864
#endif
5865
5866
struct comm_point*
5867
comm_point_create_doq(struct comm_base *base, int fd, sldns_buffer* buffer,
5868
  comm_point_callback_type* callback, void* callback_arg,
5869
  struct unbound_socket* socket, struct doq_table* table,
5870
  struct ub_randstate* rnd, const void* quic_sslctx,
5871
  struct config_file* cfg)
5872
0
{
5873
#ifdef HAVE_NGTCP2
5874
  struct comm_point* c = (struct comm_point*)calloc(1,
5875
    sizeof(struct comm_point));
5876
  short evbits;
5877
  if(!c)
5878
    return NULL;
5879
  c->ev = (struct internal_event*)calloc(1,
5880
    sizeof(struct internal_event));
5881
  if(!c->ev) {
5882
    free(c);
5883
    return NULL;
5884
  }
5885
  c->ev->base = base;
5886
  c->fd = fd;
5887
  c->buffer = buffer;
5888
  c->timeout = NULL;
5889
  c->tcp_is_reading = 0;
5890
  c->tcp_byte_count = 0;
5891
  c->tcp_parent = NULL;
5892
  c->max_tcp_count = 0;
5893
  c->cur_tcp_count = 0;
5894
  c->tcp_handlers = NULL;
5895
  c->tcp_free = NULL;
5896
  c->is_in_tcp_free = 0;
5897
  c->type = comm_doq;
5898
  c->tcp_do_close = 0;
5899
  c->do_not_close = 0;
5900
  c->tcp_do_toggle_rw = 0;
5901
  c->tcp_check_nb_connect = 0;
5902
#ifdef USE_MSG_FASTOPEN
5903
  c->tcp_do_fastopen = 0;
5904
#endif
5905
#ifdef USE_DNSCRYPT
5906
  c->dnscrypt = 0;
5907
  c->dnscrypt_buffer = NULL;
5908
#endif
5909
  c->doq_socket = doq_server_socket_create(table, rnd, quic_sslctx, c,
5910
    base, cfg);
5911
  if(!c->doq_socket) {
5912
    log_err("could not create doq comm_point");
5913
    comm_point_delete(c);
5914
    return NULL;
5915
  }
5916
  c->inuse = 0;
5917
  c->callback = callback;
5918
  c->cb_arg = callback_arg;
5919
  c->socket = socket;
5920
  c->pp2_enabled = 0;
5921
  c->pp2_header_state = pp2_header_none;
5922
  evbits = UB_EV_READ | UB_EV_PERSIST;
5923
  /* ub_event stuff */
5924
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
5925
    comm_point_doq_callback, c);
5926
  if(c->ev->ev == NULL) {
5927
    log_err("could not baseset udp event");
5928
    comm_point_delete(c);
5929
    return NULL;
5930
  }
5931
  if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
5932
    log_err("could not add udp event");
5933
    comm_point_delete(c);
5934
    return NULL;
5935
  }
5936
  c->event_added = 1;
5937
  return c;
5938
#else
5939
  /* no libngtcp2, so no QUIC support */
5940
0
  (void)base;
5941
0
  (void)buffer;
5942
0
  (void)callback;
5943
0
  (void)callback_arg;
5944
0
  (void)socket;
5945
0
  (void)rnd;
5946
0
  (void)table;
5947
0
  (void)quic_sslctx;
5948
0
  (void)cfg;
5949
0
  sock_close(fd);
5950
0
  return NULL;
5951
0
#endif /* HAVE_NGTCP2 */
5952
0
}
5953
5954
static struct comm_point*
5955
comm_point_create_tcp_handler(struct comm_base *base,
5956
  struct comm_point* parent, size_t bufsize,
5957
  struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
5958
  void* callback_arg, struct unbound_socket* socket)
5959
0
{
5960
0
  struct comm_point* c = (struct comm_point*)calloc(1,
5961
0
    sizeof(struct comm_point));
5962
0
  short evbits;
5963
0
  if(!c)
5964
0
    return NULL;
5965
0
  c->ev = (struct internal_event*)calloc(1,
5966
0
    sizeof(struct internal_event));
5967
0
  if(!c->ev) {
5968
0
    free(c);
5969
0
    return NULL;
5970
0
  }
5971
0
  c->ev->base = base;
5972
0
  c->fd = -1;
5973
0
  c->buffer = sldns_buffer_new(bufsize);
5974
0
  if(!c->buffer) {
5975
0
    free(c->ev);
5976
0
    free(c);
5977
0
    return NULL;
5978
0
  }
5979
0
  c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
5980
0
  if(!c->timeout) {
5981
0
    sldns_buffer_free(c->buffer);
5982
0
    free(c->ev);
5983
0
    free(c);
5984
0
    return NULL;
5985
0
  }
5986
0
  c->tcp_is_reading = 0;
5987
0
  c->tcp_byte_count = 0;
5988
0
  c->tcp_parent = parent;
5989
0
  c->tcp_timeout_msec = parent->tcp_timeout_msec;
5990
0
  c->tcp_conn_limit = parent->tcp_conn_limit;
5991
0
  c->tcl_addr = NULL;
5992
0
  c->tcp_keepalive = 0;
5993
0
  c->max_tcp_count = 0;
5994
0
  c->cur_tcp_count = 0;
5995
0
  c->tcp_handlers = NULL;
5996
0
  c->tcp_free = NULL;
5997
0
  c->is_in_tcp_free = 0;
5998
0
  c->type = comm_tcp;
5999
0
  c->tcp_do_close = 0;
6000
0
  c->do_not_close = 0;
6001
0
  c->tcp_do_toggle_rw = 1;
6002
0
  c->tcp_check_nb_connect = 0;
6003
#ifdef USE_MSG_FASTOPEN
6004
  c->tcp_do_fastopen = 0;
6005
#endif
6006
#ifdef USE_DNSCRYPT
6007
  c->dnscrypt = 0;
6008
  /* We don't know just yet if this is a dnscrypt channel. Allocation
6009
   * will be done when handling the callback. */
6010
  c->dnscrypt_buffer = c->buffer;
6011
#endif
6012
0
  c->repinfo.c = c;
6013
0
  c->callback = callback;
6014
0
  c->cb_arg = callback_arg;
6015
0
  c->socket = socket;
6016
0
  c->pp2_enabled = parent->pp2_enabled;
6017
0
  c->pp2_header_state = pp2_header_none;
6018
0
  if(spoolbuf) {
6019
0
    c->tcp_req_info = tcp_req_info_create(spoolbuf);
6020
0
    if(!c->tcp_req_info) {
6021
0
      log_err("could not create tcp commpoint");
6022
0
      sldns_buffer_free(c->buffer);
6023
0
      free(c->timeout);
6024
0
      free(c->ev);
6025
0
      free(c);
6026
0
      return NULL;
6027
0
    }
6028
0
    c->tcp_req_info->cp = c;
6029
0
    c->tcp_do_close = 1;
6030
0
    c->tcp_do_toggle_rw = 0;
6031
0
  }
6032
  /* add to parent free list */
6033
0
  c->tcp_free = parent->tcp_free;
6034
0
  parent->tcp_free = c;
6035
0
  c->is_in_tcp_free = 1;
6036
  /* ub_event stuff */
6037
0
  evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6038
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6039
0
    comm_point_tcp_handle_callback, c);
6040
0
  if(c->ev->ev == NULL)
6041
0
  {
6042
0
    log_err("could not basetset tcphdl event");
6043
0
    parent->tcp_free = c->tcp_free;
6044
0
    tcp_req_info_delete(c->tcp_req_info);
6045
0
    sldns_buffer_free(c->buffer);
6046
0
    free(c->timeout);
6047
0
    free(c->ev);
6048
0
    free(c);
6049
0
    return NULL;
6050
0
  }
6051
0
  return c;
6052
0
}
6053
6054
static struct comm_point*
6055
comm_point_create_http_handler(struct comm_base *base,
6056
  struct comm_point* parent, size_t bufsize, int harden_large_queries,
6057
  uint32_t http_max_streams, char* http_endpoint,
6058
  comm_point_callback_type* callback, void* callback_arg,
6059
  struct unbound_socket* socket)
6060
0
{
6061
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6062
0
    sizeof(struct comm_point));
6063
0
  short evbits;
6064
0
  if(!c)
6065
0
    return NULL;
6066
0
  c->ev = (struct internal_event*)calloc(1,
6067
0
    sizeof(struct internal_event));
6068
0
  if(!c->ev) {
6069
0
    free(c);
6070
0
    return NULL;
6071
0
  }
6072
0
  c->ev->base = base;
6073
0
  c->fd = -1;
6074
0
  c->buffer = sldns_buffer_new(bufsize);
6075
0
  if(!c->buffer) {
6076
0
    free(c->ev);
6077
0
    free(c);
6078
0
    return NULL;
6079
0
  }
6080
0
  c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
6081
0
  if(!c->timeout) {
6082
0
    sldns_buffer_free(c->buffer);
6083
0
    free(c->ev);
6084
0
    free(c);
6085
0
    return NULL;
6086
0
  }
6087
0
  c->tcp_is_reading = 0;
6088
0
  c->tcp_byte_count = 0;
6089
0
  c->tcp_parent = parent;
6090
0
  c->tcp_timeout_msec = parent->tcp_timeout_msec;
6091
0
  c->tcp_conn_limit = parent->tcp_conn_limit;
6092
0
  c->tcl_addr = NULL;
6093
0
  c->tcp_keepalive = 0;
6094
0
  c->max_tcp_count = 0;
6095
0
  c->cur_tcp_count = 0;
6096
0
  c->tcp_handlers = NULL;
6097
0
  c->tcp_free = NULL;
6098
0
  c->is_in_tcp_free = 0;
6099
0
  c->type = comm_http;
6100
0
  c->tcp_do_close = 1;
6101
0
  c->do_not_close = 0;
6102
0
  c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */
6103
0
  c->tcp_check_nb_connect = 0;
6104
#ifdef USE_MSG_FASTOPEN
6105
  c->tcp_do_fastopen = 0;
6106
#endif
6107
#ifdef USE_DNSCRYPT
6108
  c->dnscrypt = 0;
6109
  c->dnscrypt_buffer = NULL;
6110
#endif
6111
0
  c->repinfo.c = c;
6112
0
  c->callback = callback;
6113
0
  c->cb_arg = callback_arg;
6114
0
  c->socket = socket;
6115
0
  c->pp2_enabled = 0;
6116
0
  c->pp2_header_state = pp2_header_none;
6117
6118
0
  c->http_min_version = http_version_2;
6119
0
  c->http2_stream_max_qbuffer_size = bufsize;
6120
0
  if(harden_large_queries && bufsize > 512)
6121
0
    c->http2_stream_max_qbuffer_size = 512;
6122
0
  c->http2_max_streams = http_max_streams;
6123
0
  if(!(c->http_endpoint = strdup(http_endpoint))) {
6124
0
    log_err("could not strdup http_endpoint");
6125
0
    sldns_buffer_free(c->buffer);
6126
0
    free(c->timeout);
6127
0
    free(c->ev);
6128
0
    free(c);
6129
0
    return NULL;
6130
0
  }
6131
0
  c->use_h2 = 0;
6132
#ifdef HAVE_NGHTTP2
6133
  if(!(c->h2_session = http2_session_create(c))) {
6134
    log_err("could not create http2 session");
6135
    free(c->http_endpoint);
6136
    sldns_buffer_free(c->buffer);
6137
    free(c->timeout);
6138
    free(c->ev);
6139
    free(c);
6140
    return NULL;
6141
  }
6142
  if(!(c->h2_session->callbacks = http2_req_callbacks_create())) {
6143
    log_err("could not create http2 callbacks");
6144
    http2_session_delete(c->h2_session);
6145
    free(c->http_endpoint);
6146
    sldns_buffer_free(c->buffer);
6147
    free(c->timeout);
6148
    free(c->ev);
6149
    free(c);
6150
    return NULL;
6151
  }
6152
#endif
6153
6154
  /* add to parent free list */
6155
0
  c->tcp_free = parent->tcp_free;
6156
0
  parent->tcp_free = c;
6157
0
  c->is_in_tcp_free = 1;
6158
  /* ub_event stuff */
6159
0
  evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
6160
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6161
0
    comm_point_http_handle_callback, c);
6162
0
  if(c->ev->ev == NULL)
6163
0
  {
6164
0
    log_err("could not set http handler event");
6165
0
    parent->tcp_free = c->tcp_free;
6166
0
    http2_session_delete(c->h2_session);
6167
0
    sldns_buffer_free(c->buffer);
6168
0
    free(c->timeout);
6169
0
    free(c->ev);
6170
0
    free(c);
6171
0
    return NULL;
6172
0
  }
6173
0
  return c;
6174
0
}
6175
6176
struct comm_point*
6177
comm_point_create_tcp(struct comm_base *base, int fd, int num,
6178
  int idle_timeout, int harden_large_queries,
6179
  uint32_t http_max_streams, char* http_endpoint,
6180
  struct tcl_list* tcp_conn_limit, size_t bufsize,
6181
  struct sldns_buffer* spoolbuf, enum listen_type port_type,
6182
  int pp2_enabled, comm_point_callback_type* callback,
6183
  void* callback_arg, struct unbound_socket* socket)
6184
0
{
6185
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6186
0
    sizeof(struct comm_point));
6187
0
  short evbits;
6188
0
  int i;
6189
  /* first allocate the TCP accept listener */
6190
0
  if(!c)
6191
0
    return NULL;
6192
0
  c->ev = (struct internal_event*)calloc(1,
6193
0
    sizeof(struct internal_event));
6194
0
  if(!c->ev) {
6195
0
    free(c);
6196
0
    return NULL;
6197
0
  }
6198
0
  c->ev->base = base;
6199
0
  c->fd = fd;
6200
0
  c->buffer = NULL;
6201
0
  c->timeout = NULL;
6202
0
  c->tcp_is_reading = 0;
6203
0
  c->tcp_byte_count = 0;
6204
0
  c->tcp_timeout_msec = idle_timeout;
6205
0
  c->tcp_conn_limit = tcp_conn_limit;
6206
0
  c->tcl_addr = NULL;
6207
0
  c->tcp_keepalive = 0;
6208
0
  c->tcp_parent = NULL;
6209
0
  c->max_tcp_count = num;
6210
0
  c->cur_tcp_count = 0;
6211
0
  c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
6212
0
    sizeof(struct comm_point*));
6213
0
  if(!c->tcp_handlers) {
6214
0
    free(c->ev);
6215
0
    free(c);
6216
0
    return NULL;
6217
0
  }
6218
0
  c->tcp_free = NULL;
6219
0
  c->is_in_tcp_free = 0;
6220
0
  c->type = comm_tcp_accept;
6221
0
  c->tcp_do_close = 0;
6222
0
  c->do_not_close = 0;
6223
0
  c->tcp_do_toggle_rw = 0;
6224
0
  c->tcp_check_nb_connect = 0;
6225
#ifdef USE_MSG_FASTOPEN
6226
  c->tcp_do_fastopen = 0;
6227
#endif
6228
#ifdef USE_DNSCRYPT
6229
  c->dnscrypt = 0;
6230
  c->dnscrypt_buffer = NULL;
6231
#endif
6232
0
  c->callback = NULL;
6233
0
  c->cb_arg = NULL;
6234
0
  c->socket = socket;
6235
0
  c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled);
6236
0
  c->pp2_header_state = pp2_header_none;
6237
0
  evbits = UB_EV_READ | UB_EV_PERSIST;
6238
  /* ub_event stuff */
6239
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6240
0
    comm_point_tcp_accept_callback, c);
6241
0
  if(c->ev->ev == NULL) {
6242
0
    log_err("could not baseset tcpacc event");
6243
0
    comm_point_delete(c);
6244
0
    return NULL;
6245
0
  }
6246
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6247
0
    log_err("could not add tcpacc event");
6248
0
    comm_point_delete(c);
6249
0
    return NULL;
6250
0
  }
6251
0
  c->event_added = 1;
6252
  /* now prealloc the handlers */
6253
0
  for(i=0; i<num; i++) {
6254
0
    if(port_type == listen_type_tcp ||
6255
0
      port_type == listen_type_ssl ||
6256
0
      port_type == listen_type_tcp_dnscrypt) {
6257
0
      c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
6258
0
        c, bufsize, spoolbuf, callback, callback_arg, socket);
6259
0
    } else if(port_type == listen_type_http) {
6260
0
      c->tcp_handlers[i] = comm_point_create_http_handler(
6261
0
        base, c, bufsize, harden_large_queries,
6262
0
        http_max_streams, http_endpoint,
6263
0
        callback, callback_arg, socket);
6264
0
    }
6265
0
    else {
6266
0
      log_err("could not create tcp handler, unknown listen "
6267
0
        "type");
6268
0
      return NULL;
6269
0
    }
6270
0
    if(!c->tcp_handlers[i]) {
6271
0
      comm_point_delete(c);
6272
0
      return NULL;
6273
0
    }
6274
0
  }
6275
6276
0
  return c;
6277
0
}
6278
6279
struct comm_point*
6280
comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
6281
        comm_point_callback_type* callback, void* callback_arg)
6282
0
{
6283
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6284
0
    sizeof(struct comm_point));
6285
0
  short evbits;
6286
0
  if(!c)
6287
0
    return NULL;
6288
0
  c->ev = (struct internal_event*)calloc(1,
6289
0
    sizeof(struct internal_event));
6290
0
  if(!c->ev) {
6291
0
    free(c);
6292
0
    return NULL;
6293
0
  }
6294
0
  c->ev->base = base;
6295
0
  c->fd = -1;
6296
0
  c->buffer = sldns_buffer_new(bufsize);
6297
0
  if(!c->buffer) {
6298
0
    free(c->ev);
6299
0
    free(c);
6300
0
    return NULL;
6301
0
  }
6302
0
  c->timeout = NULL;
6303
0
  c->tcp_is_reading = 0;
6304
0
  c->tcp_byte_count = 0;
6305
0
  c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
6306
0
  c->tcp_conn_limit = NULL;
6307
0
  c->tcl_addr = NULL;
6308
0
  c->tcp_keepalive = 0;
6309
0
  c->tcp_parent = NULL;
6310
0
  c->max_tcp_count = 0;
6311
0
  c->cur_tcp_count = 0;
6312
0
  c->tcp_handlers = NULL;
6313
0
  c->tcp_free = NULL;
6314
0
  c->is_in_tcp_free = 0;
6315
0
  c->type = comm_tcp;
6316
0
  c->tcp_do_close = 0;
6317
0
  c->do_not_close = 0;
6318
0
  c->tcp_do_toggle_rw = 1;
6319
0
  c->tcp_check_nb_connect = 1;
6320
#ifdef USE_MSG_FASTOPEN
6321
  c->tcp_do_fastopen = 1;
6322
#endif
6323
#ifdef USE_DNSCRYPT
6324
  c->dnscrypt = 0;
6325
  c->dnscrypt_buffer = c->buffer;
6326
#endif
6327
0
  c->repinfo.c = c;
6328
0
  c->callback = callback;
6329
0
  c->cb_arg = callback_arg;
6330
0
  c->pp2_enabled = 0;
6331
0
  c->pp2_header_state = pp2_header_none;
6332
0
  evbits = UB_EV_PERSIST | UB_EV_WRITE;
6333
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6334
0
    comm_point_tcp_handle_callback, c);
6335
0
  if(c->ev->ev == NULL)
6336
0
  {
6337
0
    log_err("could not baseset tcpout event");
6338
0
    sldns_buffer_free(c->buffer);
6339
0
    free(c->ev);
6340
0
    free(c);
6341
0
    return NULL;
6342
0
  }
6343
6344
0
  return c;
6345
0
}
6346
6347
struct comm_point*
6348
comm_point_create_http_out(struct comm_base *base, size_t bufsize,
6349
        comm_point_callback_type* callback, void* callback_arg,
6350
  sldns_buffer* temp)
6351
0
{
6352
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6353
0
    sizeof(struct comm_point));
6354
0
  short evbits;
6355
0
  if(!c)
6356
0
    return NULL;
6357
0
  c->ev = (struct internal_event*)calloc(1,
6358
0
    sizeof(struct internal_event));
6359
0
  if(!c->ev) {
6360
0
    free(c);
6361
0
    return NULL;
6362
0
  }
6363
0
  c->ev->base = base;
6364
0
  c->fd = -1;
6365
0
  c->buffer = sldns_buffer_new(bufsize);
6366
0
  if(!c->buffer) {
6367
0
    free(c->ev);
6368
0
    free(c);
6369
0
    return NULL;
6370
0
  }
6371
0
  c->timeout = NULL;
6372
0
  c->tcp_is_reading = 0;
6373
0
  c->tcp_byte_count = 0;
6374
0
  c->tcp_parent = NULL;
6375
0
  c->max_tcp_count = 0;
6376
0
  c->cur_tcp_count = 0;
6377
0
  c->tcp_handlers = NULL;
6378
0
  c->tcp_free = NULL;
6379
0
  c->is_in_tcp_free = 0;
6380
0
  c->type = comm_http;
6381
0
  c->tcp_do_close = 0;
6382
0
  c->do_not_close = 0;
6383
0
  c->tcp_do_toggle_rw = 1;
6384
0
  c->tcp_check_nb_connect = 1;
6385
0
  c->http_in_headers = 1;
6386
0
  c->http_in_chunk_headers = 0;
6387
0
  c->http_is_chunked = 0;
6388
0
  c->http_temp = temp;
6389
#ifdef USE_MSG_FASTOPEN
6390
  c->tcp_do_fastopen = 1;
6391
#endif
6392
#ifdef USE_DNSCRYPT
6393
  c->dnscrypt = 0;
6394
  c->dnscrypt_buffer = c->buffer;
6395
#endif
6396
0
  c->repinfo.c = c;
6397
0
  c->callback = callback;
6398
0
  c->cb_arg = callback_arg;
6399
0
  c->pp2_enabled = 0;
6400
0
  c->pp2_header_state = pp2_header_none;
6401
0
  evbits = UB_EV_PERSIST | UB_EV_WRITE;
6402
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6403
0
    comm_point_http_handle_callback, c);
6404
0
  if(c->ev->ev == NULL)
6405
0
  {
6406
0
    log_err("could not baseset tcpout event");
6407
0
#ifdef HAVE_SSL
6408
0
    SSL_free(c->ssl);
6409
0
#endif
6410
0
    sldns_buffer_free(c->buffer);
6411
0
    free(c->ev);
6412
0
    free(c);
6413
0
    return NULL;
6414
0
  }
6415
6416
0
  return c;
6417
0
}
6418
6419
struct comm_point*
6420
comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
6421
        comm_point_callback_type* callback, void* callback_arg)
6422
0
{
6423
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6424
0
    sizeof(struct comm_point));
6425
0
  short evbits;
6426
0
  if(!c)
6427
0
    return NULL;
6428
0
  c->ev = (struct internal_event*)calloc(1,
6429
0
    sizeof(struct internal_event));
6430
0
  if(!c->ev) {
6431
0
    free(c);
6432
0
    return NULL;
6433
0
  }
6434
0
  c->ev->base = base;
6435
0
  c->fd = fd;
6436
0
  c->buffer = sldns_buffer_new(bufsize);
6437
0
  if(!c->buffer) {
6438
0
    free(c->ev);
6439
0
    free(c);
6440
0
    return NULL;
6441
0
  }
6442
0
  c->timeout = NULL;
6443
0
  c->tcp_is_reading = 1;
6444
0
  c->tcp_byte_count = 0;
6445
0
  c->tcp_parent = NULL;
6446
0
  c->max_tcp_count = 0;
6447
0
  c->cur_tcp_count = 0;
6448
0
  c->tcp_handlers = NULL;
6449
0
  c->tcp_free = NULL;
6450
0
  c->is_in_tcp_free = 0;
6451
0
  c->type = comm_local;
6452
0
  c->tcp_do_close = 0;
6453
0
  c->do_not_close = 1;
6454
0
  c->tcp_do_toggle_rw = 0;
6455
0
  c->tcp_check_nb_connect = 0;
6456
#ifdef USE_MSG_FASTOPEN
6457
  c->tcp_do_fastopen = 0;
6458
#endif
6459
#ifdef USE_DNSCRYPT
6460
  c->dnscrypt = 0;
6461
  c->dnscrypt_buffer = c->buffer;
6462
#endif
6463
0
  c->callback = callback;
6464
0
  c->cb_arg = callback_arg;
6465
0
  c->pp2_enabled = 0;
6466
0
  c->pp2_header_state = pp2_header_none;
6467
  /* ub_event stuff */
6468
0
  evbits = UB_EV_PERSIST | UB_EV_READ;
6469
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6470
0
    comm_point_local_handle_callback, c);
6471
0
  if(c->ev->ev == NULL) {
6472
0
    log_err("could not baseset localhdl event");
6473
0
    free(c->ev);
6474
0
    free(c);
6475
0
    return NULL;
6476
0
  }
6477
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6478
0
    log_err("could not add localhdl event");
6479
0
    ub_event_free(c->ev->ev);
6480
0
    free(c->ev);
6481
0
    free(c);
6482
0
    return NULL;
6483
0
  }
6484
0
  c->event_added = 1;
6485
0
  return c;
6486
0
}
6487
6488
struct comm_point*
6489
comm_point_create_raw(struct comm_base* base, int fd, int writing,
6490
  comm_point_callback_type* callback, void* callback_arg)
6491
0
{
6492
0
  struct comm_point* c = (struct comm_point*)calloc(1,
6493
0
    sizeof(struct comm_point));
6494
0
  short evbits;
6495
0
  if(!c)
6496
0
    return NULL;
6497
0
  c->ev = (struct internal_event*)calloc(1,
6498
0
    sizeof(struct internal_event));
6499
0
  if(!c->ev) {
6500
0
    free(c);
6501
0
    return NULL;
6502
0
  }
6503
0
  c->ev->base = base;
6504
0
  c->fd = fd;
6505
0
  c->buffer = NULL;
6506
0
  c->timeout = NULL;
6507
0
  c->tcp_is_reading = 0;
6508
0
  c->tcp_byte_count = 0;
6509
0
  c->tcp_parent = NULL;
6510
0
  c->max_tcp_count = 0;
6511
0
  c->cur_tcp_count = 0;
6512
0
  c->tcp_handlers = NULL;
6513
0
  c->tcp_free = NULL;
6514
0
  c->is_in_tcp_free = 0;
6515
0
  c->type = comm_raw;
6516
0
  c->tcp_do_close = 0;
6517
0
  c->do_not_close = 1;
6518
0
  c->tcp_do_toggle_rw = 0;
6519
0
  c->tcp_check_nb_connect = 0;
6520
#ifdef USE_MSG_FASTOPEN
6521
  c->tcp_do_fastopen = 0;
6522
#endif
6523
#ifdef USE_DNSCRYPT
6524
  c->dnscrypt = 0;
6525
  c->dnscrypt_buffer = c->buffer;
6526
#endif
6527
0
  c->callback = callback;
6528
0
  c->cb_arg = callback_arg;
6529
0
  c->pp2_enabled = 0;
6530
0
  c->pp2_header_state = pp2_header_none;
6531
  /* ub_event stuff */
6532
0
  if(writing)
6533
0
    evbits = UB_EV_PERSIST | UB_EV_WRITE;
6534
0
  else  evbits = UB_EV_PERSIST | UB_EV_READ;
6535
0
  c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
6536
0
    comm_point_raw_handle_callback, c);
6537
0
  if(c->ev->ev == NULL) {
6538
0
    log_err("could not baseset rawhdl event");
6539
0
    free(c->ev);
6540
0
    free(c);
6541
0
    return NULL;
6542
0
  }
6543
0
  if (ub_event_add(c->ev->ev, c->timeout) != 0) {
6544
0
    log_err("could not add rawhdl event");
6545
0
    ub_event_free(c->ev->ev);
6546
0
    free(c->ev);
6547
0
    free(c);
6548
0
    return NULL;
6549
0
  }
6550
0
  c->event_added = 1;
6551
0
  return c;
6552
0
}
6553
6554
void
6555
comm_point_close(struct comm_point* c)
6556
0
{
6557
0
  if(!c)
6558
0
    return;
6559
0
  if(c->fd != -1) {
6560
0
    verbose(5, "comm_point_close of %d: event_del", c->fd);
6561
0
    if(c->event_added) {
6562
0
      if(ub_event_del(c->ev->ev) != 0) {
6563
0
        log_err("could not event_del on close");
6564
0
      }
6565
0
      c->event_added = 0;
6566
0
    }
6567
0
  }
6568
0
  tcl_close_connection(c->tcl_addr);
6569
0
  if(c->tcp_req_info)
6570
0
    tcp_req_info_clear(c->tcp_req_info);
6571
0
  if(c->h2_session)
6572
0
    http2_session_server_delete(c->h2_session);
6573
  /* stop the comm point from reading or writing after it is closed. */
6574
0
  if(c->tcp_more_read_again && *c->tcp_more_read_again)
6575
0
    *c->tcp_more_read_again = 0;
6576
0
  if(c->tcp_more_write_again && *c->tcp_more_write_again)
6577
0
    *c->tcp_more_write_again = 0;
6578
6579
  /* close fd after removing from event lists, or epoll.. is messed up */
6580
0
  if(c->fd != -1 && !c->do_not_close) {
6581
#ifdef USE_WINSOCK
6582
    if(c->type == comm_tcp || c->type == comm_http) {
6583
      /* delete sticky events for the fd, it gets closed */
6584
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
6585
      ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
6586
    }
6587
#endif
6588
0
    verbose(VERB_ALGO, "close fd %d", c->fd);
6589
0
    sock_close(c->fd);
6590
0
  }
6591
0
  c->fd = -1;
6592
0
}
6593
6594
void
6595
comm_point_delete(struct comm_point* c)
6596
0
{
6597
0
  if(!c)
6598
0
    return;
6599
0
  if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
6600
0
#ifdef HAVE_SSL
6601
0
    SSL_shutdown(c->ssl);
6602
0
    SSL_free(c->ssl);
6603
0
#endif
6604
0
  }
6605
0
  if(c->type == comm_http && c->http_endpoint) {
6606
0
    free(c->http_endpoint);
6607
0
    c->http_endpoint = NULL;
6608
0
  }
6609
0
  comm_point_close(c);
6610
0
  if(c->tcp_handlers) {
6611
0
    int i;
6612
0
    for(i=0; i<c->max_tcp_count; i++)
6613
0
      comm_point_delete(c->tcp_handlers[i]);
6614
0
    free(c->tcp_handlers);
6615
0
  }
6616
0
  free(c->timeout);
6617
0
  if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
6618
0
    sldns_buffer_free(c->buffer);
6619
#ifdef USE_DNSCRYPT
6620
    if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
6621
      sldns_buffer_free(c->dnscrypt_buffer);
6622
    }
6623
#endif
6624
0
    if(c->tcp_req_info) {
6625
0
      tcp_req_info_delete(c->tcp_req_info);
6626
0
    }
6627
0
    if(c->h2_session) {
6628
0
      http2_session_delete(c->h2_session);
6629
0
    }
6630
0
  }
6631
#ifdef HAVE_NGTCP2
6632
  if(c->doq_socket)
6633
    doq_server_socket_delete(c->doq_socket);
6634
#endif
6635
0
  ub_event_free(c->ev->ev);
6636
0
  free(c->ev);
6637
0
  free(c);
6638
0
}
6639
6640
#ifdef USE_DNSTAP
/**
 * Log the local and client address at VERB_ALGO level and hand the
 * response over to dnstap as a client response message.
 * @param dtenv: dnstap environment passed to dt_msg_send_client_response.
 * @param addr: local address.
 * @param addrlen: length of addr.
 * @param client_addr: address of the client.
 * @param client_addrlen: length of client_addr.
 * @param type: comm point type, passed on to dnstap.
 * @param ssl: ssl pointer of the comm point, passed on to dnstap.
 * @param buffer: buffer with the response, passed on to dnstap.
 */
static void
send_reply_dnstap(struct dt_env* dtenv,
  struct sockaddr* addr, socklen_t addrlen,
  struct sockaddr_storage* client_addr, socklen_t client_addrlen,
  enum comm_point_type type, void* ssl, sldns_buffer* buffer)
{
  struct sockaddr_storage* local_addr = (struct sockaddr_storage*)addr;

  log_addr(VERB_ALGO, "from local addr", (void*)addr, addrlen);
  log_addr(VERB_ALGO, "response to client", client_addr, client_addrlen);
  dt_msg_send_client_response(dtenv, client_addr, local_addr, type,
    ssl, buffer);
}
#endif
6653
6654
void
6655
comm_point_send_reply(struct comm_reply *repinfo)
6656
0
{
6657
0
  struct sldns_buffer* buffer;
6658
0
  log_assert(repinfo && repinfo->c);
6659
#ifdef USE_DNSCRYPT
6660
  buffer = repinfo->c->dnscrypt_buffer;
6661
  if(!dnsc_handle_uncurved_request(repinfo)) {
6662
    return;
6663
  }
6664
#else
6665
0
  buffer = repinfo->c->buffer;
6666
0
#endif
6667
0
  if(repinfo->c->type == comm_udp) {
6668
0
    if(repinfo->srctype)
6669
0
      comm_point_send_udp_msg_if(repinfo->c, buffer,
6670
0
        (struct sockaddr*)&repinfo->remote_addr,
6671
0
        repinfo->remote_addrlen, repinfo);
6672
0
    else
6673
0
      comm_point_send_udp_msg(repinfo->c, buffer,
6674
0
        (struct sockaddr*)&repinfo->remote_addr,
6675
0
        repinfo->remote_addrlen, 0);
6676
#ifdef USE_DNSTAP
6677
    /*
6678
     * sending src (client)/dst (local service) addresses over
6679
     * DNSTAP from udp callback
6680
     */
6681
    if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) {
6682
      send_reply_dnstap(repinfo->c->dtenv,
6683
        repinfo->c->socket->addr,
6684
        repinfo->c->socket->addrlen,
6685
        &repinfo->client_addr, repinfo->client_addrlen,
6686
        repinfo->c->type, repinfo->c->ssl,
6687
        repinfo->c->buffer);
6688
    }
6689
#endif
6690
0
  } else {
6691
#ifdef USE_DNSTAP
6692
    struct dt_env* dtenv =
6693
#ifdef HAVE_NGTCP2
6694
      repinfo->c->doq_socket
6695
      ?repinfo->c->dtenv:
6696
#endif
6697
      repinfo->c->tcp_parent->dtenv;
6698
    struct sldns_buffer* dtbuffer = repinfo->c->tcp_req_info
6699
      ?repinfo->c->tcp_req_info->spool_buffer
6700
      :repinfo->c->buffer;
6701
#ifdef USE_DNSCRYPT
6702
    if(repinfo->c->dnscrypt && repinfo->is_dnscrypted)
6703
      dtbuffer = repinfo->c->buffer;
6704
#endif
6705
    /*
6706
     * sending src (client)/dst (local service) addresses over
6707
     * DNSTAP from other callbacks
6708
     */
6709
    if(dtenv != NULL && dtenv->log_client_response_messages) {
6710
      send_reply_dnstap(dtenv,
6711
        repinfo->c->socket->addr,
6712
        repinfo->c->socket->addrlen,
6713
        &repinfo->client_addr, repinfo->client_addrlen,
6714
        repinfo->c->type, repinfo->c->ssl,
6715
        dtbuffer);
6716
    }
6717
#endif
6718
0
    if(repinfo->c->tcp_req_info) {
6719
0
      tcp_req_info_send_reply(repinfo->c->tcp_req_info);
6720
0
    } else if(repinfo->c->use_h2) {
6721
0
      if(!http2_submit_dns_response(repinfo->c->h2_session)) {
6722
0
        comm_point_drop_reply(repinfo);
6723
0
        return;
6724
0
      }
6725
0
      repinfo->c->h2_stream = NULL;
6726
0
      repinfo->c->tcp_is_reading = 0;
6727
0
      comm_point_stop_listening(repinfo->c);
6728
0
      comm_point_start_listening(repinfo->c, -1,
6729
0
        adjusted_tcp_timeout(repinfo->c));
6730
0
      return;
6731
#ifdef HAVE_NGTCP2
6732
    } else if(repinfo->c->doq_socket) {
6733
      doq_socket_send_reply(repinfo);
6734
#endif
6735
0
    } else {
6736
0
      comm_point_start_listening(repinfo->c, -1,
6737
0
        adjusted_tcp_timeout(repinfo->c));
6738
0
    }
6739
0
  }
6740
0
}
6741
6742
void
6743
comm_point_drop_reply(struct comm_reply* repinfo)
6744
0
{
6745
0
  if(!repinfo)
6746
0
    return;
6747
0
  log_assert(repinfo->c);
6748
0
  log_assert(repinfo->c->type != comm_tcp_accept);
6749
0
  if(repinfo->c->type == comm_udp)
6750
0
    return;
6751
0
  if(repinfo->c->tcp_req_info)
6752
0
    repinfo->c->tcp_req_info->is_drop = 1;
6753
0
  if(repinfo->c->type == comm_http) {
6754
0
    if(repinfo->c->h2_session) {
6755
0
      repinfo->c->h2_session->is_drop = 1;
6756
0
      if(!repinfo->c->h2_session->postpone_drop)
6757
0
        reclaim_http_handler(repinfo->c);
6758
0
      return;
6759
0
    }
6760
0
    reclaim_http_handler(repinfo->c);
6761
0
    return;
6762
#ifdef HAVE_NGTCP2
6763
  } else if(repinfo->c->doq_socket) {
6764
    doq_socket_drop_reply(repinfo);
6765
    return;
6766
#endif
6767
0
  }
6768
0
  reclaim_tcp_handler(repinfo->c);
6769
0
}
6770
6771
void
6772
comm_point_stop_listening(struct comm_point* c)
6773
0
{
6774
0
  verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
6775
0
  if(c->event_added) {
6776
0
    if(ub_event_del(c->ev->ev) != 0) {
6777
0
      log_err("event_del error to stoplisten");
6778
0
    }
6779
0
    c->event_added = 0;
6780
0
  }
6781
0
}
6782
6783
void
6784
comm_point_start_listening(struct comm_point* c, int newfd, int msec)
6785
0
{
6786
0
  verbose(VERB_ALGO, "comm point start listening %d (%d msec)",
6787
0
    c->fd==-1?newfd:c->fd, msec);
6788
0
  if(c->type == comm_tcp_accept && !c->tcp_free) {
6789
    /* no use to start listening no free slots. */
6790
0
    return;
6791
0
  }
6792
0
  if(c->event_added) {
6793
0
    if(ub_event_del(c->ev->ev) != 0) {
6794
0
      log_err("event_del error to startlisten");
6795
0
    }
6796
0
    c->event_added = 0;
6797
0
  }
6798
0
  if(msec != -1 && msec != 0) {
6799
0
    if(!c->timeout) {
6800
0
      c->timeout = (struct timeval*)malloc(sizeof(
6801
0
        struct timeval));
6802
0
      if(!c->timeout) {
6803
0
        log_err("cpsl: malloc failed. No net read.");
6804
0
        return;
6805
0
      }
6806
0
    }
6807
0
    ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
6808
0
#ifndef S_SPLINT_S /* splint fails on struct timeval. */
6809
0
    c->timeout->tv_sec = msec/1000;
6810
0
    c->timeout->tv_usec = (msec%1000)*1000;
6811
0
#endif /* S_SPLINT_S */
6812
0
  } else {
6813
0
    if(msec == 0 || !c->timeout) {
6814
0
      ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6815
0
    }
6816
0
  }
6817
0
  if(c->type == comm_tcp || c->type == comm_http) {
6818
0
    ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6819
0
    if(c->tcp_write_and_read) {
6820
0
      verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd));
6821
0
      ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6822
0
    } else if(c->tcp_is_reading) {
6823
0
      verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd));
6824
0
      ub_event_add_bits(c->ev->ev, UB_EV_READ);
6825
0
    } else  {
6826
0
      verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd));
6827
0
      ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6828
0
    }
6829
0
  }
6830
0
  if(newfd != -1) {
6831
0
    if(c->fd != -1 && c->fd != newfd) {
6832
0
      verbose(5, "cpsl close of fd %d for %d", c->fd, newfd);
6833
0
      sock_close(c->fd);
6834
0
    }
6835
0
    c->fd = newfd;
6836
0
    ub_event_set_fd(c->ev->ev, c->fd);
6837
0
  }
6838
0
  if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
6839
0
    log_err("event_add failed. in cpsl.");
6840
0
    return;
6841
0
  }
6842
0
  c->event_added = 1;
6843
0
}
6844
6845
void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
6846
0
{
6847
0
  verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
6848
0
  if(c->event_added) {
6849
0
    if(ub_event_del(c->ev->ev) != 0) {
6850
0
      log_err("event_del error to cplf");
6851
0
    }
6852
0
    c->event_added = 0;
6853
0
  }
6854
0
  if(!c->timeout) {
6855
0
    ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT);
6856
0
  }
6857
0
  ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
6858
0
  if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
6859
0
  if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
6860
0
  if(ub_event_add(c->ev->ev, c->timeout) != 0) {
6861
0
    log_err("event_add failed. in cplf.");
6862
0
    return;
6863
0
  }
6864
0
  c->event_added = 1;
6865
0
}
6866
6867
size_t comm_point_get_mem(struct comm_point* c)
6868
0
{
6869
0
  size_t s;
6870
0
  if(!c)
6871
0
    return 0;
6872
0
  s = sizeof(*c) + sizeof(*c->ev);
6873
0
  if(c->timeout)
6874
0
    s += sizeof(*c->timeout);
6875
0
  if(c->type == comm_tcp || c->type == comm_local) {
6876
0
    s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
6877
#ifdef USE_DNSCRYPT
6878
    s += sizeof(*c->dnscrypt_buffer);
6879
    if(c->buffer != c->dnscrypt_buffer) {
6880
      s += sldns_buffer_capacity(c->dnscrypt_buffer);
6881
    }
6882
#endif
6883
0
  }
6884
0
  if(c->type == comm_tcp_accept) {
6885
0
    int i;
6886
0
    for(i=0; i<c->max_tcp_count; i++)
6887
0
      s += comm_point_get_mem(c->tcp_handlers[i]);
6888
0
  }
6889
0
  return s;
6890
0
}
6891
6892
struct comm_timer*
6893
comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
6894
0
{
6895
0
  struct internal_timer *tm = (struct internal_timer*)calloc(1,
6896
0
    sizeof(struct internal_timer));
6897
0
  if(!tm) {
6898
0
    log_err("malloc failed");
6899
0
    return NULL;
6900
0
  }
6901
0
  tm->super.ev_timer = tm;
6902
0
  tm->base = base;
6903
0
  tm->super.callback = cb;
6904
0
  tm->super.cb_arg = cb_arg;
6905
0
  tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT,
6906
0
    comm_timer_callback, &tm->super);
6907
0
  if(tm->ev == NULL) {
6908
0
    log_err("timer_create: event_base_set failed.");
6909
0
    free(tm);
6910
0
    return NULL;
6911
0
  }
6912
0
  return &tm->super;
6913
0
}
6914
6915
void
6916
comm_timer_disable(struct comm_timer* timer)
6917
0
{
6918
0
  if(!timer)
6919
0
    return;
6920
0
  ub_timer_del(timer->ev_timer->ev);
6921
0
  timer->ev_timer->enabled = 0;
6922
0
}
6923
6924
void
6925
comm_timer_set(struct comm_timer* timer, struct timeval* tv)
6926
0
{
6927
0
  log_assert(tv);
6928
0
  if(timer->ev_timer->enabled)
6929
0
    comm_timer_disable(timer);
6930
0
  if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
6931
0
    comm_timer_callback, timer, tv) != 0)
6932
0
    log_err("comm_timer_set: evtimer_add failed.");
6933
0
  timer->ev_timer->enabled = 1;
6934
0
}
6935
6936
void
6937
comm_timer_delete(struct comm_timer* timer)
6938
0
{
6939
0
  if(!timer)
6940
0
    return;
6941
0
  comm_timer_disable(timer);
6942
  /* Free the sub struct timer->ev_timer derived from the super struct timer.
6943
   * i.e. assert(timer == timer->ev_timer)
6944
   */
6945
0
  ub_event_free(timer->ev_timer->ev);
6946
0
  free(timer->ev_timer);
6947
0
}
6948
6949
void
6950
comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
6951
0
{
6952
0
  struct comm_timer* tm = (struct comm_timer*)arg;
6953
0
  if(!(event&UB_EV_TIMEOUT))
6954
0
    return;
6955
0
  ub_comm_base_now(tm->ev_timer->base);
6956
0
  tm->ev_timer->enabled = 0;
6957
0
  fptr_ok(fptr_whitelist_comm_timer(tm->callback));
6958
0
  (*tm->callback)(tm->cb_arg);
6959
0
}
6960
6961
int
6962
comm_timer_is_set(struct comm_timer* timer)
6963
0
{
6964
0
  return (int)timer->ev_timer->enabled;
6965
0
}
6966
6967
size_t
6968
comm_timer_get_mem(struct comm_timer* timer)
6969
0
{
6970
0
  if(!timer) return 0;
6971
0
  return sizeof(struct internal_timer);
6972
0
}
6973
6974
struct comm_signal*
6975
comm_signal_create(struct comm_base* base,
6976
        void (*callback)(int, void*), void* cb_arg)
6977
0
{
6978
0
  struct comm_signal* com = (struct comm_signal*)malloc(
6979
0
    sizeof(struct comm_signal));
6980
0
  if(!com) {
6981
0
    log_err("malloc failed");
6982
0
    return NULL;
6983
0
  }
6984
0
  com->base = base;
6985
0
  com->callback = callback;
6986
0
  com->cb_arg = cb_arg;
6987
0
  com->ev_signal = NULL;
6988
0
  return com;
6989
0
}
6990
6991
void
6992
comm_signal_callback(int sig, short event, void* arg)
6993
0
{
6994
0
  struct comm_signal* comsig = (struct comm_signal*)arg;
6995
0
  if(!(event & UB_EV_SIGNAL))
6996
0
    return;
6997
0
  ub_comm_base_now(comsig->base);
6998
0
  fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
6999
0
  (*comsig->callback)(sig, comsig->cb_arg);
7000
0
}
7001
7002
int
7003
comm_signal_bind(struct comm_signal* comsig, int sig)
7004
0
{
7005
0
  struct internal_signal* entry = (struct internal_signal*)calloc(1,
7006
0
    sizeof(struct internal_signal));
7007
0
  if(!entry) {
7008
0
    log_err("malloc failed");
7009
0
    return 0;
7010
0
  }
7011
0
  log_assert(comsig);
7012
  /* add signal event */
7013
0
  entry->ev = ub_signal_new(comsig->base->eb->base, sig,
7014
0
    comm_signal_callback, comsig);
7015
0
  if(entry->ev == NULL) {
7016
0
    log_err("Could not create signal event");
7017
0
    free(entry);
7018
0
    return 0;
7019
0
  }
7020
0
  if(ub_signal_add(entry->ev, NULL) != 0) {
7021
0
    log_err("Could not add signal handler");
7022
0
    ub_event_free(entry->ev);
7023
0
    free(entry);
7024
0
    return 0;
7025
0
  }
7026
  /* link into list */
7027
0
  entry->next = comsig->ev_signal;
7028
0
  comsig->ev_signal = entry;
7029
0
  return 1;
7030
0
}
7031
7032
void
7033
comm_signal_delete(struct comm_signal* comsig)
7034
0
{
7035
0
  struct internal_signal* p, *np;
7036
0
  if(!comsig)
7037
0
    return;
7038
0
  p=comsig->ev_signal;
7039
0
  while(p) {
7040
0
    np = p->next;
7041
0
    ub_signal_del(p->ev);
7042
0
    ub_event_free(p->ev);
7043
0
    free(p);
7044
0
    p = np;
7045
0
  }
7046
0
  free(comsig);
7047
0
}