/* Copyright (c) 2017 - 2022 LiteSpeed Technologies Inc. See LICENSE. */
/*
* lsquic_send_ctl.c -- Logic for sending and sent packets
*/
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>
#include <openssl/rand.h>
#include "lsquic_types.h"
#include "lsquic_int_types.h"
#include "lsquic.h"
#include "lsquic_mm.h"
#include "lsquic_engine_public.h"
#include "lsquic_packet_common.h"
#include "lsquic_alarmset.h"
#include "lsquic_parse.h"
#include "lsquic_packet_in.h"
#include "lsquic_packet_out.h"
#include "lsquic_packet_resize.h"
#include "lsquic_senhist.h"
#include "lsquic_rtt.h"
#include "lsquic_cubic.h"
#include "lsquic_pacer.h"
#include "lsquic_bw_sampler.h"
#include "lsquic_minmax.h"
#include "lsquic_bbr.h"
#include "lsquic_adaptive_cc.h"
#include "lsquic_util.h"
#include "lsquic_sfcw.h"
#include "lsquic_varint.h"
#include "lsquic_hq.h"
#include "lsquic_hash.h"
#include "lsquic_stream.h"
#include "lsquic_ver_neg.h"
#include "lsquic_ev_log.h"
#include "lsquic_conn.h"
#include "lsquic_send_ctl.h"
#include "lsquic_conn_flow.h"
#include "lsquic_conn_public.h"
#include "lsquic_cong_ctl.h"
#include "lsquic_enc_sess.h"
#include "lsquic_malo.h"
#include "lsquic_attq.h"
#include "lsquic_http1x_if.h"
#include "lsqpack.h"
#include "lsquic_frab_list.h"
#include "lsquic_qdec_hdl.h"
#include "lsquic_crand.h"
#define LSQUIC_LOGGER_MODULE LSQLM_SENDCTL
#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(ctl->sc_conn_pub->lconn)
#include "lsquic_logger.h"
#if __GNUC__
# define UNLIKELY(cond) __builtin_expect(cond, 0)
#else
# define UNLIKELY(cond) cond
#endif
#define MAX_RESUBMITTED_ON_RTO 2
#define MAX_RTO_BACKOFFS 10
#define DEFAULT_RETX_DELAY 500000 /* Microseconds */
#define MAX_RTO_DELAY 60000000 /* Microseconds */
#define MIN_RTO_DELAY 200000 /* Microseconds */
#define INITIAL_RTT 333333 /* Microseconds */
#define N_NACKS_BEFORE_RETX 3
#define CGP(ctl) ((struct cong_ctl *) (ctl)->sc_cong_ctl)
#define packet_out_total_sz(p) \
lsquic_packet_out_total_sz(ctl->sc_conn_pub->lconn, p)
#define packet_out_sent_sz(p) \
lsquic_packet_out_sent_sz(ctl->sc_conn_pub->lconn, p)
enum retx_mode {
RETX_MODE_HANDSHAKE,
RETX_MODE_LOSS,
RETX_MODE_TLP,
RETX_MODE_RTO,
};
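/* The modes escalate as recovery conditions worsen: the handshake timer
 * runs while the handshake is incomplete, the loss-detection timer runs
 * when a loss timeout is pending, up to two tail loss probes are sent
 * (get_retx_mode() checks sc_n_tlp < 2), and after that the sender falls
 * back to full retransmission timeouts.
 */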
static const char *const retx2str[] = {
[RETX_MODE_HANDSHAKE] = "RETX_MODE_HANDSHAKE",
[RETX_MODE_LOSS] = "RETX_MODE_LOSS",
[RETX_MODE_TLP] = "RETX_MODE_TLP",
[RETX_MODE_RTO] = "RETX_MODE_RTO",
};
#ifdef NDEBUG
#define MAX_BPQ_COUNT 10
#else
static unsigned MAX_BPQ_COUNT = 10;
void
lsquic_send_ctl_set_max_bpq_count (unsigned count) { MAX_BPQ_COUNT = count; }
#endif
static void
update_for_resending (lsquic_send_ctl_t *ctl, lsquic_packet_out_t *packet_out);
enum expire_filter { EXFI_ALL, EXFI_HSK, EXFI_LAST, };
static void
send_ctl_expire (struct lsquic_send_ctl *, enum packnum_space,
enum expire_filter);
static void
set_retx_alarm (struct lsquic_send_ctl *, enum packnum_space, lsquic_time_t);
static int
send_ctl_detect_losses (struct lsquic_send_ctl *, enum packnum_space,
lsquic_time_t time);
static unsigned
send_ctl_retx_bytes_out (const struct lsquic_send_ctl *ctl);
static unsigned
send_ctl_all_bytes_out (const struct lsquic_send_ctl *ctl);
static void
send_ctl_reschedule_poison (struct lsquic_send_ctl *ctl);
static int
send_ctl_can_send_pre_hsk (struct lsquic_send_ctl *ctl);
static int
send_ctl_can_send (struct lsquic_send_ctl *ctl);
static int
split_lost_packet (struct lsquic_send_ctl *, struct lsquic_packet_out *const);
#ifdef NDEBUG
static
#elif __GNUC__
__attribute__((weak))
#endif
int
lsquic_send_ctl_schedule_stream_packets_immediately (lsquic_send_ctl_t *ctl)
{
return !(ctl->sc_flags & SC_BUFFER_STREAM);
}
#ifdef NDEBUG
static
#elif __GNUC__
__attribute__((weak))
#endif
enum packno_bits
lsquic_send_ctl_guess_packno_bits (lsquic_send_ctl_t *ctl)
{
return PACKNO_BITS_1; /* This is 2 bytes in both GQUIC and IQUIC */
}
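/* In release (NDEBUG) builds the two functions above are static; in debug
 * builds they are weak symbols on GCC-compatible compilers, presumably so
 * that unit tests can substitute their own implementations.
 */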
int
lsquic_send_ctl_have_unacked_stream_frames (const lsquic_send_ctl_t *ctl)
{
const lsquic_packet_out_t *packet_out;
TAILQ_FOREACH(packet_out, &ctl->sc_unacked_packets[PNS_APP], po_next)
if (0 == (packet_out->po_flags & (PO_LOSS_REC|PO_POISON))
&& (packet_out->po_frame_types &
((1 << QUIC_FRAME_STREAM) | (1 << QUIC_FRAME_RST_STREAM))))
return 1;
return 0;
}
static lsquic_packet_out_t *
send_ctl_first_unacked_retx_packet (const struct lsquic_send_ctl *ctl,
enum packnum_space pns)
{
lsquic_packet_out_t *packet_out;
TAILQ_FOREACH(packet_out, &ctl->sc_unacked_packets[pns], po_next)
if (0 == (packet_out->po_flags & (PO_LOSS_REC|PO_POISON))
&& (packet_out->po_frame_types & ctl->sc_retx_frames))
return packet_out;
return NULL;
}
int
lsquic_send_ctl_have_unacked_retx_data (const struct lsquic_send_ctl *ctl)
{
return lsquic_alarmset_is_set(ctl->sc_alset, AL_RETX_APP)
&& send_ctl_first_unacked_retx_packet(ctl, PNS_APP);
}
static lsquic_packet_out_t *
send_ctl_last_unacked_retx_packet (const struct lsquic_send_ctl *ctl,
enum packnum_space pns)
{
lsquic_packet_out_t *packet_out;
TAILQ_FOREACH_REVERSE(packet_out, &ctl->sc_unacked_packets[pns],
lsquic_packets_tailq, po_next)
if (0 == (packet_out->po_flags & (PO_LOSS_REC|PO_POISON))
&& (packet_out->po_frame_types & ctl->sc_retx_frames))
return packet_out;
return NULL;
}
static int
have_unacked_handshake_packets (const lsquic_send_ctl_t *ctl)
{
const lsquic_packet_out_t *packet_out;
enum packnum_space pns;
for (pns = ctl->sc_flags & SC_IETF ? PNS_INIT : PNS_APP; pns < N_PNS; ++pns)
TAILQ_FOREACH(packet_out, &ctl->sc_unacked_packets[pns], po_next)
if (packet_out->po_flags & PO_HELLO)
return 1;
return 0;
}
static enum retx_mode
get_retx_mode (const lsquic_send_ctl_t *ctl)
{
if (!(ctl->sc_conn_pub->lconn->cn_flags & LSCONN_HANDSHAKE_DONE)
&& have_unacked_handshake_packets(ctl))
return RETX_MODE_HANDSHAKE;
if (ctl->sc_loss_to)
return RETX_MODE_LOSS;
if (ctl->sc_n_tlp < 2)
return RETX_MODE_TLP;
return RETX_MODE_RTO;
}
static lsquic_time_t
get_retx_delay (const struct lsquic_rtt_stats *rtt_stats)
{
lsquic_time_t srtt, delay;
srtt = lsquic_rtt_stats_get_srtt(rtt_stats);
if (srtt)
{
delay = srtt + 4 * lsquic_rtt_stats_get_rttvar(rtt_stats);
if (delay < MIN_RTO_DELAY)
delay = MIN_RTO_DELAY;
}
else
delay = DEFAULT_RETX_DELAY;
return delay;
}
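/* Worked example (illustrative numbers): with srtt = 100000 usec and
 * rttvar = 10000 usec, delay = 100000 + 4 * 10000 = 140000 usec, which is
 * below MIN_RTO_DELAY and is therefore raised to 200000 usec.  Before the
 * first RTT sample, DEFAULT_RETX_DELAY (500000 usec) is returned.
 */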
static lsquic_time_t
calculate_packet_rto (lsquic_send_ctl_t *ctl);
static void
retx_alarm_rings (enum alarm_id al_id, void *ctx, lsquic_time_t expiry, lsquic_time_t now)
{
lsquic_send_ctl_t *ctl = ctx;
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
lsquic_packet_out_t *packet_out;
enum packnum_space pns;
enum retx_mode rm;
pns = al_id - AL_RETX_INIT;
/* This is a callback -- before it is called, the alarm is unset */
assert(!lsquic_alarmset_is_set(ctl->sc_alset, AL_RETX_INIT + pns));
rm = get_retx_mode(ctl);
LSQ_INFO("%s timeout, mode %s", lsquic_alid2str[al_id], retx2str[rm]);
switch (rm)
{
case RETX_MODE_HANDSHAKE:
send_ctl_expire(ctl, pns, EXFI_HSK);
/* Do not register cubic loss during handshake */
break;
case RETX_MODE_LOSS:
send_ctl_detect_losses(ctl, pns, now);
break;
case RETX_MODE_TLP:
ctl->sc_last_rto_time = now;
++ctl->sc_n_tlp;
send_ctl_expire(ctl, pns, EXFI_LAST);
break;
case RETX_MODE_RTO:
if ((ctl->sc_flags & SC_1RTT_ACKED)
|| now - ctl->sc_last_rto_time >= calculate_packet_rto(ctl))
{
ctl->sc_last_rto_time = now;
++ctl->sc_n_consec_rtos;
ctl->sc_next_limit = 2;
ctl->sc_ci->cci_timeout(CGP(ctl));
if (lconn->cn_if->ci_retx_timeout)
lconn->cn_if->ci_retx_timeout(lconn);
}
LSQ_DEBUG("packet RTO is %"PRIu64" (+%"PRIu64") usec, consec RTOs: %d",
expiry, now - expiry, ctl->sc_n_consec_rtos);
send_ctl_expire(ctl, pns, EXFI_ALL);
break;
}
packet_out = send_ctl_first_unacked_retx_packet(ctl, pns);
if (packet_out)
set_retx_alarm(ctl, pns, now);
lsquic_send_ctl_sanity_check(ctl);
}
static lsquic_packno_t
first_packno (const struct lsquic_send_ctl *ctl)
{
if (ctl->sc_flags & SC_IETF)
return 0;
else
return 1;
}
static void
send_ctl_pick_initial_packno (struct lsquic_send_ctl *ctl)
{
#ifndef NDEBUG
lsquic_packno_t packno;
const char *s;
if (!(ctl->sc_conn_pub->lconn->cn_flags & LSCONN_SERVER)
&& (s = getenv("LSQUIC_STARTING_PACKNO"), s != NULL))
{
packno = (lsquic_packno_t) strtoull(s, NULL, 10);
LSQ_DEBUG("starting sending packet numbers starting with %"PRIu64
" based on environment variable", packno);
ctl->sc_cur_packno = packno - 1;
}
else
#endif
ctl->sc_cur_packno = first_packno(ctl) - 1;
}
void
lsquic_send_ctl_init (lsquic_send_ctl_t *ctl, struct lsquic_alarmset *alset,
struct lsquic_engine_public *enpub, const struct ver_neg *ver_neg,
struct lsquic_conn_public *conn_pub, enum send_ctl_flags flags)
{
unsigned i;
memset(ctl, 0, sizeof(*ctl));
TAILQ_INIT(&ctl->sc_scheduled_packets);
TAILQ_INIT(&ctl->sc_unacked_packets[PNS_INIT]);
TAILQ_INIT(&ctl->sc_unacked_packets[PNS_HSK]);
TAILQ_INIT(&ctl->sc_unacked_packets[PNS_APP]);
TAILQ_INIT(&ctl->sc_lost_packets);
TAILQ_INIT(&ctl->sc_0rtt_stash);
ctl->sc_enpub = enpub;
ctl->sc_alset = alset;
ctl->sc_ver_neg = ver_neg;
ctl->sc_conn_pub = conn_pub;
assert(!(flags & ~(SC_IETF|SC_NSTP|SC_ECN)));
ctl->sc_flags = flags;
send_ctl_pick_initial_packno(ctl);
if (enpub->enp_settings.es_pace_packets)
ctl->sc_flags |= SC_PACE;
if (flags & SC_ECN)
ctl->sc_ecn = ECN_ECT0;
else
ctl->sc_ecn = ECN_NOT_ECT;
if (flags & SC_IETF)
ctl->sc_retx_frames = IQUIC_FRAME_RETX_MASK;
else
ctl->sc_retx_frames = GQUIC_FRAME_RETRANSMITTABLE_MASK;
lsquic_alarmset_init_alarm(alset, AL_RETX_INIT, retx_alarm_rings, ctl);
lsquic_alarmset_init_alarm(alset, AL_RETX_HSK, retx_alarm_rings, ctl);
lsquic_alarmset_init_alarm(alset, AL_RETX_APP, retx_alarm_rings, ctl);
lsquic_senhist_init(&ctl->sc_senhist, ctl->sc_flags & SC_IETF);
#ifndef NDEBUG
/* TODO: the logic to select the "previously sent" packno should not be
* duplicated here and in lsquic_senhist_init()...
*/
if (!(ctl->sc_conn_pub->lconn->cn_flags & LSCONN_SERVER))
ctl->sc_senhist.sh_last_sent = ctl->sc_cur_packno;
#endif
switch (enpub->enp_settings.es_cc_algo)
{
case 1:
ctl->sc_ci = &lsquic_cong_cubic_if;
ctl->sc_cong_ctl = &ctl->sc_adaptive_cc.acc_cubic;
break;
case 2:
ctl->sc_ci = &lsquic_cong_bbr_if;
ctl->sc_cong_ctl = &ctl->sc_adaptive_cc.acc_bbr;
break;
case 3:
default:
ctl->sc_ci = &lsquic_cong_adaptive_if;
ctl->sc_cong_ctl = &ctl->sc_adaptive_cc;
break;
}
ctl->sc_ci->cci_init(CGP(ctl), conn_pub, ctl->sc_retx_frames);
if (ctl->sc_flags & SC_PACE)
lsquic_pacer_init(&ctl->sc_pacer, conn_pub->lconn,
/* TODO: conn_pub has a pointer to enpub: drop third argument */
enpub->enp_settings.es_clock_granularity);
for (i = 0; i < sizeof(ctl->sc_buffered_packets) /
sizeof(ctl->sc_buffered_packets[0]); ++i)
TAILQ_INIT(&ctl->sc_buffered_packets[i].bpq_packets);
ctl->sc_max_packno_bits = PACKNO_BITS_2; /* Safe value before verneg */
ctl->sc_cached_bpt.stream_id = UINT64_MAX;
#if LSQUIC_EXTRA_CHECKS
ctl->sc_flags |= SC_SANITY_CHECK;
LSQ_DEBUG("sanity checks enabled");
#endif
ctl->sc_gap = UINT64_MAX - 1 /* Can't have +1 == 0 */;
if ((ctl->sc_conn_pub->lconn->cn_flags & (LSCONN_IETF|LSCONN_SERVER))
== (LSCONN_IETF|LSCONN_SERVER))
ctl->sc_can_send = send_ctl_can_send_pre_hsk;
else
ctl->sc_can_send = send_ctl_can_send;
ctl->sc_reord_thresh = N_NACKS_BEFORE_RETX;
#if LSQUIC_DEVEL
const char *s;
s = getenv("LSQUIC_DYN_PTHRESH");
if (s == NULL || atoi(s))
ctl->sc_flags |= SC_DYN_PTHRESH;
#endif
}
static lsquic_time_t
calculate_packet_rto (lsquic_send_ctl_t *ctl)
{
lsquic_time_t delay;
delay = get_retx_delay(&ctl->sc_conn_pub->rtt_stats);
unsigned exp = ctl->sc_n_consec_rtos;
if (exp > MAX_RTO_BACKOFFS)
exp = MAX_RTO_BACKOFFS;
delay = delay * (1 << exp);
return delay;
}
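/* Worked example (illustrative numbers): with a base delay of 200000 usec
 * and sc_n_consec_rtos = 3, the backed-off RTO is 200000 << 3 = 1600000
 * usec.  The exponent is capped at MAX_RTO_BACKOFFS (10), and
 * set_retx_alarm() additionally caps the resulting delay at MAX_RTO_DELAY
 * (60 seconds).
 */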
static lsquic_time_t
calculate_tlp_delay (lsquic_send_ctl_t *ctl)
{
lsquic_time_t srtt, delay;
srtt = lsquic_rtt_stats_get_srtt(&ctl->sc_conn_pub->rtt_stats);
if (!srtt)
srtt = INITIAL_RTT;
if (ctl->sc_n_in_flight_all > 1)
delay = 10000; /* 10 ms is the minimum tail loss probe delay */
else
delay = srtt + srtt / 2 + ctl->sc_conn_pub->max_peer_ack_usec;
if (delay < 2 * srtt)
delay = 2 * srtt;
return delay;
}
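/* Worked example (illustrative numbers): with srtt = 50000 usec and more
 * than one packet in flight, delay starts at the 10 ms minimum and is then
 * raised to 2 * srtt = 100000 usec by the final check.  With a single
 * packet in flight, 1.5 * srtt plus the peer's maximum ACK delay is used
 * instead, subject to the same 2 * srtt floor.
 */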
static void
set_retx_alarm (struct lsquic_send_ctl *ctl, enum packnum_space pns,
lsquic_time_t now)
{
enum retx_mode rm;
lsquic_time_t delay;
assert(!TAILQ_EMPTY(&ctl->sc_unacked_packets[pns]));
rm = get_retx_mode(ctl);
switch (rm)
{
case RETX_MODE_HANDSHAKE:
/* [draft-iyengar-quic-loss-recovery-01]:
*
* if (handshake packets are outstanding):
* alarm_duration = max(1.5 * smoothed_rtt, 10ms) << handshake_count;
* handshake_count++;
*/
delay = lsquic_rtt_stats_get_srtt(&ctl->sc_conn_pub->rtt_stats);
if (delay)
{
delay += delay / 2;
if (10000 > delay)
delay = 10000;
}
else
delay = 150000;
delay <<= ctl->sc_n_hsk;
++ctl->sc_n_hsk;
break;
case RETX_MODE_LOSS:
delay = ctl->sc_loss_to;
break;
case RETX_MODE_TLP:
delay = calculate_tlp_delay(ctl);
break;
default:
assert(rm == RETX_MODE_RTO);
/* XXX the comment below as well as the name of the function
* that follows seem obsolete.
*/
/* Base RTO on the first unacked packet, following reference
* implementation.
*/
delay = calculate_packet_rto(ctl);
break;
}
if (delay > MAX_RTO_DELAY)
delay = MAX_RTO_DELAY;
LSQ_DEBUG("set RETX_%s alarm to %"PRIu64" (%"PRIu64
"), mode %s", lsquic_pns2str[pns],
now + delay, delay, retx2str[rm]);
lsquic_alarmset_set(ctl->sc_alset, AL_RETX_INIT + pns, now + delay);
if (PNS_APP == pns
&& ctl->sc_ci == &lsquic_cong_bbr_if
&& lsquic_alarmset_is_inited(ctl->sc_alset, AL_PACK_TOL)
&& !lsquic_alarmset_is_set(ctl->sc_alset, AL_PACK_TOL))
lsquic_alarmset_set(ctl->sc_alset, AL_PACK_TOL, now + delay);
}
#define SC_PACK_SIZE(ctl_) (+(ctl_)->sc_conn_pub->path->np_pack_size)
/* XXX can we optimize this by caching the value of this function? It should
* not change within one tick.
*/
static lsquic_time_t
send_ctl_transfer_time (void *ctx)
{
lsquic_send_ctl_t *const ctl = ctx;
lsquic_time_t tx_time;
uint64_t pacing_rate;
int in_recovery;
in_recovery = send_ctl_in_recovery(ctl);
pacing_rate = ctl->sc_ci->cci_pacing_rate(CGP(ctl), in_recovery);
if (!pacing_rate)
pacing_rate = 1;
tx_time = (uint64_t) SC_PACK_SIZE(ctl) * 1000000 / pacing_rate;
return tx_time;
}
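/* Worked example (illustrative numbers): with a packet size of 1370 bytes
 * and a pacing rate of 10000000 bytes per second, tx_time =
 * 1370 * 1000000 / 10000000 = 137 usec between packets.
 */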
static void
send_ctl_unacked_append (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
enum packnum_space pns;
pns = lsquic_packet_out_pns(packet_out);
assert(0 == (packet_out->po_flags & (PO_LOSS_REC|PO_POISON)));
TAILQ_INSERT_TAIL(&ctl->sc_unacked_packets[pns], packet_out, po_next);
packet_out->po_flags |= PO_UNACKED;
ctl->sc_bytes_unacked_all += packet_out_sent_sz(packet_out);
ctl->sc_n_in_flight_all += 1;
if (packet_out->po_frame_types & ctl->sc_retx_frames)
{
ctl->sc_bytes_unacked_retx += packet_out_total_sz(packet_out);
++ctl->sc_n_in_flight_retx;
}
}
static void
send_ctl_unacked_remove (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out, unsigned packet_sz)
{
enum packnum_space pns;
pns = lsquic_packet_out_pns(packet_out);
TAILQ_REMOVE(&ctl->sc_unacked_packets[pns], packet_out, po_next);
packet_out->po_flags &= ~PO_UNACKED;
assert(ctl->sc_bytes_unacked_all >= packet_sz);
ctl->sc_bytes_unacked_all -= packet_sz;
ctl->sc_n_in_flight_all -= 1;
if (packet_out->po_frame_types & ctl->sc_retx_frames)
{
ctl->sc_bytes_unacked_retx -= packet_sz;
--ctl->sc_n_in_flight_retx;
}
}
static void
send_ctl_sched_Xpend_common (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
packet_out->po_flags |= PO_SCHED;
++ctl->sc_n_scheduled;
ctl->sc_bytes_scheduled += packet_out_total_sz(packet_out);
lsquic_send_ctl_sanity_check(ctl);
}
static void
send_ctl_sched_append (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
TAILQ_INSERT_TAIL(&ctl->sc_scheduled_packets, packet_out, po_next);
send_ctl_sched_Xpend_common(ctl, packet_out);
}
static void
send_ctl_sched_prepend (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
TAILQ_INSERT_HEAD(&ctl->sc_scheduled_packets, packet_out, po_next);
send_ctl_sched_Xpend_common(ctl, packet_out);
}
static void
send_ctl_sched_remove (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
TAILQ_REMOVE(&ctl->sc_scheduled_packets, packet_out, po_next);
packet_out->po_flags &= ~PO_SCHED;
assert(ctl->sc_n_scheduled);
--ctl->sc_n_scheduled;
ctl->sc_bytes_scheduled -= packet_out_total_sz(packet_out);
lsquic_send_ctl_sanity_check(ctl);
}
/* Poisoned packets are used to detect optimistic ACK attacks. We only
* use a single poisoned packet at a time.
*/
static int
send_ctl_add_poison (struct lsquic_send_ctl *ctl)
{
struct lsquic_packet_out *poison;
/* XXX Allocating the poison packet out of the regular pool can fail.
* This leads to a lot of error checking that could be skipped if we
* did not have to allocate this packet at all.
*/
poison = lsquic_malo_get(ctl->sc_conn_pub->packet_out_malo);
if (!poison)
return -1;
memset(poison, 0, sizeof(*poison));
poison->po_flags = PO_UNACKED|PO_POISON;
poison->po_packno = ctl->sc_gap;
poison->po_loss_chain = poison; /* Won't be used, but just in case */
TAILQ_INSERT_TAIL(&ctl->sc_unacked_packets[PNS_APP], poison, po_next);
LSQ_DEBUG("insert poisoned packet #%"PRIu64, poison->po_packno);
ctl->sc_flags |= SC_POISON;
return 0;
}
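/* How the detection works (as implemented in this file): the poisoned
 * packet number sc_gap is skipped by send_ctl_next_packno() and is thus
 * never transmitted.  A peer that acknowledges it anyway must be acking
 * optimistically; lsquic_send_ctl_got_ack() treats an acked PO_POISON
 * packet as an error and fails ACK processing.
 */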
static void
send_ctl_reschedule_poison (struct lsquic_send_ctl *ctl)
{
struct lsquic_packet_out *poison;
enum lsq_log_level log_level;
lsquic_time_t approx_now;
TAILQ_FOREACH(poison, &ctl->sc_unacked_packets[PNS_APP], po_next)
if (poison->po_flags & PO_POISON)
{
LSQ_DEBUG("remove poisoned packet #%"PRIu64, poison->po_packno);
TAILQ_REMOVE(&ctl->sc_unacked_packets[PNS_APP], poison, po_next);
lsquic_malo_put(poison);
lsquic_send_ctl_begin_optack_detection(ctl);
ctl->sc_flags &= ~SC_POISON;
return;
}
approx_now = ctl->sc_last_sent_time;
if (0 == ctl->sc_enpub->enp_last_warning[WT_NO_POISON]
|| ctl->sc_enpub->enp_last_warning[WT_NO_POISON]
+ WARNING_INTERVAL < approx_now)
{
ctl->sc_enpub->enp_last_warning[WT_NO_POISON] = approx_now;
log_level = LSQ_LOG_WARN;
}
else
log_level = LSQ_LOG_DEBUG;
LSQ_LOG(log_level, "odd: poisoned packet #%"PRIu64" not found during "
"reschedule, flag: %d", ctl->sc_gap, !!(ctl->sc_flags & SC_POISON));
}
static int
send_ctl_update_poison_hist (struct lsquic_send_ctl *ctl,
lsquic_packno_t packno)
{
if (packno == ctl->sc_gap + 1)
{
assert(!(ctl->sc_flags & SC_POISON));
lsquic_senhist_add(&ctl->sc_senhist, ctl->sc_gap);
if (0 != send_ctl_add_poison(ctl))
return -1;
}
return 0;
}
void
lsquic_send_ctl_mtu_not_sent (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
(void) /* See comment in send_ctl_add_poison(): the plan is to make
this code path always succeed. */
send_ctl_update_poison_hist(ctl, packet_out->po_packno);
lsquic_senhist_add(&ctl->sc_senhist, packet_out->po_packno);
}
int
lsquic_send_ctl_sent_packet (lsquic_send_ctl_t *ctl,
struct lsquic_packet_out *packet_out)
{
enum packnum_space pns;
char frames[lsquic_frame_types_str_sz];
assert(!(packet_out->po_flags & PO_ENCRYPTED));
ctl->sc_last_sent_time = packet_out->po_sent;
pns = lsquic_packet_out_pns(packet_out);
if (0 != send_ctl_update_poison_hist(ctl, packet_out->po_packno))
return -1;
LSQ_DEBUG("sent packet #%"PRIu64" (%s)",
packet_out->po_packno, lsquic_frame_types_to_str(frames,
sizeof(frames), packet_out->po_frame_types));
lsquic_senhist_add(&ctl->sc_senhist, packet_out->po_packno);
if (ctl->sc_ci->cci_sent)
ctl->sc_ci->cci_sent(CGP(ctl), packet_out, ctl->sc_bytes_unacked_all,
ctl->sc_flags & SC_APP_LIMITED);
send_ctl_unacked_append(ctl, packet_out);
if (packet_out->po_frame_types & ctl->sc_retx_frames)
{
if (!lsquic_alarmset_is_set(ctl->sc_alset, AL_RETX_INIT + pns))
set_retx_alarm(ctl, pns, packet_out->po_sent);
if (ctl->sc_n_in_flight_retx == 1)
ctl->sc_flags |= SC_WAS_QUIET;
}
/* TODO: Do we really want to use those for RTT info? Revisit this. */
/* Hold on to packets that are not retransmittable because we need them
* to sample RTT information. They are released when ACK is received.
*/
#if LSQUIC_SEND_STATS
++ctl->sc_stats.n_total_sent;
#endif
lsquic_send_ctl_sanity_check(ctl);
return 0;
}
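/* Illustrative call sequence (a sketch based on the functions in this
 * file, not a verbatim excerpt of the engine code): a connection might
 * drive sending roughly like this:
 *
 *     struct lsquic_packet_out *po;
 *     while (lsquic_send_ctl_can_send(ctl)
 *            && (po = lsquic_send_ctl_next_packet_to_send(ctl, NULL)))
 *     {
 *         // ... encrypt and write po to the socket, set po->po_sent ...
 *         if (0 != lsquic_send_ctl_sent_packet(ctl, po))
 *             break;  // poison-packet allocation failed: abort connection
 *     }
 *
 * Once sent, the packet sits on the unacked queue until it is acked in
 * lsquic_send_ctl_got_ack() or declared lost by the loss-detection logic.
 */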
static void
send_ctl_select_cc (struct lsquic_send_ctl *ctl)
{
lsquic_time_t srtt;
srtt = lsquic_rtt_stats_get_srtt(&ctl->sc_conn_pub->rtt_stats);
if (srtt <= ctl->sc_enpub->enp_settings.es_cc_rtt_thresh)
{
LSQ_INFO("srtt is %"PRIu64" usec, which is smaller than or equal to "
"the threshold of %u usec: select Cubic congestion controller",
srtt, ctl->sc_enpub->enp_settings.es_cc_rtt_thresh);
ctl->sc_ci = &lsquic_cong_cubic_if;
ctl->sc_cong_ctl = &ctl->sc_adaptive_cc.acc_cubic;
ctl->sc_flags |= SC_CLEANUP_BBR;
}
else
{
LSQ_INFO("srtt is %"PRIu64" usec, which is greater than the threshold "
"of %u usec: select BBRv1 congestion controller", srtt,
ctl->sc_enpub->enp_settings.es_cc_rtt_thresh);
ctl->sc_ci = &lsquic_cong_bbr_if;
ctl->sc_cong_ctl = &ctl->sc_adaptive_cc.acc_bbr;
}
}
static void
take_rtt_sample (lsquic_send_ctl_t *ctl,
lsquic_time_t now, lsquic_time_t lack_delta)
{
const lsquic_packno_t packno = ctl->sc_largest_acked_packno;
const lsquic_time_t sent = ctl->sc_largest_acked_sent_time;
const lsquic_time_t measured_rtt = now - sent;
if ((!packno || packno > ctl->sc_max_rtt_packno) && lack_delta < measured_rtt)
{
if (UNLIKELY(ctl->sc_flags & SC_ROUGH_RTT))
{
memset(&ctl->sc_conn_pub->rtt_stats, 0,
sizeof(ctl->sc_conn_pub->rtt_stats));
ctl->sc_flags &= ~SC_ROUGH_RTT;
}
ctl->sc_max_rtt_packno = packno;
lsquic_rtt_stats_update(&ctl->sc_conn_pub->rtt_stats, measured_rtt, lack_delta);
LSQ_DEBUG("packno %"PRIu64"; rtt: %"PRIu64"; delta: %"PRIu64"; "
"new srtt: %"PRIu64, packno, measured_rtt, lack_delta,
lsquic_rtt_stats_get_srtt(&ctl->sc_conn_pub->rtt_stats));
if (ctl->sc_ci == &lsquic_cong_adaptive_if)
send_ctl_select_cc(ctl);
}
}
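/* Worked example (illustrative numbers): if the largest-acked packet was
 * sent at t = 1000000 and the ACK arrives at now = 1060000 with
 * lack_delta = 20000, then measured_rtt = 60000 usec.  Because the
 * peer-reported ACK delay (20000) is smaller than the measured RTT, the
 * sample is accepted and fed to lsquic_rtt_stats_update().
 */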
static void
send_ctl_return_enc_data (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
ctl->sc_enpub->enp_pmi->pmi_return(ctl->sc_enpub->enp_pmi_ctx,
packet_out->po_path->np_peer_ctx,
packet_out->po_enc_data, lsquic_packet_out_ipv6(packet_out));
packet_out->po_flags &= ~PO_ENCRYPTED;
packet_out->po_enc_data = NULL;
}
static void
send_ctl_destroy_packet (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
if (0 == (packet_out->po_flags & (PO_LOSS_REC|PO_POISON)))
lsquic_packet_out_destroy(packet_out, ctl->sc_enpub,
packet_out->po_path->np_peer_ctx);
else
lsquic_malo_put(packet_out);
}
static void
send_ctl_maybe_renumber_sched_to_right (struct lsquic_send_ctl *ctl,
const struct lsquic_packet_out *cur)
{
struct lsquic_packet_out *packet_out;
/* If current packet has PO_REPACKNO set, it means that all those to the
* right of it have this flag set as well.
*/
if (0 == (cur->po_flags & PO_REPACKNO))
{
ctl->sc_cur_packno = cur->po_packno - 1;
for (packet_out = TAILQ_NEXT(cur, po_next);
packet_out && 0 == (packet_out->po_flags & PO_REPACKNO);
packet_out = TAILQ_NEXT(packet_out, po_next))
{
packet_out->po_flags |= PO_REPACKNO;
}
}
}
static void
send_ctl_process_loss_chain_pkt (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *const chain_cur,
struct lsquic_packet_out **next)
{
unsigned packet_sz;
const char *state;
enum packnum_space pns;
switch (chain_cur->po_flags & (PO_SCHED|PO_UNACKED|PO_LOST))
{
case PO_SCHED:
send_ctl_maybe_renumber_sched_to_right(ctl, chain_cur);
send_ctl_sched_remove(ctl, chain_cur);
state = "scheduled";
break;
case PO_UNACKED:
if (chain_cur->po_flags & PO_LOSS_REC)
{
pns = lsquic_packet_out_pns(chain_cur);
TAILQ_REMOVE(&ctl->sc_unacked_packets[pns], chain_cur, po_next);
state = "loss record";
}
else
{
packet_sz = packet_out_sent_sz(chain_cur);
send_ctl_unacked_remove(ctl, chain_cur, packet_sz);
state = "unacked";
}
break;
case PO_LOST:
TAILQ_REMOVE(&ctl->sc_lost_packets, chain_cur, po_next);
state = "lost";
break;
case 0:
/* This is also weird, but let it pass */
state = "unknown";
break;
default:
assert(0);
state = "bad";
break;
}
if (next && *next == chain_cur)
*next = TAILQ_NEXT(*next, po_next);
if (0 == (chain_cur->po_flags & PO_LOSS_REC))
lsquic_packet_out_ack_streams(chain_cur);
LSQ_DEBUG("loss chain, destroy %s packet #%"PRIu64, state,
chain_cur->po_packno);
send_ctl_destroy_packet(ctl, chain_cur);
}
static void
send_ctl_acked_loss_chain (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *const packet_out,
struct lsquic_packet_out **next,
lsquic_packno_t largest_acked,
signed char *do_rtt)
{
struct lsquic_packet_out *chain_cur, *chain_next;
unsigned count;
count = 0;
for (chain_cur = packet_out->po_loss_chain; chain_cur != packet_out;
chain_cur = chain_next)
{
chain_next = chain_cur->po_loss_chain;
if (chain_cur->po_packno == largest_acked)
*do_rtt = 1;
send_ctl_process_loss_chain_pkt(ctl, chain_cur, next);
++count;
}
packet_out->po_loss_chain = packet_out;
if (count)
LSQ_DEBUG("destroyed %u packet%.*s in chain of packet #%"PRIu64,
count, count != 1, "s", packet_out->po_packno);
}
/* The third argument is used to advance the `next' pointer when modifying
 * the unacked queue.  This is necessary because the unacked queue may
 * contain several elements of the same chain.  This is not true of the
 * lost and scheduled packet queues, as the loss records are only present
 * on the unacked queue.
 */
static void
send_ctl_destroy_chain (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *const packet_out,
struct lsquic_packet_out **next)
{
struct lsquic_packet_out *chain_cur, *chain_next;
unsigned count;
count = 0;
for (chain_cur = packet_out->po_loss_chain; chain_cur != packet_out;
chain_cur = chain_next)
{
chain_next = chain_cur->po_loss_chain;
send_ctl_process_loss_chain_pkt(ctl, chain_cur, next);
++count;
}
packet_out->po_loss_chain = packet_out;
if (count)
LSQ_DEBUG("destroyed %u packet%.*s in chain of packet #%"PRIu64,
count, count != 1, "s", packet_out->po_packno);
}
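/* Loss chains are circular singly linked lists threaded through
 * po_loss_chain: a packet with no related records points to itself (see
 * send_ctl_allocate_packet()), and send_ctl_record_loss() splices each new
 * loss record in after the lost packet.  Walking the chain from any
 * element until it returns to the starting packet therefore visits every
 * related record exactly once, which is what the loops above rely on.
 */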
static struct lsquic_packet_out *
send_ctl_record_loss (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
struct lsquic_packet_out *loss_record;
loss_record = lsquic_malo_get(ctl->sc_conn_pub->packet_out_malo);
if (loss_record)
{
memset(loss_record, 0, sizeof(*loss_record));
loss_record->po_flags = PO_UNACKED|PO_LOSS_REC|PO_SENT_SZ;
loss_record->po_flags |=
((packet_out->po_flags >> POPNS_SHIFT) & 3) << POPNS_SHIFT;
/* Copy values used in ACK processing: */
loss_record->po_packno = packet_out->po_packno;
loss_record->po_sent = packet_out->po_sent;
loss_record->po_sent_sz = packet_out_sent_sz(packet_out);
loss_record->po_frame_types = packet_out->po_frame_types;
/* Insert the loss record into the chain: */
loss_record->po_loss_chain = packet_out->po_loss_chain;
packet_out->po_loss_chain = loss_record;
/* Place the loss record next to the lost packet we are about to
* remove from the list:
*/
TAILQ_INSERT_BEFORE(packet_out, loss_record, po_next);
return loss_record;
}
else
{
LSQ_INFO("cannot allocate memory for loss record");
return NULL;
}
}
static struct lsquic_packet_out *
send_ctl_handle_regular_lost_packet (struct lsquic_send_ctl *ctl,
lsquic_packet_out_t *packet_out, struct lsquic_packet_out **next)
{
struct lsquic_packet_out *loss_record;
unsigned packet_sz;
assert(ctl->sc_n_in_flight_all);
packet_sz = packet_out_sent_sz(packet_out);
++ctl->sc_loss_count;
#if LSQUIC_CONN_STATS
++ctl->sc_conn_pub->conn_stats->out.lost_packets;
#endif
if (packet_out->po_frame_types & (1 << QUIC_FRAME_ACK))
{
ctl->sc_flags |= SC_LOST_ACK_INIT << lsquic_packet_out_pns(packet_out);
LSQ_DEBUG("lost ACK in packet #%"PRIu64, packet_out->po_packno);
}
if (ctl->sc_ci->cci_lost)
ctl->sc_ci->cci_lost(CGP(ctl), packet_out, packet_sz);
/* This is a client-only check, server check happens in mini conn */
if (lsquic_send_ctl_ecn_turned_on(ctl)
&& 0 == ctl->sc_ecn_total_acked[PNS_INIT]
&& HETY_INITIAL == packet_out->po_header_type
&& 3 == packet_out->po_packno)
{
LSQ_DEBUG("possible ECN black hole during handshake, disable ECN");
lsquic_send_ctl_disable_ecn(ctl);
}
if (packet_out->po_frame_types & ctl->sc_retx_frames)
{
LSQ_DEBUG("lost retransmittable packet #%"PRIu64,
packet_out->po_packno);
loss_record = send_ctl_record_loss(ctl, packet_out);
send_ctl_unacked_remove(ctl, packet_out, packet_sz);
TAILQ_INSERT_TAIL(&ctl->sc_lost_packets, packet_out, po_next);
packet_out->po_flags |= PO_LOST;
return loss_record;
}
else
{
LSQ_DEBUG("lost unretransmittable packet #%"PRIu64,
packet_out->po_packno);
send_ctl_unacked_remove(ctl, packet_out, packet_sz);
send_ctl_destroy_chain(ctl, packet_out, next);
send_ctl_destroy_packet(ctl, packet_out);
return NULL;
}
}
static int
send_ctl_handle_lost_mtu_probe (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
unsigned packet_sz;
LSQ_DEBUG("lost MTU probe in packet #%"PRIu64, packet_out->po_packno);
packet_sz = packet_out_sent_sz(packet_out);
send_ctl_unacked_remove(ctl, packet_out, packet_sz);
assert(packet_out->po_loss_chain == packet_out);
send_ctl_destroy_packet(ctl, packet_out);
return 0;
}
/* Returns true if packet was rescheduled, false otherwise. In the latter
* case, you should not dereference packet_out after the function returns.
*/
static int
send_ctl_handle_lost_packet (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out, struct lsquic_packet_out **next)
{
if (0 == (packet_out->po_flags & PO_MTU_PROBE))
return send_ctl_handle_regular_lost_packet(ctl, packet_out, next) != NULL;
else
return send_ctl_handle_lost_mtu_probe(ctl, packet_out);
}
static lsquic_packno_t
largest_retx_packet_number (const struct lsquic_send_ctl *ctl,
enum packnum_space pns)
{
const lsquic_packet_out_t *packet_out;
TAILQ_FOREACH_REVERSE(packet_out, &ctl->sc_unacked_packets[pns],
lsquic_packets_tailq, po_next)
{
if (0 == (packet_out->po_flags & (PO_LOSS_REC|PO_POISON))
&& (packet_out->po_frame_types & ctl->sc_retx_frames))
return packet_out->po_packno;
}
return 0;
}
static void
send_ctl_loss_event (struct lsquic_send_ctl *ctl)
{
ctl->sc_ci->cci_loss(CGP(ctl));
if (ctl->sc_flags & SC_PACE)
lsquic_pacer_loss_event(&ctl->sc_pacer);
ctl->sc_largest_sent_at_cutback =
lsquic_senhist_largest(&ctl->sc_senhist);
}
/* Return true if losses were detected, false otherwise */
static int
send_ctl_detect_losses (struct lsquic_send_ctl *ctl, enum packnum_space pns,
lsquic_time_t time)
{
struct lsquic_packet_out *packet_out, *next, *loss_record;
lsquic_packno_t largest_retx_packno, largest_lost_packno;
largest_retx_packno = largest_retx_packet_number(ctl, pns);
largest_lost_packno = 0;
ctl->sc_loss_to = 0;
for (packet_out = TAILQ_FIRST(&ctl->sc_unacked_packets[pns]);
packet_out && packet_out->po_packno <= ctl->sc_largest_acked_packno;
packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (packet_out->po_flags & (PO_LOSS_REC|PO_POISON))
continue;
if (packet_out->po_packno + ctl->sc_reord_thresh <
ctl->sc_largest_acked_packno)
{
LSQ_DEBUG("loss by FACK detected (dist: %"PRIu64"), packet #%"PRIu64,
ctl->sc_largest_acked_packno - packet_out->po_packno,
packet_out->po_packno);
if (0 == (packet_out->po_flags & PO_MTU_PROBE))
{
largest_lost_packno = packet_out->po_packno;
loss_record = send_ctl_handle_regular_lost_packet(ctl,
packet_out, &next);
if (loss_record)
loss_record->po_lflags |= POL_FACKED;
}
else
send_ctl_handle_lost_mtu_probe(ctl, packet_out);
continue;
}
if (largest_retx_packno
&& (packet_out->po_frame_types & ctl->sc_retx_frames)
&& 0 == (packet_out->po_flags & PO_MTU_PROBE)
&& largest_retx_packno <= ctl->sc_largest_acked_packno)
{
LSQ_DEBUG("loss by early retransmit detected, packet #%"PRIu64,
packet_out->po_packno);
largest_lost_packno = packet_out->po_packno;
ctl->sc_loss_to =
lsquic_rtt_stats_get_srtt(&ctl->sc_conn_pub->rtt_stats) / 4;
LSQ_DEBUG("set sc_loss_to to %"PRIu64", packet #%"PRIu64,
ctl->sc_loss_to, packet_out->po_packno);
(void) send_ctl_handle_lost_packet(ctl, packet_out, &next);
continue;
}
if (ctl->sc_largest_acked_sent_time > packet_out->po_sent +
lsquic_rtt_stats_get_srtt(&ctl->sc_conn_pub->rtt_stats))
{
LSQ_DEBUG("loss by sent time detected: packet #%"PRIu64,
packet_out->po_packno);
if ((packet_out->po_frame_types & ctl->sc_retx_frames)
&& 0 == (packet_out->po_flags & PO_MTU_PROBE))
largest_lost_packno = packet_out->po_packno;
else { /* don't count it as a loss */; }
(void) send_ctl_handle_lost_packet(ctl, packet_out, &next);
continue;
}
}
if (largest_lost_packno > ctl->sc_largest_sent_at_cutback)
{
LSQ_DEBUG("detected new loss: packet #%"PRIu64"; new lsac: "
"%"PRIu64, largest_lost_packno, ctl->sc_largest_sent_at_cutback);
send_ctl_loss_event(ctl);
}
else if (largest_lost_packno)
/* Lost packets whose numbers are smaller than the largest packet
* number sent at the time of the last loss event indicate the same
* loss event. This follows NewReno logic, see RFC 6582.
*/
LSQ_DEBUG("ignore loss of packet #%"PRIu64" smaller than lsac "
"%"PRIu64, largest_lost_packno, ctl->sc_largest_sent_at_cutback);
return largest_lost_packno > ctl->sc_largest_sent_at_cutback;
}
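/* Worked example of the FACK check above (illustrative numbers): with
 * sc_reord_thresh = N_NACKS_BEFORE_RETX (3) and sc_largest_acked_packno
 * = 20, any unacked packet with packno + 3 < 20 (that is, packno <= 16)
 * is declared lost by FACK.
 */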
static void
send_ctl_mtu_probe_acked (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
LSQ_DEBUG("MTU probe in packet #%"PRIu64" has been ACKed",
packet_out->po_packno);
assert(lconn->cn_if->ci_mtu_probe_acked);
if (lconn->cn_if->ci_mtu_probe_acked)
lconn->cn_if->ci_mtu_probe_acked(lconn, packet_out);
}
static void
send_ctl_maybe_increase_reord_thresh (struct lsquic_send_ctl *ctl,
const struct lsquic_packet_out *loss_record,
lsquic_packno_t prev_largest_acked)
{
#if LSQUIC_DEVEL
if (ctl->sc_flags & SC_DYN_PTHRESH)
#endif
if ((loss_record->po_lflags & POL_FACKED)
&& loss_record->po_packno + ctl->sc_reord_thresh
< prev_largest_acked)
{
ctl->sc_reord_thresh = prev_largest_acked - loss_record->po_packno;
LSQ_DEBUG("packet #%"PRIu64" was a spurious loss by FACK, increase "
"reordering threshold to %u", loss_record->po_packno,
ctl->sc_reord_thresh);
}
}
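/* Worked example (illustrative numbers): suppose packet #10 was declared
 * lost by FACK when the largest acked packet was #14, and its ACK arrives
 * later while prev_largest_acked is still 14.  Since 10 + 3 < 14, the
 * loss was spurious, and sc_reord_thresh grows to 14 - 10 = 4 so that the
 * same reordering distance no longer triggers a loss.
 */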
int
lsquic_send_ctl_got_ack (lsquic_send_ctl_t *ctl,
const struct ack_info *acki,
lsquic_time_t ack_recv_time, lsquic_time_t now)
{
const struct lsquic_packno_range *range =
&acki->ranges[ acki->n_ranges - 1 ];
lsquic_packet_out_t *packet_out, *next;
lsquic_packno_t smallest_unacked;
lsquic_packno_t prev_largest_acked;
lsquic_packno_t ack2ed[2];
unsigned packet_sz;
int app_limited, losses_detected;
signed char do_rtt, skip_checks;
enum packnum_space pns;
unsigned ecn_total_acked, ecn_ce_cnt, one_rtt_cnt;
pns = acki->pns;
ctl->sc_flags |= SC_ACK_RECV_INIT << pns;
packet_out = TAILQ_FIRST(&ctl->sc_unacked_packets[pns]);
#if __GNUC__
__builtin_prefetch(packet_out);
#endif
#if __GNUC__
if (UNLIKELY(LSQ_LOG_ENABLED(LSQ_LOG_DEBUG)))
#endif
LSQ_DEBUG("Got ACK frame, largest acked: %"PRIu64"; delta: %"PRIu64,
largest_acked(acki), acki->lack_delta);
/* Validate ACK first: */
if (UNLIKELY(largest_acked(acki)
> lsquic_senhist_largest(&ctl->sc_senhist)))
{
LSQ_INFO("at least one packet in ACK range [%"PRIu64" - %"PRIu64"] "
"was never sent", acki->ranges[0].low, acki->ranges[0].high);
return -1;
}
if (ctl->sc_ci->cci_begin_ack)
ctl->sc_ci->cci_begin_ack(CGP(ctl), ack_recv_time,
ctl->sc_bytes_unacked_all);
ecn_total_acked = 0;
ecn_ce_cnt = 0;
one_rtt_cnt = 0;
if (UNLIKELY(ctl->sc_flags & SC_WAS_QUIET))
{
ctl->sc_flags &= ~SC_WAS_QUIET;
LSQ_DEBUG("ACK comes after a period of quiescence");
ctl->sc_ci->cci_was_quiet(CGP(ctl), now, ctl->sc_bytes_unacked_all);
if (packet_out && (packet_out->po_frame_types & QUIC_FTBIT_PING)
&& ctl->sc_conn_pub->last_prog)
{
LSQ_DEBUG("ACK to PING frame, update last progress to %"PRIu64,
ctl->sc_conn_pub->last_tick);
ctl->sc_conn_pub->last_prog = ctl->sc_conn_pub->last_tick;
}
}
if (UNLIKELY(!packet_out))
goto no_unacked_packets;
smallest_unacked = packet_out->po_packno;
LSQ_DEBUG("Smallest unacked: %"PRIu64, smallest_unacked);
ack2ed[1] = 0;
if (packet_out->po_packno > largest_acked(acki))
goto detect_losses;
if (largest_acked(acki) > ctl->sc_cur_rt_end)
{
++ctl->sc_rt_count;
ctl->sc_cur_rt_end = lsquic_senhist_largest(&ctl->sc_senhist);
}
prev_largest_acked = ctl->sc_largest_acked_packno;
do_rtt = 0, skip_checks = 0;
app_limited = -1;
do
{
next = TAILQ_NEXT(packet_out, po_next);
#if __GNUC__
__builtin_prefetch(next);
#endif
if (skip_checks)
goto after_checks;
/* This is faster than binary search in the normal case when the number
* of ranges is not much larger than the number of unacked packets.
*/
while (UNLIKELY(range->high < packet_out->po_packno))
--range;
if (range->low <= packet_out->po_packno)
{
skip_checks = range == acki->ranges;
if (app_limited < 0)
app_limited = send_ctl_retx_bytes_out(ctl) + 3 * SC_PACK_SIZE(ctl) /* This
is the "maximum burst" parameter */
< ctl->sc_ci->cci_get_cwnd(CGP(ctl));
after_checks:
ctl->sc_largest_acked_packno = packet_out->po_packno;
ctl->sc_largest_acked_sent_time = packet_out->po_sent;
ecn_total_acked += lsquic_packet_out_ecn(packet_out) != ECN_NOT_ECT;
ecn_ce_cnt += lsquic_packet_out_ecn(packet_out) == ECN_CE;
one_rtt_cnt += lsquic_packet_out_enc_level(packet_out) == ENC_LEV_APP;
if (0 == (packet_out->po_flags
& (PO_LOSS_REC|PO_POISON|PO_MTU_PROBE)))
{
packet_sz = packet_out_sent_sz(packet_out);
send_ctl_unacked_remove(ctl, packet_out, packet_sz);
lsquic_packet_out_ack_streams(packet_out);
LSQ_DEBUG("acking via regular record #%"PRIu64,
packet_out->po_packno);
}
else if (packet_out->po_flags & PO_LOSS_REC)
{
packet_sz = packet_out->po_sent_sz;
TAILQ_REMOVE(&ctl->sc_unacked_packets[pns], packet_out,
po_next);
LSQ_DEBUG("acking via loss record #%"PRIu64,
packet_out->po_packno);
send_ctl_maybe_increase_reord_thresh(ctl, packet_out,
prev_largest_acked);
#if LSQUIC_CONN_STATS
++ctl->sc_conn_pub->conn_stats->out.acked_via_loss;
#endif
}
else if (packet_out->po_flags & PO_MTU_PROBE)
{
packet_sz = packet_out_sent_sz(packet_out);
send_ctl_unacked_remove(ctl, packet_out, packet_sz);
send_ctl_mtu_probe_acked(ctl, packet_out);
}
else
{
LSQ_WARN("poisoned packet #%"PRIu64" acked",
packet_out->po_packno);
return -1;
}
ack2ed[!!(packet_out->po_frame_types & (1 << QUIC_FRAME_ACK))]
= packet_out->po_ack2ed;
do_rtt |= packet_out->po_packno == largest_acked(acki);
ctl->sc_ci->cci_ack(CGP(ctl), packet_out, packet_sz, now,
app_limited);
send_ctl_acked_loss_chain(ctl, packet_out, &next,
largest_acked(acki), &do_rtt);
send_ctl_destroy_packet(ctl, packet_out);
}
packet_out = next;
}
while (packet_out && packet_out->po_packno <= largest_acked(acki));
if (do_rtt)
{
take_rtt_sample(ctl, ack_recv_time, acki->lack_delta);
LSQ_DEBUG("clear sc_n_consec_rtos, sc_n_hsk, sc_n_tlp");
ctl->sc_n_consec_rtos = 0;
ctl->sc_n_hsk = 0;
ctl->sc_n_tlp = 0;
}
detect_losses:
losses_detected = send_ctl_detect_losses(ctl, pns, ack_recv_time);
if (send_ctl_first_unacked_retx_packet(ctl, pns))
{
if ((ctl->sc_flags & SC_1RTT_ACKED)
|| (!lsquic_alarmset_is_set(ctl->sc_alset, AL_RETX_INIT + pns)
&& losses_detected))
set_retx_alarm(ctl, pns, now);
}
else
{
LSQ_DEBUG("No retransmittable packets: clear alarm");
lsquic_alarmset_unset(ctl->sc_alset, AL_RETX_INIT + pns);
}
lsquic_send_ctl_sanity_check(ctl);
if ((ctl->sc_flags & SC_NSTP) && ack2ed[1] > ctl->sc_largest_ack2ed[pns])
ctl->sc_largest_ack2ed[pns] = ack2ed[1];
if (ctl->sc_n_in_flight_retx == 0)
ctl->sc_flags |= SC_WAS_QUIET;
if (one_rtt_cnt)
ctl->sc_flags |= SC_1RTT_ACKED;
if (lsquic_send_ctl_ecn_turned_on(ctl) && (acki->flags & AI_ECN))
{
const uint64_t sum = acki->ecn_counts[ECN_ECT0]
+ acki->ecn_counts[ECN_ECT1]
+ acki->ecn_counts[ECN_CE];
ctl->sc_ecn_total_acked[pns] += ecn_total_acked;
ctl->sc_ecn_ce_cnt[pns] += ecn_ce_cnt;
if (sum >= ctl->sc_ecn_total_acked[pns])
{
if (sum > ctl->sc_ecn_total_acked[pns])
ctl->sc_ecn_total_acked[pns] = sum;
if (acki->ecn_counts[ECN_CE] > ctl->sc_ecn_ce_cnt[pns])
{
ctl->sc_ecn_ce_cnt[pns] = acki->ecn_counts[ECN_CE];
if (losses_detected)
/* It's either-or. From [draft-ietf-quic-recovery-29],
* Section 7.4:
" When a loss or ECN-CE marking is detected [...]
*/
LSQ_DEBUG("ECN-CE marking detected, but loss event already "
"accounted for");
else
{
LSQ_DEBUG("ECN-CE marking detected, issue loss event");
send_ctl_loss_event(ctl);
}
}
}
else
{
LSQ_INFO("ECN total ACKed (%"PRIu64") is greater than the sum "
"of ECN counters (%"PRIu64"): disable ECN",
ctl->sc_ecn_total_acked[pns], sum);
lsquic_send_ctl_disable_ecn(ctl);
}
}
update_n_stop_waiting:
if (!(ctl->sc_flags & (SC_NSTP|SC_IETF)))
{
if (smallest_unacked > smallest_acked(acki))
/* Peer is acking packets that have been acked already. Schedule
* ACK and STOP_WAITING frame to chop the range if we get two of
* these in a row.
*/
++ctl->sc_n_stop_waiting;
else
ctl->sc_n_stop_waiting = 0;
}
lsquic_send_ctl_sanity_check(ctl);
if (ctl->sc_ci->cci_end_ack)
ctl->sc_ci->cci_end_ack(CGP(ctl), ctl->sc_bytes_unacked_all);
if (ctl->sc_gap < smallest_acked(acki))
send_ctl_reschedule_poison(ctl);
return 0;
no_unacked_packets:
smallest_unacked = lsquic_senhist_largest(&ctl->sc_senhist) + 1;
ctl->sc_flags |= SC_WAS_QUIET;
goto update_n_stop_waiting;
}
lsquic_packno_t
lsquic_send_ctl_smallest_unacked (lsquic_send_ctl_t *ctl)
{
const lsquic_packet_out_t *packet_out;
enum packnum_space pns;
/* Packets are always sent out in order (unless we are reordering them
* on purpose). Thus, the first packet on the unacked packets list has
* the smallest packet number of all packets on that list.
*/
for (pns = ctl->sc_flags & SC_IETF ? PNS_INIT : PNS_APP; pns < N_PNS; ++pns)
if ((packet_out = TAILQ_FIRST(&ctl->sc_unacked_packets[pns])))
/* We're OK with using a loss record */
return packet_out->po_packno;
return lsquic_senhist_largest(&ctl->sc_senhist) + first_packno(ctl);
}
static struct lsquic_packet_out *
send_ctl_next_lost (lsquic_send_ctl_t *ctl)
{
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
struct lsquic_packet_out *lost_packet;
get_next_lost:
lost_packet = TAILQ_FIRST(&ctl->sc_lost_packets);
if (lost_packet)
{
if (lost_packet->po_frame_types & (1 << QUIC_FRAME_STREAM))
{
if (0 == (lost_packet->po_flags & PO_MINI))
{
lsquic_packet_out_elide_reset_stream_frames(lost_packet,
UINT64_MAX);
if (lost_packet->po_regen_sz >= lost_packet->po_data_sz)
{
LSQ_DEBUG("Dropping packet #%"PRIu64" from lost queue",
lost_packet->po_packno);
TAILQ_REMOVE(&ctl->sc_lost_packets, lost_packet, po_next);
lost_packet->po_flags &= ~PO_LOST;
send_ctl_destroy_chain(ctl, lost_packet, NULL);
send_ctl_destroy_packet(ctl, lost_packet);
goto get_next_lost;
}
}
else
{
/* Mini connection only ever sends data on stream 1. There
* is nothing to elide: always resend it.
*/
;
}
}
if (!lsquic_send_ctl_can_send(ctl))
return NULL;
if (packet_out_total_sz(lost_packet) <= SC_PACK_SIZE(ctl))
{
pop_lost_packet:
TAILQ_REMOVE(&ctl->sc_lost_packets, lost_packet, po_next);
lost_packet->po_flags &= ~PO_LOST;
lost_packet->po_flags |= PO_RETX;
}
else
{
/* We delay resizing lost packets as long as possible, hoping that
* it may be ACKed. At this point, however, we have to resize.
*/
if (0 == split_lost_packet(ctl, lost_packet))
{
lost_packet = TAILQ_FIRST(&ctl->sc_lost_packets);
goto pop_lost_packet;
}
lconn->cn_if->ci_internal_error(lconn,
"error resizing lost packet");
return NULL;
}
}
return lost_packet;
}
static lsquic_packno_t
send_ctl_next_packno (lsquic_send_ctl_t *ctl)
{
lsquic_packno_t packno;
packno = ++ctl->sc_cur_packno;
if (packno == ctl->sc_gap)
packno = ++ctl->sc_cur_packno;
return packno;
}
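/* Note that this generator skips sc_gap: that number is reserved for the
 * poisoned packet used in optimistic-ACK detection and must never be
 * assigned to a real outgoing packet.
 */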
void
lsquic_send_ctl_cleanup (lsquic_send_ctl_t *ctl)
{
lsquic_packet_out_t *packet_out, *next;
enum packnum_space pns;
unsigned n;
lsquic_senhist_cleanup(&ctl->sc_senhist);
while ((packet_out = TAILQ_FIRST(&ctl->sc_scheduled_packets)))
{
send_ctl_sched_remove(ctl, packet_out);
send_ctl_destroy_packet(ctl, packet_out);
}
assert(0 == ctl->sc_n_scheduled);
//assert(0 == ctl->sc_bytes_scheduled);
for (pns = PNS_INIT; pns < N_PNS; ++pns)
while ((packet_out = TAILQ_FIRST(&ctl->sc_unacked_packets[pns])))
{
TAILQ_REMOVE(&ctl->sc_unacked_packets[pns], packet_out, po_next);
packet_out->po_flags &= ~PO_UNACKED;
#ifndef NDEBUG
if (0 == (packet_out->po_flags & (PO_LOSS_REC|PO_POISON)))
{
ctl->sc_bytes_unacked_all -= packet_out_sent_sz(packet_out);
--ctl->sc_n_in_flight_all;
}
#endif
send_ctl_destroy_packet(ctl, packet_out);
}
assert(0 == ctl->sc_n_in_flight_all);
assert(0 == ctl->sc_bytes_unacked_all);
while ((packet_out = TAILQ_FIRST(&ctl->sc_lost_packets)))
{
TAILQ_REMOVE(&ctl->sc_lost_packets, packet_out, po_next);
packet_out->po_flags &= ~PO_LOST;
send_ctl_destroy_packet(ctl, packet_out);
}
while ((packet_out = TAILQ_FIRST(&ctl->sc_0rtt_stash)))
{
TAILQ_REMOVE(&ctl->sc_0rtt_stash, packet_out, po_next);
send_ctl_destroy_packet(ctl, packet_out);
}
for (n = 0; n < sizeof(ctl->sc_buffered_packets) /
sizeof(ctl->sc_buffered_packets[0]); ++n)
{
for (packet_out = TAILQ_FIRST(&ctl->sc_buffered_packets[n].bpq_packets);
packet_out; packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
send_ctl_destroy_packet(ctl, packet_out);
}
}
if (ctl->sc_flags & SC_PACE)
lsquic_pacer_cleanup(&ctl->sc_pacer);
ctl->sc_ci->cci_cleanup(CGP(ctl));
if (ctl->sc_flags & SC_CLEANUP_BBR)
{
assert(ctl->sc_ci == &lsquic_cong_cubic_if);
lsquic_cong_bbr_if.cci_cleanup(&ctl->sc_adaptive_cc.acc_bbr);
}
#if LSQUIC_SEND_STATS
LSQ_NOTICE("stats: n_total_sent: %u; n_resent: %u; n_delayed: %u",
ctl->sc_stats.n_total_sent, ctl->sc_stats.n_resent,
ctl->sc_stats.n_delayed);
#endif
free(ctl->sc_token);
}
static unsigned
send_ctl_retx_bytes_out (const struct lsquic_send_ctl *ctl)
{
return ctl->sc_bytes_scheduled
+ ctl->sc_bytes_unacked_retx
;
}
static unsigned
send_ctl_all_bytes_out (const struct lsquic_send_ctl *ctl)
{
return ctl->sc_bytes_scheduled
+ ctl->sc_bytes_unacked_all
;
}
int
lsquic_send_ctl_pacer_blocked (struct lsquic_send_ctl *ctl)
{
#ifdef NDEBUG
return (ctl->sc_flags & SC_PACE)
&& !lsquic_pacer_can_schedule(&ctl->sc_pacer,
ctl->sc_n_in_flight_all);
#else
if (ctl->sc_flags & SC_PACE)
{
const int blocked = !lsquic_pacer_can_schedule(&ctl->sc_pacer,
ctl->sc_n_in_flight_all);
LSQ_DEBUG("pacer blocked: %d, in_flight_all: %u", blocked,
ctl->sc_n_in_flight_all);
return blocked;
}
else
return 0;
#endif
}
static int
send_ctl_can_send (struct lsquic_send_ctl *ctl)
{
uint64_t cwnd = ctl->sc_ci->cci_get_cwnd(CGP(ctl));
const unsigned n_out = send_ctl_all_bytes_out(ctl);
LSQ_DEBUG("%s: sc_flags: 0x%X, b_out: %u = (%u + %u); b_retx: %u; cwnd: %"PRIu64
"; ccfc: %"PRIu64"/%"PRIu64"; n_scheduled: %u, n_in_flight_all: %u"
"; pa_burst: %u; pa_next: %"PRIu64"; pa_now: %"PRIu64
, __func__, ctl->sc_flags,
n_out, ctl->sc_bytes_unacked_all, ctl->sc_bytes_scheduled,
ctl->sc_bytes_unacked_retx, cwnd,
ctl->sc_conn_pub->conn_cap.cc_sent,
ctl->sc_conn_pub->conn_cap.cc_max,
ctl->sc_n_scheduled, ctl->sc_n_in_flight_all,
ctl->sc_pacer.pa_burst_tokens,
ctl->sc_pacer.pa_next_sched, ctl->sc_pacer.pa_now);
if (ctl->sc_flags & SC_PACE)
{
if (n_out >= cwnd)
return 0;
if (lsquic_pacer_can_schedule(&ctl->sc_pacer,
ctl->sc_n_scheduled + ctl->sc_n_in_flight_all))
return 1;
if (ctl->sc_flags & SC_SCHED_TICK)
{
ctl->sc_flags &= ~SC_SCHED_TICK;
lsquic_engine_add_conn_to_attq(ctl->sc_enpub,
ctl->sc_conn_pub->lconn, lsquic_pacer_next_sched(&ctl->sc_pacer),
AEW_PACER);
}
return 0;
}
else
return n_out < cwnd;
}
static int
send_ctl_can_send_pre_hsk (struct lsquic_send_ctl *ctl)
{
unsigned bytes_in, bytes_out;
bytes_in = ctl->sc_conn_pub->bytes_in;
bytes_out = ctl->sc_conn_pub->bytes_out + ctl->sc_bytes_scheduled;
if (bytes_out >= bytes_in * 3 + bytes_in / 2 /* This should work out
to around 3 on average */)
{
LSQ_DEBUG("%s: amplification block: %u bytes in, %u bytes out",
__func__, bytes_in, bytes_out);
return 0;
}
else
return send_ctl_can_send(ctl);
}
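/* Worked example (illustrative numbers): if the peer has sent bytes_in =
 * 1200 bytes, this check blocks sending once bytes_out would reach
 * 1200 * 3 + 1200 / 2 = 4200 bytes, until more bytes arrive from the peer
 * and raise the limit.
 */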
#ifndef NDEBUG
#if __GNUC__
__attribute__((weak))
#endif
#endif
int
lsquic_send_ctl_can_send (struct lsquic_send_ctl *ctl)
{
return ctl->sc_can_send(ctl);
}
/* Like lsquic_send_ctl_can_send(), but no mods */
static int
send_ctl_could_send (const struct lsquic_send_ctl *ctl)
{
uint64_t cwnd;
unsigned n_out;
if ((ctl->sc_flags & SC_PACE) && lsquic_pacer_delayed(&ctl->sc_pacer))
return 0;
cwnd = ctl->sc_ci->cci_get_cwnd(CGP(ctl));
n_out = send_ctl_all_bytes_out(ctl);
return n_out < cwnd;
}
void
lsquic_send_ctl_maybe_app_limited (struct lsquic_send_ctl *ctl,
const struct network_path *path)
{
const struct lsquic_packet_out *packet_out;
packet_out = lsquic_send_ctl_last_scheduled(ctl, PNS_APP, path, 0);
if ((packet_out && lsquic_packet_out_avail(packet_out) > 10)
|| send_ctl_could_send(ctl))
{
LSQ_DEBUG("app-limited");
ctl->sc_flags |= SC_APP_LIMITED;
}
}
static void
send_ctl_expire (struct lsquic_send_ctl *ctl, enum packnum_space pns,
enum expire_filter filter)
{
lsquic_packet_out_t *packet_out, *next;
int n_resubmitted;
static const char *const filter_type2str[] = {
[EXFI_ALL] = "all",
[EXFI_HSK] = "handshake",
[EXFI_LAST] = "last",
};
switch (filter)
{
case EXFI_ALL:
n_resubmitted = 0;
for (packet_out = TAILQ_FIRST(&ctl->sc_unacked_packets[pns]);
packet_out; packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (0 == (packet_out->po_flags & (PO_LOSS_REC|PO_POISON)))
n_resubmitted += send_ctl_handle_lost_packet(ctl, packet_out,
&next);
}
break;
case EXFI_HSK:
n_resubmitted = 0;
for (packet_out = TAILQ_FIRST(&ctl->sc_unacked_packets[pns]); packet_out;
packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (packet_out->po_flags & PO_HELLO)
n_resubmitted += send_ctl_handle_lost_packet(ctl, packet_out,
&next);
}
break;
default:
assert(filter == EXFI_LAST);
packet_out = send_ctl_last_unacked_retx_packet(ctl, pns);
if (packet_out)
n_resubmitted = send_ctl_handle_lost_packet(ctl, packet_out, NULL);
else
n_resubmitted = 0;
break;
}
LSQ_DEBUG("consider %s packets lost: %d resubmitted",
filter_type2str[filter], n_resubmitted);
}
void
lsquic_send_ctl_expire_all (lsquic_send_ctl_t *ctl)
{
enum packnum_space pns;
for (pns = ctl->sc_flags & SC_IETF ? PNS_INIT : PNS_APP; pns < N_PNS; ++pns)
{
lsquic_alarmset_unset(ctl->sc_alset, AL_RETX_INIT + pns);
send_ctl_expire(ctl, pns, EXFI_ALL);
}
lsquic_send_ctl_sanity_check(ctl);
}
#ifndef NDEBUG
void
lsquic_send_ctl_do_sanity_check (const struct lsquic_send_ctl *ctl)
{
const struct lsquic_packet_out *packet_out;
lsquic_packno_t prev_packno;
int prev_packno_set;
unsigned count, bytes;
enum packnum_space pns;
#if _MSC_VER
prev_packno = 0;
#endif
count = 0, bytes = 0;
for (pns = PNS_INIT; pns <= PNS_APP; ++pns)
{
prev_packno_set = 0;
TAILQ_FOREACH(packet_out, &ctl->sc_unacked_packets[pns], po_next)
{
if (prev_packno_set)
assert(packet_out->po_packno > prev_packno);
else
{
prev_packno = packet_out->po_packno;
prev_packno_set = 1;
}
if (0 == (packet_out->po_flags & (PO_LOSS_REC|PO_POISON)))
{
bytes += packet_out_sent_sz(packet_out);
++count;
}
}
}
assert(count == ctl->sc_n_in_flight_all);
assert(bytes == ctl->sc_bytes_unacked_all);
count = 0, bytes = 0;
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
{
assert(packet_out->po_flags & PO_SCHED);
bytes += packet_out_total_sz(packet_out);
++count;
}
assert(count == ctl->sc_n_scheduled);
assert(bytes == ctl->sc_bytes_scheduled);
}
#endif
void
lsquic_send_ctl_scheduled_one (lsquic_send_ctl_t *ctl,
lsquic_packet_out_t *packet_out)
{
#ifndef NDEBUG
const lsquic_packet_out_t *last;
last = TAILQ_LAST(&ctl->sc_scheduled_packets, lsquic_packets_tailq);
if (last)
assert((last->po_flags & PO_REPACKNO) ||
last->po_packno < packet_out->po_packno);
#endif
if (ctl->sc_flags & SC_PACE)
{
unsigned n_out = ctl->sc_n_in_flight_retx + ctl->sc_n_scheduled;
lsquic_pacer_packet_scheduled(&ctl->sc_pacer, n_out,
send_ctl_in_recovery(ctl), send_ctl_transfer_time, ctl);
}
send_ctl_sched_append(ctl, packet_out);
}
/* Wrapper is used to reset the counter when it's been too long */
static unsigned
send_ctl_get_n_consec_rtos (struct lsquic_send_ctl *ctl)
{
lsquic_time_t timeout;
if (ctl->sc_n_consec_rtos)
{
timeout = calculate_packet_rto(ctl);
if (ctl->sc_last_rto_time + timeout < ctl->sc_last_sent_time)
{
ctl->sc_n_consec_rtos = 0;
LSQ_DEBUG("reset RTO counter after %"PRIu64" usec",
ctl->sc_last_sent_time - ctl->sc_last_rto_time);
}
}
return ctl->sc_n_consec_rtos;
}
/* This mimics the logic in lsquic_send_ctl_next_packet_to_send(): we want
* to check whether the first scheduled packet cannot be sent.
*/
int
lsquic_send_ctl_sched_is_blocked (struct lsquic_send_ctl *ctl)
{
const lsquic_packet_out_t *packet_out
= TAILQ_FIRST(&ctl->sc_scheduled_packets);
return send_ctl_get_n_consec_rtos(ctl)
&& 0 == ctl->sc_next_limit
&& packet_out
&& !(packet_out->po_frame_types & (1 << QUIC_FRAME_ACK));
}
static void
send_ctl_maybe_zero_pad (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *initial_packet, size_t limit)
{
struct lsquic_packet_out *packet_out;
size_t cum_size, size;
cum_size = packet_out_total_sz(initial_packet);
if (cum_size >= limit)
{
LSQ_DEBUG("packet size %zu larger than %zu-byte limit: not "
"zero-padding", cum_size, limit);
return;
}
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
{
size = packet_out_total_sz(packet_out);
if (cum_size + size > limit)
break;
cum_size += size;
if (HETY_SHORT == packet_out->po_header_type)
break;
}
LSQ_DEBUG("cum_size: %zu; limit: %zu", cum_size, limit);
assert(cum_size <= limit);
size = limit - cum_size;
if (size > lsquic_packet_out_avail(initial_packet))
size = lsquic_packet_out_avail(initial_packet);
if (size)
{
memset(initial_packet->po_data + initial_packet->po_data_sz, 0, size);
initial_packet->po_data_sz += size;
initial_packet->po_frame_types |= QUIC_FTBIT_PADDING;
}
LSQ_DEBUG("Added %zu bytes of PADDING to packet #%"PRIu64, size,
initial_packet->po_packno);
}
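/* This padding exists because QUIC requires datagrams carrying Initial
 * packets to be at least 1200 bytes.  The call site in
 * lsquic_send_ctl_next_packet_to_send() invokes this function with a
 * limit of 1200 minus the bytes already coalesced, for client Initials
 * and for ack-eliciting server Initials.
 */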
/* Predict whether lsquic_send_ctl_next_packet_to_send() will return a
* packet by mimicking its logic. Returns true if packet will be returned,
* false otherwise.
*/
int
lsquic_send_ctl_next_packet_to_send_predict (struct lsquic_send_ctl *ctl)
{
const struct lsquic_packet_out *packet_out;
unsigned n_rtos;
n_rtos = ~0u;
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
{
if (!(packet_out->po_frame_types
& ((1 << QUIC_FRAME_ACK) | (1 << QUIC_FRAME_CRYPTO)))
&& 0 == ctl->sc_next_limit
&& 0 != (n_rtos == ~0u ? /* Initialize once */
(n_rtos = send_ctl_get_n_consec_rtos(ctl)) : n_rtos))
{
LSQ_DEBUG("send prediction: no, n_rtos: %u", n_rtos);
return 0;
}
if ((packet_out->po_flags & PO_REPACKNO)
&& packet_out->po_regen_sz == packet_out->po_data_sz
&& packet_out->po_frame_types != QUIC_FTBIT_PATH_CHALLENGE)
{
LSQ_DEBUG("send prediction: packet #%"PRIu64" would be dropped, "
"continue", packet_out->po_packno);
continue;
}
LSQ_DEBUG("send prediction: yes, packet #%"PRIu64", flags %u, frames 0x%X",
packet_out->po_packno, (unsigned) packet_out->po_flags,
(unsigned) packet_out->po_frame_types);
return 1;
}
LSQ_DEBUG("send prediction: no, no matching scheduled packets");
return 0;
}
lsquic_packet_out_t *
lsquic_send_ctl_next_packet_to_send (struct lsquic_send_ctl *ctl,
const struct to_coal *to_coal)
{
lsquic_packet_out_t *packet_out;
size_t size;
int dec_limit;
get_packet:
packet_out = TAILQ_FIRST(&ctl->sc_scheduled_packets);
if (!packet_out)
{
LSQ_DEBUG("no more scheduled packets");
return NULL;
}
/* Note: keep logic in this function and in
* lsquic_send_ctl_next_packet_to_send_predict() in synch.
*/
if (!(packet_out->po_frame_types
& ((1 << QUIC_FRAME_ACK) | (1 << QUIC_FRAME_CRYPTO)))
&& send_ctl_get_n_consec_rtos(ctl))
{
if (ctl->sc_next_limit)
dec_limit = 1;
else
{
LSQ_DEBUG("sc_n_consec_rtos: %d, sc_next_limit is 0",
ctl->sc_n_consec_rtos);
return NULL;
}
}
else
dec_limit = 0;
if (packet_out->po_flags & PO_REPACKNO)
{
if (packet_out->po_regen_sz < packet_out->po_data_sz
&& packet_out->po_frame_types != QUIC_FTBIT_PADDING)
{
update_for_resending(ctl, packet_out);
packet_out->po_flags &= ~PO_REPACKNO;
}
else
{
LSQ_DEBUG("Dropping packet #%"PRIu64" from scheduled queue",
packet_out->po_packno);
send_ctl_sched_remove(ctl, packet_out);
send_ctl_destroy_chain(ctl, packet_out, NULL);
send_ctl_destroy_packet(ctl, packet_out);
goto get_packet;
}
}
if (UNLIKELY(to_coal != NULL))
{
/* From [draft-ietf-quic-transport-30], Section-12.2:
" Senders MUST NOT coalesce QUIC packets with different connection
" IDs into a single UDP datagram.
*/
if (packet_out_total_sz(packet_out) + to_coal->prev_sz_sum
> SC_PACK_SIZE(ctl)
|| !lsquic_packet_out_equal_dcids(to_coal->prev_packet, packet_out))
return NULL;
LSQ_DEBUG("packet #%"PRIu64" (%zu bytes) will be tacked on to "
"previous packet(s) (%zu bytes) (coalescing)",
packet_out->po_packno, packet_out_total_sz(packet_out),
to_coal->prev_sz_sum);
size = to_coal->prev_sz_sum;
}
else
size = 0;
send_ctl_sched_remove(ctl, packet_out);
if (dec_limit)
{
--ctl->sc_next_limit;
packet_out->po_lflags |= POL_LIMITED;
}
else
packet_out->po_lflags &= ~POL_LIMITED;
if (UNLIKELY(packet_out->po_header_type == HETY_INITIAL)
&& (!(ctl->sc_conn_pub->lconn->cn_flags & LSCONN_SERVER)
|| (packet_out->po_frame_types
& IQUIC_FRAME_ACKABLE_MASK))
&& size < 1200)
{
send_ctl_maybe_zero_pad(ctl, packet_out, 1200 - size);
}
if (ctl->sc_flags & SC_QL_BITS)
{
packet_out->po_lflags |= POL_LOG_QL_BITS;
if (ctl->sc_loss_count)
{
--ctl->sc_loss_count;
packet_out->po_lflags |= POL_LOSS_BIT;
}
else
packet_out->po_lflags &= ~POL_LOSS_BIT;
if (packet_out->po_header_type == HETY_SHORT)
{
if (ctl->sc_gap + 1 == packet_out->po_packno)
++ctl->sc_square_count;
if (ctl->sc_square_count++ & 64)
packet_out->po_lflags |= POL_SQUARE_BIT;
else
packet_out->po_lflags &= ~POL_SQUARE_BIT;
}
}
return packet_out;
}
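/* Called when a packet returned by lsquic_send_ctl_next_packet_to_send()
 * could not be sent after all: put it back at the head of the scheduled
 * queue and undo the limit, loss-bit, and square-bit accounting performed
 * when the packet was handed out.
 */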
void
lsquic_send_ctl_delayed_one (lsquic_send_ctl_t *ctl,
lsquic_packet_out_t *packet_out)
{
send_ctl_sched_prepend(ctl, packet_out);
if (packet_out->po_lflags & POL_LIMITED)
++ctl->sc_next_limit;
LSQ_DEBUG("packet #%"PRIu64" has been delayed", packet_out->po_packno);
#if LSQUIC_SEND_STATS
++ctl->sc_stats.n_delayed;
#endif
if (packet_out->po_lflags & POL_LOSS_BIT)
++ctl->sc_loss_count;
if ((ctl->sc_flags & SC_QL_BITS)
&& packet_out->po_header_type == HETY_SHORT)
ctl->sc_square_count -= 1 + (ctl->sc_gap + 1 == packet_out->po_packno);
}
int
lsquic_send_ctl_have_outgoing_stream_frames (const lsquic_send_ctl_t *ctl)
{
const lsquic_packet_out_t *packet_out;
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
if (packet_out->po_frame_types &
((1 << QUIC_FRAME_STREAM) | (1 << QUIC_FRAME_RST_STREAM)))
return 1;
return 0;
}
int
lsquic_send_ctl_have_outgoing_retx_frames (const lsquic_send_ctl_t *ctl)
{
const lsquic_packet_out_t *packet_out;
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
if (packet_out->po_frame_types & ctl->sc_retx_frames)
return 1;
return 0;
}
static int
send_ctl_set_packet_out_token (const struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *packet_out)
{
unsigned char *token;
token = malloc(ctl->sc_token_sz);
if (!token)
{
LSQ_WARN("malloc failed: cannot set initial token");
return -1;
}
memcpy(token, ctl->sc_token, ctl->sc_token_sz);
packet_out->po_token = token;
packet_out->po_token_len = ctl->sc_token_sz;
packet_out->po_flags |= PO_NONCE;
LSQ_DEBUG("set initial token on packet");
return 0;
}
static lsquic_packet_out_t *
send_ctl_allocate_packet (struct lsquic_send_ctl *ctl, enum packno_bits bits,
unsigned need_at_least, enum packnum_space pns,
const struct network_path *path)
{
static const enum header_type pns2hety[] =
{
[PNS_INIT] = HETY_INITIAL,
[PNS_HSK] = HETY_HANDSHAKE,
[PNS_APP] = HETY_SHORT,
};
lsquic_packet_out_t *packet_out;
packet_out = lsquic_packet_out_new(&ctl->sc_enpub->enp_mm,
ctl->sc_conn_pub->packet_out_malo,
!(ctl->sc_flags & SC_TCID0), ctl->sc_conn_pub->lconn, bits,
ctl->sc_ver_neg->vn_tag, NULL, path, pns2hety[pns]);
if (!packet_out)
return NULL;
if (need_at_least && lsquic_packet_out_avail(packet_out) < need_at_least)
    { /* This should never happen; that is why this check is performed at
       * this level and not lower, before the packet is actually allocated.
       */
LSQ_ERROR("wanted to allocate packet with at least %u bytes of "
"payload, but only got %u bytes (mtu: %u bytes)", need_at_least,
lsquic_packet_out_avail(packet_out), SC_PACK_SIZE(ctl));
send_ctl_destroy_packet(ctl, packet_out);
return NULL;
}
if (UNLIKELY(pns != PNS_APP))
{
if (pns == PNS_INIT)
{
packet_out->po_header_type = HETY_INITIAL;
if (ctl->sc_token)
{
(void) send_ctl_set_packet_out_token(ctl, packet_out);
if (packet_out->po_n_alloc > packet_out->po_token_len)
packet_out->po_n_alloc -= packet_out->po_token_len;
            else
            {
                /* XXX fail earlier: when retry token is parsed out */
                LSQ_INFO("token is too long: cannot allocate packet");
                send_ctl_destroy_packet(ctl, packet_out);
                return NULL;
            }
}
}
else
packet_out->po_header_type = HETY_HANDSHAKE;
}
lsquic_packet_out_set_pns(packet_out, pns);
packet_out->po_lflags |= ctl->sc_ecn << POECN_SHIFT;
packet_out->po_loss_chain = packet_out;
return packet_out;
}
lsquic_packet_out_t *
lsquic_send_ctl_new_packet_out (lsquic_send_ctl_t *ctl, unsigned need_at_least,
enum packnum_space pns, const struct network_path *path)
{
lsquic_packet_out_t *packet_out;
enum packno_bits bits;
bits = lsquic_send_ctl_packno_bits(ctl, pns);
packet_out = send_ctl_allocate_packet(ctl, bits, need_at_least, pns, path);
if (!packet_out)
return NULL;
packet_out->po_packno = send_ctl_next_packno(ctl);
LSQ_DEBUG("created packet #%"PRIu64, packet_out->po_packno);
EV_LOG_PACKET_CREATED(LSQUIC_LOG_CONN_ID, packet_out);
return packet_out;
}
struct lsquic_packet_out *
lsquic_send_ctl_last_scheduled (struct lsquic_send_ctl *ctl,
enum packnum_space pns, const struct network_path *path,
int regen_match)
{
struct lsquic_packet_out *packet_out;
if (0 == regen_match)
{
TAILQ_FOREACH_REVERSE(packet_out, &ctl->sc_scheduled_packets,
lsquic_packets_tailq, po_next)
if (pns == lsquic_packet_out_pns(packet_out)
&& path == packet_out->po_path)
return packet_out;
}
else
{
TAILQ_FOREACH_REVERSE(packet_out, &ctl->sc_scheduled_packets,
lsquic_packets_tailq, po_next)
if (pns == lsquic_packet_out_pns(packet_out)
&& packet_out->po_regen_sz == packet_out->po_data_sz
&& path == packet_out->po_path)
return packet_out;
}
return NULL;
}
/* Do not use for STREAM frames
*/
lsquic_packet_out_t *
lsquic_send_ctl_get_writeable_packet (lsquic_send_ctl_t *ctl,
enum packnum_space pns, unsigned need_at_least,
const struct network_path *path, int regen_match, int *is_err)
{
lsquic_packet_out_t *packet_out;
assert(need_at_least > 0);
packet_out = lsquic_send_ctl_last_scheduled(ctl, pns, path, regen_match);
if (packet_out
&& !(packet_out->po_flags & (PO_MINI|PO_STREAM_END|PO_RETX))
&& lsquic_packet_out_avail(packet_out) >= need_at_least)
{
return packet_out;
}
if (!lsquic_send_ctl_can_send(ctl))
{
if (is_err)
*is_err = 0;
return NULL;
}
packet_out = lsquic_send_ctl_new_packet_out(ctl, need_at_least, pns, path);
if (packet_out)
{
lsquic_packet_out_set_pns(packet_out, pns);
lsquic_send_ctl_scheduled_one(ctl, packet_out);
}
else if (is_err)
*is_err = 1;
return packet_out;
}
struct lsquic_packet_out *
lsquic_send_ctl_get_packet_for_crypto (struct lsquic_send_ctl *ctl,
unsigned need_at_least, enum packnum_space pns,
const struct network_path *path)
{
struct lsquic_packet_out *packet_out;
assert(lsquic_send_ctl_schedule_stream_packets_immediately(ctl));
assert(need_at_least > 0);
packet_out = lsquic_send_ctl_last_scheduled(ctl, pns, path, 0);
if (packet_out
&& !(packet_out->po_flags & (PO_STREAM_END|PO_RETX))
&& lsquic_packet_out_avail(packet_out) >= need_at_least)
{
return packet_out;
}
if (!lsquic_send_ctl_can_send(ctl))
return NULL;
packet_out = lsquic_send_ctl_new_packet_out(ctl, need_at_least, pns, path);
if (!packet_out)
return NULL;
lsquic_send_ctl_scheduled_one(ctl, packet_out);
return packet_out;
}
static void
update_for_resending (lsquic_send_ctl_t *ctl, lsquic_packet_out_t *packet_out)
{
lsquic_packno_t oldno, packno;
/* When the packet is resent, it uses the same number of bytes to encode
* the packet number as the original packet. This follows the reference
* implementation.
*/
oldno = packet_out->po_packno;
packno = send_ctl_next_packno(ctl);
packet_out->po_flags &= ~PO_SENT_SZ;
assert(packet_out->po_frame_types & ~BQUIC_FRAME_REGEN_MASK);
packet_out->po_frame_types &= ~BQUIC_FRAME_REGEN_MASK;
packet_out->po_packno = packno;
lsquic_packet_out_set_ecn(packet_out, ctl->sc_ecn);
if (ctl->sc_ver_neg->vn_tag)
{
assert(packet_out->po_flags & PO_VERSION); /* It can only disappear */
packet_out->po_ver_tag = *ctl->sc_ver_neg->vn_tag;
}
assert(packet_out->po_regen_sz < packet_out->po_data_sz);
if (packet_out->po_regen_sz)
{
if (packet_out->po_flags & PO_SCHED)
ctl->sc_bytes_scheduled -= packet_out->po_regen_sz;
lsquic_packet_out_chop_regen(packet_out);
}
EV_LOG_CONN_EVENT(LSQUIC_LOG_CONN_ID, "schedule resend, repackage packet "
"#%"PRIu64" -> #%"PRIu64, oldno, packno);
}
unsigned
lsquic_send_ctl_reschedule_packets (lsquic_send_ctl_t *ctl)
{
lsquic_packet_out_t *packet_out;
unsigned n = 0;
while ((packet_out = send_ctl_next_lost(ctl)))
{
assert(packet_out->po_regen_sz < packet_out->po_data_sz);
++n;
#if LSQUIC_CONN_STATS
++ctl->sc_conn_pub->conn_stats->out.retx_packets;
#endif
update_for_resending(ctl, packet_out);
lsquic_send_ctl_scheduled_one(ctl, packet_out);
}
if (n)
LSQ_DEBUG("rescheduled %u packets", n);
return n;
}
void
lsquic_send_ctl_set_tcid0 (lsquic_send_ctl_t *ctl, int tcid0)
{
if (tcid0)
{
LSQ_INFO("set TCID flag");
ctl->sc_flags |= SC_TCID0;
}
else
{
LSQ_INFO("unset TCID flag");
ctl->sc_flags &= ~SC_TCID0;
}
}
/* The controller elides STREAM frames of stream `stream_id' from
* scheduled and buffered packets. If a packet becomes empty as a result,
* it is dropped.
*
* Packets on other queues do not need to be processed: unacked packets
* have already been sent, and lost packets' reset stream frames will be
* elided in due time.
*/
void
lsquic_send_ctl_elide_stream_frames (lsquic_send_ctl_t *ctl,
lsquic_stream_id_t stream_id)
{
struct lsquic_packet_out *packet_out, *next;
unsigned n, adj;
int dropped;
dropped = 0;
#ifdef WIN32
next = NULL;
#endif
for (packet_out = TAILQ_FIRST(&ctl->sc_scheduled_packets); packet_out;
packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if ((packet_out->po_frame_types & (1 << QUIC_FRAME_STREAM))
&& 0 == (packet_out->po_flags & PO_MINI))
{
adj = lsquic_packet_out_elide_reset_stream_frames(packet_out,
stream_id);
ctl->sc_bytes_scheduled -= adj;
if (0 == packet_out->po_frame_types)
{
LSQ_DEBUG("cancel packet #%"PRIu64" after eliding frames for "
"stream %"PRIu64, packet_out->po_packno, stream_id);
send_ctl_sched_remove(ctl, packet_out);
send_ctl_destroy_chain(ctl, packet_out, NULL);
send_ctl_destroy_packet(ctl, packet_out);
++dropped;
}
}
}
if (dropped)
lsquic_send_ctl_reset_packnos(ctl);
for (n = 0; n < sizeof(ctl->sc_buffered_packets) /
sizeof(ctl->sc_buffered_packets[0]); ++n)
{
for (packet_out = TAILQ_FIRST(&ctl->sc_buffered_packets[n].bpq_packets);
packet_out; packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (packet_out->po_frame_types & (1 << QUIC_FRAME_STREAM))
{
lsquic_packet_out_elide_reset_stream_frames(packet_out, stream_id);
if (0 == packet_out->po_frame_types)
{
LSQ_DEBUG("cancel buffered packet in queue #%u after eliding "
"frames for stream %"PRIu64, n, stream_id);
TAILQ_REMOVE(&ctl->sc_buffered_packets[n].bpq_packets,
packet_out, po_next);
--ctl->sc_buffered_packets[n].bpq_count;
send_ctl_destroy_packet(ctl, packet_out);
LSQ_DEBUG("Elide packet from buffered queue #%u; count: %u",
n, ctl->sc_buffered_packets[n].bpq_count);
}
}
}
}
}
/* Count how many packets will remain after the squeezing performed by
* lsquic_send_ctl_squeeze_sched(). This is the number of delayed data
* packets.
*/
#ifndef NDEBUG
#if __GNUC__
__attribute__((weak))
#endif
#endif
int
lsquic_send_ctl_have_delayed_packets (const lsquic_send_ctl_t *ctl)
{
const struct lsquic_packet_out *packet_out;
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
if (packet_out->po_regen_sz < packet_out->po_data_sz)
return 1;
return 0;
}
#ifndef NDEBUG
static void
send_ctl_log_packet_q (const lsquic_send_ctl_t *ctl, const char *prefix,
const struct lsquic_packets_tailq *tailq)
{
const lsquic_packet_out_t *packet_out;
unsigned n_packets;
char *buf;
size_t bufsz;
int off;
n_packets = 0;
TAILQ_FOREACH(packet_out, tailq, po_next)
++n_packets;
if (n_packets == 0)
{
LSQ_DEBUG("%s: [<empty set>]", prefix);
return;
}
bufsz = n_packets * sizeof("18446744073709551615" /* UINT64_MAX */);
buf = malloc(bufsz);
if (!buf)
{
LSQ_ERROR("%s: malloc: %s", __func__, strerror(errno));
return;
}
off = 0;
TAILQ_FOREACH(packet_out, tailq, po_next)
{
if (off)
buf[off++] = ' ';
off += sprintf(buf + off, "%"PRIu64, packet_out->po_packno);
}
LSQ_DEBUG("%s: [%s]", prefix, buf);
free(buf);
}
#define LOG_PACKET_Q(queue, prefix) do { \
    if (LSQ_LOG_ENABLED(LSQ_LOG_DEBUG)) \
        send_ctl_log_packet_q(ctl, prefix, queue); \
} while (0)
#else
#define LOG_PACKET_Q(p, q)
#endif
int
lsquic_send_ctl_squeeze_sched (lsquic_send_ctl_t *ctl)
{
struct lsquic_packet_out *packet_out, *next;
int dropped;
#ifndef NDEBUG
int pre_squeeze_logged = 0;
#endif
dropped = 0;
for (packet_out = TAILQ_FIRST(&ctl->sc_scheduled_packets); packet_out;
packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (packet_out->po_regen_sz < packet_out->po_data_sz
|| packet_out->po_frame_types == QUIC_FTBIT_PATH_CHALLENGE)
{
if (packet_out->po_flags & PO_ENCRYPTED)
send_ctl_return_enc_data(ctl, packet_out);
}
else
{
#ifndef NDEBUG
/* Log the whole list before we squeeze for the first time */
if (!pre_squeeze_logged++)
LOG_PACKET_Q(&ctl->sc_scheduled_packets,
"scheduled packets before squeezing");
#endif
send_ctl_sched_remove(ctl, packet_out);
LSQ_DEBUG("Dropping packet #%"PRIu64" from scheduled queue",
packet_out->po_packno);
send_ctl_destroy_chain(ctl, packet_out, NULL);
send_ctl_destroy_packet(ctl, packet_out);
++dropped;
}
}
if (dropped)
lsquic_send_ctl_reset_packnos(ctl);
#ifndef NDEBUG
if (pre_squeeze_logged)
LOG_PACKET_Q(&ctl->sc_scheduled_packets,
"scheduled packets after squeezing");
else if (ctl->sc_n_scheduled > 0)
LOG_PACKET_Q(&ctl->sc_scheduled_packets, "delayed packets");
#endif
return ctl->sc_n_scheduled > 0;
}
void
lsquic_send_ctl_reset_packnos (lsquic_send_ctl_t *ctl)
{
struct lsquic_packet_out *packet_out;
ctl->sc_cur_packno = lsquic_senhist_largest(&ctl->sc_senhist);
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
packet_out->po_flags |= PO_REPACKNO;
}
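/* Move the last `n_acks' scheduled packets -- the assertion below expects
 * them to be ACK packets -- to the front of the scheduled queue so that
 * acknowledgements go out before previously scheduled data packets.  For
 * example (illustrative), a queue of [D1, D2, A3] with n_acks of 1 becomes
 * [A3, D1, D2].
 */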
void
lsquic_send_ctl_ack_to_front (struct lsquic_send_ctl *ctl, unsigned n_acks)
{
struct lsquic_packet_out *ack_packet;
assert(n_acks > 0);
assert(ctl->sc_n_scheduled > n_acks); /* Otherwise, why is this called? */
for ( ; n_acks > 0; --n_acks)
{
ack_packet = TAILQ_LAST(&ctl->sc_scheduled_packets, lsquic_packets_tailq);
assert(ack_packet->po_frame_types & (1 << QUIC_FRAME_ACK));
TAILQ_REMOVE(&ctl->sc_scheduled_packets, ack_packet, po_next);
TAILQ_INSERT_HEAD(&ctl->sc_scheduled_packets, ack_packet, po_next);
}
}
void
lsquic_send_ctl_drop_scheduled (lsquic_send_ctl_t *ctl)
{
struct lsquic_packet_out *packet_out, *next;
unsigned n;
n = 0;
for (packet_out = TAILQ_FIRST(&ctl->sc_scheduled_packets); packet_out;
packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (0 == (packet_out->po_flags & PO_HELLO))
{
send_ctl_sched_remove(ctl, packet_out);
send_ctl_destroy_chain(ctl, packet_out, NULL);
send_ctl_destroy_packet(ctl, packet_out);
++n;
}
}
ctl->sc_senhist.sh_flags |= SH_GAP_OK;
LSQ_DEBUG("dropped %u scheduled packet%s (%u left)", n, n != 1 ? "s" : "",
ctl->sc_n_scheduled);
}
#ifdef NDEBUG
static
#elif __GNUC__
__attribute__((weak))
#endif
enum buf_packet_type
lsquic_send_ctl_determine_bpt (lsquic_send_ctl_t *ctl,
const lsquic_stream_t *stream)
{
const lsquic_stream_t *other_stream;
struct lsquic_hash_elem *el;
struct lsquic_hash *all_streams;
all_streams = ctl->sc_conn_pub->all_streams;
for (el = lsquic_hash_first(all_streams); el;
el = lsquic_hash_next(all_streams))
{
other_stream = lsquic_hashelem_getdata(el);
if (other_stream != stream
&& (!(other_stream->stream_flags & STREAM_U_WRITE_DONE))
&& !lsquic_stream_is_critical(other_stream)
&& other_stream->sm_priority < stream->sm_priority)
return BPT_OTHER_PRIO;
}
return BPT_HIGHEST_PRIO;
}
static enum buf_packet_type
send_ctl_lookup_bpt (lsquic_send_ctl_t *ctl,
const struct lsquic_stream *stream)
{
if (ctl->sc_cached_bpt.stream_id != stream->id)
{
ctl->sc_cached_bpt.stream_id = stream->id;
ctl->sc_cached_bpt.packet_type =
lsquic_send_ctl_determine_bpt(ctl, stream);
}
return ctl->sc_cached_bpt.packet_type;
}
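/* Upper limit on the number of packets in a buffered queue.  Low-priority
 * queues are capped at MAX_BPQ_COUNT, while the highest-priority queue may
 * grow up to the part of the congestion window not already taken up by
 * scheduled and in-flight packets, if that is larger.  For instance (an
 * illustrative calculation), with a 70,000-byte window, 1,400-byte packets,
 * and 10 packets out, up to 40 packets may be buffered.
 */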
static unsigned
send_ctl_max_bpq_count (const lsquic_send_ctl_t *ctl,
enum buf_packet_type packet_type)
{
unsigned long cwnd;
unsigned count;
switch (packet_type)
{
case BPT_OTHER_PRIO:
return MAX_BPQ_COUNT;
case BPT_HIGHEST_PRIO:
default: /* clang does not complain about absence of `default'... */
count = ctl->sc_n_scheduled + ctl->sc_n_in_flight_retx;
cwnd = ctl->sc_ci->cci_get_cwnd(CGP(ctl));
if (count < cwnd / SC_PACK_SIZE(ctl))
{
count = cwnd / SC_PACK_SIZE(ctl) - count;
if (count > MAX_BPQ_COUNT)
return count;
}
return MAX_BPQ_COUNT;
}
}
/* If error is returned, `src' is not modified */
static int
send_ctl_move_ack (struct lsquic_send_ctl *ctl, struct lsquic_packet_out *dst,
struct lsquic_packet_out *src)
{
struct packet_out_frec_iter pofi;
const struct frame_rec *frec;
assert(dst->po_data_sz == 0);
/* This checks that we only ever expect to move an ACK frame from one
* buffered packet to another. We don't generate any other regen frame
* types in buffered packets.
*/
    assert(!(BQUIC_FRAME_REGEN_MASK & src->po_frame_types
                                                & ~QUIC_FTBIT_ACK));
if (lsquic_packet_out_avail(dst) >= src->po_regen_sz
&& (frec = lsquic_pofi_first(&pofi, src), frec != NULL)
&& frec->fe_frame_type == QUIC_FRAME_ACK)
{
memcpy(dst->po_data, src->po_data, src->po_regen_sz);
if (0 != lsquic_packet_out_add_frame(dst, &ctl->sc_enpub->enp_mm,
frec->fe_frame_type, QUIC_FRAME_ACK, dst->po_data_sz,
src->po_regen_sz))
return -1;
dst->po_data_sz = src->po_regen_sz;
dst->po_regen_sz = src->po_regen_sz;
dst->po_frame_types |= (BQUIC_FRAME_REGEN_MASK & src->po_frame_types);
src->po_frame_types &= ~BQUIC_FRAME_REGEN_MASK;
lsquic_packet_out_chop_regen(src);
}
return 0;
}
static lsquic_packet_out_t *
send_ctl_get_buffered_packet (lsquic_send_ctl_t *ctl,
enum buf_packet_type packet_type, unsigned need_at_least,
const struct network_path *path, const struct lsquic_stream *stream)
{
struct buf_packet_q *const packet_q =
&ctl->sc_buffered_packets[packet_type];
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
lsquic_packet_out_t *packet_out;
enum packno_bits bits;
enum { AA_STEAL, AA_GENERATE, AA_NONE, } ack_action;
packet_out = TAILQ_LAST(&packet_q->bpq_packets, lsquic_packets_tailq);
if (packet_out
&& !(packet_out->po_flags & PO_STREAM_END)
&& lsquic_packet_out_avail(packet_out) >= need_at_least)
{
return packet_out;
}
if (packet_q->bpq_count >= send_ctl_max_bpq_count(ctl, packet_type))
return NULL;
if (packet_q->bpq_count == 0)
{
/* If ACK was written to the low-priority queue first, steal it */
if (packet_q == &ctl->sc_buffered_packets[BPT_HIGHEST_PRIO]
&& !TAILQ_EMPTY(&ctl->sc_buffered_packets[BPT_OTHER_PRIO].bpq_packets)
&& (TAILQ_FIRST(&ctl->sc_buffered_packets[BPT_OTHER_PRIO].bpq_packets)
->po_frame_types & QUIC_FTBIT_ACK))
{
LSQ_DEBUG("steal ACK frame from low-priority buffered queue");
ack_action = AA_STEAL;
bits = ctl->sc_max_packno_bits;
}
/* If ACK can be generated, write it to the first buffered packet. */
else if (lconn->cn_if->ci_can_write_ack(lconn))
{
LSQ_DEBUG("generate ACK frame for first buffered packet in "
"queue #%u", packet_type);
ack_action = AA_GENERATE;
            /* Packet length is set to the largest possible size to guarantee
             * that the buffered packet with the ACK will not need to be split.
             */
bits = ctl->sc_max_packno_bits;
}
else
goto no_ack_action;
}
else
{
no_ack_action:
ack_action = AA_NONE;
bits = lsquic_send_ctl_guess_packno_bits(ctl);
}
packet_out = send_ctl_allocate_packet(ctl, bits, need_at_least, PNS_APP,
path);
if (!packet_out)
return NULL;
switch (ack_action)
{
case AA_STEAL:
if (0 != send_ctl_move_ack(ctl, packet_out,
TAILQ_FIRST(&ctl->sc_buffered_packets[BPT_OTHER_PRIO].bpq_packets)))
{
LSQ_INFO("cannot move ack");
lsquic_packet_out_destroy(packet_out, ctl->sc_enpub,
packet_out->po_path->np_peer_ctx);
return NULL;
}
break;
case AA_GENERATE:
lconn->cn_if->ci_write_ack(lconn, packet_out);
break;
case AA_NONE:
break;
}
TAILQ_INSERT_TAIL(&packet_q->bpq_packets, packet_out, po_next);
++packet_q->bpq_count;
LSQ_DEBUG("Add new packet to buffered queue #%u; count: %u",
packet_type, packet_q->bpq_count);
return packet_out;
}
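/* Before stream data is buffered, give the QPACK decoder stream a chance
 * to flush, presumably so that decoder instructions are not stuck waiting
 * behind buffered application packets.
 */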
static void
send_ctl_maybe_flush_decoder (struct lsquic_send_ctl *ctl,
const struct lsquic_stream *caller)
{
struct lsquic_stream *decoder;
if ((ctl->sc_flags & SC_IETF) && ctl->sc_conn_pub->u.ietf.qdh)
{
decoder = ctl->sc_conn_pub->u.ietf.qdh->qdh_dec_sm_out;
if (decoder && decoder != caller
&& lsquic_stream_has_data_to_flush(decoder))
{
LSQ_DEBUG("flushing decoder stream");
lsquic_stream_flush(decoder);
}
}
}
lsquic_packet_out_t *
lsquic_send_ctl_get_packet_for_stream (lsquic_send_ctl_t *ctl,
unsigned need_at_least, const struct network_path *path,
const struct lsquic_stream *stream)
{
enum buf_packet_type packet_type;
if (lsquic_send_ctl_schedule_stream_packets_immediately(ctl))
return lsquic_send_ctl_get_writeable_packet(ctl, PNS_APP,
need_at_least, path, 0, NULL);
else
{
if (!lsquic_send_ctl_has_buffered(ctl))
send_ctl_maybe_flush_decoder(ctl, stream);
packet_type = send_ctl_lookup_bpt(ctl, stream);
return send_ctl_get_buffered_packet(ctl, packet_type, need_at_least,
path, stream);
}
}
/* Return true if generated, false otherwise */
int
lsquic_sendctl_gen_stream_blocked_frame (struct lsquic_send_ctl *ctl,
struct lsquic_stream *stream)
{
struct lsquic_packet_out *packet_out;
const struct parse_funcs *const pf = stream->conn_pub->lconn->cn_pf;
unsigned need;
uint64_t off;
int sz;
int is_err;
off = lsquic_stream_combined_send_off(stream);
need = pf->pf_stream_blocked_frame_size(stream->id, off);
packet_out = lsquic_send_ctl_get_packet_for_stream(ctl, need,
stream->conn_pub->path, stream);
if (!packet_out)
{
LSQ_DEBUG("failed to get packet_out with lsquic_send_ctl_get_packet_for_stream");
packet_out = lsquic_send_ctl_get_writeable_packet(ctl,
PNS_APP, need, stream->conn_pub->path, 0, &is_err);
if (!packet_out)
{
LSQ_DEBUG("cannot get writeable packet for STREAM_BLOCKED frame");
return 0;
}
}
sz = pf->pf_gen_stream_blocked_frame(
packet_out->po_data + packet_out->po_data_sz,
lsquic_packet_out_avail(packet_out), stream->id, off);
if (sz < 0)
return 0;
LSQ_DEBUG("generated %d-byte STREAM_BLOCKED "
"frame; stream_id: %"PRIu64"; offset: %"PRIu64, sz, stream->id, off);
EV_LOG_CONN_EVENT(LSQUIC_LOG_CONN_ID, "generated %d-byte STREAM_BLOCKED "
"frame; stream_id: %"PRIu64"; offset: %"PRIu64, sz, stream->id, off);
if (0 != lsquic_packet_out_add_frame(packet_out, &ctl->sc_enpub->enp_mm, 0,
QUIC_FRAME_STREAM_BLOCKED, packet_out->po_data_sz, sz))
return 0;
lsquic_send_ctl_incr_pack_sz(ctl, packet_out, sz);
packet_out->po_frame_types |= 1 << QUIC_FRAME_STREAM_BLOCKED;
lsquic_stream_blocked_frame_sent(stream);
return 1;
}
#ifdef NDEBUG
static
#elif __GNUC__
__attribute__((weak))
#endif
enum packno_bits
lsquic_send_ctl_calc_packno_bits (lsquic_send_ctl_t *ctl)
{
lsquic_packno_t smallest_unacked;
enum packno_bits bits;
unsigned n_in_flight;
unsigned long cwnd;
const struct parse_funcs *pf;
pf = ctl->sc_conn_pub->lconn->cn_pf;
smallest_unacked = lsquic_send_ctl_smallest_unacked(ctl);
cwnd = ctl->sc_ci->cci_get_cwnd(CGP(ctl));
n_in_flight = cwnd / SC_PACK_SIZE(ctl);
bits = pf->pf_calc_packno_bits(ctl->sc_cur_packno + 1, smallest_unacked,
n_in_flight);
if (bits <= ctl->sc_max_packno_bits)
return bits;
else
return ctl->sc_max_packno_bits;
}
enum packno_bits
lsquic_send_ctl_packno_bits (struct lsquic_send_ctl *ctl,
enum packnum_space pns)
{
if ((ctl->sc_flags & (SC_ACK_RECV_INIT << pns))
&& lsquic_send_ctl_schedule_stream_packets_immediately(ctl))
return lsquic_send_ctl_calc_packno_bits(ctl);
else if (ctl->sc_flags & (SC_ACK_RECV_INIT << pns))
return lsquic_send_ctl_guess_packno_bits(ctl);
else
/* From [draft-ietf-quic-transport-31] Section 17.1:
*
" Prior to receiving an acknowledgement for a packet number space, the
" full packet number MUST be included; it is not to be truncated as
" described below.
*/
return vint_val2bits(ctl->sc_cur_packno + 1);
}
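/* The resize_one_* callbacks below feed a single oversized packet (the
 * "victim") through the packet-resize machinery: the victim is fetched
 * exactly once, replacement packets are allocated in the same packet number
 * space and on the same path, and discarding the victim is deferred to the
 * caller, which still needs it as a TAILQ insertion point.
 */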
struct resize_one_packet_ctx
{
struct lsquic_send_ctl *const ctl;
struct lsquic_packet_out *const victim;
const struct network_path *const path;
const enum packnum_space pns;
int discarded, fetched;
};
static struct lsquic_packet_out *
resize_one_next_packet (void *ctx)
{
struct resize_one_packet_ctx *const one_ctx = ctx;
if (one_ctx->fetched)
return NULL;
++one_ctx->fetched;
return one_ctx->victim;
}
static void
resize_one_discard_packet (void *ctx, struct lsquic_packet_out *packet_out)
{
struct resize_one_packet_ctx *const one_ctx = ctx;
/* Delay discarding the packet: we need it for TAILQ_INSERT_BEFORE */
++one_ctx->discarded;
}
static struct lsquic_packet_out *
resize_one_new_packet (void *ctx)
{
struct resize_one_packet_ctx *const one_ctx = ctx;
struct lsquic_send_ctl *const ctl = one_ctx->ctl;
struct lsquic_packet_out *packet_out;
enum packno_bits bits;
bits = lsquic_send_ctl_calc_packno_bits(ctl);
packet_out = send_ctl_allocate_packet(ctl, bits, 0, one_ctx->pns,
one_ctx->path);
return packet_out;
}
static const struct packet_resize_if resize_one_funcs =
{
resize_one_next_packet,
resize_one_discard_packet,
resize_one_new_packet,
};
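/* Split the packet at the head of a buffered queue into smaller packets,
 * inserting the replacements before it and then destroying the original.
 * This is used when a buffered packet constructed with guessed packet
 * number bits turns out too large once the real packet number length is
 * known.
 */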
static int
split_buffered_packet (lsquic_send_ctl_t *ctl,
enum buf_packet_type packet_type, struct lsquic_packet_out *packet_out)
{
struct buf_packet_q *const packet_q =
&ctl->sc_buffered_packets[packet_type];
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
struct lsquic_packet_out *new;
struct packet_resize_ctx prctx;
struct resize_one_packet_ctx one_ctx = {
ctl, packet_out, packet_out->po_path,
lsquic_packet_out_pns(packet_out), 0, 0,
};
unsigned count;
assert(TAILQ_FIRST(&packet_q->bpq_packets) == packet_out);
lsquic_packet_resize_init(&prctx, ctl->sc_enpub, lconn, &one_ctx,
&resize_one_funcs);
count = 0;
while (new = lsquic_packet_resize_next(&prctx), new != NULL)
{
++count;
TAILQ_INSERT_BEFORE(packet_out, new, po_next);
++packet_q->bpq_count;
LSQ_DEBUG("Add split packet to buffered queue #%u; count: %u",
packet_type, packet_q->bpq_count);
}
if (lsquic_packet_resize_is_error(&prctx))
{
LSQ_WARN("error resizing buffered packet #%"PRIu64,
packet_out->po_packno);
return -1;
}
if (!(count > 1 && one_ctx.fetched == 1 && one_ctx.discarded == 1))
{
/* A bit of insurance, this being new code */
LSQ_WARN("unexpected values resizing buffered packet: count: %u; "
"fetched: %d; discarded: %d", count, one_ctx.fetched,
one_ctx.discarded);
return -1;
}
LSQ_DEBUG("added %u packets to the buffered queue #%u", count, packet_type);
LSQ_DEBUG("drop oversized buffered packet #%"PRIu64, packet_out->po_packno);
TAILQ_REMOVE(&packet_q->bpq_packets, packet_out, po_next);
    --packet_q->bpq_count;
assert(packet_out->po_loss_chain == packet_out);
send_ctl_destroy_packet(ctl, packet_out);
return 0;
}
int
lsquic_send_ctl_schedule_buffered (lsquic_send_ctl_t *ctl,
enum buf_packet_type packet_type)
{
struct buf_packet_q *const packet_q =
&ctl->sc_buffered_packets[packet_type];
const struct parse_funcs *const pf = ctl->sc_conn_pub->lconn->cn_pf;
lsquic_packet_out_t *packet_out;
unsigned used;
assert(lsquic_send_ctl_schedule_stream_packets_immediately(ctl));
const enum packno_bits bits = lsquic_send_ctl_calc_packno_bits(ctl);
const unsigned need = pf->pf_packno_bits2len(bits);
while ((packet_out = TAILQ_FIRST(&packet_q->bpq_packets)) &&
lsquic_send_ctl_can_send(ctl))
{
if ((packet_out->po_frame_types & QUIC_FTBIT_ACK)
&& packet_out->po_ack2ed < ctl->sc_largest_acked)
{
/* Chrome watches for a decrease in the value of the Largest
* Observed field of the ACK frame and marks it as an error:
             * this is why we have to send out ACKs in the order they were
             * generated.
*/
LSQ_DEBUG("Remove out-of-order ACK from buffered packet");
lsquic_packet_out_chop_regen(packet_out);
if (packet_out->po_data_sz == 0)
{
LSQ_DEBUG("Dropping now-empty buffered packet");
TAILQ_REMOVE(&packet_q->bpq_packets, packet_out, po_next);
--packet_q->bpq_count;
send_ctl_destroy_packet(ctl, packet_out);
continue;
}
}
if (bits != lsquic_packet_out_packno_bits(packet_out))
{
used = pf->pf_packno_bits2len(
lsquic_packet_out_packno_bits(packet_out));
if (need > used
&& need - used > lsquic_packet_out_avail(packet_out))
{
if (0 == split_buffered_packet(ctl, packet_type, packet_out))
packet_out = TAILQ_FIRST(&packet_q->bpq_packets);
else
return -1;
}
}
TAILQ_REMOVE(&packet_q->bpq_packets, packet_out, po_next);
--packet_q->bpq_count;
packet_out->po_packno = send_ctl_next_packno(ctl);
LSQ_DEBUG("Remove packet from buffered queue #%u; count: %u. "
"It becomes packet #%"PRIu64, packet_type, packet_q->bpq_count,
packet_out->po_packno);
lsquic_send_ctl_scheduled_one(ctl, packet_out);
}
return 0;
}
int
lsquic_send_ctl_turn_on_fin (struct lsquic_send_ctl *ctl,
const struct lsquic_stream *stream)
{
enum buf_packet_type packet_type;
struct buf_packet_q *packet_q;
lsquic_packet_out_t *packet_out;
const struct parse_funcs *pf;
pf = ctl->sc_conn_pub->lconn->cn_pf;
packet_type = send_ctl_lookup_bpt(ctl, stream);
packet_q = &ctl->sc_buffered_packets[packet_type];
TAILQ_FOREACH_REVERSE(packet_out, &packet_q->bpq_packets,
lsquic_packets_tailq, po_next)
if (0 == lsquic_packet_out_turn_on_fin(packet_out, pf, stream))
return 0;
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
if (0 == packet_out->po_sent
&& 0 == lsquic_packet_out_turn_on_fin(packet_out, pf, stream))
{
return 0;
}
return -1;
}
size_t
lsquic_send_ctl_mem_used (const struct lsquic_send_ctl *ctl)
{
const lsquic_packet_out_t *packet_out;
unsigned n;
size_t size;
const struct lsquic_packets_tailq queues[] = {
ctl->sc_scheduled_packets,
ctl->sc_unacked_packets[PNS_INIT],
ctl->sc_unacked_packets[PNS_HSK],
ctl->sc_unacked_packets[PNS_APP],
ctl->sc_lost_packets,
ctl->sc_buffered_packets[0].bpq_packets,
ctl->sc_buffered_packets[1].bpq_packets,
};
size = sizeof(*ctl);
for (n = 0; n < sizeof(queues) / sizeof(queues[0]); ++n)
TAILQ_FOREACH(packet_out, &queues[n], po_next)
size += lsquic_packet_out_mem_used(packet_out);
return size;
}
void
lsquic_send_ctl_verneg_done (struct lsquic_send_ctl *ctl)
{
ctl->sc_max_packno_bits = PACKNO_BITS_3;
LSQ_DEBUG("version negotiation done (%s): max packno bits: %u",
lsquic_ver2str[ ctl->sc_conn_pub->lconn->cn_version ],
ctl->sc_max_packno_bits);
}
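/* Truncate the packet so that it ends at the last real frame, dropping
 * trailing PADDING bytes.  This is used below when Initial packets are
 * resent after a Retry: the new token may need the room, and any necessary
 * padding is re-added when the packet is sent.
 */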
static void
strip_trailing_padding (struct lsquic_packet_out *packet_out)
{
struct packet_out_frec_iter pofi;
const struct frame_rec *frec;
unsigned off;
off = 0;
for (frec = lsquic_pofi_first(&pofi, packet_out); frec;
frec = lsquic_pofi_next(&pofi))
off = frec->fe_off + frec->fe_len;
assert(off);
packet_out->po_data_sz = off;
packet_out->po_frame_types &= ~QUIC_FTBIT_PADDING;
}
static int
split_lost_packet (struct lsquic_send_ctl *ctl,
struct lsquic_packet_out *const packet_out)
{
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
struct lsquic_packet_out *new;
struct packet_resize_ctx prctx;
struct resize_one_packet_ctx one_ctx = {
ctl, packet_out, packet_out->po_path,
lsquic_packet_out_pns(packet_out), 0, 0,
};
unsigned count;
assert(packet_out->po_flags & PO_LOST);
lsquic_packet_resize_init(&prctx, ctl->sc_enpub, lconn, &one_ctx,
&resize_one_funcs);
count = 0;
while (new = lsquic_packet_resize_next(&prctx), new != NULL)
{
++count;
TAILQ_INSERT_BEFORE(packet_out, new, po_next);
new->po_flags |= PO_LOST;
}
if (lsquic_packet_resize_is_error(&prctx))
{
LSQ_WARN("error resizing lost packet #%"PRIu64, packet_out->po_packno);
return -1;
}
if (!(count > 1 && one_ctx.fetched == 1 && one_ctx.discarded == 1))
{
/* A bit of insurance, this being new code */
LSQ_WARN("unexpected values resizing lost packet: count: %u; "
"fetched: %d; discarded: %d", count, one_ctx.fetched,
one_ctx.discarded);
return -1;
}
LSQ_DEBUG("added %u packets to the lost queue", count);
LSQ_DEBUG("drop oversized lost packet #%"PRIu64, packet_out->po_packno);
TAILQ_REMOVE(&ctl->sc_lost_packets, packet_out, po_next);
packet_out->po_flags &= ~PO_LOST;
send_ctl_destroy_chain(ctl, packet_out, NULL);
send_ctl_destroy_packet(ctl, packet_out);
return 0;
}
int
lsquic_send_ctl_retry (struct lsquic_send_ctl *ctl,
const unsigned char *token, size_t token_sz)
{
struct lsquic_packet_out *packet_out, *next;
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
size_t sz;
if (token_sz >= 1ull << (sizeof(packet_out->po_token_len) * 8))
{
LSQ_WARN("token size %zu is too long", token_sz);
return -1;
}
++ctl->sc_retry_count;
if (ctl->sc_retry_count > 3)
{
LSQ_INFO("failing connection after %u retries", ctl->sc_retry_count);
return -1;
}
send_ctl_expire(ctl, PNS_INIT, EXFI_ALL);
if (0 != lsquic_send_ctl_set_token(ctl, token, token_sz))
return -1;
for (packet_out = TAILQ_FIRST(&ctl->sc_lost_packets); packet_out; packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (HETY_INITIAL != packet_out->po_header_type)
continue;
if (packet_out->po_nonce)
{
free(packet_out->po_nonce);
packet_out->po_nonce = NULL;
packet_out->po_flags &= ~PO_NONCE;
}
if (0 != send_ctl_set_packet_out_token(ctl, packet_out))
{
LSQ_INFO("cannot set out token on packet");
return -1;
}
if (packet_out->po_frame_types & QUIC_FTBIT_PADDING)
strip_trailing_padding(packet_out);
sz = lconn->cn_pf->pf_packout_size(lconn, packet_out);
if (sz > packet_out->po_path->np_pack_size
&& 0 != split_lost_packet(ctl, packet_out))
return -1;
}
return 0;
}
int
lsquic_send_ctl_set_token (struct lsquic_send_ctl *ctl,
const unsigned char *token, size_t token_sz)
{
unsigned char *copy;
    if (token_sz >= 1ull <<
                (sizeof(((struct lsquic_packet_out *)0)->po_token_len) * 8))
{
errno = EINVAL;
return -1;
}
copy = malloc(token_sz);
if (!copy)
return -1;
memcpy(copy, token, token_sz);
free(ctl->sc_token);
ctl->sc_token = copy;
ctl->sc_token_sz = token_sz;
LSQ_DEBUG("set token");
return 0;
}
void
lsquic_send_ctl_maybe_calc_rough_rtt (struct lsquic_send_ctl *ctl,
enum packnum_space pns)
{
const struct lsquic_packet_out *packet_out;
lsquic_time_t min_sent, rtt;
struct lsquic_packets_tailq *const *q;
struct lsquic_packets_tailq *const queues[] = {
&ctl->sc_lost_packets,
&ctl->sc_unacked_packets[pns],
};
if ((ctl->sc_flags & SC_ROUGH_RTT)
|| lsquic_rtt_stats_get_srtt(&ctl->sc_conn_pub->rtt_stats))
return;
min_sent = UINT64_MAX;
for (q = queues; q < queues + sizeof(queues) / sizeof(queues[0]); ++q)
TAILQ_FOREACH(packet_out, *q, po_next)
if (min_sent > packet_out->po_sent)
min_sent = packet_out->po_sent;
/* If we do not have an RTT estimate yet, get a rough estimate of it,
* because now we will ignore packets that carry acknowledgements and
* RTT estimation may be delayed.
*/
if (min_sent < UINT64_MAX)
{
rtt = lsquic_time_now() - min_sent;
if (rtt > 500000)
rtt = 500000;
lsquic_rtt_stats_update(&ctl->sc_conn_pub->rtt_stats, rtt, 0);
ctl->sc_flags |= SC_ROUGH_RTT;
LSQ_DEBUG("set rough RTT to %"PRIu64" usec", rtt);
}
}
void
lsquic_send_ctl_empty_pns (struct lsquic_send_ctl *ctl, enum packnum_space pns)
{
lsquic_packet_out_t *packet_out, *next;
unsigned count, packet_sz;
struct lsquic_packets_tailq *const *q;
struct lsquic_packets_tailq *const queues[] = {
&ctl->sc_lost_packets,
&ctl->sc_buffered_packets[0].bpq_packets,
&ctl->sc_buffered_packets[1].bpq_packets,
};
    /* Don't bother with chain destruction, as all chain members are always
     * within the same packet number space.
*/
count = 0;
for (packet_out = TAILQ_FIRST(&ctl->sc_scheduled_packets); packet_out;
packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (pns == lsquic_packet_out_pns(packet_out))
{
send_ctl_maybe_renumber_sched_to_right(ctl, packet_out);
send_ctl_sched_remove(ctl, packet_out);
send_ctl_destroy_packet(ctl, packet_out);
++count;
}
}
for (packet_out = TAILQ_FIRST(&ctl->sc_unacked_packets[pns]); packet_out;
packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (packet_out->po_flags & (PO_LOSS_REC|PO_POISON))
TAILQ_REMOVE(&ctl->sc_unacked_packets[pns], packet_out, po_next);
else
{
packet_sz = packet_out_sent_sz(packet_out);
send_ctl_unacked_remove(ctl, packet_out, packet_sz);
lsquic_packet_out_ack_streams(packet_out);
}
send_ctl_destroy_packet(ctl, packet_out);
++count;
}
for (q = queues; q < queues + sizeof(queues) / sizeof(queues[0]); ++q)
for (packet_out = TAILQ_FIRST(*q); packet_out; packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (pns == lsquic_packet_out_pns(packet_out))
{
TAILQ_REMOVE(*q, packet_out, po_next);
send_ctl_destroy_packet(ctl, packet_out);
++count;
}
}
lsquic_alarmset_unset(ctl->sc_alset, AL_RETX_INIT + pns);
ctl->sc_flags &= ~(SC_LOST_ACK_INIT << pns);
LSQ_DEBUG("emptied %s, destroyed %u packet%.*s", lsquic_pns2str[pns],
count, count != 1, "s");
}
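/* Unlike the resize_one_* callbacks above, the resize_many_* callbacks
 * drain a whole input queue, destroying each source packet once its
 * contents have been moved, and allocate replacement packets in the
 * application packet number space on the given path.
 */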
struct resize_many_packet_ctx
{
struct lsquic_send_ctl *ctl;
struct lsquic_packets_tailq input_q;
const struct network_path *path;
};
static struct lsquic_packet_out *
resize_many_next_packet (void *ctx)
{
struct resize_many_packet_ctx *const many_ctx = ctx;
struct lsquic_packet_out *packet_out;
packet_out = TAILQ_FIRST(&many_ctx->input_q);
if (packet_out)
TAILQ_REMOVE(&many_ctx->input_q, packet_out, po_next);
return packet_out;
}
static void
resize_many_discard_packet (void *ctx, struct lsquic_packet_out *packet_out)
{
struct resize_many_packet_ctx *const many_ctx = ctx;
struct lsquic_send_ctl *const ctl = many_ctx->ctl;
send_ctl_destroy_chain(ctl, packet_out, NULL);
send_ctl_destroy_packet(ctl, packet_out);
}
static struct lsquic_packet_out *
resize_many_new_packet (void *ctx)
{
struct resize_many_packet_ctx *const many_ctx = ctx;
struct lsquic_send_ctl *const ctl = many_ctx->ctl;
struct lsquic_packet_out *packet_out;
enum packno_bits bits;
bits = lsquic_send_ctl_calc_packno_bits(ctl);
packet_out = send_ctl_allocate_packet(ctl, bits, 0, PNS_APP,
many_ctx->path);
return packet_out;
}
static const struct packet_resize_if resize_many_funcs =
{
resize_many_next_packet,
resize_many_discard_packet,
resize_many_new_packet,
};
static void
send_ctl_resize_q (struct lsquic_send_ctl *ctl, struct lsquic_packets_tailq *q,
const struct network_path *const path)
{
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
struct lsquic_packet_out *next, *packet_out;
struct resize_many_packet_ctx many_ctx;
struct packet_resize_ctx prctx;
const char *q_name;
unsigned count_src = 0, count_dst = 0;
int idx;
#ifdef _MSC_VER
idx = 0;
#endif
/* Initialize input, removing packets from source queue, filtering by path.
* Note: this may reorder packets from different paths.
*/
many_ctx.ctl = ctl;
many_ctx.path = path;
TAILQ_INIT(&many_ctx.input_q);
if (q == &ctl->sc_scheduled_packets)
{
ctl->sc_cur_packno = lsquic_senhist_largest(&ctl->sc_senhist);
q_name = "scheduled";
for (packet_out = TAILQ_FIRST(q); packet_out != NULL; packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (packet_out->po_path == path
&& !(packet_out->po_flags & PO_MTU_PROBE))
{
send_ctl_sched_remove(ctl, packet_out);
TAILQ_INSERT_TAIL(&many_ctx.input_q, packet_out, po_next);
++count_src;
}
}
}
else
{
/* This function only deals with scheduled or buffered queues */
assert(q == &ctl->sc_buffered_packets[0].bpq_packets
|| q == &ctl->sc_buffered_packets[1].bpq_packets);
idx = q == &ctl->sc_buffered_packets[1].bpq_packets;
q_name = "buffered";
for (packet_out = TAILQ_FIRST(q); packet_out != NULL; packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (packet_out->po_path == path)
{
TAILQ_REMOVE(q, packet_out, po_next);
--ctl->sc_buffered_packets[idx].bpq_count;
TAILQ_INSERT_TAIL(&many_ctx.input_q, packet_out, po_next);
++count_src;
}
}
}
lsquic_packet_resize_init(&prctx, ctl->sc_enpub, lconn, &many_ctx,
&resize_many_funcs);
/* Get new packets, appending them to appropriate queue */
if (q == &ctl->sc_scheduled_packets)
while (packet_out = lsquic_packet_resize_next(&prctx), packet_out != NULL)
{
++count_dst;
packet_out->po_packno = send_ctl_next_packno(ctl);
send_ctl_sched_append(ctl, packet_out);
LSQ_DEBUG("created packet #%"PRIu64, packet_out->po_packno);
EV_LOG_PACKET_CREATED(LSQUIC_LOG_CONN_ID, packet_out);
}
else
while (packet_out = lsquic_packet_resize_next(&prctx), packet_out != NULL)
{
++count_dst;
TAILQ_INSERT_TAIL(q, packet_out, po_next);
++ctl->sc_buffered_packets[idx].bpq_count;
}
/* Verify success */
if (lsquic_packet_resize_is_error(&prctx))
{
LSQ_WARN("error resizing packets in %s queue", q_name);
goto err;
}
if (count_dst < 1 || !TAILQ_EMPTY(&many_ctx.input_q))
{
/* A bit of insurance, this being new code */
LSQ_WARN("unexpected values resizing packets in %s queue: count: %d; "
"empty: %d", q_name, count_dst, TAILQ_EMPTY(&many_ctx.input_q));
goto err;
}
LSQ_DEBUG("resized %u packets in %s queue, outputting %u packets",
count_src, q_name, count_dst);
return;
err:
lconn->cn_if->ci_internal_error(lconn, "error resizing packets");
return;
}
void
lsquic_send_ctl_repath (struct lsquic_send_ctl *ctl,
const struct network_path *old, const struct network_path *new,
int keep_path_properties)
{
struct lsquic_packet_out *packet_out;
unsigned count;
struct lsquic_packets_tailq *const *q;
struct lsquic_packets_tailq *const queues[] = {
&ctl->sc_scheduled_packets,
&ctl->sc_unacked_packets[PNS_INIT],
&ctl->sc_unacked_packets[PNS_HSK],
&ctl->sc_unacked_packets[PNS_APP],
&ctl->sc_lost_packets,
&ctl->sc_buffered_packets[0].bpq_packets,
&ctl->sc_buffered_packets[1].bpq_packets,
};
assert(ctl->sc_flags & SC_IETF);
count = 0;
for (q = queues; q < queues + sizeof(queues) / sizeof(queues[0]); ++q)
TAILQ_FOREACH(packet_out, *q, po_next)
if (packet_out->po_path == old)
{
++count;
packet_out->po_path = new;
if (packet_out->po_flags & PO_ENCRYPTED)
send_ctl_return_enc_data(ctl, packet_out);
if (packet_out->po_frame_types
& (QUIC_FTBIT_PATH_CHALLENGE|QUIC_FTBIT_PATH_RESPONSE))
/* This is a corner case, we just want to avoid protocol
* violation. No optimization is done. If we happen to
* send a packet of padding, oh well.
*/
lsquic_packet_out_pad_over(packet_out,
QUIC_FTBIT_PATH_CHALLENGE|QUIC_FTBIT_PATH_RESPONSE);
}
LSQ_DEBUG("repathed %u packet%.*s", count, count != 1, "s");
if (keep_path_properties)
LSQ_DEBUG("keeping path properties: MTU, RTT, and CC state");
else
{
lsquic_send_ctl_resize(ctl);
memset(&ctl->sc_conn_pub->rtt_stats, 0,
sizeof(ctl->sc_conn_pub->rtt_stats));
ctl->sc_ci->cci_reinit(CGP(ctl));
}
}
/* Drop PATH_CHALLENGE and PATH_RESPONSE packets for path `path'. */
void
lsquic_send_ctl_cancel_path_verification (struct lsquic_send_ctl *ctl,
const struct network_path *path)
{
struct lsquic_packet_out *packet_out, *next;
    /* We need only examine the scheduled queue, as lost challenges and
     * responses are not retransmitted.
*/
for (packet_out = TAILQ_FIRST(&ctl->sc_scheduled_packets); packet_out;
packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (packet_out->po_path == path)
{
assert((packet_out->po_frame_types
& (QUIC_FTBIT_PATH_CHALLENGE|QUIC_FTBIT_PATH_RESPONSE))
|| packet_out->po_frame_types == QUIC_FTBIT_PADDING);
assert(!(packet_out->po_frame_types & ctl->sc_retx_frames));
send_ctl_maybe_renumber_sched_to_right(ctl, packet_out);
send_ctl_sched_remove(ctl, packet_out);
assert(packet_out->po_loss_chain == packet_out);
send_ctl_destroy_packet(ctl, packet_out);
}
}
}
/* Examine packets in scheduled and buffered queues and resize packets if
* they exceed path MTU.
*/
void
lsquic_send_ctl_resize (struct lsquic_send_ctl *ctl)
{
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
struct lsquic_packet_out *packet_out;
struct lsquic_packets_tailq *const *q;
struct lsquic_packets_tailq *const queues[] = {
&ctl->sc_scheduled_packets,
&ctl->sc_buffered_packets[0].bpq_packets,
&ctl->sc_buffered_packets[1].bpq_packets,
};
size_t size;
int path_ids /* assuming a reasonable number of paths */, q_idxs;
assert(ctl->sc_flags & SC_IETF);
q_idxs = 0;
for (q = queues; q < queues + sizeof(queues) / sizeof(queues[0]); ++q)
{
path_ids = 0;
redo_q:
TAILQ_FOREACH(packet_out, *q, po_next)
if (0 == (path_ids & (1 << packet_out->po_path->np_path_id))
&& !(packet_out->po_flags & PO_MTU_PROBE))
{
size = lsquic_packet_out_total_sz(lconn, packet_out);
if (size > packet_out->po_path->np_pack_size)
{
path_ids |= 1 << packet_out->po_path->np_path_id;
q_idxs |= 1 << (q - queues);
send_ctl_resize_q(ctl, *q, packet_out->po_path);
goto redo_q;
}
}
}
LSQ_DEBUG("resized packets in queues: 0x%X", q_idxs);
lsquic_send_ctl_sanity_check(ctl);
}
void
lsquic_send_ctl_return_enc_data (struct lsquic_send_ctl *ctl)
{
struct lsquic_packet_out *packet_out;
assert(!(ctl->sc_flags & SC_IETF));
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
if (packet_out->po_flags & PO_ENCRYPTED)
send_ctl_return_enc_data(ctl, packet_out);
}
/* When the client updates the DCID based on the first packet returned by the
 * server, we must update the number of bytes scheduled if the DCID length
 * changed, because this length is used to calculate packet size.
*/
void
lsquic_send_ctl_cidlen_change (struct lsquic_send_ctl *ctl,
unsigned orig_cid_len, unsigned new_cid_len)
{
unsigned diff;
assert(!(ctl->sc_conn_pub->lconn->cn_flags & LSCONN_SERVER));
if (ctl->sc_n_scheduled)
{
ctl->sc_flags |= SC_CIDLEN;
ctl->sc_cidlen = (signed char) new_cid_len - (signed char) orig_cid_len;
if (new_cid_len > orig_cid_len)
{
diff = new_cid_len - orig_cid_len;
diff *= ctl->sc_n_scheduled;
ctl->sc_bytes_scheduled += diff;
LSQ_DEBUG("increased bytes scheduled by %u bytes to %u",
diff, ctl->sc_bytes_scheduled);
}
else if (new_cid_len < orig_cid_len)
{
diff = orig_cid_len - new_cid_len;
diff *= ctl->sc_n_scheduled;
ctl->sc_bytes_scheduled -= diff;
LSQ_DEBUG("decreased bytes scheduled by %u bytes to %u",
diff, ctl->sc_bytes_scheduled);
}
else
LSQ_DEBUG("DCID length did not change");
}
else
LSQ_DEBUG("no scheduled packets at the time of DCID change");
}
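/* Pick a packet number to skip: `sc_gap' is never assigned to an outgoing
 * packet.  A peer that acknowledges the skipped number must be lying,
 * which is how optimistic ACK attacks are detected; the actual check is
 * performed where incoming ACKs are processed.
 */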
void
lsquic_send_ctl_begin_optack_detection (struct lsquic_send_ctl *ctl)
{
uint8_t rand;
rand = lsquic_crand_get_byte(ctl->sc_enpub->enp_crand);
ctl->sc_gap = ctl->sc_cur_packno + 1 + rand;
}
void
lsquic_send_ctl_path_validated (struct lsquic_send_ctl *ctl)
{
LSQ_DEBUG("path validated: switch to regular can_send");
ctl->sc_can_send = send_ctl_can_send;
}
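/* Check whether a probe packet -- an MTU probe, for example -- can be sent
 * on `path' without exceeding the congestion window or, when pacing is
 * enabled, the pacer's schedule.
 */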
int
lsquic_send_ctl_can_send_probe (const struct lsquic_send_ctl *ctl,
const struct network_path *path)
{
uint64_t cwnd, pacing_rate;
lsquic_time_t tx_time;
unsigned n_out;
assert(!send_ctl_in_recovery(ctl));
n_out = send_ctl_all_bytes_out(ctl);
cwnd = ctl->sc_ci->cci_get_cwnd(CGP(ctl));
if (ctl->sc_flags & SC_PACE)
{
if (n_out + path->np_pack_size >= cwnd)
return 0;
pacing_rate = ctl->sc_ci->cci_pacing_rate(CGP(ctl), 0);
if (!pacing_rate)
pacing_rate = 1;
tx_time = (uint64_t) path->np_pack_size * 1000000 / pacing_rate;
return lsquic_pacer_can_schedule_probe(&ctl->sc_pacer,
ctl->sc_n_scheduled + ctl->sc_n_in_flight_all, tx_time);
}
else
return n_out + path->np_pack_size < cwnd;
}
void
lsquic_send_ctl_disable_ecn (struct lsquic_send_ctl *ctl)
{
struct lsquic_packet_out *packet_out;
LSQ_INFO("disable ECN");
ctl->sc_ecn = ECN_NOT_ECT;
TAILQ_FOREACH(packet_out, &ctl->sc_scheduled_packets, po_next)
lsquic_packet_out_set_ecn(packet_out, ECN_NOT_ECT);
}
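/* Save the state that lsquic_send_ctl_rollback() may later need to restore.
 * A rough usage sketch (illustrative only, not engine code):
 *
 *     struct send_ctl_state state;
 *     lsquic_send_ctl_snapshot(ctl, &state);
 *     ... generate packets via the zero-copy write path ...
 *     if (the write came up short)
 *         lsquic_send_ctl_rollback(ctl, &state, last_iov, shortfall);
 */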
void
lsquic_send_ctl_snapshot (struct lsquic_send_ctl *ctl,
struct send_ctl_state *ctl_state)
{
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
int buffered, repace;
buffered = !lsquic_send_ctl_schedule_stream_packets_immediately(ctl);
repace = !buffered && (ctl->sc_flags & SC_PACE);
if (repace)
ctl_state->pacer = ctl->sc_pacer;
if (buffered)
{
lconn->cn_if->ci_ack_snapshot(lconn, &ctl_state->ack_state);
ctl_state->buf_counts[BPT_OTHER_PRIO]
= ctl->sc_buffered_packets[BPT_OTHER_PRIO].bpq_count;
ctl_state->buf_counts[BPT_HIGHEST_PRIO]
= ctl->sc_buffered_packets[BPT_HIGHEST_PRIO].bpq_count;
}
}
static void
send_ctl_repace (struct lsquic_send_ctl *ctl, const struct pacer *pacer,
unsigned count)
{
unsigned n;
int in_rec;
LSQ_DEBUG("repace, count: %u", count);
ctl->sc_pacer = *pacer;
in_rec = send_ctl_in_recovery(ctl);
for (n = 0; n < count; ++n)
lsquic_pacer_packet_scheduled(&ctl->sc_pacer,
ctl->sc_n_in_flight_retx + ctl->sc_n_scheduled + n, in_rec,
send_ctl_transfer_time, ctl);
}
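/* Undo the effects of a partially successful zero-copy write: find the
 * packet and STREAM frame containing `last_iov', shrink that frame by
 * `shortfall' bytes, drop the frames and packets that follow, and restore
 * the pacer and ACK state saved by lsquic_send_ctl_snapshot().
 */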
void
lsquic_send_ctl_rollback (struct lsquic_send_ctl *ctl,
struct send_ctl_state *ctl_state, const struct iovec *last_iov,
size_t shortfall)
{
struct lsquic_conn *const lconn = ctl->sc_conn_pub->lconn;
struct lsquic_packet_out *packet_out, *next;
struct lsquic_packets_tailq *packets;
struct stream_frame stream_frame;
struct packet_out_frec_iter pofi;
enum buf_packet_type packet_type;
unsigned orig_count, new_count;
enum quic_ft_bit lost_types;
int buffered, repace, len, to_end;
unsigned short prev_frec_len;
struct frame_rec *frec;
buffered = !lsquic_send_ctl_schedule_stream_packets_immediately(ctl);
repace = !buffered && (ctl->sc_flags & SC_PACE);
if (!buffered)
{
orig_count = ctl->sc_n_scheduled;
packets = &ctl->sc_scheduled_packets;
packet_type = 0; /* Not necessary, but compiler complains */
}
else if (ctl_state->buf_counts[BPT_HIGHEST_PRIO]
< ctl->sc_buffered_packets[BPT_HIGHEST_PRIO].bpq_count)
{
packets = &ctl->sc_buffered_packets[BPT_HIGHEST_PRIO].bpq_packets;
orig_count = ctl->sc_buffered_packets[BPT_HIGHEST_PRIO].bpq_count;
packet_type = BPT_HIGHEST_PRIO;
}
else
{
packets = &ctl->sc_buffered_packets[BPT_OTHER_PRIO].bpq_packets;
orig_count = ctl->sc_buffered_packets[BPT_OTHER_PRIO].bpq_count;
packet_type = BPT_OTHER_PRIO;
}
/* Now find last packet: */
TAILQ_FOREACH(packet_out, packets, po_next)
if ((unsigned char *) last_iov->iov_base >= packet_out->po_data
&& (unsigned char *) last_iov->iov_base
< packet_out->po_data + packet_out->po_data_sz)
break;
if (!packet_out)
{
lconn->cn_if->ci_internal_error(lconn,
"rollback failed: cannot find packet");
return;
}
for (frec = lsquic_pofi_first(&pofi, packet_out); frec;
frec = lsquic_pofi_next(&pofi))
if (frec->fe_frame_type == QUIC_FRAME_STREAM
/* At the time of this writing, pwritev() generates a single STREAM
* frame per packet. To keep code future-proof, we use an extra
* check.
*/
&& (unsigned char *) last_iov->iov_base
> packet_out->po_data + frec->fe_off
&& (unsigned char *) last_iov->iov_base
< packet_out->po_data + frec->fe_off + frec->fe_len)
break;
if (!frec)
{
lconn->cn_if->ci_internal_error(lconn,
"rollback failed: cannot find frame record");
return;
}
/* Strictly less because of the STREAM frame header */
assert(last_iov->iov_len < frec->fe_len);
len = lconn->cn_pf->pf_parse_stream_frame(
packet_out->po_data + frec->fe_off, frec->fe_len, &stream_frame);
if (len < 0)
{
lconn->cn_if->ci_internal_error(lconn,
"error parsing own STREAM frame");
return;
}
if (stream_frame.data_frame.df_size > last_iov->iov_len - shortfall)
{
packet_out->po_data_sz = (unsigned char *) last_iov->iov_base
+ last_iov->iov_len - shortfall - packet_out->po_data;
prev_frec_len = frec->fe_len;
frec->fe_len = packet_out->po_data_sz - frec->fe_off;
to_end = lconn->cn_pf->pf_dec_stream_frame_size(
packet_out->po_data + frec->fe_off,
stream_frame.data_frame.df_size - (prev_frec_len - frec->fe_len));
if (to_end)
{ /* A frame that's too short may be generated when pwritev runs out
* of iovecs. In that case, we adjust it here.
*/
if (!(packet_out->po_flags & PO_STREAM_END))
LSQ_DEBUG("set stream-end flag on truncated packet");
packet_out->po_flags |= PO_STREAM_END;
}
if (!buffered)
ctl->sc_bytes_scheduled -= prev_frec_len - frec->fe_len;
}
else
assert(stream_frame.data_frame.df_size
== last_iov->iov_len - shortfall);
/* Drop any frames that follow */
for (frec = lsquic_pofi_next(&pofi); frec; frec = lsquic_pofi_next(&pofi))
frec->fe_frame_type = 0;
/* Return unused packets */
new_count = orig_count;
lost_types = 0;
for (packet_out = TAILQ_NEXT(packet_out, po_next); packet_out != NULL;
packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
--new_count;
lost_types |= packet_out->po_frame_types;
/* Undo lsquic_send_ctl_get_packet_for_stream() */
if (!buffered)
send_ctl_sched_remove(ctl, packet_out);
else
{
TAILQ_REMOVE(packets, packet_out, po_next);
--ctl->sc_buffered_packets[packet_type].bpq_count;
}
send_ctl_destroy_packet(ctl, packet_out);
}
if (new_count < orig_count && repace)
send_ctl_repace(ctl, &ctl_state->pacer, new_count);
if (buffered && (lost_types & QUIC_FTBIT_ACK))
lconn->cn_if->ci_ack_rollback(lconn, &ctl_state->ack_state);
}
/* Find 0-RTT packets and change them to 1-RTT packets */
void
lsquic_send_ctl_0rtt_to_1rtt (struct lsquic_send_ctl *ctl)
{
struct lsquic_packet_out *packet_out;
unsigned count;
struct lsquic_packets_tailq *const *q;
struct lsquic_packets_tailq *const queues[] = {
&ctl->sc_scheduled_packets,
&ctl->sc_unacked_packets[PNS_APP],
&ctl->sc_lost_packets,
&ctl->sc_buffered_packets[0].bpq_packets,
&ctl->sc_buffered_packets[1].bpq_packets,
};
assert(ctl->sc_flags & SC_IETF);
while (packet_out = TAILQ_FIRST(&ctl->sc_0rtt_stash), packet_out != NULL)
{
TAILQ_REMOVE(&ctl->sc_0rtt_stash, packet_out, po_next);
TAILQ_INSERT_TAIL(&ctl->sc_lost_packets, packet_out, po_next);
packet_out->po_flags |= PO_LOST;
}
count = 0;
for (q = queues; q < queues + sizeof(queues) / sizeof(queues[0]); ++q)
TAILQ_FOREACH(packet_out, *q, po_next)
if (packet_out->po_header_type == HETY_0RTT)
{
++count;
packet_out->po_header_type = HETY_SHORT;
if (packet_out->po_flags & PO_ENCRYPTED)
send_ctl_return_enc_data(ctl, packet_out);
}
LSQ_DEBUG("handshake ok: changed %u packet%.*s from 0-RTT to 1-RTT",
count, count != 1, "s");
ctl->sc_n_consec_rtos >>= 1;
}
/* Remove 0-RTT packets from the unacked queue and wait to retransmit them
* after handshake succeeds. This is the most common case. There could
* (theoretically) be some corner cases where 0-RTT packets are in the
* scheduled queue, but we let those be lost naturally if that occurs.
*/
void
lsquic_send_ctl_stash_0rtt_packets (struct lsquic_send_ctl *ctl)
{
struct lsquic_packet_out *packet_out, *next;
unsigned count, packet_sz;
count = 0;
for (packet_out = TAILQ_FIRST(&ctl->sc_unacked_packets[PNS_APP]);
packet_out; packet_out = next)
{
next = TAILQ_NEXT(packet_out, po_next);
if (packet_out->po_header_type == HETY_0RTT)
{
packet_sz = packet_out_sent_sz(packet_out);
send_ctl_unacked_remove(ctl, packet_out, packet_sz);
TAILQ_INSERT_TAIL(&ctl->sc_0rtt_stash, packet_out, po_next);
++count;
}
}
LSQ_DEBUG("stashed %u 0-RTT packet%.*s", count, count != 1, "s");
}