diff options
| author | Vladimir Homutov <vl@nginx.com> | 2020-12-25 15:01:15 +0300 |
|---|---|---|
| committer | Vladimir Homutov <vl@nginx.com> | 2020-12-25 15:01:15 +0300 |
| commit | c4f31ccca174ff617a594b49ef255354e979b72d (patch) | |
| tree | d2ade5c91b17e009585c2b329823708e1ce91f76 /src/event/quic/bpf | |
| parent | b20b58ca7d1323664c5e8f91231ade0edf0d0f31 (diff) | |
| download | nginx-c4f31ccca174ff617a594b49ef255354e979b72d.tar.gz nginx-c4f31ccca174ff617a594b49ef255354e979b72d.tar.bz2 | |
QUIC: ngx_quic_bpf module.
The QUIC kernel BPF helper inspects the packet payload for the DCID, extracts
the key, and routes the packet to the socket matching that key.
Thanks to the reuseport feature, each worker owns its own socket, which is
identified by the same key that is used to create the DCID.
BPF objects are locked in RAM and are subject to RLIMIT_MEMLOCK.
The "ulimit -l" command may be used to set up proper limits if map creation
fails with EPERM or a map update fails with E2BIG ("Argument list too long").
Diffstat (limited to 'src/event/quic/bpf')
| -rw-r--r-- | src/event/quic/bpf/bpfgen.sh | 113 | ||||
| -rw-r--r-- | src/event/quic/bpf/makefile | 30 | ||||
| -rw-r--r-- | src/event/quic/bpf/ngx_quic_reuseport_helper.c | 140 |
3 files changed, 283 insertions, 0 deletions
diff --git a/src/event/quic/bpf/bpfgen.sh b/src/event/quic/bpf/bpfgen.sh
new file mode 100644
index 000000000..78cbdac4d
--- /dev/null
+++ b/src/event/quic/bpf/bpfgen.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+export LANG=C
+
+set -e
+
+if [ $# -lt 1 ] || [ -z "$PROGNAME" ] || [ -z "$LICENSE" ]; then
+    echo "Usage: PROGNAME=foo LICENSE=bar $0 <bpf object file>" >&2
+    exit 1
+fi
+
+
+self=$0
+filename=$1
+funcname=$PROGNAME
+
+generate_head()
+{
+    cat << END
+/* AUTO-GENERATED, DO NOT EDIT. */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "ngx_bpf.h"
+
+
+END
+}
+
+generate_tail()
+{
+    cat << END
+
+ngx_bpf_program_t $PROGNAME = {
+    .relocs = bpf_reloc_prog_$funcname,
+    .nrelocs = sizeof(bpf_reloc_prog_$funcname)
+               / sizeof(bpf_reloc_prog_$funcname[0]),
+    .ins = bpf_insn_prog_$funcname,
+    .nins = sizeof(bpf_insn_prog_$funcname)
+            / sizeof(bpf_insn_prog_$funcname[0]),
+    .license = "$LICENSE",
+    .type = BPF_PROG_TYPE_SK_REUSEPORT,
+};
+
+END
+}
+
+process_relocations()
+{
+    echo "static ngx_bpf_reloc_t bpf_reloc_prog_$funcname[] = {"
+
+    objdump -r "$filename" | awk '{
+
+    if (enabled && $NF > 0) {
+        off = strtonum(sprintf("0x%s", $1));
+        name = $3;
+
+        printf("    { \"%s\", %d },\n", name, off/8);
+    }
+
+    if ($1 == "OFFSET") {
+        enabled=1;
+    }
+}'
+    echo "};"
+    echo
+}
+
+process_section()
+{
+    echo "static struct bpf_insn bpf_insn_prog_$funcname[] = {"
+    echo "    /* opcode dst          src         offset imm */"
+
+    section_info=$(objdump -h "$filename" --section="$funcname" | grep "1 $funcname")
+
+    # dd doesn't know hex
+    length=$(printf "%d" 0x$(echo $section_info | cut -d ' ' -f3))
+    offset=$(printf "%d" 0x$(echo $section_info | cut -d ' ' -f6))
+
+    for ins in $(dd if="$filename" bs=1 count=$length skip=$offset status=none | xxd -p -c 8)
+    do
+        opcode=0x${ins:0:2}
+        srcdst=0x${ins:2:2}
+
+        # bytes are dumped in LE order
+        offset=0x${ins:6:2}${ins:4:2}                        # short
+        immedi=0x${ins:14:2}${ins:12:2}${ins:10:2}${ins:8:2} # int
+
+        dst="$(($srcdst & 0xF))"
+        src="$(($srcdst & 0xF0))"
+        src="$(($src >> 4))"
+
+        opcode=$(printf "0x%x" $opcode)
+        dst=$(printf "BPF_REG_%d" $dst)
+        src=$(printf "BPF_REG_%d" $src)
+        offset=$(printf "%d" $offset)
+        immedi=$(printf "0x%x" $immedi)
+
+        printf "    { %4s, %11s, %11s, (int16_t) %6s, %10s },\n" $opcode $dst $src $offset $immedi
+    done
+
+cat << END
+};
+
+END
+}
+
+generate_head
+process_relocations
+process_section
+generate_tail
+
diff --git a/src/event/quic/bpf/makefile b/src/event/quic/bpf/makefile
new file mode 100644
index 000000000..b4d758f33
--- /dev/null
+++ b/src/event/quic/bpf/makefile
@@ -0,0 +1,30 @@
+CFLAGS=-O2 -Wall
+
+LICENSE=BSD
+
+PROGNAME=ngx_quic_reuseport_helper
+RESULT=ngx_event_quic_bpf_code
+DEST=../$(RESULT).c
+
+all: $(RESULT)
+
+$(RESULT): $(PROGNAME).o
+	LICENSE=$(LICENSE) PROGNAME=$(PROGNAME) bash ./bpfgen.sh $< > $@
+
+DEFS=-DPROGNAME=\"$(PROGNAME)\"                       \
+     -DLICENSE_$(LICENSE)                             \
+     -DLICENSE=\"$(LICENSE)\"
+
+$(PROGNAME).o: $(PROGNAME).c
+	clang $(CFLAGS) $(DEFS) -target bpf -c $< -o $@
+
+install: $(RESULT)
+	cp $(RESULT) $(DEST)
+
+clean:
+	@rm -f $(RESULT) *.o
+
+debug: $(PROGNAME).o
+	llvm-objdump -S -no-show-raw-insn $<
+
+.DELETE_ON_ERROR:
diff --git a/src/event/quic/bpf/ngx_quic_reuseport_helper.c b/src/event/quic/bpf/ngx_quic_reuseport_helper.c
new file mode 100644
index 000000000..05919aaa9
--- /dev/null
+++ b/src/event/quic/bpf/ngx_quic_reuseport_helper.c
@@ -0,0 +1,140 @@
+#include <errno.h>
+#include <linux/string.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+/*
+ * the bpf_helpers.h is not included into linux-headers, only available
+ * with kernel sources in "tools/lib/bpf/bpf_helpers.h" or in libbpf.
+ */
+#include <bpf/bpf_helpers.h>
+
+
+#if !defined(SEC)
+#define SEC(NAME)  __attribute__((section(NAME), used))
+#endif
+
+
+#if defined(LICENSE_GPL)
+
+/*
+ * To see debug:
+ *
+ *  echo 1 > /sys/kernel/debug/tracing/events/bpf_trace/enable
+ *  cat /sys/kernel/debug/tracing/trace_pipe
+ *  echo 0 > /sys/kernel/debug/tracing/events/bpf_trace/enable
+ */
+
+#define debugmsg(fmt, ...)                                                    \
+do {                                                                          \
+    char __buf[] = fmt;                                                       \
+    bpf_trace_printk(__buf, sizeof(__buf), ##__VA_ARGS__);                    \
+} while (0)
+
+#else
+
+#define debugmsg(fmt, ...)
+
+#endif
+
+char _license[] SEC("license") = LICENSE;
+
+/*****************************************************************************/
+
+#define NGX_QUIC_PKT_LONG        0x80  /* header form */
+#define NGX_QUIC_SERVER_CID_LEN  20
+
+/* consume nbytes or bail out; "data" is left at the last consumed byte */
+#define advance_data(nbytes)                                                  \
+    offset += (nbytes);                                                       \
+    if (start + offset > end) {                                               \
+        debugmsg("cannot read %ld bytes at offset %ld", nbytes, offset);      \
+        goto failed;                                                          \
+    }                                                                         \
+    data = start + offset - 1;
+
+
+#define ngx_quic_parse_uint64(p)                                              \
+    (((__u64)(p)[0] << 56) |                                                  \
+     ((__u64)(p)[1] << 48) |                                                  \
+     ((__u64)(p)[2] << 40) |                                                  \
+     ((__u64)(p)[3] << 32) |                                                  \
+     ((__u64)(p)[4] << 24) |                                                  \
+     ((__u64)(p)[5] << 16) |                                                  \
+     ((__u64)(p)[6] << 8)  |                                                  \
+     ((__u64)(p)[7]))
+
+/*
+ * actual map object is created by the "bpf" system call,
+ * all pointers to this variable are replaced by the bpf loader
+ */
+struct bpf_map_def SEC("maps") ngx_quic_sockmap;
+
+/* select the ngx_quic_sockmap socket keyed by the first 8 bytes of DCID */
+SEC(PROGNAME)
+int ngx_quic_select_socket_by_dcid(struct sk_reuseport_md *ctx)
+{
+    int             rc;
+    __u64           key;
+    size_t          len, offset;
+    unsigned char  *start, *end, *data, *dcid;
+
+    start = ctx->data;
+    end = (unsigned char *) ctx->data_end;
+    offset = 0;
+
+    advance_data(sizeof(struct udphdr)); /* data at end of UDP header */
+    advance_data(1); /* data at QUIC flags */
+
+    if (data[0] & NGX_QUIC_PKT_LONG) {
+
+        advance_data(4); /* data at last byte of QUIC version */
+        advance_data(1); /* data at DCID length */
+
+        len = data[0];   /* read DCID length */
+
+        if (len < 8) {
+            /* it's useless to search for key in such short DCID */
+            return SK_PASS;
+        }
+
+    } else {
+        len = NGX_QUIC_SERVER_CID_LEN;
+    }
+
+    dcid = &data[1];
+    advance_data(len); /* we expect the packet to have full DCID */
+
+    /* make verifier happy */
+    if (dcid + sizeof(__u64) > end) {
+        goto failed;
+    }
+
+    key = ngx_quic_parse_uint64(dcid);
+
+    rc = bpf_sk_select_reuseport(ctx, &ngx_quic_sockmap, &key, 0);
+
+    switch (rc) {
+    case 0:
+        debugmsg("nginx quic socket selected by key 0x%llx", key);
+        return SK_PASS;
+
+    /* kernel returns negative error numbers, errno.h defines positive */
+    case -ENOENT:
+        debugmsg("nginx quic default route for key 0x%llx", key);
+        /* let the default reuseport logic decide which socket to choose */
+        return SK_PASS;
+
+    default:
+        debugmsg("nginx quic bpf_sk_select_reuseport err: %d key 0x%llx",
+                 rc, key);
+        goto failed;
+    }
+
+failed:
+    /*
+     * SK_DROP will generate ICMP, but we may want to process "invalid" packet
+     * in userspace quic to investigate further and finally react properly
+     * (maybe ignore, maybe send something in response or close connection)
+     */
+    return SK_PASS;
+}
