summaryrefslogtreecommitdiffhomepage
path: root/src/event/quic/bpf
diff options
context:
space:
mode:
authorVladimir Homutov <vl@nginx.com>2020-12-25 15:01:15 +0300
committerVladimir Homutov <vl@nginx.com>2020-12-25 15:01:15 +0300
commitc4f31ccca174ff617a594b49ef255354e979b72d (patch)
treed2ade5c91b17e009585c2b329823708e1ce91f76 /src/event/quic/bpf
parentb20b58ca7d1323664c5e8f91231ade0edf0d0f31 (diff)
downloadnginx-c4f31ccca174ff617a594b49ef255354e979b72d.tar.gz
nginx-c4f31ccca174ff617a594b49ef255354e979b72d.tar.bz2
QUIC: ngx_quic_bpf module.
The QUIC kernel BPF helper inspects the packet payload for the DCID, extracts the key and routes the packet to the socket matching the key. Due to the reuseport feature, each worker owns a personal socket, which is identified by the same key that is used to create the DCID. BPF objects are locked in RAM and are subject to RLIMIT_MEMLOCK. The "ulimit -l" command may be used to set up proper limits if map creation fails with EPERM or a map update fails with ETOOLONG.
Diffstat (limited to 'src/event/quic/bpf')
-rw-r--r--src/event/quic/bpf/bpfgen.sh113
-rw-r--r--src/event/quic/bpf/makefile30
-rw-r--r--src/event/quic/bpf/ngx_quic_reuseport_helper.c140
3 files changed, 283 insertions, 0 deletions
diff --git a/src/event/quic/bpf/bpfgen.sh b/src/event/quic/bpf/bpfgen.sh
new file mode 100644
index 000000000..78cbdac4d
--- /dev/null
+++ b/src/event/quic/bpf/bpfgen.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+# Converts a compiled BPF object file into C source: prints the relocation
+# table, the instruction array and an ngx_bpf_program_t definition to stdout.
+#
+# Usage: PROGNAME=foo LICENSE=bar bpfgen.sh <bpf object file>
+#
+#   PROGNAME  name of the object file section holding the program; also
+#             used to name the generated C objects
+#   LICENSE   license string stored in the generated program definition
+
+# the output of objdump is parsed below, keep it in the C locale
+export LANG=C
+
+set -e
+
+# stdout is the generated file, so diagnostics go to stderr; an empty
+# PROGNAME or LICENSE would silently produce invalid C
+if [ $# -lt 1 ] || [ -z "$PROGNAME" ] || [ -z "$LICENSE" ]; then
+    echo "Usage: PROGNAME=foo LICENSE=bar $0 <bpf object file>" >&2
+    exit 1
+fi
+
+
+self=$0
+filename=$1
+funcname=$PROGNAME
+
+# Prints the fixed preamble of the generated C file: the "do not edit"
+# banner followed by the includes used by the emitted definitions.
+generate_head()
+{
+    printf '%s\n' \
+        '/* AUTO-GENERATED, DO NOT EDIT. */' \
+        '' \
+        '#include <stddef.h>' \
+        '#include <stdint.h>' \
+        '' \
+        '#include "ngx_bpf.h"' \
+        '' \
+        ''
+}
+
+# Prints the ngx_bpf_program_t definition named $PROGNAME.
+#
+# The definition refers to the bpf_reloc_prog_$funcname and
+# bpf_insn_prog_$funcname arrays by name and derives their element counts
+# with sizeof, stores $LICENSE as the license string and sets the program
+# type to BPF_PROG_TYPE_SK_REUSEPORT.
+generate_tail()
+{
+ cat << END
+
+ngx_bpf_program_t $PROGNAME = {
+ .relocs = bpf_reloc_prog_$funcname,
+ .nrelocs = sizeof(bpf_reloc_prog_$funcname)
+ / sizeof(bpf_reloc_prog_$funcname[0]),
+ .ins = bpf_insn_prog_$funcname,
+ .nins = sizeof(bpf_insn_prog_$funcname)
+ / sizeof(bpf_insn_prog_$funcname[0]),
+ .license = "$LICENSE",
+ .type = BPF_PROG_TYPE_SK_REUSEPORT,
+};
+
+END
+}
+
+# Prints the C array of relocations found in the object file.
+#
+# Parses the output of "objdump -r": every non-empty line following the
+# header line that starts with "OFFSET" yields one { name, index } entry.
+# The name is the third column; the index is the hexadecimal offset from
+# the first column divided by 8 (instructions are dumped 8 bytes each).
+process_relocations()
+{
+    echo "static ngx_bpf_reloc_t bpf_reloc_prog_$funcname[] = {"
+
+    # the file name is quoted to survive spaces in the path;
+    # strtonum() is a gawk extension
+    objdump -r "$filename" | awk '{
+
+        if (enabled && $NF > 0) {
+            off = strtonum(sprintf("0x%s", $1));
+            name = $3;
+
+            printf(" { \"%s\", %d },\n", name, off/8);
+        }
+
+        if ($1 == "OFFSET") {
+            enabled=1;
+        }
+    }'
+    echo "};"
+    echo
+}
+
+# Prints the C array of BPF instructions stored in section $funcname.
+#
+# The section size and file offset are taken from "objdump -h", the raw
+# bytes are cut out of the object file with dd and decoded 8 bytes at a
+# time: opcode (1 byte), dst/src registers (low/high nibble of 1 byte),
+# offset (2 bytes, little-endian) and immediate (4 bytes, little-endian).
+process_section()
+{
+    echo "static struct bpf_insn bpf_insn_prog_$funcname[] = {"
+    echo " /* opcode dst src offset imm */"
+
+    # report a missing section instead of exiting silently under "set -e"
+    section_info=$(objdump -h "$filename" --section="$funcname" \
+                   | grep "1 $funcname") || {
+        echo "$0: section \"$funcname\" not found in $filename" >&2
+        exit 1
+    }
+
+    # dd doesn't know hex
+    length=$(printf "%d" 0x$(echo $section_info | cut -d ' ' -f3))
+    secoff=$(printf "%d" 0x$(echo $section_info | cut -d ' ' -f6))
+
+    for ins in $(dd if="$filename" bs=1 count=$length skip=$secoff status=none | xxd -p -c 8)
+    do
+        opcode=0x${ins:0:2}
+        srcdst=0x${ins:2:2}
+
+        # bytes are dumped in LE order
+        offset=0x${ins:6:2}${ins:4:2} # short
+        immedi=0x${ins:14:2}${ins:12:2}${ins:10:2}${ins:8:2} # int
+
+        # low nibble is the destination register, high nibble the source
+        dst="$(($srcdst & 0xF))"
+        src="$(($srcdst & 0xF0))"
+        src="$(($src >> 4))"
+
+        opcode=$(printf "0x%x" $opcode)
+        dst=$(printf "BPF_REG_%d" $dst)
+        src=$(printf "BPF_REG_%d" $src)
+        # printed unsigned; the (int16_t) cast in the output restores the sign
+        offset=$(printf "%d" $offset)
+        immedi=$(printf "0x%x" $immedi)
+
+        printf " { %4s, %11s, %11s, (int16_t) %6s, %10s },\n" "$opcode" "$dst" "$src" "$offset" "$immedi"
+    done
+
+    printf '%s\n' '};' ''
+}
+
+# Emit the generated file. Both arrays are printed before generate_tail,
+# whose output refers to them by name.
+generate_head
+process_relocations
+process_section
+generate_tail
+
diff --git a/src/event/quic/bpf/makefile b/src/event/quic/bpf/makefile
new file mode 100644
index 000000000..b4d758f33
--- /dev/null
+++ b/src/event/quic/bpf/makefile
@@ -0,0 +1,30 @@
+# Builds the BPF object with clang and converts it into C source with
+# bpfgen.sh; "make install" copies the result to $(DEST).
+
+CFLAGS=-O2 -Wall
+
+LICENSE=BSD
+
+PROGNAME=ngx_quic_reuseport_helper
+RESULT=ngx_event_quic_bpf_code
+DEST=../$(RESULT).c
+
+# these targets do not name files
+.PHONY: all install clean debug
+
+all: $(RESULT)
+
+# stdout of bpfgen.sh is the generated C source
+$(RESULT): $(PROGNAME).o
+	LICENSE=$(LICENSE) PROGNAME=$(PROGNAME) bash ./bpfgen.sh $< > $@
+
+# PROGNAME becomes the section name of the program, LICENSE_<name> selects
+# license-dependent code, LICENSE is the license string itself
+DEFS=-DPROGNAME=\"$(PROGNAME)\" \
+     -DLICENSE_$(LICENSE) \
+     -DLICENSE=\"$(LICENSE)\"
+
+$(PROGNAME).o: $(PROGNAME).c
+	clang $(CFLAGS) $(DEFS) -target bpf -c $< -o $@
+
+install: $(RESULT)
+	cp $(RESULT) $(DEST)
+
+clean:
+	@rm -f $(RESULT) *.o
+
+debug: $(PROGNAME).o
+	llvm-objdump -S --no-show-raw-insn $<
+
+# do not leave a partially written $(RESULT) behind when bpfgen.sh fails
+.DELETE_ON_ERROR:
diff --git a/src/event/quic/bpf/ngx_quic_reuseport_helper.c b/src/event/quic/bpf/ngx_quic_reuseport_helper.c
new file mode 100644
index 000000000..05919aaa9
--- /dev/null
+++ b/src/event/quic/bpf/ngx_quic_reuseport_helper.c
@@ -0,0 +1,140 @@
+#include <errno.h>
+#include <linux/string.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+/*
+ * the bpf_helpers.h is not included into linux-headers, only available
+ * with kernel sources in "tools/lib/bpf/bpf_helpers.h" or in libbpf.
+ */
+#include <bpf/bpf_helpers.h>
+
+
+/*
+ * SEC(NAME) places the annotated object into section NAME and keeps it
+ * from being discarded; defined here only when the headers included above
+ * did not already provide it
+ */
+#ifndef SEC
+#define SEC(NAME) __attribute__((section(NAME), used))
+#endif
+
+
+/*
+ * debugmsg(fmt, ...) - printf-style trace output through bpf_trace_printk().
+ *
+ * Available only when LICENSE_GPL is defined; in any other build the macro
+ * expands to nothing, so its arguments are not evaluated.
+ *
+ * Reading the trace output:
+ *
+ *   echo 1 > /sys/kernel/debug/tracing/events/bpf_trace/enable
+ *   cat /sys/kernel/debug/tracing/trace_pipe
+ *   echo 0 > /sys/kernel/debug/tracing/events/bpf_trace/enable
+ */
+#if !defined(LICENSE_GPL)
+
+#define debugmsg(fmt, ...)
+
+#else
+
+/* the format is copied into a local array, whose size is passed along */
+#define debugmsg(fmt, ...)                                                    \
+do {                                                                          \
+    char ngx_dbg_fmt[] = fmt;                                                 \
+    bpf_trace_printk(ngx_dbg_fmt, sizeof(ngx_dbg_fmt), ##__VA_ARGS__);        \
+} while (0)
+
+#endif
+
+/* license string stored in the "license" section; LICENSE comes from -D */
+char _license[] SEC("license") = LICENSE;
+
+/*****************************************************************************/
+
+/* bit tested in the QUIC flags byte to detect a long-header packet */
+#define NGX_QUIC_PKT_LONG 0x80 /* header form */
+/* DCID length used for packets without the long-header bit */
+#define NGX_QUIC_SERVER_CID_LEN 20
+
+
+/*
+ * advance_data(nbytes) - consume nbytes of the packet.
+ *
+ * Operates on the caller's "start", "end", "offset" and "data" variables
+ * and the caller's "failed" label: offset is increased by nbytes; if that
+ * runs past end, control jumps to failed; otherwise data is left pointing
+ * at the last consumed byte (start + offset - 1).
+ *
+ * Wrapped in do/while (0) so that the macro is a single statement and
+ * stays correct as the body of an unbraced if/else.
+ */
+#define advance_data(nbytes)                                                  \
+do {                                                                          \
+    offset += (nbytes);                                                       \
+    if (start + offset > end) {                                               \
+        debugmsg("cannot read %ld bytes at offset %ld", (nbytes), offset);    \
+        goto failed;                                                          \
+    }                                                                         \
+    data = start + offset - 1;                                                \
+} while (0)
+
+
+/*
+ * ngx_quic_parse_uint64(p) - read the 8 bytes at p as a big-endian __u64.
+ *
+ * Every byte is widened to __u64 before shifting: an unsigned char is
+ * otherwise promoted to int, so a byte >= 0x80 shifted left by 24 would
+ * overflow int and sign-extend into the upper 32 bits of the result.
+ */
+#define ngx_quic_parse_uint64(p)                                              \
+    (((__u64)(p)[0] << 56) |                                                  \
+     ((__u64)(p)[1] << 48) |                                                  \
+     ((__u64)(p)[2] << 40) |                                                  \
+     ((__u64)(p)[3] << 32) |                                                  \
+     ((__u64)(p)[4] << 24) |                                                  \
+     ((__u64)(p)[5] << 16) |                                                  \
+     ((__u64)(p)[6] << 8)  |                                                  \
+     ((__u64)(p)[7]))
+
+/*
+ * actual map object is created by the "bpf" system call,
+ * all pointers to this variable are replaced by the bpf loader
+ *
+ * the map is passed to bpf_sk_select_reuseport() together with the 64-bit
+ * key parsed from the packet DCID
+ */
+struct bpf_map_def SEC("maps") ngx_quic_sockmap;
+
+
+/*
+ * Reuseport socket selector: asks the kernel to deliver a QUIC packet to
+ * the socket stored in ngx_quic_sockmap under the key found in the first
+ * 8 bytes of the packet DCID.
+ *
+ * Parsing starts at ctx->data with the UDP header.  For long-header
+ * packets the DCID length is read from the packet, and packets with a
+ * DCID shorter than 8 bytes are not looked up.  For all other packets
+ * the DCID is taken to be NGX_QUIC_SERVER_CID_LEN bytes long.
+ *
+ * Always returns SK_PASS: a truncated packet, a short DCID, a missing key
+ * or a helper error never drops the packet.
+ */
+SEC(PROGNAME)
+int ngx_quic_select_socket_by_dcid(struct sk_reuseport_md *ctx)
+{
+    int             rc;
+    __u64           key;
+    size_t          len, offset;
+    unsigned char  *start, *end, *data, *dcid;
+
+    start = ctx->data;
+    end = (unsigned char *) ctx->data_end;
+    offset = 0;
+
+    advance_data(sizeof(struct udphdr)); /* skip UDP header */
+    advance_data(1); /* data at QUIC flags */
+
+    if (data[0] & NGX_QUIC_PKT_LONG) {
+
+        advance_data(4); /* skip QUIC version */
+        advance_data(1); /* data at DCID length */
+
+        /*
+         * advance_data() leaves data at the last consumed byte, so the
+         * length byte has to be consumed before it is read; reading it
+         * earlier returns the last byte of the version instead
+         */
+        len = data[0];
+
+        if (len < 8) {
+            /* it's useless to search for key in such short DCID */
+            return SK_PASS;
+        }
+
+    } else {
+        len = NGX_QUIC_SERVER_CID_LEN;
+    }
+
+    /* DCID starts right after the last consumed byte */
+    dcid = &data[1];
+    advance_data(len); /* we expect the packet to have full DCID */
+
+    /* make verifier happy */
+    if (dcid + sizeof(__u64) > end) {
+        goto failed;
+    }
+
+    key = ngx_quic_parse_uint64(dcid);
+
+    rc = bpf_sk_select_reuseport(ctx, &ngx_quic_sockmap, &key, 0);
+
+    switch (rc) {
+    case 0:
+        debugmsg("nginx quic socket selected by key 0x%llx", key);
+        return SK_PASS;
+
+    /* the helper returns negative error numbers, errno.h defines positive */
+    case -ENOENT:
+        debugmsg("nginx quic default route for key 0x%llx", key);
+        /* let the default reuseport logic decide which socket to choose */
+        return SK_PASS;
+
+    default:
+        debugmsg("nginx quic bpf_sk_select_reuseport err: %d key 0x%llx",
+                 rc, key);
+        goto failed;
+    }
+
+failed:
+    /*
+     * SK_DROP will generate ICMP, but we may want to process "invalid" packet
+     * in userspace quic to investigate further and finally react properly
+     * (maybe ignore, maybe send something in response or close connection)
+     */
+    return SK_PASS;
+}