From dfb93e636657c59e89fc62002ac26dee1795b794 Mon Sep 17 00:00:00 2001 From: Andrew Clayton Date: Fri, 7 Mar 2025 00:49:45 +0000 Subject: Initial commit Signed-off-by: Andrew Clayton --- .gitignore | 2 + Makefile | 7 +++ outfile.jira | 129 +++++++++++++++++++++++++++++++++++++++++++++++ sptr-diag-tr.png | Bin 0 -> 66163 bytes sptr-diag.ora | Bin 0 -> 1308496 bytes sptr-diag.png | Bin 0 -> 93319 bytes sptr.c | 66 ++++++++++++++++++++++++ sptr.md | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 353 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 outfile.jira create mode 100644 sptr-diag-tr.png create mode 100644 sptr-diag.ora create mode 100644 sptr-diag.png create mode 100644 sptr.c create mode 100644 sptr.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b754c96 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +sptr.html +sptr diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c5bbd2e --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +all: sptr html + +sptr: sptr.c + gcc -Wall -Wextra -g -o $@ $< + +html: sptr.md + pandoc $< --metadata pagetitle=nginx-unit-serialised-pointers -s --highlight-style tango -o sptr.html diff --git a/outfile.jira b/outfile.jira new file mode 100644 index 0000000..0058439 --- /dev/null +++ b/outfile.jira @@ -0,0 +1,129 @@ +{code:{=html}} + +{code} +h5. {anchor:digital-domainnet}[digital-domain.net|https://digital-domain.net/] +h2. {anchor:nginx-unit-serialised-pointers}NGINX Unit Serialised Pointers +In [NGINX Unit|https://unit.nginx.org/] we make use of what we call _serialised pointers_. In simplest terms these are nothing more than _offsets_ into memory. However, the way they are implemented is somewhat non-obvious. + +These are needed when we want to share memory \(containing pointers) via Inter Process Communications methods. + +This text will attempt to explain them. 
+ +In Unit it is common to have a chunk of memory that starts with a _structure_ then has some data after it, such as a bunch of, possibly nul terminated, strings. + +Each of these strings would have an associated {{nxt_unit_sptr_t}} structure member which is defined like + +{code:c} +union nxt_unit_sptr_u { + uint8_t base[1]; + uint32_t offset; +}; +{code} +{{.base[1\]}} is only used to get the address of this union, the array decays to a pointer, so {{.base}} is the address of the union. + +{{.offset}} is then an offset relative from the {{.base}} address to the start of the data in question. + +\(This could have been implemented using a simple integer type) + +The following example program and diagram will hopefully make things clear + +{code:c} +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +union sptr_u { + uint8_t base[1]; + uint32_t offset; +}; +typedef union sptr_u sptr_t; + +struct s { + uint8_t name1_len; + uint8_t name2_len; + uint8_t name3_len; + + sptr_t name1; + sptr_t name2; + sptr_t name3; +}; + +static void sptr_set(sptr_t *sptr, void *ptr) +{ + sptr->offset = (uint8_t *)ptr - sptr->base; +} + +static void *sptr_get(sptr_t *sptr) +{ + return sptr->base + sptr->offset; +} + +int main(void) +{ + const char * const names[] = { "toor", "foobar", "baz" }; + struct s *s = malloc(sizeof(struct s) + + strlen(names[0]) + strlen(names[1]) + strlen(names[2]) + + 3); + char *p = (char *)(s) + sizeof(struct s); + + sptr_set(&s->name1, p); + p = stpcpy(p, names[0]); + + p++; + sptr_set(&s->name2, p); + p = stpcpy(p, names[1]); + + p++; + sptr_set(&s->name3, p); + p = stpcpy(p, names[2]); + + printf("name1 : %s\n", (const char *)sptr_get(&s->name1)); + printf("name2 : %s\n", (const char *)sptr_get(&s->name2)); + printf("name3 : %s\n", (const char *)sptr_get(&s->name3)); + + free(s); + + exit(EXIT_SUCCESS); +} +{code} +The above program results in something like + +!sptr-diag-tr.png|alt=Fig 1. structure memory layout! 
+ +[pahole(1)|https://www.kernel.org/doc/ols/2007/ols2007v2-pages-35-44.pdf] shows + +{noformat} +union sptr_u { + uint8_t base[1]; /* 0 1 */ + uint32_t offset; /* 0 4 */ +}; +struct s { + uint8_t name1_len; /* 0 1 */ + uint8_t name2_len; /* 1 1 */ + uint8_t name3_len; /* 2 1 */ + + /* XXX 1 byte hole, try to pack */ + + sptr_t name1; /* 4 4 */ + sptr_t name2; /* 8 4 */ + sptr_t name3; /* 12 4 */ + + /* size: 16, cachelines: 1, members: 6 */ + /* sum members: 15, holes: 1, sum holes: 1 */ + /* last cacheline: 16 bytes */ +};{noformat} +So we have three strings: "toor", "foobar" & "baz" + +_toor_ starts at the address of _s->name1_ + _12_, 12 is {{sizeof(sptr_t) * 3}}. + +_foobar_ starts at the address of _s->name2_ + _13_, 13 is {{sizeof(sptr_t) * 2}} + +* the length of "toor\0" \(5). + +_baz_ starts at the address of _s->name3_ + _16_, 16 is {{sizeof(sptr_t)}} + the lengths of "toor\0" & "foobar\0" \(12). + +---- +[Andrew Clayton|mailto:andrew@digital-domain.net], Apr 8th 2024 diff --git a/sptr-diag-tr.png b/sptr-diag-tr.png new file mode 100644 index 0000000..37ab6f8 Binary files /dev/null and b/sptr-diag-tr.png differ diff --git a/sptr-diag.ora b/sptr-diag.ora new file mode 100644 index 0000000..51d0135 Binary files /dev/null and b/sptr-diag.ora differ diff --git a/sptr-diag.png b/sptr-diag.png new file mode 100644 index 0000000..0077707 Binary files /dev/null and b/sptr-diag.png differ diff --git a/sptr.c b/sptr.c new file mode 100644 index 0000000..c275b8a --- /dev/null +++ b/sptr.c @@ -0,0 +1,66 @@ +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +union sptr_u { + uint8_t base[1]; + uint32_t offset; +}; +typedef union sptr_u sptr_t; + +struct s { + uint8_t name1_len; + uint8_t name2_len; + uint8_t name3_len; + + sptr_t name1; + sptr_t name2; + sptr_t name3; +}; + +static void sptr_set(sptr_t *sptr, void *ptr) +{ + sptr->offset = (uint8_t *)ptr - sptr->base; + + printf("sptr->base : %p\n", sptr->base); + printf("sptr->offset : %u\n", 
sptr->offset); +} + +static void *sptr_get(sptr_t *sptr) +{ + return sptr->base + sptr->offset; +} + +int main(void) +{ + static const char * const names[] = { "toor", "foobar", "baz" }; + struct s *s = malloc(sizeof(struct s) + + strlen(names[0]) + strlen(names[1]) + + strlen(names[2]) + 3); + char *p = (char *)(s) + sizeof(struct s); + + printf("s : %p\n", s); + + s->name1_len = strlen(names[0]); + sptr_set(&s->name1, p); + p = stpcpy(p, names[0]); + + p++; + s->name2_len = strlen(names[1]); + sptr_set(&s->name2, p); + p = stpcpy(p, names[1]); + + p++; + s->name3_len = strlen(names[2]); + sptr_set(&s->name3, p); + p = stpcpy(p, names[2]); + + printf("name1 : %s\n", (const char *)sptr_get(&s->name1)); + printf("name2 : %s\n", (const char *)sptr_get(&s->name2)); + printf("name3 : %s\n", (const char *)sptr_get(&s->name3)); + + free(s); + + exit(EXIT_SUCCESS); +} diff --git a/sptr.md b/sptr.md new file mode 100644 index 0000000..9050953 --- /dev/null +++ b/sptr.md @@ -0,0 +1,149 @@ +``` {=html} + +``` + +##### [digital-domain.net](https://digital-domain.net/) + +## NGINX Unit Serialised Pointers + +In [NGINX Unit](https://unit.nginx.org/) we make use of what we call +_serialised pointers_. In simplest terms these are nothing more than _offsets_ +into memory. However, the way they are implemented is somewhat non-obvious. + +These are needed when we want to share memory (containing pointers) via +Inter Process Communications methods. + +This text will attempt to explain them. + +In Unit it is common to have a chunk of memory that starts with a _structure_ +then has some data after it, such as a bunch of, possibly nul terminated, +strings. + +Each of these strings would have an associated `nxt_unit_sptr_t` structure +member which is defined like + +```c +union nxt_unit_sptr_u { + uint8_t base[1]; + uint32_t offset; +}; +``` + +`.base[1]` is only used to get the address of this union, the array decays to +a pointer, so `.base` is the address of the union. 
+ +**This is really the key to the whole thing, we never set (or retrieve) +`.base`, it merely exists to provide the address of the union.** + +`.offset` is then an offset relative from the `.base` address to the start of +the data in question. + +(This could have been implemented using a simple integer type) + +The following example program and diagram will hopefully make things clear + +```c +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +union sptr_u { + uint8_t base[1]; + uint32_t offset; +}; +typedef union sptr_u sptr_t; + +struct s { + uint8_t name1_len; + uint8_t name2_len; + uint8_t name3_len; + + sptr_t name1; + sptr_t name2; + sptr_t name3; +}; + +static void sptr_set(sptr_t *sptr, void *ptr) +{ + sptr->offset = (uint8_t *)ptr - sptr->base; +} + +static void *sptr_get(sptr_t *sptr) +{ + return sptr->base + sptr->offset; +} + +int main(void) +{ + static const char * const names[] = { "toor", "foobar", "baz" }; + struct s *s = malloc(sizeof(struct s) + + strlen(names[0]) + strlen(names[1]) + + strlen(names[2]) + 3); + char *p = (char *)(s) + sizeof(struct s); + + sptr_set(&s->name1, p); + p = stpcpy(p, names[0]); + + p++; + sptr_set(&s->name2, p); + p = stpcpy(p, names[1]); + + p++; + sptr_set(&s->name3, p); + p = stpcpy(p, names[2]); + + printf("name1 : %s\n", (const char *)sptr_get(&s->name1)); + printf("name2 : %s\n", (const char *)sptr_get(&s->name2)); + printf("name3 : %s\n", (const char *)sptr_get(&s->name3)); + + free(s); + + exit(EXIT_SUCCESS); +} +``` + +The above program results in something like + +![Fig 1. 
structure memory layout](sptr-diag-tr.png) + +[pahole(1)](https://www.kernel.org/doc/ols/2007/ols2007v2-pages-35-44.pdf) +shows + +``` +union sptr_u { + uint8_t base[1]; /* 0 1 */ + uint32_t offset; /* 0 4 */ +}; +struct s { + uint8_t name1_len; /* 0 1 */ + uint8_t name2_len; /* 1 1 */ + uint8_t name3_len; /* 2 1 */ + + /* XXX 1 byte hole, try to pack */ + + sptr_t name1; /* 4 4 */ + sptr_t name2; /* 8 4 */ + sptr_t name3; /* 12 4 */ + + /* size: 16, cachelines: 1, members: 6 */ + /* sum members: 15, holes: 1, sum holes: 1 */ + /* last cacheline: 16 bytes */ +}; +``` + +So we have three strings: "toor", "foobar" & "baz" + +_toor_ starts at the address of _s->name1_ + _12_, 12 is `sizeof(sptr_t) * 3`. + +_foobar_ starts at the address of _s->name2_ + _13_, 13 is `sizeof(sptr_t) * 2` + +the length of "toor\0" (5). + +_baz_ starts at the address of _s->name3_ + _16_, 16 is `sizeof(sptr_t)` + the +lengths of "toor\0" & "foobar\0" (12). + +--- +[Andrew Clayton](mailto:andrew@digital-domain.net), +Apr 8th 2024 -- cgit