From: Stefano Brivio <sbrivio@redhat.com>
To: David Gibson <david@gibson.dropbear.id.au>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 5/5] udp: Handle errors on UDP sockets
Date: Tue, 16 Jul 2024 09:25:56 +0200 [thread overview]
Message-ID: <20240716092556.432cf763@elisabeth> (raw)
In-Reply-To: <20240716052936.1204164-6-david@gibson.dropbear.id.au>
Nits only (the rest of the series looks good to me):
On Tue, 16 Jul 2024 15:29:36 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:
> Currently we ignore all events other than EPOLLIN on UDP sockets. This
> means that if we ever receive an EPOLLERR event, we'll enter an infinite
> loop on epoll, because we'll never do anything to clear the error.
>
> Luckily that doesn't seem to have happened in practice, but it's certainly
> fragile. Furthermore changes in how we handle UDP sockets with the flow
> table mean we will start receiving error events.
>
> Add handling of EPOLLERR events. For now we just read the error from the
> error queue (thereby clearing the error state) and print a debug message.
> We can add more substantial handling of specific events in future if we
> want to.
>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> udp.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> util.c | 29 +++++++++++++++++++++++++++++
> util.h | 3 +++
> 3 files changed, 91 insertions(+)
>
> diff --git a/udp.c b/udp.c
> index fbf3ce73..d761717a 100644
> --- a/udp.c
> +++ b/udp.c
> @@ -110,6 +110,7 @@
> #include <sys/socket.h>
> #include <sys/uio.h>
> #include <time.h>
> +#include <linux/errqueue.h>
>
> #include "checksum.h"
> #include "util.h"
> @@ -728,6 +729,59 @@ static void udp_tap_prepare(const struct ctx *c, const struct mmsghdr *mmh,
> (*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
> }
>
> +/**
> + * udp_sock_recverr() - Receive and clear an error from a socket
> + * @s: Socket to receive from
> + *
> + * Return: true if an error received and processed, false if no more errors
It took me a while to realise there's an implied "was" between "error"
and "received". Maybe:
* Return: true: errors received and processed, false: no more errors
?
> + *
> + * #syscalls recvmsg
> + */
> +static bool udp_sock_recverr(int s)
> +{
> + const struct sock_extended_err *ee;
> + const struct cmsghdr *hdr;
> + char buf[CMSG_SPACE(sizeof(*ee))];
> + struct msghdr mh = {
> + .msg_name = NULL,
> + .msg_namelen = 0,
> + .msg_iov = NULL,
> + .msg_iovlen = 0,
> + .msg_control = buf,
> + .msg_controllen = sizeof(buf),
> + };
> + ssize_t rc;
> +
> + rc = recvmsg(s, &mh, MSG_ERRQUEUE);
> + if (rc < 0) {
> + if (errno != EAGAIN)
There are some existing cases where we forgot about this, but EAGAIN
and EWOULDBLOCK are not guaranteed to be the same value, and as
recvmsg(2) says:
POSIX.1 allows either error to be returned for this case, and does
not require these constants to have the same value, so a portable
application should check for both possibilities.
> + err_perror("Failed to read error queue");
> + return false;
> + }
> +
> + if (!(mh.msg_flags & MSG_ERRQUEUE)) {
> + err("Missing MSG_ERRQUEUE flag reading error queue");
> + return false;
> + }
> +
> + hdr = CMSG_FIRSTHDR(&mh);
> + if (!((hdr->cmsg_level == IPPROTO_IP &&
> + hdr->cmsg_type == IP_RECVERR) ||
> + (hdr->cmsg_level == IPPROTO_IPV6 &&
> + hdr->cmsg_type == IPV6_RECVERR))) {
> + err("Unexpected cmsg reading error queue");
> + return false;
> + }
> +
> + ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
> +
> + /* TODO: When possible propagate and otherwise handle errors */
> + debug("%s error on UDP socket %i: %s",
> + str_ee_origin(ee), s, strerror(ee->ee_errno));
> +
> + return true;
> +}
> +
> /**
> * udp_sock_recv() - Receive datagrams from a socket
> * @c: Execution context
> @@ -751,6 +805,11 @@ static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
>
> ASSERT(!c->no_udp);
>
> + /* Clear any errors first */
> + if (events & EPOLLERR)
For consistency: curly brackets around multi-line block (or simply
change that to "while (udp_sock_recverr(s));"? No preference from me).
> + while (udp_sock_recverr(s))
> + ;
> +
> if (!(events & EPOLLIN))
> return 0;
>
> diff --git a/util.c b/util.c
> index 428847d2..70de1e16 100644
> --- a/util.c
> +++ b/util.c
> @@ -25,6 +25,7 @@
> #include <time.h>
> #include <errno.h>
> #include <stdbool.h>
> +#include <linux/errqueue.h>
>
> #include "util.h"
> #include "iov.h"
> @@ -97,6 +98,14 @@ static int sock_l4_sa(const struct ctx *c, enum epoll_type type,
> if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)))
> debug("Failed to set SO_REUSEADDR on socket %i", fd);
>
> + if (proto == IPPROTO_UDP) {
> + int level = af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
> + int opt = af == AF_INET ? IP_RECVERR : IPV6_RECVERR;
> +
> + if (setsockopt(fd, level, opt, &y, sizeof(y)))
> + die("Failed to set RECVERR on socket %i", fd);
There's also a die_perror() which would be practical here.
> + }
> +
> if (ifname && *ifname) {
> /* Supported since kernel version 5.7, commit c427bfec18f2
> * ("net: core: enable SO_BINDTODEVICE for non-root users"). If
> @@ -654,3 +663,23 @@ const char *sockaddr_ntop(const void *sa, char *dst, socklen_t size)
>
> return dst;
> }
> +
> +/** str_ee_origin() - Convert socket extended error origin to a string
> + * @ee: Socket extended error structure
> + *
> + * Return: Static string describing error origin
> + */
> +const char *str_ee_origin(const struct sock_extended_err *ee)
> +{
> + const char *const desc[] = {
> + [SO_EE_ORIGIN_NONE] = "<no origin>",
> + [SO_EE_ORIGIN_LOCAL] = "Local",
> + [SO_EE_ORIGIN_ICMP] = "ICMP",
> + [SO_EE_ORIGIN_ICMP6] = "ICMPv6",
> + };
> +
> + if (ee->ee_origin < ARRAY_SIZE(desc))
> + return desc[ee->ee_origin];
> +
> + return "<invalid>";
> +}
> diff --git a/util.h b/util.h
> index d0150396..1d479ddf 100644
> --- a/util.h
> +++ b/util.h
> @@ -194,7 +194,10 @@ static inline const char *af_name(sa_family_t af)
>
> #define SOCKADDR_STRLEN MAX(SOCKADDR_INET_STRLEN, SOCKADDR_INET6_STRLEN)
>
> +struct sock_extended_err;
> +
> const char *sockaddr_ntop(const void *sa, char *dst, socklen_t size);
> +const char *str_ee_origin(const struct sock_extended_err *ee);
>
> /**
> * mod_sub() - Modular arithmetic subtraction
--
Stefano
next prev parent reply other threads:[~2024-07-16 7:26 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-16 5:29 [PATCH 0/5] Handle error events on UDP sockets David Gibson
2024-07-16 5:29 ` [PATCH 1/5] conf: Don't configure port forwarding for a disabled protocol David Gibson
2024-07-16 5:29 ` [PATCH 2/5] udp: Make udp_sock_recv static David Gibson
2024-07-16 5:29 ` [PATCH 3/5] udp, tcp: Tweak handling of no_udp and no_tcp flags David Gibson
2024-07-16 5:29 ` [PATCH 4/5] util: Add AF_UNSPEC support to sockaddr_ntop() David Gibson
2024-07-16 5:29 ` [PATCH 5/5] udp: Handle errors on UDP sockets David Gibson
2024-07-16 7:25 ` Stefano Brivio [this message]
2024-07-17 0:04 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240716092556.432cf763@elisabeth \
--to=sbrivio@redhat.com \
--cc=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).