From: Stefano Brivio <sbrivio@redhat.com>
To: Jon Maloy <jmaloy@redhat.com>
Cc: lvivier@redhat.com, dgibson@redhat.com, passt-dev@passt.top
Subject: Re: tcp.c: leverage MSG_PEEK with offset kernel capability when available
Date: Wed, 6 Dec 2023 16:08:08 +0100 [thread overview]
Message-ID: <20231206160808.3d312733@elisabeth> (raw)
In-Reply-To: <20231206155940.51047ac1@elisabeth>
[-- Attachment #1: Type: text/plain, Size: 543 bytes --]
On Wed, 6 Dec 2023 15:59:40 +0100
Stefano Brivio <sbrivio@redhat.com> wrote:
> [...]
>
> but on a kernel with your patch, I get ENOTCONN on recvmsg(). If I
> replace that by a simple recv():
>
> sendto(5, "ab", 2, 0, NULL, 0) = 2
> recvfrom(6, "ab", 10, 0, NULL, NULL) = 2
>
> ...so I don't think it's a fundamental issue with this approach, rather
> something with your patch, but I'm not yet sure what. :)
Oops, my bad, I got the order of fields in struct msghdr wrong. New
version attached, this one works.
--
Stefano
[-- Attachment #2: pkt_selfie.c --]
[-- Type: text/x-c++src, Size: 3381 bytes --]
#define _GNU_SOURCE
#include <sched.h>
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/tcp.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
/* ===> from passt's Makefile and code... */
#define RLIMIT_STACK_VAL 8192
#define NS_FN_STACK_SIZE (RLIMIT_STACK_VAL * 1024 / 8)
/**
 * do_clone() - Run a function in a cloned child, on a caller-provided stack
 * @fn:		Entry point for the child
 * @stack_area:	Start of the memory area to use as child stack
 * @stack_size:	Size of @stack_area, in bytes
 * @flags:	Flags passed through to clone()
 * @arg:	Argument passed through to @fn
 *
 * The child's stack pointer is set to the middle of @stack_area, presumably so
 * that half of the area is usable whichever way the stack grows.
 *
 * Return: whatever clone() (or __clone2() on ia64) returns
 */
int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
	     void *arg)
{
	const size_t half = stack_size / 2;
	char *stack_mid = stack_area + half;

#ifdef __ia64__
	return __clone2(fn, stack_mid, half, flags, arg);
#else
	return clone(fn, stack_mid, flags, arg);
#endif
}
/* Netlink socket set up by nl_sock_init_do(): a descriptor, or -1 on failure */
static int nl_sock;

/**
 * nl_sock_init_do() - Open and bind the NETLINK_ROUTE socket, store in nl_sock
 * @arg:	Unused
 *
 * On any failure nl_sock is set to -1, and a socket that was opened but could
 * not be bound is closed rather than leaked.
 *
 * Return: 0, always; callers detect failure by checking nl_sock for -1
 */
static int nl_sock_init_do(void *arg)
{
	struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
	int fd;

	(void)arg;

	nl_sock = -1;

	fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
	if (fd < 0)
		return 0;

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) {
		/* Don't leave an unusable descriptor open behind us */
		close(fd);
		return 0;
	}

	nl_sock = fd;
	return 0;
}
/**
 * nl_send() - Prepare and send netlink request
 * @s:		Netlink socket
 * @req:	Request (will fill netlink header)
 * @type:	Request type
 * @flags:	Extra request flags (NLM_F_REQUEST and NLM_F_ACK assumed)
 * @len:	Request length
 *
 * A failed or short send() is reported on stderr, but it is not fatal: the
 * sequence number is returned in any case.
 *
 * Return: sequence number of request (always 1 in this implementation)
 */
static uint32_t nl_send(int s, void *req, uint16_t type,
			uint16_t flags, ssize_t len)
{
	struct nlmsghdr *nh = req;
	ssize_t n;

	nh->nlmsg_type = type;
	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
	nh->nlmsg_len = len;
	nh->nlmsg_seq = 1;
	nh->nlmsg_pid = 0;

	n = send(s, req, len, 0);
	if (n < 0)
		perror("netlink: Failed to send()");
	else if (n < len)
		fprintf(stderr, "netlink: Short send (%zd of %zd bytes)\n",
			n, len);

	return nh->nlmsg_seq;
}
/**
 * nl_link_up() - Send an RTM_NEWLINK request setting IFF_UP on an interface
 * @s:		Netlink socket
 * @ifi:	Interface index
 * @mtu:	MTU to request; if 0, the MTU attribute is left out of the request
 *
 * Return: value returned by nl_send() (this used to call nl_do() instead)
 */
int nl_link_up(int s, unsigned int ifi, int mtu)
{
	struct link_req {
		struct nlmsghdr hdr;
		struct ifinfomsg info;
		struct rtattr attr;
		unsigned int mtu;
	} msg = {
		.info.ifi_family	= AF_UNSPEC,
		.info.ifi_index		= ifi,
		.info.ifi_flags		= IFF_UP,
		.info.ifi_change	= IFF_UP,

		.attr.rta_type		= IFLA_MTU,
		.attr.rta_len		= RTA_LENGTH(sizeof(unsigned int)),
		.mtu			= mtu,
	};

	if (mtu)
		return nl_send(s, &msg, RTM_NEWLINK, 0, sizeof(msg));

	/* No MTU given: cut the request short, right before the attribute */
	return nl_send(s, &msg, RTM_NEWLINK, 0,
		       offsetof(struct link_req, attr));
}
/* <=== ...until here */
/**
 * tcp_probe_sockets() - Bring up interface 1 and open the two probe sockets
 * @arg:	Array of two ints, filled with the new socket descriptors
 *
 * main() runs this through do_clone() with CLONE_NEWNET, so interface index 1
 * is expected to be the loopback interface of the new namespace.
 *
 * The first socket is a blocking TCP socket, the second one a non-blocking
 * TCP socket. Failures are not checked here: a failed socket() call simply
 * leaves -1 in the corresponding array entry.
 *
 * Return: 0, always
 */
static int tcp_probe_sockets(void *arg)
{
	const int sock_type[2] = { SOCK_STREAM, SOCK_STREAM | SOCK_NONBLOCK };
	int *fds = arg;
	int i;

	nl_sock_init_do(NULL);
	nl_link_up(nl_sock, 1 /* lo */, 0);

	for (i = 0; i < 2; i++)
		fds[i] = socket(AF_INET, sock_type[i], IPPROTO_TCP);

	return 0;
}
/**
 * main() - Probe whether recvmsg() with MSG_PEEK accepts a NULL first iovec
 * @argc:	Unused
 * @argv:	Unused
 *
 * Creates a pair of TCP sockets in a new network and user namespace, connects
 * them to each other, sends two bytes, then peeks with an iovec array whose
 * first entry has a NULL base. The feature is reported as supported if exactly
 * one byte is returned (presumably the kernel then treats the NULL entry as
 * bytes to skip -- see the mail thread for the kernel patch being tested).
 *
 * Return: 0 if the probe could be run, 1 if setting it up failed
 */
int main(int argc, char **argv)
{
	char ns_fn_stack[NS_FN_STACK_SIZE], b;
	struct iovec iov[2] = {
		{ .iov_base = NULL,	.iov_len = 1 },
		{ .iov_base = &b,	.iov_len = 1 },
	};
	/* Designated initialisers: don't depend on field order in msghdr */
	struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2, };
	struct sockaddr_in a = { .sin_family = AF_INET, };
	socklen_t a_len = sizeof(a);
	int s[2] = { -1, -1 }, s_recv = -1;
	int rc = 1;
	ssize_t len;

	(void)argc;
	(void)argv;

	if (do_clone(tcp_probe_sockets, ns_fn_stack, sizeof(ns_fn_stack),
		     CLONE_NEWNET | CLONE_NEWUSER | CLONE_VM | CLONE_VFORK |
		     CLONE_FILES | SIGCHLD,
		     (void *)s) < 0) {
		perror("clone");
		return rc;
	}

	if (s[0] < 0 || s[1] < 0) {
		fprintf(stderr, "Failed to create sockets in namespace\n");
		goto out;
	}

	if (bind(s[0], (struct sockaddr *)&a, sizeof(a))) {
		perror("bind");
		goto out;
	}

	/* Port 0 was requested: fetch the one that was actually assigned */
	if (getsockname(s[0], (struct sockaddr *)&a, &a_len)) {
		perror("getsockname");
		goto out;
	}

	if (listen(s[0], 0)) {
		perror("listen");
		goto out;
	}

	/* s[1] is non-blocking, so EINPROGRESS is the expected outcome here */
	if (connect(s[1], (struct sockaddr *)&a, sizeof(a)) &&
	    errno != EINPROGRESS) {
		perror("connect");
		goto out;
	}

	s_recv = accept(s[0], NULL, NULL);
	if (s_recv < 0) {
		perror("accept");
		goto out;
	}

	if (send(s[1], "ab", 2, 0) != 2) {
		perror("send");
		goto out;
	}

	len = recvmsg(s_recv, &msg, MSG_PEEK);
	printf("MSG_PEEK with offset %ssupported\n", len == 1 ? "" : "not ");
	rc = 0;

out:
	if (s_recv >= 0)
		close(s_recv);
	if (s[1] >= 0)
		close(s[1]);
	if (s[0] >= 0)
		close(s[0]);

	return rc;
}
next prev parent reply other threads:[~2023-12-06 15:08 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-05 23:36 tcp.c: leverage MSG_PEEK with offset kernel capability when available Jon Maloy
2023-12-06 14:59 ` Stefano Brivio
2023-12-06 15:08 ` Stefano Brivio [this message]
2023-12-06 16:10 ` Jon Maloy
2023-12-06 17:06 ` Stefano Brivio
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231206160808.3d312733@elisabeth \
--to=sbrivio@redhat.com \
--cc=dgibson@redhat.com \
--cc=jmaloy@redhat.com \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).