public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: Jon Maloy <jmaloy@redhat.com>
Cc: lvivier@redhat.com, dgibson@redhat.com, passt-dev@passt.top
Subject: Re: tcp.c: leverage MSG_PEEK with offset kernel capability when available
Date: Wed, 6 Dec 2023 16:08:08 +0100	[thread overview]
Message-ID: <20231206160808.3d312733@elisabeth> (raw)
In-Reply-To: <20231206155940.51047ac1@elisabeth>

[-- Attachment #1: Type: text/plain, Size: 543 bytes --]

On Wed, 6 Dec 2023 15:59:40 +0100
Stefano Brivio <sbrivio@redhat.com> wrote:

> [...]
>
> but on a kernel with your patch, I get ENOTCONN on recvmsg(). If I
> replace that by a simple recv():
> 
>   sendto(5, "ab", 2, 0, NULL, 0)          = 2
>   recvfrom(6, "ab", 10, 0, NULL, NULL)    = 2
> 
> ...so I don't think it's a fundamental issue with this approach, rather
> something with your patch, but I'm not yet sure what. :)

Oops, my bad, I got the order of fields in struct msghdr wrong. New
version attached, this one works.

-- 
Stefano

[-- Attachment #2: pkt_selfie.c --]
[-- Type: text/x-c++src, Size: 3381 bytes --]

#define _GNU_SOURCE
#include <sched.h>
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/tcp.h>

#include <linux/netlink.h>
#include <linux/rtnetlink.h>

/* ===> from passt's Makefile and code... */

/* Stack size limit: presumably in KiB, given that it's multiplied by 1024
 * below -- TODO confirm against passt's Makefile
 */
#define RLIMIT_STACK_VAL	8192
/* Size in bytes of the stack area main() hands to do_clone(): one eighth of
 * the limit above
 */
#define NS_FN_STACK_SIZE	(RLIMIT_STACK_VAL * 1024 / 8)

/**
 * do_clone() - Wrapper of __clone2() for ia64, clone() for other architectures
 * @fn:		Entry point for child
 * @stack_area:	Stack area for child: the child is pointed to the middle of it
 * @stack_size:	Total size of stack area, ia64 is passed half of it as size
 * @flags:	Flags for clone() or __clone2()
 * @arg:	Argument to @fn
 *
 * Return: return value of clone() or __clone2(), unchanged
 */
int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
	     void *arg)
{
	size_t half = stack_size / 2;
	/* NOTE(review): presumably the middle is used so that this works
	 * regardless of the direction of stack growth -- confirm
	 */
	char *stack_mid = stack_area + half;

#ifdef __ia64__
	return __clone2(fn, stack_mid, half, flags, arg);
#else
	(void)half;	/* Only needed as size on ia64 */

	return clone(fn, stack_mid, flags, arg);
#endif
}

/* Netlink socket set by nl_sock_init_do(), -1 if it couldn't be set up */
static int nl_sock;

/**
 * nl_sock_init_do() - Open and bind a NETLINK_ROUTE socket, store in nl_sock
 * @arg:	Unused, the caller in this file passes NULL
 *
 * On failure of either socket() or bind(), nl_sock is set to -1 and no
 * descriptor is left open.
 *
 * Return: 0, always
 */
static int nl_sock_init_do(void *arg)
{
	struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
	int fd;

	(void)arg;

	fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
	if (fd >= 0 && bind(fd, (struct sockaddr *)&addr, sizeof(addr))) {
		/* Don't leak the descriptor if it can't be bound */
		close(fd);
		fd = -1;
	}

	nl_sock = fd < 0 ? -1 : fd;

	return 0;
}

/**
 * nl_send() - Prepare and send netlink request
 * @s:		Netlink socket
 * @req:	Request (will fill netlink header)
 * @type:	Request type
 * @flags:	Extra request flags (NLM_F_REQUEST and NLM_F_ACK assumed)
 * @len:	Request length
 *
 * The sequence number is always set to 1. A failed or short send() is
 * reported on standard error, but it's not fatal: this is a best-effort
 * helper for a test program.
 *
 * Return: sequence number of request, also if sending failed
 */
static uint32_t nl_send(int s, void *req, uint16_t type,
		       uint16_t flags, ssize_t len)
{
	struct nlmsghdr *nh = req;
	ssize_t n;

	nh->nlmsg_type = type;
	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
	nh->nlmsg_len = len;
	nh->nlmsg_seq = 1;
	nh->nlmsg_pid = 0;

	n = send(s, req, len, 0);
	if (n < 0)
		perror("netlink: send");
	else if (n < len)
		fprintf(stderr, "netlink: short send (%zd of %zd bytes)\n",
			n, len);

	return nh->nlmsg_seq;
}

/**
 * nl_link_up() - Send request to bring up an interface, optionally set MTU
 * @s:		Netlink socket
 * @ifi:	Interface index
 * @mtu:	If non-zero, MTU to set on the interface
 *
 * The request is only sent: no reply is read here.
 *
 * Return: sequence number of the request, as returned by nl_send()
 */
int nl_link_up(int s, unsigned int ifi, int mtu)
{
	struct link_req {
		struct nlmsghdr nlh;
		struct ifinfomsg ifm;
		struct rtattr rta;
		unsigned int mtu;
	} req = {
		.ifm = {
			.ifi_family	= AF_UNSPEC,
			.ifi_index	= ifi,
			.ifi_flags	= IFF_UP,
			.ifi_change	= IFF_UP,
		},
		.rta = {
			.rta_type	= IFLA_MTU,
			.rta_len	= RTA_LENGTH(sizeof(unsigned int)),
		},
		.mtu = mtu,
	};
	/* Without an MTU, the request ends right before the MTU attribute */
	ssize_t len = mtu ? sizeof(req) : offsetof(struct link_req, rta);

	return nl_send(s, &req, RTM_NEWLINK, 0, len); /* was nl_do() */
}

/* <=== ...until here */

/**
 * tcp_probe_sockets() - Bring up loopback interface, create two TCP sockets
 * @arg:	Array of two int, set to the sockets created here: the first
 *		one is blocking, the second one is non-blocking. Entries are
 *		negative if socket() failed
 *
 * main() runs this via do_clone() with CLONE_NEWNET and CLONE_FILES: the
 * descriptors created here are shared with the caller, so the netlink socket
 * is closed once the link-up request has been sent to avoid leaking it.
 *
 * Return: 0, always
 */
static int tcp_probe_sockets(void *arg)
{
	int *s = (int *)arg;

	nl_sock_init_do(NULL);
	if (nl_sock >= 0) {
		nl_link_up(nl_sock, 1 /* lo */, 0);

		close(nl_sock);
		nl_sock = -1;
	}

	s[0] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	s[1] = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);

	return 0;
}

/**
 * main() - Probe whether recvmsg() with MSG_PEEK can skip leading bytes
 * @argc:	Argument count, unused
 * @argv:	Arguments, unused
 *
 * Create two TCP sockets in a new network namespace, connect them, send two
 * bytes, then peek using a first I/O vector entry without a buffer. Receiving
 * exactly one byte is taken to mean that the first byte was skipped instead of
 * being copied.
 *
 * Return: 0 once the probe result was printed, EXIT_FAILURE if the test
 *	   couldn't be set up
 */
int main(int argc, char **argv)
{
	char ns_fn_stack[NS_FN_STACK_SIZE], b;
	struct iovec iov[2] = {
		{ .iov_base = NULL,	.iov_len = 1 },	/* Bytes to skip */
		{ .iov_base = &b,	.iov_len = 1 },
	};
	/* Designated initialisers: the order (and padding) of fields in
	 * struct msghdr isn't the same on all C libraries
	 */
	struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2 };
	/* All-zero address and port: bind to any address, ephemeral port */
	struct sockaddr a = { .sa_family = AF_INET };
	int s[2] = { -1, -1 }, s_recv = -1, ret = EXIT_FAILURE;
	socklen_t a_len = sizeof(a);
	ssize_t len;

	(void)argc;
	(void)argv;

	if (do_clone(tcp_probe_sockets, ns_fn_stack, sizeof(ns_fn_stack),
		     CLONE_NEWNET | CLONE_NEWUSER | CLONE_VM | CLONE_VFORK |
		     CLONE_FILES | SIGCHLD,
		     (void *)s) < 0) {
		perror("clone");
		goto out;
	}

	if (s[0] < 0 || s[1] < 0) {
		fprintf(stderr, "Failed to create sockets in namespace\n");
		goto out;
	}

	if (bind(s[0], &a, sizeof(a))) {
		perror("bind");
		goto out;
	}

	/* Fetch the port that was picked, to connect to it */
	if (getsockname(s[0], &a, &a_len)) {
		perror("getsockname");
		goto out;
	}

	if (listen(s[0], 0)) {
		perror("listen");
		goto out;
	}

	/* s[1] is non-blocking, EINPROGRESS is fine: the blocking accept()
	 * below returns once the connection is established
	 */
	if (connect(s[1], &a, sizeof(a)) && errno != EINPROGRESS) {
		perror("connect");
		goto out;
	}

	s_recv = accept(s[0], NULL, NULL);
	if (s_recv < 0) {
		perror("accept");
		goto out;
	}

	if (send(s[1], "ab", 2, 0) != 2) {
		perror("send");
		goto out;
	}

	len = recvmsg(s_recv, &msg, MSG_PEEK);
	printf("MSG_PEEK with offset %ssupported\n", len == 1 ? "" : "not ");

	ret = 0;
out:
	if (s_recv >= 0)
		close(s_recv);
	if (s[1] >= 0)
		close(s[1]);
	if (s[0] >= 0)
		close(s[0]);

	return ret;
}

  reply	other threads:[~2023-12-06 15:08 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-05 23:36 tcp.c: leverage MSG_PEEK with offset kernel capability when available Jon Maloy
2023-12-06 14:59 ` Stefano Brivio
2023-12-06 15:08   ` Stefano Brivio [this message]
2023-12-06 16:10   ` Jon Maloy
2023-12-06 17:06     ` Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231206160808.3d312733@elisabeth \
    --to=sbrivio@redhat.com \
    --cc=dgibson@redhat.com \
    --cc=jmaloy@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).