On Tue, Jan 28, 2025 at 12:15:30AM +0100, Stefano Brivio wrote:
> These are symmetric to write_remainder() and write_all_buf() and
> almost a copy and paste of them, with the most notable differences
> being reversed reads/writes and a couple of better-safe-than-sorry
> asserts to keep Coverity happy.

So, there's one thing that needs to be not quite symmetric for the
read() version: we need to handle EOF. At present, I believe these
will enter an infinite loop on EOF, which is not a graceful failure
mode.

> I'll use them in the next patch. At least for the moment, they're
> going to be used for vhost-user mode only, so I'm not unconditionally
> enabling readv() in the seccomp profile: the caller has to ensure it's
> there.
>
> Signed-off-by: Stefano Brivio
> ---
>  util.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  util.h |  2 ++
>  2 files changed, 72 insertions(+)
>
> diff --git a/util.c b/util.c
> index 11973c4..085937b 100644
> --- a/util.c
> +++ b/util.c
> @@ -606,6 +606,76 @@ int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip)
>  	return 0;
>  }
>
> +/**
> + * read_all_buf() - Fill a whole buffer from a file descriptor
> + * @fd:		File descriptor
> + * @buf:	Pointer to base of buffer
> + * @len:	Length of buffer
> + *
> + * Return: 0 on success, -1 on error (with errno set)
> + *
> + * #syscalls read
> + */
> +int read_all_buf(int fd, void *buf, size_t len)
> +{
> +	size_t left = len;
> +	char *p = buf;
> +
> +	while (left) {
> +		ssize_t rc;
> +
> +		ASSERT(left <= len);
> +
> +		do
> +			rc = read(fd, p, left);
> +		while ((rc < 0) && errno == EINTR);
> +
> +		if (rc < 0)
> +			return -1;
> +
> +		p += rc;
> +		left -= rc;
> +	}
> +	return 0;
> +}
> +
> +/**
> + * read_remainder() - Read the tail of an IO vector from a file descriptor
> + * @fd:		File descriptor
> + * @iov:	IO vector
> + * @cnt:	Number of entries in @iov
> + * @skip:	Number of bytes of the vector to skip reading
> + *
> + * Return: 0 on success, -1 on error (with errno set)
> + *
> + * Note: mode-specific seccomp profiles need to enable readv() to use this.
> + */
> +int read_remainder(int fd, struct iovec *iov, size_t cnt, size_t skip)
> +{
> +	size_t i = 0, offset;
> +
> +	while ((i += iov_skip_bytes(iov + i, cnt - i, skip, &offset)) < cnt) {
> +		ssize_t rc;
> +
> +		if (offset) {
> +			ASSERT(offset < iov[i].iov_len);
> +			/* Read the remainder of the partially read buffer */
> +			if (read_all_buf(fd, (char *)iov[i].iov_base + offset,
> +					 iov[i].iov_len - offset) < 0)
> +				return -1;
> +			i++;
> +		}
> +
> +		/* Fill as many of the remaining buffers as we can */
> +		rc = readv(fd, &iov[i], cnt - i);
> +		if (rc < 0)
> +			return -1;
> +
> +		skip = rc;
> +	}
> +	return 0;
> +}
> +
>  /** sockaddr_ntop() - Convert a socket address to text format
>   * @sa:		Socket address
>   * @dst:	output buffer, minimum SOCKADDR_STRLEN bytes
> diff --git a/util.h b/util.h
> index d02333d..73a7a33 100644
> --- a/util.h
> +++ b/util.h
> @@ -203,6 +203,8 @@ int fls(unsigned long x);
>  int write_file(const char *path, const char *buf);
>  int write_all_buf(int fd, const void *buf, size_t len);
>  int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
> +int read_all_buf(int fd, void *buf, size_t len);
> +int read_remainder(int fd, struct iovec *iov, size_t cnt, size_t skip);
>  void close_open_files(int argc, char **argv);
>  bool snprintf_check(char *str, size_t size, const char *format, ...);
>

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson