On Fri, Oct 07, 2022 at 02:47:38AM +0200, Stefano Brivio wrote: > In some environments, such as KubeVirt pods, we might not have a > system logger available. We could choose to run in foreground, but > this takes away the convenient synchronisation mechanism derived from > forking to background when interfaces are ready. > > Add optional logging to file with -l/--log-file and --log-size. > > Unfortunately, this means we need to duplicate features that are more > appropriately implemented by a system logger, such as rotation. Keep > that reasonably simple, by using fallocate() with range collapsing > where supported (Linux kernel >= 3.15, extent-based ext4 and XFS) and > falling back to an unsophisticated block-by-block moving of entries > toward the beginning of the file once we reach the (mandatory) size > limit. > > While at it, clarify the role of LOG_EMERG in passt.c. > > Signed-off-by: Stefano Brivio > --- > Makefile | 2 +- > README.md | 2 +- > conf.c | 49 ++++++++++- > log.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++---- > log.h | 6 ++ > passt.1 | 14 ++- > passt.c | 1 + > 7 files changed, 300 insertions(+), 25 deletions(-) > > diff --git a/Makefile b/Makefile > index 74bbfeb..fafb024 100644 > --- a/Makefile > +++ b/Makefile > @@ -41,7 +41,7 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS) > MANPAGES = passt.1 pasta.1 qrap.1 > > PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \ > - isolation.h lineread.h ndp.h netlink.h packet.h passt.h pasta.h \ > + isolation.h lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h \ > pcap.h port_fwd.h siphash.h tap.h tcp.h tcp_splice.h udp.h util.h > HEADERS = $(PASST_HEADERS) seccomp.h > > diff --git a/README.md b/README.md > index 2c522b2..3dc4fc5 100644 > --- a/README.md > +++ b/README.md > @@ -289,7 +289,7 @@ speeding up local connections, and usually requiring NAT. _pasta_: > * ✅ all capabilities dropped, other than `CAP_NET_BIND_SERVICE` (if granted) > * ✅ with default options, user, mount, IPC, UTS, PID namespaces are detached > * ✅ no external dependencies (other than a standard C library) > -* ✅ restrictive seccomp profiles (26 syscalls allowed for _passt_, 40 for > +* ✅ restrictive seccomp profiles (30 syscalls allowed for _passt_, 41 for > _pasta_ on x86_64) That's a pity, but I guess we need it. > * ✅ examples of [AppArmor](/passt/tree/contrib/apparmor) and > [SELinux](/passt/tree/contrib/selinux) profiles available > diff --git a/conf.c b/conf.c > index e9cd4a7..f22940b 100644 > --- a/conf.c > +++ b/conf.c > @@ -633,6 +633,9 @@ static void usage(const char *name) > info( " default: run in background if started from a TTY"); > info( " -e, --stderr Log to stderr too"); > info( " default: log to system logger only if started from a TTY"); > + info( " -l, --log-file PATH Log (only) to given file"); > + info( " --log-size BYTES Maximum size of log file"); > + info( " default: 1 MiB"); > info( " --runas UID|UID:GID Run as given UID, GID, which can be"); > info( " numeric, or login and group names"); > info( " default: drop to user \"nobody\""); > @@ -994,6 +997,7 @@ void conf(struct ctx *c, int argc, char **argv) > {"quiet", no_argument, NULL, 'q' }, > {"foreground", no_argument, NULL, 'f' }, > {"stderr", no_argument, NULL, 'e' }, > + {"log-file", required_argument, NULL, 'l' }, > {"help", no_argument, NULL, 'h' }, > {"socket", required_argument, NULL, 's' }, > {"ns-ifname", required_argument, NULL, 'I' }, > @@ -1034,26 +1038,28 @@ void conf(struct ctx *c, int argc, char **argv) > {"no-netns-quit", no_argument, NULL, 10 }, > {"trace", no_argument, NULL, 11 }, > {"runas", required_argument, NULL, 12 }, > + {"log-size", required_argument, NULL, 13 }, > { 0 }, > }; > struct get_bound_ports_ns_arg ns_ports_arg = { .c = c }; > char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 }; > bool v4_only = false, v6_only = false; > + char *runas = NULL, *logfile = NULL; > struct in6_addr *dns6 = c->ip6.dns; > struct fqdn *dnss = c->dns_search; > uint32_t *dns4 = c->ip4.dns; > int name, ret, mask, b, i; > const char *optstring; > unsigned int ifi = 0; > - char *runas = NULL; > + size_t logsize = 0; > uid_t uid; > gid_t gid; > > if (c->mode == MODE_PASTA) { > c->no_dhcp_dns = c->no_dhcp_dns_search = 1; > - optstring = "dqfehI:p:P:m:a:n:M:g:i:D:S:46t:u:T:U:"; > + optstring = "dqfel:hI:p:P:m:a:n:M:g:i:D:S:46t:u:T:U:"; > } else { > - optstring = "dqfehs:p:P:m:a:n:M:g:i:D:S:46t:u:"; > + optstring = "dqfel:hs:p:P:m:a:n:M:g:i:D:S:46t:u:"; > } > > c->tcp.fwd_in.mode = c->tcp.fwd_out.mode = 0; > @@ -1177,6 +1183,20 @@ void conf(struct ctx *c, int argc, char **argv) > > runas = optarg; > break; > + case 13: > + if (logsize) { > + err("Multiple --log-size options given"); > + usage(argv[0]); > + } > + > + errno = 0; > + logsize = strtol(optarg, NULL, 0); > + > + if (logsize < LOGFILE_SIZE_MIN || errno) { > + err("Invalid --log-size: %s", optarg); > + usage(argv[0]); > + } > + break; > case 'd': > if (c->debug) { > err("Multiple --debug options given"); > @@ -1192,6 +1212,11 @@ void conf(struct ctx *c, int argc, char **argv) > c->foreground = 1; > break; > case 'e': > + if (logfile) { > + err("Can't log to both file and stderr"); > + usage(argv[0]); > + } > + > if (c->stderr) { > err("Multiple --stderr options given"); > usage(argv[0]); > @@ -1199,6 +1224,19 @@ void conf(struct ctx *c, int argc, char **argv) > > c->stderr = 1; > break; > + case 'l': > + if (c->stderr) { > + err("Can't log to both stderr and file"); > + usage(argv[0]); > + } > + > + if (logfile) { > + err("Multiple --log-file options given"); > + usage(argv[0]); > + } > + > + logfile = optarg; > + break; > case 'q': > if (c->quiet) { > err("Multiple --quiet options given"); > @@ -1460,6 +1498,11 @@ void conf(struct ctx *c, int argc, char **argv) > if (ret) > usage(argv[0]); > > + if (logfile) { > + logfile_init(c->mode == MODE_PASST ? "passt" : "pasta", > + logfile, logsize); > + } > + > if (c->mode == MODE_PASTA) { > if (conf_pasta_ns(&netns_only, userns, netns, > optind, argc, argv) < 0) > diff --git a/log.c b/log.c > index 54a7cbb..1e405e3 100644 > --- a/log.c > +++ b/log.c > @@ -12,7 +12,12 @@ > * Author: Stefano Brivio > */ > > +#include > +#include > +#include > +#include > #include > +#include > #include > #include > #include > @@ -20,36 +25,49 @@ > #include > #include > #include > -#include > > #include "log.h" > +#include "util.h" > +#include "passt.h" > > -/* For __openlog() and __setlogmask() wrappers, and passt_vsyslog() */ > -static int log_mask; > -static int log_sock = -1; > -static char log_ident[BUFSIZ]; > -static int log_opt; > -static time_t log_debug_start; > -int log_trace; > +static int log_sock = -1; /* Optional socket to system logger */ > +static char log_ident[BUFSIZ]; /* Identifier string for openlog() */ > +static int log_mask; /* Current log priority mask */ > +static int log_opt; /* Options for openlog() */ > + > +static int log_file = -1; /* Optional log file descriptor */ > +static size_t log_size; /* Maximum log file size in bytes */ > +static size_t log_written; /* Currently used bytes in log file */ > +static size_t log_cut_size; /* Bytes to cut at start on rotation */ > +static char log_header[BUFSIZ]; /* File header, written back on cuts */ > + > +static time_t log_start; /* Start timestamp */ > +int log_trace; /* --trace mode enabled */ > > #define logfn(name, level) \ > void name(const char *format, ...) { \ > struct timespec tp; \ > va_list args; \ > \ > - if (setlogmask(0) & LOG_MASK(LOG_DEBUG)) { \ > + if (setlogmask(0) & LOG_MASK(LOG_DEBUG) && log_file == -1) { \ > clock_gettime(CLOCK_REALTIME, &tp); \ > fprintf(stderr, "%li.%04li: ", \ > - tp.tv_sec - log_debug_start, \ > + tp.tv_sec - log_start, \ > tp.tv_nsec / (100L * 1000)); \ > - } else { \ > + } \ > + \ > + if ((LOG_MASK(LOG_PRI(level)) & log_mask) || \ > + setlogmask(0) == LOG_MASK(LOG_EMERG)) { \ > va_start(args, format); \ > - passt_vsyslog(level, format, args); \ > + if (log_file != -1) \ > + logfile_write(level, format, args); \ > + else if (!(setlogmask(0) & LOG_MASK(LOG_DEBUG))) \ > + passt_vsyslog(level, format, args); \ > va_end(args); \ > } \ > \ > - if (setlogmask(0) & LOG_MASK(LOG_DEBUG) || \ > - setlogmask(0) == LOG_MASK(LOG_EMERG)) { \ > + if ((setlogmask(0) & LOG_MASK(LOG_DEBUG) || \ > + setlogmask(0) == LOG_MASK(LOG_EMERG)) && log_file == -1) { \ > va_start(args, format); \ > (void)vfprintf(stderr, format, args); \ > va_end(args); \ > @@ -58,6 +76,16 @@ void name(const char *format, ...) { \ > } \ > } > > +/* Prefixes for log file messages, indexed by priority */ > +const char *logfile_prefix[] = { > + NULL, NULL, NULL, /* Unused: LOG_EMERG, LOG_ALERT, LOG_CRIT */ > + "ERROR: ", > + "WARNING: ", > + NULL, /* Unused: LOG_NOTICE */ > + "info: ", > + " ", /* LOG_DEBUG */ > +}; > + > logfn(err, LOG_ERR) > logfn(warn, LOG_WARNING) > logfn(info, LOG_INFO) > @@ -79,7 +107,7 @@ void __openlog(const char *ident, int option, int facility) > struct timespec tp; > > clock_gettime(CLOCK_REALTIME, &tp); > - log_debug_start = tp.tv_sec; > + log_start = tp.tv_sec; > > if (log_sock < 0) { > struct sockaddr_un a = { .sun_family = AF_UNIX, }; > @@ -124,9 +152,6 @@ void passt_vsyslog(int pri, const char *format, va_list ap) > char buf[BUFSIZ]; > int n; > > - if (!(LOG_MASK(LOG_PRI(pri)) & log_mask)) > - return; > - > /* Send without name and timestamp, the system logger should add them */ > n = snprintf(buf, BUFSIZ, "<%i> ", pri); > > @@ -141,3 +166,193 @@ void passt_vsyslog(int pri, const char *format, va_list ap) > if (send(log_sock, buf, n, 0) != n) > fprintf(stderr, "Failed to send %i bytes to syslog\n", n); > } > + > +/** > + * logfile_init() - Open log file and write header with PID and path > + * @name: Identifier for header: passt or pasta > + * @path: Path to log file > + * @size: Maximum size of log file: log_cut_size is calculatd here > + */ > +void logfile_init(const char *name, const char *path, size_t size) > +{ > + char nl = '\n', exe[PATH_MAX] = { 0 }; > + int n; > + > + if (readlink("/proc/self/exe", exe, PATH_MAX - 1) < 0) { > + perror("readlink /proc/self/exe"); > + exit(EXIT_FAILURE); > + } > + > + log_file = open(path, O_CREAT | O_TRUNC | O_APPEND | O_RDWR | O_CLOEXEC, > + S_IRUSR | S_IWUSR); Do > + if (log_file == -1) { > + err("Couldn't open log file %s: %s", path, strerror(errno)); > + exit(EXIT_FAILURE); > + } > + > + log_size = size ? size : LOGFILE_SIZE_DEFAULT; > + > + n = snprintf(log_header, sizeof(log_header), "%s: %s (%i)", > + name, exe, getpid()); > + > + if (write(log_file, log_header, n) <= 0 || > + write(log_file, &nl, 1) <= 0) { > + perror("Couldn't write to log file\n"); > + exit(EXIT_FAILURE); > + } > + > + /* For FALLOC_FL_COLLAPSE_RANGE: VFS block size can be up to one page */ > + log_cut_size = ROUND_UP(log_size * LOGFILE_CUT_RATIO / 100, PAGE_SIZE); > +} > + > +/** > + * logfile_rotate_fallocate() - Write header, set log_written after fallocate() > + * @fd: Log file descriptor > + * @ts: Current timestamp > + * > + * #syscalls lseek ppc64le:_llseek ppc64:_llseek armv6l:_llseek armv7l:_llseek > + */ > +static void logfile_rotate_fallocate(int fd, struct timespec *ts) > +{ > + char buf[BUFSIZ], *nl; > + int n; > + > + if (lseek(fd, 0, SEEK_SET) == -1) > + return; > + if (read(fd, buf, BUFSIZ) == -1) > + return; > + > + n = snprintf(buf, BUFSIZ, > + "%s - log truncated at %li.%04li", log_header, > + ts->tv_sec - log_start, ts->tv_nsec / (100L * 1000)); > + > + /* Avoid partial lines by padding the header with spaces */ > + nl = memchr(buf + n + 1, '\n', BUFSIZ - n - 1); > + if (nl) > + memset(buf + n, ' ', nl - (buf + n)); > + > + if (lseek(fd, 0, SEEK_SET) == -1) > + return; > + if (write(fd, buf, BUFSIZ) == -1) Have you tested this path? I thought with O_APPEND, this write would go at the end, regardless of the lseek() above. > + return; > + > + log_written -= log_cut_size; > +} > + > +/** > + * logfile_rotate_move() - Fallback: move recent entries toward start, then cut > + * @fd: Log file descriptor > + * @ts: Current timestamp > + * > + * #syscalls lseek ppc64le:_llseek ppc64:_llseek armv6l:_llseek armv7l:_llseek > + * #syscalls ftruncate > + */ > +static void logfile_rotate_move(int fd, struct timespec *ts) > +{ > + int header_len, write_offset, end, discard, n; > + char buf[BUFSIZ], *nl; > + > + header_len = snprintf(buf, BUFSIZ, > + "%s - log truncated at %li.%04li\n", log_header, > + ts->tv_sec - log_start, > + ts->tv_nsec / (100L * 1000)); A common helper for the truncation header seems like it would be a good idea. > + if (lseek(fd, 0, SEEK_SET) == -1) > + return; > + if (write(fd, buf, header_len) == -1) > + return; > + > + end = write_offset = header_len; > + discard = log_cut_size + header_len; > + > + /* Try to cut cleanly at newline */ > + if (lseek(fd, discard, SEEK_SET) == -1) > + goto out; > + if ((n = read(fd, buf, BUFSIZ)) <= 0) > + goto out; > + if ((nl = memchr(buf, '\n', n))) > + discard += (nl - buf) + 1; > + > + /* Go to first block to be moved */ > + if (lseek(fd, discard, SEEK_SET) == -1) > + goto out; > + > + while ((n = read(fd, buf, BUFSIZ)) > 0) { > + end = header_len; Hrm.. so errors in this section will throw away everything except the header. > + > + if (lseek(fd, write_offset, SEEK_SET) == -1) > + goto out; > + if ((n = write(fd, buf, n)) == -1) > + goto out; > + write_offset += n; > + > + if ((n = lseek(fd, 0, SEEK_CUR)) == -1) > + goto out; > + end = n; But errors after here will just throw away what we haven't moved so far. Seems a bit inconsistent. > + > + if (lseek(fd, discard - header_len, SEEK_CUR) == -1) > + goto out; > + } I wonder if those whole function would be made simpler using pread() and pwrite() instead of lseek()s all over the place. > +out: > + if (ftruncate(fd, end)) > + return; > + > + log_written = end; > +} > + > +/** > + * logfile_rotate() - "Rotate" log file once it's full > + * @fd: Log file descriptor > + * @ts: Current timestamp > + * > + * Return: 0 on success, negative error code on failure > + * > + * #syscalls fallocate fcntl > + */ > +static int logfile_rotate(int fd, struct timespec *ts) > +{ > + if (fcntl(fd, F_SETFL, O_RDWR /* Drop O_APPEND: explicit lseek() */)) > + return -errno; > + > + /* Collapsing only for Linux kernel >= 3.15, extent-based ext4 or XFS */ > + if (!fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, log_cut_size)) > + logfile_rotate_fallocate(fd, ts); > + else > + logfile_rotate_move(fd, ts); > + > + if (fcntl(fd, F_SETFL, O_RDWR | O_APPEND)) > + return -errno; > + > + return 0; > +} > + > +/** > + * logfile_write() - Write entry to log file, trigger rotation if full > + * @pri: Facility and level map, same as priority for vsyslog() > + * @format: Same as vsyslog() format > + * @ap: Same as vsyslog() ap > + */ > +void logfile_write(int pri, const char *format, va_list ap) > +{ > + struct timespec ts; > + char buf[BUFSIZ]; > + int n; > + > + if (clock_gettime(CLOCK_REALTIME, &ts)) > + return; > + > + n = snprintf(buf, BUFSIZ, "%li.%04li: %s", > + ts.tv_sec - log_start, ts.tv_nsec / (100L * 1000), > + logfile_prefix[pri]); > + > + n += vsnprintf(buf + n, BUFSIZ - n, format, ap); > + > + if (format[strlen(format)] != '\n') > + n += snprintf(buf + n, BUFSIZ - n, "\n"); > + > + if ((log_written + n >= log_size) && logfile_rotate(log_file, &ts)) > + return; > + > + if ((n = write(log_file, buf, n)) >= 0) > + log_written += n; > +} > diff --git a/log.h b/log.h > index 70cad6f..ed71cd7 100644 > --- a/log.h > +++ b/log.h > @@ -6,6 +6,10 @@ > #ifndef LOG_H > #define LOG_H > > +#define LOGFILE_SIZE_DEFAULT 1E6 Wouldn't a multiple of the block size make more sense? > +#define LOGFILE_CUT_RATIO 30 /* When full, cut ~30% size */ > +#define LOGFILE_SIZE_MIN (5UL * MAX(BUFSIZ, PAGE_SIZE)) > + > void err(const char *format, ...); > void warn(const char *format, ...); > void info(const char *format, ...); > @@ -20,7 +24,9 @@ void trace_init(int enable); > } while (0) > > void __openlog(const char *ident, int option, int facility); > +void logfile_init(const char *name, const char *path, size_t size); > void passt_vsyslog(int pri, const char *format, va_list ap); > +void logfile_write(int pri, const char *format, va_list ap); > void __setlogmask(int mask); > > #endif /* LOG_H */ > diff --git a/passt.1 b/passt.1 > index 83395bc..64236b6 100644 > --- a/passt.1 > +++ b/passt.1 > @@ -79,7 +79,7 @@ for performance reasons. > > .TP > .BR \-d ", " \-\-debug > -Be verbose, don't run in background. > +Be verbose, don't run in background, don't log to the system logger. > > .TP > .BR \-\-trace > @@ -99,9 +99,19 @@ Default is to fork into background. > .TP > .BR \-e ", " \-\-stderr > Log to standard error too. > -Default is to log to system logger only, if started from an interactive > +Default is to log to the system logger only, if started from an interactive > terminal, and to both system logger and standard error otherwise. > > +.TP > +.BR \-l ", " \-\-log-file " " \fIPATH\fR > +Log to file \fIPATH\fR, not to standard error, and not to the system logger. > + > +.TP > +.BR \-\-log-size " " \fISIZE\fR > +Limit log file size to \fISIZE\fR bytes. When the log file is full, make room > +for new entries by removing old ones at the beginning. This limit is mandatory. > +Default is 1048576 (1 MiB). > + > .TP > .BR \-\-runas " " \fIUID\fR|\fIUID:GID\fR|\fILOGIN\fR|\fILOGIN:GROUP\fR > Attempt to change to given UID and corresponding group if UID is given, > diff --git a/passt.c b/passt.c > index 0c561b2..7589b05 100644 > --- a/passt.c > +++ b/passt.c > @@ -220,6 +220,7 @@ int main(int argc, char **argv) > > __openlog(log_name, 0, LOG_DAEMON); > > + /* Meaning we don't know yet: log everything. LOG_EMERG is unused */ > __setlogmask(LOG_MASK(LOG_EMERG)); > > c.epollfd = epoll_create1(EPOLL_CLOEXEC); -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson