diff --git a/README.md b/README.md index 5be5aaa1..2d0985a0 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,9 @@ such as: /proc/swaps /proc/uptime /proc/slabinfo +/proc/pressure/io +/proc/pressure/cpu +/proc/pressure/memory /sys/devices/system/cpu/online ``` @@ -109,6 +112,9 @@ docker run -it -m 256m --memory-swap 256m \ -v /var/lib/lxcfs/proc/swaps:/proc/swaps:rw \ -v /var/lib/lxcfs/proc/uptime:/proc/uptime:rw \ -v /var/lib/lxcfs/proc/slabinfo:/proc/slabinfo:rw \ + -v /var/lib/lxcfs/proc/pressure/io:/proc/pressure/io:rw \ + -v /var/lib/lxcfs/proc/pressure/cpu:/proc/pressure/cpu:rw \ + -v /var/lib/lxcfs/proc/pressure/memory:/proc/pressure/memory:rw \ -v /var/lib/lxcfs/sys/devices/system/cpu:/sys/devices/system/cpu:rw \ ubuntu:18.04 /bin/bash ``` diff --git a/src/api_extensions.h b/src/api_extensions.h index 77f69e71..ccf9bf59 100644 --- a/src/api_extensions.h +++ b/src/api_extensions.h @@ -23,6 +23,9 @@ static char *api_extensions[] = { "proc_swaps", "proc_uptime", "proc_slabinfo", + "proc_pressure_io", + "proc_pressure_cpu", + "proc_pressure_memory", "shared_pidns", "cpuview_daemon", "loadavg_daemon", diff --git a/src/bindings.h b/src/bindings.h index 346c261d..45b92c30 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -66,12 +66,24 @@ enum lxcfs_virt_t { LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE, #define LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH "/sys/devices/system/cpu/online" + + LXC_TYPE_PROC, + LXC_TYPE_PROC_PRESSURE, + LXC_TYPE_PROC_PRESSURE_IO, +#define LXC_TYPE_PROC_PRESSURE_IO_PATH "/proc/pressure/io" + + LXC_TYPE_PROC_PRESSURE_CPU, +#define LXC_TYPE_PROC_PRESSURE_CPU_PATH "/proc/pressure/cpu" + + LXC_TYPE_PROC_PRESSURE_MEMORY, +#define LXC_TYPE_PROC_PRESSURE_MEMORY_PATH "/proc/pressure/memory" LXC_TYPE_MAX, }; /* Macros below used to check the class from the file types above */ #define LXCFS_TYPE_CGROUP(type) (type >= LXC_TYPE_CGDIR && type <= LXC_TYPE_CGFILE) -#define LXCFS_TYPE_PROC(type) (type >= LXC_TYPE_PROC_MEMINFO && type <= LXC_TYPE_PROC_SLABINFO) +#define LXCFS_TYPE_PROC(type) ((type >= LXC_TYPE_PROC_MEMINFO && type <= LXC_TYPE_PROC_SLABINFO) || \ + (type >= LXC_TYPE_PROC && type <= LXC_TYPE_PROC_PRESSURE_MEMORY)) #define LXCFS_TYPE_SYS(type) (type >= LXC_TYPE_SYS && type <= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE) #define LXCFS_TYPE_OK(type) (type >= LXC_TYPE_CGDIR && type < LXC_TYPE_MAX) diff --git a/src/cgroups/cgfsng.c b/src/cgroups/cgfsng.c index 8396ed40..f03f9280 100644 --- a/src/cgroups/cgfsng.c +++ b/src/cgroups/cgfsng.c @@ -854,6 +854,54 @@ static bool cgfsng_can_use_cpuview(struct cgroup_ops *ops) return true; } +static int cgfsng_get_pressure_io_fd(struct cgroup_ops *ops, const char *cgroup) +{ + __do_free char *path = NULL; + struct hierarchy *h; + + h = ops->get_hierarchy(ops, "blkio"); + if (!h) + return -1; + + if (faccessat(h->fd, "io.pressure", F_OK, 0)) + return -1; + + path = must_make_path_relative(cgroup, "io.pressure", NULL); + return openat(h->fd, path, O_RDWR | O_CLOEXEC | O_NOFOLLOW); +} + +static int cgfsng_get_pressure_cpu_fd(struct cgroup_ops *ops, const char *cgroup) +{ + __do_free char *path = NULL; + struct hierarchy *h; + + h = ops->get_hierarchy(ops, "cpu"); + if (!h) + return -1; + + if (faccessat(h->fd, "cpu.pressure", F_OK, 0)) + return -1; + + path = must_make_path_relative(cgroup, "cpu.pressure", NULL); + return openat(h->fd, path, O_RDWR | O_CLOEXEC | O_NOFOLLOW); +} + +static int cgfsng_get_pressure_memory_fd(struct cgroup_ops *ops, const char *cgroup) +{ + __do_free char *path = NULL; + struct hierarchy *h; + + h = ops->get_hierarchy(ops, "memory"); + if (!h) + return -1; + + if (faccessat(h->fd, "memory.pressure", F_OK, 0)) + return -1; + + path = must_make_path_relative(cgroup, "memory.pressure", NULL); + return openat(h->fd, path, O_RDWR | O_CLOEXEC | O_NOFOLLOW); +} + /* At startup, parse_hierarchies finds all the info we need about cgroup * mountpoints and current cgroups, and stores it in @d. */ @@ -1074,6 +1122,10 @@ struct cgroup_ops *cgfsng_ops_init(void) cgfsng_ops->get_io_merged = cgfsng_get_io_merged; cgfsng_ops->get_io_wait_time = cgfsng_get_io_wait_time; + /* psi */ + cgfsng_ops->get_pressure_io_fd = cgfsng_get_pressure_io_fd; + cgfsng_ops->get_pressure_cpu_fd = cgfsng_get_pressure_cpu_fd; + cgfsng_ops->get_pressure_memory_fd = cgfsng_get_pressure_memory_fd; return move_ptr(cgfsng_ops); } diff --git a/src/cgroups/cgroup.h b/src/cgroups/cgroup.h index afa7db2e..2641ebf9 100644 --- a/src/cgroups/cgroup.h +++ b/src/cgroups/cgroup.h @@ -155,7 +155,7 @@ struct cgroup_ops { char **value); bool (*can_use_cpuview)(struct cgroup_ops *ops); - /* io */ + /* blkio */ int (*get_io_service_bytes)(struct cgroup_ops *ops, const char *cgroup, char **value); int (*get_io_service_time)(struct cgroup_ops *ops, const char *cgroup, @@ -166,6 +166,11 @@ struct cgroup_ops { char **value); int (*get_io_wait_time)(struct cgroup_ops *ops, const char *cgroup, char **value); + /* psi */ + int (*get_pressure_io_fd)(struct cgroup_ops *ops, const char *cgroup); + int (*get_pressure_cpu_fd)(struct cgroup_ops *ops, const char *cgroup); + int (*get_pressure_memory_fd)(struct cgroup_ops *ops, + const char *cgroup); }; extern struct cgroup_ops *cgroup_ops; diff --git a/src/lxcfs.c b/src/lxcfs.c index c5eef200..d26b06a9 100644 --- a/src/lxcfs.c +++ b/src/lxcfs.c @@ -524,6 +524,20 @@ static int do_proc_open(const char *path, struct fuse_file_info *fi) return __proc_open(path, fi); } +static int do_proc_opendir(const char *path, struct fuse_file_info *fi) +{ + char *error; + int (*__proc_opendir)(const char *path, struct fuse_file_info *fi); + + dlerror(); + __proc_opendir = (int (*)(const char *path, struct fuse_file_info *fi))dlsym(dlopen_handle, "proc_opendir"); + error = dlerror(); + if (error) + return log_error(-1, "%s - Failed to find proc_opendir()", error); + + return __proc_opendir(path, fi); +} + static int do_proc_access(const char *path, int mode) { char *error; @@ -608,6 +622,20 @@ static int do_proc_release(const char *path, struct fuse_file_info *fi) return __proc_release(path, fi); } +static int do_proc_releasedir(const char *path, struct fuse_file_info *fi) +{ + char *error; + int (*__proc_releasedir)(const char *path, struct fuse_file_info *fi); + + dlerror(); + __proc_releasedir = (int (*)(const char *path, struct fuse_file_info *)) dlsym(dlopen_handle, "proc_releasedir"); + error = dlerror(); + if (error) + return log_error(-1, "%s - Failed to find proc_releasedir()", error); + + return __proc_releasedir(path, fi); +} + static int do_sys_release(const char *path, struct fuse_file_info *fi) { char *error; @@ -724,8 +752,12 @@ static int lxcfs_opendir(const char *path, struct fuse_file_info *fi) return ret; } - if (strcmp(path, "/proc") == 0) - return 0; + if (strncmp(path, "/proc", 5) == 0) { + up_users(); + ret = do_proc_opendir(path, fi); + down_users(); + return ret; + } if (strncmp(path, "/sys", 4) == 0) { up_users(); @@ -768,7 +800,7 @@ static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, return ret; } - if (strcmp(path, "/proc") == 0) { + if (LXCFS_TYPE_PROC(type)) { up_users(); ret = do_proc_readdir(path, buf, filler, offset, fi); down_users(); @@ -837,12 +869,14 @@ static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi) return ret; } - if (path) { - if (strcmp(path, "/") == 0) - return 0; - if (strcmp(path, "/proc") == 0) - return 0; + if (LXCFS_TYPE_PROC(type)) { + up_users(); + ret = do_proc_releasedir(path, fi); + down_users(); + return ret; } + if (path && strcmp(path, "/") == 0) + return 0; lxcfs_error("unknown file type: path=%s, type=%d, fi->fh=%" PRIu64, path, type, fi->fh); diff --git a/src/proc_fuse.c b/src/proc_fuse.c index f48c40b1..f3f07def 100644 --- a/src/proc_fuse.c +++ b/src/proc_fuse.c @@ -136,6 +136,11 @@ __lxcfs_fuse_ops int proc_getattr(const char *path, struct stat *sb) sb->st_nlink = 2; return 0; } + if (strcmp(path, "/proc/pressure") == 0) { + sb->st_mode = S_IFDIR | 00555; + sb->st_nlink = 2; + return 0; + } if (strcmp(path, "/proc/meminfo") == 0 || strcmp(path, "/proc/cpuinfo") == 0 || @@ -156,6 +161,21 @@ __lxcfs_fuse_ops int proc_getattr(const char *path, struct stat *sb) sb->st_nlink = 1; return 0; } + if (strcmp(path, "/proc/pressure/io") == 0 || + strcmp(path, "/proc/pressure/cpu") == 0 || + strcmp(path, "/proc/pressure/memory") == 0) { + if (liblxcfs_functional()) { + if (!can_access_personality()) + return log_error(-EACCES, RESTRICTED_PERSONALITY_ACCESS_POLICY); + sb->st_size = get_procfile_size_with_personality(path); + } + else + sb->st_size = get_procfile_size(path); + /* TODO: read-only now, will be writable after monitoring support */ + sb->st_mode = S_IFREG | 00444; + sb->st_nlink = 1; + return 0; + } return -ENOENT; } @@ -164,17 +184,30 @@ __lxcfs_fuse_ops int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi) { - if (dir_filler(filler, buf, ".", 0) != 0 || - dir_filler(filler, buf, "..", 0) != 0 || - dir_filler(filler, buf, "cpuinfo", 0) != 0 || - dir_filler(filler, buf, "meminfo", 0) != 0 || - dir_filler(filler, buf, "stat", 0) != 0 || - dir_filler(filler, buf, "uptime", 0) != 0 || - dir_filler(filler, buf, "diskstats", 0) != 0 || - dir_filler(filler, buf, "swaps", 0) != 0 || - dir_filler(filler, buf, "loadavg", 0) != 0 || - dir_filler(filler, buf, "slabinfo", 0) != 0) - return -EINVAL; + if (strcmp(path, "/proc") == 0) { + if (dir_filler(filler, buf, ".", 0) != 0 || + dir_filler(filler, buf, "..", 0) != 0 || + dir_filler(filler, buf, "cpuinfo", 0) != 0 || + dir_filler(filler, buf, "meminfo", 0) != 0 || + dir_filler(filler, buf, "stat", 0) != 0 || + dir_filler(filler, buf, "uptime", 0) != 0 || + dir_filler(filler, buf, "diskstats", 0) != 0 || + dir_filler(filler, buf, "swaps", 0) != 0 || + dir_filler(filler, buf, "loadavg", 0) != 0 || + dir_filler(filler, buf, "slabinfo", 0) != 0 || + dirent_filler(filler, path, "pressure", buf, 0) != 0) + return -EINVAL; + return 0; + } + if (strcmp(path, "/proc/pressure") == 0) { + if (dir_filler(filler, buf, ".", 0) != 0 || + dir_filler(filler, buf, "..", 0) != 0 || + dir_filler(filler, buf, "io", 0) != 0 || + dir_filler(filler, buf, "cpu", 0) != 0 || + dir_filler(filler, buf, "memory", 0) != 0) + return -EINVAL; + return 0; + } return 0; } @@ -200,6 +233,12 @@ __lxcfs_fuse_ops int proc_open(const char *path, struct fuse_file_info *fi) type = LXC_TYPE_PROC_LOADAVG; else if (strcmp(path, "/proc/slabinfo") == 0) type = LXC_TYPE_PROC_SLABINFO; + else if (strcmp(path, "/proc/pressure/io") == 0) + type = LXC_TYPE_PROC_PRESSURE_IO; + else if (strcmp(path, "/proc/pressure/cpu") == 0) + type = LXC_TYPE_PROC_PRESSURE_CPU; + else if (strcmp(path, "/proc/pressure/memory") == 0) + type = LXC_TYPE_PROC_PRESSURE_MEMORY; if (type == -1) return -ENOENT; @@ -227,10 +266,40 @@ __lxcfs_fuse_ops int proc_open(const char *path, struct fuse_file_info *fi) return 0; } +__lxcfs_fuse_ops int proc_opendir(const char *path, struct fuse_file_info *fi) +{ + __do_free struct file_info *dir_info = NULL; + int type = -1; + + if (!liblxcfs_functional()) + return -EIO; + + if (strcmp(path, "/proc") == 0) + type = LXC_TYPE_PROC; + else if (strcmp(path, "/proc/pressure") == 0) + type = LXC_TYPE_PROC_PRESSURE; + if (type == -1) + return -ENOENT; + + dir_info = zalloc(sizeof(*dir_info)); + if (!dir_info) + return -ENOMEM; + + dir_info->type = type; + dir_info->buf = NULL; + dir_info->file = NULL; + dir_info->buflen = 0; + + fi->fh = PTR_TO_UINT64(move_ptr(dir_info)); + return 0; +} + __lxcfs_fuse_ops int proc_access(const char *path, int mask) { if (strcmp(path, "/proc") == 0 && access(path, R_OK) == 0) return 0; + if (strcmp(path, "/proc/pressure") == 0 && access(path, R_OK) == 0) + return 0; /* these are all read-only */ if ((mask & ~R_OK) != 0) @@ -245,6 +314,12 @@ __lxcfs_fuse_ops int proc_release(const char *path, struct fuse_file_info *fi) return 0; } +__lxcfs_fuse_ops int proc_releasedir(const char *path, struct fuse_file_info *fi) +{ + do_release_file_info(fi); + return 0; +} + /** * Gets a non-hierarchical memory controller limit, or UINT64_MAX if no limit is * in place. If `swap` is true, reads 'swap' (v2) or 'memsw' (v1); otherwise @@ -1599,6 +1674,213 @@ static int proc_slabinfo_read(char *buf, size_t size, off_t offset, return total_len; } +static int proc_pressure_io_read(char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cgroup = NULL, *line = NULL; + __do_free void *fopen_cache = NULL; + __do_fclose FILE *f = NULL; + __do_close int fd = -EBADF; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + size_t linelen = 0, total_len = 0; + char *cache = d->buf; + size_t cache_size = d->buflen; + pid_t initpid; + + if (offset) { + size_t left; + + if (offset > d->size) + return -EINVAL; + + if (!d->cached) + return 0; + + left = d->size - offset; + total_len = left > size ? size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cgroup = get_pid_cgroup(initpid, "blkio"); + if (!cgroup) + return read_file_fuse("/proc/pressure/io", buf, size, d); + + prune_init_slice(cgroup); + + fd = cgroup_ops->get_pressure_io_fd(cgroup_ops, cgroup); + if (fd < 0) + return read_file_fuse("/proc/pressure/io", buf, size, d); + + f = fdopen_cached(fd, "re", &fopen_cache); + if (!f) + return read_file_fuse("/proc/pressure/io", buf, size, d); + + while (getline(&line, &linelen, f) != -1) { + ssize_t l = snprintf(cache, cache_size, "%s", line); + if (l < 0) + return log_error(0, "Failed to write cache"); + if ((size_t)l >= cache_size) + return log_error(0, "Write to cache was truncated"); + + cache += l; + cache_size -= l; + total_len += l; + } + + d->cached = 1; + d->size = total_len; + if (total_len > size) + total_len = size; + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int proc_pressure_cpu_read(char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cgroup = NULL, *line = NULL; + __do_free void *fopen_cache = NULL; + __do_fclose FILE *f = NULL; + __do_close int fd = -EBADF; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + size_t linelen = 0, total_len = 0; + char *cache = d->buf; + size_t cache_size = d->buflen; + pid_t initpid; + + if (offset) { + size_t left; + + if (offset > d->size) + return -EINVAL; + + if (!d->cached) + return 0; + + left = d->size - offset; + total_len = left > size ? size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cgroup = get_pid_cgroup(initpid, "cpu"); + if (!cgroup) + return read_file_fuse("/proc/pressure/cpu", buf, size, d); + + prune_init_slice(cgroup); + + fd = cgroup_ops->get_pressure_cpu_fd(cgroup_ops, cgroup); + if (fd < 0) + return read_file_fuse("/proc/pressure/cpu", buf, size, d); + + f = fdopen_cached(fd, "re", &fopen_cache); + if (!f) + return read_file_fuse("/proc/pressure/cpu", buf, size, d); + + while (getline(&line, &linelen, f) != -1) { + ssize_t l = snprintf(cache, cache_size, "%s", line); + if (l < 0) + return log_error(0, "Failed to write cache"); + if ((size_t)l >= cache_size) + return log_error(0, "Write to cache was truncated"); + + cache += l; + cache_size -= l; + total_len += l; + } + + d->cached = 1; + d->size = total_len; + if (total_len > size) + total_len = size; + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int proc_pressure_memory_read(char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cgroup = NULL, *line = NULL; + __do_free void *fopen_cache = NULL; + __do_fclose FILE *f = NULL; + __do_close int fd = -EBADF; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + size_t linelen = 0, total_len = 0; + char *cache = d->buf; + size_t cache_size = d->buflen; + pid_t initpid; + + if (offset) { + size_t left; + + if (offset > d->size) + return -EINVAL; + + if (!d->cached) + return 0; + + left = d->size - offset; + total_len = left > size ? size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cgroup = get_pid_cgroup(initpid, "memory"); + if (!cgroup) + return read_file_fuse("/proc/pressure/memory", buf, size, d); + + prune_init_slice(cgroup); + + fd = cgroup_ops->get_pressure_memory_fd(cgroup_ops, cgroup); + if (fd < 0) + return read_file_fuse("/proc/pressure/memory", buf, size, d); + + f = fdopen_cached(fd, "re", &fopen_cache); + if (!f) + return read_file_fuse("/proc/pressure/memory", buf, size, d); + + while (getline(&line, &linelen, f) != -1) { + ssize_t l = snprintf(cache, cache_size, "%s", line); + if (l < 0) + return log_error(0, "Failed to write cache"); + if ((size_t)l >= cache_size) + return log_error(0, "Write to cache was truncated"); + + cache += l; + cache_size -= l; + total_len += l; + } + + d->cached = 1; + d->size = total_len; + if (total_len > size) + total_len = size; + memcpy(buf, d->buf, total_len); + + return total_len; +} + static int proc_read_with_personality(int (*do_proc_read)(char *, size_t, off_t, struct fuse_file_info *), char *buf, size_t size, off_t offset, struct fuse_file_info *fi) @@ -1696,6 +1978,24 @@ __lxcfs_fuse_ops int proc_read(const char *path, char *buf, size_t size, return read_file_fuse_with_offset(LXC_TYPE_PROC_SLABINFO_PATH, buf, size, offset, f); + case LXC_TYPE_PROC_PRESSURE_IO: + if (liblxcfs_functional()) + return proc_pressure_io_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_PROC_PRESSURE_IO_PATH, + buf, size, offset, f); + case LXC_TYPE_PROC_PRESSURE_CPU: + if (liblxcfs_functional()) + return proc_pressure_cpu_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_PROC_PRESSURE_CPU_PATH, + buf, size, offset, f); + case LXC_TYPE_PROC_PRESSURE_MEMORY: + if (liblxcfs_functional()) + return proc_pressure_memory_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_PROC_PRESSURE_MEMORY_PATH, + buf, size, offset, f); } return -EINVAL; diff --git a/src/proc_fuse.h b/src/proc_fuse.h index c97d60c7..ebf8e7ae 100644 --- a/src/proc_fuse.h +++ b/src/proc_fuse.h @@ -18,8 +18,10 @@ __visible extern int proc_getattr(const char *path, struct stat *sb); __visible extern int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi); __visible extern int proc_open(const char *path, struct fuse_file_info *fi); +__visible extern int proc_opendir(const char *path, struct fuse_file_info *fi); __visible extern int proc_access(const char *path, int mask); __visible extern int proc_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi); __visible extern int proc_release(const char *path, struct fuse_file_info *fi); +__visible extern int proc_releasedir(const char *path, struct fuse_file_info *fi); #endif /* __LXCFS_PROC_FUSE_H */