From f19a423bd6abe801170eae384687af392317cdca Mon Sep 17 00:00:00 2001 From: "huteng.ht" Date: Tue, 24 May 2022 15:01:01 +0800 Subject: [PATCH 1/6] sysfs: implement get_cpuset_mems() and define macros also implement functions called by get_cpuset_mems(), - cgfsng_get_cpuset_mems - readat_cpuset_mems Signed-off-by: huteng.ht --- src/bindings.h | 16 ++++++++++ src/cgroups/cgfsng.c | 70 ++++++++++++++++++++++++++++++++++++++++++++ src/cgroups/cgroup.c | 16 ++++++++++ src/cgroups/cgroup.h | 3 ++ 4 files changed, 105 insertions(+) diff --git a/src/bindings.h b/src/bindings.h index 617179df..b54ce68e 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -69,6 +69,22 @@ enum lxcfs_virt_t { LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE, #define LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH "/sys/devices/system/cpu/online" + + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE, + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBDIR, + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBFILE, + + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE, +#define LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE_PATH "/sys/devices/system/node/online" + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU, +#define LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU_PATH "/sys/devices/system/node/has_cpu" + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY, +#define LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY_PATH "/sys/devices/system/node/has_memory" + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY, +#define LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY_PATH "/sys/devices/system/node/has_normal_memory" + + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPULIST, + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPUMAP, LXC_TYPE_MAX, }; diff --git a/src/cgroups/cgfsng.c b/src/cgroups/cgfsng.c index 2d583c67..11621a22 100644 --- a/src/cgroups/cgfsng.c +++ b/src/cgroups/cgfsng.c @@ -683,6 +683,22 @@ static char *readat_cpuset(int cgroup_fd) return NULL; } +static char *readat_cpuset_mems(int cgroup_fd) +{ + __do_free char *val = NULL; + + val = readat_file(cgroup_fd, "cpuset.mems"); + if (val && 
strcmp(val, "") != 0) + return move_ptr(val); + + free_disarm(val); + val = readat_file(cgroup_fd, "cpuset.mems.effective"); + if (val && strcmp(val, "") != 0) + return move_ptr(val); + + return NULL; +} + static int cgfsng_get_cpuset_cpus(struct cgroup_ops *ops, const char *cgroup, char **value) { @@ -736,6 +752,59 @@ static int cgfsng_get_cpuset_cpus(struct cgroup_ops *ops, const char *cgroup, return -1; } +static int cgfsng_get_cpuset_mems(struct cgroup_ops *ops, const char *cgroup, + char **value) +{ + __do_close int cgroup_fd = -EBADF; + __do_free char *path = NULL; + char *v; + struct hierarchy *h; + int ret; + + h = ops->get_hierarchy(ops, "cpuset"); + if (!h) + return -1; + + if (!is_unified_hierarchy(h)) + ret = CGROUP_SUPER_MAGIC; + else + ret = CGROUP2_SUPER_MAGIC; + + *value = NULL; + path = must_make_path_relative(cgroup, NULL); + cgroup_fd = openat_safe(h->fd, path); + if (cgroup_fd < 0) + return -1; + + v = readat_cpuset_mems(cgroup_fd); + if (v) { + *value = v; + return ret; + } + + /* + * cpuset.cpus and cpuset.cpus.effective are empty so we need to look + * the nearest ancestor with a non-empty cpuset.cpus{.effective} file. 
+ */ + for (;;) { + int fd; + + fd = openat_safe(cgroup_fd, "../"); + if (fd < 0 || !is_cgroup_fd(fd)) + return -1; + + close_prot_errno_replace(cgroup_fd, fd); + + v = readat_cpuset_mems(fd); + if (v) { + *value = v; + return ret; + } + } + + return -1; +} + static int cgfsng_get_io(struct cgroup_ops *ops, const char *cgroup, const char *file, char **value) { @@ -1023,6 +1092,7 @@ struct cgroup_ops *cgfsng_ops_init(void) /* cpuset */ cgfsng_ops->get_cpuset_cpus = cgfsng_get_cpuset_cpus; cgfsng_ops->can_use_cpuview = cgfsng_can_use_cpuview; + cgfsng_ops->get_cpuset_mems = cgfsng_get_cpuset_mems; /* blkio */ cgfsng_ops->get_io_service_bytes = cgfsng_get_io_service_bytes; diff --git a/src/cgroups/cgroup.c b/src/cgroups/cgroup.c index 490e0bfa..371a2ea1 100644 --- a/src/cgroups/cgroup.c +++ b/src/cgroups/cgroup.c @@ -121,3 +121,19 @@ char *get_cpuset(const char *cg) return value; } + +/* + * Read the cpuset.mems for cg + * Return the answer in a newly allocated string which must be freed + */ +char *get_cpuset_mems(const char *cg) +{ + char *value = NULL; + int ret; + + ret = cgroup_ops->get_cpuset_mems(cgroup_ops, cg, &value); + if (ret < 0) + return NULL; + + return value; +} diff --git a/src/cgroups/cgroup.h b/src/cgroups/cgroup.h index 122e8ebf..b4d46b2e 100644 --- a/src/cgroups/cgroup.h +++ b/src/cgroups/cgroup.h @@ -154,6 +154,8 @@ struct cgroup_ops { int (*get_cpuset_cpus)(struct cgroup_ops *ops, const char *cgroup, char **value); bool (*can_use_cpuview)(struct cgroup_ops *ops); + int (*get_cpuset_mems)(struct cgroup_ops *ops, const char *cgroup, + char **value); /* io */ int (*get_io_service_bytes)(struct cgroup_ops *ops, const char *cgroup, @@ -211,5 +213,6 @@ static inline int get_cgroup_fd(const char *controller) extern char *get_pid_cgroup(pid_t pid, const char *contrl); extern char *get_cpuset(const char *cg); +extern char *get_cpuset_mems(const char *cg); #endif From 5614412c892ba0e808473c9c8cdfdae92e8ad3fd Mon Sep 17 00:00:00 2001 From: "huteng.ht" 
Date: Tue, 24 May 2022 15:43:23 +0800 Subject: [PATCH 2/6] sysfs: sys_write, sys_opendir and sys_access_legacy for node support Signed-off-by: huteng.ht --- src/sysfs_fuse.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/sysfs_fuse.c b/src/sysfs_fuse.c index bf75ba99..5b3cacea 100644 --- a/src/sysfs_fuse.c +++ b/src/sysfs_fuse.c @@ -304,7 +304,8 @@ __lxcfs_fuse_ops int sys_write(const char *path, const char *buf, size_t size, if (!liblxcfs_functional()) return -EIO; - if (f->type != LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE) + if (f->type != LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE && + f->type != LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBFILE) return -EINVAL; return -EACCES; @@ -544,6 +545,19 @@ __lxcfs_fuse_ops int sys_opendir(const char *path, struct fuse_file_info *fi) if (S_ISDIR(st_mode)) type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR; + } else if (strcmp(path, "/sys/devices/system/node") == 0) { + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE; + } else if (strncmp(path, "/sys/devices/system/node/", + STRLITERALLEN("/sys/devices/system/node/")) == 0) { + int ret; + mode_t st_mode; + + ret = get_st_mode(path, &st_mode); + if (ret) + return ret; + + if (S_ISDIR(st_mode)) + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBDIR; } if (type == -1) return -ENOENT; @@ -577,6 +591,10 @@ static int sys_access_legacy(const char *path, int mask) access(path, R_OK) == 0) return 0; + if (strcmp(path, "/sys/devices/system/node") == 0 && + access(path, R_OK) == 0) + return 0; + /* these are all read-only */ if ((mask & ~R_OK) != 0) return -EACCES; From 85091d82dce030cfa53c4f8641732cea0d97e439 Mon Sep 17 00:00:00 2001 From: "huteng.ht" Date: Tue, 24 May 2022 15:48:15 +0800 Subject: [PATCH 3/6] sysfs: sys_open_legacy and sys_open for node support Signed-off-by: huteng.ht --- src/sysfs_fuse.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/sysfs_fuse.c b/src/sysfs_fuse.c index 5b3cacea..0d2f0a20 100644 --- 
a/src/sysfs_fuse.c +++ b/src/sysfs_fuse.c @@ -445,6 +445,16 @@ static int sys_open_legacy(const char *path, struct fuse_file_info *fi) type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU; if (strcmp(path, "/sys/devices/system/cpu/online") == 0) type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE; + if (strcmp(path, "/sys/devices/system/node") == 0) + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE; /* node directory, not the cpu one */ + if (strcmp(path, "/sys/devices/system/node/online") == 0) + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE; + if (strcmp(path, "/sys/devices/system/node/has_cpu") == 0) + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU; + if (strcmp(path, "/sys/devices/system/node/has_memory") == 0) + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY; + if (strcmp(path, "/sys/devices/system/node/has_normal_memory") == 0) + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY; if (type == -1) return -ENOENT; @@ -473,6 +483,7 @@ __lxcfs_fuse_ops int sys_open(const char *path, struct fuse_file_info *fi) { __do_free struct file_info *info = NULL; int type = -1; + int path_len = strlen(path); if (!liblxcfs_functional()) return -EIO; @@ -482,6 +493,22 @@ __lxcfs_fuse_ops int sys_open(const char *path, struct fuse_file_info *fi) if (strcmp(path, "/sys/devices/system/cpu/online") == 0) { type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE; + } else if (strcmp(path, "/sys/devices/system/node/online") == 0) { + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE; + } else if (strcmp(path, "/sys/devices/system/node/has_cpu") == 0) { + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU; + } else if (strcmp(path, "/sys/devices/system/node/has_memory") == 0) { + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY; + } else if (strcmp(path, "/sys/devices/system/node/has_normal_memory") == 0) { + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY; + } else if ((strncmp(path, "/sys/devices/system/node/node", + STRLITERALLEN("/sys/devices/system/node/node")) == 0) && + (strcmp(path + path_len - strlen("cpulist"), "cpulist") == 0)) { 
+ type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPULIST; + } else if ((strncmp(path, "/sys/devices/system/node/node", + STRLITERALLEN("/sys/devices/system/node/node")) == 0) && + (strcmp(path + path_len - strlen("cpumap"), "cpumap") == 0)) { + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPUMAP; } else if (strncmp(path, "/sys/devices/system/cpu/", STRLITERALLEN("/sys/devices/system/cpu/")) == 0) { int ret; @@ -493,6 +520,17 @@ __lxcfs_fuse_ops int sys_open(const char *path, struct fuse_file_info *fi) if (S_ISREG(st_mode)) type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE; + } else if (strncmp(path, "/sys/devices/system/node/", + STRLITERALLEN("/sys/devices/system/node/")) == 0) { + int ret; + mode_t st_mode; + + ret = get_st_mode(path, &st_mode); + if (ret) + return ret; + + if (S_ISREG(st_mode)) + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBFILE; } if (type == -1) return -ENOENT; From 8244e155f38bdac287e23f2265d31e56d2fda2b7 Mon Sep 17 00:00:00 2001 From: "huteng.ht" Date: Tue, 24 May 2022 15:55:53 +0800 Subject: [PATCH 4/6] sysfs: sys_readdir and sys_readdir_legacy for node support Signed-off-by: huteng.ht --- src/sysfs_fuse.c | 323 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 321 insertions(+), 2 deletions(-) diff --git a/src/sysfs_fuse.c b/src/sysfs_fuse.c index 0d2f0a20..2ebb7ba4 100644 --- a/src/sysfs_fuse.c +++ b/src/sysfs_fuse.c @@ -38,6 +38,22 @@ #include "utils.h" static off_t get_sysfile_size(const char *which); + +static int nodemask(char *possnodes, __u32 **bitarr, __u32 *last_set_bit) +{ + __do_free __u32 *possmask = NULL; + int ret; + __u32 poss_last_set_bit = 0; + + ret = lxc_cpumask(possnodes, &possmask, &poss_last_set_bit); + if (ret) + return ret; + + *bitarr = move_ptr(possmask); + *last_set_bit = poss_last_set_bit; + return 0; +} + static int do_cpuset_read(char *cg, char *cpu_cg, char *buf, size_t buflen) { __do_free char *cpuset = NULL; @@ -73,6 +89,111 @@ static int do_cpuset_read(char *cg, char *cpu_cg, char *buf, size_t 
buflen) return total_len; } +/* + * Get online nodes from cpuset.cpus or cpuset.cpus.effective + * + * Traverse nodes listed by /sys/devices/system/node/online. If + * cpuX specified by cpuset.cpus or cpuset.cpus.effective is listed + * in /sys/devices/system/node/nodeY/cpulist, nodeY is online. + */ +static int do_get_online_nodes_from_cpuset_cpus(char *cg, __u32 **bitarr, __u32 *last_set_bit) +{ + __do_free char *cpuset_cpus = NULL; + __do_free char *node_online = NULL; + __do_free char *node_cpulist = NULL; + __do_free __u32 *bitarr_cpus = NULL; + __do_free __u32 *bitarr_node_online = NULL; + __do_free __u32 *bitarr_node_cpulist = NULL; + __u32 *arr_u32 = zalloc(sizeof(__u32)); + __u32 last_set_bit_cpus = 0; + __u32 last_set_bit_node_online = 0; + __u32 last_set_bit_node_cpulist = 0; + __u32 last_set_bit_node_from_cpuset_cpus = 0; + char node_cpulist_path[BUF_RESERVE_SIZE] = {}; + int ret = 0; + + cpuset_cpus = get_cpuset(cg); + if (!cpuset_cpus) + return 0; + + ret = cpumask(cpuset_cpus, &bitarr_cpus, &last_set_bit_cpus); + if (ret) + return ret; + + node_online = read_file_at(-EBADF, "/sys/devices/system/node/online", PROTECT_OPEN); + if (!node_online) + return -1; + + ret = nodemask(node_online, &bitarr_node_online, &last_set_bit_node_online); + if (ret) + return ret; + + for (__u32 bit = 0; bit <= last_set_bit_node_online; bit++) { + + ret = snprintf(node_cpulist_path, sizeof(node_cpulist_path), + "/sys/devices/system/node/node%u/cpulist", bit); + if (ret < 0 || (size_t)ret >= sizeof(node_cpulist_path)) + continue; + + node_cpulist = read_file_at(-EBADF, node_cpulist_path, PROTECT_OPEN); + if (!node_cpulist) + return -1; + + ret = cpumask(node_cpulist, &bitarr_node_cpulist, &last_set_bit_node_cpulist); + if (ret) + return ret; + + for (__u32 bit_cpu = 0; bit_cpu <= last_set_bit_cpus; bit_cpu++) { + if (is_set(bit_cpu, bitarr_cpus) && is_set(bit_cpu, bitarr_node_cpulist)) { + set_bit(bit, arr_u32); + last_set_bit_node_from_cpuset_cpus = bit; + break; + } + } 
+ } + *last_set_bit = last_set_bit_node_from_cpuset_cpus; + *bitarr = move_ptr(arr_u32); + + return ret; +} + +/* + * Get online nodes + * + * Online nodes come from: + * - cpuset.cpus or cpuset.cpus.effective, indicating which nodes have cpus online + * - cpuset.mems or cpuset.mems.effective, indecating which nodes have mems online + */ +static int do_get_online_nodes(char *cg, __u32 **bitarr, __u32 *last_set_bit) +{ + __do_free char *cpuset_mems = NULL; + __do_free __u32 *bitarr_mems = NULL; + __do_free __u32 *bitarr_node_from_cpuset_cpus = NULL; + __u32 last_set_bit_mems = 0; + __u32 last_set_bit_node_from_cpuset_cpus = 0; + int ret = 0; + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr_mems, &last_set_bit_mems); + if (ret) + return ret; + + ret = do_get_online_nodes_from_cpuset_cpus(cg, &bitarr_node_from_cpuset_cpus, + &last_set_bit_node_from_cpuset_cpus); + if (ret) + return ret; + + *last_set_bit = last_set_bit_mems > last_set_bit_node_from_cpuset_cpus ? 
+ last_set_bit_mems : last_set_bit_node_from_cpuset_cpus; + *bitarr_node_from_cpuset_cpus |= *bitarr_mems; + *bitarr = move_ptr(bitarr_node_from_cpuset_cpus); + + return ret; +} + static int sys_devices_system_cpu_online_read(char *buf, size_t size, off_t offset, struct fuse_file_info *fi) @@ -167,6 +288,179 @@ static int filler_sys_devices_system_cpu(const char *path, void *buf, return 0; } +static int filler_sys_devices_system_node(const char *path, void *buf, + fuse_fill_dir_t filler) +{ + __do_free char *cg = NULL; + __do_closedir DIR *dirp = NULL; + struct fuse_context *fc = fuse_get_context(); + __do_free __u32 *bitarr = NULL; + __u32 last_set_bit = 0; + int ret; + struct dirent *dirent; + pid_t initpid; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return 0; + prune_init_slice(cg); + + ret = do_get_online_nodes(cg, &bitarr, &last_set_bit); + if (ret) + return ret; + + dirp = opendir(path); + if (!dirp) + return -ENOENT; + + for (__u32 bit = 0; bit <= last_set_bit; bit++) { + char node[100]; + + if (!is_set(bit, bitarr)) + continue; + + ret = snprintf(node, sizeof(node), "node%u", bit); + if (ret < 0 || (size_t)ret >= sizeof(node)) + continue; + + if (dir_fillerat(filler, dirp, node, buf, 0) != 0) + return -ENOENT; + } + + while ((dirent = readdir(dirp))) { + char *entry = dirent->d_name; + + if (strlen(entry) <= 4) + continue; + entry += 4; + + /* Don't emit entries we already filtered above. 
*/ + if (isdigit(*entry)) + continue; + + if (dirent_fillerat(filler, dirp, dirent, buf, 0) != 0) + return -ENOENT; + } + + return 0; +} + +static int filler_sys_devices_system_node_nodex(const char *path, void *buf, + fuse_fill_dir_t filler) +{ + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_mems = NULL; + __do_free __u32 *bitarr_cpulist = NULL; + __do_free char *cpulist = NULL; + __do_free char *cg = NULL, *cpuset = NULL, *cpuset_mems = NULL; + __do_closedir DIR *dirp = NULL; + struct fuse_context *fc = fuse_get_context(); + __u32 last_set_bit = 0; + __u32 last_set_bit_mems = 0; + __u32 last_set_bit_cpulist = 0; + __u32 ndwords = 0; + int ret; + struct dirent *dirent; + pid_t initpid; + char cpulist_path[100]; + bool nomem = false; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return 0; + prune_init_slice(cg); + + cpuset = get_cpuset(cg); + if (!cpuset) + return 0; + + ret = cpumask(cpuset, &bitarr, &last_set_bit); + if (ret) + return ret; + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr_mems, &last_set_bit_mems); + if (ret) + return ret; + int nodex = atoi(path + strlen("/sys/devices/system/node/node")); + if (!is_set(nodex, bitarr_mems)) + nomem = true; + + ret = snprintf(cpulist_path, sizeof(cpulist_path), "%s/cpulist", path); + if (ret < 0 || (size_t)ret >= sizeof(cpulist_path)) + log_error(0, "Failed to write to cpulist buf"); + + if (file_exists(cpulist_path)) { + cpulist = read_file_at(-EBADF, cpulist_path, PROTECT_OPEN); + if (!cpulist) + return -1; + + if (!isdigit(cpulist[0])) + free_disarm(cpulist); + } else { + log_error(0, "/sys/devices/system/node/node*/cpulist does not exist"); + } + + if (cpulist) + ret = cpumask(cpulist, &bitarr_cpulist, &last_set_bit_cpulist); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_cpulist ? 
last_set_bit : last_set_bit_cpulist; + ndwords = last_set_bit / 32 + 1; + while (ndwords--) + *(bitarr+ndwords) &= *(bitarr_cpulist+ndwords); + + dirp = opendir(path); + if (!dirp) + return -ENOENT; + + for (__u32 bit = 0; bit <= last_set_bit; bit++) { + char cpu[100]; + + if (!is_set(bit, bitarr)) + continue; + + ret = snprintf(cpu, sizeof(cpu), "cpu%u", bit); + if (ret < 0 || (size_t)ret >= sizeof(cpu)) + continue; + + if (dir_fillerat(filler, dirp, cpu, buf, 0) != 0) + return -ENOENT; + } + + while ((dirent = readdir(dirp))) { + char *entry = dirent->d_name; + + if (nomem && (strncmp(entry, "cpu", strlen("cpu")) != 0)) + continue; + + if (strlen(entry) <= 3) + continue; + entry += 3; + + /* Don't emit entries we already filtered above. */ + if (isdigit(*entry)) + continue; + + if (dirent_fillerat(filler, dirp, dirent, buf, 0) != 0) + return -ENOENT; + } + + return 0; +} + static int get_st_mode(const char *path, mode_t *mode) { struct stat sb; @@ -333,7 +627,8 @@ static int sys_readdir_legacy(const char *path, void *buf, fuse_fill_dir_t fille if (strcmp(path, "/sys/devices/system") == 0) { if (dir_filler(filler, buf, ".", 0) != 0 || dir_filler(filler, buf, "..", 0) != 0 || - dirent_filler(filler, path, "cpu", buf, 0) != 0) + dirent_filler(filler, path, "cpu", buf, 0) != 0 || + dirent_filler(filler, path, "node", buf, 0) != 0) return -ENOENT; return 0; @@ -346,6 +641,17 @@ static int sys_readdir_legacy(const char *path, void *buf, fuse_fill_dir_t fille return 0; } + if (strcmp(path, "/sys/devices/system/node") == 0) { + if (dir_filler(filler, buf, ".", 0) != 0 || + dir_filler(filler, buf, "..", 0) != 0 || + dirent_filler(filler, path, "online", buf, 0) != 0 || + dirent_filler(filler, path, "has_cpu", buf, 0) != 0 || + dirent_filler(filler, path, "has_memory", buf, 0) != 0 || + dirent_filler(filler, path, "has_normal_memory", buf, 0) != 0) + return -ENOENT; + + return 0; + } return 0; } @@ -389,7 +695,8 @@ __lxcfs_fuse_ops int sys_readdir(const char *path, void 
*buf, case LXC_TYPE_SYS_DEVICES_SYSTEM: if (dir_filler(filler, buf, ".", 0) != 0 || dir_filler(filler, buf, "..", 0) != 0 || - dirent_filler(filler, path, "cpu", buf, 0) != 0) + dirent_filler(filler, path, "cpu", buf, 0) != 0 || + dirent_filler(filler, path, "node", buf, 0) != 0) return -ENOENT; return 0; case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU: @@ -408,6 +715,18 @@ __lxcfs_fuse_ops int sys_readdir(const char *path, void *buf, return -ENOENT; } return 0; + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE: + if (dir_filler(filler, buf, ".", 0) != 0 || + dir_filler(filler, buf, "..", 0) != 0) + return -ENOENT; + + return filler_sys_devices_system_node(path, buf, filler); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBDIR: + if (dir_filler(filler, buf, ".", 0) != 0 || + dir_filler(filler, buf, "..", 0) != 0) + return -ENOENT; + + return filler_sys_devices_system_node_nodex(path, buf, filler); } return -EINVAL; From 9a03410ce78bcd139185cf64df74ef017adc4a70 Mon Sep 17 00:00:00 2001 From: "huteng.ht" Date: Tue, 24 May 2022 16:08:14 +0800 Subject: [PATCH 5/6] sysfs: sys_getattr and sys_read for node support Signed-off-by: huteng.ht --- src/sysfs_fuse.c | 692 +++++++++++++++++++++++++++++++++++++++++++++++ src/utils.c | 43 +++ src/utils.h | 1 + 3 files changed, 736 insertions(+) diff --git a/src/sysfs_fuse.c b/src/sysfs_fuse.c index 2ebb7ba4..ae8447c8 100644 --- a/src/sysfs_fuse.c +++ b/src/sysfs_fuse.c @@ -270,6 +270,612 @@ static int sys_devices_system_cpu_online_getsize(const char *path) return do_cpuset_read(cg, cpu_cg, buf, buflen); } +static int sys_devices_system_node_online_read(char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cg = NULL; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + char *cache = d->buf; + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __u32 last_set_bit = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + + if (offset) { + size_t left; + + 
if (!d->cached) + return 0; + + if (offset > d->size) + return -EINVAL; + + left = d->size - offset; + total_len = left > size ? size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return read_file_fuse("/sys/devices/system/node/online", buf, size, d); + prune_init_slice(cg); + + ret = do_get_online_nodes(cg, &bitarr, &last_set_bit); + if (ret) + return ret; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(d->buf, d->buflen, "%s\n", list); + if (total_len < 0 || total_len >= d->buflen) + return log_error(0, "Failed to write to cache"); + + d->size = (int)total_len; + d->cached = 1; + + if ((size_t)total_len > size) + total_len = size; + + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int sys_devices_system_node_online_getsize(const char *path) +{ + __do_free char *cg = NULL, *cpuset_mems = NULL; + struct fuse_context *fc = fuse_get_context(); + pid_t initpid; + __do_free __u32 *bitarr = NULL; + __u32 last_set_bit = 0; + ssize_t total_len = 0; + char list[BUF_RESERVE_SIZE] = {0}; + char buf[BUF_RESERVE_SIZE] = {0}; + int buflen = sizeof(buf); + int ret = 0; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return get_sysfile_size(path); + prune_init_slice(cg); + + ret = do_get_online_nodes(cg, &bitarr, &last_set_bit); + if (ret < 0) + return ret; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(buf, buflen, "%s\n", list); + if (total_len < 0 || total_len >= buflen) + return 
log_error(0, "Failed to write to cache"); + + return total_len; +} + +/* + * Get nodes have cpus + * + * Nodes list from do_get_online_nodes_from_cpuset_cpus is bitwise-anded + * with nodes list from /sys/devices/system/node/has_cpu. + */ +static int sys_devices_system_node_has_cpu_read(char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cg = NULL; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + char *cache = d->buf; + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_cpu = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_cpu = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + __do_free char *has_cpu = NULL; + + if (offset) { + size_t left; + + if (!d->cached) + return 0; + + if (offset > d->size) + return -EINVAL; + + left = d->size - offset; + total_len = left > size ? size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return read_file_fuse("/sys/devices/system/node/has_cpu", buf, size, d); + prune_init_slice(cg); + + ret = do_get_online_nodes_from_cpuset_cpus(cg, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_cpu = read_file_at(-EBADF, "/sys/devices/system/node/has_cpu", PROTECT_OPEN); + if (!has_cpu) + return -1; + + ret = nodemask(has_cpu, &bitarr_has_cpu, &last_set_bit_has_cpu); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_cpu ? 
+ last_set_bit : last_set_bit_has_cpu; + *bitarr &= *bitarr_has_cpu; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(d->buf, d->buflen, "%s\n", list); + if (total_len < 0 || total_len >= d->buflen) + return log_error(0, "Failed to write to cache"); + + d->size = (int)total_len; + d->cached = 1; + + if ((size_t)total_len > size) + total_len = size; + + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int sys_devices_system_node_has_cpu_getsize(const char *path) +{ + __do_free char *cg = NULL; + struct fuse_context *fc = fuse_get_context(); + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_cpu = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_cpu = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + __do_free char *has_cpu = NULL; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return get_sysfile_size("/sys/devices/system/node/has_cpu"); + prune_init_slice(cg); + + ret = do_get_online_nodes_from_cpuset_cpus(cg, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_cpu = read_file_at(-EBADF, "/sys/devices/system/node/has_cpu", PROTECT_OPEN); + if (!has_cpu) + return -1; + + ret = nodemask(has_cpu, &bitarr_has_cpu, &last_set_bit_has_cpu); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_cpu ? 
+ last_set_bit : last_set_bit_has_cpu; + *bitarr &= *bitarr_has_cpu; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + total_len = ret + 1; + + return total_len; +} + +/* + * Get nodes have memorys + * + * Nodes list from cpuset.mems or cpuset.mems.effective is bitwise-anded + * with nodes list from /sys/devices/system/node/has_memory. + */ +static int sys_devices_system_node_has_memory_read(char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cg = NULL; + __do_free char *cpuset_mems = NULL; + __do_free char *has_memory = NULL; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + char *cache = d->buf; + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_memory = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_memory = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + + if (offset) { + size_t left; + + if (!d->cached) + return 0; + + if (offset > d->size) + return -EINVAL; + + left = d->size - offset; + total_len = left > size ? size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return read_file_fuse("/sys/devices/system/node/has_memory", buf, size, d); + prune_init_slice(cg); + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_memory = read_file_at(-EBADF, "/sys/devices/system/node/has_memory", PROTECT_OPEN); + if (!has_memory) + return -1; + + ret = nodemask(has_memory, &bitarr_has_memory, &last_set_bit_has_memory); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_memory ? 
+ last_set_bit : last_set_bit_has_memory; + *bitarr &= *bitarr_has_memory; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(d->buf, d->buflen, "%s\n", list); + if (total_len < 0 || total_len >= d->buflen) + return log_error(0, "Failed to write to cache"); + + d->size = (int)total_len; + d->cached = 1; + + if ((size_t)total_len > size) + total_len = size; + + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int sys_devices_system_node_has_memory_normal_memory_getsize(const char *path) +{ + __do_free char *cg = NULL; + __do_free char *cpuset_mems = NULL; + __do_free char *has_memory = NULL; + struct fuse_context *fc = fuse_get_context(); + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_memory = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_memory = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return get_sysfile_size(path); + prune_init_slice(cg); + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_memory = read_file_at(-EBADF, path, PROTECT_OPEN); + if (!has_memory) + return -1; + + ret = nodemask(has_memory, &bitarr_has_memory, &last_set_bit_has_memory); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_memory ? 
+ last_set_bit : last_set_bit_has_memory; + *bitarr &= *bitarr_has_memory; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + total_len = ret + 1; + + return total_len; +} + +/* + * Get nodes have normal memorys + * + * Nodes list from cpuset.mems or cpuset.mems.effective is bitwise-anded + * with nodes list from /sys/devices/system/node/has_normal_memory. + */ +static int sys_devices_system_node_has_normal_memory_read(char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cg = NULL; + __do_free char *cpuset_mems = NULL; + __do_free char *has_normal_memory = NULL; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + char *cache = d->buf; + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_normal_memory = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_normal_memory = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + + if (offset) { + size_t left; + + if (!d->cached) + return 0; + + if (offset > d->size) + return -EINVAL; + + left = d->size - offset; + total_len = left > size ? 
size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return read_file_fuse("/sys/devices/system/node/has_normal_memory", buf, size, d); + prune_init_slice(cg); + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_normal_memory = read_file_at(-EBADF, "/sys/devices/system/node/has_normal_memory", PROTECT_OPEN); + if (!has_normal_memory) + return -1; + + ret = nodemask(has_normal_memory, &bitarr_has_normal_memory, &last_set_bit_has_normal_memory); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_normal_memory ? + last_set_bit : last_set_bit_has_normal_memory; + *bitarr &= *bitarr_has_normal_memory; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(d->buf, d->buflen, "%s\n", list); + if (total_len < 0 || total_len >= d->buflen) + return log_error(0, "Failed to write to cache"); + + d->size = (int)total_len; + d->cached = 1; + + if ((size_t)total_len > size) + total_len = size; + + memcpy(buf, d->buf, total_len); + + return total_len; +}
+
+/*
+ * Render a node's "cpulist" or "cpumap" file restricted to the CPUs that are
+ * part of the cpuset of cgroup @cg.
+ *
+ * @path:   file being read; a path ending in "cpumap" selects the hexadecimal
+ *          mask format, anything else the list format
+ * @cg:     cgroup whose cpuset is intersected with the node's CPUs
+ * @buf:    destination buffer
+ * @buflen: size of @buf in bytes
+ *
+ * The node's CPUs are always taken from its "cpulist" file, also when a
+ * "cpumap" is requested. A node without CPUs (missing or empty cpulist)
+ * yields an empty list or a zero mask.
+ *
+ * Returns the number of bytes stored in @buf (newline included), 0 if the
+ * cpuset cannot be determined or the result does not fit, or a non-zero error
+ * value passed on from cpumask() / -1 if the cpulist cannot be read.
+ */
+static int do_cpulist_cpumap_read(const char *path, char *cg, char *buf, size_t buflen)
+{
+	__do_free __u32 *bitarr = NULL;
+	__do_free __u32 *bitarr_cpulist = NULL;
+	__do_free char *cpuset = NULL;
+	__do_free char *cpulist = NULL;
+	char file_path[BUF_RESERVE_SIZE] = {0}, cpulistmap[BUF_RESERVE_SIZE] = {0};
+	const size_t suffix_len = sizeof("cpumap") - 1;
+	size_t path_len = strlen(path);
+	ssize_t total_len = 0;
+	__u32 last_set_bit = 0;
+	__u32 last_set_bit_cpulist = 0;
+	__u32 ndwords = 0;
+	__u32 i;
+	size_t pos = 0;
+	bool cpumap;
+	bool empty = true;
+	int ret;
+
+	cpuset = get_cpuset(cg);
+	if (!cpuset)
+		return 0;
+
+	ret = cpumask(cpuset, &bitarr, &last_set_bit);
+	if (ret)
+		return ret;
+
+	/* Only look at the suffix when the path is long enough to have one. */
+	cpumap = path_len >= suffix_len &&
+		 strcmp(path + path_len - suffix_len, "cpumap") == 0;
+	if (cpumap)
+		ret = snprintf(file_path, sizeof(file_path), "%.*scpulist",
+			       (int)(path_len - suffix_len), path);
+	else
+		ret = snprintf(file_path, sizeof(file_path), "%s", path);
+	if (ret < 0 || (size_t)ret >= sizeof(file_path))
+		return log_error(0, "Path of the node cpulist file is too long");
+
+	if (file_exists(file_path)) {
+		cpulist = read_file_at(-EBADF, file_path, PROTECT_OPEN);
+		if (!cpulist)
+			return -1;
+
+		/* A node without CPUs has a cpulist that holds no number. */
+		if (!isdigit((unsigned char)cpulist[0]))
+			free_disarm(cpulist);
+	} else {
+		log_error(0, "/sys/devices/system/node/node*/cpulist does not exist");
+	}
+
+	if (cpulist) {
+		ret = cpumask(cpulist, &bitarr_cpulist, &last_set_bit_cpulist);
+		if (ret)
+			return ret;
+
+		if (last_set_bit_cpulist < last_set_bit)
+			last_set_bit = last_set_bit_cpulist;
+		ndwords = last_set_bit / 32 + 1;
+		for (i = 0; i < ndwords; i++) {
+			bitarr[i] &= bitarr_cpulist[i];
+			if (bitarr[i])
+				empty = false;
+		}
+	} else {
+		/* Never hand a NULL string to cpumask(): the node has no CPUs. */
+		last_set_bit = 0;
+		ndwords = 1;
+		bitarr[0] = 0;
+	}
+
+	if (cpumap) {
+		/* Most significant word first; only the leading word is unpadded. */
+		for (i = ndwords; i-- > 0;) {
+			if (pos)
+				ret = snprintf(cpulistmap + pos, sizeof(cpulistmap) - pos,
+					       "%08x,", bitarr[i]);
+			else
+				ret = snprintf(cpulistmap, sizeof(cpulistmap),
+					       "%x,", bitarr[i]);
+			if (ret < 0 || (size_t)ret >= sizeof(cpulistmap) - pos)
+				return log_error(0, "Failed to write to cache");
+			pos += (size_t)ret;
+		}
+		/* Drop the comma that follows the last word. */
+		cpulistmap[pos - 1] = '\0';
+	} else if (!empty) {
+		/* An empty set is skipped: the list simply stays "". */
+		ret = bitarr_to_list(cpulistmap, bitarr, last_set_bit);
+		if (ret < 0 || (size_t)ret >= sizeof(cpulistmap))
+			return log_error(0, "Failed to write to cache");
+	}
+
+	total_len = snprintf(buf, buflen, "%s\n", cpulistmap);
+	if (total_len < 0 || (size_t)total_len >= buflen)
+		return log_error(0, "Failed to write to cache");
+
+	return total_len;
+}
+
+/*
+ * FUSE read handler for /sys/devices/system/node/node<N>/{cpulist,cpumap}.
+ *
+ * A read at offset 0 renders the file into the per-open cache d->buf; reads
+ * at a non-zero offset are served from that cache. Without a cpuset cgroup
+ * for the caller the host file is passed through unchanged.
+ */
+static int sys_devices_system_node_nodex_cpulist_cpumap_read(const char *path, char *buf, size_t size,
+		off_t offset, struct fuse_file_info *fi)
+{
+	__do_free char *cg = NULL;
+	struct fuse_context *fc = fuse_get_context();
+	struct file_info *d = INTTYPE_TO_PTR(fi->fh);
+	char *cache = d->buf;
+	pid_t initpid;
+	ssize_t total_len = 0;
+
+	if (offset) {
+		size_t left;
+
+		if (!d->cached)
+			return 0;
+
+		if (offset > d->size)
+			return -EINVAL;
+
+		left = d->size - offset;
+		total_len = left > size ? size : left;
+		memcpy(buf, cache + offset, total_len);
+
+		return total_len;
+	}
+
+	initpid = lookup_initpid_in_store(fc->pid);
+	if (initpid <= 1 || is_shared_pidns(initpid))
+		initpid = fc->pid;
+
+	cg = get_pid_cgroup(initpid, "cpuset");
+	if (!cg)
+		return read_file_fuse(path, buf, size, d);
+	prune_init_slice(cg);
+
+	total_len = do_cpulist_cpumap_read(path, cg, d->buf, d->buflen);
+	/*
+	 * A negative value is an error, not a length: do not cache it and do
+	 * not let it reach memcpy() as a huge unsigned size.
+	 */
+	if (total_len < 0)
+		return total_len;
+
+	d->size = (int)total_len;
+	d->cached = 1;
+
+	if ((size_t)total_len > size)
+		total_len = size;
+
+	memcpy(buf, d->buf, total_len);
+
+	return total_len;
+}
+
+/*
+ * Size reported by getattr for a node's cpulist/cpumap file: the length of the
+ * content as rendered for the caller, or the size of the host file when the
+ * caller has no cpuset cgroup.
+ */
+static int sys_devices_system_node_nodex_cpulist_cpumap_getsize(const char *path)
+{
+	__do_free char *cg = NULL;
+	struct fuse_context *fc = fuse_get_context();
+	pid_t initpid;
+	char buf[BUF_RESERVE_SIZE];
+	int buflen = sizeof(buf);
+
+	initpid = lookup_initpid_in_store(fc->pid);
+	if (initpid <= 1 || is_shared_pidns(initpid))
+		initpid = fc->pid;
+
+	cg = get_pid_cgroup(initpid, "cpuset");
+	if (!cg)
+		return get_sysfile_size(path);
+	prune_init_slice(cg);
+
+	return do_cpulist_cpumap_read(path, cg, buf, buflen);
+}
+
 static int filler_sys_devices_system_cpu(const char *path, void *buf, fuse_fill_dir_t filler) { @@ -532,6 +1138,40 @@ static int sys_getattr_legacy(const char *path, struct stat *sb) return 0; } + if (strcmp(path, "/sys/devices/system/node") == 0) { + sb->st_mode = S_IFDIR | 00555; + sb->st_nlink = 2; + return 0; + } + + if (strcmp(path, "/sys/devices/system/node/online") == 0) { + sb->st_size = sys_devices_system_node_online_getsize(path); + sb->st_mode = S_IFREG | 00444; + sb->st_nlink = 1; + return 0; + } + + if (strcmp(path, "/sys/devices/system/node/has_cpu") == 0) { + sb->st_size = sys_devices_system_node_has_cpu_getsize(path); + sb->st_mode = S_IFREG | 00444; + sb->st_nlink = 1; + return 0; + } + + if (strcmp(path, "/sys/devices/system/node/has_memory") == 0) { + sb->st_size = sys_devices_system_node_has_memory_normal_memory_getsize(path); + sb->st_mode = S_IFREG |
00444; + sb->st_nlink = 1; + return 0; + } + + if (strcmp(path, "/sys/devices/system/node/has_normal_memory") == 0) { + sb->st_size = sys_devices_system_node_has_memory_normal_memory_getsize(path); + sb->st_mode = S_IFREG | 00444; + sb->st_nlink = 1; + return 0; + } + return -ENOENT; } @@ -540,6 +1180,7 @@ __lxcfs_fuse_ops int sys_getattr(const char *path, struct stat *sb) int ret; struct timespec now; mode_t st_mode; + int path_len = strlen(path); if (!liblxcfs_functional()) return -EIO; @@ -567,6 +1208,19 @@ __lxcfs_fuse_ops int sys_getattr(const char *path, struct stat *sb) if (S_ISREG(st_mode) || S_ISLNK(st_mode)) { if (strcmp(path, "/sys/devices/system/cpu/online") == 0) sb->st_size = sys_devices_system_cpu_online_getsize(path); + else if (strcmp(path, "/sys/devices/system/node/online") == 0) + sb->st_size = sys_devices_system_node_online_getsize(path); + else if (strcmp(path, "/sys/devices/system/node/has_cpu") == 0) + sb->st_size = sys_devices_system_node_has_cpu_getsize(path); + else if (strcmp(path, "/sys/devices/system/node/has_memory") == 0) + sb->st_size = sys_devices_system_node_has_memory_normal_memory_getsize(path); + else if (strcmp(path, "/sys/devices/system/node/has_normal_memory") == 0) + sb->st_size = sys_devices_system_node_has_memory_normal_memory_getsize(path); + else if ((strncmp(path, "/sys/devices/system/node/node", + STRLITERALLEN("/sys/devices/system/node/node")) == 0) && + ((strcmp(path + path_len - strlen("cpulist"), "cpulist") == 0) || + (strcmp(path + path_len - strlen("cpumap"), "cpumap") == 0))) + sb->st_size = sys_devices_system_node_nodex_cpulist_cpumap_getsize(path); else sb->st_size = get_sysfile_size(path); sb->st_mode = st_mode; @@ -982,12 +1636,38 @@ static int sys_read_legacy(const char *path, char *buf, size_t size, return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH, buf, size, offset, f); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE: + if (liblxcfs_functional()) + return 
sys_devices_system_node_online_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE_PATH, + buf, size, offset, f); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU: + if (liblxcfs_functional()) + return sys_devices_system_node_has_cpu_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU_PATH, + buf, size, offset, f); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY: + if (liblxcfs_functional()) + return sys_devices_system_node_has_memory_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY_PATH, + buf, size, offset, f); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY: + if (liblxcfs_functional()) + return sys_devices_system_node_has_normal_memory_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY_PATH, + buf, size, offset, f); case LXC_TYPE_SYS_DEVICES: break; case LXC_TYPE_SYS_DEVICES_SYSTEM: break; case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU: break; + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE: + break; } return -EINVAL; @@ -1007,7 +1687,19 @@ __lxcfs_fuse_ops int sys_read(const char *path, char *buf, size_t size, switch (f->type) { case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE: return sys_devices_system_cpu_online_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE: + return sys_devices_system_node_online_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU: + return sys_devices_system_node_has_cpu_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY: + return sys_devices_system_node_has_memory_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY: + return sys_devices_system_node_has_normal_memory_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPULIST: + case 
LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPUMAP: + return sys_devices_system_node_nodex_cpulist_cpumap_read(path, buf, size, offset, fi); case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE: + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBFILE: return read_file_fuse_with_offset(path, buf, size, offset, f); } diff --git a/src/utils.c b/src/utils.c index ab665f74..74aeae27 100644 --- a/src/utils.c +++ b/src/utils.c @@ -691,3 +691,55 @@ int get_task_personality(pid_t pid, __u32 *personality) return ret; }
+
+/*
+ * Convert a bit array into a kernel-style list such as "0-3,8,10-11".
+ *
+ * @list:         destination buffer; no length is passed in, so the caller must
+ *                size it for the longest list it can receive
+ * @bitarr:       bit array, queried through is_set()
+ * @last_set_bit: highest bit index to examine (inclusive)
+ *
+ * Returns the length of the string stored in @list (0 and an empty string when
+ * no bit is set), or -1 if an entry cannot be formatted.
+ */
+int bitarr_to_list(char *list, __u32 *bitarr, __u32 last_set_bit)
+{
+	/* Per-entry bound: two 10-digit numbers, '-', ',' and the NUL byte. */
+	const size_t entry_max = sizeof("4294967295-4294967295,");
+	size_t pos = 0;
+	__u32 bit = 0;
+
+	list[0] = '\0';
+
+	while (bit <= last_set_bit) {
+		__u32 first;
+		int ret;
+
+		if (!is_set(bit, bitarr)) {
+			bit++;
+			continue;
+		}
+
+		/* Extend the run while the next bit is set as well. */
+		first = bit;
+		while (bit < last_set_bit && is_set(bit + 1, bitarr))
+			bit++;
+
+		if (first == bit)
+			ret = snprintf(list + pos, entry_max, "%u,", first);
+		else
+			ret = snprintf(list + pos, entry_max, "%u-%u,", first, bit);
+		if (ret < 0 || (size_t)ret >= entry_max)
+			return -1;
+
+		pos += (size_t)ret;
+		bit++;
+	}
+
+	/* Drop the comma that follows the last entry. */
+	if (pos > 0)
+		list[--pos] = '\0';
+
+	return (int)pos;
+}
diff --git a/src/utils.h b/src/utils.h index 7ed021a9..5a85b937 100644 --- a/src/utils.h +++ b/src/utils.h @@ -79,4 +79,5 @@ extern char *read_file_at(int dfd, const char *fnam, unsigned int o_flags); extern int get_task_personality(pid_t pid, __u32 *personality); extern int get_host_personality(__u32 *personality); +extern int bitarr_to_list(char *list, __u32 *bitarr, __u32 last_set_bit); #endif /* __LXCFS_UTILS_H */ From 2dad236083d19b886d82097ba98790184e1cfda2 Mon Sep 17 00:00:00 2001 From: "huteng.ht" Date: Tue, 31 May 2022 15:31:50 +0800 Subject: [PATCH 6/6] README.md: node support Signed-off-by: huteng.ht --- README.md | 5 +++++
1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 2db345b4..a089a395 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,11 @@ such as: /proc/uptime /proc/slabinfo /sys/devices/system/cpu/online +/sys/devices/system/node +/sys/devices/system/node/online +/sys/devices/system/node/has_cpu +/sys/devices/system/node/has_memory +/sys/devices/system/node/has_normal_memory ``` are container aware such that the values displayed (e.g. in `/proc/uptime`)