From cbcfb07c9fae9643f44ed53176317b200aad25f0 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Wed, 11 Mar 2026 01:02:21 -0700 Subject: [PATCH 001/158] anolis: Revert "fs/resctrl: Introduce interface to modify io_alloc capacity bitmasks" ANBZ: #31045 This reverts commit 53711bd62266811f26e9b9305e1dd9f7129f42f8. Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 12 ---- fs/resctrl/ctrlmondata.c | 93 ------------------------------ fs/resctrl/internal.h | 2 - fs/resctrl/rdtgroup.c | 3 +- 4 files changed, 1 insertion(+), 109 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 86adf6840bcd..d6dcded555e6 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -196,18 +196,6 @@ related to allocation: # cat /sys/fs/resctrl/info/L3/io_alloc_cbm 0=ffff;1=ffff - CBMs can be configured by writing to the interface. - - Example:: - - # echo 1=ff > /sys/fs/resctrl/info/L3/io_alloc_cbm - # cat /sys/fs/resctrl/info/L3/io_alloc_cbm - 0=ffff;1=00ff - - # echo "0=ff;1=f" > /sys/fs/resctrl/info/L3/io_alloc_cbm - # cat /sys/fs/resctrl/info/L3/io_alloc_cbm - 0=00ff;1=000f - When CDP is enabled "io_alloc_cbm" associated with the CDP_DATA and CDP_CODE resources may reflect the same values. 
For example, values read from and written to /sys/fs/resctrl/info/L3DATA/io_alloc_cbm may be reflected by diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 6651e39c1cb8..2b101afca8e6 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -731,97 +731,4 @@ int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, cpus_read_unlock(); return ret; } - -static int resctrl_io_alloc_parse_line(char *line, struct rdt_resource *r, - struct resctrl_schema *s, u32 closid) -{ - enum resctrl_conf_type peer_type; - struct rdt_parse_data data; - struct rdt_domain *d; - char *dom = NULL, *id; - unsigned long dom_id; - -next: - if (!line || line[0] == '\0') - return 0; - - dom = strsep(&line, ";"); - id = strsep(&dom, "="); - if (!dom || kstrtoul(id, 10, &dom_id)) { - rdt_last_cmd_puts("Missing '=' or non-numeric domain\n"); - return -EINVAL; - } - - dom = strim(dom); - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { - data.buf = dom; - data.mode = RDT_MODE_SHAREABLE; - data.closid = closid; - if (parse_cbm(&data, s, d)) - return -EINVAL; - /* - * Keep io_alloc CLOSID's CBM of CDP_CODE and CDP_DATA - * in sync. 
- */ - if (resctrl_arch_get_cdp_enabled(r->rid)) { - peer_type = resctrl_peer_type(s->conf_type); - memcpy(&d->staged_config[peer_type], - &d->staged_config[s->conf_type], - sizeof(d->staged_config[0])); - } - goto next; - } - } - - return -EINVAL; -} - -ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - u32 io_alloc_closid; - int ret = 0; - - /* Valid input requires a trailing newline */ - if (nbytes == 0 || buf[nbytes - 1] != '\n') - return -EINVAL; - - buf[nbytes - 1] = '\0'; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - rdt_last_cmd_clear(); - - if (!r->cache.io_alloc_capable) { - rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name); - ret = -ENODEV; - goto out_unlock; - } - - if (!resctrl_arch_get_io_alloc_enabled(r)) { - rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name); - ret = -EINVAL; - goto out_unlock; - } - - io_alloc_closid = resctrl_io_alloc_closid(r); - - rdt_staged_configs_clear(); - ret = resctrl_io_alloc_parse_line(buf, r, s, io_alloc_closid); - if (ret) - goto out_clear_configs; - - ret = resctrl_arch_update_domains(r, io_alloc_closid); - -out_clear_configs: - rdt_staged_configs_clear(); -out_unlock: - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - - return ret ?: nbytes; -} #endif diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 3283448e5955..73cec0cc4c44 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -336,8 +336,6 @@ ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf, const char *rdtgroup_name_by_closid(u32 closid); int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); -ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off); #endif #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK diff --git a/fs/resctrl/rdtgroup.c 
b/fs/resctrl/rdtgroup.c index 457de2adc251..9e8861741db9 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -2470,10 +2470,9 @@ static struct rftype res_common_files[] = { }, { .name = "io_alloc_cbm", - .mode = 0644, + .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = resctrl_io_alloc_cbm_show, - .write = resctrl_io_alloc_cbm_write, }, #endif { -- Gitee From eb11638df486196dd07c650c94ea50651762cc30 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Wed, 11 Mar 2026 01:02:36 -0700 Subject: [PATCH 002/158] anolis: Revert "fs/resctrl: Modify struct rdt_parse_data to pass mode and CLOSID" ANBZ: #31045 This reverts commit 1e50ca047f6079c75da29582b6b644af602d63d8. Signed-off-by: Jason Zeng --- fs/resctrl/ctrlmondata.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 2b101afca8e6..fe505511447e 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -22,8 +22,7 @@ #include "internal.h" struct rdt_parse_data { - u32 closid; - enum rdtgrp_mode mode; + struct rdtgroup *rdtgrp; char *buf; }; @@ -75,8 +74,8 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d) { struct resctrl_staged_config *cfg; + u32 closid = data->rdtgrp->closid; struct rdt_resource *r = s->res; - u32 closid = data->closid; u32 bw_val; cfg = &d->staged_config[s->conf_type]; @@ -154,10 +153,9 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d) { - enum rdtgrp_mode mode = data->mode; + struct rdtgroup *rdtgrp = data->rdtgrp; struct resctrl_staged_config *cfg; struct rdt_resource *r = s->res; - u32 closid = data->closid; u32 cbm_val; cfg = &d->staged_config[s->conf_type]; @@ -170,7 +168,7 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, * Cannot set up more than one pseudo-locked 
region in a cache * hierarchy. */ - if (mode == RDT_MODE_PSEUDO_LOCKSETUP && + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && rdtgroup_pseudo_locked_in_hierarchy(d)) { rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n"); return -EINVAL; @@ -179,7 +177,8 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, if (!cbm_validate(data->buf, &cbm_val, r)) return -EINVAL; - if ((mode == RDT_MODE_EXCLUSIVE || mode == RDT_MODE_SHAREABLE) && + if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE || + rdtgrp->mode == RDT_MODE_SHAREABLE) && rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) { rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n"); return -EINVAL; @@ -189,14 +188,14 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, * The CBM may not overlap with the CBM of another closid if * either is exclusive. */ - if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, true)) { + if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) { rdt_last_cmd_puts("Overlaps with exclusive group\n"); return -EINVAL; } - if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, false)) { - if (mode == RDT_MODE_EXCLUSIVE || - mode == RDT_MODE_PSEUDO_LOCKSETUP) { + if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) { + if (rdtgrp->mode == RDT_MODE_EXCLUSIVE || + rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { rdt_last_cmd_puts("Overlaps with other group\n"); return -EINVAL; } @@ -256,8 +255,7 @@ static int parse_line(char *line, struct resctrl_schema *s, list_for_each_entry(d, &r->domains, list) { if (d->id == dom_id) { data.buf = dom; - data.closid = rdtgrp->closid; - data.mode = rdtgrp->mode; + data.rdtgrp = rdtgrp; if (parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { -- Gitee From 064b8641386fc0c86a2c8c35327223c7984df876 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Wed, 11 Mar 2026 01:02:39 -0700 Subject: [PATCH 003/158] anolis: Revert "fs/resctrl: Introduce interface to display 
io_alloc CBMs" ANBZ: #31045 This reverts commit 4248aeb0ae1202d096a88024d775a3548d96e8f9. Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 19 ------------- fs/resctrl/ctrlmondata.c | 45 ++---------------------------- fs/resctrl/internal.h | 2 -- fs/resctrl/rdtgroup.c | 11 +------- 4 files changed, 4 insertions(+), 73 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index d6dcded555e6..cb6f68c20984 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -182,25 +182,6 @@ related to allocation: available for general (CPU) cache allocation for both the CDP_CODE and CDP_DATA resources. -"io_alloc_cbm": - Capacity bitmasks that describe the portions of cache instances to - which I/O traffic from supported I/O devices are routed when "io_alloc" - is enabled. - - CBMs are displayed in the following format: - - =;=;... - - Example:: - - # cat /sys/fs/resctrl/info/L3/io_alloc_cbm - 0=ffff;1=ffff - - When CDP is enabled "io_alloc_cbm" associated with the CDP_DATA and CDP_CODE - resources may reflect the same values. For example, values read from and - written to /sys/fs/resctrl/info/L3DATA/io_alloc_cbm may be reflected by - /sys/fs/resctrl/info/L3CODE/io_alloc_cbm and vice versa. 
- Memory bandwidth(MB) subdirectory contains the following files with respect to allocation: diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index fe505511447e..7d7f1050b19e 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -374,8 +374,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, return ret ?: nbytes; } -static void show_doms(struct seq_file *s, struct resctrl_schema *schema, - char *resource_name, int closid) +static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) { struct rdt_resource *r = schema->res; struct rdt_domain *dom; @@ -385,8 +384,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - if (resource_name) - seq_printf(s, "%*s:", max_name_width, resource_name); + seq_printf(s, "%*s:", max_name_width, schema->name); list_for_each_entry(dom, &r->domains, list) { if (sep) seq_puts(s, ";"); @@ -433,7 +431,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of, closid = rdtgrp->closid; list_for_each_entry(schema, &resctrl_schema_all, list) { if (closid < schema->num_closid) - show_doms(s, schema, schema->name, closid); + show_doms(s, schema, closid); } } } else { @@ -692,41 +690,4 @@ ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf, return ret ?: nbytes; } - -int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - int ret = 0; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - rdt_last_cmd_clear(); - - if (!r->cache.io_alloc_capable) { - rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name); - ret = -ENODEV; - goto out_unlock; - } - - if (!resctrl_arch_get_io_alloc_enabled(r)) { - rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name); - ret = -EINVAL; - goto out_unlock; - } - - /* - * 
When CDP is enabled, the CBMs of the highest CLOSID of CDP_CODE and - * CDP_DATA are kept in sync. As a result, the io_alloc CBMs shown for - * either CDP resource are identical and accurately represent the CBMs - * used for I/O. - */ - show_doms(seq, s, NULL, resctrl_io_alloc_closid(r)); - -out_unlock: - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - return ret; -} #endif diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 73cec0cc4c44..b1d347e45e6a 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -334,8 +334,6 @@ ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); const char *rdtgroup_name_by_closid(u32 closid); -int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, - void *v); #endif #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 9e8861741db9..ba863bfa943e 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -2468,12 +2468,6 @@ static struct rftype res_common_files[] = { .seq_show = resctrl_io_alloc_show, .write = resctrl_io_alloc_write, }, - { - .name = "io_alloc_cbm", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = resctrl_io_alloc_cbm_show, - }, #endif { .name = "max_threshold_occupancy", @@ -2647,12 +2641,9 @@ static void io_alloc_init(void) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - if (r->cache.io_alloc_capable) { + if (r->cache.io_alloc_capable) resctrl_file_fflags_init("io_alloc", RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE); - resctrl_file_fflags_init("io_alloc_cbm", - RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE); - } } #endif -- Gitee From 7ff27977e9d99b86a946b42f1ec9dbcc58eecc2f Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Wed, 11 Mar 2026 01:02:41 -0700 Subject: [PATCH 004/158] anolis: Revert "fs/resctrl: Add user interface to enable/disable io_alloc feature" ANBZ: #31045 This reverts commit 990478714b9af10d1a4067a901c37681b2be92ea. 
Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 30 ------- fs/resctrl/ctrlmondata.c | 126 ----------------------------- fs/resctrl/internal.h | 11 --- fs/resctrl/rdtgroup.c | 24 +----- 4 files changed, 3 insertions(+), 188 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index cb6f68c20984..48c91e80ad60 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -73,11 +73,6 @@ The 'info' directory contains information about the enabled resources. Each resource has its own subdirectory. The subdirectory names reflect the resource names. -Most of the files in the resource's subdirectory are read-only, and -describe properties of the resource. Resources that support global -configuration options also include writable files that can be used -to modify those settings. - Each subdirectory contains the following files with respect to allocation: @@ -157,31 +152,6 @@ related to allocation: "not supported": Support not available for this resource. - The feature can be modified by writing to the interface, for example: - - To enable:: - - # echo 1 > /sys/fs/resctrl/info/L3/io_alloc - - To disable:: - - # echo 0 > /sys/fs/resctrl/info/L3/io_alloc - - The underlying implementation may reduce resources available to - general (CPU) cache allocation. See architecture specific notes - below. Depending on usage requirements the feature can be enabled - or disabled. - - On AMD systems, io_alloc feature is supported by the L3 Smart - Data Cache Injection Allocation Enforcement (SDCIAE). The CLOSID for - io_alloc is the highest CLOSID supported by the resource. When - io_alloc is enabled, the highest CLOSID is dedicated to io_alloc and - no longer available for general (CPU) cache allocation. 
When CDP is - enabled, io_alloc routes I/O traffic using the highest CLOSID allocated - for the instruction cache (CDP_CODE), making this CLOSID no longer - available for general (CPU) cache allocation for both the CDP_CODE - and CDP_DATA resources. - Memory bandwidth(MB) subdirectory contains the following files with respect to allocation: diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 7d7f1050b19e..723725619ff1 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -564,130 +564,4 @@ int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, voi return 0; } - -/* - * resctrl_io_alloc_closid_supported() - io_alloc feature utilizes the - * highest CLOSID value to direct I/O traffic. Ensure that io_alloc_closid - * is in the supported range. - */ -static bool resctrl_io_alloc_closid_supported(u32 io_alloc_closid) -{ - return io_alloc_closid < closids_supported(); -} - -/* - * Initialize io_alloc CLOSID cache resource CBM with all usable (shared - * and unused) cache portions. - */ -static int resctrl_io_alloc_init_cbm(struct resctrl_schema *s, u32 closid) -{ - enum resctrl_conf_type peer_type; - struct rdt_resource *r = s->res; - struct rdt_domain *d; - int ret; - - rdt_staged_configs_clear(); - - ret = rdtgroup_init_cat(s, closid); - if (ret < 0) - goto out; - - /* Keep CDP_CODE and CDP_DATA of io_alloc CLOSID's CBM in sync. */ - if (resctrl_arch_get_cdp_enabled(r->rid)) { - peer_type = resctrl_peer_type(s->conf_type); - list_for_each_entry(d, &s->res->domains, list) - memcpy(&d->staged_config[peer_type], - &d->staged_config[s->conf_type], - sizeof(d->staged_config[0])); - } - - ret = resctrl_arch_update_domains(r, closid); -out: - rdt_staged_configs_clear(); - return ret; -} - -/* - * resctrl_io_alloc_closid() - io_alloc feature routes I/O traffic using - * the highest available CLOSID. Retrieve the maximum CLOSID supported by the - * resource. 
Note that if Code Data Prioritization (CDP) is enabled, the number - * of available CLOSIDs is reduced by half. - */ -static u32 resctrl_io_alloc_closid(struct rdt_resource *r) -{ - if (resctrl_arch_get_cdp_enabled(r->rid)) - return resctrl_arch_get_num_closid(r) / 2 - 1; - else - return resctrl_arch_get_num_closid(r) - 1; -} - -ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - char const *grp_name; - u32 io_alloc_closid; - bool enable; - int ret; - - ret = kstrtobool(buf, &enable); - if (ret) - return ret; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - rdt_last_cmd_clear(); - - if (!r->cache.io_alloc_capable) { - rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name); - ret = -ENODEV; - goto out_unlock; - } - - /* If the feature is already up to date, no action is needed. */ - if (resctrl_arch_get_io_alloc_enabled(r) == enable) - goto out_unlock; - - io_alloc_closid = resctrl_io_alloc_closid(r); - if (!resctrl_io_alloc_closid_supported(io_alloc_closid)) { - rdt_last_cmd_printf("io_alloc CLOSID (ctrl_hw_id) %u is not available\n", - io_alloc_closid); - ret = -EINVAL; - goto out_unlock; - } - - if (enable) { - if (!closid_alloc_fixed(io_alloc_closid)) { - grp_name = rdtgroup_name_by_closid(io_alloc_closid); - WARN_ON_ONCE(!grp_name); - rdt_last_cmd_printf("CLOSID (ctrl_hw_id) %u for io_alloc is used by %s group\n", - io_alloc_closid, grp_name ? 
grp_name : "another"); - ret = -ENOSPC; - goto out_unlock; - } - - ret = resctrl_io_alloc_init_cbm(s, io_alloc_closid); - if (ret) { - rdt_last_cmd_puts("Failed to initialize io_alloc allocations\n"); - closid_free(io_alloc_closid); - goto out_unlock; - } - } else { - closid_free(io_alloc_closid); - } - - ret = resctrl_arch_io_alloc_enable(r, enable); - if (enable && ret) { - rdt_last_cmd_puts("Failed to enable io_alloc feature\n"); - closid_free(io_alloc_closid); - } - -out_unlock: - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - - return ret ?: nbytes; -} #endif diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index b1d347e45e6a..f41b94a525c8 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -313,9 +313,6 @@ int mbm_cntr_alloc(struct rdt_resource *r); void mbm_cntr_free(u32 cntr_id); void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); - -bool closid_alloc_fixed(u32 closid); - int resctrl_find_cleanest_closid(void); unsigned int mon_event_config_index_get(u32 evtid); int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid); @@ -326,14 +323,6 @@ void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp, int ind #ifdef CONFIG_X86 int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); -int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid); - -enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type); - -ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off); - -const char *rdtgroup_name_by_closid(u32 closid); #endif #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index ba863bfa943e..8c76d0234c94 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -238,11 +238,6 @@ void mbm_cntr_free(u32 cntr_id) __set_bit(cntr_id, mbm_cntrs_free_map); } -bool closid_alloc_fixed(u32 closid) -{ - return __test_and_clear_bit(closid, 
closid_free_map); -} - /** * rdtgroup_mode_by_closid - Return mode of resource group with closid * @closid: closid if the resource group @@ -1712,7 +1707,7 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, return 0; } -enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) +static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) { switch (my_type) { case CDP_CODE: @@ -2358,18 +2353,6 @@ int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) return 0; } -const char *rdtgroup_name_by_closid(u32 closid) -{ - struct rdtgroup *rdtgrp; - - list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { - if (rdtgrp->closid == closid) - return rdtgrp->kn->name; - } - - return NULL; -} - /* rdtgroup information files for one cache resource. */ static struct rftype res_common_files[] = { { @@ -2463,10 +2446,9 @@ static struct rftype res_common_files[] = { #ifdef CONFIG_X86 { .name = "io_alloc", - .mode = 0644, + .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = resctrl_io_alloc_show, - .write = resctrl_io_alloc_write, }, #endif { @@ -3852,7 +3834,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, * If there are no more shareable bits available on any domain then * the entire allocation will fail. */ -int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) +static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) { struct rdt_domain *d; int ret; -- Gitee From f1506b847c080fadf137f660c90e627497cbf679 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Wed, 11 Mar 2026 01:02:43 -0700 Subject: [PATCH 005/158] anolis: Revert "fs/resctrl: Introduce interface to display "io_alloc" support" ANBZ: #31045 This reverts commit 4ea307d18a252175d4d6d8247b71450468add640. 
Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 15 --------------- fs/resctrl/ctrlmondata.c | 23 ----------------------- fs/resctrl/internal.h | 5 ----- fs/resctrl/rdtgroup.c | 27 --------------------------- 4 files changed, 70 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 48c91e80ad60..6753ced39454 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -137,21 +137,6 @@ related to allocation: "1": Non-contiguous 1s value in CBM is supported. -"io_alloc": - "io_alloc" enables system software to configure the portion of - the cache allocated for I/O traffic. File may only exist if the - system supports this feature on some of its cache resources. - - "disabled": - Resource supports "io_alloc" but the feature is disabled. - Portions of cache used for allocation of I/O traffic cannot - be configured. - "enabled": - Portions of cache used for allocation of I/O traffic - can be configured using "io_alloc_cbm". - "not supported": - Support not available for this resource. 
- Memory bandwidth(MB) subdirectory contains the following files with respect to allocation: diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 723725619ff1..253443f87646 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -542,26 +542,3 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) rdtgroup_kn_unlock(of->kn); return ret; } - -#ifdef CONFIG_X86 -int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - - mutex_lock(&rdtgroup_mutex); - - if (r->cache.io_alloc_capable) { - if (resctrl_arch_get_io_alloc_enabled(r)) - seq_puts(seq, "enabled\n"); - else - seq_puts(seq, "disabled\n"); - } else { - seq_puts(seq, "not supported\n"); - } - - mutex_unlock(&rdtgroup_mutex); - - return 0; -} -#endif diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index f41b94a525c8..58d812d6c1d2 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -320,11 +320,6 @@ int rdtgroup_alloc_cntr(struct rdtgroup *rdtgrp, int index); int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid); void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp, int index); -#ifdef CONFIG_X86 -int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); - -#endif - #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 8c76d0234c94..aba699474e86 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -2443,14 +2443,6 @@ static struct rftype res_common_files[] = { .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_thread_throttle_mode_show, }, -#ifdef CONFIG_X86 - { - .name = "io_alloc", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = resctrl_io_alloc_show, - }, -#endif { .name = 
"max_threshold_occupancy", .mode = 0644, @@ -2613,22 +2605,6 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name) return NULL; } -#ifdef CONFIG_X86 -/* - * The resctrl file "io_alloc" is added using L3 resource. However, it results - * in this file being visible for *all* cache resources (eg. L2 cache), - * whether it supports "io_alloc" or not. - */ -static void io_alloc_init(void) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - - if (r->cache.io_alloc_capable) - resctrl_file_fflags_init("io_alloc", RFTYPE_CTRL_INFO | - RFTYPE_RES_CACHE); -} -#endif - void resctrl_file_fflags_init(const char *config, unsigned long fflags) { struct rftype *rft; @@ -4702,9 +4678,6 @@ int resctrl_init(void) resctrl_file_fflags_init("thread_throttle_mode", RFTYPE_CTRL_INFO | RFTYPE_RES_MB); -#ifdef CONFIG_X86 - io_alloc_init(); -#endif ret = resctrl_mon_resource_init(); if (ret) -- Gitee From 0bb7363c60caab7a6f8eda99bbc29a1f6c70dc91 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Wed, 11 Mar 2026 01:02:44 -0700 Subject: [PATCH 006/158] anolis: Revert "x86,fs/resctrl: Implement "io_alloc" enable/disable handlers" ANBZ: #31045 This reverts commit d25ffa678fc7b92983e28dcc8cb81d729a8b259d. 
Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 40 ----------------------- arch/x86/kernel/cpu/resctrl/internal.h | 5 --- include/linux/resctrl.h | 21 ------------ 3 files changed, 66 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index b62792ad80ac..c5c3eaea27b6 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -119,43 +119,3 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, return hw_dom->ctrl_val[idx]; } - -bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r) -{ - return resctrl_to_arch_res(r)->sdciae_enabled; -} - -static void resctrl_sdciae_set_one_amd(void *arg) -{ - bool *enable = arg; - - if (*enable) - msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); - else - msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); -} - -static void _resctrl_sdciae_enable(struct rdt_resource *r, bool enable) -{ - struct rdt_domain *d; - - /* Walking r->ctrl_domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); - - /* Update MSR_IA32_L3_QOS_EXT_CFG MSR on all the CPUs in all domains */ - list_for_each_entry(d, &r->domains, list) - on_each_cpu_mask(&d->cpu_mask, resctrl_sdciae_set_one_amd, &enable, 1); -} - -int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable) -{ - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - - if (hw_res->r_resctrl.cache.io_alloc_capable && - hw_res->sdciae_enabled != enable) { - _resctrl_sdciae_enable(r, enable); - hw_res->sdciae_enabled = enable; - } - - return 0; -} diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 41c912aa859c..9017fe146a86 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -55,9 +55,6 @@ struct arch_mbm_state { u64 prev_msr; }; -/* Setting bit 1 in MSR_IA32_L3_QOS_EXT_CFG enables the SDCIAE feature. 
*/ -#define SDCIAE_ENABLE_BIT 1 - /** * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share * a resource @@ -110,7 +107,6 @@ struct msr_param { * @mbm_width: Monitor width, to detect and correct for overflow. * @cdp_enabled: CDP state of this resource * @mbm_cntr_assign_enabled: ABMC feature is enabled - * @sdciae_enabled: SDCIAE feature (backing "io_alloc") is enabled. * * Members of this structure are either private to the architecture * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. @@ -126,7 +122,6 @@ struct rdt_hw_resource { unsigned int mbm_width; bool cdp_enabled; bool mbm_cntr_assign_enabled; - bool sdciae_enabled; }; static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 01bef03c0edd..aa99d3d58d5b 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -440,27 +440,6 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, */ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d); -/** - * resctrl_arch_io_alloc_enable() - Enable/disable io_alloc feature. - * @r: The resctrl resource. - * @enable: Enable (true) or disable (false) io_alloc on resource @r. - * - * This can be called from any CPU. - * - * Return: - * 0 on success, <0 on error. - */ -int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable); - -/** - * resctrl_arch_get_io_alloc_enabled() - Get io_alloc feature state. - * @r: The resctrl resource. - * - * Return: - * true if io_alloc is enabled or false if disabled. 
- */ -bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r); - extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; void resctrl_file_fflags_init(const char *config, unsigned long fflags); -- Gitee From 55a006c9be8806ece606fa7ee2b769bff91d6d55 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Wed, 11 Mar 2026 01:02:46 -0700 Subject: [PATCH 007/158] anolis: Revert "x86,fs/resctrl: Detect io_alloc feature" ANBZ: #31045 This reverts commit 563adbb0dc2647ec381c9f06d48f1c4f8ce854d6. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 7 ------- include/linux/resctrl.h | 3 --- 2 files changed, 10 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 9f1a439be5ab..10fd07ba4272 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -281,11 +281,6 @@ static void rdt_get_cdp_config(int level) rdt_resources_all[level].r_resctrl.cdp_capable = true; } -static void rdt_set_io_alloc_capable(struct rdt_resource *r) -{ - r->cache.io_alloc_capable = true; -} - static void rdt_get_cdp_l3_config(void) { rdt_get_cdp_config(RDT_RESOURCE_L3); @@ -781,8 +776,6 @@ static __init bool get_rdt_alloc_resources(void) rdt_get_cache_alloc_cfg(1, r); if (rdt_cpu_has(X86_FEATURE_CDP_L3)) rdt_get_cdp_l3_config(); - if (rdt_cpu_has(X86_FEATURE_SDCIAE)) - rdt_set_io_alloc_capable(r); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index aa99d3d58d5b..75dacdc47c8a 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -137,8 +137,6 @@ struct rdt_domain { * @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid. * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache * level has CPU scope. - * @io_alloc_capable: True if portion of the cache can be configured - * for I/O traffic. 
*/ struct resctrl_cache { unsigned int cbm_len; @@ -146,7 +144,6 @@ struct resctrl_cache { unsigned int shareable_bits; bool arch_has_sparse_bitmasks; bool arch_has_per_cpu_cfg; - bool io_alloc_capable; }; /** -- Gitee From f02324b609472bb3792979ef59c46cfef016cb0d Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Wed, 11 Mar 2026 01:02:48 -0700 Subject: [PATCH 008/158] anolis: Revert "x86/resctrl: Add SDCIAE feature in the command line options" ANBZ: #31045 This reverts commit 06296f8026df1f5146e19eac2c8d6ef09606d62d. Signed-off-by: Jason Zeng --- .../admin-guide/kernel-parameters.txt | 2 +- Documentation/arch/x86/resctrl.rst | 23 +++++++++---------- arch/x86/kernel/cpu/resctrl/core.c | 2 -- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e0193a18f4fa..ae26c0b0f4fc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5538,7 +5538,7 @@ rdt= [HW,X86,RDT] Turn on/off individual RDT features. List is: cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp, - mba, smba, bmec, abmc, sdciae. + mba, smba, bmec, abmc. E.g. to turn on cmt and turn off mba use: rdt=cmt,!mba diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 6753ced39454..11c827931f1b 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -17,18 +17,17 @@ AMD refers to this feature as AMD Platform Quality of Service(AMD QoS). 
This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo flag bits: -=============================================================== ================================ -RDT (Resource Director Technology) Allocation "rdt_a" -CAT (Cache Allocation Technology) "cat_l3", "cat_l2" -CDP (Code and Data Prioritization) "cdp_l3", "cdp_l2" -CQM (Cache QoS Monitoring) "cqm_llc", "cqm_occup_llc" -MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local" -MBA (Memory Bandwidth Allocation) "mba" -SMBA (Slow Memory Bandwidth Allocation) "" -BMEC (Bandwidth Monitoring Event Configuration) "" -ABMC (Assignable Bandwidth Monitoring Counters) "" -SDCIAE (Smart Data Cache Injection Allocation Enforcement) "" -=============================================================== ================================ +=============================================== ================================ +RDT (Resource Director Technology) Allocation "rdt_a" +CAT (Cache Allocation Technology) "cat_l3", "cat_l2" +CDP (Code and Data Prioritization) "cdp_l3", "cdp_l2" +CQM (Cache QoS Monitoring) "cqm_llc", "cqm_occup_llc" +MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local" +MBA (Memory Bandwidth Allocation) "mba" +SMBA (Slow Memory Bandwidth Allocation) "" +BMEC (Bandwidth Monitoring Event Configuration) "" +ABMC (Assignable Bandwidth Monitoring Counters) "" +=============================================== ================================ Historically, new features were made visible by default in /proc/cpuinfo. This resulted in the feature flags becoming hard to parse by humans. 
Adding a new diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 10fd07ba4272..526f58b8f26c 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -639,7 +639,6 @@ enum { RDT_FLAG_SMBA, RDT_FLAG_BMEC, RDT_FLAG_ABMC, - RDT_FLAG_SDCIAE, }; #define RDT_OPT(idx, n, f) \ @@ -666,7 +665,6 @@ static struct rdt_options rdt_options[] __ro_after_init = { RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC), - RDT_OPT(RDT_FLAG_SDCIAE, "sdciae", X86_FEATURE_SDCIAE), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) -- Gitee From 00a4f74e6aa96a963656f0c2010ddee917d92353 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Wed, 11 Mar 2026 01:02:50 -0700 Subject: [PATCH 009/158] anolis: Revert "x86/cpufeatures: Add support for L3 Smart Data Cache Injection Allocation Enforcement" ANBZ: #31045 This reverts commit 981fb50f4060831e91d7bae47ad80dba1d3d2112. 
Signed-off-by: Jason Zeng --- arch/x86/include/asm/cpufeatures.h | 2 -- arch/x86/kernel/cpu/cpuid-deps.c | 1 - arch/x86/kernel/cpu/scattered.c | 1 - 3 files changed, 4 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 5172ff7e623f..9a12e209939b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -515,8 +515,6 @@ #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ -#define X86_FEATURE_SDCIAE (21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */ - /* * BUG word(s) */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 8be2847473c4..d0714eb74eca 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -73,7 +73,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_ABMC, X86_FEATURE_CQM_MBM_TOTAL }, { X86_FEATURE_ABMC, X86_FEATURE_CQM_MBM_LOCAL }, { X86_FEATURE_ABMC, X86_FEATURE_BMEC }, - { X86_FEATURE_SDCIAE, X86_FEATURE_CAT_L3 }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 0773ce13715f..509026d5c9cf 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -56,7 +56,6 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, - { X86_FEATURE_SDCIAE, CPUID_EBX, 6, 0x80000020, 0 }, { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; -- Gitee From c90639975ddcf5ba6cb1bc1dd584fc935f08b92c Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:29:18 -0800 Subject: [PATCH 
010/158] anolis: Revert "anolis: x86/resctrl: Fix memory bandwidth counter width for Hygon CPUs" ANBZ: #31045 This reverts commit 6e491b9f54ea3245a5c167b19e48c0502cf7169b. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 17 ++++------------- arch/x86/kernel/cpu/resctrl/internal.h | 3 --- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 526f58b8f26c..996d61331546 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -922,19 +922,10 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) c->x86_cache_occ_scale = ebx; c->x86_cache_mbm_width_offset = eax & 0xff; - if (!c->x86_cache_mbm_width_offset) { - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD; - break; - case X86_VENDOR_HYGON: - c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_HYGON; - break; - default: - /* Leave c->x86_cache_mbm_width_offset as 0 */ - break; - } - } + if ((c->x86_vendor == X86_VENDOR_AMD || + c->x86_vendor == X86_VENDOR_HYGON) && + !c->x86_cache_mbm_width_offset) + c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD; } } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 9017fe146a86..a7ff4f16247c 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -27,9 +27,6 @@ #define MBM_CNTR_WIDTH_OFFSET_AMD 20 -/* Hygon MBM counter width as an offset from MBM_CNTR_WIDTH_BASE */ -#define MBM_CNTR_WIDTH_OFFSET_HYGON 8 - #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) -- Gitee From 0e199337700183cdf463d3f8875b4d267f70f846 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:32:08 -0800 Subject: [PATCH 011/158] anolis: Revert "x86/resctrl: Introduce interface to modify assignment states of the groups" ANBZ: #31045 This reverts commit 
87031530f949d6f95c57538d5c6f2e5a9e8bdc26. Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 94 +-------- fs/resctrl/rdtgroup.c | 315 +---------------------------- include/linux/resctrl.h | 7 - 3 files changed, 2 insertions(+), 414 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 11c827931f1b..aaed239782e5 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -346,7 +346,7 @@ with the following files: t MBM total event is enabled. l MBM local event is enabled. tl Both total and local MBM events are enabled. - _ None of the MBM events are enabled. Only works with opcode '=' for write. + _ None of the MBM events are enabled. Examples: :: @@ -364,98 +364,6 @@ with the following files: There are four resctrl groups. All the groups have total and local MBM events enabled on domain 0 and 1. - Assignment state can be updated by writing to the interface. - - Format is similar to the list format with addition of opcode for the - assignment operation. - - "//" - - Format for each type of groups: - - * Default CTRL_MON group: - "//" - - * Non-default CTRL_MON group: - "//" - - * Child MON group of default CTRL_MON group: - "//" - - * Child MON group of non-default CTRL_MON group: - "//" - - Domain_id '*' wil apply the flags on all the domains. - - Opcode can be one of the following: - :: - - = Update the assignment to match the MBM event. - + Assign a MBM event. - - Unassign a MBM event. 
- - Examples: - :: - - Initial group status: - # cat /sys/fs/resctrl/info/L3_MON/mbm_control - non_default_ctrl_mon_grp//0=tl;1=tl; - non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=tl; - //0=tl;1=tl; - /child_default_mon_grp/0=tl;1=tl; - - To update the default group to assign only total MBM event on domain 0: - # echo "//0=t" > /sys/fs/resctrl/info/L3_MON/mbm_control - - Assignment status after the update: - # cat /sys/fs/resctrl/info/L3_MON/mbm_control - non_default_ctrl_mon_grp//0=tl;1=tl; - non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=tl; - //0=t;1=tl; - /child_default_mon_grp/0=tl;1=tl; - - To update the MON group child_default_mon_grp to remove total MBM event on domain 1: - # echo "/child_default_mon_grp/1-t" > /sys/fs/resctrl/info/L3_MON/mbm_control - - Assignment status after the update: - $ cat /sys/fs/resctrl/info/L3_MON/mbm_control - non_default_ctrl_mon_grp//0=tl;1=tl; - non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=tl; - //0=t;1=tl; - /child_default_mon_grp/0=tl;1=l; - - To update the MON group non_default_ctrl_mon_grp/child_non_default_mon_grp to - unassign both local and total MBM events on domain 1: - # echo "non_default_ctrl_mon_grp/child_non_default_mon_grp/1=_" > - /sys/fs/resctrl/info/L3_MON/mbm_control - - Assignment status after the update: - non_default_ctrl_mon_grp//0=tl;1=tl; - non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=_; - //0=t;1=tl; - /child_default_mon_grp/0=tl;1=l; - - To update the default group to add a local MBM event domain 0. - # echo "//0+l" > /sys/fs/resctrl/info/L3_MON/mbm_control - - Assignment status after the update: - # cat /sys/fs/resctrl/info/L3_MON/mbm_control - non_default_ctrl_mon_grp//0=tl;1=tl; - non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=_; - //0=tl;1=tl; - /child_default_mon_grp/0=tl;1=l; - - To update the non default CTRL_MON group non_default_ctrl_mon_grp to unassign all - the MBM events on all the domains. 
- # echo "non_default_ctrl_mon_grp//*=_" > /sys/fs/resctrl/info/L3_MON/mbm_control - - Assignment status after the update: - #cat /sys/fs/resctrl/info/L3_MON/mbm_control - non_default_ctrl_mon_grp//0=_;1=_; - non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=_; - //0=tl;1=tl; - /child_default_mon_grp/0=tl;1=l; - "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index aba699474e86..e7a62537bc03 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1043,318 +1043,6 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, return 0; } -/* - * Update the assign states for the domain. - * - * If this is a new assignment for the group then allocate a counter and update - * the assignment else just update the assign state - */ -static int rdtgroup_assign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid, - struct rdt_domain *d) -{ - int ret, index; - - index = mon_event_config_index_get(evtid); - if (index == INVALID_CONFIG_INDEX) - return -EINVAL; - - if (rdtgrp->mon.cntr_id[index] == MON_CNTR_UNSET) { - ret = rdtgroup_alloc_cntr(rdtgrp, index); - if (ret < 0) - goto out_done; - } - - /* Update the state on all domains if d == NULL */ - if (d == NULL) { - ret = rdtgroup_assign_cntr(rdtgrp, evtid); - } else { - ret = resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, - rdtgrp->mon.cntr_id[index], - rdtgrp->closid, 1); - if (!ret) - set_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map); - } - -out_done: - return ret; -} - -/* - * Update the unassign state for the domain. 
- * - * Free the counter if it is unassigned on all the domains else just - * update the unassign state - */ -static int rdtgroup_unassign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid, - struct rdt_domain *d) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - int ret = 0, index; - - index = mon_event_config_index_get(evtid); - if (index == INVALID_CONFIG_INDEX) - return -EINVAL; - - if (rdtgrp->mon.cntr_id[index] == MON_CNTR_UNSET) - goto out_done; - - if (d == NULL) { - ret = rdtgroup_unassign_cntr(rdtgrp, evtid); - } else { - ret = resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, - rdtgrp->mon.cntr_id[index], - rdtgrp->closid, 0); - if (!ret) { - clear_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map); - rdtgroup_free_cntr(r, rdtgrp, index); - } - } - -out_done: - return ret; -} - -static int rdtgroup_str_to_mon_state(char *flag) -{ - int i, mon_state = 0; - - for (i = 0; i < strlen(flag); i++) { - switch (*(flag + i)) { - case 't': - mon_state |= ASSIGN_TOTAL; - break; - case 'l': - mon_state |= ASSIGN_LOCAL; - break; - case '_': - mon_state = ASSIGN_NONE; - break; - default: - break; - } - } - - return mon_state; -} - -static struct rdtgroup *rdtgroup_find_grp(enum rdt_group_type rtype, char *p_grp, char *c_grp) -{ - struct rdtgroup *rdtg, *crg; - - if (rtype == RDTCTRL_GROUP && *p_grp == '\0') { - return &rdtgroup_default; - } else if (rtype == RDTCTRL_GROUP) { - list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) - if (!strcmp(p_grp, rdtg->kn->name)) - return rdtg; - } else if (rtype == RDTMON_GROUP) { - list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { - if (!strcmp(p_grp, rdtg->kn->name)) { - list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, - mon.crdtgrp_list) { - if (!strcmp(c_grp, crg->kn->name)) - return crg; - } - } - } - } - - return NULL; -} - -static int rdtgroup_process_flags(struct rdt_resource *r, - enum rdt_group_type rtype, - char *p_grp, char *c_grp, char *tok) -{ - int op, mon_state, 
assign_state, unassign_state; - char *dom_str, *id_str, *op_str; - struct rdt_domain *d; - struct rdtgroup *rdtgrp; - unsigned long dom_id; - int ret, found = 0; - - rdtgrp = rdtgroup_find_grp(rtype, p_grp, c_grp); - - if (!rdtgrp) { - rdt_last_cmd_puts("Not a valid resctrl group\n"); - return -EINVAL; - } - -next: - if (!tok || tok[0] == '\0') - return 0; - - /* Start processing the strings for each domain */ - dom_str = strim(strsep(&tok, ";")); - - op_str = strpbrk(dom_str, "=+-"); - - if (op_str) { - op = *op_str; - } else { - rdt_last_cmd_puts("Missing operation =, +, -, _ character\n"); - return -EINVAL; - } - - id_str = strsep(&dom_str, "=+-"); - - /* Check for domain id '*' which means all domains */ - if (id_str && *id_str == '*') { - d = NULL; - goto check_state; - } else if (!id_str || kstrtoul(id_str, 10, &dom_id)) { - rdt_last_cmd_puts("Missing domain id\n"); - return -EINVAL; - } - - /* Verify if the dom_id is valid */ - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { - found = 1; - break; - } - } - - if (!found) { - rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id); - return -EINVAL; - } - -check_state: - mon_state = rdtgroup_str_to_mon_state(dom_str); - - assign_state = 0; - unassign_state = 0; - - switch (op) { - case '+': - if (mon_state == ASSIGN_NONE) { - rdt_last_cmd_puts("Invalid assign opcode\n"); - goto out_fail; - } - assign_state = mon_state; - break; - case '-': - if (mon_state == ASSIGN_NONE) { - rdt_last_cmd_puts("Invalid assign opcode\n"); - goto out_fail; - } - unassign_state = mon_state; - break; - case '=': - assign_state = mon_state; - unassign_state = (ASSIGN_TOTAL | ASSIGN_LOCAL) & ~assign_state; - break; - default: - break; - } - - if (assign_state & ASSIGN_TOTAL) { - ret = rdtgroup_assign_update(rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID, d); - if (ret) - goto out_fail; - } - - if (assign_state & ASSIGN_LOCAL) { - ret = rdtgroup_assign_update(rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID, d); - if (ret) - goto out_fail; - 
} - - if (unassign_state & ASSIGN_TOTAL) { - ret = rdtgroup_unassign_update(rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID, d); - if (ret) - goto out_fail; - } - - if (unassign_state & ASSIGN_LOCAL) { - ret = rdtgroup_unassign_update(rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID, d); - if (ret) - goto out_fail; - } - - goto next; - -out_fail: - - return -EINVAL; -} - -static ssize_t rdtgroup_mbm_control_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, - loff_t off) -{ - struct rdt_resource *r = of->kn->parent->priv; - char *token, *cmon_grp, *mon_grp; - int ret; - - if (!resctrl_arch_get_abmc_enabled()) - return -EINVAL; - - /* Valid input requires a trailing newline */ - if (nbytes == 0 || buf[nbytes - 1] != '\n') - return -EINVAL; - - buf[nbytes - 1] = '\0'; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - rdt_last_cmd_clear(); - - while ((token = strsep(&buf, "\n")) != NULL) { - if (strstr(token, "//")) { - /* - * The CTRL_MON group processing: - * default CTRL_MON group: "//" - * non-default CTRL_MON group: "//flags" - * The CTRL_MON group will be empty string if it is a - * default group. - */ - cmon_grp = strsep(&token, "//"); - - /* - * strsep returns empty string for contiguous delimiters. - * Make sure check for two consecutive delimiters and - * advance the token. - */ - mon_grp = strsep(&token, "//"); - if (*mon_grp != '\0') { - rdt_last_cmd_printf("Invalid CTRL_MON group format %s\n", token); - ret = -EINVAL; - break; - } - - ret = rdtgroup_process_flags(r, RDTCTRL_GROUP, cmon_grp, mon_grp, token); - if (ret) - break; - } else if (strstr(token, "/")) { - /* - * MON group processing: - * MON_GROUP inside default CTRL_MON group: "//" - * MON_GROUP within CTRL_MON group: "//" - */ - cmon_grp = strsep(&token, "/"); - - /* Extract the MON_GROUP. 
It cannot be empty string */ - mon_grp = strsep(&token, "/"); - if (*mon_grp == '\0') { - rdt_last_cmd_printf("Invalid MON_GROUP format %s\n", token); - ret = -EINVAL; - break; - } - - ret = rdtgroup_process_flags(r, RDTMON_GROUP, cmon_grp, mon_grp, token); - if (ret) - break; - } - } - - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - - return ret ?: nbytes; -} - #ifdef CONFIG_PROC_CPU_RESCTRL /* @@ -2489,10 +2177,9 @@ static struct rftype res_common_files[] = { }, { .name = "mbm_control", - .mode = 0644, + .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdtgroup_mbm_control_show, - .write = rdtgroup_mbm_control_write, }, { .name = "cpus_list", diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 75dacdc47c8a..1c5d3361a71f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -274,13 +274,6 @@ struct resctrl_mon_config_info { int err; }; -/* - * Assignment flags for ABMC feature - */ -#define ASSIGN_NONE 0 -#define ASSIGN_TOTAL BIT(QOS_L3_MBM_TOTAL_EVENT_ID) -#define ASSIGN_LOCAL BIT(QOS_L3_MBM_LOCAL_EVENT_ID) - /** * mon_event_config_index_get - get the hardware index for the * configurable event -- Gitee From b4ded3aea5651c9b5a5b5f873af1edc5d0936fab Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:32:51 -0800 Subject: [PATCH 012/158] anolis: Revert "x86/resctrl: Introduce interface to list monitor states of all the groups" ANBZ: #31045 This reverts commit 76def6384268a3f0ad89d617a7fc37ce1ee1451a. 
Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 44 --------------- arch/x86/include/asm/resctrl.h | 1 - arch/x86/kernel/cpu/resctrl/monitor.c | 1 - arch/x86/kernel/cpu/resctrl/rdtgroup.c | 8 --- fs/resctrl/rdtgroup.c | 74 -------------------------- include/linux/arm_mpam.h | 5 -- 6 files changed, 133 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index aaed239782e5..c61057728ad7 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -320,50 +320,6 @@ with the following files: the counter is not assigned to the event when read. Users need to assign a counter manually to read the events. -"mbm_control": - Reports the resctrl group and monitor status of each group. - - List follows the following format: - "//=" - - Format for specific type of groups: - - * Default CTRL_MON group: - "//=" - - * Non-default CTRL_MON group: - "//=" - - * Child MON group of default CTRL_MON group: - "//=" - - * Child MON group of non-default CTRL_MON group: - "//=" - - Flags can be one of the following: - :: - - t MBM total event is enabled. - l MBM local event is enabled. - tl Both total and local MBM events are enabled. - _ None of the MBM events are enabled. - - Examples: - :: - - # mkdir /sys/fs/resctrl/mon_groups/child_default_mon_grp - # mkdir /sys/fs/resctrl/non_default_ctrl_mon_grp - # mkdir /sys/fs/resctrl/non_default_ctrl_mon_grp/mon_groups/child_non_default_mon_grp - - # cat /sys/fs/resctrl/info/L3_MON/mbm_control - non_default_ctrl_mon_grp//0=tl;1=tl; - non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=tl; - //0=tl;1=tl; - /child_default_mon_grp/0=tl;1=tl; - - There are four resctrl groups. All the groups have total and local MBM events - enabled on domain 0 and 1. 
- "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 7ea7c3d7b5be..2db0d95f9dc5 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -219,7 +219,6 @@ static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, int resctrl_arch_mbm_cntr_assign_enable(void); void resctrl_arch_mbm_cntr_assign_configure(void); void resctrl_arch_mbm_cntr_assign_disable(void); -bool resctrl_arch_get_mbm_cntr_assign_enable(void); void resctrl_arch_event_config_set(void *info); u32 resctrl_arch_event_config_get(void *d, enum resctrl_event_id eventid); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 0ed28ef8c25a..ee8519f51359 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -257,7 +257,6 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) r->mon.num_mbm_cntrs = 64; resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO); - resctrl_file_fflags_init("mbm_control", RFTYPE_MON_INFO); } /* diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 50a49a1ea604..964f2614695a 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -391,14 +391,6 @@ void resctrl_arch_mbm_cntr_assign_disable(void) } } -bool resctrl_arch_get_mbm_cntr_assign_enable(void) -{ - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - - return hw_res->mbm_cntr_assign_enabled; -} - static void rdtgroup_abmc_cfg(void *info) { u64 *msrval = info; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index e7a62537bc03..722e2ace8624 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -975,74 +975,6 @@ static int 
rdtgroup_num_mbm_cntrs_show(struct kernfs_open_file *of, return 0; } -static char *rdtgroup_mon_state_to_str(struct rdtgroup *rdtgrp, - struct rdt_domain *d, char *str) -{ - char *tmp = str; - int index; - - /* - * Query the monitor state for the domain. - * Index 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID - * Index 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID - */ - index = mon_event_config_index_get(QOS_L3_MBM_TOTAL_EVENT_ID); - if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET && - test_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map)) - *tmp++ = 't'; - - index = mon_event_config_index_get(QOS_L3_MBM_LOCAL_EVENT_ID); - if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET && - test_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map)) - *tmp++ = 'l'; - - if (tmp == str) - *tmp++ = '_'; - - *tmp = '\0'; - return str; -} - -static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - struct rdt_domain *dom; - struct rdtgroup *rdtg; - char str[10]; - - if (!resctrl_arch_get_mbm_cntr_assign_enable()) { - rdt_last_cmd_puts("ABMC feature is not enabled\n"); - return -EINVAL; - } - - mutex_lock(&rdtgroup_mutex); - - list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { - struct rdtgroup *crg; - - seq_printf(s, "%s//", rdtg->kn->name); - - list_for_each_entry(dom, &r->domains, list) - seq_printf(s, "%d=%s;", dom->id, - rdtgroup_mon_state_to_str(rdtg, dom, str)); - seq_putc(s, '\n'); - - list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, - mon.crdtgrp_list) { - seq_printf(s, "%s/%s/", rdtg->kn->name, crg->kn->name); - - list_for_each_entry(dom, &r->domains, list) - seq_printf(s, "%d=%s;", dom->id, - rdtgroup_mon_state_to_str(crg, dom, str)); - seq_putc(s, '\n'); - } - } - - mutex_unlock(&rdtgroup_mutex); - return 0; -} - #ifdef CONFIG_PROC_CPU_RESCTRL /* @@ -2175,12 +2107,6 @@ static struct rftype res_common_files[] = { .kf_ops = &rdtgroup_kf_single_ops, .seq_show = 
rdtgroup_num_mbm_cntrs_show, }, - { - .name = "mbm_control", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdtgroup_mbm_control_show, - }, { .name = "cpus_list", .mode = 0644, diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index c7698dda09fb..d55bc53ffbed 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -146,11 +146,6 @@ void resctrl_arch_event_config_set(void *info); u32 resctrl_arch_event_config_get(void *dom, enum resctrl_event_id eventid); -static inline bool resctrl_arch_get_mbm_cntr_assign_enable(void) -{ - return false; -} - static inline int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, u32 rmid, u32 cntr_id, u32 closid, bool assign) { -- Gitee From 4da52efb6e89562d668c3215499b6d0e286be439 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:33:03 -0800 Subject: [PATCH 013/158] anolis: Revert "x86/resctrl: Enable AMD ABMC feature by default when supported" ANBZ: #31045 This reverts commit 207abf1fc4b4526791dc8fb9d12750de439a64e7. 
Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 1 - arch/x86/kernel/cpu/resctrl/core.c | 2 -- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 17 ----------------- include/linux/arm_mpam.h | 2 -- 4 files changed, 22 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 2db0d95f9dc5..a2958efa9375 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -217,7 +217,6 @@ static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx) { }; int resctrl_arch_mbm_cntr_assign_enable(void); -void resctrl_arch_mbm_cntr_assign_configure(void); void resctrl_arch_mbm_cntr_assign_disable(void); void resctrl_arch_event_config_set(void *info); diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 996d61331546..7a70a55a5162 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -510,7 +510,6 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) cpumask_set_cpu(cpu, &d->cpu_mask); if (r->cache.arch_has_per_cpu_cfg) rdt_domain_reconfigure_cdp(r); - resctrl_arch_mbm_cntr_assign_configure(); return; } @@ -532,7 +531,6 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) } resctrl_mbm_evt_config_init(hw_dom); - resctrl_arch_mbm_cntr_assign_configure(); if (r->mon_capable && arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { domain_free(hw_dom); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 964f2614695a..0739cb5c65cf 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -361,23 +361,6 @@ int resctrl_arch_mbm_cntr_assign_enable(void) return 0; } -void resctrl_arch_mbm_cntr_assign_configure(void) -{ - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - bool enable = true; - - mutex_lock(&rdtgroup_mutex); - - if 
(r->mon.mbm_cntr_assignable) { - if (!hw_res->mbm_cntr_assign_enabled) - hw_res->mbm_cntr_assign_enabled = true; - resctrl_abmc_set_one_amd(&enable); - } - - mutex_unlock(&rdtgroup_mutex); -} - void resctrl_arch_mbm_cntr_assign_disable(void) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index d55bc53ffbed..12785ba2441e 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -128,8 +128,6 @@ static inline void resctrl_arch_disable_mon(void) { } static inline void resctrl_arch_enable_alloc(void) { } static inline void resctrl_arch_disable_alloc(void) { } -static inline void resctrl_arch_mbm_cntr_assign_configure(void) { } - static inline bool resctrl_arch_get_abmc_enabled(void) { return false; -- Gitee From cb0f027f9c4b8d347aee3a6e8883bb90cc7534d2 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:33:11 -0800 Subject: [PATCH 014/158] anolis: Revert "x86/resctrl: Introduce the interface to switch between monitor modes" ANBZ: #31045 This reverts commit ed82ee7b8eab3dd174bf1a83040d3a9772a7b46b. Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 15 -------- fs/resctrl/rdtgroup.c | 62 +----------------------------- 2 files changed, 1 insertion(+), 76 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index c61057728ad7..4e5f5e9da229 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -291,21 +291,6 @@ with the following files: as long as there are enough RMID counters available to support number of monitoring groups. - * To enable ABMC feature: - :: - - # echo "mbm_cntr_assign" > /sys/fs/resctrl/info/L3_MON/mbm_mode - - * To enable the legacy monitoring feature: - :: - - # echo "legacy" > /sys/fs/resctrl/info/L3_MON/mbm_mode - - The MBM event counters will reset when mbm_mode is changed. 
Moving to - mbm_cntr_assign will require users to assign the counters to the events to - read the events. Otherwise, the MBM event counters will return "Unassigned" - when read. - "num_mbm_cntrs": The number of monitoring counters available for assignment. diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 722e2ace8624..8b1839b4271c 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -906,65 +906,6 @@ static int rdtgroup_mbm_mode_show(struct kernfs_open_file *of, return 0; } -static void rdtgroup_mbm_cntr_reset(struct rdt_resource *r) -{ - struct rdtgroup *prgrp, *crgrp; - struct rdt_domain *dom; - - mbm_cntrs_init(r); - - list_for_each_entry(dom, &r->domains, list) - bitmap_zero(dom->mbm_cntr_map, r->mon.num_mbm_cntrs); - - /* Reset the cntr_id's for all the monitor groups */ - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - prgrp->mon.cntr_id[0] = MON_CNTR_UNSET; - prgrp->mon.cntr_id[1] = MON_CNTR_UNSET; - list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, - mon.crdtgrp_list) { - crgrp->mon.cntr_id[0] = MON_CNTR_UNSET; - crgrp->mon.cntr_id[1] = MON_CNTR_UNSET; - } - } -} - -static ssize_t rdtgroup_mbm_mode_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, - loff_t off) -{ - int mbm_cntr_assign = resctrl_arch_get_abmc_enabled(); - struct rdt_resource *r = of->kn->parent->priv; - int ret = 0; - - /* Valid input requires a trailing newline */ - if (nbytes == 0 || buf[nbytes - 1] != '\n') - return -EINVAL; - - buf[nbytes - 1] = '\0'; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - rdt_last_cmd_clear(); - - if (!strcmp(buf, "legacy")) { - if (mbm_cntr_assign) - resctrl_arch_mbm_cntr_assign_disable(); - } else if (!strcmp(buf, "mbm_cntr_assign")) { - if (!mbm_cntr_assign) { - rdtgroup_mbm_cntr_reset(r); - ret = resctrl_arch_mbm_cntr_assign_enable(); - } - } else { - ret = -EINVAL; - } - - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - - return ret ?: nbytes; -} - static int 
rdtgroup_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { @@ -2087,10 +2028,9 @@ static struct rftype res_common_files[] = { }, { .name = "mbm_mode", - .mode = 0644, + .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdtgroup_mbm_mode_show, - .write = rdtgroup_mbm_mode_write, .fflags = RFTYPE_MON_INFO, }, { -- Gitee From 126c37d421d063d16691858dfb68c8ccbe428aef Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:33:17 -0800 Subject: [PATCH 015/158] anolis: Revert "x86/resctrl: Report "Unassigned" for MBM events in ABMC mode" ANBZ: #31045 This reverts commit 321afaa12ea1cd0f56bc989f4f2d1d4366813ad9. Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 11 ----------- fs/resctrl/ctrlmondata.c | 14 +------------- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 4e5f5e9da229..4594f740eb67 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -294,17 +294,6 @@ with the following files: "num_mbm_cntrs": The number of monitoring counters available for assignment. - Resctrl subsystem provides the interface to count maximum of two - MBM events per group, from a combination of total and local events. - Keeping the current interface, users can assign a maximum of two - monitoring counters per group. User will also have the option to - enable only one counter to the group. - - With limited number of counters, system can run out of assignable counters. - In mbm_cntr_assign mode, the MBM event counters will return "Unassigned" if - the counter is not assigned to the event when read. Users need to assign a - counter manually to read the events. 
- "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 253443f87646..f5cdabe2ee9e 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -497,7 +497,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) union mon_data_bits md; struct rdt_domain *d; struct rmid_read rr; - int ret = 0, index; + int ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -517,24 +517,12 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) goto out; } - if (resctrl_arch_get_abmc_enabled() && evtid != QOS_L3_OCCUP_EVENT_ID) { - index = mon_event_config_index_get(evtid); - if (index != INVALID_CONFIG_INDEX && - rdtgrp->mon.cntr_id[index] == MON_CNTR_UNSET) { - rr.err = -ENOENT; - goto checkresult; - } - } - mon_event_read(&rr, r, d, rdtgrp, evtid, false); -checkresult: if (rr.err == -EIO) seq_puts(m, "Error\n"); else if (rr.err == -EINVAL) seq_puts(m, "Unavailable\n"); - else if (rr.err == -ENOENT) - seq_puts(m, "Unassigned\n"); else seq_printf(m, "%llu\n", rr.val); -- Gitee From 933bef844ca5288bf31e032e2106f62b5d205fd3 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:34:53 -0800 Subject: [PATCH 016/158] anolis: Revert "x86/resctrl: Assign/unassign counters by default when ABMC is enabled" ANBZ: #31045 This reverts commit 25441d0598356841e5e862b7af522c777d3ec50f. Signed-off-by: Jason Zeng --- fs/resctrl/rdtgroup.c | 55 ------------------------------------------- 1 file changed, 55 deletions(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 8b1839b4271c..5d0901d7f09e 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -2668,46 +2668,6 @@ static void schemata_list_destroy(void) } } -/* - * Called when new group is created. Assign the counters if ABMC is - * already enabled. Two counters are required per group, one for total - * event and one for local event. 
With limited number of counters, - * the assignments can fail in some cases. But, it is not required to - * fail the group creation. Users have the option to modify the - * assignments after the group creation. - */ -static int rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp) -{ - int ret = 0; - - if (!resctrl_arch_get_abmc_enabled()) - return 0; - - if (resctrl_arch_is_mbm_total_enabled()) - ret = rdtgroup_assign_cntr(rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID); - - if (!ret && resctrl_arch_is_mbm_local_enabled()) - ret = rdtgroup_assign_cntr(rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID); - - return ret; -} - -static int rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp) -{ - int ret = 0; - - if (!resctrl_arch_get_abmc_enabled()) - return 0; - - if (resctrl_arch_is_mbm_total_enabled()) - ret = rdtgroup_unassign_cntr(rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID); - - if (!ret && resctrl_arch_is_mbm_local_enabled()) - ret = rdtgroup_unassign_cntr(rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID); - - return ret; -} - static int rdt_get_tree(struct fs_context *fc) { struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); @@ -2769,8 +2729,6 @@ static int rdt_get_tree(struct fs_context *fc) if (ret < 0) goto out_mongrp; rdtgroup_default.mon.mon_data_kn = kn_mondata; - - rdtgroup_assign_cntrs(&rdtgroup_default); } ret = rdt_pseudo_lock_init(); @@ -2800,7 +2758,6 @@ static int rdt_get_tree(struct fs_context *fc) out_psl: rdt_pseudo_lock_release(); out_mondata: - rdtgroup_unassign_cntrs(&rdtgroup_default); if (resctrl_arch_mon_capable()) kernfs_remove(kn_mondata); out_mongrp: @@ -3048,8 +3005,6 @@ static void rdt_kill_sb(struct super_block *sb) resctrl_arch_disable_alloc(); if (resctrl_arch_mon_capable()) resctrl_arch_disable_mon(); - - rdtgroup_unassign_cntrs(&rdtgroup_default); resctrl_mounted = false; kernfs_kill_sb(sb); mutex_unlock(&rdtgroup_mutex); @@ -3582,8 +3537,6 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, goto out_unlock; } - rdtgroup_assign_cntrs(rdtgrp); - 
kernfs_activate(rdtgrp->kn); /* @@ -3628,8 +3581,6 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, if (ret) goto out_closid_free; - rdtgroup_assign_cntrs(rdtgrp); - kernfs_activate(rdtgrp->kn); ret = rdtgroup_init_alloc(rdtgrp); @@ -3655,7 +3606,6 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, out_del_list: list_del(&rdtgrp->rdtgroup_list); out_rmid_free: - rdtgroup_unassign_cntrs(rdtgrp); mkdir_rdt_prepare_rmid_free(rdtgrp); out_closid_free: closid_free(closid); @@ -3730,9 +3680,6 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) update_closid_rmid(tmpmask, NULL); rdtgrp->flags = RDT_DELETED; - - rdtgroup_unassign_cntrs(rdtgrp); - free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); /* @@ -3780,8 +3727,6 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); update_closid_rmid(tmpmask, NULL); - rdtgroup_unassign_cntrs(rdtgrp); - free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); closid_free(rdtgrp->closid); -- Gitee From 189ed88d344a66ee323e74fec2f6f15fa24b5f92 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:34:59 -0800 Subject: [PATCH 017/158] anolis: Revert "x86/resctrl: Add the interface to unassign a MBM counter" ANBZ: #31045 This reverts commit 5c1e22737e211e6c7630a07a207828b64a0a3bee. 
Signed-off-by: Jason Zeng --- fs/resctrl/internal.h | 2 -- fs/resctrl/rdtgroup.c | 52 ------------------------------------------- 2 files changed, 54 deletions(-) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 58d812d6c1d2..638734c63dcf 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -317,8 +317,6 @@ int resctrl_find_cleanest_closid(void); unsigned int mon_event_config_index_get(u32 evtid); int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid); int rdtgroup_alloc_cntr(struct rdtgroup *rdtgrp, int index); -int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid); -void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp, int index); #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 5d0901d7f09e..af402df1e9e8 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1862,58 +1862,6 @@ int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) return 0; } -static int rdtgroup_mbm_cntr_test(struct rdt_resource *r, u32 cntr_id) -{ - struct rdt_domain *d; - - list_for_each_entry(d, &r->domains, list) - if (test_bit(cntr_id, d->mbm_cntr_map)) - return 1; - - return 0; -} - -/* Free the counter id after the event is unassigned */ -void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp, - int index) -{ - /* Update the counter bitmap */ - if (!rdtgroup_mbm_cntr_test(r, rdtgrp->mon.cntr_id[index])) { - mbm_cntr_free(rdtgrp->mon.cntr_id[index]); - rdtgrp->mon.cntr_id[index] = MON_CNTR_UNSET; - } -} - -/* - * Unassign a hardware counter from the group and update all the domains - * in the group. 
- */ -int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; - int index; - - index = mon_event_config_index_get(evtid); - if (index == INVALID_CONFIG_INDEX) - return -EINVAL; - - if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET) { - list_for_each_entry(d, &r->domains, list) { - resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, - rdtgrp->mon.cntr_id[index], - rdtgrp->closid, false); - clear_bit(rdtgrp->mon.cntr_id[index], - d->mbm_cntr_map); - } - - /* Free the counter at group level */ - rdtgroup_free_cntr(r, rdtgrp, index); - } - - return 0; -} - /* rdtgroup information files for one cache resource. */ static struct rftype res_common_files[] = { { -- Gitee From 3af9aacb48ba511bd39fd8886b9f0c3e6a2ff295 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:35:05 -0800 Subject: [PATCH 018/158] anolis: Revert "x86/resctrl: Add the interface to assign a hardware counter" ANBZ: #31045 This reverts commit 53014429148a0d942d33bbacc56c247836755582. 
Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 3 -- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 44 --------------------- fs/resctrl/internal.h | 2 - fs/resctrl/rdtgroup.c | 54 -------------------------- include/linux/arm_mpam.h | 7 ---- 5 files changed, 110 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index a2958efa9375..405fa1d9bae4 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -222,9 +222,6 @@ void resctrl_arch_mbm_cntr_assign_disable(void); void resctrl_arch_event_config_set(void *info); u32 resctrl_arch_event_config_get(void *d, enum resctrl_event_id eventid); -int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, - u32 rmid, u32 cntr_id, u32 closid, bool assign); - u64 resctrl_arch_get_prefetch_disable_bits(void); int resctrl_arch_pseudo_lock_fn(void *_plr); int resctrl_arch_measure_cycles_lat_fn(void *_plr); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 0739cb5c65cf..517562950edb 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -374,50 +374,6 @@ void resctrl_arch_mbm_cntr_assign_disable(void) } } -static void rdtgroup_abmc_cfg(void *info) -{ - u64 *msrval = info; - - wrmsrl(MSR_IA32_L3_QOS_ABMC_CFG, *msrval); -} - -/* - * Send an IPI to the domain to assign the counter id to RMID. - */ -int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, - u32 rmid, u32 cntr_id, u32 closid, bool assign) -{ - struct rdt_domain *d = dom; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - union l3_qos_abmc_cfg abmc_cfg = { 0 }; - struct arch_mbm_state *arch_mbm; - - abmc_cfg.split.cfg_en = 1; - abmc_cfg.split.cntr_en = assign ? 
1 : 0; - abmc_cfg.split.cntr_id = cntr_id; - abmc_cfg.split.bw_src = rmid; - - /* Update the event configuration from the domain */ - if (evtid == QOS_L3_MBM_TOTAL_EVENT_ID) { - abmc_cfg.split.bw_type = hw_dom->mbm_total_cfg; - arch_mbm = &hw_dom->arch_mbm_total[rmid]; - } else { - abmc_cfg.split.bw_type = hw_dom->mbm_local_cfg; - arch_mbm = &hw_dom->arch_mbm_local[rmid]; - } - - smp_call_function_any(&d->cpu_mask, rdtgroup_abmc_cfg, &abmc_cfg, 1); - - /* - * Reset the architectural state so that reading of hardware - * counter is not considered as an overflow in next update. - */ - if (arch_mbm) - memset(arch_mbm, 0, sizeof(struct arch_mbm_state)); - - return 0; -} - static int reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 638734c63dcf..f04516092c85 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -315,8 +315,6 @@ void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); unsigned int mon_event_config_index_get(u32 evtid); -int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid); -int rdtgroup_alloc_cntr(struct rdtgroup *rdtgrp, int index); #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index af402df1e9e8..80213fd361b6 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1808,60 +1808,6 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, return ret ?: nbytes; } -/* Allocate a new counter id if the event is unassigned */ -int rdtgroup_alloc_cntr(struct rdtgroup *rdtgrp, int index) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - int cntr_id; - - /* Nothing to do if event has been assigned already */ - if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET) { - rdt_last_cmd_puts("ABMC 
counter is assigned already\n"); - return 0; - } - - /* - * Allocate a new counter id and update domains - */ - cntr_id = mbm_cntr_alloc(r); - if (cntr_id < 0) { - rdt_last_cmd_puts("Out of ABMC counters\n"); - return -ENOSPC; - } - - rdtgrp->mon.cntr_id[index] = cntr_id; - - return 0; -} - -/* - * Assign a hardware counter to the group and assign the counter - * all the domains in the group. It will try to allocate the mbm - * counter if the counter is available. - */ -int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; - int index; - - index = mon_event_config_index_get(evtid); - if (index == INVALID_CONFIG_INDEX) - return -EINVAL; - - if (rdtgroup_alloc_cntr(rdtgrp, index)) - return -EINVAL; - - list_for_each_entry(d, &r->domains, list) { - resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, - rdtgrp->mon.cntr_id[index], - rdtgrp->closid, true); - set_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map); - } - - return 0; -} - /* rdtgroup information files for one cache resource. 
*/ static struct rftype res_common_files[] = { { diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 12785ba2441e..bfd30d93c3d1 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -143,11 +143,4 @@ static inline void resctrl_arch_mbm_cntr_assign_disable(void) { } void resctrl_arch_event_config_set(void *info); u32 resctrl_arch_event_config_get(void *dom, enum resctrl_event_id eventid); - -static inline int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, - u32 rmid, u32 cntr_id, u32 closid, bool assign) -{ - return -EINVAL; -} - #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From 63512463273c6eb9ad4116eede12a7656eabeef8 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:35:10 -0800 Subject: [PATCH 019/158] anolis: Revert "x86/resctrl: Introduce cntr_id in mongroup for assignments" ANBZ: #31045 This reverts commit fefac429a993c8b4514c32747b3f81547085d4c8. Signed-off-by: Jason Zeng --- fs/resctrl/internal.h | 7 ------- fs/resctrl/rdtgroup.c | 6 ------ 2 files changed, 13 deletions(-) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index f04516092c85..7620f8a5ce50 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -11,11 +11,6 @@ #include -/* Maximum assignable counters per resctrl group */ -#define MAX_CNTRS 2 - -#define MON_CNTR_UNSET U32_MAX - /** * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that * aren't marked nohz_full @@ -160,14 +155,12 @@ enum rdtgrp_mode { * @parent: parent rdtgrp * @crdtgrp_list: child rdtgroup node list * @rmid: rmid for this rdtgroup - * @cntr_id: Counter ids for assignment */ struct mongroup { struct kernfs_node *mon_data_kn; struct rdtgroup *parent; struct list_head crdtgrp_list; u32 rmid; - u32 cntr_id[MAX_CNTRS]; }; /** diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 80213fd361b6..de27579019b9 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3295,9 +3295,6 @@ static int 
mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) } rdtgrp->mon.rmid = ret; - rdtgrp->mon.cntr_id[0] = MON_CNTR_UNSET; - rdtgrp->mon.cntr_id[1] = MON_CNTR_UNSET; - ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn); if (ret) { rdt_last_cmd_puts("kernfs subdir error\n"); @@ -3860,9 +3857,6 @@ static void rdtgroup_setup_default(void) rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; rdtgroup_default.type = RDTCTRL_GROUP; - rdtgroup_default.mon.cntr_id[0] = MON_CNTR_UNSET; - rdtgroup_default.mon.cntr_id[1] = MON_CNTR_UNSET; - INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); -- Gitee From 2842ebe13a7f37a71805e6b24410e14a138032cb Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:35:16 -0800 Subject: [PATCH 020/158] anolis: Revert "x86/resctrl: Add data structures and definitions for ABMC assignment" ANBZ: #31045 This reverts commit 55f81edfe99110b5d3ce8c4d480bff0f0ac60073. 
Signed-off-by: Jason Zeng --- arch/x86/include/asm/msr-index.h | 2 -- arch/x86/kernel/cpu/resctrl/internal.h | 26 -------------------------- 2 files changed, 28 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 39d111515bd0..5bcafbf648a7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1195,8 +1195,6 @@ #define MSR_IA32_SMBA_BW_BASE 0xc0000280 #define MSR_IA32_EVT_CFG_BASE 0xc0000400 #define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff -#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd -#define MSR_IA32_L3_QOS_ABMC_DSC 0xc00003fe /* MSR_IA32_VMX_MISC bits */ #define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index a7ff4f16247c..993dfb942f9c 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -190,32 +190,6 @@ union cpuid_0x10_x_edx { unsigned int full; }; -/* - * ABMC counters can be configured by writing to L3_QOS_ABMC_CFG. - * @bw_type : Bandwidth configuration(supported by BMEC) - * tracked by the @cntr_id. - * @bw_src : Bandwidth source (RMID or CLOSID). - * @reserved1 : Reserved. - * @is_clos : @bw_src field is a CLOSID (not an RMID). - * @cntr_id : Counter identifier. - * @reserved : Reserved. - * @cntr_en : Tracking enable bit. - * @cfg_en : Configuration enable bit. 
- */ -union l3_qos_abmc_cfg { - struct { - unsigned long bw_type :32, - bw_src :12, - reserved1: 3, - is_clos : 1, - cntr_id : 5, - reserved : 9, - cntr_en : 1, - cfg_en : 1; - } split; - unsigned long full; -}; - void rdt_ctrl_update(void *arg); int rdt_get_mon_l3_config(struct rdt_resource *r); bool rdt_cpu_has(int flag); -- Gitee From 11ba496117d83300199b27c9ad5854190bea2210 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:35:24 -0800 Subject: [PATCH 021/158] anolis: Revert "x86/resctrl: Introduce mbm_cntr_map to track counters at domain" ANBZ: #31045 This reverts commit 135a18a2811089c67be5d759f5312d4416dee6c0. Signed-off-by: Jason Zeng --- fs/resctrl/rdtgroup.c | 10 ---------- include/linux/resctrl.h | 2 -- 2 files changed, 12 deletions(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index de27579019b9..e8a44bfb7846 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3866,7 +3866,6 @@ static void rdtgroup_setup_default(void) static void domain_destroy_mon_state(struct rdt_domain *d) { - bitmap_free(d->mbm_cntr_map); bitmap_free(d->rmid_busy_llc); kfree(d->mbm_total); kfree(d->mbm_local); @@ -3937,15 +3936,6 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) return -ENOMEM; } } - if (resctrl_is_mbm_enabled()) { - d->mbm_cntr_map = bitmap_zalloc(r->mon.num_mbm_cntrs, GFP_KERNEL); - if (!d->mbm_cntr_map) { - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); - return -ENOMEM; - } - } return 0; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 1c5d3361a71f..70f87231853d 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -102,7 +102,6 @@ struct resctrl_staged_config { * @cqm_limbo: worker to periodically read CQM h/w counters * @mbm_work_cpu: worker CPU for MBM h/w counters * @cqm_work_cpu: worker CPU for CQM h/w counters - * @mbm_cntr_map: bitmap to track domain counter assignment * @plr: pseudo-locked region (if any) 
associated with domain * @staged_config: parsed configuration to be applied * @mbps_val: When mba_sc is enabled, this holds the array of user @@ -120,7 +119,6 @@ struct rdt_domain { struct delayed_work cqm_limbo; int mbm_work_cpu; int cqm_work_cpu; - unsigned long *mbm_cntr_map; struct pseudo_lock_region *plr; struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; u32 *mbps_val; -- Gitee From 1ee5fbc90bed0adb066f11d5643a0f99a4a3a16a Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:35:30 -0800 Subject: [PATCH 022/158] anolis: Revert "x86/resctrl: Remove MSR reading of event configuration value" ANBZ: #31045 This reverts commit e481c8b2b5c7c5ef95ef3521111c2235a87bd5d0. Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 3 -- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 50 -------------------------- drivers/platform/mpam/mpam_internal.h | 2 -- drivers/platform/mpam/mpam_resctrl.c | 6 ++-- fs/resctrl/rdtgroup.c | 32 ++++++++++------- include/linux/arm_mpam.h | 3 -- include/linux/resctrl.h | 2 ++ include/linux/resctrl_types.h | 21 +++++++++++ 8 files changed, 45 insertions(+), 74 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 405fa1d9bae4..dec669838b39 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -219,9 +219,6 @@ static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, int resctrl_arch_mbm_cntr_assign_enable(void); void resctrl_arch_mbm_cntr_assign_disable(void); -void resctrl_arch_event_config_set(void *info); -u32 resctrl_arch_event_config_get(void *d, enum resctrl_event_id eventid); - u64 resctrl_arch_get_prefetch_disable_bits(void); int resctrl_arch_pseudo_lock_fn(void *_plr); int resctrl_arch_measure_cycles_lat_fn(void *_plr); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 517562950edb..a0b27a82099c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ 
b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -421,53 +421,3 @@ void resctrl_arch_reset_resources(void) for_each_capable_rdt_resource(r) reset_all_ctrls(r); } - -u32 resctrl_arch_event_config_get(void *dom, enum resctrl_event_id eventid) -{ - struct rdt_domain *d = dom; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - - switch (eventid) { - case QOS_L3_OCCUP_EVENT_ID: - case QOS_MC_MBM_BPS_EVENT_ID: - break; - case QOS_L3_MBM_TOTAL_EVENT_ID: - return hw_dom->mbm_total_cfg; - case QOS_L3_MBM_LOCAL_EVENT_ID: - return hw_dom->mbm_local_cfg; - } - - /* Never expect to get here */ - WARN_ON_ONCE(1); - - return INVALID_CONFIG_VALUE; -} - -void resctrl_arch_event_config_set(void *info) -{ - struct resctrl_mon_config_info *mon_info = info; - struct rdt_hw_domain *hw_dom; - unsigned int index; - - index = mon_event_config_index_get(mon_info->evtid); - if (index == INVALID_CONFIG_INDEX) - return; - - wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); - - hw_dom = resctrl_to_arch_dom(mon_info->d); - - switch (mon_info->evtid) { - case QOS_L3_OCCUP_EVENT_ID: - case QOS_MC_MBM_BPS_EVENT_ID: - break; - case QOS_L3_MBM_TOTAL_EVENT_ID: - hw_dom->mbm_total_cfg = mon_info->mon_config; - break; - case QOS_L3_MBM_LOCAL_EVENT_ID: - hw_dom->mbm_local_cfg = mon_info->mon_config; - break; - } - - mon_info->err = 0; -} diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index c46af5caa90c..d84413e5e031 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -24,8 +24,6 @@ DECLARE_STATIC_KEY_FALSE(mpam_enabled); * Only these event configuration bits are supported. MPAM can't know if * data is being written back, these will show up as a write. 
*/ -#define READS_TO_LOCAL_MEM BIT(0) -#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) #define MPAM_RESTRL_EVT_CONFIG_VALID (READS_TO_LOCAL_MEM | NON_TEMP_WRITE_TO_LOCAL_MEM) static inline bool mpam_is_enabled(void) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index ee1a66a6c099..242dcf69066e 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -741,18 +741,16 @@ bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) } } -u32 resctrl_arch_event_config_get(void *info, enum resctrl_event_id eventid) +void resctrl_arch_mon_event_config_read(void *info) { struct mpam_resctrl_dom *dom; struct resctrl_mon_config_info *mon_info = info; dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_dom); mon_info->mon_config = dom->mbm_local_evt_cfg & MAX_EVT_CONFIG_BITS; - - return mon_info->mon_config; } -void resctrl_arch_event_config_set(void *info) +void resctrl_arch_mon_event_config_write(void *info) { struct mpam_resctrl_dom *dom; struct resctrl_mon_config_info *mon_info = info; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index e8a44bfb7846..9a2f1734dee5 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1616,11 +1616,17 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, return ret; } +static void mondata_config_read(struct resctrl_mon_config_info *mon_info) +{ + smp_call_function_any(&mon_info->d->cpu_mask, + resctrl_arch_mon_event_config_read, mon_info, 1); +} + static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { + struct resctrl_mon_config_info mon_info = {0}; struct rdt_domain *dom; bool sep = false; - u32 val; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); @@ -1629,11 +1635,13 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid if (sep) seq_puts(s, ";"); - val = resctrl_arch_event_config_get(dom, evtid); - if (val == INVALID_CONFIG_VALUE) - break; + 
memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info)); + mon_info.r = r; + mon_info.d = dom; + mon_info.evtid = evtid; + mondata_config_read(&mon_info); - seq_printf(s, "%d=0x%02x", dom->id, val); + seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config); sep = true; } seq_puts(s, "\n"); @@ -1668,18 +1676,18 @@ static int mbm_config_write_domain(struct rdt_resource *r, struct rdt_domain *d, u32 evtid, u32 val) { struct resctrl_mon_config_info mon_info = {0}; - u32 config_val; /* - * Check the current config value first. If both are the same then + * Read the current config value first. If both are the same then * no need to write it again. */ - config_val = resctrl_arch_event_config_get(d, evtid); - if (config_val == INVALID_CONFIG_VALUE || config_val == val) - return 0; - + mon_info.r = r; mon_info.d = d; mon_info.evtid = evtid; + mondata_config_read(&mon_info); + if (mon_info.mon_config == val) + return 0; + mon_info.mon_config = val; /* @@ -1688,7 +1696,7 @@ static int mbm_config_write_domain(struct rdt_resource *r, * are scoped at the domain level. Writing any of these MSRs * on one CPU is observed by all the CPUs in the domain. 
*/ - smp_call_function_any(&d->cpu_mask, resctrl_arch_event_config_set, + smp_call_function_any(&d->cpu_mask, resctrl_arch_mon_event_config_write, &mon_info, 1); if (mon_info.err) { rdt_last_cmd_puts("Invalid event configuration\n"); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index bfd30d93c3d1..398ae2afa5dd 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -140,7 +140,4 @@ static inline int resctrl_arch_mbm_cntr_assign_enable(void) static inline void resctrl_arch_mbm_cntr_assign_disable(void) { } -void resctrl_arch_event_config_set(void *info); -u32 resctrl_arch_event_config_get(void *dom, - enum resctrl_event_id eventid); #endif /* __LINUX_ARM_MPAM_H */ diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 70f87231853d..91f0d0631dc6 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -307,6 +307,8 @@ struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); +void resctrl_arch_mon_event_config_write(void *info); +void resctrl_arch_mon_event_config_read(void *info); /* For use by arch code to remap resctrl's smaller CDP CLOSID range */ static inline u32 resctrl_get_config_index(u32 closid, diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index 7c6dad3df4ff..5c1e1dbd1ad8 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -40,6 +40,27 @@ #define RFTYPE_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) #define RFTYPE_MON_BASE (RFTYPE_BASE | RFTYPE_MON) +/* Reads to Local DRAM Memory */ +#define READS_TO_LOCAL_MEM BIT(0) + +/* Reads to Remote DRAM Memory */ +#define READS_TO_REMOTE_MEM BIT(1) + +/* Non-Temporal Writes to Local Memory */ +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) + +/* Non-Temporal Writes to Remote Memory */ +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) + +/* Reads to Local Memory 
the system identifies as "Slow Memory" */ +#define READS_TO_LOCAL_S_MEM BIT(4) + +/* Reads to Remote Memory the system identifies as "Slow Memory" */ +#define READS_TO_REMOTE_S_MEM BIT(5) + +/* Dirty Victims to All Types of Memory */ +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) + /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) -- Gitee From eb9e09de986b96a41f5343cea2c70b417722d046 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:35:35 -0800 Subject: [PATCH 023/158] anolis: Revert "x86/resctrl: Introduce mbm_total_cfg and mbm_local_cfg" ANBZ: #31045 This reverts commit 26bec061fb1fc7a32f2d6ac6d051bad506d16a9d. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 2 -- arch/x86/kernel/cpu/resctrl/internal.h | 6 +----- arch/x86/kernel/cpu/resctrl/monitor.c | 26 -------------------------- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 24 ++++++++++++++++++++++++ fs/resctrl/internal.h | 1 - include/linux/resctrl.h | 22 ---------------------- include/linux/resctrl_types.h | 3 --- 7 files changed, 25 insertions(+), 59 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 7a70a55a5162..8c16e1153db6 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -530,8 +530,6 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; } - resctrl_mbm_evt_config_init(hw_dom); - if (r->mon_capable && arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { domain_free(hw_dom); return; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 993dfb942f9c..3ef379c44e92 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -59,8 +59,6 @@ struct arch_mbm_state { * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) * @arch_mbm_total: arch private state for MBM total bandwidth * @arch_mbm_local: arch private state for MBM local bandwidth - * 
@mbm_total_cfg: MBM total bandwidth configuration - * @mbm_local_cfg: MBM local bandwidth configuration * * Members of this structure are accessed via helpers that provide abstraction. */ @@ -69,8 +67,6 @@ struct rdt_hw_domain { u32 *ctrl_val; struct arch_mbm_state *arch_mbm_total; struct arch_mbm_state *arch_mbm_local; - u32 mbm_total_cfg; - u32 mbm_local_cfg; }; static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) @@ -195,5 +191,5 @@ int rdt_get_mon_l3_config(struct rdt_resource *r); bool rdt_cpu_has(int flag); void __init intel_rdt_mbm_apply_quirk(void); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); -void resctrl_mbm_evt_config_init(struct rdt_hw_domain *hw_dom); + #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index ee8519f51359..f3dbb2543c37 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -279,32 +279,6 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) return 0; } -void resctrl_mbm_evt_config_init(struct rdt_hw_domain *hw_dom) -{ - unsigned int index; - u64 msrval; - - /* - * Read the configuration registers QOS_EVT_CFG_n, where is - * the BMEC event number (EvtID). 
- */ - if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { - index = mon_event_config_index_get(QOS_L3_MBM_TOTAL_EVENT_ID); - rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); - hw_dom->mbm_total_cfg = msrval & MAX_EVT_CONFIG_BITS; - } else { - hw_dom->mbm_total_cfg = INVALID_CONFIG_VALUE; - } - - if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { - index = mon_event_config_index_get(QOS_L3_MBM_LOCAL_EVENT_ID); - rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); - hw_dom->mbm_local_cfg = msrval & MAX_EVT_CONFIG_BITS; - } else { - hw_dom->mbm_total_cfg = INVALID_CONFIG_VALUE; - } -} - void __init intel_rdt_mbm_apply_quirk(void) { int cf_index; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index a0b27a82099c..4aba9b972a14 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -45,6 +45,30 @@ void resctrl_arch_sync_cpu_defaults(void *info) resctrl_arch_sched_in(current); } +#define INVALID_CONFIG_INDEX UINT_MAX + +/** + * mon_event_config_index_get - get the hardware index for the + * configurable event + * @evtid: event id. 
+ * + * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID + * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID + * INVALID_CONFIG_INDEX for invalid evtid + */ +static inline unsigned int mon_event_config_index_get(u32 evtid) +{ + switch (evtid) { + case QOS_L3_MBM_TOTAL_EVENT_ID: + return 0; + case QOS_L3_MBM_LOCAL_EVENT_ID: + return 1; + default: + /* Should never reach here */ + return INVALID_CONFIG_INDEX; + } +} + void resctrl_arch_mon_event_config_read(void *info) { struct resctrl_mon_config_info *mon_info = info; diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 7620f8a5ce50..04f79dfacd62 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -307,7 +307,6 @@ void mbm_cntr_free(u32 cntr_id); void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); -unsigned int mon_event_config_index_get(u32 evtid); #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 91f0d0631dc6..c9703ef8cb7f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -272,28 +272,6 @@ struct resctrl_mon_config_info { int err; }; -/** - * mon_event_config_index_get - get the hardware index for the - * configurable event - * @evtid: event id. - * - * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID - * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID - * INVALID_CONFIG_INDEX for invalid evtid - */ -static inline unsigned int mon_event_config_index_get(u32 evtid) -{ - switch (evtid) { - case QOS_L3_MBM_TOTAL_EVENT_ID: - return 0; - case QOS_L3_MBM_LOCAL_EVENT_ID: - return 1; - default: - /* Should never reach here */ - return INVALID_CONFIG_INDEX; - } -} - /* * Update and re-load this CPUs defaults. Called via IPI, takes a pointer to * struct resctrl_cpu_sync, or NULL. 
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index 5c1e1dbd1ad8..a0d8694be783 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -7,9 +7,6 @@ #ifndef __LINUX_RESCTRL_TYPES_H #define __LINUX_RESCTRL_TYPES_H -#define INVALID_CONFIG_VALUE U32_MAX -#define INVALID_CONFIG_INDEX UINT_MAX - #define CQM_LIMBOCHECK_INTERVAL 1000 #define MBM_CNTR_WIDTH_BASE 24 -- Gitee From dca96500dd64266eb2c2d4d5eae124a590936f7f Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:35:43 -0800 Subject: [PATCH 024/158] anolis: Revert "x86/resctrl: Introduce MBM counters bitmap" ANBZ: #31045 This reverts commit 20ad3c34e09f432b16aed92881433bbe99608395. Signed-off-by: Jason Zeng --- fs/resctrl/internal.h | 2 -- fs/resctrl/rdtgroup.c | 33 --------------------------------- 2 files changed, 35 deletions(-) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 04f79dfacd62..d86b0295c456 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -302,8 +302,6 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); -int mbm_cntr_alloc(struct rdt_resource *r); -void mbm_cntr_free(u32 cntr_id); void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 9a2f1734dee5..6ed6f774ec8a 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -207,37 +207,6 @@ bool closid_allocated(unsigned int closid) return !test_bit(closid, closid_free_map); } -/* - * Counter bitmap for tracking the available counters. - * ABMC feature provides set of hardware counters for enabling events. - * Each event takes one hardware counter. Kernel needs to keep track - * of number of available counters. 
- */ -static DECLARE_BITMAP(mbm_cntrs_free_map, 64); - -static void mbm_cntrs_init(struct rdt_resource *r) -{ - bitmap_fill(mbm_cntrs_free_map, r->mon.num_mbm_cntrs); -} - -int mbm_cntr_alloc(struct rdt_resource *r) -{ - int cntr_id; - - cntr_id = find_first_bit(mbm_cntrs_free_map, r->mon.num_mbm_cntrs); - if (cntr_id >= r->mon.num_mbm_cntrs) - return -ENOSPC; - - __clear_bit(cntr_id, mbm_cntrs_free_map); - - return cntr_id; -} - -void mbm_cntr_free(u32 cntr_id) -{ - __set_bit(cntr_id, mbm_cntrs_free_map); -} - /** * rdtgroup_mode_by_closid - Return mode of resource group with closid * @closid: closid if the resource group @@ -2604,8 +2573,6 @@ static int rdt_get_tree(struct fs_context *fc) closid_init(); - mbm_cntrs_init(resctrl_arch_get_resource(RDT_RESOURCE_L3)); - if (resctrl_arch_mon_capable()) flags |= RFTYPE_MON; -- Gitee From 32b2112fb72293f7459af74c28c73d4df67a6146 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:35:51 -0800 Subject: [PATCH 025/158] anolis: Revert "x86/resctrl: Introduce interface to display number of monitoring counters" ANBZ: #31045 This reverts commit ff4f78ad7a18d74aa690c554443b9bbc182224cb. Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 3 --- arch/x86/kernel/cpu/resctrl/monitor.c | 2 -- fs/resctrl/rdtgroup.c | 16 ---------------- 3 files changed, 21 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 4594f740eb67..431f08352c0e 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -291,9 +291,6 @@ with the following files: as long as there are enough RMID counters available to support number of monitoring groups. -"num_mbm_cntrs": - The number of monitoring counters available for assignment. 
- "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f3dbb2543c37..974cc7beba82 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -255,8 +255,6 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) r->mon.num_mbm_cntrs = (ebx & 0xFFFF) + 1; if (WARN_ON(r->mon.num_mbm_cntrs > 64)) r->mon.num_mbm_cntrs = 64; - - resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO); } /* diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 6ed6f774ec8a..cc03fe848b8e 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -875,16 +875,6 @@ static int rdtgroup_mbm_mode_show(struct kernfs_open_file *of, return 0; } -static int rdtgroup_num_mbm_cntrs_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(s, "%d\n", r->mon.num_mbm_cntrs); - - return 0; -} - #ifdef CONFIG_PROC_CPU_RESCTRL /* @@ -1912,12 +1902,6 @@ static struct rftype res_common_files[] = { .seq_show = rdtgroup_cpus_show, .fflags = RFTYPE_BASE, }, - { - .name = "num_mbm_cntrs", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdtgroup_num_mbm_cntrs_show, - }, { .name = "cpus_list", .mode = 0644, -- Gitee From 6dd6dc4bfd89488cfd708f4c08aef3b963e105a2 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:35:59 -0800 Subject: [PATCH 026/158] anolis: Revert "x86/resctrl: Introduce the interface to display monitor mode" ANBZ: #31045 This reverts commit e7dd18cfbb5575226f8d7ea2003b7fef5a204564. 
Signed-off-by: Jason Zeng --- Documentation/arch/x86/resctrl.rst | 34 ------------------------------ fs/resctrl/rdtgroup.c | 27 ------------------------ 2 files changed, 61 deletions(-) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 431f08352c0e..d816ded93c22 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -257,40 +257,6 @@ with the following files: # cat /sys/fs/resctrl/info/L3_MON/mbm_local_bytes_config 0=0x30;1=0x30;3=0x15;4=0x15 -"mbm_mode": - Reports the list of assignable monitoring features supported. The - enclosed brackets indicate which feature is enabled. - :: - - cat /sys/fs/resctrl/info/L3_MON/mbm_mode - [mbm_cntr_assign] - legacy - - "mbm_cntr_assign": - AMD's ABMC feature is one of the mbm_cntr_assign mode supported. - The bandwidth monitoring feature on AMD system only guarantees - that RMIDs currently assigned to a processor will be tracked by - hardware. The counters of any other RMIDs which are no longer - being tracked will be reset to zero. The MBM event counters - return "Unavailable" for the RMIDs that are not tracked by - hardware. So, there can be only limited number of groups that can - give guaranteed monitoring numbers. With ever changing configurations - there is no way to definitely know which of these groups are being - tracked for certain point of time. Users do not have the option to - monitor a group or set of groups for certain period of time without - worrying about RMID being reset in between. - - The ABMC feature provides an option to the user to assign a hardware - counter to an RMID and monitor the bandwidth as long as it is assigned. - The assigned RMID will be tracked by the hardware until the user - unassigns it manually. There is no need to worry about counters being - reset during this period. - - "Legacy": - Legacy mode works without the assignment option. 
The monitoring works - as long as there are enough RMID counters available to support number - of monitoring groups. - "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index cc03fe848b8e..638b7827b8cb 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -855,26 +855,6 @@ static int rdtgroup_rmid_show(struct kernfs_open_file *of, return ret; } -static int rdtgroup_mbm_mode_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - if (r->mon.mbm_cntr_assignable) { - if (resctrl_arch_get_abmc_enabled()) { - seq_puts(s, "[mbm_cntr_assign]\n"); - seq_puts(s, "legacy\n"); - } else { - seq_puts(s, "mbm_cntr_assign\n"); - seq_puts(s, "[legacy]\n"); - } - } else { - seq_puts(s, "[legacy]\n"); - } - - return 0; -} - #ifdef CONFIG_PROC_CPU_RESCTRL /* @@ -1887,13 +1867,6 @@ static struct rftype res_common_files[] = { .seq_show = mbm_local_bytes_config_show, .write = mbm_local_bytes_config_write, }, - { - .name = "mbm_mode", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdtgroup_mbm_mode_show, - .fflags = RFTYPE_MON_INFO, - }, { .name = "cpus", .mode = 0644, -- Gitee From fac638dcf46072c86631888ff5d52e371dacdede Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:36:06 -0800 Subject: [PATCH 027/158] anolis: Revert "x86/resctrl: Add support to enable/disable AMD ABMC feature" ANBZ: #31045 This reverts commit 2dd7907598ed3a655276eb27f7b5503f3cb03c57. 
Signed-off-by: Jason Zeng --- arch/x86/include/asm/msr-index.h | 1 - arch/x86/include/asm/resctrl.h | 5 --- arch/x86/kernel/cpu/resctrl/internal.h | 5 --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 62 -------------------------- include/linux/arm_mpam.h | 12 ----- include/linux/resctrl.h | 2 - 6 files changed, 87 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5bcafbf648a7..47edd53ae5b8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1194,7 +1194,6 @@ #define MSR_IA32_MBA_BW_BASE 0xc0000200 #define MSR_IA32_SMBA_BW_BASE 0xc0000280 #define MSR_IA32_EVT_CFG_BASE 0xc0000400 -#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff /* MSR_IA32_VMX_MISC bits */ #define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index dec669838b39..04be7e43f015 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -216,9 +216,6 @@ static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx) { }; -int resctrl_arch_mbm_cntr_assign_enable(void); -void resctrl_arch_mbm_cntr_assign_disable(void); - u64 resctrl_arch_get_prefetch_disable_bits(void); int resctrl_arch_pseudo_lock_fn(void *_plr); int resctrl_arch_measure_cycles_lat_fn(void *_plr); @@ -232,8 +229,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); bool resctrl_arch_is_hwdrc_mb_capable(void); int resctrl_arch_set_hwdrc_enabled(enum resctrl_res_level l, bool hwdrc_mb); -bool resctrl_arch_get_abmc_enabled(void); - #else static inline void resctrl_arch_sched_in(struct task_struct *tsk) {} diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 3ef379c44e92..270953d9db4d 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -37,9 +37,6 @@ 
*/ #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) -/* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */ -#define ABMC_ENABLE_BIT 0 - /** * struct arch_mbm_state - values used to compute resctrl_arch_rmid_read()s * return value. @@ -99,7 +96,6 @@ struct msr_param { * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. * @cdp_enabled: CDP state of this resource - * @mbm_cntr_assign_enabled: ABMC feature is enabled * * Members of this structure are either private to the architecture * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. @@ -114,7 +110,6 @@ struct rdt_hw_resource { unsigned int mon_scale; unsigned int mbm_width; bool cdp_enabled; - bool mbm_cntr_assign_enabled; }; static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 4aba9b972a14..405f70c770d1 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -336,68 +336,6 @@ int resctrl_arch_set_hwdrc_enabled(enum resctrl_res_level l, bool hwdrc_mb) return 0; } -/* - * Update L3_QOS_EXT_CFG MSR on all the CPUs associated with the resource. - */ -static void resctrl_abmc_set_one_amd(void *arg) -{ - bool *enable = arg; - - if (*enable) - msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT); - else - msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT); -} - -static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable) -{ - struct rdt_domain *d; - - /* - * Hardware counters will reset after switching the monitor mode. - * Reset the architectural state so that reading of hardware - * counter is not considered as an overflow in the next update. 
- */ - list_for_each_entry(d, &r->domains, list) { - on_each_cpu_mask(&d->cpu_mask, - resctrl_abmc_set_one_amd, &enable, 1); - resctrl_arch_reset_rmid_all(r, d); - } -} - -bool resctrl_arch_get_abmc_enabled(void) -{ - return rdt_resources_all[RDT_RESOURCE_L3].mbm_cntr_assign_enabled; -} - -int resctrl_arch_mbm_cntr_assign_enable(void) -{ - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - - lockdep_assert_held(&rdtgroup_mutex); - - if (r->mon.mbm_cntr_assignable && !hw_res->mbm_cntr_assign_enabled) { - _resctrl_abmc_enable(r, true); - hw_res->mbm_cntr_assign_enabled = true; - } - - return 0; -} - -void resctrl_arch_mbm_cntr_assign_disable(void) -{ - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - - lockdep_assert_held(&rdtgroup_mutex); - - if (hw_res->mbm_cntr_assign_enabled) { - _resctrl_abmc_enable(r, false); - hw_res->mbm_cntr_assign_enabled = false; - } -} - static int reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 398ae2afa5dd..b3be84f24c72 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -128,16 +128,4 @@ static inline void resctrl_arch_disable_mon(void) { } static inline void resctrl_arch_enable_alloc(void) { } static inline void resctrl_arch_disable_alloc(void) { } -static inline bool resctrl_arch_get_abmc_enabled(void) -{ - return false; -} - -static inline int resctrl_arch_mbm_cntr_assign_enable(void) -{ - return -EINVAL; -} - -static inline void resctrl_arch_mbm_cntr_assign_disable(void) { } - #endif /* __LINUX_ARM_MPAM_H */ diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index c9703ef8cb7f..1a1aab4069c1 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -12,8 +12,6 @@ #include #endif -extern struct mutex 
rdtgroup_mutex; - /* CLOSID, RMID value used by the default control group */ #define RESCTRL_RESERVED_CLOSID 0 #define RESCTRL_RESERVED_RMID 0 -- Gitee From 3846b6699d3ebeab2c1e300f513d125a92857305 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:36:11 -0800 Subject: [PATCH 028/158] anolis: Revert "x86/resctrl: Introduce resctrl_file_fflags_init() to initialize fflags" ANBZ: #31045 This reverts commit 3e75a0ebc07bfef1069771da6e483efd85a28537. Signed-off-by: Jason Zeng --- fs/resctrl/internal.h | 1 + fs/resctrl/monitor.c | 6 ++---- fs/resctrl/rdtgroup.c | 23 +++++++++++++++++++---- include/linux/resctrl.h | 1 - 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index d86b0295c456..d56e3200d1d7 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -302,6 +302,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); +void mbm_config_rftype_init(const char *config); void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index b6bb310bb10b..1ca50dfee517 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -852,13 +852,11 @@ int resctrl_mon_resource_init(void) if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { mbm_total_event.configurable = true; - resctrl_file_fflags_init("mbm_total_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + mbm_config_rftype_init("mbm_total_bytes_config"); } if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { mbm_local_event.configurable = true; - resctrl_file_fflags_init("mbm_local_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + mbm_config_rftype_init("mbm_local_bytes_config"); } r = 
resctrl_arch_get_resource(RDT_RESOURCE_MBA); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 638b7827b8cb..efdab19640c2 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1986,13 +1986,29 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name) return NULL; } -void resctrl_file_fflags_init(const char *config, unsigned long fflags) +static void thread_throttle_mode_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + struct rftype *rft; + + if (!r->alloc_capable || + r->membw.throttle_mode == THREAD_THROTTLE_UNDEFINED) + return; + + rft = rdtgroup_get_rftype_by_name("thread_throttle_mode"); + if (!rft) + return; + + rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB; +} + +void mbm_config_rftype_init(const char *config) { struct rftype *rft; rft = rdtgroup_get_rftype_by_name(config); if (rft) - rft->fflags = fflags; + rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE; } /** @@ -3984,8 +4000,7 @@ int resctrl_init(void) rdtgroup_setup_default(); - resctrl_file_fflags_init("thread_throttle_mode", - RFTYPE_CTRL_INFO | RFTYPE_RES_MB); + thread_throttle_mode_init(); ret = resctrl_mon_resource_init(); if (ret) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 1a1aab4069c1..b5e479914fa4 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -408,7 +408,6 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d); extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -void resctrl_file_fflags_init(const char *config, unsigned long fflags); int resctrl_init(void); void resctrl_exit(void); -- Gitee From 38e9e5ad3daacec0fa20a6379924743b014362ba Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:36:17 -0800 Subject: [PATCH 029/158] anolis: Revert "x86/resctrl: Detect Assignable Bandwidth Monitoring feature details" ANBZ: #31045 This reverts commit 
e7434733c38e2f4afbcd3e43c4633b630d25cb61. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/monitor.c | 13 ------------- include/linux/resctrl.h | 4 ---- 2 files changed, 17 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 974cc7beba82..d5304a12500d 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -224,7 +224,6 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int threshold; - u32 eax, ebx, ecx, edx; resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; @@ -245,18 +244,6 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) */ threshold = resctrl_rmid_realloc_limit / r->mon.num_rmid; - if (rdt_cpu_has(X86_FEATURE_ABMC)) { - r->mon.mbm_cntr_assignable = true; - /* - * Query CPUID_Fn80000020_EBX_x05 for number of - * ABMC counters. - */ - cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); - r->mon.num_mbm_cntrs = (ebx & 0xFFFF) + 1; - if (WARN_ON(r->mon.num_mbm_cntrs > 64)) - r->mon.num_mbm_cntrs = 64; - } - /* * Because num_rmid may not be a power of two, round the value * to the nearest multiple of hw_res->mon_scale so it matches a diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b5e479914fa4..034546e97602 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -182,14 +182,10 @@ struct resctrl_membw { /** * struct resctrl_mon - Monitoring related data * @num_rmid: Number of RMIDs available - * @num_mbm_cntrs: Number of monitoring counters - * @mbm_cntr_assignable:Is system capable of supporting monitor assignment? 
* @evt_list: List of monitoring events */ struct resctrl_mon { int num_rmid; - int num_mbm_cntrs; - bool mbm_cntr_assignable; struct list_head evt_list; }; -- Gitee From ae16e7d863f4e0308f639af3437b2cf41ffd6fad Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:36:24 -0800 Subject: [PATCH 030/158] anolis: Revert "x86/resctrl: Consolidate monitoring related data from rdt_resource" ANBZ: #31045 This reverts commit 1cb013cbe5e221764d3454ff1e23c72798e09f13. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 2 +- arch/x86/kernel/cpu/resctrl/monitor.c | 8 ++++---- drivers/platform/mpam/mpam_resctrl.c | 4 ++-- fs/resctrl/internal.h | 2 +- fs/resctrl/monitor.c | 12 ++++++------ fs/resctrl/rdtgroup.c | 8 ++++---- include/linux/resctrl.h | 15 ++++----------- 7 files changed, 22 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 8c16e1153db6..7b2b5ca496f8 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -530,7 +530,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; } - if (r->mon_capable && arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { + if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) { domain_free(hw_dom); return; } diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index d5304a12500d..e0cc1b499279 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -170,11 +170,11 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d) if (resctrl_arch_is_mbm_total_enabled()) memset(hw_dom->arch_mbm_total, 0, - sizeof(*hw_dom->arch_mbm_total) * r->mon.num_rmid); + sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); if (resctrl_arch_is_mbm_local_enabled()) memset(hw_dom->arch_mbm_local, 0, - sizeof(*hw_dom->arch_mbm_local) * r->mon.num_rmid); + sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); } 
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) @@ -227,7 +227,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; - r->mon.num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; + r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) @@ -242,7 +242,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) * * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. */ - threshold = resctrl_rmid_realloc_limit / r->mon.num_rmid; + threshold = resctrl_rmid_realloc_limit / r->num_rmid; /* * Because num_rmid may not be a power of two, round the value diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 242dcf69066e..55848a9c321c 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -875,7 +875,7 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) * For mpam, each control group has its own pmg/rmid * space. 
*/ - r->mon.num_rmid = 1; + r->num_rmid = 1; } return 0; @@ -893,7 +893,7 @@ int mpam_resctrl_setup(void) for (i = 0; i < RDT_NUM_RESOURCES; i++) { res = &mpam_resctrl_exports[i]; INIT_LIST_HEAD(&res->resctrl_res.domains); - INIT_LIST_HEAD(&res->resctrl_res.mon.evt_list); + INIT_LIST_HEAD(&res->resctrl_res.evt_list); res->resctrl_res.rid = i; } diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index d56e3200d1d7..6b378466d8a4 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -73,7 +73,7 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) * @evtid: event id * @name: name of the event * @configurable: true if the event is configurable - * @list: entry in &rdt_resource->mon.evt_list + * @list: entry in &rdt_resource->evt_list */ struct mon_evt { enum resctrl_event_id evtid; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 1ca50dfee517..62ae06aac20d 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -818,22 +818,22 @@ static struct mon_evt mbm_bps_event = { */ static void l3_mon_evt_init(struct rdt_resource *r) { - INIT_LIST_HEAD(&r->mon.evt_list); + INIT_LIST_HEAD(&r->evt_list); if (resctrl_arch_is_llc_occupancy_enabled()) - list_add_tail(&llc_occupancy_event.list, &r->mon.evt_list); + list_add_tail(&llc_occupancy_event.list, &r->evt_list); if (resctrl_arch_is_mbm_total_enabled()) - list_add_tail(&mbm_total_event.list, &r->mon.evt_list); + list_add_tail(&mbm_total_event.list, &r->evt_list); if (resctrl_arch_is_mbm_local_enabled()) - list_add_tail(&mbm_local_event.list, &r->mon.evt_list); + list_add_tail(&mbm_local_event.list, &r->evt_list); } static void mc_mon_evt_init(struct rdt_resource *r) { - INIT_LIST_HEAD(&r->mon.evt_list); + INIT_LIST_HEAD(&r->evt_list); if (resctrl_arch_is_mbm_bps_enabled()) - list_add_tail(&mbm_bps_event.list, &r->mon.evt_list); + list_add_tail(&mbm_bps_event.list, &r->evt_list); } int resctrl_mon_resource_init(void) diff --git a/fs/resctrl/rdtgroup.c 
b/fs/resctrl/rdtgroup.c index efdab19640c2..e2a8de53d639 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1107,7 +1107,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of, { struct rdt_resource *r = of->kn->parent->priv; - seq_printf(seq, "%d\n", r->mon.num_rmid); + seq_printf(seq, "%d\n", r->num_rmid); return 0; } @@ -1118,7 +1118,7 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, struct rdt_resource *r = of->kn->parent->priv; struct mon_evt *mevt; - list_for_each_entry(mevt, &r->mon.evt_list, list) { + list_for_each_entry(mevt, &r->evt_list, list) { seq_printf(seq, "%s\n", mevt->name); if (mevt->configurable) seq_printf(seq, "%s_config\n", mevt->name); @@ -2921,14 +2921,14 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (ret) goto out_destroy; - if (WARN_ON(list_empty(&r->mon.evt_list))) { + if (WARN_ON(list_empty(&r->evt_list))) { ret = -EPERM; goto out_destroy; } priv.u.rid = r->rid; priv.u.domid = d->id; - list_for_each_entry(mevt, &r->mon.evt_list, list) { + list_for_each_entry(mevt, &r->evt_list, list) { priv.u.evtid = mevt->evtid; ret = mon_addfile(kn, mevt->name, priv.priv); if (ret) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 034546e97602..5d06695671e2 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -179,21 +179,12 @@ struct resctrl_membw { u32 *mb_map; }; -/** - * struct resctrl_mon - Monitoring related data - * @num_rmid: Number of RMIDs available - * @evt_list: List of monitoring events - */ -struct resctrl_mon { - int num_rmid; - struct list_head evt_list; -}; - /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine + * @num_rmid: Number of RMIDs available * @cache_level: Which cache level defines scope of this resource * @cache: Cache allocation related data * @membw: If the 
component has bandwidth controls, their properties. @@ -202,6 +193,7 @@ struct resctrl_mon { * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. * @format_str: Per resource format string to show domain value + * @evt_list: List of monitoring events * @fflags: flags to choose base and info files * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth * Monitoring Event Configuration (BMEC) is supported. @@ -211,15 +203,16 @@ struct rdt_resource { int rid; bool alloc_capable; bool mon_capable; + int num_rmid; int cache_level; struct resctrl_cache cache; struct resctrl_membw membw; - struct resctrl_mon mon; struct list_head domains; char *name; int data_width; u32 default_ctrl; const char *format_str; + struct list_head evt_list; unsigned long fflags; unsigned int mbm_cfg_mask; bool cdp_capable; -- Gitee From 46ee021daf11fc535e016ecde2b81982ce3ecec3 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:36:30 -0800 Subject: [PATCH 031/158] anolis: Revert "x86/resctrl: Add ABMC feature in the command line options" ANBZ: #31045 This reverts commit f326bced03a1be6bd5a9db48eb5cebe486038eca. Signed-off-by: Jason Zeng --- Documentation/admin-guide/kernel-parameters.txt | 2 +- Documentation/arch/x86/resctrl.rst | 1 - arch/x86/kernel/cpu/resctrl/core.c | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ae26c0b0f4fc..99d2596e0174 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5538,7 +5538,7 @@ rdt= [HW,X86,RDT] Turn on/off individual RDT features. List is: cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp, - mba, smba, bmec, abmc. + mba, smba, bmec. E.g. 
to turn on cmt and turn off mba use: rdt=cmt,!mba diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index d816ded93c22..a6279df64a9d 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -26,7 +26,6 @@ MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local" MBA (Memory Bandwidth Allocation) "mba" SMBA (Slow Memory Bandwidth Allocation) "" BMEC (Bandwidth Monitoring Event Configuration) "" -ABMC (Assignable Bandwidth Monitoring Counters) "" =============================================== ================================ Historically, new features were made visible by default in /proc/cpuinfo. This diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 7b2b5ca496f8..49bea5e5c196 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -634,7 +634,6 @@ enum { RDT_FLAG_MBA, RDT_FLAG_SMBA, RDT_FLAG_BMEC, - RDT_FLAG_ABMC, }; #define RDT_OPT(idx, n, f) \ @@ -660,7 +659,6 @@ static struct rdt_options rdt_options[] __ro_after_init = { RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), - RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) -- Gitee From 1b1cebc26222db09228b367959439502488074e4 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 21:45:07 -0800 Subject: [PATCH 032/158] anolis: Revert "x86/cpufeatures: Add support for Assignable Bandwidth Monitoring Counters (ABMC)" This reverts commit 126cd8f9f3cf01a9c72599cd2c0f84b174e7f313. 
ANBZ: #31045 Conflicts: arch/x86/include/asm/cpufeatures.h arch/x86/kernel/cpu/scattered.c [jz: resolve context conflict] Signed-off-by: Jason Zeng --- arch/x86/include/asm/cpufeatures.h | 1 - arch/x86/kernel/cpu/cpuid-deps.c | 3 --- arch/x86/kernel/cpu/scattered.c | 1 - 3 files changed, 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9a12e209939b..fdac919c8cc5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -511,7 +511,6 @@ #define X86_FEATURE_TSA_SQ_NO (21*32+11) /* "" AMD CPU not vulnerable to TSA-SQ */ #define X86_FEATURE_TSA_L1_NO (21*32+12) /* "" AMD CPU not vulnerable to TSA-L1 */ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* "" Clear CPU buffers using VERW before VMRUN */ -#define X86_FEATURE_ABMC (21*32+ 6) /* "" Assignable Bandwidth Monitoring Counters */ #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index d0714eb74eca..7d8733874218 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -70,9 +70,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_TOTAL }, { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_LOCAL }, - { X86_FEATURE_ABMC, X86_FEATURE_CQM_MBM_TOTAL }, - { X86_FEATURE_ABMC, X86_FEATURE_CQM_MBM_LOCAL }, - { X86_FEATURE_ABMC, X86_FEATURE_BMEC }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 509026d5c9cf..e1d8419f6e08 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -52,7 +52,6 @@ static const struct cpuid_bit cpuid_bits[] = { { 
X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, - { X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, -- Gitee From b7b778a3bc6d14bdb66ebcc3c479b5df71242031 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 22:54:20 -0800 Subject: [PATCH 033/158] anolis: Revert "anolis: x86/resctrl: Add mount option for memory bandwidth HWDRC" ANBZ: #31045 This reverts commit fcab6177eed5c4db63e594def5cf457959f93155. Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 1 - arch/x86/kernel/cpu/resctrl/rdtgroup.c | 46 -------------------------- fs/resctrl/internal.h | 9 ----- fs/resctrl/rdtgroup.c | 24 -------------- include/linux/arm_mpam.h | 5 --- include/linux/resctrl.h | 2 -- 6 files changed, 87 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 04be7e43f015..4665cb7cfb5e 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -227,7 +227,6 @@ bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); bool resctrl_arch_is_hwdrc_mb_capable(void); -int resctrl_arch_set_hwdrc_enabled(enum resctrl_res_level l, bool hwdrc_mb); #else diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 405f70c770d1..038107195f2e 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -290,52 +290,6 @@ bool resctrl_arch_is_hwdrc_mb_capable(void) return false; } -static void mba_enable(enum resctrl_res_level l) -{ - struct rdt_hw_resource *r_hw = &rdt_resources_all[l]; - struct rdt_resource *r = &r_hw->r_resctrl; - - r->alloc_capable = true; -} - -static void 
mba_disable(enum resctrl_res_level l) -{ - struct rdt_hw_resource *r_hw = &rdt_resources_all[l]; - struct rdt_resource *r = &r_hw->r_resctrl; - - r->alloc_capable = false; -} - -/* - * Currently memory bandwidth HWDRC feature is enabled or disabled by the user - * outside of the scope of the resctrl filesystem. When memory bandwidth HWDRC - * is enabled, it takes over MBA hooks in resctrl for memory bandwidth - * throttling. - * - * Set memory bandwidth HWDRC enabled in resctrl so that the user who enables - * memory bandwidth HWDRC can make sure that resctrl doesn't provide any hooks - * to control MBA. - * - * Set memory bandwidth HWDRC disabled in resctrl, MBA is enabled by default. - */ -int resctrl_arch_set_hwdrc_enabled(enum resctrl_res_level l, bool hwdrc_mb) -{ - struct rdt_resource *r = &rdt_resources_all[l].r_resctrl; - - if (!resctrl_arch_is_hwdrc_mb_capable() || hwdrc_mb == r->membw.hwdrc_mb) - return -EINVAL; - - /* MBA and memory bandwidth HWDRC features are mutually exclusive */ - if (hwdrc_mb) - mba_disable(l); - else - mba_enable(l); - - r->membw.hwdrc_mb = hwdrc_mb; - - return 0; -} - static int reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 6b378466d8a4..b8afeee36fff 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -58,7 +58,6 @@ struct rdt_fs_context { bool enable_cdpl3; bool enable_mba_mbps; bool enable_debug; - bool enable_hwdrc_mb; }; static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) @@ -249,14 +248,6 @@ static inline bool is_mba_sc(struct rdt_resource *r) return r->membw.mba_sc; } -static inline bool is_hwdrc_enabled(struct rdt_resource *r) -{ - if (!r) - r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - - return r->membw.hwdrc_mb; -} - extern struct mutex rdtgroup_mutex; extern struct rdtgroup rdtgroup_default; extern struct dentry *debugfs_resctrl; diff --git a/fs/resctrl/rdtgroup.c 
b/fs/resctrl/rdtgroup.c index e2a8de53d639..643ea199c428 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -2362,7 +2362,6 @@ static void rdt_disable_ctx(void) resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); set_mba_sc(false); - resctrl_arch_set_hwdrc_enabled(RDT_RESOURCE_MBA, false); resctrl_debug = false; } @@ -2383,19 +2382,6 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) goto out_cdpl2; } - /* - * MBA and memory bandwidth HWDRC features are mutually exclusive. - * So mba_MBps and hwdrc_mb options could not be set at the same time. - */ - if (ctx->enable_hwdrc_mb && ctx->enable_mba_mbps) { - pr_debug("Option 'mba_MBps' and 'hwdrc_mb' are mutually exclusive\n"); - ret = -EINVAL; - goto out_cdpl3; - } - - if (!ret && ctx->enable_hwdrc_mb) - ret = resctrl_arch_set_hwdrc_enabled(RDT_RESOURCE_MBA, true); - if (ctx->enable_mba_mbps) { ret = set_mba_sc(true); if (ret) @@ -2624,7 +2610,6 @@ enum rdt_param { Opt_cdp, Opt_cdpl2, Opt_mba_mbps, - Opt_hwdrc_mb, Opt_debug, nr__rdt_params }; @@ -2633,7 +2618,6 @@ static const struct fs_parameter_spec rdt_fs_parameters[] = { fsparam_flag("cdp", Opt_cdp), fsparam_flag("cdpl2", Opt_cdpl2), fsparam_flag("mba_MBps", Opt_mba_mbps), - fsparam_flag("hwdrc_mb", Opt_hwdrc_mb), fsparam_flag("debug", Opt_debug), {} }; @@ -2660,11 +2644,6 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; ctx->enable_mba_mbps = true; return 0; - case Opt_hwdrc_mb: - if (!resctrl_arch_is_hwdrc_mb_capable()) - return -EINVAL; - ctx->enable_hwdrc_mb = true; - return 0; case Opt_debug: ctx->enable_debug = true; return 0; @@ -3761,9 +3740,6 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA))) seq_puts(seq, ",mba_MBps"); - if (is_hwdrc_enabled(resctrl_arch_get_resource(RDT_RESOURCE_MBA))) - seq_puts(seq, ",hwdrc_mb"); - if (resctrl_debug) 
seq_puts(seq, ",debug"); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index b3be84f24c72..370fa3f2a7c9 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -88,11 +88,6 @@ static inline bool resctrl_arch_is_hwdrc_mb_capable(void) return false; } -static inline int resctrl_arch_set_hwdrc_enabled(enum resctrl_res_level ignored, bool hwdrc_mb) -{ - return hwdrc_mb ? -EINVAL : 0; -} - /* reset cached configurations, then all devices */ void resctrl_arch_reset_resources(void); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 5d06695671e2..b1ee49c5bc64 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -165,7 +165,6 @@ enum membw_throttle_mode { * @throttle_mode: Bandwidth throttling mode when threads request * different memory bandwidths * @mba_sc: True if MBA software controller(mba_sc) is enabled - * @hwdrc_mb: True if memory bandwidth HWDRC is enabled * @mb_map: Mapping of memory B/W percentage to memory B/W delay */ struct resctrl_membw { @@ -175,7 +174,6 @@ struct resctrl_membw { bool arch_needs_linear; enum membw_throttle_mode throttle_mode; bool mba_sc; - bool hwdrc_mb; u32 *mb_map; }; -- Gitee From 93a8b7cef6dcf8416da5c3767081ed521b772005 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 22:54:28 -0800 Subject: [PATCH 034/158] anolis: Revert "anolis: x86/resctrl: Workaround to detect if memory bandwidth HWDRC is capable" ANBZ: #31045 This reverts commit 5d9672616f1157fc2ce32f6f1cb379457f49e3e0. 
Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 2 - arch/x86/kernel/cpu/resctrl/internal.h | 11 ---- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 79 -------------------------- include/linux/arm_mpam.h | 5 -- 4 files changed, 97 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 4665cb7cfb5e..f159bddbec51 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -226,8 +226,6 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c); bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); -bool resctrl_arch_is_hwdrc_mb_capable(void); - #else static inline void resctrl_arch_sched_in(struct task_struct *tsk) {} diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 270953d9db4d..bf3538992667 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -10,16 +10,6 @@ #include #include -#include - -/* Memory bandwidth HWDRC */ -#define HWDRC_MSR_OS_MAILBOX_INTERFACE 0xb0 -#define HWDRC_MSR_OS_MAILBOX_DATA 0xb1 -#define HWDRC_MSR_OS_MAILBOX_BUSY_BIT BIT_ULL(31) -#define HWDRC_COMMAND_MEM_CLOS_EN 0xd0 -#define HWDRC_SUB_COMMAND_MEM_CLOS_EN 0x54 -#define HWDRC_MEMCLOS_AVAILABLE BIT_ULL(0) -#define HWDRC_OS_MAILBOX_RETRY_COUNT 30 #define L3_QOS_CDP_ENABLE 0x01ULL @@ -29,7 +19,6 @@ #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) - /* * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for * data to be returned. 
The counter width is discovered from the hardware diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 038107195f2e..fe3952514add 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -211,85 +211,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) return 0; } -/* - * Workaround to detect if memory bandwidth HWDRC feature is capable. - * - * CPUID for memory bandwidth HWDRC feature is not exposed by H/W. - * Check presence of HWDRC OS mailbox MSRs. Read out the discovery bit of - * HWDRC OS mailbox data which indicates if the feature is capable. - */ -bool resctrl_arch_is_hwdrc_mb_capable(void) -{ - u32 retries; - u64 data; - int status; - - /* Only enable memory bandwidth HWDRC after ICELAKE server */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86_model < INTEL_FAM6_ICELAKE_X) { - pr_debug("HWDRC: Not support until ICELAKE server\n"); - goto out; - } - - /* Check presence of mailbox MSRs */ - if (rdmsrl_safe(HWDRC_MSR_OS_MAILBOX_INTERFACE, &data)) { - pr_debug("HWDRC: Can't access OS mailbox interface MSR\n"); - goto out; - } - - if (rdmsrl_safe(HWDRC_MSR_OS_MAILBOX_DATA, &data)) { - pr_debug("HWDRC: Can't access OS mailbox data MSR\n"); - goto out; - } - - /* Poll for run_busy bit == 0 */ - status = -EBUSY; - retries = HWDRC_OS_MAILBOX_RETRY_COUNT; - do { - rdmsrl(HWDRC_MSR_OS_MAILBOX_INTERFACE, data); - if (!(data & HWDRC_MSR_OS_MAILBOX_BUSY_BIT)) { - status = 0; - break; - } - } while (--retries); - - if (status) - goto out; - - /* Write command register: 0x800054d0 */ - data = HWDRC_MSR_OS_MAILBOX_BUSY_BIT | - HWDRC_SUB_COMMAND_MEM_CLOS_EN << 8 | - HWDRC_COMMAND_MEM_CLOS_EN; - pr_debug("HWDRC: Write command register: 0x%llx\n", data); - if (wrmsrl_safe(HWDRC_MSR_OS_MAILBOX_INTERFACE, data)) { - pr_debug("HWDRC: Write command register 0x%llx failed!\n", data); - goto out; - } - - /* Poll for run_busy bit == 0 */ - 
retries = HWDRC_OS_MAILBOX_RETRY_COUNT; - do { - rdmsrl(HWDRC_MSR_OS_MAILBOX_INTERFACE, data); - if (!(data & HWDRC_MSR_OS_MAILBOX_BUSY_BIT)) { - rdmsrl(HWDRC_MSR_OS_MAILBOX_DATA, data); - pr_debug("HWDRC: Read MEM_CLOS_EN data: 0x%llx\n", data); - - /* Feature capability bit is set */ - if (data & HWDRC_MEMCLOS_AVAILABLE) { - pr_debug("HWDRC: Memory bandwidth HWDRC is capable\n"); - return true; - } - - /* Feature capability bit is not set */ - break; - } - } while (--retries); - -out: - pr_debug("HWDRC: Memory bandwidth HWDRC is not capable\n"); - return false; -} - static int reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 370fa3f2a7c9..660776491941 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -83,11 +83,6 @@ bool resctrl_arch_is_mbm_local_enabled(void); bool resctrl_arch_is_mbm_total_enabled(void); bool resctrl_arch_is_mbm_bps_enabled(void); -static inline bool resctrl_arch_is_hwdrc_mb_capable(void) -{ - return false; -} - /* reset cached configurations, then all devices */ void resctrl_arch_reset_resources(void); -- Gitee From d9bce38530dc06e3e4124d73489e3c8ddf3f21aa Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 22:54:58 -0800 Subject: [PATCH 035/158] anolis: Revert "anolis: resctrl: fix UAF when domain offline" ANBZ: #31045 This reverts commit f6c5c597bd5049d769f00d5db667d77f23a3618c. 
Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 5 ----- drivers/platform/mpam/mpam_resctrl.c | 7 ------- fs/resctrl/monitor.c | 13 ------------- include/linux/resctrl.h | 1 - 4 files changed, 26 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 49bea5e5c196..51389ebc0f19 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -497,7 +497,6 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) struct rdt_domain *d; int err; - BUG_ON(id > NR_CPUS); lockdep_assert_held(&domain_list_lock); d = rdt_find_domain(r, id, &add_pos); @@ -519,8 +518,6 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->id = id; - r->rdt_domain_list[id] = d; - cpumask_set_cpu(cpu, &d->cpu_mask); rdt_domain_reconfigure_cdp(r); @@ -551,7 +548,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) struct rdt_hw_domain *hw_dom; struct rdt_domain *d; - BUG_ON(id > NR_CPUS); lockdep_assert_held(&domain_list_lock); d = rdt_find_domain(r, id, NULL); @@ -573,7 +569,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) */ if (d->plr) d->plr->d = NULL; - r->rdt_domain_list[id] = NULL; domain_free(hw_dom); return; diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 55848a9c321c..98b3b1baa91e 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -1197,11 +1197,9 @@ int mpam_resctrl_online_cpu(unsigned int cpu) int i, err; struct mpam_resctrl_dom *dom; struct mpam_resctrl_res *res; - struct rdt_resource *r; for (i = 0; i < RDT_NUM_RESOURCES; i++) { res = &mpam_resctrl_exports[i]; - r = &res->resctrl_res; if (!res->class) continue; // dummy_resource; @@ -1215,7 +1213,6 @@ int mpam_resctrl_online_cpu(unsigned int cpu) dom = mpam_resctrl_alloc_domain(cpu, res); if (IS_ERR(dom)) return PTR_ERR(dom); - r->rdt_domain_list[i] = &dom->resctrl_dom; err = 
resctrl_online_domain(&res->resctrl_res, &dom->resctrl_dom); if (err) return err; @@ -1231,13 +1228,11 @@ int mpam_resctrl_offline_cpu(unsigned int cpu) struct rdt_domain *d; struct mpam_resctrl_res *res; struct mpam_resctrl_dom *dom; - struct rdt_resource *r; resctrl_offline_cpu(cpu); for (i = 0; i < RDT_NUM_RESOURCES; i++) { res = &mpam_resctrl_exports[i]; - r = &res->resctrl_res; if (!res->class) continue; // dummy resource @@ -1256,8 +1251,6 @@ int mpam_resctrl_offline_cpu(unsigned int cpu) resctrl_offline_domain(&res->resctrl_res, &dom->resctrl_dom); list_del(&d->list); - - r->rdt_domain_list[i] = NULL; kfree(dom); } diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 62ae06aac20d..ea4183cc48ad 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -626,16 +626,6 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay); } -bool is_rdt_domain_valid(struct rdt_resource *r, struct rdt_domain *d) -{ - int i; - - for (i = 0; i < NR_CPUS; i++) - if (r->rdt_domain_list[i] == d) - return true; - return false; -} - void mbm_handle_overflow(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL); @@ -657,9 +647,6 @@ void mbm_handle_overflow(struct work_struct *work) r = resctrl_arch_get_resource(RDT_RESOURCE_L3); d = container_of(work, struct rdt_domain, mbm_over.work); - if (!is_rdt_domain_valid(r, d)) - goto out_unlock; - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { mbm_update(r, d, prgrp->closid, prgrp->mon.rmid); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b1ee49c5bc64..dc80ddab26cf 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -214,7 +214,6 @@ struct rdt_resource { unsigned long fflags; unsigned int mbm_cfg_mask; bool cdp_capable; - struct rdt_domain *rdt_domain_list[NR_CPUS]; }; /* -- Gitee From d088b63660aed5e56f4e0475b8005f3a5dff4560 Mon Sep 17 00:00:00 2001 From: Jason 
Zeng Date: Sun, 25 Jan 2026 22:55:24 -0800 Subject: [PATCH 036/158] anolis: Revert "anolis: aarch64: MPAM: Disable MPAM function by default" ANBZ: #31045 This reverts commit 68bf068860aa1761b39cae2a4f6e7729ee7de950. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index c7dfc1e17ed6..0134263c88af 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -2460,21 +2460,8 @@ static int __init arm64_mpam_register_cpus(void) return mpam_register_requestor(partid_max, pmg_max); } -static int mpam_on; - -static int __init mpam_on_setup(char *str) -{ - mpam_on = 1; - pr_info("MPAM is supported now"); - return 1; -} -__setup("mpam_on", mpam_on_setup); - static int __init mpam_msc_driver_init(void) { - if (!mpam_on) - return 0; - bool mpam_not_available = false; int err; -- Gitee From b60163c3588791823cb774851c3ce41c25c5ff7e Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:03:20 -0800 Subject: [PATCH 037/158] anolis: Revert "x86/resctrl: Don't try to free nonexistent RMIDs" ANBZ: #31045 This reverts commit 209b480199f0caf5d26c8666fd654c7232d540a2. Signed-off-by: Jason Zeng --- fs/resctrl/monitor.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index ea4183cc48ad..51baa0f71b65 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -312,8 +312,7 @@ void free_rmid(u32 closid, u32 rmid) * allows architectures that ignore the closid parameter to avoid an * unnecessary check. 
*/ - if (!resctrl_arch_mon_capable() || - idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, + if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, RESCTRL_RESERVED_RMID)) return; -- Gitee From 4f9fc2f01483ba355c7d797df9e3bc8ec058bb38 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:03:26 -0800 Subject: [PATCH 038/158] anolis: Revert "x86/resctrl: Fix uninitialized memory read when last CPU of domain goes offline" ANBZ: #31045 This reverts commit ba1377ba3b97ce7b4adcf09e673c1ac08da7f543. Signed-off-by: Jason Zeng --- fs/resctrl/internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index b8afeee36fff..f73267762a87 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -33,8 +33,7 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) else cpu = cpumask_any_but(mask, exclude_cpu); - /* Only continue if tick_nohz_full_mask has been initialized. */ - if (!tick_nohz_full_enabled()) + if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) return cpu; /* If the CPU picked isn't marked nohz_full nothing more needs doing. */ -- Gitee From de931b6f94e5d1a1fd0254d83f285aee252c3119 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:04:05 -0800 Subject: [PATCH 039/158] anolis: Revert "anolis: arm_mpam: Fix the ris field parsing typo in error irq handler" ANBZ: #31045 This reverts commit 5aaebd3dc29c55eac8365e61d6bff0b8fc40d91d. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 0134263c88af..906f8a6b6940 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -2055,7 +2055,7 @@ static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) partid = FIELD_GET(MPAMF_ESR_PARTID_OR_MON, reg); pmg = FIELD_GET(MPAMF_ESR_PMG, reg); - ris = FIELD_GET(MPAMF_ESR_RIS, reg); + ris = FIELD_GET(MPAMF_ESR_PMG, reg); pr_err("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n", msc->id, mpam_errcode_names[errcode], partid, pmg, ris); -- Gitee From 6f39da597aa78f6966374e40aa96c8e450364cbc Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:04:11 -0800 Subject: [PATCH 040/158] anolis: Revert "anolis: fs/resctrl: Cancel the delayed checking works when resctrl is umounted" ANBZ: #31045 This reverts commit c6d8096ec77eb2a84eb8341490b3e687856f9cd1. Signed-off-by: Jason Zeng --- fs/resctrl/rdtgroup.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 643ea199c428..ea969ddb1a9d 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -2792,9 +2792,6 @@ static void rmdir_all_sub(void) static void rdt_kill_sb(struct super_block *sb) { - struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; - cpus_read_lock(); mutex_lock(&rdtgroup_mutex); @@ -2804,20 +2801,6 @@ static void rdt_kill_sb(struct super_block *sb) resctrl_arch_reset_resources(); rmdir_all_sub(); - - /* - * When resctrl is umounted, forcefully cancel delayed works since the - * new mount option may be changed. 
- */ - list_for_each_entry(d, &l3->domains, list) { - if (resctrl_is_mbm_enabled()) - cancel_delayed_work(&d->mbm_over); - if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { - __check_limbo(d, true); - cancel_delayed_work(&d->cqm_limbo); - } - } - rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; schemata_list_destroy(); -- Gitee From 7990553ecf8d3690e3a356666c1d1340780cf51e Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:04:18 -0800 Subject: [PATCH 041/158] anolis: Revert "anolis: fs/resctrl: Remove mbm_Bps features checking from resctrl_is_mbm_{enabled,event}" ANBZ: #31045 This reverts commit 327fc4be6787d9f29b3b185d1bc29c9fdd9d2131. Signed-off-by: Jason Zeng --- fs/resctrl/rdtgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index ea969ddb1a9d..e39f22453d84 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -113,13 +113,14 @@ void rdt_staged_configs_clear(void) static bool resctrl_is_mbm_enabled(void) { return (resctrl_arch_is_mbm_total_enabled() || - resctrl_arch_is_mbm_local_enabled()); + resctrl_arch_is_mbm_local_enabled() || + resctrl_arch_is_mbm_bps_enabled()); } static bool resctrl_is_mbm_event(int e) { return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_L3_MBM_LOCAL_EVENT_ID); + e <= QOS_MC_MBM_BPS_EVENT_ID); } /* -- Gitee From 5defe765859667e9366769b2009d084f4ad414d7 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:29:42 -0800 Subject: [PATCH 042/158] anolis: Revert "anolis: configs: arm64: Enable ARM64_MPAM" ANBZ: #31045 This reverts commit 744373f990ebbb8d3b936c5d53457c76f27fc7fc. Signed-off-by: Jason Zeng --- anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM | 1 - 1 file changed, 1 deletion(-) delete mode 100644 anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM diff --git a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM b/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM deleted file
mode 100644 index 45957b7b4ea2..000000000000 --- a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM +++ /dev/null @@ -1 +0,0 @@ -CONFIG_ARM64_MPAM=y -- Gitee From 03ec55d8bfb3396715d1722c56cd9a335393c4c1 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:31:33 -0800 Subject: [PATCH 043/158] anolis: Revert "anolis: KVM: arm64: Only access MPAM registers when MPAM is enabled" ANBZ: #31045 This reverts commit 3161da4713cdf31acc76f93698c39b734aef2382. Signed-off-by: Jason Zeng --- arch/arm64/kernel/image-vars.h | 1 - arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 6 ++---- arch/arm64/kvm/hyp/nvhe/switch.c | 3 +-- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 6999668e9ecf..d10d3fed31d9 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -67,7 +67,6 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); /* Additional static keys for cpufeatures */ #ifdef CONFIG_ARM64_MPAM KVM_NVHE_ALIAS(arm64_mpam_has_hcr); -KVM_NVHE_ALIAS(mpam_enabled); #endif /* Static keys which are set if a vGIC trap should be handled in hyp. 
*/ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index d274244e1b44..2679527eb2f3 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -177,7 +177,7 @@ static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu) { u64 r = MPAM_SYSREG_TRAP_MPAM0_EL1 | MPAM_SYSREG_TRAP_MPAM1_EL1; - if (!mpam_cpus_have_feature() || !static_branch_likely(&mpam_enabled)) + if (!mpam_cpus_have_feature()) return; /* trap guest access to MPAMIDR_EL1 */ @@ -193,7 +193,7 @@ static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_mpam(void) { - if (!mpam_cpus_have_feature() || !static_branch_likely(&mpam_enabled)) + if (!mpam_cpus_have_feature()) return; write_sysreg_s(0, SYS_MPAM2_EL2); diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 8e99f66b377b..c8767abd693e 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -250,8 +250,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) */ static inline void __mpam_guest_load(void) { - if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature() && - static_branch_likely(&mpam_enabled)) + if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature()) write_sysreg_el1(read_sysreg_s(SYS_MPAM0_EL1), SYS_MPAM1); } @@ -265,8 +264,7 @@ static inline void __mpam_guest_put(void) u64 val, mask = MPAM_SYSREG_PMG_D | MPAM_SYSREG_PMG_I | MPAM_SYSREG_PARTID_D | MPAM_SYSREG_PARTID_I; - if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature() && - static_branch_likely(&mpam_enabled)) { + if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature()) { val = FIELD_GET(mask, read_sysreg_s(SYS_MPAM2_EL2)); write_sysreg_el1(val, SYS_MPAM1); } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 024a7871892f..3ff46a88f1f4 100644 --- 
a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -285,8 +285,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) /* Use the host thread's partid and pmg for world switch */ static void __mpam_copy_el1_to_el2(void) { - if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature() && - static_branch_likely(&mpam_enabled)) + if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature()) write_sysreg_s(read_sysreg_s(SYS_MPAM1_EL1), SYS_MPAM2_EL2); } -- Gitee From 446aae83b70a002bb43298ce9f83074041be1e88 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:34:46 -0800 Subject: [PATCH 044/158] anolis: Revert "anolis: arm_mpam: Fix kernel boot failure when the firmware does not support MPAM" ANBZ: #31045 This reverts commit 9686c02c71809bd76e5acc7c17db979037d48ef9. Signed-off-by: Jason Zeng --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/include/asm/cpufeature.h | 6 +++ arch/arm64/kernel/cpufeature.c | 62 +++++++++++++++++++++++++++- arch/arm64/kernel/cpuinfo.c | 4 ++ arch/arm64/kernel/mpam.c | 10 +++++ drivers/platform/mpam/mpam_devices.c | 40 +++++------------- 6 files changed, 92 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 6344fc126262..33328b1d4a39 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -47,6 +47,7 @@ struct cpuinfo_arm64 { u64 reg_revidr; u64 reg_gmid; u64 reg_smidr; + u64 reg_mpamidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 385a6c162c32..4e980563ad95 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -855,6 +855,12 @@ static inline bool system_supports_gcs(void) alternative_has_cap_unlikely(ARM64_HAS_GCS); } +static inline bool cpus_support_mpam(void) +{ + return IS_ENABLED(CONFIG_ARM64_MPAM) && + cpus_have_final_cap(ARM64_MPAM); +} + int 
do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6b50132aed89..62eceb2cf5d0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -84,6 +84,7 @@ #include #include #include +#include #include #include #include @@ -665,6 +666,18 @@ static const struct arm64_ftr_bits ftr_smcr[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_mpamidr[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, + MPAMIDR_PMG_MAX_SHIFT, MPAMIDR_PMG_MAX_LEN, 0), /* PMG_MAX */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, + MPAMIDR_VPMR_MAX_SHIFT, MPAMIDR_VPMR_MAX_LEN, 0), /* VPMR_MAX */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, + MPAMIDR_HAS_HCR_SHIFT, 1, 0), /* HAS_HCR */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, + MPAMIDR_PARTID_MAX_SHIFT, MPAMIDR_PARTID_MAX_LEN, 0), /* PARTID_MAX */ + ARM64_FTR_END, +}; + /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -784,6 +797,9 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr), + /* Op1 = 0, CRn = 10, CRm = 4 */ + ARM64_FTR_REG(SYS_MPAMIDR_EL1, ftr_mpamidr), + /* Op1 = 1, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), @@ -1108,6 +1124,9 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) vec_init_vq_map(ARM64_VEC_SME); } + if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) + init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr); + if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); @@ -1374,6 +1393,11 @@ void update_cpu_features(int cpu, vec_update_vq_map(ARM64_VEC_SME); } + if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) { + taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu, + info->reg_mpamidr, 
boot->reg_mpamidr); + } + /* * The kernel uses the LDGM/STGM instructions and the number of tags * they read/write depends on the GMID_EL1.BS field. Check that the @@ -2326,6 +2350,39 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap) return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT); } +static bool __maybe_unused +test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!has_cpuid_feature(entry, scope)) + return false; + + /* Check firmware actually enabled MPAM on this cpu. */ + return (read_sysreg_s(SYS_MPAM1_EL1) & MPAM_SYSREG_EN); +} + +static void __maybe_unused +cpu_enable_mpam(const struct arm64_cpu_capabilities *entry) +{ + /* + * Access by the kernel (at EL1) should use the reserved PARTID + * which is configured unrestricted. This avoids priority-inversion + * where latency sensitive tasks have to wait for a task that has + * been throttled to release the lock. + */ + write_sysreg_s(0, SYS_MPAM1_EL1); +} + +static void mpam_extra_caps(void) +{ + u64 idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1); + + if (!IS_ENABLED(CONFIG_ARM64_MPAM)) + return; + + if (idr & MPAMIDR_HAS_HCR) + __enable_mpam_hcr(); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .capability = ARM64_ALWAYS_BOOT, @@ -2819,7 +2876,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .desc = "Memory Partitioning And Monitoring", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_MPAM, - .matches = has_cpuid_feature, + .matches = test_has_mpam, + .cpu_enable = cpu_enable_mpam, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, MPAM, 1) }, #endif @@ -3537,6 +3595,8 @@ void __init setup_cpu_features(void) if (!cwg) pr_warn("No Cache Writeback Granule information, assuming %d\n", ARCH_DMA_MINALIGN); + + mpam_extra_caps(); } static int enable_mismatched_32bit_el0(unsigned int cpu) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index d9d5ba68b1d7..8a41d13f97d1 100644 --- a/arch/arm64/kernel/cpuinfo.c 
+++ b/arch/arm64/kernel/cpuinfo.c @@ -504,6 +504,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) __cpuinfo_store_cpu_32bit(&info->aarch32); + if (IS_ENABLED(CONFIG_ARM64_MPAM) && + id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) + info->reg_mpamidr = read_cpuid(MPAMIDR_EL1); + cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c index 190a0aa4f9c0..0801dc58a189 100644 --- a/arch/arm64/kernel/mpam.c +++ b/arch/arm64/kernel/mpam.c @@ -13,3 +13,13 @@ DEFINE_STATIC_KEY_FALSE(mpam_enabled); EXPORT_SYMBOL_FOR_KVM(mpam_enabled); DEFINE_PER_CPU(u64, arm64_mpam_default); DEFINE_PER_CPU(u64, arm64_mpam_current); + +static int __init arm64_mpam_register_cpus(void) +{ + u64 mpamidr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1); + u16 partid_max = FIELD_GET(MPAMIDR_PARTID_MAX, mpamidr); + u8 pmg_max = FIELD_GET(MPAMIDR_PMG_MAX, mpamidr); + + return mpam_register_requestor(partid_max, pmg_max); +} +arch_initcall(arm64_mpam_register_cpus) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 906f8a6b6940..e1a1dee8578c 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -2451,19 +2451,9 @@ static void mpam_dt_create_foundling_msc(void) } } -static int __init arm64_mpam_register_cpus(void) -{ - u64 mpamidr = read_sysreg_s(SYS_MPAMIDR_EL1); - u16 partid_max = FIELD_GET(MPAMIDR_PARTID_MAX, mpamidr); - u8 pmg_max = FIELD_GET(MPAMIDR_PMG_MAX, mpamidr); - - return mpam_register_requestor(partid_max, pmg_max); -} - static int __init mpam_msc_driver_init(void) { bool mpam_not_available = false; - int err; if (!mpam_cpus_have_feature()) return -EOPNOTSUPP; @@ -2475,26 +2465,6 @@ static int __init mpam_msc_driver_init(void) else mpam_current_machine = mpam_dt_get_machine_type(); - if (!acpi_disabled) - fw_num_msc = acpi_mpam_count_msc(); - else - fw_num_msc = mpam_dt_count_msc(); - - if (fw_num_msc <= 
0) { - pr_err("No MSC devices found in firmware\n"); - return -EINVAL; - } - - /* - * Access MPAM system registers after MPAM ACPI table is parsed, since - * some BIOSs disable MPAM system registers accessing but export MPAM in - * ID_AA64PFR0_EL1. So we can only rely on the MPAM ACPI table to - * determine whether MPAM feature is enabled. - */ - err = arm64_mpam_register_cpus(); - if (err) - return err; - /* * If the MPAM CPU interface is not implemented, or reserved by * firmware, there is no point touching the rest of the hardware. @@ -2507,6 +2477,16 @@ static int __init mpam_msc_driver_init(void) if (mpam_not_available) return 0; + if (!acpi_disabled) + fw_num_msc = acpi_mpam_count_msc(); + else + fw_num_msc = mpam_dt_count_msc(); + + if (fw_num_msc <= 0) { + pr_err("No MSC devices found in firmware\n"); + return -EINVAL; + } + if (acpi_disabled) mpam_dt_create_foundling_msc(); -- Gitee From 36da63f438ace8de466a2759091e99fdc21030f8 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:34:49 -0800 Subject: [PATCH 045/158] anolis: Revert "anolis: arm_mpam: Maximize Yitian710's MB monitoring window width" ANBZ: #31045 This reverts commit d91e1cbc9db5ca88216b8e5fcbe972aeb3ee6468. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index e1a1dee8578c..0827f2c61f91 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -1220,7 +1220,6 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, struct mpam_props *rprops = &ris->props; u16 dspri = GENMASK(rprops->dspri_wd, 0); u16 intpri = GENMASK(rprops->intpri_wd, 0); - u32 custom_reg_base_addr; spin_lock(&msc->part_sel_lock); __mpam_part_sel(ris->ris_idx, partid, msc); @@ -1277,15 +1276,6 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, mpam_write_partsel_reg(msc, PRI, pri_val); } - if (FIELD_GET(MPAMF_IDR_HAS_IMPL_IDR, ris->idr)) { - if (mpam_current_machine == MPAM_YITIAN710) { - custom_reg_base_addr = __mpam_read_reg(msc, MPAMF_IMPL_IDR); - __mpam_write_reg(msc, custom_reg_base_addr + - MPAMF_CUST_WINDW_OFFSET, - MBWU_WINWD_MAX); - } - } - spin_unlock(&msc->part_sel_lock); } -- Gitee From ac9984f2362d42d6a97c1e894eca99c2fac05536 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:34:51 -0800 Subject: [PATCH 046/158] anolis: Revert "anolis: fs/resctrl: Add a new resctrl monitoring event to get MB in Bps" ANBZ: #31045 This reverts commit c8ca4112705d713c83ebdcc7a5aa2739f4281a13. 
Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 5 ----- arch/x86/kernel/cpu/resctrl/monitor.c | 2 -- drivers/platform/mpam/mpam_resctrl.c | 29 --------------------------- fs/resctrl/monitor.c | 16 --------------- fs/resctrl/rdtgroup.c | 5 ++--- include/linux/arm_mpam.h | 1 - include/linux/resctrl_types.h | 3 --- 7 files changed, 2 insertions(+), 59 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index f159bddbec51..746431c66fc4 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -99,11 +99,6 @@ static inline bool resctrl_arch_is_mbm_local_enabled(void) return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); } -static inline bool resctrl_arch_is_mbm_bps_enabled(void) -{ - return false; -} - /* * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR * diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index e0cc1b499279..02fb9d87479a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -134,8 +134,6 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, return &hw_dom->arch_mbm_total[rmid]; case QOS_L3_MBM_LOCAL_EVENT_ID: return &hw_dom->arch_mbm_local[rmid]; - default: - break; } /* Never expect to get here */ diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 98b3b1baa91e..25cf64386b64 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -35,7 +35,6 @@ static bool exposed_alloc_capable; static bool exposed_mon_capable; static struct mpam_class *mbm_local_class; static struct mpam_class *mbm_total_class; -static struct mpam_class *mbm_bps_class; /* * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM1_EL1. 
@@ -80,11 +79,6 @@ bool resctrl_arch_is_mbm_total_enabled(void) return mbm_total_class; } -bool resctrl_arch_is_mbm_bps_enabled(void) -{ - return mbm_bps_class; -} - bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid) { switch (rid) { @@ -278,10 +272,6 @@ static void *resctrl_arch_mon_ctx_alloc_no_wait(struct rdt_resource *r, case QOS_L3_MBM_LOCAL_EVENT_ID: case QOS_L3_MBM_TOTAL_EVENT_ID: return mon_is_rmid_idx; - case QOS_MC_MBM_BPS_EVENT_ID: - if (mpam_current_machine == MPAM_YITIAN710) - return mon_is_rmid_idx; - return ERR_PTR(-EOPNOTSUPP); } return ERR_PTR(-EOPNOTSUPP); @@ -326,7 +316,6 @@ void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, return; case QOS_L3_MBM_TOTAL_EVENT_ID: case QOS_L3_MBM_LOCAL_EVENT_ID: - case QOS_MC_MBM_BPS_EVENT_ID: return; } } @@ -366,10 +355,6 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, case QOS_L3_MBM_TOTAL_EVENT_ID: type = mpam_feat_msmon_mbwu; break; - case QOS_MC_MBM_BPS_EVENT_ID: - if (mpam_current_machine == MPAM_YITIAN710) - type = mpam_feat_impl_msmon_mbwu; - break; default: return -EINVAL; } @@ -502,16 +487,6 @@ static bool class_has_usable_mbwu(struct mpam_class *class) return (mpam_partid_max > 1) || (mpam_pmg_max != 0); } -static bool class_has_usable_impl_mbwu(struct mpam_class *class) -{ - struct mpam_props *cprops = &class->props; - - if (!mpam_has_feature(mpam_feat_impl_msmon_mbwu, cprops)) - return false; - - return true; -} - static bool mba_class_use_mbw_part(struct mpam_props *cprops) { /* TODO: Scaling is not yet supported */ @@ -857,10 +832,6 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) if (has_mbwu && class->type == MPAM_CLASS_MEMORY) { mbm_total_class = class; r->mon_capable = true; - } else if (class_has_usable_impl_mbwu(class)) { - r->mon_capable = true; - if (mpam_current_machine == MPAM_YITIAN710) - mbm_bps_class = class; } } diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 51baa0f71b65..06f660dfd929 100644 
--- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -790,11 +790,6 @@ static struct mon_evt mbm_local_event = { .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, }; -static struct mon_evt mbm_bps_event = { - .name = "mbm_local_bytes", - .evtid = QOS_MC_MBM_BPS_EVENT_ID, -}; - /* * Initialize the event list for the resource. * @@ -814,14 +809,6 @@ static void l3_mon_evt_init(struct rdt_resource *r) list_add_tail(&mbm_local_event.list, &r->evt_list); } -static void mc_mon_evt_init(struct rdt_resource *r) -{ - INIT_LIST_HEAD(&r->evt_list); - - if (resctrl_arch_is_mbm_bps_enabled()) - list_add_tail(&mbm_bps_event.list, &r->evt_list); -} - int resctrl_mon_resource_init(void) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); @@ -845,9 +832,6 @@ int resctrl_mon_resource_init(void) mbm_config_rftype_init("mbm_local_bytes_config"); } - r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - mc_mon_evt_init(r); - return 0; } diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index e39f22453d84..ea969ddb1a9d 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -113,14 +113,13 @@ void rdt_staged_configs_clear(void) static bool resctrl_is_mbm_enabled(void) { return (resctrl_arch_is_mbm_total_enabled() || - resctrl_arch_is_mbm_local_enabled() || - resctrl_arch_is_mbm_bps_enabled()); + resctrl_arch_is_mbm_local_enabled()); } static bool resctrl_is_mbm_event(int e) { return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_MC_MBM_BPS_EVENT_ID); + e <= QOS_L3_MBM_LOCAL_EVENT_ID); } /* diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 660776491941..5423a2eff810 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -81,7 +81,6 @@ bool resctrl_arch_mon_capable(void); bool resctrl_arch_is_llc_occupancy_enabled(void); bool resctrl_arch_is_mbm_local_enabled(void); bool resctrl_arch_is_mbm_total_enabled(void); -bool resctrl_arch_is_mbm_bps_enabled(void); /* reset cached configurations, then all devices */ void 
resctrl_arch_reset_resources(void); diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index a0d8694be783..fe0b10b589c0 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -93,9 +93,6 @@ enum resctrl_event_id { QOS_L3_OCCUP_EVENT_ID = 0x01, QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, - QOS_MC_MBM_BPS_EVENT_ID = 0x04, }; -#define RESCTRL_MAX_EVENT_NUM 4 - #endif /* __LINUX_RESCTRL_TYPES_H */ -- Gitee From d142c7f1149a2613c85913e6fc12f56edc2b93cc Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:34:53 -0800 Subject: [PATCH 047/158] anolis: Revert "anolis: arm_mpam: Add supportion for implementation-defined MB monitoring" ANBZ: #31045 This reverts commit 0dcae46b4456796bb839eb313ab68c8ae82d96e5. Signed-off-by: Jason Zeng --- drivers/acpi/arm64/mpam.c | 4 -- drivers/platform/mpam/mpam_devices.c | 56 +-------------------------- drivers/platform/mpam/mpam_internal.h | 8 ---- 3 files changed, 2 insertions(+), 66 deletions(-) diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c index 153ef041abf0..7a4bd70972c2 100644 --- a/drivers/acpi/arm64/mpam.c +++ b/drivers/acpi/arm64/mpam.c @@ -22,8 +22,6 @@ #define ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER (1<<3) #define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID (1<<4) -int ddrc_freq; - /* Use OEM info in MPAM ACPI table to distinguish different machine types */ struct acpi_mpam_machine_oem_info { enum mpam_machine_type type; @@ -143,8 +141,6 @@ static int acpi_mpam_parse_resource(struct mpam_msc *msc, return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE, level, cache_id); case ACPI_MPAM_LOCATION_TYPE_MEMORY: - if (mpam_current_machine == MPAM_YITIAN710) - ddrc_freq = res->locator.memory_locator.reserved; return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_MEMORY, 255, res->locator.memory_locator.proximity_domain); default: diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 
0827f2c61f91..b6672b7881f7 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -35,8 +35,6 @@ #include "mpam_internal.h" -extern int ddrc_freq; - /* * mpam_list_lock protects the SRCU lists when writing. Once the * mpam_enabled key is enabled these lists are read-only, @@ -694,10 +692,6 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) } } - if (FIELD_GET(MPAMF_IDR_HAS_IMPL_IDR, ris->idr)) - if (mpam_current_machine == MPAM_YITIAN710 && class->type == MPAM_CLASS_MEMORY) - mpam_set_feature(mpam_feat_impl_msmon_mbwu, props); - /* * RIS with PARTID narrowing don't have enough storage for one * configuration per PARTID. If these are in a class we could use, @@ -1021,45 +1015,6 @@ static void __ris_msmon_read(void *arg) *(m->val) += now; } -static void __ris_impl_msmon_read(void *arg) -{ - unsigned long flags; - struct mon_read *m = arg; - u64 mb_val = 0; - struct mon_cfg *ctx = m->ctx; - struct mpam_msc *msc = m->ris->msc; - u32 custom_reg_base_addr, cycle, val; - - lockdep_assert_held(&msc->lock); - if (m->type != mpam_feat_impl_msmon_mbwu) - return; - - /* Other machine can extend this function */ - if (mpam_current_machine != MPAM_YITIAN710) - return; - - spin_lock_irqsave(&msc->part_sel_lock, flags); - - __mpam_write_reg(msc, MPAMCFG_PART_SEL, ctx->mon); - - custom_reg_base_addr = __mpam_read_reg(msc, MPAMF_IMPL_IDR); - - cycle = __mpam_read_reg(msc, custom_reg_base_addr + MPAMF_CUST_WINDW_OFFSET); - val = __mpam_read_reg(msc, custom_reg_base_addr + MPAMF_CUST_MBWC_OFFSET); - - spin_unlock_irqrestore(&msc->part_sel_lock, flags); - - if (val & MSMON___NRDY) { - m->err = -EBUSY; - return; - } - - mb_val = MBWU_GET(val); - - mb_val = mb_val * 32 * ddrc_freq * 1000000 / cycle; /* B/s */ - *(m->val) += mb_val; -} - static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) { int err, idx; @@ -1072,15 +1027,8 @@ static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) msc = ris->msc; 
mutex_lock(&msc->lock); - if (arg->type == mpam_feat_msmon_csu || - arg->type == mpam_feat_msmon_mbwu) - err = smp_call_function_any(&msc->accessibility, - __ris_msmon_read, arg, true); - else if (arg->type == mpam_feat_impl_msmon_mbwu) - err = smp_call_function_any(&msc->accessibility, - __ris_impl_msmon_read, arg, true); - else - err = -EOPNOTSUPP; + err = smp_call_function_any(&msc->accessibility, + __ris_msmon_read, arg, true); mutex_unlock(&msc->lock); if (!err && arg->err) err = arg->err; diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index d84413e5e031..014524e5fa4f 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -108,7 +108,6 @@ enum mpam_device_features { mpam_feat_msmon_mbwu_capture, mpam_feat_msmon_mbwu_rwbw, mpam_feat_msmon_capt, - mpam_feat_impl_msmon_mbwu, mpam_feat_partid_nrw, MPAM_FEATURE_LAST, }; @@ -575,11 +574,4 @@ void mpam_resctrl_exit(void); */ #define MSMON_CAPT_EVNT_NOW BIT(0) -/* Used for PTG Yitian710 specific MB monitoring feature */ -#define MBWU_MASK GENMASK(23, 0) -#define MBWU_WINWD_MAX GENMASK(22, 0) -#define MBWU_GET(v) ((v) & MBWU_MASK) -#define MPAMF_CUST_MBWC_OFFSET 0x08 -#define MPAMF_CUST_WINDW_OFFSET 0x0C - #endif /* MPAM_INTERNAL_H */ -- Gitee From 819fcee715761dd7c524968bd8108f6c26c37f91 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:34:55 -0800 Subject: [PATCH 048/158] anolis: Revert "anolis: arm_mpam: Add cache msc info parsing for Yitian710 specific MPAM ACPI table" ANBZ: #31045 This reverts commit c4d02e76df6c83226ba80ad65a26256d5b865b67. 
Signed-off-by: Jason Zeng --- drivers/acpi/arm64/mpam.c | 17 ++++------------- drivers/platform/mpam/mpam_devices.c | 6 +----- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c index 7a4bd70972c2..92ecfc614d14 100644 --- a/drivers/acpi/arm64/mpam.c +++ b/drivers/acpi/arm64/mpam.c @@ -124,19 +124,10 @@ static int acpi_mpam_parse_resource(struct mpam_msc *msc, switch (res->locator_type) { case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE: cache_id = res->locator.cache_locator.cache_reference; - if (mpam_current_machine == MPAM_YITIAN710) { - /* - * YITIAN710's BIOS doesn't support find level from - * cache id. Since it only supports L3 cache, use a - * fixed value, 3. - */ - level = 3; - } else { - level = find_acpi_cache_level_from_id(cache_id); - if (level < 0) { - pr_err_once("Bad level for cache with id %u\n", cache_id); - return level; - } + level = find_acpi_cache_level_from_id(cache_id); + if (level < 0) { + pr_err_once("Bad level for cache with id %u\n", cache_id); + return level; } return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE, level, cache_id); diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index b6672b7881f7..3351dc9d1b1b 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -367,12 +367,8 @@ static int get_cpumask_from_cache_id(u32 cache_id, u32 cache_level, int iter_cache_id; struct device_node *iter; - if (!acpi_disabled) { - if (mpam_current_machine == MPAM_YITIAN710) - return acpi_pptt_get_cpumask_from_cache_id_and_level( - cache_id, cache_level, affinity); + if (!acpi_disabled) return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity); - } for_each_possible_cpu(cpu) { iter = of_get_cpu_node(cpu, NULL); -- Gitee From ef9571f5b6b7ca20a5685fab93a15c2a7206d6dd Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:34:57 -0800 Subject: [PATCH 049/158] anolis: Revert "anolis: ACPI 
/ PPTT: Downgrade the revision requirement in acpi_pptt_get_cpumask_from_cache_id_and_level" ANBZ: #31045 This reverts commit 02a1af8f13c84089353b3532591c9dff93a21fc3. Signed-off-by: Jason Zeng --- drivers/acpi/pptt.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index eea6ea5fcdca..b2249f8541e7 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -1173,11 +1173,7 @@ int acpi_pptt_get_cpumask_from_cache_id_and_level(u32 cache_id, u32 cache_level, return -ENOENT; } - /* - * FIXME: Since this function does not actually use the cache id in the - * PPTT table, we downgrade the revision requirement. - */ - if (table->revision < 2) { + if (table->revision < 3) { acpi_put_table(table); return -ENOENT; } -- Gitee From 98e7b9809a12f9e540ebfba1096898a243bb2280 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:34:59 -0800 Subject: [PATCH 050/158] anolis: Revert "anolis: ACPI / PPTT: Add a helper to fill a cpumask from a cache with specific id and level" ANBZ: #31045 This reverts commit 23cc2060897f188f5cd0f9f3843eca0046a43794. Signed-off-by: Jason Zeng --- drivers/acpi/pptt.c | 60 -------------------------------------------- include/linux/acpi.h | 8 ------ 2 files changed, 68 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index b2249f8541e7..fb3bd2eaa7e8 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -1139,63 +1139,3 @@ int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus) acpi_put_table(table); return 0; } - -/** - * acpi_pptt_get_cpumask_from_cache_id_and_level() - Get the cpus associated with the - * cache specified by id and level - * @cache_id: The id field of the unified cache - * @cache_level: The level of the unified cache - * @cpus: Where to buidl the cpumask - * - * Determine which CPUs are below this cache in the PPTT. This allows the property - * to be found even if the CPUs are offline. 
- * - * The PPTT table must be rev 3 or later, - * - * Return: -ENOENT if the PPTT doesn't exist, or the cache cannot be found. - * Otherwise returns 0 and sets the cpus in the provided cpumask. - */ -int acpi_pptt_get_cpumask_from_cache_id_and_level(u32 cache_id, u32 cache_level, - cpumask_t *cpus) -{ - u32 acpi_cpu_id; - acpi_status status; - int cpu; - struct acpi_table_header *table; - struct acpi_pptt_cache *cache_node; - struct acpi_pptt_processor *cpu_node; - - cpumask_clear(cpus); - - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); - if (ACPI_FAILURE(status)) { - acpi_pptt_warn_missing(); - return -ENOENT; - } - - if (table->revision < 3) { - acpi_put_table(table); - return -ENOENT; - } - - for_each_possible_cpu(cpu) { - acpi_cpu_id = get_acpi_id_for_cpu(cpu); - cpu_node = acpi_find_processor_node(table, acpi_cpu_id); - if (!cpu_node) - continue; - - cache_node = acpi_find_cache_node(table, acpi_cpu_id, - ACPI_PPTT_CACHE_TYPE_UNIFIED, - cache_level, &cpu_node); - - if (!cache_node) - continue; - - cpu_node = acpi_pptt_find_cache_backwards(table, cache_node); - if (cpu_node->acpi_processor_id == cache_id) - cpumask_set_cpu(cpu, cpus); - } - - acpi_put_table(table); - return 0; -} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b339abbfa2a4..f154c5768e2b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1496,8 +1496,6 @@ int find_acpi_cpu_topology_hetero_id(unsigned int cpu); int find_acpi_cache_level_from_id(u32 cache_id); int acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus); -int acpi_pptt_get_cpumask_from_cache_id_and_level(u32 cache_id, u32 cache_level, - cpumask_t *cpus); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1533,12 +1531,6 @@ static inline int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, { return -EINVAL; } -static inline int acpi_pptt_get_cpumask_from_cache_id_and_level(u32 cache_id, - u32 
cache_level, - cpumask_t *cpus) -{ - return -EINVAL; -} #endif #ifdef CONFIG_ARM64 -- Gitee From dfa709abc03954dc1b3971833011386d82b7d74f Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:02 -0800 Subject: [PATCH 051/158] anolis: Revert "openEuler: ACPI / PPTT: Filthy hack to find _a_ backwards reference in the PPTT [ROTTEN]" ANBZ: #31045 This reverts commit 1b633f23c3e58daf21dccdf2486881304e95c115. Signed-off-by: Jason Zeng --- drivers/acpi/pptt.c | 48 -------------------------------------------- include/linux/acpi.h | 4 ---- 2 files changed, 52 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index fb3bd2eaa7e8..f5dcabdc99c9 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -301,54 +301,6 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he return NULL; } -/* - * acpi_pptt_find_cache_backwards() - Given a PPTT cache find a processor node - * that points to it. This lets us find a cacheinfo node by fw_token, but - * is totally broken as many processor node may point at the same PPTT - * cache indicating different instances of the cache. (e.g. all the L1 - * caches are the same shape, but they aren't the same cache). - * This only works if you cooked your PPTT table to look like this. 
- */ -struct acpi_pptt_processor * -acpi_pptt_find_cache_backwards(struct acpi_table_header *table_hdr, - struct acpi_pptt_cache *cache) -{ - struct acpi_pptt_processor *cpu_node; - struct acpi_subtable_header *entry; - struct acpi_subtable_header *res; - unsigned long table_end; - u32 proc_sz; - int i; - - table_end = (unsigned long)table_hdr + table_hdr->length; - entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, - sizeof(struct acpi_table_pptt)); - proc_sz = sizeof(struct acpi_pptt_processor *); - - /* find the processor structure which points at with this cpuid */ - while ((unsigned long)entry + proc_sz < table_end) { - if (entry->length == 0) { - pr_warn("Invalid zero length subtable\n"); - break; - } - - cpu_node = (struct acpi_pptt_processor *)entry; - entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, - entry->length); - - if (cpu_node->header.type != ACPI_PPTT_TYPE_PROCESSOR) - continue; - - for (i = 0; i < cpu_node->number_of_priv_resources; i++) { - res = acpi_get_pptt_resource(table_hdr, cpu_node, i); - if (&cache->header == res) - return cpu_node; - } - } - - return NULL; -} - /* parent_node points into the table, but the table isn't provided. 
*/ static void acpi_pptt_get_child_cpus(struct acpi_pptt_processor *parent_node, cpumask_t *cpus) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f154c5768e2b..0ef4b9427e99 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1569,8 +1569,4 @@ static inline void acpi_device_notify(struct device *dev) { } static inline void acpi_device_notify_remove(struct device *dev) { } #endif -struct acpi_pptt_processor * -acpi_pptt_find_cache_backwards(struct acpi_table_header *table_hdr, - struct acpi_pptt_cache *cache); - #endif /*_LINUX_ACPI_H*/ -- Gitee From b2885647b4b759d972437bc337651af2fffab1cc Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:04 -0800 Subject: [PATCH 052/158] anolis: Revert "anolis: arm_mpam: Identify different types of machines for MPAM implementation specific features" ANBZ: #31045 This reverts commit e7713cbc2aff4677eef0dd32f59fe5a7e6fec691. Signed-off-by: Jason Zeng --- drivers/acpi/arm64/mpam.c | 67 +--------------------------- drivers/platform/mpam/mpam_devices.c | 13 ------ include/linux/arm_mpam.h | 15 ------- 3 files changed, 2 insertions(+), 93 deletions(-) diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c index 92ecfc614d14..c1f9cafaf1e2 100644 --- a/drivers/acpi/arm64/mpam.c +++ b/drivers/acpi/arm64/mpam.c @@ -22,26 +22,6 @@ #define ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER (1<<3) #define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID (1<<4) -/* Use OEM info in MPAM ACPI table to distinguish different machine types */ -struct acpi_mpam_machine_oem_info { - enum mpam_machine_type type; - char signature[ACPI_NAMESEG_SIZE + 1]; - u8 revision; - char oem_id[ACPI_OEM_ID_SIZE + 1]; - char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; - u32 oem_revision; -}; - -static struct acpi_mpam_machine_oem_info acpi_mpam_machines[MPAM_NUM_MACHINE_TYPES] = { - [MPAM_YITIAN710] = { - .signature = "YMPM", - .revision = 0, - .oem_id = "PTG ", - .oem_table_id = "PTG01 ", - .oem_revision = 0, - }, -}; - static 
bool frob_irq(struct platform_device *pdev, int intid, u32 flags, int *irq, u32 processor_container_uid) { @@ -317,34 +297,18 @@ static int __init _parse_table(struct acpi_table_header *table) static struct acpi_table_header *get_table(void) { struct acpi_table_header *table; - enum mpam_machine_type mtype; acpi_status status; if (acpi_disabled || !mpam_cpus_have_feature()) return NULL; - mtype = acpi_mpam_get_machine_type(); - - if (mtype != MPAM_DEFAULT_MACHINE) - status = acpi_get_table(acpi_mpam_machines[mtype].signature, 0, &table); - else - status = acpi_get_table(ACPI_SIG_MPAM, 0, &table); + status = acpi_get_table(ACPI_SIG_MPAM, 0, &table); if (ACPI_FAILURE(status)) return NULL; - if (mtype == MPAM_DEFAULT_MACHINE && table->revision != 1) + if (table->revision != 1) return NULL; - /* - * Kunpeng's MPAM ACPI adopts an older version of MPAM ACPI, so - * this MPAM ACPI driver is not suitable for Kunpeng platform. - * Skip it. - */ - if (!strncmp(table->oem_id, "HISI", 4)) { - acpi_put_table(table); - return NULL; - } - return table; } @@ -403,33 +367,6 @@ int acpi_mpam_count_msc(void) return ret; } -enum mpam_machine_type acpi_mpam_get_machine_type(void) -{ - struct acpi_table_header *table; - enum mpam_machine_type ret; - acpi_status status; - int i; - - ret = MPAM_DEFAULT_MACHINE; - - for (i = MPAM_DEFAULT_MACHINE + 1; i < MPAM_NUM_MACHINE_TYPES; i++) { - status = acpi_get_table(acpi_mpam_machines[i].signature, 0, &table); - if (ACPI_FAILURE(status)) - continue; - - if (!memcmp(acpi_mpam_machines[i].oem_id, table->oem_id, ACPI_OEM_ID_SIZE) && - !memcmp(acpi_mpam_machines[i].oem_table_id, table->oem_table_id, - ACPI_OEM_TABLE_ID_SIZE) && - acpi_mpam_machines[i].oem_revision == table->oem_revision) { - ret = i; - } - - acpi_put_table(table); - } - - return ret; -} - /* * Call after ACPI devices have been created, which happens behind acpi_scan_init() * called from subsys_initcall(). 
PCC requires the mailbox driver, which is diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 3351dc9d1b1b..e22540111d5b 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -45,8 +45,6 @@ static LIST_HEAD(mpam_all_msc); struct srcu_struct mpam_srcu; -enum mpam_machine_type mpam_current_machine; - /* MPAM isn't available until all the MSC have been probed. */ static u32 mpam_num_msc; @@ -1563,12 +1561,6 @@ static int mpam_msc_setup_error_irq(struct mpam_msc *msc) return 0; } -static enum mpam_machine_type mpam_dt_get_machine_type(void) -{ - /* FIXME: not supported yet */ - return MPAM_DEFAULT_MACHINE; -} - static int mpam_dt_count_msc(void) { int count = 0; @@ -2394,11 +2386,6 @@ static int __init mpam_msc_driver_init(void) init_srcu_struct(&mpam_srcu); - if (!acpi_disabled) - mpam_current_machine = acpi_mpam_get_machine_type(); - else - mpam_current_machine = mpam_dt_get_machine_type(); - /* * If the MPAM CPU interface is not implemented, or reserved by * firmware, there is no point touching the rest of the hardware. diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 5423a2eff810..239d27af9e32 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -31,22 +31,11 @@ enum mpam_class_types { MPAM_CLASS_UNKNOWN, /* Everything else, e.g. SMMU */ }; -enum mpam_machine_type { - MPAM_DEFAULT_MACHINE, - MPAM_YITIAN710, - - MPAM_NUM_MACHINE_TYPES, -}; - -/* Machine identifier which can be used for vendor-specific MPAM features */ -extern enum mpam_machine_type mpam_current_machine; - #ifdef CONFIG_ACPI_MPAM /* Parse the ACPI description of resources entries for this MSC. 
*/ int acpi_mpam_parse_resources(struct mpam_msc *msc, struct acpi_mpam_msc_node *tbl_msc); int acpi_mpam_count_msc(void); -enum mpam_machine_type acpi_mpam_get_machine_type(void); #else static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, struct acpi_mpam_msc_node *tbl_msc) @@ -54,10 +43,6 @@ static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, return -EINVAL; } static inline int acpi_mpam_count_msc(void) { return -EINVAL; } -static inline enum mpam_machine_type acpi_mpam_get_machine_type(void) -{ - return MPAM_DEFAULT_MACHINE; -} #endif int mpam_register_requestor(u16 partid_max, u8 pmg_max); -- Gitee From e5873dd58a47029230683e643884e7ef7e9aa6f4 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:06 -0800 Subject: [PATCH 053/158] anolis: Revert "anolis: ACPI / MPAM: Avoid MPAM MSC has the same identifier" ANBZ: #31045 This reverts commit 33159b9601a649bdddf80589593324f5cd24f554. Signed-off-by: Jason Zeng --- drivers/acpi/arm64/mpam.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c index c1f9cafaf1e2..8a63449f27b5 100644 --- a/drivers/acpi/arm64/mpam.c +++ b/drivers/acpi/arm64/mpam.c @@ -194,7 +194,6 @@ static int __init _parse_table(struct acpi_table_header *table) struct resource res[3]; char uid[16]; u32 acpi_id; - int msc_num = 0; table_end = (char *)table + table->length; @@ -221,12 +220,7 @@ static int __init _parse_table(struct acpi_table_header *table) memset(res, 0, sizeof(res)); memset(props, 0, sizeof(props)); - /* - * Use an extra msc_num instead of msc->identifier, since MSC - * nodes with different types in MPAM ACPI table may have the - * same id value. 
- */ - pdev = platform_device_alloc("mpam_msc", msc_num++); + pdev = platform_device_alloc("mpam_msc", tbl_msc->identifier); if (IS_ERR(pdev)) { err = PTR_ERR(pdev); break; -- Gitee From 4db9eab5afa3bfddd8142c2059daaf6e3bd2d282 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:08 -0800 Subject: [PATCH 054/158] anolis: Revert "anolis: arm_mpam: Fix the wrong checking when using USE_RMID_IDX" ANBZ: #31045 This reverts commit c0657014dd6277a985a06e3cf7152a65339d070e. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 25cf64386b64..bc26aaeebc76 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -359,10 +359,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, return -EINVAL; } - if (mon == USE_RMID_IDX) + cfg.mon = mon; + if (cfg.mon == USE_RMID_IDX) cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid); - else - cfg.mon = mon; cfg.match_pmg = true; cfg.pmg = rmid; -- Gitee From 745a46f9737ee3f05340f6d36d5e3207500bf5c5 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:10 -0800 Subject: [PATCH 055/158] anolis: Revert "anolis: arm_mpam: Fix schemata's display error of MB when CDP is enabled" ANBZ: #31045 This reverts commit a0a192b425a20f543157ffcafa6995e8cd5d43da. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index bc26aaeebc76..06af2d77b93e 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -931,10 +931,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom); cprops = &res->class->props; - if (mpam_resctrl_hide_cdp(r->rid)) - partid = resctrl_get_config_index(closid, CDP_CODE); - else - partid = resctrl_get_config_index(closid, type); + partid = resctrl_get_config_index(closid, type); cfg = &dom->comp->cfg[partid]; switch (r->rid) { -- Gitee From cd677d7e33f29db7c7d5db59e77da4cbdd43fa3d Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:12 -0800 Subject: [PATCH 056/158] anolis: Revert "anolis: arm_mpam: Fix the wrong PARTID_MAX and PMG_MAX usage" ANBZ: #31045 This reverts commit 511767ff9e959b064dcfbc1089c390143e34da84. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index e22540111d5b..ad641f21d301 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -1240,7 +1240,7 @@ static int mpam_reprogram_ris(void *_arg) spin_lock(&partid_max_lock); partid_max = mpam_partid_max; spin_unlock(&partid_max_lock); - for (partid = 0; partid <= partid_max; partid++) + for (partid = 0; partid < partid_max; partid++) mpam_reprogram_ris_partid(ris, partid, cfg); return 0; @@ -1396,7 +1396,7 @@ static void mpam_reprogram_msc(struct mpam_msc *msc) } reset = true; - for (partid = 0; partid <= mpam_partid_max; partid++) { + for (partid = 0; partid < mpam_partid_max; partid++) { cfg = &ris->comp->cfg[partid]; if (cfg->features) reset = false; @@ -2119,7 +2119,7 @@ static int __allocate_component_cfg(struct mpam_component *comp) if (comp->cfg) return 0; - comp->cfg = kcalloc(mpam_partid_max + 1, sizeof(*comp->cfg), GFP_KERNEL); + comp->cfg = kcalloc(mpam_partid_max, sizeof(*comp->cfg), GFP_KERNEL); if (!comp->cfg) return -ENOMEM; @@ -2220,7 +2220,7 @@ static void mpam_enable_once(void) mpam_register_cpuhp_callbacks(mpam_cpu_online); pr_info("MPAM enabled with %u partid and %u pmg\n", - READ_ONCE(mpam_partid_max) + 1, READ_ONCE(mpam_pmg_max) + 1); + READ_ONCE(mpam_partid_max) + 1, mpam_pmg_max + 1); } void mpam_reset_class(struct mpam_class *class) @@ -2231,7 +2231,7 @@ void mpam_reset_class(struct mpam_class *class) idx = srcu_read_lock(&mpam_srcu); list_for_each_entry_rcu(comp, &class->components, class_list) { - memset(comp->cfg, 0, ((mpam_partid_max + 1) * sizeof(*comp->cfg))); + memset(comp->cfg, 0, (mpam_partid_max * sizeof(*comp->cfg))); list_for_each_entry_rcu(ris, &comp->ris, comp_list) { mutex_lock(&ris->msc->lock); -- Gitee From b54b90a6d2fbb4e4af906fd48fa57760e37c4895 Mon Sep 17 
00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:14 -0800 Subject: [PATCH 057/158] anolis: Revert "anolis: arm_mpam: Fix wrong MMIO space size for msc platform device" ANBZ: #31045 This reverts commit 128e377542b0b62f6537832a0053fdb2cc9a0384. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 7 +------ drivers/platform/mpam/mpam_internal.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index ad641f21d301..12015951a63d 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -1769,13 +1769,8 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) err = PTR_ERR(io); break; } - msc->mapped_hwpage_sz = msc_res->end - msc_res->start + 1; + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; msc->mapped_hwpage = io; - if (msc->mapped_hwpage_sz < MPAM_MIN_MMIO_SIZE) { - pr_err("MSC MMIO space size is too small\n"); - err = -EINVAL; - break; - } } else if (msc->iface == MPAM_IFACE_PCC) { msc->pcc_cl.dev = &pdev->dev; msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 014524e5fa4f..48554ff93e09 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -321,7 +321,6 @@ void mpam_resctrl_exit(void); * Partitioning and Monitoring (MPAM), for Armv8-A. DDI 0598A.a */ #define MPAM_ARCHITECTURE_V1 0x10 -#define MPAM_MIN_MMIO_SIZE 0x3000 /* Memory mapped control pages: */ /* ID Register offsets in the memory mapped page */ -- Gitee From 7a50d52968b5d9e5e7a079b380aa227b5836a866 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:15 -0800 Subject: [PATCH 058/158] anolis: Revert "anolis: arm_mpam: Clear mpam_msc properly when driver probing failed" ANBZ: #31045 This reverts commit 7d0219e6e108fbe8ca4bfba820231894742181b5. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 43 ++++++++++++++-------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 12015951a63d..f5f23725d13a 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -1688,24 +1688,6 @@ static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg) /* TODO: wake up tasks blocked on this MSC's PCC channel */ } -static int mpam_msc_drv_remove(struct platform_device *pdev) -{ - struct mpam_msc *msc = platform_get_drvdata(pdev); - - if (!msc) - return 0; - - mutex_lock(&mpam_list_lock); - mpam_num_msc--; - platform_set_drvdata(pdev, NULL); - list_del_rcu(&msc->glbl_list); - mpam_msc_destroy(msc); - synchronize_srcu(&mpam_srcu); - mutex_unlock(&mpam_list_lock); - - return 0; -} - static int mpam_msc_drv_probe(struct platform_device *pdev) { int err; @@ -1751,6 +1733,7 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) err = mpam_msc_setup_error_irq(msc); if (err) { + devm_kfree(&pdev->dev, msc); msc = ERR_PTR(err); break; } @@ -1766,6 +1749,7 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) &msc_res); if (IS_ERR(io)) { pr_err("Failed to map MSC base address\n"); + devm_kfree(&pdev->dev, msc); err = PTR_ERR(io); break; } @@ -1782,6 +1766,7 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) msc->pcc_subspace_id); if (IS_ERR(msc->pcc_chan)) { pr_err("Failed to request MSC PCC channel\n"); + devm_kfree(&pdev->dev, msc); err = PTR_ERR(msc->pcc_chan); break; } @@ -1792,6 +1777,7 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) if (IS_ERR(io)) { pr_err("Failed to map MSC base address\n"); pcc_mbox_free_channel(msc->pcc_chan); + devm_kfree(&pdev->dev, msc); err = PTR_ERR(io); break; } @@ -1815,9 +1801,6 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) err = mpam_dt_parse_resources(msc, plat_data); } - if 
(err) - mpam_msc_drv_remove(pdev); - if (!err && fw_num_msc == mpam_num_msc) mpam_register_cpuhp_callbacks(&mpam_discovery_cpu_online); @@ -2294,6 +2277,24 @@ void mpam_enable(struct work_struct *work) mpam_enable_once(); } +static int mpam_msc_drv_remove(struct platform_device *pdev) +{ + struct mpam_msc *msc = platform_get_drvdata(pdev); + + if (!msc) + return 0; + + mutex_lock(&mpam_list_lock); + mpam_num_msc--; + platform_set_drvdata(pdev, NULL); + list_del_rcu(&msc->glbl_list); + mpam_msc_destroy(msc); + synchronize_srcu(&mpam_srcu); + mutex_unlock(&mpam_list_lock); + + return 0; +} + struct mpam_write_config_arg { struct mpam_msc_ris *ris; struct mpam_component *comp; -- Gitee From d99926a68378665c23d59511117429245d6a4605 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:17 -0800 Subject: [PATCH 059/158] anolis: Revert "anolis: arm_mpam: Fix the problem that error interrupt handling cannot exit normally" ANBZ: #31045 This reverts commit f09dc5fedc1f38963d0ef3ba34a559efe218aedf. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 33 ++++++++++++++++++---------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index f5f23725d13a..32532a918115 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -1964,15 +1964,13 @@ static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) pr_err("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n", msc->id, mpam_errcode_names[errcode], partid, pmg, ris); - /* - * To prevent this interrupt from repeatedly cancelling the scheduled - * work to disable mpam, disable the error interrupt. 
- */ - mpam_disable_msc_ecr(msc); - - schedule_work(&mpam_broken_work); + if (irq_is_percpu(irq)) { + mpam_disable_msc_ecr(msc); + schedule_work(&mpam_broken_work); + return IRQ_HANDLED; + } - return IRQ_HANDLED; + return IRQ_WAKE_THREAD; } static irqreturn_t mpam_ppi_handler(int irq, void *dev_id) @@ -1989,6 +1987,8 @@ static irqreturn_t mpam_spi_handler(int irq, void *dev_id) return __mpam_irq_handler(irq, msc); } +static irqreturn_t mpam_disable_thread(int irq, void *dev_id); + static int mpam_register_irqs(void) { int err, irq; @@ -2018,9 +2018,11 @@ static int mpam_register_irqs(void) true); mutex_unlock(&msc->lock); } else { - err = devm_request_irq(&msc->pdev->dev, irq, - &mpam_spi_handler, IRQF_SHARED, - "mpam:msc:error", msc); + err = devm_request_threaded_irq(&msc->pdev->dev, irq, + &mpam_spi_handler, + &mpam_disable_thread, + IRQF_SHARED, + "mpam:msc:error", msc); if (err) return err; } @@ -2226,7 +2228,7 @@ void mpam_reset_class(struct mpam_class *class) * All of MPAMs errors indicate a software bug, restore any modified * controls to their reset values. */ -void mpam_disable(struct work_struct *ignored) +static irqreturn_t mpam_disable_thread(int irq, void *dev_id) { int idx; struct mpam_class *class; @@ -2248,6 +2250,13 @@ void mpam_disable(struct work_struct *ignored) list_for_each_entry_rcu(class, &mpam_classes, classes_list) mpam_reset_class(class); srcu_read_unlock(&mpam_srcu, idx); + + return IRQ_HANDLED; +} + +void mpam_disable(struct work_struct *ignored) +{ + mpam_disable_thread(0, NULL); } /* -- Gitee From af8acc11c2fc5ee70c8b84a4833e5c8f7324c34a Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:19 -0800 Subject: [PATCH 060/158] anolis: Revert "anolis: arm_mpam: Limit MB percentage in schemata to multiples of granularity" ANBZ: #31045 This reverts commit dd0e78b2f117bab5f9c3d339b8e8a2b18f61dee4. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 06af2d77b93e..c53b89557b4d 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -568,17 +568,11 @@ static u32 percent_to_mbw_pbm(u8 pc, struct mpam_props *cprops) static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) { u8 bit; - u32 granularity, pc_ls, divisor = 2, value = 0; + u32 pc_ls, divisor = 2, value = 0; if (WARN_ON_ONCE(cprops->bwa_wd > 15)) return MAX_MBA_BW; - /* Set the pc value to be a multiple of granularity. */ - granularity = get_mba_granularity(cprops); - pc = roundup(pc, (u8) granularity); - if (pc > 100) - pc = 100; - /* * Left shift by 16 bits to preserve the precision of the division * operation. @@ -817,7 +811,6 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) r->membw.delay_linear = true; r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; r->membw.bw_gran = get_mba_granularity(cprops); - r->membw.min_bw = r->membw.bw_gran; /* Round up to at least 1% */ if (!r->membw.bw_gran) -- Gitee From 899ac764bb4fc7dadfda70bf9764b0595c7e1b33 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:21 -0800 Subject: [PATCH 061/158] anolis: Revert "anolis: arm_mpam: Fix the inaccurate MB granularity in resctrl info directory" ANBZ: #31045 This reverts commit 7d467ea9850508dcac2896dc771745c759f26368. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index c53b89557b4d..dfecac759f41 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -515,7 +515,7 @@ static u32 get_mba_granularity(struct mpam_props *cprops) * bwa_wd is the number of bits implemented in the 0.xxx * fixed point fraction. 1 bit is 50%, 2 is 25% etc. */ - return max_t(u32, 1, (MAX_MBA_BW / BIT(cprops->bwa_wd))); + return MAX_MBA_BW / (cprops->bwa_wd + 1); } return 0; -- Gitee From 91216deca59a6ef45e9256e848c34e290011d868 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:23 -0800 Subject: [PATCH 062/158] anolis: Revert "anolis: arm_mpam: Fix MB default percentage error in schemata" ANBZ: #31045 This reverts commit 41d4ab4cfb5c91c33d04d08b9ba6b50ea1dce5f9. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index dfecac759f41..41ae848b6f61 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -539,17 +539,10 @@ static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) for (bit = 15; bit; bit--) { if (mbw_max & BIT(bit)) - /* - * Left shift by 16 bits to preserve the precision of - * the division operation. - */ - value += (MAX_MBA_BW << 16) / divisor; + value += MAX_MBA_BW / divisor; divisor <<= 1; } - /* Use the upper bound of the fixed-point fraction. 
*/ - value = (value + (MAX_MBA_BW << (16 - cprops->bwa_wd))) >> 16; - return value; } @@ -568,29 +561,23 @@ static u32 percent_to_mbw_pbm(u8 pc, struct mpam_props *cprops) static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) { u8 bit; - u32 pc_ls, divisor = 2, value = 0; + u32 divisor = 2, value = 0; if (WARN_ON_ONCE(cprops->bwa_wd > 15)) return MAX_MBA_BW; - /* - * Left shift by 16 bits to preserve the precision of the division - * operation. - */ - pc_ls = (u32) pc << 16; - for (bit = 15; bit; bit--) { - if (pc_ls >= (MAX_MBA_BW << 16) / divisor) { - pc_ls -= (MAX_MBA_BW << 16) / divisor; + if (pc >= MAX_MBA_BW / divisor) { + pc -= MAX_MBA_BW / divisor; value |= BIT(bit); } divisor <<= 1; - if (!pc_ls || !((MAX_MBA_BW << 16) / divisor)) + if (!pc || !(MAX_MBA_BW / divisor)) break; } - value &= GENMASK(15, 15 - cprops->bwa_wd + 1); + value &= GENMASK(15, 15 - cprops->bwa_wd); return value; } -- Gitee From e1263e5b445f6a2fe01bae51c4a3436343da5ed9 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:25 -0800 Subject: [PATCH 063/158] anolis: Revert "anolis: arm_mpam: Remove the zero padding for MB percentage showing in schemata" ANBZ: #31045 This reverts commit 2314db153a5ab4e4fad16c14b9e869f9cff2b16d. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 41ae848b6f61..6d25e98a757d 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -790,7 +790,7 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) struct mpam_props *cprops = &class->props; /* TODO: kill these properties off as they are derivatives */ - r->format_str = "%d=%*u"; + r->format_str = "%d=%0*u"; r->fflags = RFTYPE_RES_MB; r->default_ctrl = MAX_MBA_BW; r->data_width = 3; -- Gitee From d8f20fdfba628428cec95f0b715d966a87d7a7b2 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:35:27 -0800 Subject: [PATCH 064/158] anolis: Revert "anolis: arm_mpam: Fix L3 cache size display error in resctrl fs" ANBZ: #31045 This reverts commit dd301f435dbeb6eaace9eab33041f1090b0b10f0. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 6d25e98a757d..c7ebb4d37b47 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -739,8 +739,6 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) res->resctrl_res.rid == RDT_RESOURCE_L3) { bool has_csu = cache_has_usable_csu(class); - r->cache_level = class->level; - /* TODO: Scaling is not yet supported */ r->cache.cbm_len = class->props.cpbm_wd; r->cache.arch_has_sparse_bitmasks = true; -- Gitee From 200da6d0a8dd725a469b362f11278af4638ee45f Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:51 -0800 Subject: [PATCH 065/158] anolis: Revert "fs/resctrl: Remove the limit on the number of CLOSID" ANBZ: #31045 This reverts commit 0c5b924b71f0ff9fd8c67d4b5a43cbf016729724. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 2 +- fs/resctrl/rdtgroup.c | 25 ++++++++++++------------- include/linux/resctrl.h | 5 +++++ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index c7ebb4d37b47..4062c0f93f85 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -132,7 +132,7 @@ static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid) */ u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) { - return mpam_partid_max + 1; + return min((u32)mpam_partid_max + 1, (u32)RESCTRL_MAX_CLOSID); } u32 resctrl_arch_system_num_rmid_idx(void) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index ea969ddb1a9d..936fc6e47386 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -123,8 +123,8 @@ static bool resctrl_is_mbm_event(int e) } /* - * Trivial allocator for CLOSIDs. Use BITMAP APIs to manipulate a bitmap - * of free CLOSIDs. + * Trivial allocator for CLOSIDs. Since h/w only supports a small number, + * we can keep a bitmap of free CLOSIDs in a single integer. * * Using a global CLOSID across all resources has some advantages and * some drawbacks: @@ -137,7 +137,7 @@ static bool resctrl_is_mbm_event(int e) * - Our choices on how to configure each resource become progressively more * limited as the number of resources grows. 
*/ -static unsigned long *closid_free_map; +static unsigned long closid_free_map; static int closid_free_map_len; int closids_supported(void) @@ -148,17 +148,16 @@ int closids_supported(void) static void closid_init(void) { struct resctrl_schema *s; - u32 rdt_min_closid = ~0; + u32 rdt_min_closid = 32; /* Compute rdt_min_closid across all resources */ list_for_each_entry(s, &resctrl_schema_all, list) rdt_min_closid = min(rdt_min_closid, s->num_closid); - closid_free_map = bitmap_alloc(rdt_min_closid, GFP_KERNEL); - bitmap_fill(closid_free_map, rdt_min_closid); + closid_free_map = BIT_MASK(rdt_min_closid) - 1; /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */ - __clear_bit(RESCTRL_RESERVED_CLOSID, closid_free_map); + __clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map); closid_free_map_len = rdt_min_closid; } @@ -176,12 +175,12 @@ static int closid_alloc(void) return cleanest_closid; closid = cleanest_closid; } else { - closid = find_first_bit(closid_free_map, closid_free_map_len); - if (closid == closid_free_map_len) + closid = ffs(closid_free_map); + if (closid == 0) return -ENOSPC; + closid--; } - - __clear_bit(closid, closid_free_map); + __clear_bit(closid, &closid_free_map); return closid; } @@ -190,7 +189,7 @@ void closid_free(int closid) { lockdep_assert_held(&rdtgroup_mutex); - __set_bit(closid, closid_free_map); + __set_bit(closid, &closid_free_map); } /** @@ -204,7 +203,7 @@ bool closid_allocated(unsigned int closid) { lockdep_assert_held(&rdtgroup_mutex); - return !test_bit(closid, closid_free_map); + return !test_bit(closid, &closid_free_map); } /** diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index dc80ddab26cf..00cc0457af50 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -30,6 +30,11 @@ int proc_resctrl_show(struct seq_file *m, /* max value for struct rdt_domain's mbps_val */ #define MBA_MAX_MBPS U32_MAX +/* + * Resctrl uses a u32 as a closid bitmap. The maximum closid is 32. 
+ */ +#define RESCTRL_MAX_CLOSID 32 + /* * Resctrl uses u32 to hold the user-space config. The maximum bitmap size is * 32. -- Gitee From d2d5d41e43fc6865a27c51ba0f35cf7a2aedf6fc Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:51 -0800 Subject: [PATCH 066/158] anolis: Revert "arm_mpam: resctrl: Update the rmid reallocation limit" ANBZ: #31045 This reverts commit 7a6d4ec917214da744df7050aba327515187b7a0. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 51 ---------------------------- 1 file changed, 51 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 4062c0f93f85..62bfa7d772d9 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -15,7 +15,6 @@ #include #include #include -#include #include @@ -48,13 +47,6 @@ static bool cdp_enabled; */ static bool resctrl_enabled; -/* - * mpam_resctrl_pick_caches() needs to know the size of the caches. cacheinfo - * populates this from a device_initcall(). mpam_resctrl_setup() must wait. - */ -static bool cacheinfo_ready; -static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready); - /* A dummy mon context to use when the monitors were allocated up front */ u32 __mon_is_rmid_idx = USE_RMID_IDX; void *mon_is_rmid_idx = &__mon_is_rmid_idx; @@ -412,24 +404,6 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, } } -/* - * The rmid realloc threshold should be for the smallest cache exposed to - * resctrl. 
- */ -static void update_rmid_limits(unsigned int size) -{ - u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx(); - - if (WARN_ON_ONCE(!size)) - return; - - if (resctrl_rmid_realloc_limit && size > resctrl_rmid_realloc_limit) - return; - - resctrl_rmid_realloc_limit = size; - resctrl_rmid_realloc_threshold = size / num_unique_pmg; -} - static bool cache_has_usable_cpor(struct mpam_class *class) { struct mpam_props *cprops = &class->props; @@ -586,15 +560,11 @@ static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) static void mpam_resctrl_pick_caches(void) { int idx; - unsigned int cache_size; struct mpam_class *class; struct mpam_resctrl_res *res; - lockdep_assert_cpus_held(); - idx = srcu_read_lock(&mpam_srcu); list_for_each_entry_rcu(class, &mpam_classes, classes_list) { - struct mpam_props *cprops = &class->props; bool has_cpor = cache_has_usable_cpor(class); if (class->type != MPAM_CLASS_CACHE) { @@ -620,16 +590,6 @@ static void mpam_resctrl_pick_caches(void) continue; } - /* Assume cache levels are the same size for all CPUs... 
*/ - cache_size = get_cpu_cacheinfo_size(smp_processor_id(), class->level); - if (!cache_size) { - pr_debug("pick_caches: Could not read cache size\n"); - continue; - } - - if (mpam_has_feature(mpam_feat_msmon_csu, cprops)) - update_rmid_limits(cache_size); - if (class->level == 2) { res = &mpam_resctrl_exports[RDT_RESOURCE_L2]; res->resctrl_res.name = "L2"; @@ -835,8 +795,6 @@ int mpam_resctrl_setup(void) struct mpam_resctrl_res *res; enum resctrl_res_level i; - wait_event(wait_cacheinfo_ready, cacheinfo_ready); - cpus_read_lock(); for (i = 0; i < RDT_NUM_RESOURCES; i++) { res = &mpam_resctrl_exports[i]; @@ -1201,12 +1159,3 @@ int mpam_resctrl_offline_cpu(unsigned int cpu) return 0; } - -static int __init __cacheinfo_ready(void) -{ - cacheinfo_ready = true; - wake_up(&wait_cacheinfo_ready); - - return 0; -} -device_initcall_sync(__cacheinfo_ready); -- Gitee From 9f61a336140d46cd10a26f11528a55fae8e0b43d Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:51 -0800 Subject: [PATCH 067/158] anolis: Revert "arm_mpam: resctrl: Call resctrl_exit() in the event of errors" ANBZ: #31045 This reverts commit ba4cc05c8e0892abaad30dbba940c658dd52b3e9. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 2 -- drivers/platform/mpam/mpam_internal.h | 1 - drivers/platform/mpam/mpam_resctrl.c | 17 ----------------- 3 files changed, 20 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 32532a918115..04e1e98e17cb 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -2240,8 +2240,6 @@ static irqreturn_t mpam_disable_thread(int irq, void *dev_id) } mutex_unlock(&mpam_cpuhp_state_lock); - mpam_resctrl_exit(); - static_branch_disable(&mpam_enabled); mpam_unregister_irqs(); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 48554ff93e09..50d738d83047 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -313,7 +313,6 @@ int mpam_resctrl_online_cpu(unsigned int cpu); int mpam_resctrl_offline_cpu(unsigned int cpu); int mpam_resctrl_setup(void); -void mpam_resctrl_exit(void); /* * MPAM MSCs have the following register layout. See: diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 62bfa7d772d9..976641a2af12 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -41,12 +41,6 @@ static struct mpam_class *mbm_total_class; */ static bool cdp_enabled; -/* - * If resctrl_init() succeeded, resctrl_exit() can be used to remove support - * for the filesystem in the event of an error. 
- */ -static bool resctrl_enabled; - /* A dummy mon context to use when the monitors were allocated up front */ u32 __mon_is_rmid_idx = USE_RMID_IDX; void *mon_is_rmid_idx = &__mon_is_rmid_idx; @@ -832,22 +826,11 @@ int mpam_resctrl_setup(void) } err = resctrl_init(); - if (!err) - WRITE_ONCE(resctrl_enabled, true); } return err; } -void mpam_resctrl_exit(void) -{ - if (!READ_ONCE(resctrl_enabled)) - return; - - WRITE_ONCE(resctrl_enabled, false); - resctrl_exit(); -} - u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type) { -- Gitee From f433c068ac98ee00c0b870f0792839cb550e7d0c Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:51 -0800 Subject: [PATCH 068/158] anolis: Revert "arm_mpam: resctrl: Tell resctrl about cpu/domain online/offline" ANBZ: #31045 This reverts commit ac8ed1c572b27b8821ac5f70a4b12d731c769e7b. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 976641a2af12..328837867d60 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -825,7 +825,7 @@ int mpam_resctrl_setup(void) pr_warn("Number of PMG is not a power of 2! 
resctrl may misbehave"); } - err = resctrl_init(); + /* TODO: call resctrl_init() */ } return err; @@ -1080,7 +1080,7 @@ struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id) int mpam_resctrl_online_cpu(unsigned int cpu) { - int i, err; + int i; struct mpam_resctrl_dom *dom; struct mpam_resctrl_res *res; @@ -1099,12 +1099,8 @@ int mpam_resctrl_online_cpu(unsigned int cpu) dom = mpam_resctrl_alloc_domain(cpu, res); if (IS_ERR(dom)) return PTR_ERR(dom); - err = resctrl_online_domain(&res->resctrl_res, &dom->resctrl_dom); - if (err) - return err; } - resctrl_online_cpu(cpu); return 0; } @@ -1115,8 +1111,6 @@ int mpam_resctrl_offline_cpu(unsigned int cpu) struct mpam_resctrl_res *res; struct mpam_resctrl_dom *dom; - resctrl_offline_cpu(cpu); - for (i = 0; i < RDT_NUM_RESOURCES; i++) { res = &mpam_resctrl_exports[i]; @@ -1135,7 +1129,6 @@ int mpam_resctrl_offline_cpu(unsigned int cpu) if (!cpumask_empty(&d->cpu_mask)) continue; - resctrl_offline_domain(&res->resctrl_res, &dom->resctrl_dom); list_del(&d->list); kfree(dom); } -- Gitee From 6cc2a5d68f2a01437e8ec1fd74dcf00f7fc0d5d9 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:51 -0800 Subject: [PATCH 069/158] anolis: Revert "perf/arm-cmn: Stop claiming all the resources" ANBZ: #31045 This reverts commit faf97c06fed3a01996dd83d368398e07e5c64a1f. 
Signed-off-by: Jason Zeng --- drivers/perf/arm-cmn.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 2f022d1fb052..77aa37de5988 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2435,7 +2435,6 @@ static int arm_cmn_probe(struct platform_device *pdev) struct arm_cmn *cmn; const char *name; static atomic_t id; - struct resource *cfg; int err, rootnode, this_id; cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL); @@ -2451,16 +2450,7 @@ static int arm_cmn_probe(struct platform_device *pdev) rootnode = arm_cmn600_acpi_probe(pdev, cmn); } else { rootnode = 0; - - /* - * Avoid registering resources as the PMUs registers are - * scattered through CMN, and may appear either side of - * registers for other 'devices'. (e.g. the MPAM MSC controls). - */ - cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!cfg) - return -EINVAL; - cmn->base = devm_ioremap(&pdev->dev, cfg->start, resource_size(cfg)); + cmn->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cmn->base)) return PTR_ERR(cmn->base); if (cmn->part == PART_CMN600) -- Gitee From 679dc15ce1cffd78fffd533f2dc2ac0de6099b4f Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:51 -0800 Subject: [PATCH 070/158] anolis: Revert "arm64: mpam: Select ARCH_HAS_CPU_RESCTRL" ANBZ: #31045 This reverts commit 402a0ef3e02c4a06421c81abe7ad24c794a432e3. 
Signed-off-by: Jason Zeng --- arch/arm64/Kconfig | 3 +-- arch/arm64/include/asm/resctrl.h | 2 -- drivers/platform/mpam/Kconfig | 4 +--- 3 files changed, 2 insertions(+), 7 deletions(-) delete mode 100644 arch/arm64/include/asm/resctrl.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 592ea627edb3..659e1ac54f4c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2069,8 +2069,7 @@ config ARM64_TLB_RANGE config ARM64_MPAM bool "Enable support for MPAM" select ACPI_MPAM if ACPI - select ARCH_HAS_CPU_RESCTRL - select RESCTRL_FS + select ARM_CPU_RESCTRL help Memory Partitioning and Monitoring is an optional extension that allows the CPUs to mark load and store transactions with diff --git a/arch/arm64/include/asm/resctrl.h b/arch/arm64/include/asm/resctrl.h deleted file mode 100644 index b506e95cf6e3..000000000000 --- a/arch/arm64/include/asm/resctrl.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include diff --git a/drivers/platform/mpam/Kconfig b/drivers/platform/mpam/Kconfig index 75f5b2454fbe..13bd86fc5e58 100644 --- a/drivers/platform/mpam/Kconfig +++ b/drivers/platform/mpam/Kconfig @@ -2,7 +2,5 @@ # CPU resources, not containers or cgroups etc. config ARM_CPU_RESCTRL bool - default y - depends on ARM64 && ARCH_HAS_CPU_RESCTRL - depends on MISC_FILESYSTEMS + depends on ARM64 select RESCTRL_RMID_DEPENDS_ON_CLOSID -- Gitee From 126241b3331634794516cfbc52de6e5305f18982 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 071/158] anolis: Revert "arm_mpam: resctrl: Add dummy definition for free running counters" ANBZ: #31045 This reverts commit 19a5282070b963bc9f36aeec7cfc10addad90fbe. 
Signed-off-by: Jason Zeng --- include/linux/arm_mpam.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 239d27af9e32..f6b92060b811 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -55,12 +55,6 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) return val; } -/* MPAM counters requires a monitor to be allocated */ -static inline bool resctrl_arch_event_is_free_running(enum resctrl_event_id evt) -{ - return false; -} - bool resctrl_arch_alloc_capable(void); bool resctrl_arch_mon_capable(void); bool resctrl_arch_is_llc_occupancy_enabled(void); -- Gitee From 33fa6dcd1a8314e1a7a145d5e6c0b34380b6af29 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 072/158] anolis: Revert "arm_mpam: resctrl: Add empty definitions for fine-grained enables" ANBZ: #31045 This reverts commit a023b4a7164acf06a575e93e7b863d01b983a826. Signed-off-by: Jason Zeng --- include/linux/arm_mpam.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index f6b92060b811..8897309c163d 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -87,13 +87,4 @@ static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; } static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; } static inline u64 resctrl_arch_get_prefetch_disable_bits(void) { return 0; } -/* - * The CPU configuration for MPAM is cheap to write, and is only written if it - * has changed. No need for fine grained enables. 
- */ -static inline void resctrl_arch_enable_mon(void) { } -static inline void resctrl_arch_disable_mon(void) { } -static inline void resctrl_arch_enable_alloc(void) { } -static inline void resctrl_arch_disable_alloc(void) { } - #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From 83946592fa51617e4a548a9c6f6d06d49ed96427 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 073/158] anolis: Revert "arm_mpam: resctrl: Add empty definitions for pseudo lock" ANBZ: #31045 This reverts commit 7f8f16bd57fec307f368e5e46cc59dce912e6058. Signed-off-by: Jason Zeng --- include/linux/arm_mpam.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 8897309c163d..abadaba0085f 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -80,11 +80,4 @@ struct rdt_resource; void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid); void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx); -/* Pseudo lock is not supported by MPAM */ -static inline int resctrl_arch_pseudo_lock_fn(void *_plr) { return 0; } -static inline int resctrl_arch_measure_l2_residency(void *_plr) { return 0; } -static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; } -static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; } -static inline u64 resctrl_arch_get_prefetch_disable_bits(void) { return 0; } - #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From 9cefb611d7d2470552423e1cc821f8b8ba4824b7 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 074/158] anolis: Revert "untested: arm_mpam: resctrl: Allow monitors to be configured" ANBZ: #31045 This reverts commit f43d7a4edfa633583e55f07c9949d67f02800fd0. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 26 ----------- drivers/platform/mpam/mpam_internal.h | 9 ---- drivers/platform/mpam/mpam_resctrl.c | 62 --------------------------- 3 files changed, 97 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 04e1e98e17cb..5ebb944ad9fc 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -1076,32 +1076,6 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, return err; } -void mpam_msmon_reset_all_mbwu(struct mpam_component *comp) -{ - int idx, i; - unsigned long flags; - struct mpam_msc *msc; - struct mpam_msc_ris *ris; - - if (!mpam_is_enabled()) - return; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(ris, &comp->ris, comp_list) { - if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) - continue; - - msc = ris->msc; - spin_lock_irqsave(&msc->mon_sel_lock, flags); - for (i = 0; i < ris->props.num_mbwu_mon; i++) { - ris->mbwu_state[i].correction = 0; - ris->mbwu_state[i].reset_on_next_read = true; - } - spin_unlock_irqrestore(&msc->mon_sel_lock, flags); - } - srcu_read_unlock(&mpam_srcu, idx); -} - void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx) { int idx; diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 50d738d83047..8d4bcc1f5642 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -20,12 +20,6 @@ DECLARE_STATIC_KEY_FALSE(mpam_enabled); /* Value to indicate the allocated monitor is derived from the RMID index. */ #define USE_RMID_IDX (U16_MAX + 1) -/* - * Only these event configuration bits are supported. MPAM can't know if - * data is being written back, these will show up as a write. 
- */ -#define MPAM_RESTRL_EVT_CONFIG_VALID (READS_TO_LOCAL_MEM | NON_TEMP_WRITE_TO_LOCAL_MEM) - static inline bool mpam_is_enabled(void) { return static_branch_likely(&mpam_enabled); @@ -246,8 +240,6 @@ struct mpam_msc_ris { struct mpam_resctrl_dom { struct mpam_component *comp; struct rdt_domain resctrl_dom; - - u32 mbm_local_evt_cfg; }; struct mpam_resctrl_res { @@ -307,7 +299,6 @@ int mpam_apply_config(struct mpam_component *comp, u16 partid, int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, enum mpam_device_features, u64 *val); void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx); -void mpam_msmon_reset_all_mbwu(struct mpam_component *comp); int mpam_resctrl_online_cpu(unsigned int cpu); int mpam_resctrl_offline_cpu(unsigned int cpu); diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 328837867d60..730ea6c6dffc 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -306,18 +306,6 @@ void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, } } -static enum mon_filter_options resctrl_evt_config_to_mpam(u32 local_evt_cfg) -{ - switch (local_evt_cfg) { - case READS_TO_LOCAL_MEM: - return COUNT_READ; - case NON_TEMP_WRITE_TO_LOCAL_MEM: - return COUNT_WRITE; - default: - return COUNT_BOTH; - } -} - int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *arch_mon_ctx) @@ -351,7 +339,6 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, cfg.match_pmg = true; cfg.pmg = rmid; - cfg.opts = resctrl_evt_config_to_mpam(dom->mbm_local_evt_cfg); if (cdp_enabled) { cfg.partid = closid << 1; @@ -634,54 +621,6 @@ static void mpam_resctrl_pick_mba(void) } } -bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) -{ - struct mpam_props *cprops; - - switch (evt) { - case QOS_L3_MBM_LOCAL_EVENT_ID: - if (!mbm_local_class) - return 
false; - cprops = &mbm_local_class->props; - - return mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, cprops); - default: - return false; - } -} - -void resctrl_arch_mon_event_config_read(void *info) -{ - struct mpam_resctrl_dom *dom; - struct resctrl_mon_config_info *mon_info = info; - - dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_dom); - mon_info->mon_config = dom->mbm_local_evt_cfg & MAX_EVT_CONFIG_BITS; -} - -void resctrl_arch_mon_event_config_write(void *info) -{ - struct mpam_resctrl_dom *dom; - struct resctrl_mon_config_info *mon_info = info; - - if (mon_info->mon_config & ~MPAM_RESTRL_EVT_CONFIG_VALID) { - mon_info->err = -EOPNOTSUPP; - return; - } - - dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_dom); - dom->mbm_local_evt_cfg = mon_info->mon_config & MPAM_RESTRL_EVT_CONFIG_VALID; -} - -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d) -{ - struct mpam_resctrl_dom *dom; - - dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom); - dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID; - mpam_msmon_reset_all_mbwu(dom->comp); -} - static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) { struct mpam_class *class = res->class; @@ -1034,7 +973,6 @@ mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) dom->comp = comp; INIT_LIST_HEAD(&dom->resctrl_dom.list); dom->resctrl_dom.id = comp->comp_id; - dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID; cpumask_set_cpu(cpu, &dom->resctrl_dom.cpu_mask); /* TODO: this list should be sorted */ -- Gitee From df494df64eb5dac4a1072c078c5b0faea7a07a9c Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 075/158] anolis: Revert "arm_mpam: resctrl: Add resctrl_arch_rmid_read() and resctrl_arch_reset_rmid()" ANBZ: #31045 This reverts commit e56d73dc30923fb4cfd1dcbf9732be8385ab1887. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 79 ---------------------------- include/linux/arm_mpam.h | 4 -- 2 files changed, 83 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 730ea6c6dffc..8cb4707a4117 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -306,85 +306,6 @@ void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, } } -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, - u32 closid, u32 rmid, enum resctrl_event_id eventid, - u64 *val, void *arch_mon_ctx) -{ - int err; - u64 cdp_val; - struct mon_cfg cfg; - struct mpam_resctrl_dom *dom; - u32 mon = *(u32 *)arch_mon_ctx; - enum mpam_device_features type; - - resctrl_arch_rmid_read_context_check(); - - dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom); - - switch (eventid) { - case QOS_L3_OCCUP_EVENT_ID: - type = mpam_feat_msmon_csu; - break; - case QOS_L3_MBM_LOCAL_EVENT_ID: - case QOS_L3_MBM_TOTAL_EVENT_ID: - type = mpam_feat_msmon_mbwu; - break; - default: - return -EINVAL; - } - - cfg.mon = mon; - if (cfg.mon == USE_RMID_IDX) - cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid); - - cfg.match_pmg = true; - cfg.pmg = rmid; - - if (cdp_enabled) { - cfg.partid = closid << 1; - err = mpam_msmon_read(dom->comp, &cfg, type, val); - if (err) - return err; - - cfg.partid += 1; - err = mpam_msmon_read(dom->comp, &cfg, type, &cdp_val); - if (!err) - *val += cdp_val; - } else { - cfg.partid = closid; - err = mpam_msmon_read(dom->comp, &cfg, type, val); - } - - return err; -} - -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, - u32 closid, u32 rmid, enum resctrl_event_id eventid) -{ - struct mon_cfg cfg; - struct mpam_resctrl_dom *dom; - - if (eventid != QOS_L3_MBM_LOCAL_EVENT_ID) - return; - - cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid); - cfg.match_pmg = true; - cfg.pmg = rmid; - - dom = container_of(d, 
struct mpam_resctrl_dom, resctrl_dom); - - if (cdp_enabled) { - cfg.partid = closid << 1; - mpam_msmon_reset_mbwu(dom->comp, &cfg); - - cfg.partid += 1; - mpam_msmon_reset_mbwu(dom->comp, &cfg); - } else { - cfg.partid = closid; - mpam_msmon_reset_mbwu(dom->comp, &cfg); - } -} - static bool cache_has_usable_cpor(struct mpam_class *class) { struct mpam_props *cprops = &class->props; diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index abadaba0085f..88000eb59c6f 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -50,10 +50,6 @@ int mpam_register_requestor(u16 partid_max, u8 pmg_max); int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, enum mpam_class_types type, u8 class_id, int component_id); -static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) -{ - return val; -} bool resctrl_arch_alloc_capable(void); bool resctrl_arch_mon_capable(void); -- Gitee From b688881087ddc2a94be5f0c84f1d77543ccc03a1 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 076/158] anolis: Revert "arm_mpam: resctrl: Allow resctrl to allocate monitors" ANBZ: #31045 This reverts commit dfcd57db32144e43b211d4336cb527c3ebea12f3. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_internal.h | 3 -- drivers/platform/mpam/mpam_resctrl.c | 72 --------------------------- include/linux/arm_mpam.h | 4 -- 3 files changed, 79 deletions(-) diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 8d4bcc1f5642..36c49cfb9271 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -17,9 +17,6 @@ DECLARE_STATIC_KEY_FALSE(mpam_enabled); -/* Value to indicate the allocated monitor is derived from the RMID index. 
*/ -#define USE_RMID_IDX (U16_MAX + 1) - static inline bool mpam_is_enabled(void) { return static_branch_likely(&mpam_enabled); diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 8cb4707a4117..4786be5dc4b4 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -22,8 +22,6 @@ u64 mpam_resctrl_default_group; -DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters); - /* * The classes we've picked to map to resctrl resources. * Class pointer may be NULL. @@ -41,10 +39,6 @@ static struct mpam_class *mbm_total_class; */ static bool cdp_enabled; -/* A dummy mon context to use when the monitors were allocated up front */ -u32 __mon_is_rmid_idx = USE_RMID_IDX; -void *mon_is_rmid_idx = &__mon_is_rmid_idx; - bool resctrl_arch_alloc_capable(void) { return exposed_alloc_capable; @@ -240,72 +234,6 @@ struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) return &mpam_resctrl_exports[l].resctrl_res; } -static void *resctrl_arch_mon_ctx_alloc_no_wait(struct rdt_resource *r, - int evtid) -{ - struct mpam_resctrl_res *res; - u32 *ret = kmalloc(sizeof(*ret), GFP_KERNEL); - - if (!ret) - return ERR_PTR(-ENOMEM); - - switch (evtid) { - case QOS_L3_OCCUP_EVENT_ID: - res = container_of(r, struct mpam_resctrl_res, resctrl_res); - - *ret = mpam_alloc_csu_mon(res->class); - return ret; - case QOS_L3_MBM_LOCAL_EVENT_ID: - case QOS_L3_MBM_TOTAL_EVENT_ID: - return mon_is_rmid_idx; - } - - return ERR_PTR(-EOPNOTSUPP); -} - -void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid) -{ - DEFINE_WAIT(wait); - void *ret; - - might_sleep(); - - do { - prepare_to_wait(&resctrl_mon_ctx_waiters, &wait, - TASK_INTERRUPTIBLE); - ret = resctrl_arch_mon_ctx_alloc_no_wait(r, evtid); - if (PTR_ERR(ret) == -ENOSPC) - schedule(); - } while (PTR_ERR(ret) == -ENOSPC && !signal_pending(current)); - finish_wait(&resctrl_mon_ctx_waiters, &wait); - - return ret; -} - -void resctrl_arch_mon_ctx_free(struct 
rdt_resource *r, int evtid, - void *arch_mon_ctx) -{ - struct mpam_resctrl_res *res; - u32 mon = *(u32 *)arch_mon_ctx; - - if (mon == USE_RMID_IDX) - return; - kfree(arch_mon_ctx); - arch_mon_ctx = NULL; - - res = container_of(r, struct mpam_resctrl_res, resctrl_res); - - switch (evtid) { - case QOS_L3_OCCUP_EVENT_ID: - mpam_free_csu_mon(res->class, mon); - wake_up(&resctrl_mon_ctx_waiters); - return; - case QOS_L3_MBM_TOTAL_EVENT_ID: - case QOS_L3_MBM_LOCAL_EVENT_ID: - return; - } -} - static bool cache_has_usable_cpor(struct mpam_class *class) { struct mpam_props *cprops = &class->props; diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 88000eb59c6f..49416f22244a 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -72,8 +72,4 @@ u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid); void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid); u32 resctrl_arch_system_num_rmid_idx(void); -struct rdt_resource; -void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid); -void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx); - #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From d4d55020622e25dd65b4fd69cf95e0616f562e77 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 077/158] anolis: Revert "untested: arm_mpam: resctrl: Add support for mbm counters" ANBZ: #31045 This reverts commit d805a4483ef4813c47801ded07b01ec7fbf6698a. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 52 +--------------------------- include/linux/arm_mpam.h | 6 +++- 2 files changed, 6 insertions(+), 52 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 4786be5dc4b4..b4be7f81e79b 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -31,7 +31,6 @@ static struct mpam_resctrl_res mpam_resctrl_exports[RDT_NUM_RESOURCES]; static bool exposed_alloc_capable; static bool exposed_mon_capable; static struct mpam_class *mbm_local_class; -static struct mpam_class *mbm_total_class; /* * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM1_EL1. @@ -54,11 +53,6 @@ bool resctrl_arch_is_mbm_local_enabled(void) return mbm_local_class; } -bool resctrl_arch_is_mbm_total_enabled(void) -{ - return mbm_total_class; -} - bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid) { switch (rid) { @@ -272,24 +266,6 @@ bool resctrl_arch_is_llc_occupancy_enabled(void) return cache_has_usable_csu(mpam_resctrl_exports[RDT_RESOURCE_L3].class); } -static bool class_has_usable_mbwu(struct mpam_class *class) -{ - struct mpam_props *cprops = &class->props; - - if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops)) - return false; - - /* - * resctrl expects the bandwidth counters to be free running, - * which means we need as many monitors as resctrl has - * control/monitor groups. - */ - if (cprops->num_mbwu_mon < resctrl_arch_system_num_rmid_idx()) - return false; - - return (mpam_partid_max > 1) || (mpam_pmg_max != 0); -} - static bool mba_class_use_mbw_part(struct mpam_props *cprops) { /* TODO: Scaling is not yet supported */ @@ -474,13 +450,10 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) { struct mpam_class *class = res->class; struct rdt_resource *r = &res->resctrl_res; - bool has_mbwu = class_has_usable_mbwu(class); /* Is this one of the two well-known caches? 
*/ if (res->resctrl_res.rid == RDT_RESOURCE_L2 || res->resctrl_res.rid == RDT_RESOURCE_L3) { - bool has_csu = cache_has_usable_csu(class); - /* TODO: Scaling is not yet supported */ r->cache.cbm_len = class->props.cpbm_wd; r->cache.arch_has_sparse_bitmasks = true; @@ -507,25 +480,8 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) exposed_alloc_capable = true; } - /* - * MBWU counters may be 'local' or 'total' depending on where - * they are in the topology. Counters on caches are assumed to - * be local. If it's on the memory controller, its assumed to - * be global. - */ - if (has_mbwu && class->level >= 3) { - mbm_local_class = class; + if (class->level == 3 && cache_has_usable_csu(class)) r->mon_capable = true; - } - - /* - * CSU counters only make sense on a cache. The file is called - * llc_occupancy, but its expected to the on the L3. - */ - if (has_csu && class->type == MPAM_CLASS_CACHE && - class->level == 3) { - r->mon_capable = true; - } } else if (res->resctrl_res.rid == RDT_RESOURCE_MBA) { struct mpam_props *cprops = &class->props; @@ -547,11 +503,6 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) r->alloc_capable = true; exposed_alloc_capable = true; } - - if (has_mbwu && class->type == MPAM_CLASS_MEMORY) { - mbm_total_class = class; - r->mon_capable = true; - } } if (r->mon_capable) { @@ -587,7 +538,6 @@ int mpam_resctrl_setup(void) mpam_resctrl_pick_caches(); mpam_resctrl_pick_mba(); - /* TODO: mpam_resctrl_pick_counters(); */ for (i = 0; i < RDT_NUM_RESOURCES; i++) { res = &mpam_resctrl_exports[i]; diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 49416f22244a..d41891df56d4 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -55,7 +55,11 @@ bool resctrl_arch_alloc_capable(void); bool resctrl_arch_mon_capable(void); bool resctrl_arch_is_llc_occupancy_enabled(void); bool resctrl_arch_is_mbm_local_enabled(void); -bool resctrl_arch_is_mbm_total_enabled(void); + +static 
inline bool resctrl_arch_is_mbm_total_enabled(void) +{ + return false; +} /* reset cached configurations, then all devices */ void resctrl_arch_reset_resources(void); -- Gitee From 1343ec786d805ed0bb1681bc8733135f978f0bbc Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 078/158] anolis: Revert "untested: arm_mpam: resctrl: Add support for MB resource" ANBZ: #31045 This reverts commit 773749b7e7cf7d05f73d354563e55e76f05386d0. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 219 +-------------------------- 1 file changed, 3 insertions(+), 216 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index b4be7f81e79b..1efefbf1313b 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -53,20 +53,9 @@ bool resctrl_arch_is_mbm_local_enabled(void) return mbm_local_class; } -bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid) +bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level ignored) { - switch (rid) { - case RDT_RESOURCE_L2: - case RDT_RESOURCE_L3: - return cdp_enabled; - case RDT_RESOURCE_MBA: - default: - /* - * x86's MBA control doesn't support CDP, so user-space doesn't - * expect it. - */ - return false; - } + return cdp_enabled; } int resctrl_arch_set_cdp_enabled(enum resctrl_res_level ignored, bool enable) @@ -94,11 +83,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level ignored, bool enable) return 0; } -static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid) -{ - return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid); -} - /* * MSC may raise an error interrupt if it sees an out or range partid/pmg, * and go on to truncate the value. 
Regardless of what the hardware supports, @@ -266,102 +250,6 @@ bool resctrl_arch_is_llc_occupancy_enabled(void) return cache_has_usable_csu(mpam_resctrl_exports[RDT_RESOURCE_L3].class); } -static bool mba_class_use_mbw_part(struct mpam_props *cprops) -{ - /* TODO: Scaling is not yet supported */ - return (mpam_has_feature(mpam_feat_mbw_part, cprops) && - cprops->mbw_pbm_bits < MAX_MBA_BW); -} - -static bool class_has_usable_mba(struct mpam_props *cprops) -{ - if (mba_class_use_mbw_part(cprops) || - mpam_has_feature(mpam_feat_mbw_max, cprops)) - return true; - - return false; -} - -/* - * Calculate the percentage change from each implemented bit in the control - * This can return 0 when BWA_WD is greater than 6. (100 / (1<<7) == 0) - */ -static u32 get_mba_granularity(struct mpam_props *cprops) -{ - if (mba_class_use_mbw_part(cprops)) { - return MAX_MBA_BW / cprops->mbw_pbm_bits; - } else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { - /* - * bwa_wd is the number of bits implemented in the 0.xxx - * fixed point fraction. 1 bit is 50%, 2 is 25% etc. 
- */ - return MAX_MBA_BW / (cprops->bwa_wd + 1); - } - - return 0; -} - -static u32 mbw_pbm_to_percent(unsigned long mbw_pbm, struct mpam_props *cprops) -{ - u32 bit, result = 0, granularity = get_mba_granularity(cprops); - - for_each_set_bit(bit, &mbw_pbm, cprops->mbw_pbm_bits % 32) { - result += granularity; - } - - return result; -} - -static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) -{ - u8 bit; - u32 divisor = 2, value = 0; - - for (bit = 15; bit; bit--) { - if (mbw_max & BIT(bit)) - value += MAX_MBA_BW / divisor; - divisor <<= 1; - } - - return value; -} - -static u32 percent_to_mbw_pbm(u8 pc, struct mpam_props *cprops) -{ - u32 granularity = get_mba_granularity(cprops); - u8 num_bits = pc / granularity; - - if (!num_bits) - return 0; - - /* TODO: pick bits at random to avoid contention */ - return (1 << num_bits) - 1; -} - -static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) -{ - u8 bit; - u32 divisor = 2, value = 0; - - if (WARN_ON_ONCE(cprops->bwa_wd > 15)) - return MAX_MBA_BW; - - for (bit = 15; bit; bit--) { - if (pc >= MAX_MBA_BW / divisor) { - pc -= MAX_MBA_BW / divisor; - value |= BIT(bit); - } - divisor <<= 1; - - if (!pc || !(MAX_MBA_BW / divisor)) - break; - } - - value &= GENMASK(15, 15 - cprops->bwa_wd); - - return value; -} - /* Test whether we can export MPAM_CLASS_CACHE:{2,3}? 
*/ static void mpam_resctrl_pick_caches(void) { @@ -408,44 +296,6 @@ static void mpam_resctrl_pick_caches(void) srcu_read_unlock(&mpam_srcu, idx); } -static void mpam_resctrl_pick_mba(void) -{ - struct mpam_class *class, *candidate_class = NULL; - struct mpam_resctrl_res *res; - int idx; - - lockdep_assert_cpus_held(); - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(class, &mpam_classes, classes_list) { - struct mpam_props *cprops = &class->props; - - if (class->level < 3) - continue; - - if (!class_has_usable_mba(cprops)) - continue; - - if (!cpumask_equal(&class->affinity, cpu_possible_mask)) - continue; - - /* - * mba_sc reads the mbm_local counter, and waggles the MBA controls. - * mbm_local is implicitly part of the L3, pick a resouce to be MBA - * that as close as possible to the L3. - */ - if (!candidate_class || class->level < candidate_class->level) - candidate_class = class; - } - srcu_read_unlock(&mpam_srcu, idx); - - if (candidate_class) { - res = &mpam_resctrl_exports[RDT_RESOURCE_MBA]; - res->class = candidate_class; - res->resctrl_res.name = "MB"; - } -} - static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) { struct mpam_class *class = res->class; @@ -482,27 +332,6 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) if (class->level == 3 && cache_has_usable_csu(class)) r->mon_capable = true; - } else if (res->resctrl_res.rid == RDT_RESOURCE_MBA) { - struct mpam_props *cprops = &class->props; - - /* TODO: kill these properties off as they are derivatives */ - r->format_str = "%d=%0*u"; - r->fflags = RFTYPE_RES_MB; - r->default_ctrl = MAX_MBA_BW; - r->data_width = 3; - - r->membw.delay_linear = true; - r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; - r->membw.bw_gran = get_mba_granularity(cprops); - - /* Round up to at least 1% */ - if (!r->membw.bw_gran) - r->membw.bw_gran = 1; - - if (class_has_usable_mba(cprops)) { - r->alloc_capable = true; - exposed_alloc_capable = true; - } } if 
(r->mon_capable) { @@ -537,7 +366,6 @@ int mpam_resctrl_setup(void) } mpam_resctrl_pick_caches(); - mpam_resctrl_pick_mba(); for (i = 0; i < RDT_NUM_RESOURCES; i++) { res = &mpam_resctrl_exports[i]; @@ -596,15 +424,6 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, case RDT_RESOURCE_L3: configured_by = mpam_feat_cpor_part; break; - case RDT_RESOURCE_MBA: - if (mba_class_use_mbw_part(cprops)) { - configured_by = mpam_feat_mbw_part; - break; - } else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { - configured_by = mpam_feat_mbw_max; - break; - } - fallthrough; default: return -EINVAL; } @@ -617,11 +436,6 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, case mpam_feat_cpor_part: /* TODO: Scaling is not yet supported */ return cfg->cpbm; - case mpam_feat_mbw_part: - /* TODO: Scaling is not yet supported */ - return mbw_pbm_to_percent(cfg->mbw_pbm, cprops); - case mpam_feat_mbw_max: - return mbw_max_to_percent(cfg->mbw_max, cprops); default: return -EINVAL; } @@ -630,7 +444,6 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { - int err; u32 partid; struct mpam_config cfg; struct mpam_props *cprops; @@ -659,37 +472,11 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, cfg.cpbm = cfg_val; mpam_set_feature(mpam_feat_cpor_part, &cfg); break; - case RDT_RESOURCE_MBA: - if (mba_class_use_mbw_part(cprops)) { - cfg.mbw_pbm = percent_to_mbw_pbm(cfg_val, cprops); - mpam_set_feature(mpam_feat_mbw_part, &cfg); - break; - } else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { - cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops); - mpam_set_feature(mpam_feat_mbw_max, &cfg); - break; - } - fallthrough; default: return -EINVAL; } - /* - * When CDP is enabled, but the resource doesn't support it, we need to - * apply the same configuration to the 
other partid. - */ - if (mpam_resctrl_hide_cdp(r->rid)) { - partid = resctrl_get_config_index(closid, CDP_CODE); - err = mpam_apply_config(dom->comp, partid, &cfg); - if (err) - return err; - - partid = resctrl_get_config_index(closid, CDP_DATA); - return mpam_apply_config(dom->comp, partid, &cfg); - - } else { - return mpam_apply_config(dom->comp, partid, &cfg); - } + return mpam_apply_config(dom->comp, partid, &cfg); } /* TODO: this is IPI heavy */ -- Gitee From db982e2972123eac17e98a4ab470cda7d2b22872 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 079/158] anolis: Revert "arm_mpam: resctrl: Add rmid index helpers" ANBZ: #31045 This reverts commit af1a56a71078050575120317685c591a5da14c23. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 30 ---------------------------- include/linux/arm_mpam.h | 3 --- 2 files changed, 33 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 1efefbf1313b..225565ec4b04 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -93,36 +93,6 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) return min((u32)mpam_partid_max + 1, (u32)RESCTRL_MAX_CLOSID); } -u32 resctrl_arch_system_num_rmid_idx(void) -{ - u8 closid_shift = fls(mpam_pmg_max); - u32 num_partid = resctrl_arch_get_num_closid(NULL); - - return num_partid << closid_shift; -} - -u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid) -{ - u8 closid_shift = fls(mpam_pmg_max); - - if (WARN_ON_ONCE(closid_shift > 8)) - closid_shift = 8; - - return (closid << closid_shift) | rmid; -} - -void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid) -{ - u8 closid_shift = fls(mpam_pmg_max); - u32 pmg_mask = ~(~0 << closid_shift); - - if (WARN_ON_ONCE(closid_shift > 8)) - closid_shift = 8; - - *closid = idx >> closid_shift; - *rmid = idx & pmg_mask; -} - void resctrl_arch_sched_in(struct task_struct *tsk) { 
lockdep_assert_preemption_disabled(); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index d41891df56d4..95a960b6f9d7 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -72,8 +72,5 @@ void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid); void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid); void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 pmg); void resctrl_arch_sched_in(struct task_struct *tsk); -u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid); -void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid); -u32 resctrl_arch_system_num_rmid_idx(void); #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From ad0c9fdbd71f62bfeeac57a62df624e2adf0eeb4 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 080/158] anolis: Revert "arm64: mpam: Add helpers to change a tasks and cpu mpam partid/pmg values" ANBZ: #31045 This reverts commit ccbfd7a4a7f1338fe3ea99c11ad8605150f8c4ce. Signed-off-by: Jason Zeng --- arch/arm64/include/asm/mpam.h | 44 --------------------- drivers/platform/mpam/mpam_resctrl.c | 58 ---------------------------- include/linux/arm_mpam.h | 7 ---- 3 files changed, 109 deletions(-) diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h index 9abe1fe58c34..edae8c98fa28 100644 --- a/arch/arm64/include/asm/mpam.h +++ b/arch/arm64/include/asm/mpam.h @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -91,35 +90,6 @@ static inline void __init __enable_mpam_hcr(void) * A value in struct thread_info is used instead of struct task_struct as the * cpu's u64 register format is used, but struct task_struct has two u32'. 
*/ - static inline void mpam_set_cpu_defaults(int cpu, u16 partid_d, u16 partid_i, - u8 pmg_d, u8 pmg_i) -{ - u64 default_val; - - default_val = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid_d); - default_val |= FIELD_PREP(MPAM_SYSREG_PARTID_I, partid_i); - default_val |= FIELD_PREP(MPAM_SYSREG_PMG_D, pmg_d); - default_val |= FIELD_PREP(MPAM_SYSREG_PMG_I, pmg_i); - - WRITE_ONCE(per_cpu(arm64_mpam_default, cpu), default_val); -} - -static inline void mpam_set_task_partid_pmg(struct task_struct *tsk, - u16 partid_d, u16 partid_i, - u8 pmg_d, u8 pmg_i) -{ -#ifdef CONFIG_ARM64_MPAM - u64 regval; - - regval = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid_d); - regval |= FIELD_PREP(MPAM_SYSREG_PARTID_I, partid_i); - regval |= FIELD_PREP(MPAM_SYSREG_PMG_D, pmg_d); - regval |= FIELD_PREP(MPAM_SYSREG_PMG_I, pmg_i); - - WRITE_ONCE(task_thread_info(tsk)->mpam_partid_pmg, regval); -#endif -} - static inline u64 mpam_get_regval(struct task_struct *tsk) { #ifdef CONFIG_ARM64_MPAM @@ -129,20 +99,6 @@ static inline u64 mpam_get_regval(struct task_struct *tsk) #endif } -static inline void resctrl_arch_set_rmid(struct task_struct *tsk, u32 rmid) -{ -#ifdef CONFIG_ARM64_MPAM - u64 regval = mpam_get_regval(tsk); - - regval &= ~MPAM_SYSREG_PMG_D; - regval &= ~MPAM_SYSREG_PMG_I; - regval |= FIELD_PREP(MPAM_SYSREG_PMG_D, rmid); - regval |= FIELD_PREP(MPAM_SYSREG_PMG_I, rmid); - - WRITE_ONCE(task_thread_info(tsk)->mpam_partid_pmg, regval); -#endif -} - static inline void mpam_thread_switch(struct task_struct *tsk) { u64 oldregval; diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 225565ec4b04..64372f5bf380 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -93,63 +92,6 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) return min((u32)mpam_partid_max + 1, (u32)RESCTRL_MAX_CLOSID); } -void resctrl_arch_sched_in(struct 
task_struct *tsk) -{ - lockdep_assert_preemption_disabled(); - - mpam_thread_switch(tsk); -} - -void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 pmg) -{ - if (WARN_ON_ONCE(closid > U16_MAX) || WARN_ON_ONCE(pmg > U8_MAX)) - return; - - if (!cdp_enabled) { - mpam_set_cpu_defaults(cpu, closid, closid, pmg, pmg); - } else { - /* - * When CDP is enabled, resctrl halves the closid range and we - * use odd/even partid for one closid. - */ - u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); - u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); - - mpam_set_cpu_defaults(cpu, partid_d, partid_i, pmg, pmg); - } -} - -void resctrl_arch_sync_cpu_defaults(void *info) -{ - struct resctrl_cpu_sync *r = info; - - lockdep_assert_preemption_disabled(); - - if (r) { - resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(), - r->closid, r->rmid); - } - - resctrl_arch_sched_in(current); -} - -void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid) -{ - - - if (WARN_ON_ONCE(closid > U16_MAX) || WARN_ON_ONCE(rmid > U8_MAX)) - return; - - if (!cdp_enabled) { - mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid); - } else { - u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); - u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); - - mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid); - } -} - bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid) { u64 regval = mpam_get_regval(tsk); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 95a960b6f9d7..e3921b0ab836 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -16,8 +16,6 @@ */ extern u64 mpam_resctrl_default_group; -#include - struct mpam_msc; enum mpam_msc_iface { @@ -68,9 +66,4 @@ bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level ignored); int resctrl_arch_set_cdp_enabled(enum resctrl_res_level ignored, bool enable); bool resctrl_arch_match_closid(struct task_struct *tsk, u32 
closid); bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid); -void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid); -void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid); -void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 pmg); -void resctrl_arch_sched_in(struct task_struct *tsk); - #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From 60cb164355f32e9607b77ee63a91666dac9a3efa Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 081/158] anolis: Revert "arm_mpam: resctrl: Add CDP emulation" ANBZ: #31045 This reverts commit 3744e79894129a12ff494faa1263e31f9bc5bce3. Signed-off-by: Jason Zeng --- arch/arm64/include/asm/mpam.h | 3 +- drivers/platform/mpam/mpam_resctrl.c | 62 ---------------------------- include/linux/arm_mpam.h | 13 ------ 3 files changed, 1 insertion(+), 77 deletions(-) diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h index edae8c98fa28..1d81a6f26acd 100644 --- a/arch/arm64/include/asm/mpam.h +++ b/arch/arm64/include/asm/mpam.h @@ -4,7 +4,6 @@ #ifndef __ASM__MPAM_H #define __ASM__MPAM_H -#include #include #include #include @@ -109,7 +108,7 @@ static inline void mpam_thread_switch(struct task_struct *tsk) !static_branch_likely(&mpam_enabled)) return; - if (regval == READ_ONCE(mpam_resctrl_default_group)) + if (!regval) regval = READ_ONCE(per_cpu(arm64_mpam_default, cpu)); oldregval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 64372f5bf380..ebd8aef84679 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -19,8 +19,6 @@ #include "mpam_internal.h" -u64 mpam_resctrl_default_group; - /* * The classes we've picked to map to resctrl resources. * Class pointer may be NULL. 
@@ -31,12 +29,6 @@ static bool exposed_alloc_capable; static bool exposed_mon_capable; static struct mpam_class *mbm_local_class; -/* - * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM1_EL1. - * This applies globally to all traffic the CPU generates. - */ -static bool cdp_enabled; - bool resctrl_arch_alloc_capable(void) { return exposed_alloc_capable; @@ -52,36 +44,6 @@ bool resctrl_arch_is_mbm_local_enabled(void) return mbm_local_class; } -bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level ignored) -{ - return cdp_enabled; -} - -int resctrl_arch_set_cdp_enabled(enum resctrl_res_level ignored, bool enable) -{ - u64 regval; - u32 partid, partid_i, partid_d; - - cdp_enabled = enable; - - partid = RESCTRL_RESERVED_CLOSID; - - if (enable) { - partid_d = resctrl_get_config_index(partid, CDP_CODE); - partid_i = resctrl_get_config_index(partid, CDP_DATA); - regval = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid_d) | - FIELD_PREP(MPAM_SYSREG_PARTID_I, partid_i); - - } else { - regval = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid) | - FIELD_PREP(MPAM_SYSREG_PARTID_I, partid); - } - - WRITE_ONCE(mpam_resctrl_default_group, regval); - - return 0; -} - /* * MSC may raise an error interrupt if it sees an out or range partid/pmg, * and go on to truncate the value. 
Regardless of what the hardware supports, @@ -92,30 +54,6 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) return min((u32)mpam_partid_max + 1, (u32)RESCTRL_MAX_CLOSID); } -bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid) -{ - u64 regval = mpam_get_regval(tsk); - u32 tsk_closid = FIELD_GET(MPAM_SYSREG_PARTID_D, regval); - - if (cdp_enabled) - tsk_closid >>= 1; - - return tsk_closid == closid; -} - -/* The task's pmg is not unique, the partid must be considered too */ -bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid) -{ - u64 regval = mpam_get_regval(tsk); - u32 tsk_closid = FIELD_GET(MPAM_SYSREG_PARTID_D, regval); - u32 tsk_rmid = FIELD_GET(MPAM_SYSREG_PMG_D, regval); - - if (cdp_enabled) - tsk_closid >>= 1; - - return (tsk_closid == closid) && (tsk_rmid == rmid); -} - struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) { if (l >= RDT_NUM_RESOURCES) diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index e3921b0ab836..97d4c8f076e4 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -5,17 +5,8 @@ #define __LINUX_ARM_MPAM_H #include -#include #include -/* - * The value of the MPAM1_EL1 sysreg when a task is in the default group. - * This is used by the context switch code to use the resctrl CPU property - * instead. The value is modified when CDP is enabled/disabled by mounting - * the resctrl filesystem. 
- */ -extern u64 mpam_resctrl_default_group; - struct mpam_msc; enum mpam_msc_iface { @@ -62,8 +53,4 @@ static inline bool resctrl_arch_is_mbm_total_enabled(void) /* reset cached configurations, then all devices */ void resctrl_arch_reset_resources(void); -bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level ignored); -int resctrl_arch_set_cdp_enabled(enum resctrl_res_level ignored, bool enable); -bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid); -bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid); #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From 16c258c77bfb1101b1fcd4fc9d82c1b0d47aded1 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 082/158] anolis: Revert "arm_mpam: resctrl: Implement helpers to update configuration" ANBZ: #31045 This reverts commit ff2893ece6af3ef12a157e29426f1bc2f68de150. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_internal.h | 7 ++- drivers/platform/mpam/mpam_resctrl.c | 65 --------------------------- 2 files changed, 6 insertions(+), 66 deletions(-) diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 36c49cfb9271..612d5f8c0568 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -119,7 +119,12 @@ struct mpam_props }; #define mpam_has_feature(_feat, x) ((1<<_feat) & (x)->features) -#define mpam_set_feature(_feat, x) ((x)->features |= (1<<_feat)) + +static inline void mpam_set_feature(enum mpam_device_features feat, + struct mpam_props *props) +{ + props->features |= (1<class->props; - - partid = resctrl_get_config_index(closid, t); - if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) - return -EINVAL; - - switch (r->rid) { - case RDT_RESOURCE_L2: - case RDT_RESOURCE_L3: - /* TODO: Scaling is not yet supported */ - cfg.cpbm = cfg_val; - mpam_set_feature(mpam_feat_cpor_part, &cfg); - break; - default: - return -EINVAL; - } - - return 
mpam_apply_config(dom->comp, partid, &cfg); -} - -/* TODO: this is IPI heavy */ -int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) -{ - int err = 0; - struct rdt_domain *d; - enum resctrl_conf_type t; - struct resctrl_staged_config *cfg; - - lockdep_assert_cpus_held(); - lockdep_assert_irqs_enabled(); - - list_for_each_entry(d, &r->domains, list) { - for (t = 0; t < CDP_NUM_TYPES; t++) { - cfg = &d->staged_config[t]; - if (!cfg->have_new_ctrl) - continue; - - err = resctrl_arch_update_one(r, d, closid, t, - cfg->new_ctrl); - if (err) - return err; - } - } - - return err; -} - void resctrl_arch_reset_resources(void) { int i, idx; -- Gitee From 8dd55ceea88168e3cacf6353aea0ad260e8a2c11 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 083/158] anolis: Revert "arm_mpam: resctrl: Add resctrl_arch_get_config()" ANBZ: #31045 This reverts commit aade81099c3d06b9b9904bde1fc9b686f6827500. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 3 -- drivers/platform/mpam/mpam_resctrl.c | 44 ---------------------------- 2 files changed, 47 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 5ebb944ad9fc..13f48878c3d5 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -2302,9 +2302,6 @@ int mpam_apply_config(struct mpam_component *comp, u16 partid, lockdep_assert_cpus_held(); - if (!memcmp(&comp->cfg[partid], cfg, sizeof(*cfg))) - return 0; - comp->cfg[partid] = *cfg; arg.comp = comp; arg.partid = partid; diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index cda8f08c0819..82325956a938 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -247,50 +247,6 @@ int mpam_resctrl_setup(void) return err; } -u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, - u32 closid, enum resctrl_conf_type type) -{ - u32 
partid; - struct mpam_config *cfg; - struct mpam_props *cprops; - struct mpam_resctrl_res *res; - struct mpam_resctrl_dom *dom; - enum mpam_device_features configured_by; - - lockdep_assert_cpus_held(); - - if (!mpam_is_enabled()) - return r->default_ctrl; - - res = container_of(r, struct mpam_resctrl_res, resctrl_res); - dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom); - cprops = &res->class->props; - - partid = resctrl_get_config_index(closid, type); - cfg = &dom->comp->cfg[partid]; - - switch (r->rid) { - case RDT_RESOURCE_L2: - case RDT_RESOURCE_L3: - configured_by = mpam_feat_cpor_part; - break; - default: - return -EINVAL; - } - - if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) || - !mpam_has_feature(configured_by, cfg)) - return r->default_ctrl; - - switch (configured_by) { - case mpam_feat_cpor_part: - /* TODO: Scaling is not yet supported */ - return cfg->cpbm; - default: - return -EINVAL; - } -} - void resctrl_arch_reset_resources(void) { int i, idx; -- Gitee From c03f8a39af0acec924fdac22c71a6cbed5cbd4fe Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 084/158] anolis: Revert "arm_mpam: resctrl: Implement resctrl_arch_reset_resources()" ANBZ: #31045 This reverts commit f74eccd1dac9b48fea16c9aa9828e8163febb384. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 2 +- drivers/platform/mpam/mpam_internal.h | 2 -- drivers/platform/mpam/mpam_resctrl.c | 27 --------------------------- include/linux/arm_mpam.h | 3 --- 4 files changed, 1 insertion(+), 33 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 13f48878c3d5..28f6010df6dc 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -2177,7 +2177,7 @@ static void mpam_enable_once(void) READ_ONCE(mpam_partid_max) + 1, mpam_pmg_max + 1); } -void mpam_reset_class(struct mpam_class *class) +static void mpam_reset_class(struct mpam_class *class) { int idx; struct mpam_msc_ris *ris; diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 612d5f8c0568..c9d9abb87cff 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -293,8 +293,6 @@ extern u8 mpam_pmg_max; void mpam_enable(struct work_struct *work); void mpam_disable(struct work_struct *work); -void mpam_reset_class(struct mpam_class *class); - int mpam_apply_config(struct mpam_component *comp, u16 partid, struct mpam_config *cfg); diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index 82325956a938..f71f8c466817 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -247,33 +247,6 @@ int mpam_resctrl_setup(void) return err; } -void resctrl_arch_reset_resources(void) -{ - int i, idx; - struct mpam_class *class; - struct mpam_resctrl_res *res; - - lockdep_assert_cpus_held(); - - if (!mpam_is_enabled()) - return; - - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = &mpam_resctrl_exports[i]; - - if (!res->class) - continue; // dummy resource - - if (!res->resctrl_res.alloc_capable) - continue; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(class, &mpam_classes, classes_list) - 
mpam_reset_class(class); - srcu_read_unlock(&mpam_srcu, idx); - } -} - static struct mpam_resctrl_dom * mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) { diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 97d4c8f076e4..576bb97fa552 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -50,7 +50,4 @@ static inline bool resctrl_arch_is_mbm_total_enabled(void) return false; } -/* reset cached configurations, then all devices */ -void resctrl_arch_reset_resources(void); - #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From 4116d26fb81cb150668ae1ebff6f913bb446cf6d Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 085/158] anolis: Revert "arm_mpam: resctrl: Pick a value for num_rmid" ANBZ: #31045 This reverts commit 205ad25c95f33c9fdfb9cf5bc74c59262e4c3bf9. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index f71f8c466817..d294db50a651 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -180,22 +180,10 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) exposed_alloc_capable = true; } - if (class->level == 3 && cache_has_usable_csu(class)) + if (class->level == 3 && cache_has_usable_csu(class)) { r->mon_capable = true; - } - - if (r->mon_capable) { - exposed_mon_capable = true; - - /* - * Unfortunately, num_rmid doesn't mean anything for - * mpam, and its exposed to user-space! - * num-rmid is supposed to mean the number of groups - * that can be created, both control or monitor groups. - * For mpam, each control group has its own pmg/rmid - * space. 
- */ - r->num_rmid = 1; + exposed_mon_capable = true; + } } return 0; -- Gitee From cd2f8d080503cebb14af34f882f79440ef2a1181 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 086/158] anolis: Revert "arm_mpam: resctrl: Pick the caches we will use as resctrl resources" ANBZ: #31045 This reverts commit 643f32ba32c48b26571138febe759019cc532cee. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_resctrl.c | 134 +-------------------------- include/linux/arm_mpam.h | 7 -- 2 files changed, 4 insertions(+), 137 deletions(-) diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index d294db50a651..b9c1292ff630 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -27,7 +27,6 @@ static struct mpam_resctrl_res mpam_resctrl_exports[RDT_NUM_RESOURCES]; static bool exposed_alloc_capable; static bool exposed_mon_capable; -static struct mpam_class *mbm_local_class; bool resctrl_arch_alloc_capable(void) { @@ -39,11 +38,6 @@ bool resctrl_arch_mon_capable(void) return exposed_mon_capable; } -bool resctrl_arch_is_mbm_local_enabled(void) -{ - return mbm_local_class; -} - /* * MSC may raise an error interrupt if it sees an out or range partid/pmg, * and go on to truncate the value. 
Regardless of what the hardware supports, @@ -62,133 +56,14 @@ struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) return &mpam_resctrl_exports[l].resctrl_res; } -static bool cache_has_usable_cpor(struct mpam_class *class) -{ - struct mpam_props *cprops = &class->props; - - if (!mpam_has_feature(mpam_feat_cpor_part, cprops)) - return false; - - /* TODO: Scaling is not yet supported */ - return (class->props.cpbm_wd <= RESCTRL_MAX_CBM); -} - -static bool cache_has_usable_csu(struct mpam_class *class) -{ - struct mpam_props *cprops; - - if (!class) - return false; - - cprops = &class->props; - - if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) - return false; - - /* - * CSU counters settle on the value, so we can get away with - * having only one. - */ - if (!cprops->num_csu_mon) - return false; - - return (mpam_partid_max > 1) || (mpam_pmg_max != 0); -} - -bool resctrl_arch_is_llc_occupancy_enabled(void) -{ - return cache_has_usable_csu(mpam_resctrl_exports[RDT_RESOURCE_L3].class); -} - -/* Test whether we can export MPAM_CLASS_CACHE:{2,3}? 
*/ -static void mpam_resctrl_pick_caches(void) -{ - int idx; - struct mpam_class *class; - struct mpam_resctrl_res *res; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(class, &mpam_classes, classes_list) { - bool has_cpor = cache_has_usable_cpor(class); - - if (class->type != MPAM_CLASS_CACHE) { - pr_debug("pick_caches: Class is not a cache\n"); - continue; - } - - if (class->level != 2 && class->level != 3) { - pr_debug("pick_caches: not L2 or L3\n"); - continue; - } - - if (class->level == 2 && !has_cpor) { - pr_debug("pick_caches: L2 missing CPOR\n"); - continue; - } else if (!has_cpor && !cache_has_usable_csu(class)) { - pr_debug("pick_caches: Cache misses CPOR and CSU\n"); - continue; - } - - if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { - pr_debug("pick_caches: Class has missing CPUs\n"); - continue; - } - - if (class->level == 2) { - res = &mpam_resctrl_exports[RDT_RESOURCE_L2]; - res->resctrl_res.name = "L2"; - } else { - res = &mpam_resctrl_exports[RDT_RESOURCE_L3]; - res->resctrl_res.name = "L3"; - } - res->class = class; - } - srcu_read_unlock(&mpam_srcu, idx); -} - static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) { - struct mpam_class *class = res->class; - struct rdt_resource *r = &res->resctrl_res; - - /* Is this one of the two well-known caches? */ - if (res->resctrl_res.rid == RDT_RESOURCE_L2 || - res->resctrl_res.rid == RDT_RESOURCE_L3) { - /* TODO: Scaling is not yet supported */ - r->cache.cbm_len = class->props.cpbm_wd; - r->cache.arch_has_sparse_bitmasks = true; - - /* mpam_devices will reject empty bitmaps */ - r->cache.min_cbm_bits = 1; - - /* TODO: kill these properties off as they are derivatives */ - r->format_str = "%d=%0*x"; - r->fflags = RFTYPE_RES_CACHE; - r->default_ctrl = BIT_MASK(class->props.cpbm_wd) - 1; - r->data_width = (class->props.cpbm_wd + 3) / 4; - - /* - * Which bits are shared with other ...things... - * Unknown devices use partid-0 which uses all the bitmap - * fields. 
Until we configured the SMMU and GIC not to do this - * 'all the bits' is the correct answer here. - */ - r->cache.shareable_bits = r->default_ctrl; - - if (mpam_has_feature(mpam_feat_cpor_part, &class->props)) { - r->alloc_capable = true; - exposed_alloc_capable = true; - } - - if (class->level == 3 && cache_has_usable_csu(class)) { - r->mon_capable = true; - exposed_mon_capable = true; - } - } + /* TODO: initialise the resctrl resources */ return 0; } +/* Called with the mpam classes lock held */ int mpam_resctrl_setup(void) { int err = 0; @@ -203,7 +78,7 @@ int mpam_resctrl_setup(void) res->resctrl_res.rid = i; } - mpam_resctrl_pick_caches(); + /* TODO: pick MPAM classes to map to resctrl resources */ for (i = 0; i < RDT_NUM_RESOURCES; i++) { res = &mpam_resctrl_exports[i]; @@ -228,8 +103,7 @@ int mpam_resctrl_setup(void) */ pr_warn("Number of PMG is not a power of 2! resctrl may misbehave"); } - - /* TODO: call resctrl_init() */ + err = resctrl_init(); } return err; diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 576bb97fa552..27c3ad9912ef 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -42,12 +42,5 @@ int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, bool resctrl_arch_alloc_capable(void); bool resctrl_arch_mon_capable(void); -bool resctrl_arch_is_llc_occupancy_enabled(void); -bool resctrl_arch_is_mbm_local_enabled(void); - -static inline bool resctrl_arch_is_mbm_total_enabled(void) -{ - return false; -} #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From 8f38e0c20d0e7f3e515a4793fa769744acb02444 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:43:52 -0800 Subject: [PATCH 087/158] anolis: Revert "arm_mpam: resctrl: Add boilerplate cpuhp and domain allocation" ANBZ: #31045 This reverts commit af0e625adc981d1a3ada7824fd17bc7d51f3a29b. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/Makefile | 2 +- drivers/platform/mpam/mpam_devices.c | 12 -- drivers/platform/mpam/mpam_internal.h | 15 -- drivers/platform/mpam/mpam_resctrl.c | 237 -------------------------- include/linux/arm_mpam.h | 4 - 5 files changed, 1 insertion(+), 269 deletions(-) delete mode 100644 drivers/platform/mpam/mpam_resctrl.c diff --git a/drivers/platform/mpam/Makefile b/drivers/platform/mpam/Makefile index 37693be531c3..8ad69bfa2aa2 100644 --- a/drivers/platform/mpam/Makefile +++ b/drivers/platform/mpam/Makefile @@ -1 +1 @@ -obj-$(CONFIG_ARM_CPU_RESCTRL) += mpam_devices.o mpam_resctrl.o +obj-$(CONFIG_ARM_CPU_RESCTRL) += mpam_devices.o diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 28f6010df6dc..f9f22d1d698c 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -1411,9 +1411,6 @@ static int mpam_cpu_online(unsigned int cpu) } srcu_read_unlock(&mpam_srcu, idx); - if (mpam_is_enabled()) - mpam_resctrl_online_cpu(cpu); - return 0; } @@ -1473,9 +1470,6 @@ static int mpam_cpu_offline(unsigned int cpu) } srcu_read_unlock(&mpam_srcu, idx); - if (mpam_is_enabled()) - mpam_resctrl_offline_cpu(cpu); - return 0; } @@ -2146,12 +2140,6 @@ static void mpam_enable_once(void) mutex_unlock(&mpam_list_lock); cpus_read_unlock(); - if (!err) { - err = mpam_resctrl_setup(); - if (err) - pr_err("Failed to initialise resctrl: %d\n", err); - } - if (err) { schedule_work(&mpam_broken_work); return; diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index c9d9abb87cff..3f9478c90faf 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -239,16 +239,6 @@ struct mpam_msc_ris { struct msmon_mbwu_state *mbwu_state; }; -struct mpam_resctrl_dom { - struct mpam_component *comp; - struct rdt_domain resctrl_dom; -}; - -struct mpam_resctrl_res { - struct mpam_class *class; - struct 
rdt_resource resctrl_res; -}; - static inline int mpam_alloc_csu_mon(struct mpam_class *class) { struct mpam_props *cprops = &class->props; @@ -300,11 +290,6 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, enum mpam_device_features, u64 *val); void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx); -int mpam_resctrl_online_cpu(unsigned int cpu); -int mpam_resctrl_offline_cpu(unsigned int cpu); - -int mpam_resctrl_setup(void); - /* * MPAM MSCs have the following register layout. See: * Arm Architecture Reference Manual Supplement - Memory System Resource diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c deleted file mode 100644 index b9c1292ff630..000000000000 --- a/drivers/platform/mpam/mpam_resctrl.c +++ /dev/null @@ -1,237 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2021 Arm Ltd. - -#define pr_fmt(fmt) "mpam: resctrl: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "mpam_internal.h" - -/* - * The classes we've picked to map to resctrl resources. - * Class pointer may be NULL. - */ -static struct mpam_resctrl_res mpam_resctrl_exports[RDT_NUM_RESOURCES]; - -static bool exposed_alloc_capable; -static bool exposed_mon_capable; - -bool resctrl_arch_alloc_capable(void) -{ - return exposed_alloc_capable; -} - -bool resctrl_arch_mon_capable(void) -{ - return exposed_mon_capable; -} - -/* - * MSC may raise an error interrupt if it sees an out or range partid/pmg, - * and go on to truncate the value. Regardless of what the hardware supports, - * only the system wide safe value is safe to use. 
- */ -u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) -{ - return min((u32)mpam_partid_max + 1, (u32)RESCTRL_MAX_CLOSID); -} - -struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) -{ - if (l >= RDT_NUM_RESOURCES) - return NULL; - - return &mpam_resctrl_exports[l].resctrl_res; -} - -static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) -{ - /* TODO: initialise the resctrl resources */ - - return 0; -} - -/* Called with the mpam classes lock held */ -int mpam_resctrl_setup(void) -{ - int err = 0; - struct mpam_resctrl_res *res; - enum resctrl_res_level i; - - cpus_read_lock(); - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = &mpam_resctrl_exports[i]; - INIT_LIST_HEAD(&res->resctrl_res.domains); - INIT_LIST_HEAD(&res->resctrl_res.evt_list); - res->resctrl_res.rid = i; - } - - /* TODO: pick MPAM classes to map to resctrl resources */ - - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = &mpam_resctrl_exports[i]; - if (!res->class) - continue; // dummy resource - - err = mpam_resctrl_resource_init(res); - if (err) - break; - } - cpus_read_unlock(); - - if (!err && !exposed_alloc_capable && !exposed_mon_capable) - err = -EOPNOTSUPP; - - if (!err) { - if (!is_power_of_2(mpam_pmg_max + 1)) { - /* - * If not all the partid*pmg values are valid indexes, - * resctrl may allocate pmg that don't exist. This - * should cause an error interrupt. - */ - pr_warn("Number of PMG is not a power of 2! resctrl may misbehave"); - } - err = resctrl_init(); - } - - return err; -} - -static struct mpam_resctrl_dom * -mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) -{ - struct mpam_resctrl_dom *dom; - struct mpam_class *class = res->class; - struct mpam_component *comp_iter, *comp; - - comp = NULL; - list_for_each_entry(comp_iter, &class->components, class_list) { - if (cpumask_test_cpu(cpu, &comp_iter->affinity)) { - comp = comp_iter; - break; - } - } - - /* cpu with unknown exported component? 
*/ - if (WARN_ON_ONCE(!comp)) - return ERR_PTR(-EINVAL); - - dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu)); - if (!dom) - return ERR_PTR(-ENOMEM); - - dom->comp = comp; - INIT_LIST_HEAD(&dom->resctrl_dom.list); - dom->resctrl_dom.id = comp->comp_id; - cpumask_set_cpu(cpu, &dom->resctrl_dom.cpu_mask); - - /* TODO: this list should be sorted */ - list_add_tail(&dom->resctrl_dom.list, &res->resctrl_res.domains); - - return dom; -} - -/* Like resctrl_get_domain_from_cpu(), but for offline CPUs */ -static struct mpam_resctrl_dom * -mpam_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) -{ - struct rdt_domain *d; - struct mpam_resctrl_dom *dom; - - lockdep_assert_cpus_held(); - - list_for_each_entry(d, &res->resctrl_res.domains, list) { - dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom); - - if (cpumask_test_cpu(cpu, &dom->comp->affinity)) - return dom; - } - - return NULL; -} - -struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id) -{ - struct rdt_domain *d; - struct mpam_resctrl_dom *dom; - - lockdep_assert_cpus_held(); - - list_for_each_entry(d, &r->domains, list) { - dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom); - if (dom->comp->comp_id == id) - return &dom->resctrl_dom; - } - - return NULL; -} - -int mpam_resctrl_online_cpu(unsigned int cpu) -{ - int i; - struct mpam_resctrl_dom *dom; - struct mpam_resctrl_res *res; - - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = &mpam_resctrl_exports[i]; - - if (!res->class) - continue; // dummy_resource; - - dom = mpam_get_domain_from_cpu(cpu, res); - if (dom) { - cpumask_set_cpu(cpu, &dom->resctrl_dom.cpu_mask); - continue; - } - - dom = mpam_resctrl_alloc_domain(cpu, res); - if (IS_ERR(dom)) - return PTR_ERR(dom); - } - - return 0; -} - -int mpam_resctrl_offline_cpu(unsigned int cpu) -{ - int i; - struct rdt_domain *d; - struct mpam_resctrl_res *res; - struct mpam_resctrl_dom *dom; - - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = 
&mpam_resctrl_exports[i]; - - if (!res->class) - continue; // dummy resource - - d = resctrl_get_domain_from_cpu(cpu, &res->resctrl_res); - dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom); - - /* The last one standing was ahead of us... */ - if (WARN_ON_ONCE(!d)) - continue; - - cpumask_clear_cpu(cpu, &d->cpu_mask); - - if (!cpumask_empty(&d->cpu_mask)) - continue; - - list_del(&d->list); - kfree(dom); - } - - return 0; -} diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 27c3ad9912ef..40e09b4d236b 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -39,8 +39,4 @@ int mpam_register_requestor(u16 partid_max, u8 pmg_max); int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, enum mpam_class_types type, u8 class_id, int component_id); - -bool resctrl_arch_alloc_capable(void); -bool resctrl_arch_mon_capable(void); - #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From cf84e7ef656b6dcde37f98b7e5031ab4a87647eb Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 088/158] anolis: Revert "arm_mpam: Add helper to reset saved mbwu state" ANBZ: #31045 This reverts commit f9b460fae0521518794a2e5bc62818a1f42b7505. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 44 ++++----------------------- drivers/platform/mpam/mpam_internal.h | 5 +-- 2 files changed, 7 insertions(+), 42 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index f9f22d1d698c..7f1f848281be 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -919,11 +919,9 @@ static void __ris_msmon_read(void *arg) { bool nrdy = false; unsigned long flags; - bool config_mismatch; struct mon_read *m = arg; u64 now, overflow_val = 0; struct mon_cfg *ctx = m->ctx; - bool reset_on_next_read = false; struct mpam_msc_ris *ris = m->ris; struct mpam_msc *msc = m->ris->msc; struct msmon_mbwu_state *mbwu_state; @@ -936,24 +934,13 @@ static void __ris_msmon_read(void *arg) FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); - if (m->type == mpam_feat_msmon_mbwu) { - mbwu_state = &ris->mbwu_state[ctx->mon]; - if (mbwu_state) { - reset_on_next_read = mbwu_state->reset_on_next_read; - mbwu_state->reset_on_next_read = false; - } - } - /* * Read the existing configuration to avoid re-writing the same values. * This saves waiting for 'nrdy' on subsequent reads. 
*/ read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt); gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val); - config_mismatch = cur_flt != flt_val || - cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN); - - if (config_mismatch || reset_on_next_read) + if (cur_flt != flt_val || cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN)) write_msmon_ctl_flt_vals(m, ctl_val, flt_val); switch (m->type) { @@ -983,6 +970,7 @@ static void __ris_msmon_read(void *arg) if (nrdy) break; + mbwu_state = &ris->mbwu_state[ctx->mon]; if (!mbwu_state) break; @@ -1076,30 +1064,6 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, return err; } -void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx) -{ - int idx; - unsigned long flags; - struct mpam_msc *msc; - struct mpam_msc_ris *ris; - - if (!mpam_is_enabled()) - return; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(ris, &comp->ris, comp_list) { - if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) - continue; - - msc = ris->msc; - spin_lock_irqsave(&msc->mon_sel_lock, flags); - ris->mbwu_state[ctx->mon].correction = 0; - ris->mbwu_state[ctx->mon].reset_on_next_read = true; - spin_unlock_irqrestore(&msc->mon_sel_lock, flags); - } - srcu_read_unlock(&mpam_srcu, idx); -} - static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) { u32 num_words, msb; @@ -1226,6 +1190,8 @@ static int mpam_restore_mbwu_state(void *_ris) struct mon_read mwbu_arg; struct mpam_msc_ris *ris = _ris; + lockdep_assert_held(&ris->msc->lock); + for (i = 0; i < ris->props.num_mbwu_mon; i++) { if (ris->mbwu_state[i].enabled) { mwbu_arg.ris = ris; @@ -1250,6 +1216,8 @@ static int mpam_save_mbwu_state(void *arg) struct mpam_msc *msc = ris->msc; struct msmon_mbwu_state *mbwu_state; + lockdep_assert_held(&msc->lock); + for (i = 0; i < ris->props.num_mbwu_mon; i++) { mbwu_state = &ris->mbwu_state[i]; cfg = &mbwu_state->cfg; diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 
3f9478c90faf..cafcce3e9efb 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -199,12 +199,10 @@ struct mon_cfg { /* * Changes to enabled and cfg are protected by the msc->lock. - * Changes to reset_on_next_read, prev_val and correction are protected by the - * msc's mon_sel_lock. + * Changes to prev_val and correction are protected by the msc's mon_sel_lock. */ struct msmon_mbwu_state { bool enabled; - bool reset_on_next_read; struct mon_cfg cfg; /* The value last read from the hardware. Used to detect overflow. */ @@ -288,7 +286,6 @@ int mpam_apply_config(struct mpam_component *comp, u16 partid, int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, enum mpam_device_features, u64 *val); -void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx); /* * MPAM MSCs have the following register layout. See: -- Gitee From 14c48e7e5fc12f059b3808be8ac8b46c664272eb Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 089/158] anolis: Revert "arm_mpam: Use long MBWU counters if supported" ANBZ: #31045 This reverts commit bd889c51c020c81b83588819c0827753381d7217. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 86 +++------------------------ drivers/platform/mpam/mpam_internal.h | 7 +-- 2 files changed, 9 insertions(+), 84 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 7f1f848281be..c2c5dfa77083 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -772,48 +772,6 @@ struct mon_read int err; }; -static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris) -{ - return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) || - mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props)); -} - -static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc) -{ - int retry = 3; - u32 mbwu_l_low; - u64 mbwu_l_high1, mbwu_l_high2; - - lockdep_assert_held_once(&msc->mon_sel_lock); - - WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz); - WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); - - mbwu_l_high2 = readl_relaxed(msc->mapped_hwpage + MSMON_MBWU_L + 4); - do { - mbwu_l_high1 = mbwu_l_high2; - mbwu_l_low = readl_relaxed(msc->mapped_hwpage + MSMON_MBWU_L); - mbwu_l_high2 = readl_relaxed(msc->mapped_hwpage + MSMON_MBWU_L + 4); - - retry--; - } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0); - - if (mbwu_l_high2 == mbwu_l_high1) - return (mbwu_l_high1 << 32) | mbwu_l_low; - return MSMON___NRDY_L; -} - -static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc) -{ - lockdep_assert_held_once(&msc->mon_sel_lock); - - WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz); - WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); - - writel_relaxed(0, msc->mapped_hwpage + MSMON_MBWU_L); - writel_relaxed(0, msc->mapped_hwpage + MSMON_MBWU_L + 4); -} - static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, u32 *flt_val) { @@ -886,12 +844,7 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, case 
mpam_feat_msmon_mbwu: mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val); - - if (mpam_ris_has_mbwu_long_counter(m->ris)) - mpam_msc_zero_mbwu_l(m->ris->msc); - else - mpam_write_monsel_reg(msc, MBWU, 0); - + mpam_write_monsel_reg(msc, MBWU, 0); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val|MSMON_CFG_x_CTL_EN); mbwu_state = &m->ris->mbwu_state[m->ctx->mon]; @@ -906,13 +859,8 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, static u64 mpam_msmon_overflow_val(struct mpam_msc_ris *ris) { - /* TODO: implement scaling counters */ - if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props)) - return GENMASK_ULL(62, 0); - else if (mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props)) - return GENMASK_ULL(43, 0); - else - return GENMASK_ULL(30, 0); + /* TODO: scaling, and long counters */ + return GENMASK_ULL(30, 0); } static void __ris_msmon_read(void *arg) @@ -950,22 +898,9 @@ static void __ris_msmon_read(void *arg) now = FIELD_GET(MSMON___VALUE, now); break; case mpam_feat_msmon_mbwu: - /* - * If long or lwd counters are supported, use them, else revert - * to the 32 bit counter. 
- */ - if (mpam_ris_has_mbwu_long_counter(ris)) { - now = mpam_msc_read_mbwu_l(msc); - nrdy = now & MSMON___NRDY_L; - if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props)) - now = FIELD_GET(MSMON___LWD_VALUE, now); - else - now = FIELD_GET(MSMON___L_VALUE, now); - } else { - now = mpam_read_monsel_reg(msc, MBWU); - nrdy = now & MSMON___NRDY; - now = FIELD_GET(MSMON___VALUE, now); - } + now = mpam_read_monsel_reg(msc, MBWU); + nrdy = now & MSMON___NRDY; + now = FIELD_GET(MSMON___VALUE, now); if (nrdy) break; @@ -1231,13 +1166,8 @@ static int mpam_save_mbwu_state(void *arg) cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0); - if (mpam_ris_has_mbwu_long_counter(ris)) { - val = mpam_msc_read_mbwu_l(msc); - mpam_msc_zero_mbwu_l(msc); - } else { - val = mpam_read_monsel_reg(msc, MBWU); - mpam_write_monsel_reg(msc, MBWU, 0); - } + val = mpam_read_monsel_reg(msc, MBWU); + mpam_write_monsel_reg(msc, MBWU, 0); cfg->mon = i; cfg->pmg = FIELD_GET(MSMON_CFG_MBWU_FLT_PMG, cur_flt); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index cafcce3e9efb..99790ba74768 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -330,8 +330,6 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, #define MSMON_CSU_CAPTURE 0x0848 /* last cache-usage value captured */ #define MSMON_MBWU 0x0860 /* current mem-bw usage value */ #define MSMON_MBWU_CAPTURE 0x0868 /* last mem-bw value captured */ -#define MSMON_MBWU_L 0x0880 /* current long mem-bw usage value */ -#define MSMON_MBWU_CAPTURE_L 0x0890 /* last long mem-bw value captured */ #define MSMON_CAPT_EVNT 0x0808 /* signal a capture event */ #define MPAMF_ESR 0x00F8 /* error status register */ #define MPAMF_ECR 0x00F0 /* error control register */ @@ -535,10 +533,7 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, */ #define MSMON___VALUE GENMASK(30, 0) #define 
MSMON___NRDY BIT(31) -#define MSMON___NRDY_L BIT(63) -#define MSMON___L_VALUE GENMASK(43, 0) -#define MSMON___LWD_VALUE GENMASK(62, 0) - +#define MSMON_MBWU_L_VALUE GENMASK(62, 0) /* * MSMON_CAPT_EVNT - Memory system performance monitoring capture event * generation register -- Gitee From 1ebf2cb8530fc7d02c951d268760477852cb8464 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 090/158] anolis: Revert "arm_mpam: Probe for long/lwd mbwu counters" ANBZ: #31045 This reverts commit 8b0268977fe1a52c7ba59dbdcffa8e1970fee05f. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 22 ---------------------- drivers/platform/mpam/mpam_internal.h | 7 ------- 2 files changed, 29 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index c2c5dfa77083..930e14471378 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -653,7 +653,6 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) pr_err_once("Counters are not usable because not-ready timeout was not provided by firmware."); } if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) { - bool has_long; u32 mbwumonidr = mpam_read_partsel_reg(msc, MBWUMON_IDR); props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumonidr); @@ -662,27 +661,6 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumonidr)) mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); - - /* - * Treat long counter and its extension, lwd as mutually - * exclusive feature bits. Though these are dependent - * fields at the implementation level, there would never - * be a need for mpam_feat_msmon_mbwu_44counter (long - * counter) and mpam_feat_msmon_mbwu_63counter (lwd) - * bits to be set together. 
- * - * mpam_feat_msmon_mbwu isn't treated as an exclusive - * bit as this feature bit would be used as the "front - * facing feature bit" for any checks related to mbwu - * monitors. - */ - has_long = FIELD_GET(MPAMF_MBWUMON_IDR_HAS_LONG, mbwumonidr); - if (props->num_mbwu_mon && has_long) { - if (FIELD_GET(MPAMF_MBWUMON_IDR_LWD, mbwumonidr)) - mpam_set_feature(mpam_feat_msmon_mbwu_63counter, props); - else - mpam_set_feature(mpam_feat_msmon_mbwu_44counter, props); - } } } diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 99790ba74768..e546a8612dab 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -88,14 +88,7 @@ enum mpam_device_features { mpam_feat_msmon, mpam_feat_msmon_csu, mpam_feat_msmon_csu_capture, - /* - * Having mpam_feat_msmon_mbwu set doesn't mean the regular 31 bit MBWU - * counter would be used. The exact counter used is decided based on the - * status of mpam_feat_msmon_mbwu_l/mpam_feat_msmon_mbwu_lwd as well. - */ mpam_feat_msmon_mbwu, - mpam_feat_msmon_mbwu_44counter, - mpam_feat_msmon_mbwu_63counter, mpam_feat_msmon_mbwu_capture, mpam_feat_msmon_mbwu_rwbw, mpam_feat_msmon_capt, -- Gitee From 2e8ab26315ee8af0d59c8fe7566db37a96d840a2 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 091/158] anolis: Revert "arm_mpam: Track bandwidth counter state for overflow and power management" ANBZ: #31045 This reverts commit 3e4545b2dd5ec7899a522b90dad3de45d3c0855e. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 148 +------------------------- drivers/platform/mpam/mpam_internal.h | 49 +++------ 2 files changed, 17 insertions(+), 180 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 930e14471378..3516528f2e14 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -773,7 +773,6 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, *ctl_val |= MSMON_CFG_x_CTL_MATCH_PARTID; *flt_val = FIELD_PREP(MSMON_CFG_MBWU_FLT_PARTID, ctx->partid); - *flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_RWBW, ctx->opts); if (m->ctx->match_pmg) { *ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG; *flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_PMG, ctx->pmg); @@ -806,7 +805,6 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, u32 flt_val) { struct mpam_msc *msc = m->ris->msc; - struct msmon_mbwu_state *mbwu_state; /* * Write the ctl_val with the enable bit cleared, reset the counter, @@ -824,33 +822,21 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val); mpam_write_monsel_reg(msc, MBWU, 0); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val|MSMON_CFG_x_CTL_EN); - - mbwu_state = &m->ris->mbwu_state[m->ctx->mon]; - if (mbwu_state) - mbwu_state->prev_val = 0; - break; default: return; } } -static u64 mpam_msmon_overflow_val(struct mpam_msc_ris *ris) -{ - /* TODO: scaling, and long counters */ - return GENMASK_ULL(30, 0); -} - static void __ris_msmon_read(void *arg) { + u64 now; bool nrdy = false; unsigned long flags; struct mon_read *m = arg; - u64 now, overflow_val = 0; struct mon_cfg *ctx = m->ctx; struct mpam_msc_ris *ris = m->ris; struct mpam_msc *msc = m->ris->msc; - struct msmon_mbwu_state *mbwu_state; u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt; lockdep_assert_held(&msc->lock); @@ -872,41 +858,22 @@ static void __ris_msmon_read(void *arg) 
switch (m->type) { case mpam_feat_msmon_csu: now = mpam_read_monsel_reg(msc, CSU); - nrdy = now & MSMON___NRDY; - now = FIELD_GET(MSMON___VALUE, now); break; case mpam_feat_msmon_mbwu: now = mpam_read_monsel_reg(msc, MBWU); - nrdy = now & MSMON___NRDY; - now = FIELD_GET(MSMON___VALUE, now); - - if (nrdy) - break; - - mbwu_state = &ris->mbwu_state[ctx->mon]; - if (!mbwu_state) - break; - - /* Add any pre-overflow value to the mbwu_state->val */ - if (mbwu_state->prev_val > now) - overflow_val = mpam_msmon_overflow_val(ris) - mbwu_state->prev_val; - - mbwu_state->prev_val = now; - mbwu_state->correction += overflow_val; - - /* Include bandwidth consumed before the last hardware reset */ - now += mbwu_state->correction; break; default: return; } spin_unlock_irqrestore(&msc->mon_sel_lock, flags); + nrdy = now & MSMON___NRDY; if (nrdy) { m->err = -EBUSY; return; } + now = FIELD_GET(MSMON___VALUE, now); *(m->val) += now; } @@ -1097,68 +1064,6 @@ static int mpam_reprogram_ris(void *_arg) return 0; } -static int mpam_restore_mbwu_state(void *_ris) -{ - int i; - struct mon_read mwbu_arg; - struct mpam_msc_ris *ris = _ris; - - lockdep_assert_held(&ris->msc->lock); - - for (i = 0; i < ris->props.num_mbwu_mon; i++) { - if (ris->mbwu_state[i].enabled) { - mwbu_arg.ris = ris; - mwbu_arg.ctx = &ris->mbwu_state[i].cfg; - mwbu_arg.type = mpam_feat_msmon_mbwu; - - __ris_msmon_read(&mwbu_arg); - } - } - - return 0; -} - -static int mpam_save_mbwu_state(void *arg) -{ - int i; - u64 val; - struct mon_cfg *cfg; - unsigned long flags; - u32 cur_flt, cur_ctl, mon_sel; - struct mpam_msc_ris *ris = arg; - struct mpam_msc *msc = ris->msc; - struct msmon_mbwu_state *mbwu_state; - - lockdep_assert_held(&msc->lock); - - for (i = 0; i < ris->props.num_mbwu_mon; i++) { - mbwu_state = &ris->mbwu_state[i]; - cfg = &mbwu_state->cfg; - - spin_lock_irqsave(&msc->mon_sel_lock, flags); - mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, i) | - FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); - 
mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); - - cur_flt = mpam_read_monsel_reg(msc, CFG_MBWU_FLT); - cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); - mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0); - - val = mpam_read_monsel_reg(msc, MBWU); - mpam_write_monsel_reg(msc, MBWU, 0); - - cfg->mon = i; - cfg->pmg = FIELD_GET(MSMON_CFG_MBWU_FLT_PMG, cur_flt); - cfg->match_pmg = FIELD_GET(MSMON_CFG_x_CTL_MATCH_PMG, cur_ctl); - cfg->partid = FIELD_GET(MSMON_CFG_MBWU_FLT_PARTID, cur_flt); - mbwu_state->correction += val; - mbwu_state->enabled = FIELD_GET(MSMON_CFG_x_CTL_EN, cur_ctl); - spin_unlock_irqrestore(&msc->mon_sel_lock, flags); - } - - return 0; -} - /* * Called via smp_call_on_cpu() to prevent migration, while still being * pre-emptible. @@ -1220,9 +1125,6 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online) * for non-zero partid may be lost while the CPUs are offline. */ ris->in_reset_state = online; - - if (mpam_is_enabled() && !online) - mpam_touch_msc(msc, &mpam_save_mbwu_state, ris); } srcu_read_unlock(&mpam_srcu, idx); } @@ -1254,9 +1156,6 @@ static void mpam_reprogram_msc(struct mpam_msc *msc) mpam_reprogram_ris_partid(ris, partid, cfg); } ris->in_reset_state = reset; - - if (mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) - mpam_touch_msc(msc, &mpam_restore_mbwu_state, ris); } srcu_read_unlock(&mpam_srcu, idx); } @@ -1915,31 +1814,8 @@ static void mpam_unregister_irqs(void) cpus_read_unlock(); } -static void __destroy_component_cfg(struct mpam_component *comp) -{ - unsigned long flags; - struct mpam_msc_ris *ris; - struct msmon_mbwu_state *mbwu_state; - - kfree(comp->cfg); - list_for_each_entry(ris, &comp->ris, comp_list) { - mutex_lock(&ris->msc->lock); - spin_lock_irqsave(&ris->msc->mon_sel_lock, flags); - mbwu_state = ris->mbwu_state; - ris->mbwu_state = NULL; - spin_unlock_irqrestore(&ris->msc->mon_sel_lock, flags); - mutex_unlock(&ris->msc->lock); - - kfree(mbwu_state); - } -} - static int __allocate_component_cfg(struct 
mpam_component *comp) { - unsigned long flags; - struct mpam_msc_ris *ris; - struct msmon_mbwu_state *mbwu_state; - if (comp->cfg) return 0; @@ -1947,24 +1823,6 @@ static int __allocate_component_cfg(struct mpam_component *comp) if (!comp->cfg) return -ENOMEM; - list_for_each_entry(ris, &comp->ris, comp_list) { - if (!ris->props.num_mbwu_mon) - continue; - - mbwu_state = kcalloc(ris->props.num_mbwu_mon, - sizeof(*ris->mbwu_state), GFP_KERNEL); - if (!mbwu_state) { - __destroy_component_cfg(comp); - return -ENOMEM; - } - - mutex_lock(&ris->msc->lock); - spin_lock_irqsave(&ris->msc->mon_sel_lock, flags); - ris->mbwu_state = mbwu_state; - spin_unlock_irqrestore(&ris->msc->mon_sel_lock, flags); - mutex_unlock(&ris->msc->lock); - } - return 0; } diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index e546a8612dab..19aedaa7e831 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -175,39 +175,6 @@ struct mpam_component struct mpam_class *class; }; -/* The values for MSMON_CFG_MBWU_FLT.RWBW */ -enum mon_filter_options { - COUNT_BOTH = 0, - COUNT_WRITE = 1, - COUNT_READ = 2, -}; - -struct mon_cfg { - u16 mon; - u8 pmg; - bool match_pmg; - u32 partid; - enum mon_filter_options opts; -}; - -/* - * Changes to enabled and cfg are protected by the msc->lock. - * Changes to prev_val and correction are protected by the msc's mon_sel_lock. - */ -struct msmon_mbwu_state { - bool enabled; - struct mon_cfg cfg; - - /* The value last read from the hardware. Used to detect overflow. */ - u64 prev_val; - - /* - * The value to add to the new reading to account for power management, - * and shifts to trigger the overflow interrupt. 
- */ - u64 correction; -}; - struct mpam_msc_ris { u8 ris_idx; u64 idr; @@ -225,9 +192,21 @@ struct mpam_msc_ris { /* parents: */ struct mpam_msc *msc; struct mpam_component *comp; +}; + +/* The values for MSMON_CFG_MBWU_FLT.RWBW */ +enum mon_filter_options { + COUNT_BOTH = 0, + COUNT_WRITE = 1, + COUNT_READ = 2, +}; - /* msmon mbwu configuration is preserved over reset */ - struct msmon_mbwu_state *mbwu_state; +struct mon_cfg { + u16 mon; + u8 pmg; + bool match_pmg; + u32 partid; + enum mon_filter_options opts; }; static inline int mpam_alloc_csu_mon(struct mpam_class *class) -- Gitee From 882dfe3479eb1b2f1e4145361dabf2f7b698929e Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 092/158] anolis: Revert "arm_mpam: Add mpam_msmon_read() to read monitor value" ANBZ: #31045 This reverts commit 286806f020804e571bfb2d834201c05274960b8d. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 219 -------------------------- drivers/platform/mpam/mpam_internal.h | 19 --- 2 files changed, 238 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 3516528f2e14..82dffe8c03ef 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -123,22 +123,6 @@ static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val) __mpam_write_reg(msc, MPAMCFG_##reg, val); \ }) -#define mpam_read_monsel_reg(msc, reg) \ -({ \ - u32 ____ret; \ - \ - lockdep_assert_held_once(&msc->mon_sel_lock); \ - ____ret = __mpam_read_reg(msc, MSMON_##reg); \ - \ - ____ret; \ -}) - -#define mpam_write_monsel_reg(msc, reg, val) \ -({ \ - lockdep_assert_held_once(&msc->mon_sel_lock); \ - __mpam_write_reg(msc, MSMON_##reg, val); \ -}) - static u64 mpam_msc_read_idr(struct mpam_msc *msc) { u64 idr_high = 0, idr_low; @@ -741,209 +725,6 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) return 0; } -struct mon_read -{ - struct mpam_msc_ris *ris; - struct mon_cfg 
*ctx; - enum mpam_device_features type; - u64 *val; - int err; -}; - -static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, - u32 *flt_val) -{ - struct mon_cfg *ctx = m->ctx; - - switch (m->type) { - case mpam_feat_msmon_csu: - *ctl_val = MSMON_CFG_MBWU_CTL_TYPE_CSU; - break; - case mpam_feat_msmon_mbwu: - *ctl_val = MSMON_CFG_MBWU_CTL_TYPE_MBWU; - break; - default: - return; - } - - /* - * For CSU counters its implementation-defined what happens when not - * filtering by partid. - */ - *ctl_val |= MSMON_CFG_x_CTL_MATCH_PARTID; - - *flt_val = FIELD_PREP(MSMON_CFG_MBWU_FLT_PARTID, ctx->partid); - if (m->ctx->match_pmg) { - *ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG; - *flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_PMG, ctx->pmg); - } - - if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props)) - *flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_RWBW, ctx->opts); -} - -static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, - u32 *flt_val) -{ - struct mpam_msc *msc = m->ris->msc; - - switch (m->type) { - case mpam_feat_msmon_csu: - *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL); - *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT); - break; - case mpam_feat_msmon_mbwu: - *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); - *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT); - break; - default: - return; - } -} - -static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, - u32 flt_val) -{ - struct mpam_msc *msc = m->ris->msc; - - /* - * Write the ctl_val with the enable bit cleared, reset the counter, - * then enable counter. 
- */ - switch (m->type) { - case mpam_feat_msmon_csu: - mpam_write_monsel_reg(msc, CFG_CSU_FLT, flt_val); - mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val); - mpam_write_monsel_reg(msc, CSU, 0); - mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val|MSMON_CFG_x_CTL_EN); - break; - case mpam_feat_msmon_mbwu: - mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val); - mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val); - mpam_write_monsel_reg(msc, MBWU, 0); - mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val|MSMON_CFG_x_CTL_EN); - break; - default: - return; - } -} - -static void __ris_msmon_read(void *arg) -{ - u64 now; - bool nrdy = false; - unsigned long flags; - struct mon_read *m = arg; - struct mon_cfg *ctx = m->ctx; - struct mpam_msc_ris *ris = m->ris; - struct mpam_msc *msc = m->ris->msc; - u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt; - - lockdep_assert_held(&msc->lock); - - spin_lock_irqsave(&msc->mon_sel_lock, flags); - mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, ctx->mon) | - FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); - mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); - - /* - * Read the existing configuration to avoid re-writing the same values. - * This saves waiting for 'nrdy' on subsequent reads. 
- */ - read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt); - gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val); - if (cur_flt != flt_val || cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN)) - write_msmon_ctl_flt_vals(m, ctl_val, flt_val); - - switch (m->type) { - case mpam_feat_msmon_csu: - now = mpam_read_monsel_reg(msc, CSU); - break; - case mpam_feat_msmon_mbwu: - now = mpam_read_monsel_reg(msc, MBWU); - break; - default: - return; - } - spin_unlock_irqrestore(&msc->mon_sel_lock, flags); - - nrdy = now & MSMON___NRDY; - if (nrdy) { - m->err = -EBUSY; - return; - } - - now = FIELD_GET(MSMON___VALUE, now); - *(m->val) += now; -} - -static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) -{ - int err, idx; - struct mpam_msc *msc; - struct mpam_msc_ris *ris; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(ris, &comp->ris, comp_list) { - arg->ris = ris; - - msc = ris->msc; - mutex_lock(&msc->lock); - err = smp_call_function_any(&msc->accessibility, - __ris_msmon_read, arg, true); - mutex_unlock(&msc->lock); - if (!err && arg->err) - err = arg->err; - if (err) - break; - } - srcu_read_unlock(&mpam_srcu, idx); - - return err; -} - -int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, - enum mpam_device_features type, u64 *val) -{ - int err; - struct mon_read arg; - u64 wait_jiffies = 0; - struct mpam_props *cprops = &comp->class->props; - - might_sleep(); - - if (!mpam_is_enabled()) - return -EIO; - - if (!mpam_has_feature(type, cprops)) - return -EOPNOTSUPP; - - memset(&arg, 0, sizeof(arg)); - arg.ctx = ctx; - arg.type = type; - arg.val = val; - *val = 0; - - err = _msmon_read(comp, &arg); - if (err == -EBUSY) - wait_jiffies = usecs_to_jiffies(comp->class->nrdy_usec); - - while (wait_jiffies) - wait_jiffies = schedule_timeout_uninterruptible(wait_jiffies); - - if (err == -EBUSY) { - memset(&arg, 0, sizeof(arg)); - arg.ctx = ctx; - arg.type = type; - arg.val = val; - *val = 0; - - err = _msmon_read(comp, &arg); - } - - return err; -} 
- static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) { u32 num_words, msb; diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 19aedaa7e831..b69199dc81eb 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -62,7 +62,6 @@ struct mpam_msc * If needed, take msc->lock first. */ spinlock_t part_sel_lock; - spinlock_t mon_sel_lock; void __iomem * mapped_hwpage; size_t mapped_hwpage_sz; }; @@ -194,21 +193,6 @@ struct mpam_msc_ris { struct mpam_component *comp; }; -/* The values for MSMON_CFG_MBWU_FLT.RWBW */ -enum mon_filter_options { - COUNT_BOTH = 0, - COUNT_WRITE = 1, - COUNT_READ = 2, -}; - -struct mon_cfg { - u16 mon; - u8 pmg; - bool match_pmg; - u32 partid; - enum mon_filter_options opts; -}; - static inline int mpam_alloc_csu_mon(struct mpam_class *class) { struct mpam_props *cprops = &class->props; @@ -256,9 +240,6 @@ void mpam_disable(struct work_struct *work); int mpam_apply_config(struct mpam_component *comp, u16 partid, struct mpam_config *cfg); -int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, - enum mpam_device_features, u64 *val); - /* * MPAM MSCs have the following register layout. See: * Arm Architecture Reference Manual Supplement - Memory System Resource -- Gitee From 778f8b6d947f4b43071d9fd668fae1a87744d3e0 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 093/158] anolis: Revert "arm_mpam: Add helpers to allocate monitors" ANBZ: #31045 This reverts commit 79ce37b820e2a6cd785cae2e6aa86d0baa94ce03. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 2 -- drivers/platform/mpam/mpam_internal.h | 35 --------------------------- 2 files changed, 37 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 82dffe8c03ef..612c4b97d373 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -244,8 +244,6 @@ mpam_class_alloc(u8 level_idx, enum mpam_class_types type, gfp_t gfp) class->level = level_idx; class->type = type; INIT_LIST_HEAD_RCU(&class->classes_list); - ida_init(&class->ida_csu_mon); - ida_init(&class->ida_mbwu_mon); list_add_rcu(&class->classes_list, &mpam_classes); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index b69199dc81eb..b4f79ae8f12f 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -138,9 +138,6 @@ struct mpam_class /* member of mpam_classes */ struct list_head classes_list; - - struct ida ida_csu_mon; - struct ida ida_mbwu_mon; }; struct mpam_config { @@ -193,38 +190,6 @@ struct mpam_msc_ris { struct mpam_component *comp; }; -static inline int mpam_alloc_csu_mon(struct mpam_class *class) -{ - struct mpam_props *cprops = &class->props; - - if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) - return -EOPNOTSUPP; - - return ida_alloc_range(&class->ida_csu_mon, 0, cprops->num_csu_mon - 1, - GFP_KERNEL); -} - -static inline void mpam_free_csu_mon(struct mpam_class *class, int csu_mon) -{ - ida_free(&class->ida_csu_mon, csu_mon); -} - -static inline int mpam_alloc_mbwu_mon(struct mpam_class *class) -{ - struct mpam_props *cprops = &class->props; - - if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops)) - return -EOPNOTSUPP; - - return ida_alloc_range(&class->ida_mbwu_mon, 0, - cprops->num_mbwu_mon - 1, GFP_KERNEL); -} - -static inline void mpam_free_mbwu_mon(struct mpam_class *class, int mbwu_mon) -{ - ida_free(&class->ida_mbwu_mon, mbwu_mon); -} - /* 
List of all classes */ extern struct list_head mpam_classes; extern struct srcu_struct mpam_srcu; -- Gitee From 4570c72b01c189903b2baac1131bf52c17342bdc Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 094/158] anolis: Revert "arm_mpam: Probe and reset the rest of the features" ANBZ: #31045 This reverts commit 6c0cd0f9f3d8538eb708f28f4a3632c10b917c2f. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 91 --------------------------- drivers/platform/mpam/mpam_internal.h | 10 +-- 2 files changed, 1 insertion(+), 100 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 612c4b97d373..7da57310d52c 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -549,20 +549,10 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) int err; struct mpam_msc *msc = ris->msc; struct mpam_props *props = &ris->props; - struct mpam_class *class = ris->comp->class; lockdep_assert_held(&msc->lock); lockdep_assert_held(&msc->part_sel_lock); - /* Cache Capacity Partitioning */ - if (FIELD_GET(MPAMF_IDR_HAS_CCAP_PART, ris->idr)) { - u32 ccap_features = mpam_read_partsel_reg(msc, CCAP_IDR); - - props->cmax_wd = FIELD_GET(MPAMF_CCAP_IDR_CMAX_WD, ccap_features); - if (props->cmax_wd) - mpam_set_feature(mpam_feat_ccap_part, props); - } - /* Cache Portion partitioning */ if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) { u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR); @@ -585,31 +575,6 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features); if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) mpam_set_feature(mpam_feat_mbw_max, props); - - if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MIN, mbw_features)) - mpam_set_feature(mpam_feat_mbw_min, props); - - if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_PROP, mbw_features)) - 
mpam_set_feature(mpam_feat_mbw_prop, props); - } - - /* Priority partitioning */ - if (FIELD_GET(MPAMF_IDR_HAS_PRI_PART, ris->idr)) { - u32 pri_features = mpam_read_partsel_reg(msc, PRI_IDR); - - props->intpri_wd = FIELD_GET(MPAMF_PRI_IDR_INTPRI_WD, pri_features); - if (props->intpri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_INTPRI, pri_features)) { - mpam_set_feature(mpam_feat_intpri_part, props); - if (FIELD_GET(MPAMF_PRI_IDR_INTPRI_0_IS_LOW, pri_features)) - mpam_set_feature(mpam_feat_intpri_part_0_low, props); - } - - props->dspri_wd = FIELD_GET(MPAMF_PRI_IDR_DSPRI_WD, pri_features); - if (props->dspri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_DSPRI, pri_features)) { - mpam_set_feature(mpam_feat_dspri_part, props); - if (FIELD_GET(MPAMF_PRI_IDR_DSPRI_0_IS_LOW, pri_features)) - mpam_set_feature(mpam_feat_dspri_part_0_low, props); - } } /* Performance Monitoring */ @@ -645,21 +610,6 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); } } - - /* - * RIS with PARTID narrowing don't have enough storage for one - * configuration per PARTID. If these are in a class we could use, - * reduce the supported partid_max to match the numer of intpartid. - * If the class is unknown, just ignore it. 
- */ - if (FIELD_GET(MPAMF_IDR_HAS_PARTID_NRW, ris->idr) && - class->type != MPAM_CLASS_UNKNOWN) { - u32 nrwidr = mpam_read_partsel_reg(msc, PARTID_NRW_IDR); - u16 partid_max = FIELD_GET(MPAMF_PARTID_NRW_IDR_INTPARTID_MAX, nrwidr); - - mpam_set_feature(mpam_feat_partid_nrw, props); - msc->partid_max = min(msc->partid_max, partid_max); - } } static int mpam_msc_hw_probe(struct mpam_msc *msc) @@ -752,21 +702,13 @@ static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, struct mpam_config *cfg) { - u32 pri_val = 0; - u16 cmax = MPAMCFG_CMAX_CMAX; struct mpam_msc *msc = ris->msc; u16 bwa_fract = MPAMCFG_MBW_MAX_MAX; struct mpam_props *rprops = &ris->props; - u16 dspri = GENMASK(rprops->dspri_wd, 0); - u16 intpri = GENMASK(rprops->intpri_wd, 0); spin_lock(&msc->part_sel_lock); __mpam_part_sel(ris->ris_idx, partid, msc); - if(mpam_has_feature(mpam_feat_partid_nrw, rprops)) - mpam_write_partsel_reg(msc, INTPARTID, - (MPAMCFG_PART_SEL_INTERNAL | partid)); - if (mpam_has_feature(mpam_feat_cpor_part, rprops)) { if (mpam_has_feature(mpam_feat_cpor_part, cfg)) mpam_write_partsel_reg(msc, CPBM, cfg->cpbm); @@ -795,26 +737,6 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, if (mpam_has_feature(mpam_feat_mbw_prop, rprops)) mpam_write_partsel_reg(msc, MBW_PROP, bwa_fract); - - if (mpam_has_feature(mpam_feat_ccap_part, rprops)) - mpam_write_partsel_reg(msc, CMAX, cmax); - - if (mpam_has_feature(mpam_feat_intpri_part, rprops) || - mpam_has_feature(mpam_feat_dspri_part, rprops)) { - /* aces high? 
*/ - if (!mpam_has_feature(mpam_feat_intpri_part_0_low, rprops)) - intpri = 0; - if (!mpam_has_feature(mpam_feat_dspri_part_0_low, rprops)) - dspri = 0; - - if (mpam_has_feature(mpam_feat_intpri_part, rprops)) - pri_val |= FIELD_PREP(MPAMCFG_PRI_INTPRI, intpri); - if (mpam_has_feature(mpam_feat_dspri_part, rprops)) - pri_val |= FIELD_PREP(MPAMCFG_PRI_DSPRI, dspri); - - mpam_write_partsel_reg(msc, PRI, pri_val); - } - spin_unlock(&msc->part_sel_lock); } @@ -1363,19 +1285,6 @@ __resource_props_mismatch(struct mpam_msc_ris *ris, struct mpam_class *class) cprops->num_csu_mon = min(cprops->num_csu_mon, rprops->num_csu_mon); if (cprops->num_mbwu_mon != rprops->num_mbwu_mon) cprops->num_mbwu_mon = min(cprops->num_mbwu_mon, rprops->num_mbwu_mon); - - if (cprops->intpri_wd != rprops->intpri_wd) - cprops->intpri_wd = min(cprops->intpri_wd, rprops->intpri_wd); - if (cprops->dspri_wd != rprops->dspri_wd) - cprops->dspri_wd = min(cprops->dspri_wd, rprops->dspri_wd); - - /* {int,ds}pri may not have differing 0-low behaviour */ - if (mpam_has_feature(mpam_feat_intpri_part_0_low, cprops) != - mpam_has_feature(mpam_feat_intpri_part_0_low, rprops)) - mpam_clear_feature(mpam_feat_intpri_part, &cprops->features); - if (mpam_has_feature(mpam_feat_dspri_part_0_low, cprops) != - mpam_has_feature(mpam_feat_dspri_part_0_low, rprops)) - mpam_clear_feature(mpam_feat_dspri_part, &cprops->features); } /* diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index b4f79ae8f12f..7ae9c6cb3569 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -70,7 +70,7 @@ struct mpam_msc * When we compact the supported features, we don't care what they are. * Storing them as a bitmap makes life easy. 
*/ -typedef u32 mpam_features_t; +typedef u16 mpam_features_t; /* Bits for mpam_features_t */ enum mpam_device_features { @@ -80,10 +80,6 @@ enum mpam_device_features { mpam_feat_mbw_min, mpam_feat_mbw_max, mpam_feat_mbw_prop, - mpam_feat_intpri_part, - mpam_feat_intpri_part_0_low, - mpam_feat_dspri_part, - mpam_feat_dspri_part_0_low, mpam_feat_msmon, mpam_feat_msmon_csu, mpam_feat_msmon_csu_capture, @@ -91,7 +87,6 @@ enum mpam_device_features { mpam_feat_msmon_mbwu_capture, mpam_feat_msmon_mbwu_rwbw, mpam_feat_msmon_capt, - mpam_feat_partid_nrw, MPAM_FEATURE_LAST, }; #define MPAM_ALL_FEATURES ((1< Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 095/158] anolis: Revert "arm_mpam: Allow configuration to be applied and restored during cpu online" ANBZ: #31045 This reverts commit 5a8ca4ed94d3f594592d6f5cb33c9b39a8bf5373. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 193 +++----------------------- drivers/platform/mpam/mpam_internal.h | 24 +--- 2 files changed, 25 insertions(+), 192 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 7da57310d52c..04e1dfd1c5e3 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -699,89 +699,51 @@ static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) __mpam_write_reg(msc, reg, bm); } -static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, - struct mpam_config *cfg) +static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) { struct mpam_msc *msc = ris->msc; u16 bwa_fract = MPAMCFG_MBW_MAX_MAX; struct mpam_props *rprops = &ris->props; + lockdep_assert_held(&msc->lock); + spin_lock(&msc->part_sel_lock); __mpam_part_sel(ris->ris_idx, partid, msc); - if (mpam_has_feature(mpam_feat_cpor_part, rprops)) { - if (mpam_has_feature(mpam_feat_cpor_part, cfg)) - mpam_write_partsel_reg(msc, CPBM, cfg->cpbm); - else - mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, - 
rprops->cpbm_wd); - } + if (mpam_has_feature(mpam_feat_cpor_part, rprops)) + mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); - if (mpam_has_feature(mpam_feat_mbw_part, rprops)) { - if (mpam_has_feature(mpam_feat_mbw_part, cfg)) - mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm); - else - mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, - rprops->mbw_pbm_bits); - } + if (mpam_has_feature(mpam_feat_mbw_part, rprops)) + mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits); if (mpam_has_feature(mpam_feat_mbw_min, rprops)) mpam_write_partsel_reg(msc, MBW_MIN, 0); - if (mpam_has_feature(mpam_feat_mbw_max, rprops)) { - if (mpam_has_feature(mpam_feat_mbw_max, cfg)) - mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max); - else - mpam_write_partsel_reg(msc, MBW_MAX, bwa_fract); - } + if (mpam_has_feature(mpam_feat_mbw_max, rprops)) + mpam_write_partsel_reg(msc, MBW_MAX, bwa_fract); if (mpam_has_feature(mpam_feat_mbw_prop, rprops)) mpam_write_partsel_reg(msc, MBW_PROP, bwa_fract); spin_unlock(&msc->part_sel_lock); } -struct reprogram_ris { - struct mpam_msc_ris *ris; - struct mpam_config *cfg; -}; - -/* Call with MSC lock held */ -static int mpam_reprogram_ris(void *_arg) -{ - u16 partid, partid_max; - struct reprogram_ris *arg = _arg; - struct mpam_msc_ris *ris = arg->ris; - struct mpam_config *cfg = arg->cfg; - - if (ris->in_reset_state) - return 0; - - spin_lock(&partid_max_lock); - partid_max = mpam_partid_max; - spin_unlock(&partid_max_lock); - for (partid = 0; partid < partid_max; partid++) - mpam_reprogram_ris_partid(ris, partid, cfg); - - return 0; -} - /* * Called via smp_call_on_cpu() to prevent migration, while still being * pre-emptible. 
*/ static int mpam_reset_ris(void *arg) { + u16 partid, partid_max; struct mpam_msc_ris *ris = arg; - struct reprogram_ris reprogram_arg; - struct mpam_config empty_cfg = { 0 }; if (ris->in_reset_state) return 0; - reprogram_arg.ris = ris; - reprogram_arg.cfg = &empty_cfg; - - mpam_reprogram_ris(&reprogram_arg); + spin_lock(&partid_max_lock); + partid_max = mpam_partid_max; + spin_unlock(&partid_max_lock); + for (partid = 0; partid < partid_max; partid++) + mpam_reset_ris_partid(ris, partid); return 0; } @@ -830,37 +792,6 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online) srcu_read_unlock(&mpam_srcu, idx); } -static void mpam_reprogram_msc(struct mpam_msc *msc) -{ - int idx; - u16 partid; - bool reset; - struct mpam_config *cfg; - struct mpam_msc_ris *ris; - - lockdep_assert_held(&msc->lock); - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(ris, &msc->ris, msc_list) { - if (!mpam_is_enabled() && !ris->in_reset_state) { - mpam_touch_msc(msc, &mpam_reset_ris, ris); - ris->in_reset_state = true; - continue; - } - - reset = true; - for (partid = 0; partid < mpam_partid_max; partid++) { - cfg = &ris->comp->cfg[partid]; - if (cfg->features) - reset = false; - - mpam_reprogram_ris_partid(ris, partid, cfg); - } - ris->in_reset_state = reset; - } - srcu_read_unlock(&mpam_srcu, idx); -} - static void _enable_percpu_irq(void *_irq) { int *irq = _irq; @@ -882,7 +813,7 @@ static int mpam_cpu_online(unsigned int cpu) _enable_percpu_irq(&msc->reenable_error_ppi); if (atomic_fetch_inc(&msc->online_refs) == 0) - mpam_reprogram_msc(msc); + mpam_reset_msc(msc, true); mutex_unlock(&msc->lock); } srcu_read_unlock(&mpam_srcu, idx); @@ -1502,37 +1433,6 @@ static void mpam_unregister_irqs(void) cpus_read_unlock(); } -static int __allocate_component_cfg(struct mpam_component *comp) -{ - if (comp->cfg) - return 0; - - comp->cfg = kcalloc(mpam_partid_max, sizeof(*comp->cfg), GFP_KERNEL); - if (!comp->cfg) - return -ENOMEM; - - return 0; -} - -static int 
mpam_allocate_config(void) -{ - int err = 0; - struct mpam_class *class; - struct mpam_component *comp; - - lockdep_assert_held(&mpam_list_lock); - - list_for_each_entry(class, &mpam_classes, classes_list) { - list_for_each_entry(comp, &class->components, class_list) { - err = __allocate_component_cfg(comp); - if (err) - return err; - } - } - - return 0; -} - static void mpam_enable_once(void) { int err; @@ -1544,21 +1444,12 @@ static void mpam_enable_once(void) */ cpus_read_lock(); mutex_lock(&mpam_list_lock); - do { - mpam_enable_merge_features(); + mpam_enable_merge_features(); - err = mpam_allocate_config(); - if (err) { - pr_err("Failed to allocate configuration arrays.\n"); - break; - } + err = mpam_register_irqs(); + if (err) + pr_warn("Failed to register irqs: %d\n", err); - err = mpam_register_irqs(); - if (err) { - pr_warn("Failed to register irqs: %d\n", err); - break; - } - } while (0); mutex_unlock(&mpam_list_lock); cpus_read_unlock(); @@ -1595,8 +1486,6 @@ static void mpam_reset_class(struct mpam_class *class) idx = srcu_read_lock(&mpam_srcu); list_for_each_entry_rcu(comp, &class->components, class_list) { - memset(comp->cfg, 0, (mpam_partid_max * sizeof(*comp->cfg))); - list_for_each_entry_rcu(ris, &comp->ris, comp_list) { mutex_lock(&ris->msc->lock); mpam_touch_msc(ris->msc, mpam_reset_ris, ris); @@ -1686,48 +1575,6 @@ static int mpam_msc_drv_remove(struct platform_device *pdev) return 0; } -struct mpam_write_config_arg { - struct mpam_msc_ris *ris; - struct mpam_component *comp; - u16 partid; -}; - -static int __write_config(void *arg) -{ - struct mpam_write_config_arg *c = arg; - - mpam_reprogram_ris_partid(c->ris, c->partid, &c->comp->cfg[c->partid]); - - return 0; -} - -/* TODO: split into write_config/sync_config */ -/* TODO: add config_dirty bitmap to drive sync_config */ -int mpam_apply_config(struct mpam_component *comp, u16 partid, - struct mpam_config *cfg) -{ - struct mpam_write_config_arg arg; - struct mpam_msc_ris *ris; - int idx; - - 
lockdep_assert_cpus_held(); - - comp->cfg[partid] = *cfg; - arg.comp = comp; - arg.partid = partid; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(ris, &comp->ris, comp_list) { - arg.ris = ris; - mutex_lock(&ris->msc->lock); - mpam_touch_msc(ris->msc, __write_config, &arg); - mutex_unlock(&ris->msc->lock); - } - srcu_read_unlock(&mpam_srcu, idx); - - return 0; -} - static const struct of_device_id mpam_of_match[] = { { .compatible = "arm,mpam-msc", }, {}, diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 7ae9c6cb3569..33b48fa3d40a 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -102,7 +102,11 @@ struct mpam_props u16 num_mbwu_mon; }; -#define mpam_has_feature(_feat, x) ((1<<_feat) & (x)->features) +static inline bool mpam_has_feature(enum mpam_device_features feat, + struct mpam_props *props) +{ + return (1<features; +} static inline void mpam_set_feature(enum mpam_device_features feat, struct mpam_props *props) @@ -132,15 +136,6 @@ struct mpam_class struct list_head classes_list; }; -struct mpam_config { - /* Which configuration values are valid. 0 is used for reset */ - mpam_features_t features; - - u32 cpbm; - u32 mbw_pbm; - u16 mbw_max; -}; - struct mpam_component { u32 comp_id; @@ -150,12 +145,6 @@ struct mpam_component cpumask_t affinity; - /* - * Array of configuration values, indexed by partid. - * Read from cpuhp callbacks, hold the cpuhp lock when writing. - */ - struct mpam_config *cfg; - /* member of mpam_class:components */ struct list_head class_list; @@ -194,9 +183,6 @@ extern u8 mpam_pmg_max; void mpam_enable(struct work_struct *work); void mpam_disable(struct work_struct *work); -int mpam_apply_config(struct mpam_component *comp, u16 partid, - struct mpam_config *cfg); - /* * MPAM MSCs have the following register layout. 
See: * Arm Architecture Reference Manual Supplement - Memory System Resource -- Gitee From 53b9046cd7a8d22556629334e1a690394676136c Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 096/158] anolis: Revert "arm_mpam: Use the arch static key to indicate when mpam is enabled" ANBZ: #31045 This reverts commit 380018d3afccc57da8e875fa0fb234755edd0c99. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 6 ------ drivers/platform/mpam/mpam_internal.h | 8 -------- 2 files changed, 14 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 04e1dfd1c5e3..ac82c041999d 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -828,9 +828,6 @@ static int mpam_discovery_cpu_online(unsigned int cpu) struct mpam_msc *msc; bool new_device_probed = false; - if (mpam_is_enabled()) - return 0; - mutex_lock(&mpam_list_lock); list_for_each_entry(msc, &mpam_all_msc, glbl_list) { if (!cpumask_test_cpu(cpu, &msc->accessibility)) @@ -1471,7 +1468,6 @@ static void mpam_enable_once(void) partid_max_published = true; spin_unlock(&partid_max_lock); - static_branch_enable(&mpam_enabled); mpam_register_cpuhp_callbacks(mpam_cpu_online); pr_info("MPAM enabled with %u partid and %u pmg\n", @@ -1513,8 +1509,6 @@ static irqreturn_t mpam_disable_thread(int irq, void *dev_id) } mutex_unlock(&mpam_cpuhp_state_lock); - static_branch_disable(&mpam_enabled); - mpam_unregister_irqs(); idx = srcu_read_lock(&mpam_srcu); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 33b48fa3d40a..b4d35021da77 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -8,20 +8,12 @@ #include #include #include -#include #include #include #include #include #include -DECLARE_STATIC_KEY_FALSE(mpam_enabled); - -static inline bool mpam_is_enabled(void) -{ - return 
static_branch_likely(&mpam_enabled); -} - struct mpam_msc { /* member of mpam_all_msc */ -- Gitee From d0f85a4bacda8a8c9a69af399c8a0642291600c1 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 097/158] anolis: Revert "arm_mpam: Register and enable IRQs" ANBZ: #31045 This reverts commit fd9c5ee54b7acfcd16f4032d707556aef3eec505. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 311 +------------------------- drivers/platform/mpam/mpam_internal.h | 8 - 2 files changed, 10 insertions(+), 309 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index ac82c041999d..6091e87308ba 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -14,9 +14,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -67,12 +64,6 @@ static DEFINE_SPINLOCK(partid_max_lock); */ static DECLARE_WORK(mpam_enable_work, &mpam_enable); -/* - * All mpam error interrupts indicate a software bug. On receipt, disable the - * driver. - */ -static DECLARE_WORK(mpam_broken_work, &mpam_disable); - /* * An MSC is a container for resources, each identified by their RIS index. * Components are a group of RIS that control the same thing. 
@@ -136,24 +127,6 @@ static u64 mpam_msc_read_idr(struct mpam_msc *msc) return (idr_high << 32) | idr_low; } -static void mpam_msc_zero_esr(struct mpam_msc *msc) -{ - writel_relaxed(0, msc->mapped_hwpage + MPAMF_ESR); - if (msc->has_extd_esr) - writel_relaxed(0, msc->mapped_hwpage + MPAMF_ESR + 4); -} - -static u64 mpam_msc_read_esr(struct mpam_msc *msc) -{ - u64 esr_high = 0, esr_low; - - esr_low = readl_relaxed(msc->mapped_hwpage + MPAMF_ESR); - if (msc->has_extd_esr) - esr_high = readl_relaxed(msc->mapped_hwpage + MPAMF_ESR + 4); - - return (esr_high << 32) | esr_low; -} - static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc) { u32 partsel; @@ -649,7 +622,6 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); msc->partid_max = min(msc->partid_max, partid_max); msc->pmg_max = min(msc->pmg_max, pmg_max); - msc->has_extd_esr = FIELD_GET(MPAMF_IDR_HAS_EXT_ESR, idr); ris = mpam_get_or_create_ris(msc, ris_idx); if (IS_ERR(ris)) { @@ -792,12 +764,6 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online) srcu_read_unlock(&mpam_srcu, idx); } -static void _enable_percpu_irq(void *_irq) -{ - int *irq = _irq; - enable_percpu_irq(*irq, IRQ_TYPE_NONE); -} - static int mpam_cpu_online(unsigned int cpu) { int idx; @@ -808,13 +774,11 @@ static int mpam_cpu_online(unsigned int cpu) if (!cpumask_test_cpu(cpu, &msc->accessibility)) continue; - mutex_lock(&msc->lock); - if (msc->reenable_error_ppi) - _enable_percpu_irq(&msc->reenable_error_ppi); - - if (atomic_fetch_inc(&msc->online_refs) == 0) + if (atomic_fetch_inc(&msc->online_refs) == 0) { + mutex_lock(&msc->lock); mpam_reset_msc(msc, true); - mutex_unlock(&msc->lock); + mutex_unlock(&msc->lock); + } } srcu_read_unlock(&mpam_srcu, idx); @@ -864,13 +828,11 @@ static int mpam_cpu_offline(unsigned int cpu) if (!cpumask_test_cpu(cpu, &msc->accessibility)) continue; - mutex_lock(&msc->lock); - if (msc->reenable_error_ppi) - 
disable_percpu_irq(msc->reenable_error_ppi); - - if (atomic_dec_and_test(&msc->online_refs)) + if (atomic_dec_and_test(&msc->online_refs)) { + mutex_lock(&msc->lock); mpam_reset_msc(msc, false); - mutex_unlock(&msc->lock); + mutex_unlock(&msc->lock); + } } srcu_read_unlock(&mpam_srcu, idx); @@ -889,50 +851,6 @@ static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online)) mutex_unlock(&mpam_cpuhp_state_lock); } -static int __setup_ppi(struct mpam_msc *msc) -{ - int cpu; - - msc->error_dev_id = alloc_percpu_gfp(struct mpam_msc *, GFP_KERNEL); - if (!msc->error_dev_id) - return -ENOMEM; - - for_each_cpu(cpu, &msc->accessibility) { - struct mpam_msc *empty = *per_cpu_ptr(msc->error_dev_id, cpu); - if (empty != NULL) { - pr_err_once("%s shares PPI with %s!\n", dev_name(&msc->pdev->dev), - dev_name(&empty->pdev->dev)); - return -EBUSY; - } - *per_cpu_ptr(msc->error_dev_id, cpu) = msc; - } - - return 0; -} - -static int mpam_msc_setup_error_irq(struct mpam_msc *msc) -{ - int irq; - - irq = platform_get_irq_byname_optional(msc->pdev, "error"); - if (irq <= 0) - return 0; - - /* Allocate and initialise the percpu device pointer for PPI */ - if (irq_is_percpu(irq)) - - return __setup_ppi(msc); - - /* sanity check: shared interrupts can be routed anywhere? 
*/ - if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) { - pr_err_once("msc:%u is a private resource with a shared error interrupt", - msc->id); - return -EINVAL; - } - - return 0; -} - static int mpam_dt_count_msc(void) { int count = 0; @@ -1103,13 +1021,6 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) spin_lock_init(&msc->part_sel_lock); spin_lock_init(&msc->mon_sel_lock); - err = mpam_msc_setup_error_irq(msc); - if (err) { - devm_kfree(&pdev->dev, msc); - msc = ERR_PTR(err); - break; - } - if (device_property_read_u32(&pdev->dev, "pcc-channel", &msc->pcc_subspace_id)) msc->iface = MPAM_IFACE_MMIO; @@ -1262,198 +1173,11 @@ static void mpam_enable_merge_features(void) } } -static char *mpam_errcode_names[16] = { - [0] = "No error", - [1] = "PARTID_SEL_Range", - [2] = "Req_PARTID_Range", - [3] = "MSMONCFG_ID_RANGE", - [4] = "Req_PMG_Range", - [5] = "Monitor_Range", - [6] = "intPARTID_Range", - [7] = "Unexpected_INTERNAL", - [8] = "Undefined_RIS_PART_SEL", - [9] = "RIS_No_Control", - [10] = "Undefined_RIS_MON_SEL", - [11] = "RIS_No_Monitor", - [12 ... 
15] = "Reserved" -}; - -static int mpam_enable_msc_ecr(void *_msc) -{ - struct mpam_msc *msc = _msc; - - writel_relaxed(1, msc->mapped_hwpage + MPAMF_ECR); - - return 0; -} - -static int mpam_disable_msc_ecr(void *_msc) -{ - struct mpam_msc *msc = _msc; - - writel_relaxed(0, msc->mapped_hwpage + MPAMF_ECR); - - return 0; -} - -static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) -{ - u64 reg; - u16 partid; - u8 errcode, pmg, ris; - - if (WARN_ON_ONCE(!msc) || - WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), - &msc->accessibility))) - return IRQ_NONE; - - reg = mpam_msc_read_esr(msc); - - errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg); - if (!errcode) - return IRQ_NONE; - - /* Clear level triggered irq */ - mpam_msc_zero_esr(msc); - - partid = FIELD_GET(MPAMF_ESR_PARTID_OR_MON, reg); - pmg = FIELD_GET(MPAMF_ESR_PMG, reg); - ris = FIELD_GET(MPAMF_ESR_PMG, reg); - - pr_err("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n", - msc->id, mpam_errcode_names[errcode], partid, pmg, ris); - - if (irq_is_percpu(irq)) { - mpam_disable_msc_ecr(msc); - schedule_work(&mpam_broken_work); - return IRQ_HANDLED; - } - - return IRQ_WAKE_THREAD; -} - -static irqreturn_t mpam_ppi_handler(int irq, void *dev_id) -{ - struct mpam_msc *msc = *(struct mpam_msc **)dev_id; - - return __mpam_irq_handler(irq, msc); -} - -static irqreturn_t mpam_spi_handler(int irq, void *dev_id) -{ - struct mpam_msc *msc = dev_id; - - return __mpam_irq_handler(irq, msc); -} - -static irqreturn_t mpam_disable_thread(int irq, void *dev_id); - -static int mpam_register_irqs(void) -{ - int err, irq; - struct mpam_msc *msc; - - lockdep_assert_cpus_held(); - lockdep_assert_held(&mpam_list_lock); - - list_for_each_entry(msc, &mpam_all_msc, glbl_list) { - irq = platform_get_irq_byname_optional(msc->pdev, "error"); - if (irq <= 0) - continue; - - /* The MPAM spec says the interrupt can be SPI, PPI or LPI */ - /* We anticipate sharing the interrupt with other MSCs */ - if (irq_is_percpu(irq)) { - 
err = request_percpu_irq(irq, &mpam_ppi_handler, - "mpam:msc:error", - msc->error_dev_id); - if (err) - return err; - - mutex_lock(&msc->lock); - msc->reenable_error_ppi = irq; - smp_call_function_many(&msc->accessibility, - &_enable_percpu_irq, &irq, - true); - mutex_unlock(&msc->lock); - } else { - err = devm_request_threaded_irq(&msc->pdev->dev, irq, - &mpam_spi_handler, - &mpam_disable_thread, - IRQF_SHARED, - "mpam:msc:error", msc); - if (err) - return err; - } - - mutex_lock(&msc->lock); - msc->error_irq_requested = true; - mpam_touch_msc(msc, mpam_enable_msc_ecr, msc); - msc->error_irq_hw_enabled = true; - mutex_unlock(&msc->lock); - } - - return 0; -} - -static void mpam_unregister_irqs(void) -{ - int irq; - struct mpam_msc *msc; - - cpus_read_lock(); - /* take the lock as free_irq() can sleep */ - mutex_lock(&mpam_list_lock); - list_for_each_entry(msc, &mpam_all_msc, glbl_list) { - irq = platform_get_irq_byname_optional(msc->pdev, "error"); - if (irq <= 0) - continue; - - mutex_lock(&msc->lock); - if (msc->error_irq_hw_enabled) { - mpam_touch_msc(msc, mpam_disable_msc_ecr, msc); - msc->error_irq_hw_enabled = false; - } - - if (msc->error_irq_requested) { - if (irq_is_percpu(irq)) { - msc->reenable_error_ppi = 0; - free_percpu_irq(irq, msc->error_dev_id); - } else { - devm_free_irq(&msc->pdev->dev, irq, msc); - } - msc->error_irq_requested = false; - } - mutex_unlock(&msc->lock); - } - mutex_unlock(&mpam_list_lock); - cpus_read_unlock(); -} - static void mpam_enable_once(void) { - int err; - - /* - * If all the MSC have been probed, enabling the IRQs happens next. 
- * That involves cross-calling to a CPU that can reach the MSC, and - * the locks must be taken in this order: - */ - cpus_read_lock(); mutex_lock(&mpam_list_lock); mpam_enable_merge_features(); - - err = mpam_register_irqs(); - if (err) - pr_warn("Failed to register irqs: %d\n", err); - mutex_unlock(&mpam_list_lock); - cpus_read_unlock(); - - if (err) { - schedule_work(&mpam_broken_work); - return; - } mutex_lock(&mpam_cpuhp_state_lock); cpuhp_remove_state(mpam_cpuhp_state); @@ -1497,31 +1221,15 @@ static void mpam_reset_class(struct mpam_class *class) * All of MPAMs errors indicate a software bug, restore any modified * controls to their reset values. */ -static irqreturn_t mpam_disable_thread(int irq, void *dev_id) +void mpam_disable(void) { int idx; struct mpam_class *class; - mutex_lock(&mpam_cpuhp_state_lock); - if (mpam_cpuhp_state) { - cpuhp_remove_state(mpam_cpuhp_state); - mpam_cpuhp_state = 0; - } - mutex_unlock(&mpam_cpuhp_state_lock); - - mpam_unregister_irqs(); - idx = srcu_read_lock(&mpam_srcu); list_for_each_entry_rcu(class, &mpam_classes, classes_list) mpam_reset_class(class); srcu_read_unlock(&mpam_srcu, idx); - - return IRQ_HANDLED; -} - -void mpam_disable(struct work_struct *ignored) -{ - mpam_disable_thread(0, NULL); } /* @@ -1535,6 +1243,7 @@ void mpam_enable(struct work_struct *work) struct mpam_msc *msc; bool all_devices_probed = true; + /* Have we probed all the hw devices? 
*/ mutex_lock(&mpam_list_lock); list_for_each_entry(msc, &mpam_all_msc, glbl_list) { mutex_lock(&msc->lock); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index b4d35021da77..c0727010635a 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -29,17 +29,10 @@ struct mpam_msc struct pcc_mbox_chan *pcc_chan; u32 nrdy_usec; cpumask_t accessibility; - bool has_extd_esr; - - int reenable_error_ppi; - struct mpam_msc * __percpu *error_dev_id; - atomic_t online_refs; struct mutex lock; bool probed; - bool error_irq_requested; - bool error_irq_hw_enabled; u16 partid_max; u8 pmg_max; unsigned long ris_idxs[128 / BITS_PER_LONG]; @@ -173,7 +166,6 @@ extern u8 mpam_pmg_max; /* Scheduled work callback to enable mpam once all MSC have been probed */ void mpam_enable(struct work_struct *work); -void mpam_disable(struct work_struct *work); /* * MPAM MSCs have the following register layout. See: -- Gitee From e882c9a207813f9c8939213787b332c20abc0462 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 098/158] anolis: Revert "arm_mpam: Extend reset logic to allow devices to be reset any time" ANBZ: #31045 This reverts commit db8a81dbc5957c1ac1cea3dbb143c5cf5fdda2bd. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 34 ---------------------------- 1 file changed, 34 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 6091e87308ba..9156591fc4b9 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -1198,40 +1198,6 @@ static void mpam_enable_once(void) READ_ONCE(mpam_partid_max) + 1, mpam_pmg_max + 1); } -static void mpam_reset_class(struct mpam_class *class) -{ - int idx; - struct mpam_msc_ris *ris; - struct mpam_component *comp; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(comp, &class->components, class_list) { - list_for_each_entry_rcu(ris, &comp->ris, comp_list) { - mutex_lock(&ris->msc->lock); - mpam_touch_msc(ris->msc, mpam_reset_ris, ris); - mutex_unlock(&ris->msc->lock); - ris->in_reset_state = true; - } - } - srcu_read_unlock(&mpam_srcu, idx); -} - -/* - * Called in response to an error IRQ. - * All of MPAMs errors indicate a software bug, restore any modified - * controls to their reset values. - */ -void mpam_disable(void) -{ - int idx; - struct mpam_class *class; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(class, &mpam_classes, classes_list) - mpam_reset_class(class); - srcu_read_unlock(&mpam_srcu, idx); -} - /* * Enable mpam once all devices have been probed. * Scheduled by mpam_discovery_cpu_online() once all devices have been created. -- Gitee From ae4f12fc6373af4f9fdf0b5f3b295a6f0666e0a0 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 099/158] anolis: Revert "arm_mpam: Add a helper to touch an MSC from any CPU" ANBZ: #31045 This reverts commit 402fdf23a9bc86905d703ff2db9a97653d32844b. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 40 +++++----------------------- 1 file changed, 6 insertions(+), 34 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 9156591fc4b9..e281e6e910e0 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -699,49 +699,21 @@ static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) spin_unlock(&msc->part_sel_lock); } -/* - * Called via smp_call_on_cpu() to prevent migration, while still being - * pre-emptible. - */ -static int mpam_reset_ris(void *arg) +static void mpam_reset_ris(struct mpam_msc_ris *ris) { u16 partid, partid_max; - struct mpam_msc_ris *ris = arg; + struct mpam_msc *msc = ris->msc; + + lockdep_assert_held(&msc->lock); if (ris->in_reset_state) - return 0; + return; spin_lock(&partid_max_lock); partid_max = mpam_partid_max; spin_unlock(&partid_max_lock); for (partid = 0; partid < partid_max; partid++) mpam_reset_ris_partid(ris, partid); - - return 0; -} - -/* - * Get the preferred CPU for this MSC. If it is accessible from this CPU, - * this CPU is preferred. This can be preempted/migrated, it will only result - * in more work. 
- */ -static int mpam_get_msc_preferred_cpu(struct mpam_msc *msc) -{ - int cpu = raw_smp_processor_id(); - - if (cpumask_test_cpu(cpu, &msc->accessibility)) - return cpu; - - return cpumask_first_and(&msc->accessibility, cpu_online_mask); -} - -static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg) -{ - lockdep_assert_irqs_enabled(); - lockdep_assert_cpus_held(); - lockdep_assert_held(&msc->lock); - - return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true); } static void mpam_reset_msc(struct mpam_msc *msc, bool online) @@ -753,7 +725,7 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online) idx = srcu_read_lock(&mpam_srcu); list_for_each_entry_rcu(ris, &msc->ris, msc_list) { - mpam_touch_msc(msc, &mpam_reset_ris, ris); + mpam_reset_ris(ris); /* * Set in_reset_state when coming online. The reset state -- Gitee From 521827d226b66b9af4faace40188e834bec97180 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 100/158] anolis: Revert "arm_mpam: Reset MSC controls from cpu hp callbacks" ANBZ: #31045 This reverts commit 0f7f5e07299a317617f845f13cf7c47d19820619. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 126 +------------------------- drivers/platform/mpam/mpam_internal.h | 3 - 2 files changed, 1 insertion(+), 128 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index e281e6e910e0..f1574f393e1d 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -645,115 +644,8 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) return 0; } -static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) -{ - u32 num_words, msb; - u32 bm = ~0; - int i; - - lockdep_assert_held(&msc->part_sel_lock); - - /* - * Write all ~0 to all but the last 32bit-word, which may - * have fewer bits... 
- */ - num_words = DIV_ROUND_UP(wd, 32); - for (i = 0; i < num_words - 1; i++, reg += sizeof(bm)) - __mpam_write_reg(msc, reg, bm); - - /* - * ....and then the last (maybe) partial 32bit word. When wd is a - * multiple of 32, msb should be 31 to write a full 32bit word. - */ - msb = (wd - 1) % 32; - bm = GENMASK(msb , 0); - if (bm) - __mpam_write_reg(msc, reg, bm); -} - -static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) -{ - struct mpam_msc *msc = ris->msc; - u16 bwa_fract = MPAMCFG_MBW_MAX_MAX; - struct mpam_props *rprops = &ris->props; - - lockdep_assert_held(&msc->lock); - - spin_lock(&msc->part_sel_lock); - __mpam_part_sel(ris->ris_idx, partid, msc); - - if (mpam_has_feature(mpam_feat_cpor_part, rprops)) - mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); - - if (mpam_has_feature(mpam_feat_mbw_part, rprops)) - mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits); - - if (mpam_has_feature(mpam_feat_mbw_min, rprops)) - mpam_write_partsel_reg(msc, MBW_MIN, 0); - - if (mpam_has_feature(mpam_feat_mbw_max, rprops)) - mpam_write_partsel_reg(msc, MBW_MAX, bwa_fract); - - if (mpam_has_feature(mpam_feat_mbw_prop, rprops)) - mpam_write_partsel_reg(msc, MBW_PROP, bwa_fract); - spin_unlock(&msc->part_sel_lock); -} - -static void mpam_reset_ris(struct mpam_msc_ris *ris) -{ - u16 partid, partid_max; - struct mpam_msc *msc = ris->msc; - - lockdep_assert_held(&msc->lock); - - if (ris->in_reset_state) - return; - - spin_lock(&partid_max_lock); - partid_max = mpam_partid_max; - spin_unlock(&partid_max_lock); - for (partid = 0; partid < partid_max; partid++) - mpam_reset_ris_partid(ris, partid); -} - -static void mpam_reset_msc(struct mpam_msc *msc, bool online) -{ - int idx; - struct mpam_msc_ris *ris; - - lockdep_assert_held(&msc->lock); - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(ris, &msc->ris, msc_list) { - mpam_reset_ris(ris); - - /* - * Set in_reset_state when coming online. 
The reset state - * for non-zero partid may be lost while the CPUs are offline. - */ - ris->in_reset_state = online; - } - srcu_read_unlock(&mpam_srcu, idx); -} - static int mpam_cpu_online(unsigned int cpu) { - int idx; - struct mpam_msc *msc; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(msc, &mpam_all_msc, glbl_list) { - if (!cpumask_test_cpu(cpu, &msc->accessibility)) - continue; - - if (atomic_fetch_inc(&msc->online_refs) == 0) { - mutex_lock(&msc->lock); - mpam_reset_msc(msc, true); - mutex_unlock(&msc->lock); - } - } - srcu_read_unlock(&mpam_srcu, idx); - return 0; } @@ -792,22 +684,6 @@ static int mpam_discovery_cpu_online(unsigned int cpu) static int mpam_cpu_offline(unsigned int cpu) { - int idx; - struct mpam_msc *msc; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(msc, &mpam_all_msc, glbl_list) { - if (!cpumask_test_cpu(cpu, &msc->accessibility)) - continue; - - if (atomic_dec_and_test(&msc->online_refs)) { - mutex_lock(&msc->lock); - mpam_reset_msc(msc, false); - mutex_unlock(&msc->lock); - } - } - srcu_read_unlock(&mpam_srcu, idx); - return 0; } @@ -1167,7 +1043,7 @@ static void mpam_enable_once(void) mpam_register_cpuhp_callbacks(mpam_cpu_online); pr_info("MPAM enabled with %u partid and %u pmg\n", - READ_ONCE(mpam_partid_max) + 1, mpam_pmg_max + 1); + mpam_partid_max + 1, mpam_pmg_max + 1); } /* diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index c0727010635a..37858a4dd147 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -5,7 +5,6 @@ #define MPAM_INTERNAL_H #include -#include #include #include #include @@ -29,7 +28,6 @@ struct mpam_msc struct pcc_mbox_chan *pcc_chan; u32 nrdy_usec; cpumask_t accessibility; - atomic_t online_refs; struct mutex lock; bool probed; @@ -141,7 +139,6 @@ struct mpam_msc_ris { u8 ris_idx; u64 idr; struct mpam_props props; - bool in_reset_state; cpumask_t affinity; -- Gitee From 
bc94a512433eb27d9e225b26cfa808860c88066e Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 101/158] anolis: Revert "arm_mpam: Merge supported features during mpam_enable() into mpam_class" ANBZ: #31045 This reverts commit b4c66f9d0cdd245dbdaa3f7e8af4a0edd8de0ef9. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 87 --------------------------- drivers/platform/mpam/mpam_internal.h | 8 --- 2 files changed, 95 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index f1574f393e1d..7eeeb44506b1 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -938,95 +938,8 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) return err; } -/* - * If a resource doesn't match class feature/configuration, do the right thing. - * For 'num' properties we can just take the minimum. - * For properties where the mismatched unused bits would make a difference, we - * nobble the class feature, as we can't configure all the resources. - * e.g. The L3 cache is composed of two resources with 13 and 17 portion - * bitmaps respectively. 
- */ -static void -__resource_props_mismatch(struct mpam_msc_ris *ris, struct mpam_class *class) -{ - struct mpam_props *cprops = &class->props; - struct mpam_props *rprops = &ris->props; - - lockdep_assert_held(&mpam_list_lock); /* we modify class */ - - /* Clear missing features */ - cprops->features &= rprops->features; - - /* Clear incompatible features */ - if (cprops->cpbm_wd != rprops->cpbm_wd) - mpam_clear_feature(mpam_feat_cpor_part, &cprops->features); - if (cprops->mbw_pbm_bits != rprops->mbw_pbm_bits) - mpam_clear_feature(mpam_feat_mbw_part, &cprops->features); - - /* bwa_wd is a count of bits, fewer bits means less precision */ - if (cprops->bwa_wd != rprops->bwa_wd) - cprops->bwa_wd = min(cprops->bwa_wd, rprops->bwa_wd); - - /* For num properties, take the minimum */ - if (cprops->num_csu_mon != rprops->num_csu_mon) - cprops->num_csu_mon = min(cprops->num_csu_mon, rprops->num_csu_mon); - if (cprops->num_mbwu_mon != rprops->num_mbwu_mon) - cprops->num_mbwu_mon = min(cprops->num_mbwu_mon, rprops->num_mbwu_mon); -} - -/* - * Copy the first component's first resources's properties and features to the - * class. __resource_props_mismatch() will remove conflicts. - * It is not possible to have a class with no components, or a component with - * no resources. - */ -static void mpam_enable_init_class_features(struct mpam_class *class) -{ - struct mpam_msc_ris *ris; - struct mpam_component *comp; - - comp = list_first_entry_or_null(&class->components, - struct mpam_component, class_list); - if (WARN_ON(!comp)) - return; - - ris = list_first_entry_or_null(&comp->ris, - struct mpam_msc_ris, comp_list); - if (WARN_ON(!ris)) - return; - - class->props = ris->props; -} - -/* Merge all the common resource features into class. 
*/ -static void mpam_enable_merge_features(void) -{ - struct mpam_msc_ris *ris; - struct mpam_class *class; - struct mpam_component *comp; - - lockdep_assert_held(&mpam_list_lock); - - list_for_each_entry(class, &mpam_classes, classes_list) { - mpam_enable_init_class_features(class); - - list_for_each_entry(comp, &class->components, class_list) { - list_for_each_entry(ris, &comp->ris, comp_list) { - __resource_props_mismatch(ris, class); - - class->nrdy_usec = max(class->nrdy_usec, - ris->msc->nrdy_usec); - } - } - } -} - static void mpam_enable_once(void) { - mutex_lock(&mpam_list_lock); - mpam_enable_merge_features(); - mutex_unlock(&mpam_list_lock); - mutex_lock(&mpam_cpuhp_state_lock); cpuhp_remove_state(mpam_cpuhp_state); mpam_cpuhp_state = 0; diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 37858a4dd147..abe3a1eba2ac 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -97,12 +97,6 @@ static inline void mpam_set_feature(enum mpam_device_features feat, props->features |= (1< Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 102/158] anolis: Revert "arm_mpam: Probe the hardware features resctrl supports" ANBZ: #31045 This reverts commit b8ee79d2897035cd4d881c1627881f680505c705. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 79 +-------------------------- drivers/platform/mpam/mpam_internal.h | 50 ----------------- 2 files changed, 2 insertions(+), 127 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 7eeeb44506b1..ec22c279cfe9 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -83,7 +83,7 @@ LIST_HEAD(mpam_classes); static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) { - WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(reg > msc->mapped_hwpage_sz); WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); return readl_relaxed(msc->mapped_hwpage + reg); @@ -91,7 +91,7 @@ static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val) { - WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(reg > msc->mapped_hwpage_sz); WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); writel_relaxed(val, msc->mapped_hwpage + reg); @@ -516,74 +516,6 @@ static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc, return found; } -static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) -{ - int err; - struct mpam_msc *msc = ris->msc; - struct mpam_props *props = &ris->props; - - lockdep_assert_held(&msc->lock); - lockdep_assert_held(&msc->part_sel_lock); - - /* Cache Portion partitioning */ - if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) { - u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR); - - props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, cpor_features); - if (props->cpbm_wd) - mpam_set_feature(mpam_feat_cpor_part, props); - } - - /* Memory bandwidth partitioning */ - if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) { - u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR); - - /* portion bitmap resolution */ - props->mbw_pbm_bits = 
FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features); - if (props->mbw_pbm_bits && - FIELD_GET(MPAMF_MBW_IDR_HAS_PBM, mbw_features)) - mpam_set_feature(mpam_feat_mbw_part, props); - - props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features); - if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) - mpam_set_feature(mpam_feat_mbw_max, props); - } - - /* Performance Monitoring */ - if (FIELD_GET(MPAMF_IDR_HAS_MSMON, ris->idr)) { - u32 msmon_features = mpam_read_partsel_reg(msc, MSMON_IDR); - - if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_CSU, msmon_features)) { - u32 csumonidr, discard; - - /* - * If the firmware max-nrdy-us property is missing, the - * CSU counters can't be used. Should we wait forever? - */ - err = device_property_read_u32(&msc->pdev->dev, - "arm,not-ready-us", - &discard); - - csumonidr = mpam_read_partsel_reg(msc, CSUMON_IDR); - props->num_csu_mon = FIELD_GET(MPAMF_CSUMON_IDR_NUM_MON, csumonidr); - if (props->num_csu_mon && !err) - mpam_set_feature(mpam_feat_msmon_csu, props); - else if (props->num_csu_mon) - pr_err_once("Counters are not usable because not-ready timeout was not provided by firmware."); - } - if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) { - u32 mbwumonidr = mpam_read_partsel_reg(msc, MBWUMON_IDR); - - props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumonidr); - if (props->num_mbwu_mon) - mpam_set_feature(mpam_feat_msmon_mbwu, props); - - if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumonidr)) - mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); - } - } -} - static int mpam_msc_hw_probe(struct mpam_msc *msc) { u64 idr; @@ -604,7 +536,6 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) idr = mpam_msc_read_idr(msc); spin_unlock(&msc->part_sel_lock); - msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr); /* Use these values so partid/pmg always starts with a valid value */ @@ -626,12 +557,6 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) if (IS_ERR(ris)) { return PTR_ERR(ris); } - ris->idr = 
idr; - - spin_lock(&msc->part_sel_lock); - __mpam_part_sel(ris_idx, 0, msc); - mpam_ris_hw_probe(ris); - spin_unlock(&msc->part_sel_lock); } spin_lock(&partid_max_lock); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index abe3a1eba2ac..0f89bef1e0d5 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -49,54 +49,6 @@ struct mpam_msc size_t mapped_hwpage_sz; }; -/* - * When we compact the supported features, we don't care what they are. - * Storing them as a bitmap makes life easy. - */ -typedef u16 mpam_features_t; - -/* Bits for mpam_features_t */ -enum mpam_device_features { - mpam_feat_ccap_part = 0, - mpam_feat_cpor_part, - mpam_feat_mbw_part, - mpam_feat_mbw_min, - mpam_feat_mbw_max, - mpam_feat_mbw_prop, - mpam_feat_msmon, - mpam_feat_msmon_csu, - mpam_feat_msmon_csu_capture, - mpam_feat_msmon_mbwu, - mpam_feat_msmon_mbwu_capture, - mpam_feat_msmon_mbwu_rwbw, - mpam_feat_msmon_capt, - MPAM_FEATURE_LAST, -}; -#define MPAM_ALL_FEATURES ((1<features; -} - -static inline void mpam_set_feature(enum mpam_device_features feat, - struct mpam_props *props) -{ - props->features |= (1< Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 103/158] anolis: Revert "arm_mpam: Probe MSCs to find the supported partid/pmg values" ANBZ: #31045 This reverts commit 1ea287bb0904f2c769304001da65d20f701b0b8f. 
Signed-off-by: Jason Zeng --- arch/arm64/kernel/mpam.c | 11 -- drivers/platform/mpam/mpam_devices.c | 166 ++------------------------ drivers/platform/mpam/mpam_internal.h | 6 - include/linux/arm_mpam.h | 2 - 4 files changed, 7 insertions(+), 178 deletions(-) diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c index 0801dc58a189..d88a45a299fa 100644 --- a/arch/arm64/kernel/mpam.c +++ b/arch/arm64/kernel/mpam.c @@ -3,7 +3,6 @@ #include -#include #include #include @@ -13,13 +12,3 @@ DEFINE_STATIC_KEY_FALSE(mpam_enabled); EXPORT_SYMBOL_FOR_KVM(mpam_enabled); DEFINE_PER_CPU(u64, arm64_mpam_default); DEFINE_PER_CPU(u64, arm64_mpam_current); - -static int __init arm64_mpam_register_cpus(void) -{ - u64 mpamidr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1); - u16 partid_max = FIELD_GET(MPAMIDR_PARTID_MAX, mpamidr); - u8 pmg_max = FIELD_GET(MPAMIDR_PMG_MAX, mpamidr); - - return mpam_register_requestor(partid_max, pmg_max); -} -arch_initcall(arm64_mpam_register_cpus) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index ec22c279cfe9..48811c90b78a 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -47,15 +46,6 @@ static u32 mpam_num_msc; static int mpam_cpuhp_state; static DEFINE_MUTEX(mpam_cpuhp_state_lock); -/* - * The smallest common values for any CPU or MSC in the system. - * Generating traffic outside this range will result in screaming interrupts. - */ -u16 mpam_partid_max; -u8 mpam_pmg_max; -static bool partid_max_init, partid_max_published; -static DEFINE_SPINLOCK(partid_max_lock); - /* * mpam is enabled once all devices have been probed from CPU online callbacks, * scheduled via this work_struct. 
If access to an MSC depends on a CPU that @@ -89,14 +79,6 @@ static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) return readl_relaxed(msc->mapped_hwpage + reg); } -static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val) -{ - WARN_ON_ONCE(reg > msc->mapped_hwpage_sz); - WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); - - writel_relaxed(val, msc->mapped_hwpage + reg); -} - #define mpam_read_partsel_reg(msc, reg) \ ({ \ u32 ____ret; \ @@ -107,59 +89,6 @@ static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val) ____ret; \ }) -#define mpam_write_partsel_reg(msc, reg, val) \ -({ \ - lockdep_assert_held_once(&msc->part_sel_lock); \ - __mpam_write_reg(msc, MPAMCFG_##reg, val); \ -}) - -static u64 mpam_msc_read_idr(struct mpam_msc *msc) -{ - u64 idr_high = 0, idr_low; - - lockdep_assert_held(&msc->part_sel_lock); - - idr_low = mpam_read_partsel_reg(msc, IDR); - if (FIELD_GET(MPAMF_IDR_HAS_EXT, idr_low)) - idr_high = mpam_read_partsel_reg(msc, IDR + 4); - - return (idr_high << 32) | idr_low; -} - -static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc) -{ - u32 partsel; - - lockdep_assert_held(&msc->part_sel_lock); - - partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) | - FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, partid); - mpam_write_partsel_reg(msc, PART_SEL, partsel); -} - -int mpam_register_requestor(u16 partid_max, u8 pmg_max) -{ - int err = 0; - - spin_lock(&partid_max_lock); - if (!partid_max_init) { - mpam_partid_max = partid_max; - mpam_pmg_max = pmg_max; - partid_max_init = true; - } else if (!partid_max_published) { - mpam_partid_max = min(mpam_partid_max, partid_max); - mpam_pmg_max = min(mpam_pmg_max, pmg_max); - } else { - /* New requestors can't lower the values */ - if ((partid_max < mpam_partid_max) || (pmg_max < mpam_pmg_max)) - err = -EBUSY; - } - spin_unlock(&partid_max_lock); - - return err; -} -EXPORT_SYMBOL(mpam_register_requestor); - static struct mpam_component * 
mpam_component_alloc(struct mpam_class *class, int id, gfp_t gfp) { @@ -473,7 +402,6 @@ static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx, cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity); cpumask_or(&class->affinity, &class->affinity, &ris->affinity); list_add_rcu(&ris->comp_list, &comp->ris); - list_add_rcu(&ris->msc_list, &msc->ris); return 0; } @@ -491,37 +419,10 @@ int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, return err; } -static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc, - u8 ris_idx) -{ - int err; - struct mpam_msc_ris *ris, *found = ERR_PTR(-ENOENT); - - lockdep_assert_held(&mpam_list_lock); - - if (!test_bit(ris_idx, msc->ris_idxs)) { - err = mpam_ris_create_locked(msc, ris_idx, MPAM_CLASS_UNKNOWN, - 0, 0, GFP_ATOMIC); - if (err) - return ERR_PTR(err); - } - - list_for_each_entry(ris, &msc->ris, msc_list) { - if (ris->ris_idx == ris_idx) { - found = ris; - break; - } - } - - return found; -} - static int mpam_msc_hw_probe(struct mpam_msc *msc) { u64 idr; - u16 partid_max; - u8 ris_idx, pmg_max; - struct mpam_msc_ris *ris; + int err; lockdep_assert_held(&msc->lock); @@ -530,43 +431,14 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) { pr_err_once("%s does not match MPAM architecture v1.0\n", dev_name(&msc->pdev->dev)); - spin_unlock(&msc->part_sel_lock); - return -EIO; + err = -EIO; + } else { + msc->probed = true; + err = 0; } - - idr = mpam_msc_read_idr(msc); spin_unlock(&msc->part_sel_lock); - msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr); - - /* Use these values so partid/pmg always starts with a valid value */ - msc->partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr); - msc->pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); - - for (ris_idx = 0; ris_idx <= msc->ris_max; ris_idx++) { - spin_lock(&msc->part_sel_lock); - __mpam_part_sel(ris_idx, 0, msc); - idr = mpam_msc_read_idr(msc); - spin_unlock(&msc->part_sel_lock); - - 
partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr); - pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); - msc->partid_max = min(msc->partid_max, partid_max); - msc->pmg_max = min(msc->pmg_max, pmg_max); - - ris = mpam_get_or_create_ris(msc, ris_idx); - if (IS_ERR(ris)) { - return PTR_ERR(ris); - } - } - - spin_lock(&partid_max_lock); - mpam_partid_max = min(mpam_partid_max, msc->partid_max); - mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max); - spin_unlock(&partid_max_lock); - - msc->probed = true; - return 0; + return err; } static int mpam_cpu_online(unsigned int cpu) @@ -870,18 +742,9 @@ static void mpam_enable_once(void) mpam_cpuhp_state = 0; mutex_unlock(&mpam_cpuhp_state_lock); - /* - * Once the cpuhp callbacks have been changed, mpam_partid_max can no - * longer change. - */ - spin_lock(&partid_max_lock); - partid_max_published = true; - spin_unlock(&partid_max_lock); - mpam_register_cpuhp_callbacks(mpam_cpu_online); - pr_info("MPAM enabled with %u partid and %u pmg\n", - mpam_partid_max + 1, mpam_pmg_max + 1); + pr_info("MPAM enabled\n"); } /* @@ -965,25 +828,11 @@ static void mpam_dt_create_foundling_msc(void) static int __init mpam_msc_driver_init(void) { - bool mpam_not_available = false; - if (!mpam_cpus_have_feature()) return -EOPNOTSUPP; init_srcu_struct(&mpam_srcu); - /* - * If the MPAM CPU interface is not implemented, or reserved by - * firmware, there is no point touching the rest of the hardware. 
- */ - spin_lock(&partid_max_lock); - if (!partid_max_init || (!mpam_partid_max && !mpam_pmg_max)) - mpam_not_available = true; - spin_unlock(&partid_max_lock); - - if (mpam_not_available) - return 0; - if (!acpi_disabled) fw_num_msc = acpi_mpam_count_msc(); else @@ -999,5 +848,4 @@ static int __init mpam_msc_driver_init(void) return platform_driver_register(&mpam_msc_driver); } -/* Must occur after arm64_mpam_register_cpus() from arch_initcall() */ subsys_initcall(mpam_msc_driver_init); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 0f89bef1e0d5..92a3eea6d728 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -31,8 +31,6 @@ struct mpam_msc struct mutex lock; bool probed; - u16 partid_max; - u8 pmg_max; unsigned long ris_idxs[128 / BITS_PER_LONG]; u32 ris_max; @@ -99,10 +97,6 @@ struct mpam_msc_ris { extern struct list_head mpam_classes; extern struct srcu_struct mpam_srcu; -/* System wide partid/pmg values */ -extern u16 mpam_partid_max; -extern u8 mpam_pmg_max; - /* Scheduled work callback to enable mpam once all MSC have been probed */ void mpam_enable(struct work_struct *work); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 40e09b4d236b..950ea7049d53 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -34,8 +34,6 @@ static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, static inline int acpi_mpam_count_msc(void) { return -EINVAL; } #endif -int mpam_register_requestor(u16 partid_max, u8 pmg_max); - int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, enum mpam_class_types type, u8 class_id, int component_id); -- Gitee From 496346554390e19187bc9147be081e7f9e8cdfe3 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 104/158] anolis: Revert "arm_mpam: Add cpuhp callbacks to probe MSC hardware" ANBZ: #31045 This reverts commit f26020c6a1b1ea5cc0036b3b37046c8491de5381. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 148 +------------------------- drivers/platform/mpam/mpam_internal.h | 5 +- 2 files changed, 4 insertions(+), 149 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 48811c90b78a..599bd4ee5b00 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -4,7 +4,6 @@ #define pr_fmt(fmt) "mpam: " fmt #include -#include #include #include #include @@ -22,7 +21,6 @@ #include #include #include -#include #include @@ -43,16 +41,6 @@ struct srcu_struct mpam_srcu; /* MPAM isn't available until all the MSC have been probed. */ static u32 mpam_num_msc; -static int mpam_cpuhp_state; -static DEFINE_MUTEX(mpam_cpuhp_state_lock); - -/* - * mpam is enabled once all devices have been probed from CPU online callbacks, - * scheduled via this work_struct. If access to an MSC depends on a CPU that - * was not brought online at boot, this can happen surprisingly late. - */ -static DECLARE_WORK(mpam_enable_work, &mpam_enable); - /* * An MSC is a container for resources, each identified by their RIS index. * Components are a group of RIS that control the same thing. 
@@ -71,24 +59,6 @@ static DECLARE_WORK(mpam_enable_work, &mpam_enable); */ LIST_HEAD(mpam_classes); -static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) -{ - WARN_ON_ONCE(reg > msc->mapped_hwpage_sz); - WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); - - return readl_relaxed(msc->mapped_hwpage + reg); -} - -#define mpam_read_partsel_reg(msc, reg) \ -({ \ - u32 ____ret; \ - \ - lockdep_assert_held_once(&msc->part_sel_lock); \ - ____ret = __mpam_read_reg(msc, MPAMF_##reg); \ - \ - ____ret; \ -}) - static struct mpam_component * mpam_component_alloc(struct mpam_class *class, int id, gfp_t gfp) { @@ -419,81 +389,9 @@ int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, return err; } -static int mpam_msc_hw_probe(struct mpam_msc *msc) -{ - u64 idr; - int err; - - lockdep_assert_held(&msc->lock); - - spin_lock(&msc->part_sel_lock); - idr = mpam_read_partsel_reg(msc, AIDR); - if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) { - pr_err_once("%s does not match MPAM architecture v1.0\n", - dev_name(&msc->pdev->dev)); - err = -EIO; - } else { - msc->probed = true; - err = 0; - } - spin_unlock(&msc->part_sel_lock); - - return err; -} - -static int mpam_cpu_online(unsigned int cpu) +static void mpam_discovery_complete(void) { - return 0; -} - -/* Before mpam is enabled, try to probe new MSC */ -static int mpam_discovery_cpu_online(unsigned int cpu) -{ - int err = 0; - struct mpam_msc *msc; - bool new_device_probed = false; - - mutex_lock(&mpam_list_lock); - list_for_each_entry(msc, &mpam_all_msc, glbl_list) { - if (!cpumask_test_cpu(cpu, &msc->accessibility)) - continue; - - mutex_lock(&msc->lock); - if (!msc->probed) - err = mpam_msc_hw_probe(msc); - mutex_unlock(&msc->lock); - - if (!err) - new_device_probed = true; - else - break; // mpam_broken - } - mutex_unlock(&mpam_list_lock); - - if (new_device_probed && !err) - schedule_work(&mpam_enable_work); - - if (err < 0) - return err; - - return mpam_cpu_online(cpu); -} - -static 
int mpam_cpu_offline(unsigned int cpu) -{ - return 0; -} - -static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online)) -{ - mutex_lock(&mpam_cpuhp_state_lock); - mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mpam:online", - online, mpam_cpu_offline); - if (mpam_cpuhp_state <= 0) { - pr_err("Failed to register cpuhp callbacks"); - mpam_cpuhp_state = 0; - } - mutex_unlock(&mpam_cpuhp_state_lock); + pr_err("Discovered all MSC\n"); } static int mpam_dt_count_msc(void) @@ -730,51 +628,11 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) } if (!err && fw_num_msc == mpam_num_msc) - mpam_register_cpuhp_callbacks(&mpam_discovery_cpu_online); + mpam_discovery_complete(); return err; } -static void mpam_enable_once(void) -{ - mutex_lock(&mpam_cpuhp_state_lock); - cpuhp_remove_state(mpam_cpuhp_state); - mpam_cpuhp_state = 0; - mutex_unlock(&mpam_cpuhp_state_lock); - - mpam_register_cpuhp_callbacks(mpam_cpu_online); - - pr_info("MPAM enabled\n"); -} - -/* - * Enable mpam once all devices have been probed. - * Scheduled by mpam_discovery_cpu_online() once all devices have been created. - * Also scheduled when new devices are probed when new CPUs come online. - */ -void mpam_enable(struct work_struct *work) -{ - static atomic_t once; - struct mpam_msc *msc; - bool all_devices_probed = true; - - /* Have we probed all the hw devices? 
*/ - mutex_lock(&mpam_list_lock); - list_for_each_entry(msc, &mpam_all_msc, glbl_list) { - mutex_lock(&msc->lock); - if (!msc->probed) - all_devices_probed = false; - mutex_unlock(&msc->lock); - - if (!all_devices_probed) - break; - } - mutex_unlock(&mpam_list_lock); - - if (all_devices_probed && !atomic_fetch_inc(&once)) - mpam_enable_once(); -} - static int mpam_msc_drv_remove(struct platform_device *pdev) { struct mpam_msc *msc = platform_get_drvdata(pdev); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 92a3eea6d728..87723e28a0ba 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -30,7 +30,6 @@ struct mpam_msc cpumask_t accessibility; struct mutex lock; - bool probed; unsigned long ris_idxs[128 / BITS_PER_LONG]; u32 ris_max; @@ -97,8 +96,6 @@ struct mpam_msc_ris { extern struct list_head mpam_classes; extern struct srcu_struct mpam_srcu; -/* Scheduled work callback to enable mpam once all MSC have been probed */ -void mpam_enable(struct work_struct *work); /* * MPAM MSCs have the following register layout. See: @@ -198,7 +195,7 @@ void mpam_enable(struct work_struct *work); /* MPAMF_MBWUMON_IDR - MPAM memory bandwidth usage monitor ID register */ #define MPAMF_MBWUMON_IDR_NUM_MON GENMASK(15, 0) -#define MPAMF_MBWUMON_IDR_HAS_RWBW BIT(28) +#define MPAMF_MBWUMON_IDR_RWBW BIT(28) #define MPAMF_MBWUMON_IDR_LWD BIT(29) #define MPAMF_MBWUMON_IDR_HAS_LONG BIT(30) #define MPAMF_MBWUMON_IDR_HAS_CAPTURE BIT(31) -- Gitee From c5f6e39bfe2baeb74fb965fb3c36348b632701b6 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 105/158] anolis: Revert "arm_mpam: Add MPAM MSC register layout definitions" ANBZ: #31045 This reverts commit eaf270b3fb89ae9381bf8a8400038c2fa009cb5f. 
Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_internal.h | 254 -------------------------- 1 file changed, 254 deletions(-) diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 87723e28a0ba..90e4eeeacb8a 100644 --- a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -96,258 +96,4 @@ struct mpam_msc_ris { extern struct list_head mpam_classes; extern struct srcu_struct mpam_srcu; - -/* - * MPAM MSCs have the following register layout. See: - * Arm Architecture Reference Manual Supplement - Memory System Resource - * Partitioning and Monitoring (MPAM), for Armv8-A. DDI 0598A.a - */ -#define MPAM_ARCHITECTURE_V1 0x10 - -/* Memory mapped control pages: */ -/* ID Register offsets in the memory mapped page */ -#define MPAMF_IDR 0x0000 /* features id register */ -#define MPAMF_MSMON_IDR 0x0080 /* performance monitoring features */ -#define MPAMF_IMPL_IDR 0x0028 /* imp-def partitioning */ -#define MPAMF_CPOR_IDR 0x0030 /* cache-portion partitioning */ -#define MPAMF_CCAP_IDR 0x0038 /* cache-capacity partitioning */ -#define MPAMF_MBW_IDR 0x0040 /* mem-bw partitioning */ -#define MPAMF_PRI_IDR 0x0048 /* priority partitioning */ -#define MPAMF_CSUMON_IDR 0x0088 /* cache-usage monitor */ -#define MPAMF_MBWUMON_IDR 0x0090 /* mem-bw usage monitor */ -#define MPAMF_PARTID_NRW_IDR 0x0050 /* partid-narrowing */ -#define MPAMF_IIDR 0x0018 /* implementer id register */ -#define MPAMF_AIDR 0x0020 /* architectural id register */ - -/* Configuration and Status Register offsets in the memory mapped page */ -#define MPAMCFG_PART_SEL 0x0100 /* partid to configure: */ -#define MPAMCFG_CPBM 0x1000 /* cache-portion config */ -#define MPAMCFG_CMAX 0x0108 /* cache-capacity config */ -#define MPAMCFG_MBW_MIN 0x0200 /* min mem-bw config */ -#define MPAMCFG_MBW_MAX 0x0208 /* max mem-bw config */ -#define MPAMCFG_MBW_WINWD 0x0220 /* mem-bw accounting window config */ -#define MPAMCFG_MBW_PBM 0x2000 /* mem-bw 
portion bitmap config */ -#define MPAMCFG_PRI 0x0400 /* priority partitioning config */ -#define MPAMCFG_MBW_PROP 0x0500 /* mem-bw stride config */ -#define MPAMCFG_INTPARTID 0x0600 /* partid-narrowing config */ - -#define MSMON_CFG_MON_SEL 0x0800 /* monitor selector */ -#define MSMON_CFG_CSU_FLT 0x0810 /* cache-usage monitor filter */ -#define MSMON_CFG_CSU_CTL 0x0818 /* cache-usage monitor config */ -#define MSMON_CFG_MBWU_FLT 0x0820 /* mem-bw monitor filter */ -#define MSMON_CFG_MBWU_CTL 0x0828 /* mem-bw monitor config */ -#define MSMON_CSU 0x0840 /* current cache-usage */ -#define MSMON_CSU_CAPTURE 0x0848 /* last cache-usage value captured */ -#define MSMON_MBWU 0x0860 /* current mem-bw usage value */ -#define MSMON_MBWU_CAPTURE 0x0868 /* last mem-bw value captured */ -#define MSMON_CAPT_EVNT 0x0808 /* signal a capture event */ -#define MPAMF_ESR 0x00F8 /* error status register */ -#define MPAMF_ECR 0x00F0 /* error control register */ - -/* MPAMF_IDR - MPAM features ID register */ -#define MPAMF_IDR_PARTID_MAX GENMASK(15, 0) -#define MPAMF_IDR_PMG_MAX GENMASK(23, 16) -#define MPAMF_IDR_HAS_CCAP_PART BIT(24) -#define MPAMF_IDR_HAS_CPOR_PART BIT(25) -#define MPAMF_IDR_HAS_MBW_PART BIT(26) -#define MPAMF_IDR_HAS_PRI_PART BIT(27) -#define MPAMF_IDR_HAS_EXT BIT(28) -#define MPAMF_IDR_HAS_IMPL_IDR BIT(29) -#define MPAMF_IDR_HAS_MSMON BIT(30) -#define MPAMF_IDR_HAS_PARTID_NRW BIT(31) -#define MPAMF_IDR_HAS_RIS BIT(32) -#define MPAMF_IDR_HAS_EXT_ESR BIT(38) -#define MPAMF_IDR_HAS_ESR BIT(39) -#define MPAMF_IDR_RIS_MAX GENMASK(59, 56) - - -/* MPAMF_MSMON_IDR - MPAM performance monitoring ID register */ -#define MPAMF_MSMON_IDR_MSMON_CSU BIT(16) -#define MPAMF_MSMON_IDR_MSMON_MBWU BIT(17) -#define MPAMF_MSMON_IDR_HAS_LOCAL_CAPT_EVNT BIT(31) - -/* MPAMF_CPOR_IDR - MPAM features cache portion partitioning ID register */ -#define MPAMF_CPOR_IDR_CPBM_WD GENMASK(15, 0) - -/* MPAMF_CCAP_IDR - MPAM features cache capacity partitioning ID register */ -#define 
MPAMF_CCAP_IDR_CMAX_WD GENMASK(5, 0) - -/* MPAMF_MBW_IDR - MPAM features memory bandwidth partitioning ID register */ -#define MPAMF_MBW_IDR_BWA_WD GENMASK(5, 0) -#define MPAMF_MBW_IDR_HAS_MIN BIT(10) -#define MPAMF_MBW_IDR_HAS_MAX BIT(11) -#define MPAMF_MBW_IDR_HAS_PBM BIT(12) -#define MPAMF_MBW_IDR_HAS_PROP BIT(13) -#define MPAMF_MBW_IDR_WINDWR BIT(14) -#define MPAMF_MBW_IDR_BWPBM_WD GENMASK(28, 16) - -/* MPAMF_PRI_IDR - MPAM features priority partitioning ID register */ -#define MPAMF_PRI_IDR_HAS_INTPRI BIT(0) -#define MPAMF_PRI_IDR_INTPRI_0_IS_LOW BIT(1) -#define MPAMF_PRI_IDR_INTPRI_WD GENMASK(9, 4) -#define MPAMF_PRI_IDR_HAS_DSPRI BIT(16) -#define MPAMF_PRI_IDR_DSPRI_0_IS_LOW BIT(17) -#define MPAMF_PRI_IDR_DSPRI_WD GENMASK(25, 20) - -/* MPAMF_CSUMON_IDR - MPAM cache storage usage monitor ID register */ -#define MPAMF_CSUMON_IDR_NUM_MON GENMASK(15, 0) -#define MPAMF_CSUMON_IDR_HAS_CAPTURE BIT(31) - -/* MPAMF_MBWUMON_IDR - MPAM memory bandwidth usage monitor ID register */ -#define MPAMF_MBWUMON_IDR_NUM_MON GENMASK(15, 0) -#define MPAMF_MBWUMON_IDR_RWBW BIT(28) -#define MPAMF_MBWUMON_IDR_LWD BIT(29) -#define MPAMF_MBWUMON_IDR_HAS_LONG BIT(30) -#define MPAMF_MBWUMON_IDR_HAS_CAPTURE BIT(31) - -/* MPAMF_PARTID_NRW_IDR - MPAM PARTID narrowing ID register */ -#define MPAMF_PARTID_NRW_IDR_INTPARTID_MAX GENMASK(15, 0) - -/* MPAMF_IIDR - MPAM implementation ID register */ -#define MPAMF_IIDR_PRODUCTID GENMASK(31, 20) -#define MPAMF_IIDR_PRODUCTID_SHIFT 20 -#define MPAMF_IIDR_VARIANT GENMASK(19, 16) -#define MPAMF_IIDR_VARIANT_SHIFT 16 -#define MPAMF_IIDR_REVISON GENMASK(15, 12) -#define MPAMF_IIDR_REVISON_SHIFT 12 -#define MPAMF_IIDR_IMPLEMENTER GENMASK(11, 0) -#define MPAMF_IIDR_IMPLEMENTER_SHIFT 0 - -/* MPAMF_AIDR - MPAM architecture ID register */ -#define MPAMF_AIDR_ARCH_MAJOR_REV GENMASK(7, 4) -#define MPAMF_AIDR_ARCH_MINOR_REV GENMASK(3, 0) - -/* MPAMCFG_PART_SEL - MPAM partition configuration selection register */ -#define MPAMCFG_PART_SEL_PARTID_SEL GENMASK(15, 
0) -#define MPAMCFG_PART_SEL_INTERNAL BIT(16) -#define MPAMCFG_PART_SEL_RIS GENMASK(27, 24) - -/* MPAMCFG_CMAX - MPAM cache portion bitmap partition configuration register */ -#define MPAMCFG_CMAX_CMAX GENMASK(15, 0) - -/* - * MPAMCFG_MBW_MIN - MPAM memory minimum bandwidth partitioning configuration - * register - */ -#define MPAMCFG_MBW_MIN_MIN GENMASK(15, 0) - -/* - * MPAMCFG_MBW_MAX - MPAM memory maximum bandwidth partitioning configuration - * register - */ -#define MPAMCFG_MBW_MAX_MAX GENMASK(15, 0) -#define MPAMCFG_MBW_MAX_HARDLIM BIT(31) - -/* - * MPAMCFG_MBW_WINWD - MPAM memory bandwidth partitioning window width - * register - */ -#define MPAMCFG_MBW_WINWD_US_FRAC GENMASK(7, 0) -#define MPAMCFG_MBW_WINWD_US_INT GENMASK(23, 8) - - -/* MPAMCFG_PRI - MPAM priority partitioning configuration register */ -#define MPAMCFG_PRI_INTPRI GENMASK(15, 0) -#define MPAMCFG_PRI_DSPRI GENMASK(31, 16) - -/* - * MPAMCFG_MBW_PROP - Memory bandwidth proportional stride partitioning - * configuration register - */ -#define MPAMCFG_MBW_PROP_STRIDEM1 GENMASK(15, 0) -#define MPAMCFG_MBW_PROP_EN BIT(31) - -/* - * MPAMCFG_INTPARTID - MPAM internal partition narrowing configuration register - */ -#define MPAMCFG_INTPARTID_INTPARTID GENMASK(15, 0) -#define MPAMCFG_INTPARTID_INTERNAL BIT(16) - -/* MSMON_CFG_MON_SEL - Memory system performance monitor selection register */ -#define MSMON_CFG_MON_SEL_MON_SEL GENMASK(7, 0) -#define MSMON_CFG_MON_SEL_RIS GENMASK(27, 24) - -/* MPAMF_ESR - MPAM Error Status Register */ -#define MPAMF_ESR_PARTID_OR_MON GENMASK(15, 0) -#define MPAMF_ESR_PMG GENMASK(23, 16) -#define MPAMF_ESR_ERRCODE GENMASK(27, 24) -#define MPAMF_ESR_OVRWR BIT(31) -#define MPAMF_ESR_RIS GENMASK(35, 32) - -/* MPAMF_ECR - MPAM Error Control Register */ -#define MPAMF_ECR_INTEN BIT(0) - -/* Error conditions in accessing memory mapped registers */ -#define MPAM_ERRCODE_NONE 0 -#define MPAM_ERRCODE_PARTID_SEL_RANGE 1 -#define MPAM_ERRCODE_REQ_PARTID_RANGE 2 -#define 
MPAM_ERRCODE_MSMONCFG_ID_RANGE 3 -#define MPAM_ERRCODE_REQ_PMG_RANGE 4 -#define MPAM_ERRCODE_MONITOR_RANGE 5 -#define MPAM_ERRCODE_INTPARTID_RANGE 6 -#define MPAM_ERRCODE_UNEXPECTED_INTERNAL 7 - -/* - * MSMON_CFG_CSU_FLT - Memory system performance monitor configure cache storage - * usage monitor filter register - */ -#define MSMON_CFG_CSU_FLT_PARTID GENMASK(15, 0) -#define MSMON_CFG_CSU_FLT_PMG GENMASK(23, 16) - -/* - * MSMON_CFG_CSU_CTL - Memory system performance monitor configure cache storage - * usage monitor control register - * MSMON_CFG_MBWU_CTL - Memory system performance monitor configure memory - * bandwidth usage monitor control register - */ -#define MSMON_CFG_x_CTL_TYPE GENMASK(7, 0) -#define MSMON_CFG_x_CTL_MATCH_PARTID BIT(16) -#define MSMON_CFG_x_CTL_MATCH_PMG BIT(17) -#define MSMON_CFG_x_CTL_SCLEN BIT(19) -#define MSMON_CFG_x_CTL_SUBTYPE GENMASK(23, 20) -#define MSMON_CFG_x_CTL_OFLOW_FRZ BIT(24) -#define MSMON_CFG_x_CTL_OFLOW_INTR BIT(25) -#define MSMON_CFG_x_CTL_OFLOW_STATUS BIT(26) -#define MSMON_CFG_x_CTL_CAPT_RESET BIT(27) -#define MSMON_CFG_x_CTL_CAPT_EVNT GENMASK(30, 28) -#define MSMON_CFG_x_CTL_EN BIT(31) - -#define MSMON_CFG_MBWU_CTL_TYPE_MBWU 0x42 -#define MSMON_CFG_MBWU_CTL_TYPE_CSU 0x43 - -#define MSMON_CFG_MBWU_CTL_SUBTYPE_NONE 0 -#define MSMON_CFG_MBWU_CTL_SUBTYPE_READ 1 -#define MSMON_CFG_MBWU_CTL_SUBTYPE_WRITE 2 -#define MSMON_CFG_MBWU_CTL_SUBTYPE_BOTH 3 - -#define MSMON_CFG_MBWU_CTL_SUBTYPE_MAX 3 -#define MSMON_CFG_MBWU_CTL_SUBTYPE_MASK 0x3 - -/* - * MSMON_CFG_MBWU_FLT - Memory system performance monitor configure memory - * bandwidth usage monitor filter register - */ -#define MSMON_CFG_MBWU_FLT_PARTID GENMASK(15, 0) -#define MSMON_CFG_MBWU_FLT_PMG GENMASK(23, 16) -#define MSMON_CFG_MBWU_FLT_RWBW GENMASK(31, 30) - -/* - * MSMON_CSU - Memory system performance monitor cache storage usage monitor - * register - * MSMON_CSU_CAPTURE - Memory system performance monitor cache storage usage - * capture register - * MSMON_MBWU - Memory 
system performance monitor memory bandwidth usage - * monitor register - * MSMON_MBWU_CAPTURE - Memory system performance monitor memory bandwidth usage - * capture register - */ -#define MSMON___VALUE GENMASK(30, 0) -#define MSMON___NRDY BIT(31) -#define MSMON_MBWU_L_VALUE GENMASK(62, 0) -/* - * MSMON_CAPT_EVNT - Memory system performance monitoring capture event - * generation register - */ -#define MSMON_CAPT_EVNT_NOW BIT(0) - #endif /* MPAM_INTERNAL_H */ -- Gitee From b5b4d9f00ceee0c707bf615e7682a4b5339d26ac Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:24 -0800 Subject: [PATCH 106/158] anolis: Revert "arm_mpam: Add the class and component structures for ris firmware described" ANBZ: #31045 This reverts commit af7ac72a6c4d83e798f275b6b68eca8ce0de536b. Signed-off-by: Jason Zeng --- drivers/platform/mpam/mpam_devices.c | 362 +------------------------- drivers/platform/mpam/mpam_internal.h | 51 ---- include/linux/arm_mpam.h | 7 +- 3 files changed, 9 insertions(+), 411 deletions(-) diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c index 599bd4ee5b00..885f8b61cb65 100644 --- a/drivers/platform/mpam/mpam_devices.c +++ b/drivers/platform/mpam/mpam_devices.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -36,359 +37,11 @@ static DEFINE_MUTEX(mpam_list_lock); static LIST_HEAD(mpam_all_msc); -struct srcu_struct mpam_srcu; +static struct srcu_struct mpam_srcu; /* MPAM isn't available until all the MSC have been probed. */ static u32 mpam_num_msc; -/* - * An MSC is a container for resources, each identified by their RIS index. - * Components are a group of RIS that control the same thing. - * Classes are the set components of the same type. - * - * e.g. The set of RIS that make up the L2 are a component. These are sometimes - * termed slices. They should be configured as if they were one MSC. - * - * e.g. 
The SoC probably has more than one L2, each attached to a distinct set - * of CPUs. All the L2 components are grouped as a class. - * - * When creating an MSC, struct mpam_msc is added to the all mpam_all_msc list, - * then linked via struct mpam_ris to a component and a class. - * The same MSC may exist under different class->component paths, but the RIS - * index will be unique. - */ -LIST_HEAD(mpam_classes); - -static struct mpam_component * -mpam_component_alloc(struct mpam_class *class, int id, gfp_t gfp) -{ - struct mpam_component *comp; - - lockdep_assert_held(&mpam_list_lock); - - comp = kzalloc(sizeof(*comp), gfp); - if (!comp) - return ERR_PTR(-ENOMEM); - - comp->comp_id = id; - INIT_LIST_HEAD_RCU(&comp->ris); - /* affinity is updated when ris are added */ - INIT_LIST_HEAD_RCU(&comp->class_list); - comp->class = class; - - list_add_rcu(&comp->class_list, &class->components); - - return comp; -} - -static struct mpam_component * -mpam_component_get(struct mpam_class *class, int id, bool alloc, gfp_t gfp) -{ - struct mpam_component *comp; - - lockdep_assert_held(&mpam_list_lock); - - list_for_each_entry(comp, &class->components, class_list) { - if (comp->comp_id == id) - return comp; - } - - if (!alloc) - return ERR_PTR(-ENOENT); - - return mpam_component_alloc(class, id, gfp); -} - -static struct mpam_class * -mpam_class_alloc(u8 level_idx, enum mpam_class_types type, gfp_t gfp) -{ - struct mpam_class *class; - - lockdep_assert_held(&mpam_list_lock); - - class = kzalloc(sizeof(*class), gfp); - if (!class) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD_RCU(&class->components); - /* affinity is updated when ris are added */ - class->level = level_idx; - class->type = type; - INIT_LIST_HEAD_RCU(&class->classes_list); - - list_add_rcu(&class->classes_list, &mpam_classes); - - return class; -} - -static struct mpam_class * -mpam_class_get(u8 level_idx, enum mpam_class_types type, bool alloc, gfp_t gfp) -{ - bool found = false; - struct mpam_class *class; - - 
lockdep_assert_held(&mpam_list_lock); - - list_for_each_entry(class, &mpam_classes, classes_list) { - if (class->type == type && class->level == level_idx) { - found = true; - break; - } - } - - if (found) - return class; - - if (!alloc) - return ERR_PTR(-ENOENT); - - return mpam_class_alloc(level_idx, type, gfp); -} - -static void mpam_class_destroy(struct mpam_class *class) -{ - lockdep_assert_held(&mpam_list_lock); - - list_del_rcu(&class->classes_list); - synchronize_srcu(&mpam_srcu); - kfree(class); -} - -static void mpam_comp_destroy(struct mpam_component *comp) -{ - struct mpam_class *class = comp->class; - - lockdep_assert_held(&mpam_list_lock); - - list_del_rcu(&comp->class_list); - synchronize_srcu(&mpam_srcu); - kfree(comp); - - if (list_empty(&class->components)) - mpam_class_destroy(class); -} - -/* synchronise_srcu() before freeing ris */ -static void mpam_ris_destroy(struct mpam_msc_ris *ris) -{ - struct mpam_component *comp = ris->comp; - struct mpam_class *class = comp->class; - struct mpam_msc *msc = ris->msc; - - lockdep_assert_held(&mpam_list_lock); - lockdep_assert_preemption_enabled(); - - clear_bit(ris->ris_idx, msc->ris_idxs); - list_del_rcu(&ris->comp_list); - list_del_rcu(&ris->msc_list); - - cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity); - cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity); - - if (list_empty(&comp->ris)) - mpam_comp_destroy(comp); -} - -/* - * There are two ways of reaching a struct mpam_msc_ris. Via the - * class->component->ris, or via the msc. - * When destroying the msc, the other side needs unlinking and cleaning up too. - * synchronise_srcu() before freeing msc. 
- */ -static void mpam_msc_destroy(struct mpam_msc *msc) -{ - struct mpam_msc_ris *ris, *tmp; - - lockdep_assert_held(&mpam_list_lock); - lockdep_assert_preemption_enabled(); - - list_for_each_entry_safe(ris, tmp, &msc->ris, msc_list) - mpam_ris_destroy(ris); -} - -/* - * The cacheinfo structures are only populated when CPUs are online. - * This helper walks the device tree to include offline CPUs too. - */ -static int get_cpumask_from_cache_id(u32 cache_id, u32 cache_level, - cpumask_t *affinity) -{ - int cpu, err; - u32 iter_level; - int iter_cache_id; - struct device_node *iter; - - if (!acpi_disabled) - return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity); - - for_each_possible_cpu(cpu) { - iter = of_get_cpu_node(cpu, NULL); - if (!iter) { - pr_err("Failed to find cpu%d device node\n", cpu); - return -ENOENT; - } - - while ((iter = of_find_next_cache_node(iter))) { - err = of_property_read_u32(iter, "cache-level", - &iter_level); - if (err || (iter_level != cache_level)) { - of_node_put(iter); - continue; - } - - /* - * get_cpu_cacheinfo_id() isn't ready until sometime - * during device_initcall(). Use cache_of_get_id(). - */ - iter_cache_id = cache_of_get_id(iter); - if (cache_id == ~0UL) { - of_node_put(iter); - continue; - } - - if (iter_cache_id == cache_id) - cpumask_set_cpu(cpu, affinity); - - of_node_put(iter); - } - } - - return 0; -} - - -/* - * cpumask_of_node() only knows about online CPUs. This can't tell us whether - * a class is represented on all possible CPUs. 
- */ -static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity) -{ - int cpu; - - for_each_possible_cpu(cpu) { - if (node_id == cpu_to_node(cpu)) - cpumask_set_cpu(cpu, affinity); - } -} - -static int get_cpumask_from_cache(struct device_node *cache, - cpumask_t *affinity) -{ - int err; - u32 cache_level; - int cache_id; - - err = of_property_read_u32(cache, "cache-level", &cache_level); - if (err) { - pr_err("Failed to read cache-level from cache node\n"); - return -ENOENT; - } - - cache_id = cache_of_get_id(cache); - if (cache_id == ~0UL) { - pr_err("Failed to calculate cache-id from cache node\n"); - return -ENOENT; - } - - return get_cpumask_from_cache_id(cache_id, cache_level, affinity); -} - -static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity, - enum mpam_class_types type, - struct mpam_class *class, - struct mpam_component *comp) -{ - int err; - - switch (type) { - case MPAM_CLASS_CACHE: - err = get_cpumask_from_cache_id(comp->comp_id, class->level, - affinity); - if (err) - return err; - - if (cpumask_empty(affinity)) - pr_warn_once("%s no CPUs associated with cache node", - dev_name(&msc->pdev->dev)); - - break; - case MPAM_CLASS_MEMORY: - get_cpumask_from_node_id(comp->comp_id, affinity); - if (cpumask_empty(affinity)) - pr_warn_once("%s no CPUs associated with memory node", - dev_name(&msc->pdev->dev)); - break; - case MPAM_CLASS_UNKNOWN: - return 0; - } - - cpumask_and(affinity, affinity, &msc->accessibility); - - return 0; -} - -static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx, - enum mpam_class_types type, u8 class_id, - int component_id, gfp_t gfp) -{ - int err; - struct mpam_msc_ris *ris; - struct mpam_class *class; - struct mpam_component *comp; - - lockdep_assert_held(&mpam_list_lock); - - if (test_and_set_bit(ris_idx, msc->ris_idxs)) - return -EBUSY; - - ris = devm_kzalloc(&msc->pdev->dev, sizeof(*ris), gfp); - if (!ris) - return -ENOMEM; - - class = mpam_class_get(class_id, type, true, gfp); 
- if (IS_ERR(class)) - return PTR_ERR(class); - - comp = mpam_component_get(class, component_id, true, gfp); - if (IS_ERR(comp)) { - if (list_empty(&class->components)) - mpam_class_destroy(class); - return PTR_ERR(comp); - } - - err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp); - if (err) { - if (list_empty(&class->components)) - mpam_class_destroy(class); - return err; - } - - ris->ris_idx = ris_idx; - INIT_LIST_HEAD_RCU(&ris->comp_list); - INIT_LIST_HEAD_RCU(&ris->msc_list); - ris->msc = msc; - ris->comp = comp; - - cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity); - cpumask_or(&class->affinity, &class->affinity, &ris->affinity); - list_add_rcu(&ris->comp_list, &comp->ris); - - return 0; -} - -int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, - enum mpam_class_types type, u8 class_id, int component_id) -{ - int err; - - mutex_lock(&mpam_list_lock); - err = mpam_ris_create_locked(msc, ris_idx, type, class_id, - component_id, GFP_KERNEL); - mutex_unlock(&mpam_list_lock); - - return err; -} - static void mpam_discovery_complete(void) { pr_err("Discovered all MSC\n"); @@ -500,14 +153,8 @@ static int get_msc_affinity(struct mpam_msc *msc) cpumask_copy(&msc->accessibility, cpu_possible_mask); err = 0; } else { - if (of_device_is_compatible(parent, "cache")) { - err = get_cpumask_from_cache(parent, - &msc->accessibility); - } else { - err = -EINVAL; - pr_err("Cannot determine accessibility of MSC: %s\n", - dev_name(&msc->pdev->dev)); - } + err = -EINVAL; + pr_err("Cannot determine CPU accessibility of MSC\n"); } of_node_put(parent); @@ -644,7 +291,6 @@ static int mpam_msc_drv_remove(struct platform_device *pdev) mpam_num_msc--; platform_set_drvdata(pdev, NULL); list_del_rcu(&msc->glbl_list); - mpam_msc_destroy(msc); synchronize_srcu(&mpam_srcu); mutex_unlock(&mpam_list_lock); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h index 90e4eeeacb8a..affd7999fcad 100644 --- 
a/drivers/platform/mpam/mpam_internal.h +++ b/drivers/platform/mpam/mpam_internal.h @@ -11,7 +11,6 @@ #include #include #include -#include struct mpam_msc { @@ -46,54 +45,4 @@ struct mpam_msc size_t mapped_hwpage_sz; }; -struct mpam_class -{ - /* mpam_components in this class */ - struct list_head components; - - cpumask_t affinity; - - u8 level; - enum mpam_class_types type; - - /* member of mpam_classes */ - struct list_head classes_list; -}; - -struct mpam_component -{ - u32 comp_id; - - /* mpam_msc_ris in this component */ - struct list_head ris; - - cpumask_t affinity; - - /* member of mpam_class:components */ - struct list_head class_list; - - /* parent: */ - struct mpam_class *class; -}; - -struct mpam_msc_ris { - u8 ris_idx; - - cpumask_t affinity; - - /* member of mpam_component:ris */ - struct list_head comp_list; - - /* member of mpam_msc:ris */ - struct list_head msc_list; - - /* parents: */ - struct mpam_msc *msc; - struct mpam_component *comp; -}; - -/* List of all classes */ -extern struct list_head mpam_classes; -extern struct srcu_struct mpam_srcu; - #endif /* MPAM_INTERNAL_H */ diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 950ea7049d53..0f1d3f07e789 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -34,7 +34,10 @@ static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, static inline int acpi_mpam_count_msc(void) { return -EINVAL; } #endif -int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, - enum mpam_class_types type, u8 class_id, int component_id); +static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, int component_id) +{ + return -EINVAL; +} #endif /* __LINUX_ARM_MPAM_H */ -- Gitee From 06fb7247b316e85a85538c65979cdfcc7e5a900b Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 107/158] anolis: Revert "arm_mpam: Add probe/remove for mpam msc driver and kbuild boiler plate" ANBZ: #31045 
This reverts commit 04166a32de9bb542594ea4a2f6e332ff7102c95d. [jz: also remove config file: anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_CPU_RESCTRL] Signed-off-by: Jason Zeng --- .../L0-MANDATORY/arm64/CONFIG_ARM_CPU_RESCTRL | 1 - arch/arm64/Kconfig | 1 - drivers/platform/Kconfig | 2 - drivers/platform/Makefile | 1 - drivers/platform/mpam/Kconfig | 6 - drivers/platform/mpam/Makefile | 1 - drivers/platform/mpam/mpam_devices.c | 355 ------------------ drivers/platform/mpam/mpam_internal.h | 48 --- 8 files changed, 415 deletions(-) delete mode 100644 anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_CPU_RESCTRL delete mode 100644 drivers/platform/mpam/Kconfig delete mode 100644 drivers/platform/mpam/Makefile delete mode 100644 drivers/platform/mpam/mpam_devices.c delete mode 100644 drivers/platform/mpam/mpam_internal.h diff --git a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_CPU_RESCTRL b/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_CPU_RESCTRL deleted file mode 100644 index b1c35e9ba99b..000000000000 --- a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -CONFIG_ARM_CPU_RESCTRL=y diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 659e1ac54f4c..1cbb49ffb961 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2069,7 +2069,6 @@ config ARM64_TLB_RANGE config ARM64_MPAM bool "Enable support for MPAM" select ACPI_MPAM if ACPI - select ARM_CPU_RESCTRL help Memory Partitioning and Monitoring is an optional extension that allows the CPUs to mark load and store transactions with diff --git a/drivers/platform/Kconfig b/drivers/platform/Kconfig index f26534a4a83b..868b20361769 100644 --- a/drivers/platform/Kconfig +++ b/drivers/platform/Kconfig @@ -9,8 +9,6 @@ source "drivers/platform/chrome/Kconfig" source "drivers/platform/mellanox/Kconfig" -source "drivers/platform/mpam/Kconfig" - source "drivers/platform/olpc/Kconfig" source "drivers/platform/surface/Kconfig" diff --git a/drivers/platform/Makefile b/drivers/platform/Makefile 
index 54ee16e4e4d8..8296d4c41eb7 100644 --- a/drivers/platform/Makefile +++ b/drivers/platform/Makefile @@ -12,4 +12,3 @@ obj-$(CONFIG_OLPC_EC) += olpc/ obj-$(CONFIG_GOLDFISH) += goldfish/ obj-$(CONFIG_CHROME_PLATFORMS) += chrome/ obj-$(CONFIG_SURFACE_PLATFORMS) += surface/ -obj-$(CONFIG_ARM_CPU_RESCTRL) += mpam/ diff --git a/drivers/platform/mpam/Kconfig b/drivers/platform/mpam/Kconfig deleted file mode 100644 index 13bd86fc5e58..000000000000 --- a/drivers/platform/mpam/Kconfig +++ /dev/null @@ -1,6 +0,0 @@ -# Confusingly, this is everything but the CPU bits of MPAM. CPU here means -# CPU resources, not containers or cgroups etc. -config ARM_CPU_RESCTRL - bool - depends on ARM64 - select RESCTRL_RMID_DEPENDS_ON_CLOSID diff --git a/drivers/platform/mpam/Makefile b/drivers/platform/mpam/Makefile deleted file mode 100644 index 8ad69bfa2aa2..000000000000 --- a/drivers/platform/mpam/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_ARM_CPU_RESCTRL) += mpam_devices.o diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c deleted file mode 100644 index 885f8b61cb65..000000000000 --- a/drivers/platform/mpam/mpam_devices.c +++ /dev/null @@ -1,355 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2022 Arm Ltd. - -#define pr_fmt(fmt) "mpam: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include "mpam_internal.h" - -/* - * mpam_list_lock protects the SRCU lists when writing. Once the - * mpam_enabled key is enabled these lists are read-only, - * unless the error interrupt disables the driver. - */ -static DEFINE_MUTEX(mpam_list_lock); -static LIST_HEAD(mpam_all_msc); - -static struct srcu_struct mpam_srcu; - -/* MPAM isn't available until all the MSC have been probed. 
*/ -static u32 mpam_num_msc; - -static void mpam_discovery_complete(void) -{ - pr_err("Discovered all MSC\n"); -} - -static int mpam_dt_count_msc(void) -{ - int count = 0; - struct device_node *np; - - for_each_compatible_node(np, NULL, "arm,mpam-msc") - count++; - - return count; -} - -static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, - u32 ris_idx) -{ - int err = 0; - u32 level = 0; - unsigned long cache_id; - struct device_node *cache; - - do { - if (of_device_is_compatible(np, "arm,mpam-cache")) { - cache = of_parse_phandle(np, "arm,mpam-device", 0); - if (!cache) { - pr_err("Failed to read phandle\n"); - break; - } - } else if (of_device_is_compatible(np->parent, "cache")) { - cache = np->parent; - } else { - /* For now, only caches are supported */ - cache = NULL; - break; - } - - err = of_property_read_u32(cache, "cache-level", &level); - if (err) { - pr_err("Failed to read cache-level\n"); - break; - } - - cache_id = cache_of_get_id(cache); - if (cache_id == ~0UL) { - err = -ENOENT; - break; - } - - err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level, - cache_id); - } while (0); - of_node_put(cache); - - return err; -} - - -static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored) -{ - int err, num_ris = 0; - const u32 *ris_idx_p; - struct device_node *iter, *np; - - np = msc->pdev->dev.of_node; - for_each_child_of_node(np, iter) { - ris_idx_p = of_get_property(iter, "reg", NULL); - if (ris_idx_p) { - num_ris++; - err = mpam_dt_parse_resource(msc, iter, *ris_idx_p); - if (err) { - of_node_put(iter); - return err; - } - } - } - - if (!num_ris) - mpam_dt_parse_resource(msc, np, 0); - - return err; -} - -static int get_msc_affinity(struct mpam_msc *msc) -{ - struct device_node *parent; - u32 affinity_id; - int err; - - if (!acpi_disabled) { - err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity", - &affinity_id); - if (err) { - cpumask_copy(&msc->accessibility, cpu_possible_mask); - err = 0; - } else 
{ - err = acpi_pptt_get_cpus_from_container(affinity_id, - &msc->accessibility); - } - - return err; - } - - /* This depends on the path to of_node */ - parent = of_get_parent(msc->pdev->dev.of_node); - if (parent == of_root) { - cpumask_copy(&msc->accessibility, cpu_possible_mask); - err = 0; - } else { - err = -EINVAL; - pr_err("Cannot determine CPU accessibility of MSC\n"); - } - of_node_put(parent); - - return err; -} - -static int fw_num_msc; - -static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg) -{ - /* TODO: wake up tasks blocked on this MSC's PCC channel */ -} - -static int mpam_msc_drv_probe(struct platform_device *pdev) -{ - int err; - pgprot_t prot; - void * __iomem io; - struct mpam_msc *msc; - struct resource *msc_res; - void *plat_data = pdev->dev.platform_data; - - mutex_lock(&mpam_list_lock); - do { - msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); - if (!msc) { - err = -ENOMEM; - break; - } - - INIT_LIST_HEAD_RCU(&msc->glbl_list); - msc->pdev = pdev; - - err = device_property_read_u32(&pdev->dev, "arm,not-ready-us", - &msc->nrdy_usec); - if (err) { - /* This will prevent CSU monitors being usable */ - msc->nrdy_usec = 0; - } - - err = get_msc_affinity(msc); - if (err) - break; - if (cpumask_empty(&msc->accessibility)) { - pr_err_once("msc:%u is not accessible from any CPU!", - msc->id); - err = -EINVAL; - break; - } - - mutex_init(&msc->lock); - msc->id = mpam_num_msc++; - INIT_LIST_HEAD_RCU(&msc->ris); - spin_lock_init(&msc->part_sel_lock); - spin_lock_init(&msc->mon_sel_lock); - - if (device_property_read_u32(&pdev->dev, "pcc-channel", - &msc->pcc_subspace_id)) - msc->iface = MPAM_IFACE_MMIO; - else - msc->iface = MPAM_IFACE_PCC; - - if (msc->iface == MPAM_IFACE_MMIO) { - io = devm_platform_get_and_ioremap_resource(pdev, 0, - &msc_res); - if (IS_ERR(io)) { - pr_err("Failed to map MSC base address\n"); - devm_kfree(&pdev->dev, msc); - err = PTR_ERR(io); - break; - } - msc->mapped_hwpage_sz = msc_res->end - msc_res->start; 
- msc->mapped_hwpage = io; - } else if (msc->iface == MPAM_IFACE_PCC) { - msc->pcc_cl.dev = &pdev->dev; - msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; - msc->pcc_cl.tx_block = false; - msc->pcc_cl.tx_tout = 1000; /* 1s */ - msc->pcc_cl.knows_txdone = false; - - msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, - msc->pcc_subspace_id); - if (IS_ERR(msc->pcc_chan)) { - pr_err("Failed to request MSC PCC channel\n"); - devm_kfree(&pdev->dev, msc); - err = PTR_ERR(msc->pcc_chan); - break; - } - - prot = __acpi_get_mem_attribute(msc->pcc_chan->shmem_base_addr); - io = ioremap_prot(msc->pcc_chan->shmem_base_addr, - msc->pcc_chan->shmem_size, pgprot_val(prot)); - if (IS_ERR(io)) { - pr_err("Failed to map MSC base address\n"); - pcc_mbox_free_channel(msc->pcc_chan); - devm_kfree(&pdev->dev, msc); - err = PTR_ERR(io); - break; - } - - /* TODO: issue a read to update the registers */ - - msc->mapped_hwpage_sz = msc->pcc_chan->shmem_size; - msc->mapped_hwpage = io + sizeof(struct acpi_pcct_shared_memory); - } - - list_add_rcu(&msc->glbl_list, &mpam_all_msc); - platform_set_drvdata(pdev, msc); - } while (0); - mutex_unlock(&mpam_list_lock); - - if (!err) { - /* Create RIS entries described by firmware */ - if (!acpi_disabled) - err = acpi_mpam_parse_resources(msc, plat_data); - else - err = mpam_dt_parse_resources(msc, plat_data); - } - - if (!err && fw_num_msc == mpam_num_msc) - mpam_discovery_complete(); - - return err; -} - -static int mpam_msc_drv_remove(struct platform_device *pdev) -{ - struct mpam_msc *msc = platform_get_drvdata(pdev); - - if (!msc) - return 0; - - mutex_lock(&mpam_list_lock); - mpam_num_msc--; - platform_set_drvdata(pdev, NULL); - list_del_rcu(&msc->glbl_list); - synchronize_srcu(&mpam_srcu); - mutex_unlock(&mpam_list_lock); - - return 0; -} - -static const struct of_device_id mpam_of_match[] = { - { .compatible = "arm,mpam-msc", }, - {}, -}; -MODULE_DEVICE_TABLE(of, mpam_of_match); - -static struct platform_driver mpam_msc_driver = { - .driver 
= { - .name = "mpam_msc", - .of_match_table = of_match_ptr(mpam_of_match), - }, - .probe = mpam_msc_drv_probe, - .remove = mpam_msc_drv_remove, -}; - -/* - * MSC that are hidden under caches are not created as platform devices - * as there is no cache driver. Caches are also special-cased in - * get_msc_affinity(). - */ -static void mpam_dt_create_foundling_msc(void) -{ - int err; - struct device_node *cache; - - for_each_compatible_node(cache, NULL, "cache") { - err = of_platform_populate(cache, mpam_of_match, NULL, NULL); - if (err) { - pr_err("Failed to create MSC devices under caches\n"); - } - } -} - -static int __init mpam_msc_driver_init(void) -{ - if (!mpam_cpus_have_feature()) - return -EOPNOTSUPP; - - init_srcu_struct(&mpam_srcu); - - if (!acpi_disabled) - fw_num_msc = acpi_mpam_count_msc(); - else - fw_num_msc = mpam_dt_count_msc(); - - if (fw_num_msc <= 0) { - pr_err("No MSC devices found in firmware\n"); - return -EINVAL; - } - - if (acpi_disabled) - mpam_dt_create_foundling_msc(); - - return platform_driver_register(&mpam_msc_driver); -} -subsys_initcall(mpam_msc_driver_init); diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h deleted file mode 100644 index affd7999fcad..000000000000 --- a/drivers/platform/mpam/mpam_internal.h +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2021 Arm Ltd. 
- -#ifndef MPAM_INTERNAL_H -#define MPAM_INTERNAL_H - -#include -#include -#include -#include -#include -#include -#include - -struct mpam_msc -{ - /* member of mpam_all_msc */ - struct list_head glbl_list; - - int id; - struct platform_device *pdev; - - /* Not modified after mpam_is_enabled() becomes true */ - enum mpam_msc_iface iface; - u32 pcc_subspace_id; - struct mbox_client pcc_cl; - struct pcc_mbox_chan *pcc_chan; - u32 nrdy_usec; - cpumask_t accessibility; - - struct mutex lock; - unsigned long ris_idxs[128 / BITS_PER_LONG]; - u32 ris_max; - - /* mpam_msc_ris of this component */ - struct list_head ris; - - /* - * part_sel_lock protects access to the MSC hardware registers that are - * affected by MPAMCFG_PART_SEL. (including the ID registers) - * If needed, take msc->lock first. - */ - spinlock_t part_sel_lock; - void __iomem * mapped_hwpage; - size_t mapped_hwpage_sz; -}; - -#endif /* MPAM_INTERNAL_H */ -- Gitee From 70a42b8f717720295f31059b846122e89fb28c08 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 108/158] anolis: Revert "dt-bindings: arm: Add MPAM MSC binding" ANBZ: #31045 This reverts commit 5f900916c83404c0f882d1e658ccf236f6d401e2. 
Signed-off-by: Jason Zeng --- .../devicetree/bindings/arm/arm,mpam-msc.yaml | 227 ------------------ 1 file changed, 227 deletions(-) delete mode 100644 Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml diff --git a/Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml b/Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml deleted file mode 100644 index 9d542ecb1a7d..000000000000 --- a/Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml +++ /dev/null @@ -1,227 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/arm/arm,mpam-msc.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Arm Memory System Resource Partitioning and Monitoring (MPAM) - -description: | - The Arm MPAM specification can be found here: - - https://developer.arm.com/documentation/ddi0598/latest - -maintainers: - - Rob Herring - -properties: - compatible: - items: - - const: arm,mpam-msc # Further details are discoverable - - const: arm,mpam-memory-controller-msc - - reg: - maxItems: 1 - description: A memory region containing registers as defined in the MPAM - specification. - - interrupts: - minItems: 1 - items: - - description: error (optional) - - description: overflow (optional, only for monitoring) - - interrupt-names: - oneOf: - - items: - - enum: [ error, overflow ] - - items: - - const: error - - const: overflow - - arm,not-ready-us: - description: The maximum time in microseconds for monitoring data to be - accurate after a settings change. For more information, see the - Not-Ready (NRDY) bit description in the MPAM specification. - - numa-node-id: true # see NUMA binding - - '#address-cells': - const: 1 - - '#size-cells': - const: 0 - -patternProperties: - '^ris@[0-9a-f]$': - type: object - additionalProperties: false - description: | - RIS nodes for each RIS in an MSC. 
These nodes are required for each RIS - implementing known MPAM controls - - properties: - compatible: - enum: - # Bulk storage for cache - - arm,mpam-cache - # Memory bandwidth - - arm,mpam-memory - - reg: - minimum: 0 - maximum: 0xf - - cpus: - $ref: '/schemas/types.yaml#/definitions/phandle-array' - description: - Phandle(s) to the CPU node(s) this RIS belongs to. By default, the parent - device's affinity is used. - - arm,mpam-device: - $ref: '/schemas/types.yaml#/definitions/phandle' - description: - By default, the MPAM enabled device associated with a RIS is the MSC's - parent node. It is possible for each RIS to be associated with different - devices in which case 'arm,mpam-device' should be used. - - required: - - compatible - - reg - -required: - - compatible - - reg - -dependencies: - interrupts: [ interrupt-names ] - -additionalProperties: false - -examples: - - | - /* - cpus { - cpu@0 { - next-level-cache = <&L2_0>; - }; - cpu@100 { - next-level-cache = <&L2_1>; - }; - }; - */ - L2_0: cache-controller-0 { - compatible = "cache"; - cache-level = <2>; - cache-unified; - next-level-cache = <&L3>; - - }; - - L2_1: cache-controller-1 { - compatible = "cache"; - cache-level = <2>; - cache-unified; - next-level-cache = <&L3>; - - }; - - L3: cache-controller@30000000 { - compatible = "arm,dsu-l3-cache", "cache"; - cache-level = <3>; - cache-unified; - - ranges = <0x0 0x30000000 0x800000>; - #address-cells = <1>; - #size-cells = <1>; - - msc@10000 { - compatible = "arm,mpam-msc"; - - /* CPU affinity implied by parent cache node's */ - reg = <0x10000 0x2000>; - interrupts = <1>, <2>; - interrupt-names = "error", "overflow"; - arm,not-ready-us = <1>; - }; - }; - - mem: memory-controller@20000 { - compatible = "foo,a-memory-controller"; - reg = <0x20000 0x1000>; - - #address-cells = <1>; - #size-cells = <1>; - ranges; - - msc@21000 { - compatible = "arm,mpam-memory-controller-msc", "arm,mpam-msc"; - reg = <0x21000 0x1000>; - interrupts = <3>; - interrupt-names = 
"error"; - arm,not-ready-us = <1>; - numa-node-id = <1>; - }; - }; - - iommu@40000 { - reg = <0x40000 0x1000>; - - ranges; - #address-cells = <1>; - #size-cells = <1>; - - msc@41000 { - compatible = "arm,mpam-msc"; - reg = <0 0x1000>; - interrupts = <5>, <6>; - interrupt-names = "error", "overflow"; - arm,not-ready-us = <1>; - - #address-cells = <1>; - #size-cells = <0>; - - ris@2 { - compatible = "arm,mpam-cache"; - reg = <0>; - // TODO: How to map to device(s)? - }; - }; - }; - - msc@80000 { - compatible = "foo,a-standalone-msc"; - reg = <0x80000 0x1000>; - - clocks = <&clks 123>; - - ranges; - #address-cells = <1>; - #size-cells = <1>; - - msc@10000 { - compatible = "arm,mpam-msc"; - - reg = <0x10000 0x2000>; - interrupts = <7>; - interrupt-names = "overflow"; - arm,not-ready-us = <1>; - - #address-cells = <1>; - #size-cells = <0>; - - ris@0 { - compatible = "arm,mpam-cache"; - reg = <0>; - arm,mpam-device = <&L2_0>; - }; - - ris@1 { - compatible = "arm,mpam-memory"; - reg = <1>; - arm,mpam-device = <&mem>; - }; - }; - }; - -... -- Gitee From a14fdb404bcae70e766782a2efcf938938c85358 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 109/158] anolis: Revert "ACPI / MPAM: Parse the MPAM table" ANBZ: #31045 This reverts commit fdae51957a3770b914313379c4bfc7baf44fcfe9. 
Signed-off-by: Jason Zeng --- arch/arm64/Kconfig | 1 - drivers/acpi/arm64/Kconfig | 3 - drivers/acpi/arm64/Makefile | 1 - drivers/acpi/arm64/mpam.c | 369 ------------------------------------ drivers/acpi/tables.c | 2 +- include/linux/arm_mpam.h | 43 ----- 6 files changed, 1 insertion(+), 418 deletions(-) delete mode 100644 drivers/acpi/arm64/mpam.c delete mode 100644 include/linux/arm_mpam.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1cbb49ffb961..bd77f5ad4169 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2068,7 +2068,6 @@ config ARM64_TLB_RANGE config ARM64_MPAM bool "Enable support for MPAM" - select ACPI_MPAM if ACPI help Memory Partitioning and Monitoring is an optional extension that allows the CPUs to mark load and store transactions with diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig index f2fd79f22e7d..b3ed6212244c 100644 --- a/drivers/acpi/arm64/Kconfig +++ b/drivers/acpi/arm64/Kconfig @@ -21,6 +21,3 @@ config ACPI_AGDI config ACPI_APMT bool - -config ACPI_MPAM - bool diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index d8bd6066a4c6..726944648c9b 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -4,6 +4,5 @@ obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_APMT) += apmt.o obj-$(CONFIG_ARM_AMBA) += amba.o -obj-$(CONFIG_ACPI_MPAM) += mpam.o obj-y += dma.o init.o obj-y += thermal_cpufreq.o diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c deleted file mode 100644 index 8a63449f27b5..000000000000 --- a/drivers/acpi/arm64/mpam.c +++ /dev/null @@ -1,369 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2022 Arm Ltd. 
- -/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */ - -#define pr_fmt(fmt) "ACPI MPAM: " fmt - -#include -#include -#include -#include -#include - -#include - -#include - -/* Flags for acpi_table_mpam_msc.*_interrupt_flags */ -#define ACPI_MPAM_MSC_IRQ_MODE_EDGE 1 -#define ACPI_MPAM_MSC_IRQ_TYPE_MASK (3<<1) -#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0 -#define ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER (1<<3) -#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID (1<<4) - -static bool frob_irq(struct platform_device *pdev, int intid, u32 flags, - int *irq, u32 processor_container_uid) -{ - int sense; - - if (!intid) - return false; - - /* 0 in this field indicates a wired interrupt */ - if (flags & ACPI_MPAM_MSC_IRQ_TYPE_MASK) - return false; - - if (flags & ACPI_MPAM_MSC_IRQ_MODE_EDGE) - sense = ACPI_EDGE_SENSITIVE; - else - sense = ACPI_LEVEL_SENSITIVE; - - /* - * If the GSI is in the GIC's PPI range, try and create a partitioned - * percpu interrupt. - */ - if (16 <= intid && intid < 32 && processor_container_uid != ~0) { - pr_err_once("Partitioned interrupts not supported\n"); - return false; - } else { - *irq = acpi_register_gsi(&pdev->dev, intid, sense, - ACPI_ACTIVE_HIGH); - } - if (*irq <= 0) { - pr_err_once("Failed to register interrupt 0x%x with ACPI\n", - intid); - return false; - } - - return true; -} - -static void acpi_mpam_parse_irqs(struct platform_device *pdev, - struct acpi_mpam_msc_node *tbl_msc, - struct resource *res, int *res_idx) -{ - u32 flags, aff = ~0; - int irq; - - flags = tbl_msc->overflow_interrupt_flags; - if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && - flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) - aff = tbl_msc->overflow_interrupt_affinity; - if (frob_irq(pdev, tbl_msc->overflow_interrupt, flags, &irq, aff)) { - res[*res_idx].start = irq; - res[*res_idx].end = irq; - res[*res_idx].flags = IORESOURCE_IRQ; - res[*res_idx].name = "overflow"; - - (*res_idx)++; - } - - flags = tbl_msc->error_interrupt_flags; - 
if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && - flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) - aff = tbl_msc->error_interrupt_affinity; - else - aff = ~0; - if (frob_irq(pdev, tbl_msc->error_interrupt, flags, &irq, aff)) { - res[*res_idx].start = irq; - res[*res_idx].end = irq; - res[*res_idx].flags = IORESOURCE_IRQ; - res[*res_idx].name = "error"; - - (*res_idx)++; - } -} - -static int acpi_mpam_parse_resource(struct mpam_msc *msc, - struct acpi_mpam_resource_node *res) -{ - u32 cache_id; - int level; - - switch (res->locator_type) { - case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE: - cache_id = res->locator.cache_locator.cache_reference; - level = find_acpi_cache_level_from_id(cache_id); - if (level < 0) { - pr_err_once("Bad level for cache with id %u\n", cache_id); - return level; - } - return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE, - level, cache_id); - case ACPI_MPAM_LOCATION_TYPE_MEMORY: - return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_MEMORY, - 255, res->locator.memory_locator.proximity_domain); - default: - /* These get discovered later and treated as unknown */ - return 0; - } -} - -int acpi_mpam_parse_resources(struct mpam_msc *msc, - struct acpi_mpam_msc_node *tbl_msc) -{ - int i, err; - struct acpi_mpam_resource_node *resources; - - resources = (struct acpi_mpam_resource_node *)(tbl_msc + 1); - for (i = 0; i < tbl_msc->num_resouce_nodes; i++) { - err = acpi_mpam_parse_resource(msc, &resources[i]); - if (err) - return err; - } - - return 0; -} - -static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc, - struct platform_device *pdev, - u32 *acpi_id) -{ - bool acpi_id_valid = false; - struct acpi_device *buddy; - char hid[16], uid[16]; - int err; - - memset(&hid, 0, sizeof(hid)); - memcpy(hid, &tbl_msc->hardware_id_linked_device, - sizeof(tbl_msc->hardware_id_linked_device)); - - if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) { - *acpi_id = tbl_msc->instance_id_linked_device; - acpi_id_valid = true; - } - 
- err = snprintf(uid, sizeof(uid), "%u", - tbl_msc->instance_id_linked_device); - if (err < 0 || err >= sizeof(uid)) - return acpi_id_valid; - - buddy = acpi_dev_get_first_match_dev(hid, uid, -1); - if (buddy) { - device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS); - } - - return acpi_id_valid; -} - -static int decode_interface_type(struct acpi_mpam_msc_node *tbl_msc, - enum mpam_msc_iface *iface) -{ - switch (tbl_msc->interface_type){ - case 0: - *iface = MPAM_IFACE_MMIO; - return 0; - case 1: - *iface = MPAM_IFACE_PCC; - return 0; - default: - return -EINVAL; - } -} - -static int __init _parse_table(struct acpi_table_header *table) -{ - char *table_end, *table_offset = (char *)(table + 1); - struct property_entry props[4]; /* needs a sentinel */ - struct acpi_mpam_msc_node *tbl_msc; - int next_res, next_prop, err = 0; - struct acpi_device *companion; - struct platform_device *pdev; - enum mpam_msc_iface iface; - struct resource res[3]; - char uid[16]; - u32 acpi_id; - - table_end = (char *)table + table->length; - - while (table_offset < table_end) { - tbl_msc = (struct acpi_mpam_msc_node *)table_offset; - table_offset += tbl_msc->length; - - /* - * If any of the reserved fields are set, make no attempt to - * parse the msc structure. This will prevent the driver from - * probing all the MSC, meaning it can't discover the system - * wide supported partid and pmg ranges. This avoids whatever - * this MSC is truncating the partids and creating a screaming - * error interrupt. 
- */ - if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2) - continue; - - if (decode_interface_type(tbl_msc, &iface)) - continue; - - next_res = 0; - next_prop = 0; - memset(res, 0, sizeof(res)); - memset(props, 0, sizeof(props)); - - pdev = platform_device_alloc("mpam_msc", tbl_msc->identifier); - if (IS_ERR(pdev)) { - err = PTR_ERR(pdev); - break; - } - - if (tbl_msc->length < sizeof(*tbl_msc)) { - err = -EINVAL; - break; - } - - /* Some power management is described in the namespace: */ - err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier); - if (err > 0 && err < sizeof(uid)) { - companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1); - if (companion) - ACPI_COMPANION_SET(&pdev->dev, companion); - } - - if (iface == MPAM_IFACE_MMIO) { - res[next_res].name = "MPAM:MSC"; - res[next_res].start = tbl_msc->base_address; - res[next_res].end = tbl_msc->base_address + tbl_msc->mmio_size - 1; - res[next_res].flags = IORESOURCE_MEM; - next_res++; - } else if (iface == MPAM_IFACE_PCC) { - props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel", - tbl_msc->base_address); - next_prop++; - } - - acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res); - err = platform_device_add_resources(pdev, res, next_res); - if (err) - break; - - props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us", - tbl_msc->max_nrdy_usec); - - /* - * The MSC's CPU affinity is described via its linked power - * management device, but only if it points at a Processor or - * Processor Container. 
- */ - if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id)) { - props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity", - acpi_id); - } - - err = device_create_managed_software_node(&pdev->dev, props, - NULL); - if (err) - break; - - /* Come back later if you want the RIS too */ - err = platform_device_add_data(pdev, tbl_msc, tbl_msc->length); - if (err) - break; - - platform_device_add(pdev); - } - - if (err) - platform_device_put(pdev); - - return err; -} - -static struct acpi_table_header *get_table(void) -{ - struct acpi_table_header *table; - acpi_status status; - - if (acpi_disabled || !mpam_cpus_have_feature()) - return NULL; - - status = acpi_get_table(ACPI_SIG_MPAM, 0, &table); - if (ACPI_FAILURE(status)) - return NULL; - - if (table->revision != 1) - return NULL; - - return table; -} - - - -static int __init acpi_mpam_parse(void) -{ - struct acpi_table_header *mpam; - int err; - - mpam = get_table(); - if (!mpam) - return 0; - - err = _parse_table(mpam); - acpi_put_table(mpam); - - return err; -} - -static int _count_msc(struct acpi_table_header *table) -{ - char *table_end, *table_offset = (char *)(table + 1); - struct acpi_mpam_msc_node *tbl_msc; - int ret = 0; - - tbl_msc = (struct acpi_mpam_msc_node *)table_offset; - table_end = (char *)table + table->length; - - while (table_offset < table_end) { - if (tbl_msc->length < sizeof(*tbl_msc)) - return -EINVAL; - - ret++; - - table_offset += tbl_msc->length; - tbl_msc = (struct acpi_mpam_msc_node *)table_offset; - } - - return ret; -} - - -int acpi_mpam_count_msc(void) -{ - struct acpi_table_header *mpam; - int ret; - - mpam = get_table(); - if (!mpam) - return 0; - - ret = _count_msc(mpam); - acpi_put_table(mpam); - - return ret; -} - -/* - * Call after ACPI devices have been created, which happens behind acpi_scan_init() - * called from subsys_initcall(). PCC requires the mailbox driver, which is - * initialised from postcore_initcall(). 
- */ -subsys_initcall_sync(acpi_mpam_parse); diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 8961e4101a1d..6b4c292f989d 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -431,7 +431,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = { ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT, ACPI_SIG_AGDI, - ACPI_SIG_NBFT, ACPI_SIG_MPAM }; + ACPI_SIG_NBFT }; #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h deleted file mode 100644 index 0f1d3f07e789..000000000000 --- a/include/linux/arm_mpam.h +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2021 Arm Ltd. - -#ifndef __LINUX_ARM_MPAM_H -#define __LINUX_ARM_MPAM_H - -#include -#include - -struct mpam_msc; - -enum mpam_msc_iface { - MPAM_IFACE_MMIO, /* a real MPAM MSC */ - MPAM_IFACE_PCC, /* a fake MPAM MSC */ -}; - -enum mpam_class_types { - MPAM_CLASS_CACHE, /* Well known caches, e.g. L2 */ - MPAM_CLASS_MEMORY, /* Main memory */ - MPAM_CLASS_UNKNOWN, /* Everything else, e.g. SMMU */ -}; - -#ifdef CONFIG_ACPI_MPAM -/* Parse the ACPI description of resources entries for this MSC. 
*/ -int acpi_mpam_parse_resources(struct mpam_msc *msc, - struct acpi_mpam_msc_node *tbl_msc); -int acpi_mpam_count_msc(void); -#else -static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, - struct acpi_mpam_msc_node *tbl_msc) -{ - return -EINVAL; -} -static inline int acpi_mpam_count_msc(void) { return -EINVAL; } -#endif - -static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, - enum mpam_class_types type, u8 class_id, int component_id) -{ - return -EINVAL; -} - -#endif /* __LINUX_ARM_MPAM_H */ -- Gitee From e346c53ad645e9bfd63c6fbde549cae9913ec19d Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Thu, 12 Mar 2026 21:48:04 -0700 Subject: [PATCH 110/158] anolis: remove config file for CONFIG_ACPI_MPAM ANBZ: #31045 Remove config file: anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM Signed-off-by: Jason Zeng --- anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM | 1 - 1 file changed, 1 deletion(-) delete mode 100644 anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM deleted file mode 100644 index e93cbd36cedc..000000000000 --- a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM +++ /dev/null @@ -1 +0,0 @@ -CONFIG_ACPI_MPAM=y -- Gitee From 859275383ccf3237d9ab6db35c5a38dd7b347ff0 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 111/158] anolis: Revert "drivers: base: cacheinfo: Add helper to find the cache size from cpu+level" ANBZ: #31045 This reverts commit 52d95c086250fb32b23633274f4767fe8a687c1f. 
Signed-off-by: Jason Zeng --- include/linux/cacheinfo.h | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 251e8b393ec7..2802406bb422 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -133,27 +133,6 @@ static inline unsigned long get_cpu_cacheinfo_id(int cpu, int level) return ~0UL; } -/* - * Get the size of the cache associated with @cpu at level @level. - * cpuhp lock must be held. - */ -static inline unsigned int get_cpu_cacheinfo_size(int cpu, int level) -{ - struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); - int i; - - if (!ci->info_list) - return 0; - - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == level) { - return ci->info_list[i].size; - } - } - - return 0; -} - #ifdef CONFIG_ARM64 #define use_arch_cache_info() (true) #else -- Gitee From d323a000e35a8c21036f3c6a83d5e8847aec2271 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 112/158] anolis: Revert "cacheinfo: Expose the code to generate a cache-id from a device_node" ANBZ: #31045 This reverts commit c7d8421f932acf34a3d8fa45682a26e62e161f1d. 
Signed-off-by: Jason Zeng --- drivers/base/cacheinfo.c | 13 +++---------- include/linux/cacheinfo.h | 1 - 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 24200cb949cc..54f16bb71152 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -183,7 +183,7 @@ static bool cache_node_is_unified(struct cacheinfo *this_leaf, return of_property_read_bool(np, "cache-unified"); } -unsigned long cache_of_get_id(struct device_node *np) +static void cache_of_set_id(struct cacheinfo *this_leaf, struct device_node *np) { struct device_node *cpu; unsigned long min_id = ~0UL; @@ -202,15 +202,8 @@ unsigned long cache_of_get_id(struct device_node *np) } } - return min_id; -} - -static void cache_of_set_id(struct cacheinfo *this_leaf, struct device_node *np) -{ - unsigned long id = cache_of_get_id(np); - - if (id != ~0UL) { - this_leaf->id = id; + if (min_id != ~0UL) { + this_leaf->id = min_id; this_leaf->attributes |= CACHE_ID; } } diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2802406bb422..e0717a2d7ca0 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -111,7 +111,6 @@ int acpi_get_cache_info(unsigned int cpu, #endif const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); -unsigned long cache_of_get_id(struct device_node *np); /* * Get the id of the cache associated with @cpu at level @level. -- Gitee From bd305424e1133d96cf268878db7bacf813f870c8 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 113/158] anolis: Revert "cacheinfo: Set cache 'id' based on DT data" ANBZ: #31045 This reverts commit a0b821ea63c7fa7c65ba7cf2674f91b3747ab964. 
Signed-off-by: Jason Zeng --- drivers/base/cacheinfo.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 54f16bb71152..6ad1ac0723a6 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -183,31 +183,6 @@ static bool cache_node_is_unified(struct cacheinfo *this_leaf, return of_property_read_bool(np, "cache-unified"); } -static void cache_of_set_id(struct cacheinfo *this_leaf, struct device_node *np) -{ - struct device_node *cpu; - unsigned long min_id = ~0UL; - - for_each_of_cpu_node(cpu) { - struct device_node *cache_node = cpu; - u64 id = of_get_cpu_hwid(cache_node, 0); - - while ((cache_node = of_find_next_cache_node(cache_node))) { - if ((cache_node == np) && (id < min_id)) { - min_id = id; - of_node_put(cache_node); - break; - } - of_node_put(cache_node); - } - } - - if (min_id != ~0UL) { - this_leaf->id = min_id; - this_leaf->attributes |= CACHE_ID; - } -} - static void cache_of_set_props(struct cacheinfo *this_leaf, struct device_node *np) { @@ -223,7 +198,6 @@ static void cache_of_set_props(struct cacheinfo *this_leaf, cache_get_line_size(this_leaf, np); cache_nr_sets(this_leaf, np); cache_associativity(this_leaf); - cache_of_set_id(this_leaf, np); } static int cache_setup_of_node(unsigned int cpu) -- Gitee From f682ede2bc3a97b475ca3b9f712a30d553dde7d2 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 114/158] anolis: Revert "cacheinfo: Allow for >32-bit cache 'id'" ANBZ: #31045 This reverts commit e95ad3323b7d0cbd7ae7f52541a7ec6733fe4555. 
Signed-off-by: Jason Zeng --- drivers/base/cacheinfo.c | 8 +------- include/linux/cacheinfo.h | 8 ++++---- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 6ad1ac0723a6..0fbb1575e297 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -622,19 +622,13 @@ static ssize_t file_name##_show(struct device *dev, \ return sysfs_emit(buf, "%u\n", this_leaf->object); \ } +show_one(id, id); show_one(level, level); show_one(coherency_line_size, coherency_line_size); show_one(number_of_sets, number_of_sets); show_one(physical_line_partition, physical_line_partition); show_one(ways_of_associativity, ways_of_associativity); -static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct cacheinfo *this_leaf = dev_get_drvdata(dev); - - return sysfs_emit(buf, "%lu\n", this_leaf->id); -} - static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index e0717a2d7ca0..d504eb4b49ab 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -47,7 +47,7 @@ extern unsigned int coherency_max_size; * keeping, the remaining members form the core properties of the cache */ struct cacheinfo { - unsigned long id; + unsigned int id; enum cache_type type; unsigned int level; unsigned int coherency_line_size; @@ -116,7 +116,7 @@ const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); * Get the id of the cache associated with @cpu at level @level. * cpuhp lock must be held. 
*/ -static inline unsigned long get_cpu_cacheinfo_id(int cpu, int level) +static inline int get_cpu_cacheinfo_id(int cpu, int level) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); int i; @@ -125,11 +125,11 @@ static inline unsigned long get_cpu_cacheinfo_id(int cpu, int level) if (ci->info_list[i].level == level) { if (ci->info_list[i].attributes & CACHE_ID) return ci->info_list[i].id; - return ~0UL; + return -1; } } - return ~0UL; + return -1; } #ifdef CONFIG_ARM64 -- Gitee From 7be8d743ff2dd10eac627acd42bfb757071a6b42 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 115/158] anolis: Revert "ACPI / PPTT: Add a helper to fill a cpumask from a cache_id" ANBZ: #31045 This reverts commit 32b40340fc76af7ed3411b00fe338ec7ae4a35be. Signed-off-by: Jason Zeng --- drivers/acpi/pptt.c | 79 ++------------------------------------------ include/linux/acpi.h | 6 ---- 2 files changed, 2 insertions(+), 83 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index f5dcabdc99c9..19fbda328cd8 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -183,10 +183,9 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr, * levels and split cache levels (data/instruction). * @table_hdr: Pointer to the head of the PPTT table * @cpu_node: processor node we wish to count caches for - * @levels: Number of levels if success. (*levels) should be initialized by - * the caller with the value to be used as the starting level. + * @levels: Number of levels if success. * @split_levels: Number of split cache levels (data/instruction) if - * success. Can be NULL. + * success. Can by NULL. * * Given a processor node containing a processing unit, walk into it and count * how many levels exist solely for it, and then walk up each level until we hit @@ -988,8 +987,6 @@ int find_acpi_cache_level_from_id(u32 cache_id) * to find the level... 
*/ for_each_possible_cpu(cpu) { - - num_levels = 0; acpi_cpu_id = get_acpi_id_for_cpu(cpu); cpu_node = acpi_find_processor_node(table, acpi_cpu_id); if (!cpu_node) @@ -1019,75 +1016,3 @@ int find_acpi_cache_level_from_id(u32 cache_id) acpi_put_table(table); return -ENOENT; } - -/** - * acpi_pptt_get_cpumask_from_cache_id() - Get the cpus associated with the - * specified cache - * @cache_id: The id field of the unified cache - * @cpus: Where to buidl the cpumask - * - * Determine which CPUs are below this cache in the PPTT. This allows the property - * to be found even if the CPUs are offline. - * - * The PPTT table must be rev 3 or later, - * - * Return: -ENOENT if the PPTT doesn't exist, or the cache cannot be found. - * Otherwise returns 0 and sets the cpus in the provided cpumask. - */ -int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus) -{ - u32 acpi_cpu_id; - acpi_status status; - int level, cpu, num_levels; - struct acpi_pptt_cache *cache; - struct acpi_table_header *table; - struct acpi_pptt_cache_v1* cache_v1; - struct acpi_pptt_processor *cpu_node; - - cpumask_clear(cpus); - - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); - if (ACPI_FAILURE(status)) { - acpi_pptt_warn_missing(); - return -ENOENT; - } - - if (table->revision < 3) { - acpi_put_table(table); - return -ENOENT; - } - - /* - * If we found the cache first, we'd still need to walk from each cpu. 
- */ - for_each_possible_cpu(cpu) { - - num_levels = 0; - acpi_cpu_id = get_acpi_id_for_cpu(cpu); - cpu_node = acpi_find_processor_node(table, acpi_cpu_id); - if (!cpu_node) - break; - acpi_count_levels(table, cpu_node, &num_levels, NULL); - - /* Start at 1 for L1 */ - for (level = 1; level <= num_levels; level++) { - cache = acpi_find_cache_node(table, acpi_cpu_id, - ACPI_PPTT_CACHE_TYPE_UNIFIED, - level, &cpu_node); - if (!cache) - continue; - - cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1, - cache, - sizeof(struct acpi_pptt_cache)); - - if (cache->flags & ACPI_PPTT_CACHE_ID_VALID && - cache_v1->cache_id == cache_id) { - cpumask_set_cpu(cpu, cpus); - } - } - } - - acpi_put_table(table); - return 0; -} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0ef4b9427e99..b12b4504d443 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1495,7 +1495,6 @@ int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); int find_acpi_cache_level_from_id(u32 cache_id); int acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); -int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1526,11 +1525,6 @@ static inline int acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, { return -EINVAL; } -static inline int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, - cpumask_t *cpus) -{ - return -EINVAL; -} #endif #ifdef CONFIG_ARM64 -- Gitee From 20fd25edebecc4137afb3209db1f5351aa7a3a66 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 116/158] anolis: Revert "ACPI / PPTT: Add a helper to fill a cpumask from a processor container" ANBZ: #31045 This reverts commit f58f05b651e2e23681165480ea01bd375c52b424. 
Signed-off-by: Jason Zeng --- drivers/acpi/pptt.c | 67 -------------------------------------------- include/linux/acpi.h | 6 ---- 2 files changed, 73 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 19fbda328cd8..67e634802c88 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -300,38 +300,6 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he return NULL; } -/* parent_node points into the table, but the table isn't provided. */ -static void acpi_pptt_get_child_cpus(struct acpi_pptt_processor *parent_node, - cpumask_t *cpus) -{ - struct acpi_pptt_processor *cpu_node; - struct acpi_table_header *table_hdr; - acpi_status status; - u32 acpi_id; - int cpu; - - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table_hdr); - if (ACPI_FAILURE(status)) - return; - - for_each_possible_cpu(cpu) { - acpi_id = get_acpi_id_for_cpu(cpu); - cpu_node = acpi_find_processor_node(table_hdr, acpi_id); - - while (cpu_node) { - if (cpu_node == parent_node) { - cpumask_set_cpu(cpu, cpus); - break; - } - cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); - } - } - - acpi_put_table(table_hdr); - - return; -} - /** * acpi_pptt_for_each_container() - Iterate over all processor containers * @@ -388,41 +356,6 @@ int acpi_pptt_for_each_container(acpi_pptt_cpu_callback_t callback, void *arg) return ret; } -struct __cpus_from_container_arg { - u32 acpi_cpu_id; - cpumask_t *cpus; -}; - -static int __cpus_from_container(struct acpi_pptt_processor *container, void *arg) -{ - struct __cpus_from_container_arg *params = arg; - - if (container->acpi_processor_id == params->acpi_cpu_id) - acpi_pptt_get_child_cpus(container, params->cpus); - - return 0; -} - -/** - * acpi_pptt_get_cpus_from_container() - Populate a cpumask with all CPUs in a - * processor containers - * - * Find the specified Processor Container, and fill cpus with all the cpus - * below it. - * - * Return: 0 for a complete walk, or an error if the mask is incomplete. 
- */ -int acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus) -{ - struct __cpus_from_container_arg params; - - params.acpi_cpu_id = acpi_cpu_id; - params.cpus = cpus; - - cpumask_clear(cpus); - return acpi_pptt_for_each_container(&__cpus_from_container, ¶ms); -} - static u8 acpi_cache_type(enum cache_type type) { switch (type) { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b12b4504d443..603578c6f8e9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1494,7 +1494,6 @@ int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); int find_acpi_cache_level_from_id(u32 cache_id); -int acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1520,11 +1519,6 @@ static inline int find_acpi_cache_level_from_id(u32 cache_id) { return -EINVAL; } -static inline int acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, - cpumask_t *cpus) -{ - return -EINVAL; -} #endif #ifdef CONFIG_ARM64 -- Gitee From 4cb45a868646ef0219275ba5ae2220697a084a48 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 117/158] anolis: Revert "ACPI / PPTT: Find PPTT cache level by ID" ANBZ: #31045 This reverts commit f1bb7bf47d797d16f3667d998ae4ffed084085df. 
Signed-off-by: Jason Zeng --- drivers/acpi/pptt.c | 74 -------------------------------------------- include/linux/acpi.h | 5 --- 2 files changed, 79 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 67e634802c88..96927da9ea14 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -875,77 +875,3 @@ int find_acpi_cpu_topology_hetero_id(unsigned int cpu) return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, ACPI_PPTT_ACPI_IDENTICAL); } - - -/** - * find_acpi_cache_level_from_id() - Get the level of the specified cache - * @cache_id: The id field of the unified cache - * - * Determine the level relative to any CPU for the unified cache identified by - * cache_id. This allows the property to be found even if the CPUs are offline. - * - * The returned level can be used to group unified caches that are peers. - * - * The PPTT table must be rev 3 or later, - * - * If one CPUs L2 is shared with another as L3, this function will return - * and unpredictable value. - * - * Return: -ENOENT if the PPTT doesn't exist, or the cache cannot be found. - * Otherwise returns a value which represents the level of the specified cache. - */ -int find_acpi_cache_level_from_id(u32 cache_id) -{ - u32 acpi_cpu_id; - acpi_status status; - int level, cpu, num_levels; - struct acpi_pptt_cache *cache; - struct acpi_table_header *table; - struct acpi_pptt_cache_v1* cache_v1; - struct acpi_pptt_processor *cpu_node; - - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); - if (ACPI_FAILURE(status)) { - acpi_pptt_warn_missing(); - return -ENOENT; - } - - if (table->revision < 3) { - acpi_put_table(table); - return -ENOENT; - } - - /* - * If we found the cache first, we'd still need to walk from each CPU - * to find the level... 
- */ - for_each_possible_cpu(cpu) { - acpi_cpu_id = get_acpi_id_for_cpu(cpu); - cpu_node = acpi_find_processor_node(table, acpi_cpu_id); - if (!cpu_node) - break; - acpi_count_levels(table, cpu_node, &num_levels, NULL); - - /* Start at 1 for L1 */ - for (level = 1; level <= num_levels; level++) { - cache = acpi_find_cache_node(table, acpi_cpu_id, - ACPI_PPTT_CACHE_TYPE_UNIFIED, - level, &cpu_node); - if (!cache) - continue; - - cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1, - cache, - sizeof(struct acpi_pptt_cache)); - - if (cache->flags & ACPI_PPTT_CACHE_ID_VALID && - cache_v1->cache_id == cache_id) { - acpi_put_table(table); - return level; - } - } - } - - acpi_put_table(table); - return -ENOENT; -} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 603578c6f8e9..8ac16b6967d8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1493,7 +1493,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); -int find_acpi_cache_level_from_id(u32 cache_id); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1515,10 +1514,6 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } -static inline int find_acpi_cache_level_from_id(u32 cache_id) -{ - return -EINVAL; -} #endif #ifdef CONFIG_ARM64 -- Gitee From 043ee9647b840fef62eae320e21625c577477c25 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 118/158] anolis: Revert "ACPI / PPTT: Provide a helper to walk processor containers" ANBZ: #31045 This reverts commit 93e30b21d130070a7132d2cea26c06e33b598cd5. 
Signed-off-by: Jason Zeng --- drivers/acpi/pptt.c | 58 --------------------------------------------- 1 file changed, 58 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 96927da9ea14..54676e3d82dd 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -21,8 +21,6 @@ #include #include -typedef int (*acpi_pptt_cpu_callback_t)(struct acpi_pptt_processor *, void *); - static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr, u32 pptt_ref) { @@ -300,62 +298,6 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he return NULL; } -/** - * acpi_pptt_for_each_container() - Iterate over all processor containers - * - * Not all 'Processor' entries in the PPTT are either a CPU or a Processor - * Container, they may exist purely to describe a Private resource. CPUs - * have to be leaves, so a Processor Container is a non-leaf that has the - * 'ACPI Processor ID valid' flag set. - * - * Return: 0 for a complete walk, or the first non-zero value from the callback - * that stopped the walk. 
- */ -int acpi_pptt_for_each_container(acpi_pptt_cpu_callback_t callback, void *arg) -{ - struct acpi_pptt_processor *cpu_node; - struct acpi_table_header *table_hdr; - struct acpi_subtable_header *entry; - bool leaf_flag, has_leaf_flag = false; - unsigned long table_end; - acpi_status status; - u32 proc_sz; - int ret = 0; - - status = acpi_get_table(ACPI_SIG_PPTT, 0, &table_hdr); - if (ACPI_FAILURE(status)) - return 0; - - if (table_hdr->revision > 1) - has_leaf_flag = true; - - table_end = (unsigned long)table_hdr + table_hdr->length; - entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, - sizeof(struct acpi_table_pptt)); - proc_sz = sizeof(struct acpi_pptt_processor); - while ((unsigned long)entry + proc_sz < table_end) { - cpu_node = (struct acpi_pptt_processor *)entry; - if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && - cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID) - { - leaf_flag = cpu_node->flags & ACPI_PPTT_ACPI_LEAF_NODE; - if ((has_leaf_flag && !leaf_flag) || - (!has_leaf_flag && !acpi_pptt_leaf_node(table_hdr, cpu_node))) - { - ret = callback(cpu_node, arg); - if (ret) - break; - } - } - entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, - entry->length); - } - - acpi_put_table(table_hdr); - - return ret; -} - static u8 acpi_cache_type(enum cache_type type) { switch (type) { -- Gitee From efd5f9e65d8426b26a97f24ff63e6f7ae099c563 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 119/158] anolis: Revert "untested: KVM: arm64: Force guest EL1 to use user-space's partid configuration" ANBZ: #31045 This reverts commit b755238aaeac2bd4e3241a9d1f446dea027e3eb7. 
Signed-off-by: Jason Zeng --- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 27 ---------------------- arch/arm64/kvm/hyp/nvhe/switch.c | 11 --------- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 1 - 3 files changed, 39 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index c8767abd693e..bb6b571ec627 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -15,7 +15,6 @@ #include #include #include -#include static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { @@ -244,30 +243,4 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); } -/* - * The _EL0 value was written by the host's context switch, copy this into the - * guest's EL1. - */ -static inline void __mpam_guest_load(void) -{ - if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature()) - write_sysreg_el1(read_sysreg_s(SYS_MPAM0_EL1), SYS_MPAM1); -} - -/* - * Copy the _EL2 register back to _EL1, clearing any trap bits EL2 may have set. - * nVHE world-switch copies the _EL1 register to _EL2. A VHE host writes to the - * _EL2 register as it is aliased by the hardware when TGE is set. 
- */ -static inline void __mpam_guest_put(void) -{ - u64 val, mask = MPAM_SYSREG_PMG_D | MPAM_SYSREG_PMG_I | - MPAM_SYSREG_PARTID_D | MPAM_SYSREG_PARTID_I; - - if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature()) { - val = FIELD_GET(mask, read_sysreg_s(SYS_MPAM2_EL2)); - write_sysreg_el1(val, SYS_MPAM1); - } -} - #endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 3ff46a88f1f4..f1970816fa43 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -282,13 +282,6 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) return __fixup_guest_exit(vcpu, exit_code, handlers); } -/* Use the host thread's partid and pmg for world switch */ -static void __mpam_copy_el1_to_el2(void) -{ - if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature()) - write_sysreg_s(read_sysreg_s(SYS_MPAM1_EL1), SYS_MPAM2_EL2); -} - /* Switch to the guest for legacy non-VHE systems */ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { @@ -298,8 +291,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) bool pmu_switch_needed; u64 exit_code; - __mpam_copy_el1_to_el2(); - /* * Having IRQs masked via PMR when entering the guest means the GIC * will not signal the CPU of interrupts of lower priority, and the @@ -359,7 +350,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __timer_enable_traps(vcpu); __debug_switch_to_guest(vcpu); - __mpam_guest_load(); do { /* Jump in the fire! 
*/ @@ -370,7 +360,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(guest_ctxt); __sysreg32_save_state(vcpu); - __mpam_guest_put(); __timer_disable_traps(vcpu); __hyp_vgic_save_state(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 71af2f8c7568..ae763061909a 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -90,7 +90,6 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) __sysreg32_restore_state(vcpu); __sysreg_restore_user_state(guest_ctxt); __sysreg_restore_el1_state(guest_ctxt); - __mpam_guest_load(); vcpu_set_flag(vcpu, SYSREGS_ON_CPU); } -- Gitee From 47addc395a2d964a6d215b14ad6d7911e235bc18 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:46:25 -0800 Subject: [PATCH 120/158] anolis: Revert "arm64: mpam: Context switch the MPAM registers" ANBZ: #31045 This reverts commit db08e78d00b8608723221714859b0ce1890fcaf5. Signed-off-by: Jason Zeng --- arch/arm64/include/asm/mpam.h | 46 ---------------------------- arch/arm64/include/asm/thread_info.h | 3 -- arch/arm64/kernel/mpam.c | 4 --- arch/arm64/kernel/process.c | 7 ----- 4 files changed, 60 deletions(-) diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h index 1d81a6f26acd..576102d510ad 100644 --- a/arch/arm64/include/asm/mpam.h +++ b/arch/arm64/include/asm/mpam.h @@ -7,8 +7,6 @@ #include #include #include -#include -#include #include #include @@ -51,9 +49,6 @@ DECLARE_STATIC_KEY_FALSE(arm64_mpam_has_hcr); -DECLARE_STATIC_KEY_FALSE(mpam_enabled); -DECLARE_PER_CPU(u64, arm64_mpam_default); -DECLARE_PER_CPU(u64, arm64_mpam_current); /* check whether all CPUs have MPAM support */ static __always_inline bool mpam_cpus_have_feature(void) @@ -78,45 +73,4 @@ static inline void __init __enable_mpam_hcr(void) static_branch_enable(&arm64_mpam_has_hcr); } -/* - * The resctrl filesystem writes to the partid/pmg values for threads and CPUs, - * which may race with reads 
in __mpam_sched_in(). Ensure only one of the old - * or new values are used. Particular care should be taken with the pmg field - * as __mpam_sched_in() may read a partid and pmg that don't match, causing - * this value to be stored with cache allocations, despite being considered - * 'free' by resctrl. - * - * A value in struct thread_info is used instead of struct task_struct as the - * cpu's u64 register format is used, but struct task_struct has two u32'. - */ -static inline u64 mpam_get_regval(struct task_struct *tsk) -{ -#ifdef CONFIG_ARM64_MPAM - return READ_ONCE(task_thread_info(tsk)->mpam_partid_pmg); -#else - return 0; -#endif -} - -static inline void mpam_thread_switch(struct task_struct *tsk) -{ - u64 oldregval; - int cpu = smp_processor_id(); - u64 regval = mpam_get_regval(tsk); - - if (!IS_ENABLED(CONFIG_ARM64_MPAM) || - !static_branch_likely(&mpam_enabled)) - return; - - if (!regval) - regval = READ_ONCE(per_cpu(arm64_mpam_default, cpu)); - - oldregval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); - if (oldregval == regval) - return; - - /* Synchronising this write is left until the ERET to EL0 */ - write_sysreg_s(regval, SYS_MPAM0_EL1); - WRITE_ONCE(per_cpu(arm64_mpam_current, cpu), regval); -} #endif /* __ASM__MPAM_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index f36803eb2ebd..872b8e810a37 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -41,9 +41,6 @@ struct thread_info { #ifdef CONFIG_SHADOW_CALL_STACK void *scs_base; void *scs_sp; -#endif -#ifdef CONFIG_ARM64_MPAM - u64 mpam_partid_pmg; #endif u32 cpu; }; diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c index d88a45a299fa..0da6b1b6d034 100644 --- a/arch/arm64/kernel/mpam.c +++ b/arch/arm64/kernel/mpam.c @@ -4,11 +4,7 @@ #include #include -#include DEFINE_STATIC_KEY_FALSE(arm64_mpam_has_hcr); EXPORT_SYMBOL_FOR_KVM(arm64_mpam_has_hcr); -DEFINE_STATIC_KEY_FALSE(mpam_enabled); 
EXPORT_SYMBOL_FOR_KVM(mpam_enabled); -DEFINE_PER_CPU(u64, arm64_mpam_default); -DEFINE_PER_CPU(u64, arm64_mpam_current); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index c3b83023851f..a44f0e97f6d7 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include @@ -578,12 +577,6 @@ struct task_struct *__switch_to(struct task_struct *prev, if (prev->thread.sctlr_user != next->thread.sctlr_user) update_sctlr_el1(next->thread.sctlr_user); - /* - * MPAM thread switch happens after the DSB to ensure prev's accesses - * use prev's MPAM settings. - */ - mpam_thread_switch(next); - /* the actual thread switch */ last = cpu_switch_to(prev, next); -- Gitee From fe18cce61098b174028ff054145609474e2e2046 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:47:24 -0800 Subject: [PATCH 121/158] anolis: Revert "KVM: arm64: Disable MPAM visibility by default, and handle traps" ANBZ: #31045 This reverts commit b56998c7ab741b1917e318a15a639d1ec234d597. Signed-off-by: Jason Zeng --- arch/arm64/kvm/sys_regs.c | 69 +++++++++------------------------------ 1 file changed, 15 insertions(+), 54 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7738226ef8cf..e6453a0938de 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -418,29 +418,21 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu, return true; } -static bool trap_mpam(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool workaround_bad_mpam_abi(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) { - u64 aa64pfr0_el1 = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1); - /* - * What did we expose to the guest? 
- * Earlier guests may have seen the ID bits, which can't be removed - * without breaking migration, but MPAMIDR_EL1 can advertise all-zeroes, - * indicating there are zero PARTID/PMG supported by the CPU, allowing - * the other two trapped registers (MPAM1_EL1 and MPAM0_EL1) to be - * treated as RAZ/WI. + * The ID register can't be removed without breaking migration, + * but MPAMIDR_EL1 can advertise all-zeroes, indicating there are zero + * PARTID/PMG supported by the CPU, allowing the other two trapped + * registers (MPAM1_EL1 and MPAM0_EL1) to be treated as RAZ/WI. * Emulating MPAM1_EL1 as RAZ/WI means the guest sees the MPAMEN bit * as clear, and realises MPAM isn't usable on this CPU. */ - if (FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, aa64pfr0_el1)) { - p->regval = 0; - return true; - } + p->regval = 0; - kvm_inject_undefined(vcpu); - return false; + return true; } static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, @@ -1341,36 +1333,6 @@ static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, return arm64_ftr_safe_value(&kvm_ftr, new, cur); } -static u64 kvm_arm64_ftr_max(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) -{ - u64 pfr0, val = rd->reset(vcpu, rd); - u32 field, id = reg_to_encoding(rd); - - /* - * Some values may reset to a lower value than can be supported, - * get the maximum feature value. - */ - switch (id) { - case SYS_ID_AA64PFR0_EL1: - pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - - /* - * MPAM resets to 0, but migration of MPAM=1 guests is needed. - * See trap_mpam() for more. - */ - field = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT); - if (field == ID_AA64PFR0_EL1_MPAM_1) { - val &= ~ID_AA64PFR0_EL1_MPAM_MASK; - val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, ID_AA64PFR0_EL1_MPAM_1); - } - - break; - } - - return val; -} - /** * arm64_check_features() - Check if a feature register value constitutes * a subset of features indicated by the idreg's KVM sanitised limit. 
@@ -1391,7 +1353,8 @@ static int arm64_check_features(struct kvm_vcpu *vcpu, const struct arm64_ftr_bits *ftrp = NULL; u32 id = reg_to_encoding(rd); u64 writable_mask = rd->val; - u64 limit, mask = 0; + u64 limit = rd->reset(vcpu, rd); + u64 mask = 0; u32 midr = read_cpuid_id(); /* @@ -1406,7 +1369,6 @@ static int arm64_check_features(struct kvm_vcpu *vcpu, if (!ftr_reg) return -EINVAL; - limit = kvm_arm64_ftr_max(vcpu, rd); ftrp = ftr_reg->ftr_bits; for (; ftrp && ftrp->width; ftrp++) { @@ -1624,8 +1586,7 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, /* * MPAM is disabled by default as KVM also needs a set of PARTID to * program the MPAMVPMx_EL2 PARTID remapping registers with. But some - * older kernels let the guest see the ID bit. Turning it on causes - * the registers to be emulated as RAZ/WI. See trap_mpam() for more. + * older kernels let the guest see the ID bit. */ val &= ~ID_AA64PFR0_EL1_MPAM_MASK; @@ -2393,11 +2354,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_LOREA_EL1), trap_loregion }, { SYS_DESC(SYS_LORN_EL1), trap_loregion }, { SYS_DESC(SYS_LORC_EL1), trap_loregion }, - { SYS_DESC(SYS_MPAMIDR_EL1), trap_mpam }, + { SYS_DESC(SYS_MPAMIDR_EL1), workaround_bad_mpam_abi }, { SYS_DESC(SYS_LORID_EL1), trap_loregion }, - { SYS_DESC(SYS_MPAM1_EL1), trap_mpam }, - { SYS_DESC(SYS_MPAM0_EL1), trap_mpam }, + { SYS_DESC(SYS_MPAM1_EL1), workaround_bad_mpam_abi }, + { SYS_DESC(SYS_MPAM0_EL1), workaround_bad_mpam_abi }, { SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 }, { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 }, -- Gitee From 4d5887249cec9e61ee530cb3625b5835d7073901 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:47:24 -0800 Subject: [PATCH 122/158] anolis: Revert "KVM: arm64: Fix missing traps of guest accesses to the MPAM registers" ANBZ: #31045 This reverts commit 1348610175294cc4a7f6cd8f5841738ee7f789f8. 
Signed-off-by: Jason Zeng --- arch/arm64/include/asm/kvm_arm.h | 1 - arch/arm64/include/asm/mpam.h | 4 ++-- arch/arm64/kernel/image-vars.h | 5 ---- arch/arm64/kvm/hyp/include/hyp/switch.h | 32 ------------------------- arch/arm64/kvm/sys_regs.c | 20 ---------------- 5 files changed, 2 insertions(+), 60 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 96c7aad7cc43..c11010965b8e 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -108,7 +108,6 @@ #define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) -#define MPAMHCR_HOST_FLAGS 0 /* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h index 576102d510ad..a4a969be233a 100644 --- a/arch/arm64/include/asm/mpam.h +++ b/arch/arm64/include/asm/mpam.h @@ -51,7 +51,7 @@ DECLARE_STATIC_KEY_FALSE(arm64_mpam_has_hcr); /* check whether all CPUs have MPAM support */ -static __always_inline bool mpam_cpus_have_feature(void) +static inline bool mpam_cpus_have_feature(void) { if (IS_ENABLED(CONFIG_ARM64_MPAM)) return cpus_have_final_cap(ARM64_MPAM); @@ -59,7 +59,7 @@ static __always_inline bool mpam_cpus_have_feature(void) } /* check whether all CPUs have MPAM virtualisation support */ -static __always_inline bool mpam_cpus_have_mpam_hcr(void) +static inline bool mpam_cpus_have_mpam_hcr(void) { if (IS_ENABLED(CONFIG_ARM64_MPAM)) return static_branch_unlikely(&arm64_mpam_has_hcr); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index d10d3fed31d9..35f3c7959513 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -64,11 +64,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler); /* Vectors installed by hyp-init on reset HVC. 
*/ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* Additional static keys for cpufeatures */ -#ifdef CONFIG_ARM64_MPAM -KVM_NVHE_ALIAS(arm64_mpam_has_hcr); -#endif - /* Static keys which are set if a vGIC trap should be handled in hyp. */ KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 2679527eb2f3..2b6c9724d55f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -173,35 +172,6 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2); } -static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu) -{ - u64 r = MPAM_SYSREG_TRAP_MPAM0_EL1 | MPAM_SYSREG_TRAP_MPAM1_EL1; - - if (!mpam_cpus_have_feature()) - return; - - /* trap guest access to MPAMIDR_EL1 */ - if (mpam_cpus_have_mpam_hcr()) { - write_sysreg_s(MPAMHCR_TRAP_MPAMIDR, SYS_MPAMHCR_EL2); - } else { - /* From v1.1 TIDR can trap MPAMIDR, set it unconditionally */ - r |= MPAM_SYSREG_TRAP_IDR; - } - - write_sysreg_s(r, SYS_MPAM2_EL2); -} - -static inline void __deactivate_traps_mpam(void) -{ - if (!mpam_cpus_have_feature()) - return; - - write_sysreg_s(0, SYS_MPAM2_EL2); - - if (mpam_cpus_have_mpam_hcr()) - write_sysreg_s(MPAMHCR_HOST_FLAGS, SYS_MPAMHCR_EL2); -} - static inline void __activate_traps_common(struct kvm_vcpu *vcpu) { /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ @@ -242,7 +212,6 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) } __activate_traps_hfgxtr(vcpu); - __activate_traps_mpam(vcpu); } static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) @@ -262,7 +231,6 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); __deactivate_traps_hfgxtr(vcpu); - 
__deactivate_traps_mpam(); } static inline void ___activate_traps(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e6453a0938de..623737f65228 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -418,23 +418,6 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu, return true; } -static bool workaround_bad_mpam_abi(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - /* - * The ID register can't be removed without breaking migration, - * but MPAMIDR_EL1 can advertise all-zeroes, indicating there are zero - * PARTID/PMG supported by the CPU, allowing the other two trapped - * registers (MPAM1_EL1 and MPAM0_EL1) to be treated as RAZ/WI. - * Emulating MPAM1_EL1 as RAZ/WI means the guest sees the MPAMEN bit - * as clear, and realises MPAM isn't usable on this CPU. - */ - p->regval = 0; - - return true; -} - static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -2354,11 +2337,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_LOREA_EL1), trap_loregion }, { SYS_DESC(SYS_LORN_EL1), trap_loregion }, { SYS_DESC(SYS_LORC_EL1), trap_loregion }, - { SYS_DESC(SYS_MPAMIDR_EL1), workaround_bad_mpam_abi }, { SYS_DESC(SYS_LORID_EL1), trap_loregion }, - { SYS_DESC(SYS_MPAM1_EL1), workaround_bad_mpam_abi }, - { SYS_DESC(SYS_MPAM0_EL1), workaround_bad_mpam_abi }, { SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 }, { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 }, -- Gitee From 17960c2d45839983b888c3355bb8a226eeb10191 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:49:27 -0800 Subject: [PATCH 123/158] anolis: Revert "arm64: cpufeature: discover CPU support for MPAM" ANBZ: #31045 This reverts commit fb2ecde8397b4ab884a0f04ea310bfd04c867e29. 
[jz: also remove the EXPORT_SYMBOL_FOR_KVM for arm64_mpam_has_hcr and mpam_enabled which were introduced by ec04d85a267c ("anolis: KVM: arm64: Adapt kernel to allow KVM to be built as a module")] Signed-off-by: Jason Zeng --- .../arch/arm64/cpu-feature-registers.rst | 2 - arch/arm64/Kconfig | 19 +---- arch/arm64/include/asm/cpu.h | 1 - arch/arm64/include/asm/cpufeature.h | 13 ---- arch/arm64/include/asm/mpam.h | 76 ------------------- arch/arm64/include/asm/sysreg.h | 8 -- arch/arm64/kernel/Makefile | 2 - arch/arm64/kernel/cpufeature.c | 69 ----------------- arch/arm64/kernel/cpuinfo.c | 4 - arch/arm64/kernel/mpam.c | 10 --- arch/arm64/tools/cpucaps | 1 - arch/arm64/tools/sysreg | 33 -------- 12 files changed, 1 insertion(+), 237 deletions(-) delete mode 100644 arch/arm64/include/asm/mpam.h delete mode 100644 arch/arm64/kernel/mpam.c diff --git a/Documentation/arch/arm64/cpu-feature-registers.rst b/Documentation/arch/arm64/cpu-feature-registers.rst index 253e9743de2f..44f9bd78539d 100644 --- a/Documentation/arch/arm64/cpu-feature-registers.rst +++ b/Documentation/arch/arm64/cpu-feature-registers.rst @@ -152,8 +152,6 @@ infrastructure: +------------------------------+---------+---------+ | DIT | [51-48] | y | +------------------------------+---------+---------+ - | MPAM | [43-40] | n | - +------------------------------+---------+---------+ | SVE | [35-32] | y | +------------------------------+---------+---------+ | GIC | [27-24] | n | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bd77f5ad4169..acedee7c1667 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2066,24 +2066,7 @@ config ARM64_TLB_RANGE The feature introduces new assembly instructions, and they were support when binutils >= 2.30. -config ARM64_MPAM - bool "Enable support for MPAM" - help - Memory Partitioning and Monitoring is an optional extension - that allows the CPUs to mark load and store transactions with - labels for partition-id and performance-monitoring-group. 
- System components, such as the caches, can use the partition-id - to apply a performance policy. MPAM monitors can use the - partition-id and performance-monitoring-group to measure the - cache occupancy or data throughput. - - Use of this extension requires CPU support, support in the - memory system components (MSC), and a description from firmware - of where the MSC are in the address space. - - MPAM is exposed to user-space via the resctrl pseudo filesystem. - -endmenu +endmenu # "ARMv8.4 architectural features" menu "ARMv8.5 architectural features" diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 33328b1d4a39..6344fc126262 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -47,7 +47,6 @@ struct cpuinfo_arm64 { u64 reg_revidr; u64 reg_gmid; u64 reg_smidr; - u64 reg_mpamidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 4e980563ad95..4dd7dce1917b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -619,13 +619,6 @@ static inline bool id_aa64pfr1_sme(u64 pfr1) return val > 0; } -static inline bool id_aa64pfr0_mpam(u64 pfr0) -{ - u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT); - - return val > 0; -} - static inline bool id_aa64pfr1_mte(u64 pfr1) { u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT); @@ -855,12 +848,6 @@ static inline bool system_supports_gcs(void) alternative_has_cap_unlikely(ARM64_HAS_GCS); } -static inline bool cpus_support_mpam(void) -{ - return IS_ENABLED(CONFIG_ARM64_MPAM) && - cpus_have_final_cap(ARM64_MPAM); -} - int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h deleted file mode 100644 index a4a969be233a..000000000000 --- 
a/arch/arm64/include/asm/mpam.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2021 Arm Ltd. */ - -#ifndef __ASM__MPAM_H -#define __ASM__MPAM_H - -#include -#include -#include - -#include -#include -#include - -/* CPU Registers */ -#define MPAM_SYSREG_EN BIT_ULL(63) -#define MPAM_SYSREG_TRAP_IDR BIT_ULL(58) -#define MPAM_SYSREG_TRAP_MPAM0_EL1 BIT_ULL(49) -#define MPAM_SYSREG_TRAP_MPAM1_EL1 BIT_ULL(48) -#define MPAM_SYSREG_PMG_D GENMASK(47, 40) -#define MPAM_SYSREG_PMG_I GENMASK(39, 32) -#define MPAM_SYSREG_PARTID_D GENMASK(31, 16) -#define MPAM_SYSREG_PARTID_I GENMASK(15, 0) - -#define MPAMIDR_PMG_MAX GENMASK(40, 32) -#define MPAMIDR_PMG_MAX_SHIFT 32 -#define MPAMIDR_PMG_MAX_LEN 8 -#define MPAMIDR_VPMR_MAX GENMASK(20, 18) -#define MPAMIDR_VPMR_MAX_SHIFT 18 -#define MPAMIDR_VPMR_MAX_LEN 3 -#define MPAMIDR_HAS_HCR BIT(17) -#define MPAMIDR_HAS_HCR_SHIFT 17 -#define MPAMIDR_PARTID_MAX GENMASK(15, 0) -#define MPAMIDR_PARTID_MAX_SHIFT 0 -#define MPAMIDR_PARTID_MAX_LEN 15 - -#define MPAMHCR_EL0_VPMEN BIT_ULL(0) -#define MPAMHCR_EL1_VPMEN BIT_ULL(1) -#define MPAMHCR_GSTAPP_PLK BIT_ULL(8) -#define MPAMHCR_TRAP_MPAMIDR BIT_ULL(31) - -/* Properties of the VPM registers */ -#define MPAM_VPM_NUM_REGS 8 -#define MPAM_VPM_PARTID_LEN 16 -#define MPAM_VPM_PARTID_MASK 0xffff -#define MPAM_VPM_REG_LEN 64 -#define MPAM_VPM_PARTIDS_PER_REG (MPAM_VPM_REG_LEN / MPAM_VPM_PARTID_LEN) -#define MPAM_VPM_MAX_PARTID (MPAM_VPM_NUM_REGS * MPAM_VPM_PARTIDS_PER_REG) - - -DECLARE_STATIC_KEY_FALSE(arm64_mpam_has_hcr); - -/* check whether all CPUs have MPAM support */ -static inline bool mpam_cpus_have_feature(void) -{ - if (IS_ENABLED(CONFIG_ARM64_MPAM)) - return cpus_have_final_cap(ARM64_MPAM); - return false; -} - -/* check whether all CPUs have MPAM virtualisation support */ -static inline bool mpam_cpus_have_mpam_hcr(void) -{ - if (IS_ENABLED(CONFIG_ARM64_MPAM)) - return static_branch_unlikely(&arm64_mpam_has_hcr); - return false; -} - -/* enable MPAM 
virtualisation support */ -static inline void __init __enable_mpam_hcr(void) -{ - if (IS_ENABLED(CONFIG_ARM64_MPAM)) - static_branch_enable(&arm64_mpam_has_hcr); -} - -#endif /* __ASM__MPAM_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 752e45c29afa..e4bb2c2a136a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -503,13 +503,6 @@ #define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0) #define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0) -#define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0) -#define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1) -#define SYS_MPAM2_EL2 sys_reg(3, 4, 10, 5, 0) - -#define __VPMn_op2(n) ((n) & 0x7) -#define SYS_MPAM_VPMn_EL2(n) sys_reg(3, 4, 10, 6, __VPMn_op2(n)) - #define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0) #define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1) #define SYS_RMR_EL2 sys_reg(3, 4, 12, 0, 2) @@ -574,7 +567,6 @@ #define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) -#define SYS_MPAM1_EL12 sys_reg(3, 5, 10, 5, 0) #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) #define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) #define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index e9827519f567..e3bab76856a3 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -67,13 +67,11 @@ obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o - obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_SDEI_WATCHDOG) += watchdog_sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o -obj-$(CONFIG_ARM64_MPAM) += mpam.o obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o 
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 62eceb2cf5d0..c214639da4a4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -84,7 +84,6 @@ #include #include #include -#include #include #include #include @@ -666,18 +665,6 @@ static const struct arm64_ftr_bits ftr_smcr[] = { ARM64_FTR_END, }; -static const struct arm64_ftr_bits ftr_mpamidr[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, - MPAMIDR_PMG_MAX_SHIFT, MPAMIDR_PMG_MAX_LEN, 0), /* PMG_MAX */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, - MPAMIDR_VPMR_MAX_SHIFT, MPAMIDR_VPMR_MAX_LEN, 0), /* VPMR_MAX */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, - MPAMIDR_HAS_HCR_SHIFT, 1, 0), /* HAS_HCR */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, - MPAMIDR_PARTID_MAX_SHIFT, MPAMIDR_PARTID_MAX_LEN, 0), /* PARTID_MAX */ - ARM64_FTR_END, -}; - /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -797,9 +784,6 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr), - /* Op1 = 0, CRn = 10, CRm = 4 */ - ARM64_FTR_REG(SYS_MPAMIDR_EL1, ftr_mpamidr), - /* Op1 = 1, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), @@ -1124,9 +1108,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) vec_init_vq_map(ARM64_VEC_SME); } - if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) - init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr); - if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); @@ -1393,11 +1374,6 @@ void update_cpu_features(int cpu, vec_update_vq_map(ARM64_VEC_SME); } - if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) { - taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu, - info->reg_mpamidr, boot->reg_mpamidr); - } - /* * The kernel uses the LDGM/STGM instructions and 
the number of tags * they read/write depends on the GMID_EL1.BS field. Check that the @@ -2350,39 +2326,6 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap) return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT); } -static bool __maybe_unused -test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope) -{ - if (!has_cpuid_feature(entry, scope)) - return false; - - /* Check firmware actually enabled MPAM on this cpu. */ - return (read_sysreg_s(SYS_MPAM1_EL1) & MPAM_SYSREG_EN); -} - -static void __maybe_unused -cpu_enable_mpam(const struct arm64_cpu_capabilities *entry) -{ - /* - * Access by the kernel (at EL1) should use the reserved PARTID - * which is configured unrestricted. This avoids priority-inversion - * where latency sensitive tasks have to wait for a task that has - * been throttled to release the lock. - */ - write_sysreg_s(0, SYS_MPAM1_EL1); -} - -static void mpam_extra_caps(void) -{ - u64 idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1); - - if (!IS_ENABLED(CONFIG_ARM64_MPAM)) - return; - - if (idr & MPAMIDR_HAS_HCR) - __enable_mpam_hcr(); -} - static const struct arm64_cpu_capabilities arm64_features[] = { { .capability = ARM64_ALWAYS_BOOT, @@ -2871,16 +2814,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_fpmr, ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP) }, -#ifdef CONFIG_ARM64_MPAM - { - .desc = "Memory Partitioning And Monitoring", - .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .capability = ARM64_MPAM, - .matches = test_has_mpam, - .cpu_enable = cpu_enable_mpam, - ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, MPAM, 1) - }, -#endif #ifdef CONFIG_ARM64_POE { .desc = "Stage-1 Permission Overlay Extension (S1POE)", @@ -3595,8 +3528,6 @@ void __init setup_cpu_features(void) if (!cwg) pr_warn("No Cache Writeback Granule information, assuming %d\n", ARCH_DMA_MINALIGN); - - mpam_extra_caps(); } static int enable_mismatched_32bit_el0(unsigned int cpu) diff --git a/arch/arm64/kernel/cpuinfo.c 
b/arch/arm64/kernel/cpuinfo.c index 8a41d13f97d1..d9d5ba68b1d7 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -504,10 +504,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) __cpuinfo_store_cpu_32bit(&info->aarch32); - if (IS_ENABLED(CONFIG_ARM64_MPAM) && - id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) - info->reg_mpamidr = read_cpuid(MPAMIDR_EL1); - cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c deleted file mode 100644 index 0da6b1b6d034..000000000000 --- a/arch/arm64/kernel/mpam.c +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2021 Arm Ltd. */ - -#include - -#include - -DEFINE_STATIC_KEY_FALSE(arm64_mpam_has_hcr); -EXPORT_SYMBOL_FOR_KVM(arm64_mpam_has_hcr); -EXPORT_SYMBOL_FOR_KVM(mpam_enabled); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 9946e38b2b65..0f68f9430e44 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -59,7 +59,6 @@ HW_DBM KVM_HVHE KVM_PROTECTED_MODE MISMATCHED_CACHE_TYPE -MPAM MTE MTE_ASYMM SME diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 4ff32f97fcc1..d32d591859bf 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -3182,22 +3182,6 @@ Res0 1 Field 0 EN EndSysreg -Sysreg MPAMIDR_EL1 3 0 10 4 4 -Res0 63:62 -Field 61 HAS_SDEFLT -Field 60 HAS_FORCE_NS -Field 59 SP4 -Field 58 HAS_TIDR -Field 57 HAS_ALTSP -Res0 56:40 -Field 39:32 PMG_MAX -Res0 31:21 -Field 20:18 VPMR_MAX -Field 17 HAS_HCR -Res0 16 -Field 15:0 PARTID_MAX -EndSysreg - Sysreg LORID_EL1 3 0 10 4 7 Res0 63:24 Field 23:16 LD @@ -3205,22 +3189,6 @@ Res0 15:8 Field 7:0 LR EndSysreg -Sysreg MPAM1_EL1 3 0 10 5 0 -Res0 63:48 -Field 47:40 PMG_D -Field 39:32 PMG_I -Field 31:16 PARTID_D -Field 15:0 PARTID_I -EndSysreg - -Sysreg MPAM0_EL1 3 0 10 5 1 -Res0 63:48 -Field 47:40 PMG_D -Field 39:32 PMG_I -Field 31:16 PARTID_D -Field 
15:0 PARTID_I -EndSysreg - Sysreg ISR_EL1 3 0 12 1 0 Res0 63:11 Field 10 IS @@ -3234,7 +3202,6 @@ EndSysreg Sysreg ICC_NMIAR1_EL1 3 0 12 9 5 Res0 63:24 Field 23:0 INTID - EndSysreg Sysreg TRBLIMITR_EL1 3 0 9 11 0 -- Gitee From 001784df41267fd2844fe7aefa2e988d16465abf Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:49:27 -0800 Subject: [PATCH 124/158] anolis: Revert "arm64: head.S: Initialise MPAM EL2 registers and disable traps" ANBZ: #31045 This reverts commit 4ccbd4e8f883eb3f56e1ada8b067543d07d473dc. Signed-off-by: Jason Zeng --- arch/arm64/include/asm/el2_setup.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index d93c2b5b0b59..44336899ca27 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -274,21 +274,6 @@ msr spsr_el2, x0 .endm -.macro __init_el2_mpam -#ifdef CONFIG_ARM64_MPAM - /* Memory Partioning And Monitoring: disable EL2 traps */ - mrs x1, id_aa64pfr0_el1 - ubfx x0, x1, #ID_AA64PFR0_EL1_MPAM_SHIFT, #4 - cbz x0, .Lskip_mpam_\@ // skip if no MPAM - msr_s SYS_MPAM2_EL2, xzr // use the default partition - // and disable lower traps - mrs_s x0, SYS_MPAMIDR_EL1 - tbz x0, #17, .Lskip_mpam_\@ // skip if no MPAMHCR reg - msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 -.Lskip_mpam_\@: -#endif /* CONFIG_ARM64_MPAM */ -.endm - /** * Initialize EL2 registers to sane values. This should be called early on all * cores that were booted in EL2. 
Note that everything gets initialised as @@ -307,7 +292,6 @@ __init_el2_stage2 __init_el2_gicv3 __init_el2_hstr - __init_el2_mpam __init_el2_nvhe_idregs __init_el2_cptr __init_el2_fgt -- Gitee From ab828dc99ed22c47ffcab60cb392bc465c0175d1 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:16 -0800 Subject: [PATCH 125/158] anolis: Revert "x86/resctrl: Move the resctrl filesystem code to /fs/resctrl" ANBZ: #31045 This reverts commit c7d51b50e13f5e409032cc3d60d038286802e6c1. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 15 + arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 510 +++ arch/x86/kernel/cpu/resctrl/internal.h | 310 ++ arch/x86/kernel/cpu/resctrl/monitor.c | 821 ++++ arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 1093 ++++++ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 4284 ++++++++++++++++++++- fs/resctrl/ctrlmondata.c | 532 --- fs/resctrl/internal.h | 340 -- fs/resctrl/monitor.c | 843 ---- fs/resctrl/psuedo_lock.c | 1122 ------ fs/resctrl/rdtgroup.c | 4013 ------------------- 11 files changed, 6888 insertions(+), 6995 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 51389ebc0f19..87cd508588e5 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -164,6 +164,21 @@ static inline void cache_alloc_hsw_probe(void) rdt_alloc_capable = true; } +bool is_mba_sc(struct rdt_resource *r) +{ + if (!r) + r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + + /* + * The software controller support is only applicable to MBA resource. + * Make sure to check for resource type. + */ + if (r->rid != RDT_RESOURCE_MBA) + return false; + + return r->membw.mba_sc; +} + /* * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values * exposed to user interface and the h/w understandable delay values. 
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index c5c3eaea27b6..4d91a8abb24d 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -23,6 +23,265 @@ #include "internal.h" +struct rdt_parse_data { + struct rdtgroup *rdtgrp; + char *buf; +}; + +typedef int (ctrlval_parser_t)(struct rdt_parse_data *data, + struct resctrl_schema *s, + struct rdt_domain *d); + +/* + * Check whether MBA bandwidth percentage value is correct. The value is + * checked against the minimum and max bandwidth values specified by the + * hardware. The allocated bandwidth percentage is rounded to the next + * control step available on the hardware. + */ +static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) +{ + int ret; + u32 bw; + + /* + * Only linear delay values is supported for current Intel SKUs. + */ + if (!r->membw.delay_linear && r->membw.arch_needs_linear) { + rdt_last_cmd_puts("No support for non-linear MB domains\n"); + return false; + } + + ret = kstrtou32(buf, 10, &bw); + if (ret) { + rdt_last_cmd_printf("Invalid MB value %s\n", buf); + return false; + } + + /* Nothing else to do if software controller is enabled. 
*/ + if (is_mba_sc(r)) { + *data = bw; + return true; + } + + if (bw < r->membw.min_bw || bw > r->default_ctrl) { + rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", + bw, r->membw.min_bw, r->default_ctrl); + return false; + } + + *data = roundup(bw, (unsigned long)r->membw.bw_gran); + return true; +} + +static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_domain *d) +{ + struct resctrl_staged_config *cfg; + u32 closid = data->rdtgrp->closid; + struct rdt_resource *r = s->res; + u32 bw_val; + + cfg = &d->staged_config[s->conf_type]; + if (cfg->have_new_ctrl) { + rdt_last_cmd_printf("Duplicate domain %d\n", d->id); + return -EINVAL; + } + + if (!bw_validate(data->buf, &bw_val, r)) + return -EINVAL; + + if (is_mba_sc(r)) { + d->mbps_val[closid] = bw_val; + return 0; + } + + cfg->new_ctrl = bw_val; + cfg->have_new_ctrl = true; + + return 0; +} + +/* + * Check whether a cache bit mask is valid. + * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID: + * - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1 + * - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1 + * + * Haswell does not support a non-contiguous 1s value and additionally + * requires at least two bits set. + * AMD allows non-contiguous bitmasks. + */ +static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) +{ + unsigned long first_bit, zero_bit, val; + unsigned int cbm_len = r->cache.cbm_len; + int ret; + + ret = kstrtoul(buf, 16, &val); + if (ret) { + rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf); + return false; + } + + if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) { + rdt_last_cmd_puts("Mask out of range\n"); + return false; + } + + first_bit = find_first_bit(&val, cbm_len); + zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); + + /* Are non-contiguous bitmasks allowed? 
*/ + if (!r->cache.arch_has_sparse_bitmasks && + (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) { + rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val); + return false; + } + + if ((zero_bit - first_bit) < r->cache.min_cbm_bits) { + rdt_last_cmd_printf("Need at least %d bits in the mask\n", + r->cache.min_cbm_bits); + return false; + } + + *data = val; + return true; +} + +/* + * Read one cache bit mask (hex). Check that it is valid for the current + * resource type. + */ +static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_domain *d) +{ + struct rdtgroup *rdtgrp = data->rdtgrp; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; + u32 cbm_val; + + cfg = &d->staged_config[s->conf_type]; + if (cfg->have_new_ctrl) { + rdt_last_cmd_printf("Duplicate domain %d\n", d->id); + return -EINVAL; + } + + /* + * Cannot set up more than one pseudo-locked region in a cache + * hierarchy. + */ + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && + rdtgroup_pseudo_locked_in_hierarchy(d)) { + rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n"); + return -EINVAL; + } + + if (!cbm_validate(data->buf, &cbm_val, r)) + return -EINVAL; + + if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE || + rdtgrp->mode == RDT_MODE_SHAREABLE) && + rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) { + rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n"); + return -EINVAL; + } + + /* + * The CBM may not overlap with the CBM of another closid if + * either is exclusive. 
+ */ + if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) { + rdt_last_cmd_puts("Overlaps with exclusive group\n"); + return -EINVAL; + } + + if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) { + if (rdtgrp->mode == RDT_MODE_EXCLUSIVE || + rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + rdt_last_cmd_puts("Overlaps with other group\n"); + return -EINVAL; + } + } + + cfg->new_ctrl = cbm_val; + cfg->have_new_ctrl = true; + + return 0; +} + +static ctrlval_parser_t *get_parser(struct rdt_resource *res) +{ + if (res->fflags & RFTYPE_RES_CACHE) + return &parse_cbm; + else + return &parse_bw; +} + +/* + * For each domain in this resource we expect to find a series of: + * id=mask + * separated by ";". The "id" is in decimal, and must match one of + * the "id"s for this resource. + */ +static int parse_line(char *line, struct resctrl_schema *s, + struct rdtgroup *rdtgrp) +{ + ctrlval_parser_t *parse_ctrlval = get_parser(s->res); + enum resctrl_conf_type t = s->conf_type; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; + struct rdt_parse_data data; + char *dom = NULL, *id; + struct rdt_domain *d; + unsigned long dom_id; + + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && + (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) { + rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); + return -EINVAL; + } + +next: + if (!line || line[0] == '\0') + return 0; + dom = strsep(&line, ";"); + id = strsep(&dom, "="); + if (!dom || kstrtoul(id, 10, &dom_id)) { + rdt_last_cmd_puts("Missing '=' or non-numeric domain\n"); + return -EINVAL; + } + dom = strim(dom); + list_for_each_entry(d, &r->domains, list) { + if (d->id == dom_id) { + data.buf = dom; + data.rdtgrp = rdtgrp; + if (parse_ctrlval(&data, s, d)) + return -EINVAL; + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + cfg = &d->staged_config[t]; + /* + * In pseudo-locking 
setup mode and just + * parsed a valid CBM that should be + * pseudo-locked. Only one locked region per + * resource group and domain so just do + * the required initialization for single + * region and return. + */ + rdtgrp->plr->s = s; + rdtgrp->plr->d = d; + rdtgrp->plr->cbm = cfg->new_ctrl; + d->plr = rdtgrp->plr; + return 0; + } + goto next; + } + } + return -EINVAL; +} + static bool apply_config(struct rdt_hw_domain *hw_dom, struct resctrl_staged_config *cfg, u32 idx, cpumask_var_t cpu_mask) @@ -111,6 +370,100 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) return 0; } +static int rdtgroup_parse_resource(char *resname, char *tok, + struct rdtgroup *rdtgrp) +{ + struct resctrl_schema *s; + + list_for_each_entry(s, &resctrl_schema_all, list) { + if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid) + return parse_line(tok, s, rdtgrp); + } + rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname); + return -EINVAL; +} + +ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct resctrl_schema *s; + struct rdtgroup *rdtgrp; + struct rdt_resource *r; + char *tok, *resname; + int ret = 0; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + buf[nbytes - 1] = '\0'; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + rdtgroup_kn_unlock(of->kn); + return -ENOENT; + } + rdt_last_cmd_clear(); + + /* + * No changes to pseudo-locked region allowed. It has to be removed + * and re-created instead. 
+ */ + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { + ret = -EINVAL; + rdt_last_cmd_puts("Resource group is pseudo-locked\n"); + goto out; + } + + rdt_staged_configs_clear(); + + while ((tok = strsep(&buf, "\n")) != NULL) { + resname = strim(strsep(&tok, ":")); + if (!tok) { + rdt_last_cmd_puts("Missing ':'\n"); + ret = -EINVAL; + goto out; + } + if (tok[0] == '\0') { + rdt_last_cmd_printf("Missing '%s' value\n", resname); + ret = -EINVAL; + goto out; + } + ret = rdtgroup_parse_resource(resname, tok, rdtgrp); + if (ret) + goto out; + } + + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; + + /* + * Writes to mba_sc resources update the software controller, + * not the control MSR. + */ + if (is_mba_sc(r)) + continue; + + ret = resctrl_arch_update_domains(r, rdtgrp->closid); + if (ret) + goto out; + } + + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + /* + * If pseudo-locking fails we keep the resource group in + * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service + * active and updated for just the domain the pseudo-locked + * region was requested for. 
+ */ + ret = rdtgroup_pseudo_lock_create(rdtgrp); + } + +out: + rdt_staged_configs_clear(); + rdtgroup_kn_unlock(of->kn); + return ret ?: nbytes; +} + u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type) { @@ -119,3 +472,160 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, return hw_dom->ctrl_val[idx]; } + +static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) +{ + struct rdt_resource *r = schema->res; + struct rdt_domain *dom; + bool sep = false; + u32 ctrl_val; + + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + seq_printf(s, "%*s:", max_name_width, schema->name); + list_for_each_entry(dom, &r->domains, list) { + if (sep) + seq_puts(s, ";"); + + if (is_mba_sc(r)) + ctrl_val = dom->mbps_val[closid]; + else + ctrl_val = resctrl_arch_get_config(r, dom, closid, + schema->conf_type); + + seq_printf(s, r->format_str, dom->id, max_data_width, + ctrl_val); + sep = true; + } + seq_puts(s, "\n"); +} + +int rdtgroup_schemata_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct resctrl_schema *schema; + struct rdtgroup *rdtgrp; + int ret = 0; + u32 closid; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) { + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + list_for_each_entry(schema, &resctrl_schema_all, list) { + seq_printf(s, "%s:uninitialized\n", schema->name); + } + } else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { + if (!rdtgrp->plr->d) { + rdt_last_cmd_clear(); + rdt_last_cmd_puts("Cache domain offline\n"); + ret = -ENODEV; + } else { + seq_printf(s, "%s:%d=%x\n", + rdtgrp->plr->s->res->name, + rdtgrp->plr->d->id, + rdtgrp->plr->cbm); + } + } else { + closid = rdtgrp->closid; + list_for_each_entry(schema, &resctrl_schema_all, list) { + if (closid < schema->num_closid) + show_doms(s, schema, closid); + } + } + } else { + ret = -ENOENT; + } + rdtgroup_kn_unlock(of->kn); + 
return ret; +} + +static int smp_mon_event_count(void *arg) +{ + mon_event_count(arg); + + return 0; +} + +void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, + struct rdt_domain *d, struct rdtgroup *rdtgrp, + int evtid, int first) +{ + int cpu; + + /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + /* + * Setup the parameters to pass to mon_event_count() to read the data. + */ + rr->rgrp = rdtgrp; + rr->evtid = evtid; + rr->r = r; + rr->d = d; + rr->val = 0; + rr->first = first; + rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); + if (IS_ERR(rr->arch_mon_ctx)) { + rr->err = -EINVAL; + return; + } + + cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU); + + /* + * cpumask_any_housekeeping() prefers housekeeping CPUs, but + * are all the CPUs nohz_full? If yes, pick a CPU to IPI. + * MPAM's resctrl_arch_rmid_read() is unable to read the + * counters on some platforms if its called in IRQ context. + */ + if (tick_nohz_full_cpu(cpu)) + smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1); + else + smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); + + resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx); +} + +int rdtgroup_mondata_show(struct seq_file *m, void *arg) +{ + struct kernfs_open_file *of = m->private; + u32 resid, evtid, domid; + struct rdtgroup *rdtgrp; + struct rdt_resource *r; + union mon_data_bits md; + struct rdt_domain *d; + struct rmid_read rr; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto out; + } + + md.priv = of->kn->priv; + resid = md.u.rid; + domid = md.u.domid; + evtid = md.u.evtid; + + r = resctrl_arch_get_resource(resid); + d = resctrl_arch_find_domain(r, domid); + if (IS_ERR_OR_NULL(d)) { + ret = -ENOENT; + goto out; + } + + mon_event_read(&rr, r, d, rdtgrp, evtid, false); + + if (rr.err == -EIO) + seq_puts(m, "Error\n"); + else if (rr.err == -EINVAL) + seq_puts(m, "Unavailable\n"); + 
else + seq_printf(m, "%llu\n", rr.val); + +out: + rdtgroup_kn_unlock(of->kn); + return ret; +} diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index bf3538992667..0f7e3f10941b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -26,6 +26,227 @@ */ #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) +/** + * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that + * aren't marked nohz_full + * @mask: The mask to pick a CPU from. + * @exclude_cpu:The CPU to avoid picking. + * + * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping + * CPUs that don't use nohz_full, these are preferred. Pass + * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs. + * + * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available. + */ +static inline unsigned int +cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) +{ + unsigned int cpu, hk_cpu; + + if (exclude_cpu == RESCTRL_PICK_ANY_CPU) + cpu = cpumask_any(mask); + else + cpu = cpumask_any_but(mask, exclude_cpu); + + if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) + return cpu; + + /* If the CPU picked isn't marked nohz_full nothing more needs doing. 
*/ + if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu)) + return cpu; + + /* Try to find a CPU that isn't nohz_full to use in preference */ + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask); + if (hk_cpu == exclude_cpu) + hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask); + + if (hk_cpu < nr_cpu_ids) + cpu = hk_cpu; + + return cpu; +} + +struct rdt_fs_context { + struct kernfs_fs_context kfc; + bool enable_cdpl2; + bool enable_cdpl3; + bool enable_mba_mbps; + bool enable_debug; +}; + +static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc = fc->fs_private; + + return container_of(kfc, struct rdt_fs_context, kfc); +} + +/** + * struct mon_evt - Entry in the event list of a resource + * @evtid: event id + * @name: name of the event + * @configurable: true if the event is configurable + * @list: entry in &rdt_resource->evt_list + */ +struct mon_evt { + enum resctrl_event_id evtid; + char *name; + bool configurable; + struct list_head list; +}; + +/** + * union mon_data_bits - Monitoring details for each event file + * @priv: Used to store monitoring event data in @u + * as kernfs private data + * @rid: Resource id associated with the event file + * @evtid: Event id associated with the event file + * @domid: The domain to which the event file belongs + * @u: Name of the bit fields struct + */ +union mon_data_bits { + void *priv; + struct { + unsigned int rid : 10; + enum resctrl_event_id evtid : 8; + unsigned int domid : 14; + } u; +}; + +struct rmid_read { + struct rdtgroup *rgrp; + struct rdt_resource *r; + struct rdt_domain *d; + enum resctrl_event_id evtid; + bool first; + int err; + u64 val; + void *arch_mon_ctx; +}; + +extern struct list_head resctrl_schema_all; +extern bool resctrl_mounted; + +enum rdt_group_type { + RDTCTRL_GROUP = 0, + RDTMON_GROUP, + RDT_NUM_GROUP, +}; + +/** + * enum rdtgrp_mode - Mode of a RDT resource group + * @RDT_MODE_SHAREABLE: This resource group allows sharing of 
its allocations + * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed + * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking + * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations + * allowed AND the allocations are Cache Pseudo-Locked + * @RDT_NUM_MODES: Total number of modes + * + * The mode of a resource group enables control over the allowed overlap + * between allocations associated with different resource groups (classes + * of service). User is able to modify the mode of a resource group by + * writing to the "mode" resctrl file associated with the resource group. + * + * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by + * writing the appropriate text to the "mode" file. A resource group enters + * "pseudo-locked" mode after the schemata is written while the resource + * group is in "pseudo-locksetup" mode. + */ +enum rdtgrp_mode { + RDT_MODE_SHAREABLE = 0, + RDT_MODE_EXCLUSIVE, + RDT_MODE_PSEUDO_LOCKSETUP, + RDT_MODE_PSEUDO_LOCKED, + + /* Must be last */ + RDT_NUM_MODES, +}; + +/** + * struct mongroup - store mon group's data in resctrl fs. + * @mon_data_kn: kernfs node for the mon_data directory + * @parent: parent rdtgrp + * @crdtgrp_list: child rdtgroup node list + * @rmid: rmid for this rdtgroup + */ +struct mongroup { + struct kernfs_node *mon_data_kn; + struct rdtgroup *parent; + struct list_head crdtgrp_list; + u32 rmid; +}; + +/** + * struct rdtgroup - store rdtgroup's data in resctrl file system. 
+ * @kn: kernfs node + * @rdtgroup_list: linked list for all rdtgroups + * @closid: closid for this rdtgroup + * @cpu_mask: CPUs assigned to this rdtgroup + * @flags: status bits + * @waitcount: how many cpus expect to find this + * group when they acquire rdtgroup_mutex + * @type: indicates type of this rdtgroup - either + * monitor only or ctrl_mon group + * @mon: mongroup related data + * @mode: mode of resource group + * @plr: pseudo-locked region + */ +struct rdtgroup { + struct kernfs_node *kn; + struct list_head rdtgroup_list; + u32 closid; + struct cpumask cpu_mask; + int flags; + atomic_t waitcount; + enum rdt_group_type type; + struct mongroup mon; + enum rdtgrp_mode mode; + struct pseudo_lock_region *plr; +}; + +/* List of all resource groups */ +extern struct list_head rdt_all_groups; + +extern int max_name_width, max_data_width; + +/** + * struct rftype - describe each file in the resctrl file system + * @name: File name + * @mode: Access mode + * @kf_ops: File operations + * @flags: File specific RFTYPE_FLAGS_* flags + * @fflags: File specific RFTYPE_* flags + * @seq_show: Show content of the file + * @write: Write to the file + */ +struct rftype { + char *name; + umode_t mode; + const struct kernfs_ops *kf_ops; + unsigned long flags; + unsigned long fflags; + + int (*seq_show)(struct kernfs_open_file *of, + struct seq_file *sf, void *v); + /* + * write() is the generic write callback which maps directly to + * kernfs write operation and overrides all other operations. + * Maximum write size is determined by ->max_write_len. 
+ */ + ssize_t (*write)(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); +}; + +/** + * struct mbm_state - status for each MBM counter in each domain + * @prev_bw_bytes: Previous bytes value read for bandwidth calculation + * @prev_bw: The most recent bandwidth in MBps + */ +struct mbm_state { + u64 prev_bw_bytes; + u32 prev_bw; +}; + /** * struct arch_mbm_state - values used to compute resctrl_arch_rmid_read()s * return value. @@ -106,7 +327,11 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r return container_of(r, struct rdt_hw_resource, r_resctrl); } +extern struct mutex rdtgroup_mutex; + extern struct rdt_hw_resource rdt_resources_all[]; +extern struct rdtgroup rdtgroup_default; +extern struct dentry *debugfs_resctrl; static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res) { @@ -170,10 +395,95 @@ union cpuid_0x10_x_edx { unsigned int full; }; +void rdt_last_cmd_clear(void); +void rdt_last_cmd_puts(const char *s); +__printf(1, 2) +void rdt_last_cmd_printf(const char *fmt, ...); + void rdt_ctrl_update(void *arg); +struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn); +void rdtgroup_kn_unlock(struct kernfs_node *kn); +int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name); +int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, + umode_t mask); +ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); +int rdtgroup_schemata_show(struct kernfs_open_file *of, + struct seq_file *s, void *v); +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, + unsigned long cbm, int closid, bool exclusive); +unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, + unsigned long cbm); +enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); +int rdtgroup_tasks_assigned(struct rdtgroup *r); +int closids_supported(void); +void closid_free(int closid); +int alloc_rmid(u32 closid); +void 
free_rmid(u32 closid, u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); +void resctrl_mon_resource_exit(void); bool rdt_cpu_has(int flag); +void mon_event_count(void *info); +int rdtgroup_mondata_show(struct seq_file *m, void *arg); +void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, + struct rdt_domain *d, struct rdtgroup *rdtgrp, + int evtid, int first); +int resctrl_mon_resource_init(void); +void mbm_setup_overflow_handler(struct rdt_domain *dom, + unsigned long delay_ms, + int exclude_cpu); +void mbm_handle_overflow(struct work_struct *work); void __init intel_rdt_mbm_apply_quirk(void); +bool is_mba_sc(struct rdt_resource *r); +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, + int exclude_cpu); +void cqm_handle_limbo(struct work_struct *work); +bool has_busy_rmid(struct rdt_domain *d); +void __check_limbo(struct rdt_domain *d, bool force_free); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); +void mbm_config_rftype_init(const char *config); +void rdt_staged_configs_clear(void); +bool closid_allocated(unsigned int closid); +int resctrl_find_cleanest_closid(void); + +#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK +int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); +int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); +int rdt_pseudo_lock_init(void); +void rdt_pseudo_lock_release(void); +int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); +void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); +#else +static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) +{ + return false; +} + +static inline bool 
rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) +{ + return false; +} + +static inline int rdt_pseudo_lock_init(void) { return 0; } +static inline void rdt_pseudo_lock_release(void) { } +static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { } +#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 02fb9d87479a..7e6fca138cb7 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -25,6 +25,53 @@ #include "internal.h" +/** + * struct rmid_entry - dirty tracking for all RMID. + * @closid: The CLOSID for this entry. + * @rmid: The RMID for this entry. + * @busy: The number of domains with cached data using this RMID. + * @list: Member of the rmid_free_lru list when busy == 0. + * + * Depending on the architecture the correct monitor is accessed using + * both @closid and @rmid, or @rmid only. + * + * Take the rdtgroup_mutex when accessing. + */ +struct rmid_entry { + u32 closid; + u32 rmid; + int busy; + struct list_head list; +}; + +/* + * @rmid_free_lru - A least recently used list of free RMIDs + * These RMIDs are guaranteed to have an occupancy less than the + * threshold occupancy + */ +static LIST_HEAD(rmid_free_lru); + +/* + * @closid_num_dirty_rmid The number of dirty RMID each CLOSID has. + * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined. + * Indexed by CLOSID. Protected by rdtgroup_mutex. + */ +static u32 *closid_num_dirty_rmid; + +/* + * @rmid_limbo_count - count of currently unused but (potentially) + * dirty RMIDs. + * This counts RMIDs that no one is currently using but that + * may have a occupancy value > resctrl_rmid_realloc_threshold. User can + * change the threshold occupancy value. 
+ */ +static unsigned int rmid_limbo_count; + +/* + * @rmid_entry - The entry in the limbo and free lists. + */ +static struct rmid_entry *rmid_ptrs; + /* * Global boolean for rdt_monitor which is true if any * resource monitoring is enabled. @@ -36,6 +83,17 @@ bool rdt_mon_capable; */ unsigned int rdt_mon_features; +/* + * This is the threshold cache occupancy in bytes at which we will consider an + * RMID available for re-allocation. + */ +unsigned int resctrl_rmid_realloc_threshold; + +/* + * This is the maximum value for the reallocation threshold, in bytes. + */ +unsigned int resctrl_rmid_realloc_limit; + #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) /* @@ -99,6 +157,33 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val) return val; } +/* + * x86 and arm64 differ in their handling of monitoring. + * x86's RMID are independent numbers, there is only one source of traffic + * with an RMID value of '1'. + * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of + * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID + * value is no longer unique. + * To account for this, resctrl uses an index. On x86 this is just the RMID, + * on arm64 it encodes the CLOSID and RMID. This gives a unique number. + * + * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code + * must accept an attempt to read every index. 
+ */ +static inline struct rmid_entry *__rmid_entry(u32 idx) +{ + struct rmid_entry *entry; + u32 closid, rmid; + + entry = &rmid_ptrs[idx]; + resctrl_arch_rmid_idx_decode(idx, &closid, &rmid); + + WARN_ON_ONCE(entry->closid != closid); + WARN_ON_ONCE(entry->rmid != rmid); + + return entry; +} + static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) { u64 msr_val; @@ -217,6 +302,735 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, return 0; } +static void limbo_release_entry(struct rmid_entry *entry) +{ + lockdep_assert_held(&rdtgroup_mutex); + + rmid_limbo_count--; + list_add_tail(&entry->list, &rmid_free_lru); + + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + closid_num_dirty_rmid[entry->closid]--; +} + +/* + * Check the RMIDs that are marked as busy for this domain. If the + * reported LLC occupancy is below the threshold clear the busy bit and + * decrement the count. If the busy count gets to zero on an RMID, we + * free the RMID + */ +void __check_limbo(struct rdt_domain *d, bool force_free) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + u32 idx_limit = resctrl_arch_system_num_rmid_idx(); + struct rmid_entry *entry; + u32 idx, cur_idx = 1; + void *arch_mon_ctx; + bool rmid_dirty; + u64 val = 0; + + arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID); + if (IS_ERR(arch_mon_ctx)) { + pr_warn_ratelimited("Failed to allocate monitor context: %ld", + PTR_ERR(arch_mon_ctx)); + return; + } + + /* + * Skip RMID 0 and start from RMID 1 and check all the RMIDs that + * are marked as busy for occupancy < threshold. If the occupancy + * is less than the threshold decrement the busy counter of the + * RMID and move it to the free list when the counter reaches 0. 
+ */ + for (;;) { + idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx); + if (idx >= idx_limit) + break; + + entry = __rmid_entry(idx); + if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, + QOS_L3_OCCUP_EVENT_ID, &val, + arch_mon_ctx)) { + rmid_dirty = true; + } else { + rmid_dirty = (val >= resctrl_rmid_realloc_threshold); + } + + if (force_free || !rmid_dirty) { + clear_bit(idx, d->rmid_busy_llc); + if (!--entry->busy) + limbo_release_entry(entry); + } + cur_idx = idx + 1; + } + + resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx); +} + +bool has_busy_rmid(struct rdt_domain *d) +{ + u32 idx_limit = resctrl_arch_system_num_rmid_idx(); + + return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit; +} + +static struct rmid_entry *resctrl_find_free_rmid(u32 closid) +{ + struct rmid_entry *itr; + u32 itr_idx, cmp_idx; + + if (list_empty(&rmid_free_lru)) + return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC); + + list_for_each_entry(itr, &rmid_free_lru, list) { + /* + * Get the index of this free RMID, and the index it would need + * to be if it were used with this CLOSID. + * If the CLOSID is irrelevant on this architecture, the two + * index values are always the same on every entry and thus the + * very first entry will be returned. + */ + itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid); + cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid); + + if (itr_idx == cmp_idx) + return itr; + } + + return ERR_PTR(-ENOSPC); +} + +/** + * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated + * RMID are clean, or the CLOSID that has + * the most clean RMID. + * + * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID + * may not be able to allocate clean RMID. To avoid this the allocator will + * choose the CLOSID with the most clean RMID. + * + * When the CLOSID and RMID are independent numbers, the first free CLOSID will + * be returned. 
+ */ +int resctrl_find_cleanest_closid(void) +{ + u32 cleanest_closid = ~0; + int i = 0; + + lockdep_assert_held(&rdtgroup_mutex); + + if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + return -EIO; + + for (i = 0; i < closids_supported(); i++) { + int num_dirty; + + if (closid_allocated(i)) + continue; + + num_dirty = closid_num_dirty_rmid[i]; + if (num_dirty == 0) + return i; + + if (cleanest_closid == ~0) + cleanest_closid = i; + + if (num_dirty < closid_num_dirty_rmid[cleanest_closid]) + cleanest_closid = i; + } + + if (cleanest_closid == ~0) + return -ENOSPC; + + return cleanest_closid; +} + +/* + * For MPAM the RMID value is not unique, and has to be considered with + * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which + * allows all domains to be managed by a single free list. + * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler. + */ +int alloc_rmid(u32 closid) +{ + struct rmid_entry *entry; + + lockdep_assert_held(&rdtgroup_mutex); + + entry = resctrl_find_free_rmid(closid); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + list_del(&entry->list); + return entry->rmid; +} + +static void add_rmid_to_limbo(struct rmid_entry *entry) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_domain *d; + u32 idx; + + lockdep_assert_held(&rdtgroup_mutex); + + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid); + + entry->busy = 0; + list_for_each_entry(d, &r->domains, list) { + /* + * For the first limbo RMID in the domain, + * setup up the limbo worker. 
+ */ + if (!has_busy_rmid(d)) + cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL, + RESCTRL_PICK_ANY_CPU); + set_bit(idx, d->rmid_busy_llc); + entry->busy++; + } + + rmid_limbo_count++; + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + closid_num_dirty_rmid[entry->closid]++; +} + +void free_rmid(u32 closid, u32 rmid) +{ + u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); + struct rmid_entry *entry; + + lockdep_assert_held(&rdtgroup_mutex); + + /* + * Do not allow the default rmid to be free'd. Comparing by index + * allows architectures that ignore the closid parameter to avoid an + * unnecessary check. + */ + if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, + RESCTRL_RESERVED_RMID)) + return; + + entry = __rmid_entry(idx); + + if (resctrl_arch_is_llc_occupancy_enabled()) + add_rmid_to_limbo(entry); + else + list_add_tail(&entry->list, &rmid_free_lru); +} + +static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid, + u32 rmid, enum resctrl_event_id evtid) +{ + u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); + + switch (evtid) { + case QOS_L3_MBM_TOTAL_EVENT_ID: + return &d->mbm_total[idx]; + case QOS_L3_MBM_LOCAL_EVENT_ID: + return &d->mbm_local[idx]; + default: + return NULL; + } +} + +static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) +{ + struct mbm_state *m; + u64 tval = 0; + + if (rr->first) { + resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid); + m = get_mbm_state(rr->d, closid, rmid, rr->evtid); + if (m) + memset(m, 0, sizeof(struct mbm_state)); + return 0; + } + + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid, + &tval, rr->arch_mon_ctx); + if (rr->err) + return rr->err; + + rr->val += tval; + + return 0; +} + +/* + * mbm_bw_count() - Update bw count from values previously read by + * __mon_event_count(). + * @closid: The closid used to identify the cached mbm_state. + * @rmid: The rmid used to identify the cached mbm_state. 
+ * @rr: The struct rmid_read populated by __mon_event_count(). + * + * Supporting function to calculate the memory bandwidth + * and delta bandwidth in MBps. The chunks value previously read by + * __mon_event_count() is compared with the chunks value from the previous + * invocation. This must be called once per second to maintain values in MBps. + */ +static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr) +{ + u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); + struct mbm_state *m = &rr->d->mbm_local[idx]; + u64 cur_bw, bytes, cur_bytes; + + cur_bytes = rr->val; + bytes = cur_bytes - m->prev_bw_bytes; + m->prev_bw_bytes = cur_bytes; + + cur_bw = bytes / SZ_1M; + + m->prev_bw = cur_bw; +} + +/* + * This is scheduled by mon_event_read() to read the CQM/MBM counters + * on a domain. + */ +void mon_event_count(void *info) +{ + struct rdtgroup *rdtgrp, *entry; + struct rmid_read *rr = info; + struct list_head *head; + int ret; + + rdtgrp = rr->rgrp; + + ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr); + + /* + * For Ctrl groups read data from child monitor groups and + * add them together. Count events which are read successfully. + * Discard the rmid_read's reporting errors. + */ + head = &rdtgrp->mon.crdtgrp_list; + + if (rdtgrp->type == RDTCTRL_GROUP) { + list_for_each_entry(entry, head, mon.crdtgrp_list) { + if (__mon_event_count(entry->closid, entry->mon.rmid, + rr) == 0) + ret = 0; + } + } + + /* + * __mon_event_count() calls for newly created monitor groups may + * report -EINVAL/Unavailable if the monitor hasn't seen any traffic. + * Discard error if any of the monitor event reads succeeded. 
+ */ + if (ret == 0) + rr->err = 0; +} + +/* + * Feedback loop for MBA software controller (mba_sc) + * + * mba_sc is a feedback loop where we periodically read MBM counters and + * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so + * that: + * + * current bandwidth(cur_bw) < user specified bandwidth(user_bw) + * + * This uses the MBM counters to measure the bandwidth and MBA throttle + * MSRs to control the bandwidth for a particular rdtgrp. It builds on the + * fact that resctrl rdtgroups have both monitoring and control. + * + * The frequency of the checks is 1s and we just tag along the MBM overflow + * timer. Having 1s interval makes the calculation of bandwidth simpler. + * + * Although MBA's goal is to restrict the bandwidth to a maximum, there may + * be a need to increase the bandwidth to avoid unnecessarily restricting + * the L2 <-> L3 traffic. + * + * Since MBA controls the L2 external bandwidth where as MBM measures the + * L3 external bandwidth the following sequence could lead to such a + * situation. + * + * Consider an rdtgroup which had high L3 <-> memory traffic in initial + * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but + * after some time rdtgroup has mostly L2 <-> L3 traffic. + * + * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its + * throttle MSRs already have low percentage values. To avoid + * unnecessarily restricting such rdtgroups, we also increase the bandwidth. 
+ */ +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) +{ + u32 closid, rmid, cur_msr_val, new_msr_val; + struct mbm_state *pmbm_data, *cmbm_data; + struct rdt_resource *r_mba; + struct rdt_domain *dom_mba; + u32 cur_bw, user_bw, idx; + struct list_head *head; + struct rdtgroup *entry; + + if (!resctrl_arch_is_mbm_local_enabled()) + return; + + r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + + closid = rgrp->closid; + rmid = rgrp->mon.rmid; + idx = resctrl_arch_rmid_idx_encode(closid, rmid); + pmbm_data = &dom_mbm->mbm_local[idx]; + + dom_mba = resctrl_get_domain_from_cpu(smp_processor_id(), r_mba); + if (!dom_mba) { + pr_warn_once("Failure to get domain for MBA update\n"); + return; + } + + cur_bw = pmbm_data->prev_bw; + user_bw = dom_mba->mbps_val[closid]; + + /* MBA resource doesn't support CDP */ + cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE); + + /* + * For Ctrl groups read data from child monitor groups. + */ + head = &rgrp->mon.crdtgrp_list; + list_for_each_entry(entry, head, mon.crdtgrp_list) { + cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; + cur_bw += cmbm_data->prev_bw; + } + + /* + * Scale up/down the bandwidth linearly for the ctrl group. The + * bandwidth step is the bandwidth granularity specified by the + * hardware. + * Always increase throttling if current bandwidth is above the + * target set by user. + * But avoid thrashing up and down on every poll by checking + * whether a decrease in throttling is likely to push the group + * back over target. E.g. if currently throttling to 30% of bandwidth + * on a system with 10% granularity steps, check whether moving to + * 40% would go past the limit by multiplying current bandwidth by + * "(30 + 10) / 30". 
+ */ + if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) { + new_msr_val = cur_msr_val - r_mba->membw.bw_gran; + } else if (cur_msr_val < MAX_MBA_BW && + (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) { + new_msr_val = cur_msr_val + r_mba->membw.bw_gran; + } else { + return; + } + + resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); +} + +static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, + u32 closid, u32 rmid) +{ + struct rmid_read rr; + + rr.first = false; + rr.r = r; + rr.d = d; + + /* + * This is protected from concurrent reads from user + * as both the user and we hold the global mutex. + */ + if (resctrl_arch_is_mbm_total_enabled()) { + rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID; + rr.val = 0; + rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); + if (IS_ERR(rr.arch_mon_ctx)) { + pr_warn_ratelimited("Failed to allocate monitor context: %ld", + PTR_ERR(rr.arch_mon_ctx)); + return; + } + + __mon_event_count(closid, rmid, &rr); + + resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); + } + if (resctrl_arch_is_mbm_local_enabled()) { + rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; + rr.val = 0; + rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); + if (IS_ERR(rr.arch_mon_ctx)) { + pr_warn_ratelimited("Failed to allocate monitor context: %ld", + PTR_ERR(rr.arch_mon_ctx)); + return; + } + + __mon_event_count(closid, rmid, &rr); + + /* + * Call the MBA software controller only for the + * control groups and when user has enabled + * the software controller explicitly. + */ + if (is_mba_sc(NULL)) + mbm_bw_count(closid, rmid, &rr); + + resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); + } +} + +/* + * Handler to scan the limbo list and move the RMIDs + * to free list whose occupancy < threshold_occupancy. 
+ */ +void cqm_handle_limbo(struct work_struct *work) +{ + unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL); + struct rdt_domain *d; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + d = container_of(work, struct rdt_domain, cqm_limbo.work); + + __check_limbo(d, false); + + if (has_busy_rmid(d)) { + d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask, + RESCTRL_PICK_ANY_CPU); + schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo, + delay); + } + + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); +} + +/** + * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this + * domain. + * @dom: The domain the limbo handler should run for. + * @delay_ms: How far in the future the handler should run. + * @exclude_cpu: Which CPU the handler should not run on, + * RESCTRL_PICK_ANY_CPU to pick any CPU. + */ +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, + int exclude_cpu) +{ + unsigned long delay = msecs_to_jiffies(delay_ms); + int cpu; + + cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu); + dom->cqm_work_cpu = cpu; + + if (cpu < nr_cpu_ids) + schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay); +} + +void mbm_handle_overflow(struct work_struct *work) +{ + unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL); + struct rdtgroup *prgrp, *crgrp; + struct list_head *head; + struct rdt_resource *r; + struct rdt_domain *d; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + /* + * If the filesystem has been unmounted this work no longer needs to + * run. 
+ */ + if (!resctrl_mounted || !resctrl_arch_mon_capable()) + goto out_unlock; + + r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + d = container_of(work, struct rdt_domain, mbm_over.work); + + list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { + mbm_update(r, d, prgrp->closid, prgrp->mon.rmid); + + head = &prgrp->mon.crdtgrp_list; + list_for_each_entry(crgrp, head, mon.crdtgrp_list) + mbm_update(r, d, crgrp->closid, crgrp->mon.rmid); + + if (is_mba_sc(NULL)) + update_mba_bw(prgrp, d); + } + + /* + * Re-check for housekeeping CPUs. This allows the overflow handler to + * move off a nohz_full CPU quickly. + */ + d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask, + RESCTRL_PICK_ANY_CPU); + schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); +} + +/** + * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this + * domain. + * @dom: The domain the overflow handler should run for. + * @delay_ms: How far in the future the handler should run. + * @exclude_cpu: Which CPU the handler should not run on, + * RESCTRL_PICK_ANY_CPU to pick any CPU. + */ +void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms, + int exclude_cpu) +{ + unsigned long delay = msecs_to_jiffies(delay_ms); + int cpu; + + /* + * When a domain comes online there is no guarantee the filesystem is + * mounted. If not, there is no need to catch counter overflow. 
+ */ + if (!resctrl_mounted || !resctrl_arch_mon_capable()) + return; + cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu); + dom->mbm_work_cpu = cpu; + + if (cpu < nr_cpu_ids) + schedule_delayed_work_on(cpu, &dom->mbm_over, delay); +} + +static int dom_data_init(struct rdt_resource *r) +{ + u32 idx_limit = resctrl_arch_system_num_rmid_idx(); + u32 num_closid = resctrl_arch_get_num_closid(r); + struct rmid_entry *entry = NULL; + int err = 0, i; + u32 idx; + + mutex_lock(&rdtgroup_mutex); + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { + u32 *tmp; + + /* + * If the architecture hasn't provided a sanitised value here, + * this may result in larger arrays than necessary. Resctrl will + * use a smaller system wide value based on the resources in + * use. + */ + tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL); + if (!tmp) { + err = -ENOMEM; + goto out_unlock; + } + + closid_num_dirty_rmid = tmp; + } + + rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL); + if (!rmid_ptrs) { + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { + kfree(closid_num_dirty_rmid); + closid_num_dirty_rmid = NULL; + } + err = -ENOMEM; + goto out_unlock; + } + + for (i = 0; i < idx_limit; i++) { + entry = &rmid_ptrs[i]; + INIT_LIST_HEAD(&entry->list); + + resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid); + list_add_tail(&entry->list, &rmid_free_lru); + } + + /* + * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and + * are always allocated. These are used for the rdtgroup_default + * control group, which will be setup later in rdtgroup_init(). 
+ */ + idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, + RESCTRL_RESERVED_RMID); + entry = __rmid_entry(idx); + list_del(&entry->list); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + + return err; +} + +static void dom_data_exit(struct rdt_resource *r) +{ + if (!r->mon_capable) + return; + + mutex_lock(&rdtgroup_mutex); + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { + kfree(closid_num_dirty_rmid); + closid_num_dirty_rmid = NULL; + } + + kfree(rmid_ptrs); + rmid_ptrs = NULL; + + mutex_unlock(&rdtgroup_mutex); +} + +static struct mon_evt llc_occupancy_event = { + .name = "llc_occupancy", + .evtid = QOS_L3_OCCUP_EVENT_ID, +}; + +static struct mon_evt mbm_total_event = { + .name = "mbm_total_bytes", + .evtid = QOS_L3_MBM_TOTAL_EVENT_ID, +}; + +static struct mon_evt mbm_local_event = { + .name = "mbm_local_bytes", + .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, +}; + +/* + * Initialize the event list for the resource. + * + * Note that MBM events are also part of RDT_RESOURCE_L3 resource + * because as per the SDM the total and local memory bandwidth + * are enumerated as part of L3 monitoring. 
+ */ +static void l3_mon_evt_init(struct rdt_resource *r) +{ + INIT_LIST_HEAD(&r->evt_list); + + if (resctrl_arch_is_llc_occupancy_enabled()) + list_add_tail(&llc_occupancy_event.list, &r->evt_list); + if (resctrl_arch_is_mbm_total_enabled()) + list_add_tail(&mbm_total_event.list, &r->evt_list); + if (resctrl_arch_is_mbm_local_enabled()) + list_add_tail(&mbm_local_event.list, &r->evt_list); +} + +int resctrl_mon_resource_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + int ret; + + if (!r->mon_capable) + return 0; + + ret = dom_data_init(r); + if (ret) + return ret; + + l3_mon_evt_init(r); + + if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { + mbm_total_event.configurable = true; + mbm_config_rftype_init("mbm_total_bytes_config"); + } + if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { + mbm_local_event.configurable = true; + mbm_config_rftype_init("mbm_local_bytes_config"); + } + + return 0; +} + int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; @@ -262,6 +1076,13 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) return 0; } +void resctrl_mon_resource_exit(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + + dom_data_exit(r); +} + void __init intel_rdt_mbm_apply_quirk(void) { int cf_index; diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index ba1596afee10..ba51ab1f70e6 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -39,6 +39,28 @@ */ static u64 prefetch_disable_bits; +/* + * Major number assigned to and shared by all devices exposing + * pseudo-locked regions. 
+ */ +static unsigned int pseudo_lock_major; +static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0); + +static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode) +{ + const struct rdtgroup *rdtgrp; + + rdtgrp = dev_get_drvdata(dev); + if (mode) + *mode = 0600; + return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name); +} + +static const struct class pseudo_lock_class = { + .name = "pseudo_lock", + .devnode = pseudo_lock_devnode, +}; + /** * resctrl_arch_get_prefetch_disable_bits - prefetch disable bits of supported * platforms @@ -99,6 +121,299 @@ u64 resctrl_arch_get_prefetch_disable_bits(void) return prefetch_disable_bits; } +/** + * pseudo_lock_minor_get - Obtain available minor number + * @minor: Pointer to where new minor number will be stored + * + * A bitmask is used to track available minor numbers. Here the next free + * minor number is marked as unavailable and returned. + * + * Return: 0 on success, <0 on failure. + */ +static int pseudo_lock_minor_get(unsigned int *minor) +{ + unsigned long first_bit; + + first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS); + + if (first_bit == MINORBITS) + return -ENOSPC; + + __clear_bit(first_bit, &pseudo_lock_minor_avail); + *minor = first_bit; + + return 0; +} + +/** + * pseudo_lock_minor_release - Return minor number to available + * @minor: The minor number made available + */ +static void pseudo_lock_minor_release(unsigned int minor) +{ + __set_bit(minor, &pseudo_lock_minor_avail); +} + +/** + * region_find_by_minor - Locate a pseudo-lock region by inode minor number + * @minor: The minor number of the device representing pseudo-locked region + * + * When the character device is accessed we need to determine which + * pseudo-locked region it belongs to. This is done by matching the minor + * number of the device to the pseudo-locked region it belongs. + * + * Minor numbers are assigned at the time a pseudo-locked region is associated + * with a cache instance. 
+ * + * Return: On success return pointer to resource group owning the pseudo-locked + * region, NULL on failure. + */ +static struct rdtgroup *region_find_by_minor(unsigned int minor) +{ + struct rdtgroup *rdtgrp, *rdtgrp_match = NULL; + + list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { + if (rdtgrp->plr && rdtgrp->plr->minor == minor) { + rdtgrp_match = rdtgrp; + break; + } + } + return rdtgrp_match; +} + +/** + * struct pseudo_lock_pm_req - A power management QoS request list entry + * @list: Entry within the @pm_reqs list for a pseudo-locked region + * @req: PM QoS request + */ +struct pseudo_lock_pm_req { + struct list_head list; + struct dev_pm_qos_request req; +}; + +static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr) +{ + struct pseudo_lock_pm_req *pm_req, *next; + + list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) { + dev_pm_qos_remove_request(&pm_req->req); + list_del(&pm_req->list); + kfree(pm_req); + } +} + +/** + * pseudo_lock_cstates_constrain - Restrict cores from entering C6 + * @plr: Pseudo-locked region + * + * To prevent the cache from being affected by power management entering + * C6 has to be avoided. This is accomplished by requesting a latency + * requirement lower than lowest C6 exit latency of all supported + * platforms as found in the cpuidle state tables in the intel_idle driver. + * At this time it is possible to do so with a single latency requirement + * for all supported platforms. + * + * Since Goldmont is supported, which is affected by X86_BUG_MONITOR, + * the ACPI latencies need to be considered while keeping in mind that C2 + * may be set to map to deeper sleep states. In this case the latency + * requirement needs to prevent entering C2 also. 
+ * + * Return: 0 on success, <0 on failure + */ +static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr) +{ + struct pseudo_lock_pm_req *pm_req; + int cpu; + int ret; + + for_each_cpu(cpu, &plr->d->cpu_mask) { + pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL); + if (!pm_req) { + rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n"); + ret = -ENOMEM; + goto out_err; + } + ret = dev_pm_qos_add_request(get_cpu_device(cpu), + &pm_req->req, + DEV_PM_QOS_RESUME_LATENCY, + 30); + if (ret < 0) { + rdt_last_cmd_printf("Failed to add latency req CPU%d\n", + cpu); + kfree(pm_req); + ret = -1; + goto out_err; + } + list_add(&pm_req->list, &plr->pm_reqs); + } + + return 0; + +out_err: + pseudo_lock_cstates_relax(plr); + return ret; +} + +/** + * pseudo_lock_region_clear - Reset pseudo-lock region data + * @plr: pseudo-lock region + * + * All content of the pseudo-locked region is reset - any memory allocated + * freed. + * + * Return: void + */ +static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) +{ + plr->size = 0; + plr->line_size = 0; + kfree(plr->kmem); + plr->kmem = NULL; + plr->s = NULL; + if (plr->d) + plr->d->plr = NULL; + plr->d = NULL; + plr->cbm = 0; + plr->debugfs_dir = NULL; +} + +/** + * pseudo_lock_region_init - Initialize pseudo-lock region information + * @plr: pseudo-lock region + * + * Called after user provided a schemata to be pseudo-locked. From the + * schemata the &struct pseudo_lock_region is on entry already initialized + * with the resource, domain, and capacity bitmask. Here the information + * required for pseudo-locking is deduced from this data and &struct + * pseudo_lock_region initialized further. 
This information includes: + * - size in bytes of the region to be pseudo-locked + * - cache line size to know the stride with which data needs to be accessed + * to be pseudo-locked + * - a cpu associated with the cache instance on which the pseudo-locking + * flow can be executed + * + * Return: 0 on success, <0 on failure. Descriptive error will be written + * to last_cmd_status buffer. + */ +static int pseudo_lock_region_init(struct pseudo_lock_region *plr) +{ + struct cpu_cacheinfo *ci; + int ret; + int i; + + /* Pick the first cpu we find that is associated with the cache. */ + plr->cpu = cpumask_first(&plr->d->cpu_mask); + + if (!cpu_online(plr->cpu)) { + rdt_last_cmd_printf("CPU %u associated with cache not online\n", + plr->cpu); + ret = -ENODEV; + goto out_region; + } + + ci = get_cpu_cacheinfo(plr->cpu); + + plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); + + for (i = 0; i < ci->num_leaves; i++) { + if (ci->info_list[i].level == plr->s->res->cache_level) { + plr->line_size = ci->info_list[i].coherency_line_size; + return 0; + } + } + + ret = -1; + rdt_last_cmd_puts("Unable to determine cache line size\n"); +out_region: + pseudo_lock_region_clear(plr); + return ret; +} + +/** + * pseudo_lock_init - Initialize a pseudo-lock region + * @rdtgrp: resource group to which new pseudo-locked region will belong + * + * A pseudo-locked region is associated with a resource group. When this + * association is created the pseudo-locked region is initialized. The + * details of the pseudo-locked region are not known at this time so only + * allocation is done and association established. 
+ * + * Return: 0 on success, <0 on failure + */ +static int pseudo_lock_init(struct rdtgroup *rdtgrp) +{ + struct pseudo_lock_region *plr; + + plr = kzalloc(sizeof(*plr), GFP_KERNEL); + if (!plr) + return -ENOMEM; + + init_waitqueue_head(&plr->lock_thread_wq); + INIT_LIST_HEAD(&plr->pm_reqs); + rdtgrp->plr = plr; + return 0; +} + +/** + * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked + * @plr: pseudo-lock region + * + * Initialize the details required to set up the pseudo-locked region and + * allocate the contiguous memory that will be pseudo-locked to the cache. + * + * Return: 0 on success, <0 on failure. Descriptive error will be written + * to last_cmd_status buffer. + */ +static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr) +{ + int ret; + + ret = pseudo_lock_region_init(plr); + if (ret < 0) + return ret; + + /* + * We do not yet support contiguous regions larger than + * KMALLOC_MAX_SIZE. + */ + if (plr->size > KMALLOC_MAX_SIZE) { + rdt_last_cmd_puts("Requested region exceeds maximum size\n"); + ret = -E2BIG; + goto out_region; + } + + plr->kmem = kzalloc(plr->size, GFP_KERNEL); + if (!plr->kmem) { + rdt_last_cmd_puts("Unable to allocate memory\n"); + ret = -ENOMEM; + goto out_region; + } + + ret = 0; + goto out; +out_region: + pseudo_lock_region_clear(plr); +out: + return ret; +} + +/** + * pseudo_lock_free - Free a pseudo-locked region + * @rdtgrp: resource group to which pseudo-locked region belonged + * + * The pseudo-locked region's resources have already been released, or not + * yet created at this point. Now it can be freed and disassociated from the + * resource group. 
+ * + * Return: void + */ +static void pseudo_lock_free(struct rdtgroup *rdtgrp) +{ + pseudo_lock_region_clear(rdtgrp->plr); + kfree(rdtgrp->plr); + rdtgrp->plr = NULL; +} + /** * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache * @_plr: the pseudo-lock region descriptor @@ -228,6 +543,345 @@ int resctrl_arch_pseudo_lock_fn(void *_plr) return 0; } +/** + * rdtgroup_monitor_in_progress - Test if monitoring in progress + * @rdtgrp: resource group being queried + * + * Return: 1 if monitor groups have been created for this resource + * group, 0 otherwise. + */ +static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp) +{ + return !list_empty(&rdtgrp->mon.crdtgrp_list); +} + +/** + * rdtgroup_locksetup_user_restrict - Restrict user access to group + * @rdtgrp: resource group needing access restricted + * + * A resource group used for cache pseudo-locking cannot have cpus or tasks + * assigned to it. This is communicated to the user by restricting access + * to all the files that can be used to make such changes. + * + * Permissions restored with rdtgroup_locksetup_user_restore() + * + * Return: 0 on success, <0 on failure. If a failure occurs during the + * restriction of access an attempt will be made to restore permissions but + * the state of the mode of these files will be uncertain when a failure + * occurs. 
+ */ +static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp) +{ + int ret; + + ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks"); + if (ret) + return ret; + + ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus"); + if (ret) + goto err_tasks; + + ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list"); + if (ret) + goto err_cpus; + + if (resctrl_arch_mon_capable()) { + ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups"); + if (ret) + goto err_cpus_list; + } + + ret = 0; + goto out; + +err_cpus_list: + rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777); +err_cpus: + rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777); +err_tasks: + rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777); +out: + return ret; +} + +/** + * rdtgroup_locksetup_user_restore - Restore user access to group + * @rdtgrp: resource group needing access restored + * + * Restore all file access previously removed using + * rdtgroup_locksetup_user_restrict() + * + * Return: 0 on success, <0 on failure. If a failure occurs during the + * restoration of access an attempt will be made to restrict permissions + * again but the state of the mode of these files will be uncertain when + * a failure occurs. 
+ */ +static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp) +{ + int ret; + + ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777); + if (ret) + return ret; + + ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777); + if (ret) + goto err_tasks; + + ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777); + if (ret) + goto err_cpus; + + if (resctrl_arch_mon_capable()) { + ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777); + if (ret) + goto err_cpus_list; + } + + ret = 0; + goto out; + +err_cpus_list: + rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list"); +err_cpus: + rdtgroup_kn_mode_restrict(rdtgrp, "cpus"); +err_tasks: + rdtgroup_kn_mode_restrict(rdtgrp, "tasks"); +out: + return ret; +} + +/** + * rdtgroup_locksetup_enter - Resource group enters locksetup mode + * @rdtgrp: resource group requested to enter locksetup mode + * + * A resource group enters locksetup mode to reflect that it would be used + * to represent a pseudo-locked region and is in the process of being set + * up to do so. A resource group used for a pseudo-locked region would + * lose the closid associated with it so we cannot allow it to have any + * tasks or cpus assigned nor permit tasks or cpus to be assigned in the + * future. Monitoring of a pseudo-locked region is not allowed either. + * + * The above and more restrictions on a pseudo-locked region are checked + * for and enforced before the resource group enters the locksetup mode. + * + * Returns: 0 if the resource group successfully entered locksetup mode, <0 + * on failure. On failure the last_cmd_status buffer is updated with text to + * communicate details of failure to the user. + */ +int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) +{ + int ret; + + /* + * The default resource group can neither be removed nor lose the + * default closid associated with it. 
+ */ + if (rdtgrp == &rdtgroup_default) { + rdt_last_cmd_puts("Cannot pseudo-lock default group\n"); + return -EINVAL; + } + + /* + * Cache Pseudo-locking not supported when CDP is enabled. + * + * Some things to consider if you would like to enable this + * support (using L3 CDP as example): + * - When CDP is enabled two separate resources are exposed, + * L3DATA and L3CODE, but they are actually on the same cache. + * The implication for pseudo-locking is that if a + * pseudo-locked region is created on a domain of one + * resource (eg. L3CODE), then a pseudo-locked region cannot + * be created on that same domain of the other resource + * (eg. L3DATA). This is because the creation of a + * pseudo-locked region involves a call to wbinvd that will + * affect all cache allocations on particular domain. + * - Considering the previous, it may be possible to only + * expose one of the CDP resources to pseudo-locking and + * hide the other. For example, we could consider to only + * expose L3DATA and since the L3 cache is unified it is + * still possible to place instructions there are execute it. + * - If only one region is exposed to pseudo-locking we should + * still keep in mind that availability of a portion of cache + * for pseudo-locking should take into account both resources. + * Similarly, if a pseudo-locked region is created in one + * resource, the portion of cache used by it should be made + * unavailable to all future allocations from both resources. + */ + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) || + resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) { + rdt_last_cmd_puts("CDP enabled\n"); + return -EINVAL; + } + + /* + * Not knowing the bits to disable prefetching implies that this + * platform does not support Cache Pseudo-Locking. 
+ */ + if (resctrl_arch_get_prefetch_disable_bits() == 0) { + rdt_last_cmd_puts("Pseudo-locking not supported\n"); + return -EINVAL; + } + + if (rdtgroup_monitor_in_progress(rdtgrp)) { + rdt_last_cmd_puts("Monitoring in progress\n"); + return -EINVAL; + } + + if (rdtgroup_tasks_assigned(rdtgrp)) { + rdt_last_cmd_puts("Tasks assigned to resource group\n"); + return -EINVAL; + } + + if (!cpumask_empty(&rdtgrp->cpu_mask)) { + rdt_last_cmd_puts("CPUs assigned to resource group\n"); + return -EINVAL; + } + + if (rdtgroup_locksetup_user_restrict(rdtgrp)) { + rdt_last_cmd_puts("Unable to modify resctrl permissions\n"); + return -EIO; + } + + ret = pseudo_lock_init(rdtgrp); + if (ret) { + rdt_last_cmd_puts("Unable to init pseudo-lock region\n"); + goto out_release; + } + + /* + * If this system is capable of monitoring a rmid would have been + * allocated when the control group was created. This is not needed + * anymore when this group would be used for pseudo-locking. This + * is safe to call on platforms not capable of monitoring. + */ + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); + + ret = 0; + goto out; + +out_release: + rdtgroup_locksetup_user_restore(rdtgrp); +out: + return ret; +} + +/** + * rdtgroup_locksetup_exit - resource group exist locksetup mode + * @rdtgrp: resource group + * + * When a resource group exits locksetup mode the earlier restrictions are + * lifted. 
+ * + * Return: 0 on success, <0 on failure + */ +int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) +{ + int ret; + + if (resctrl_arch_mon_capable()) { + ret = alloc_rmid(rdtgrp->closid); + if (ret < 0) { + rdt_last_cmd_puts("Out of RMIDs\n"); + return ret; + } + rdtgrp->mon.rmid = ret; + } + + ret = rdtgroup_locksetup_user_restore(rdtgrp); + if (ret) { + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); + return ret; + } + + pseudo_lock_free(rdtgrp); + return 0; +} + +/** + * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked + * @d: RDT domain + * @cbm: CBM to test + * + * @d represents a cache instance and @cbm a capacity bitmask that is + * considered for it. Determine if @cbm overlaps with any existing + * pseudo-locked region on @d. + * + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * + * Return: true if @cbm overlaps with pseudo-locked region on @d, false + * otherwise. + */ +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) +{ + unsigned int cbm_len; + unsigned long cbm_b; + + if (d->plr) { + cbm_len = d->plr->s->res->cache.cbm_len; + cbm_b = d->plr->cbm; + if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) + return true; + } + return false; +} + +/** + * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy + * @d: RDT domain under test + * + * The setup of a pseudo-locked region affects all cache instances within + * the hierarchy of the region. It is thus essential to know if any + * pseudo-locked regions exist within a cache hierarchy to prevent any + * attempts to create new pseudo-locked regions in the same hierarchy. + * + * Return: true if a pseudo-locked region exists in the hierarchy of @d or + * if it is not possible to test due to memory allocation issue, + * false otherwise. 
+ */ +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) +{ + cpumask_var_t cpu_with_psl; + enum resctrl_res_level i; + struct rdt_resource *r; + struct rdt_domain *d_i; + bool ret = false; + + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL)) + return true; + + /* + * First determine which cpus have pseudo-locked regions + * associated with them. + */ + for (i = 0; i < RDT_NUM_RESOURCES; i++) { + r = resctrl_arch_get_resource(i); + if (!r->alloc_capable) + continue; + + list_for_each_entry(d_i, &r->domains, list) { + if (d_i->plr) + cpumask_or(cpu_with_psl, cpu_with_psl, + &d_i->cpu_mask); + } + } + + /* + * Next test if new pseudo-locked region would intersect with + * existing region. + */ + if (cpumask_intersects(&d->cpu_mask, cpu_with_psl)) + ret = true; + + free_cpumask_var(cpu_with_psl); + return ret; +} + /** * resctrl_arch_measure_cycles_lat_fn - Measure cycle latency to read * pseudo-locked memory @@ -520,3 +1174,442 @@ int resctrl_arch_measure_l3_residency(void *_plr) wake_up_interruptible(&plr->lock_thread_wq); return 0; } + +/** + * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region + * @rdtgrp: Resource group to which the pseudo-locked region belongs. + * @sel: Selector of which measurement to perform on a pseudo-locked region. + * + * The measurement of latency to access a pseudo-locked region should be + * done from a cpu that is associated with that pseudo-locked region. + * Determine which cpu is associated with this region and start a thread on + * that cpu to perform the measurement, wait for that thread to complete. 
+ * + * Return: 0 on success, <0 on failure + */ +static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) +{ + struct pseudo_lock_region *plr = rdtgrp->plr; + struct task_struct *thread; + unsigned int cpu; + int ret = -1; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + if (rdtgrp->flags & RDT_DELETED) { + ret = -ENODEV; + goto out; + } + + if (!plr->d) { + ret = -ENODEV; + goto out; + } + + plr->thread_done = 0; + cpu = cpumask_first(&plr->d->cpu_mask); + if (!cpu_online(cpu)) { + ret = -ENODEV; + goto out; + } + + plr->cpu = cpu; + + if (sel == 1) + thread = kthread_create_on_node(resctrl_arch_measure_cycles_lat_fn, + plr, cpu_to_node(cpu), + "pseudo_lock_measure/%u", + cpu); + else if (sel == 2) + thread = kthread_create_on_node(resctrl_arch_measure_l2_residency, + plr, cpu_to_node(cpu), + "pseudo_lock_measure/%u", + cpu); + else if (sel == 3) + thread = kthread_create_on_node(resctrl_arch_measure_l3_residency, + plr, cpu_to_node(cpu), + "pseudo_lock_measure/%u", + cpu); + else + goto out; + + if (IS_ERR(thread)) { + ret = PTR_ERR(thread); + goto out; + } + kthread_bind(thread, cpu); + wake_up_process(thread); + + ret = wait_event_interruptible(plr->lock_thread_wq, + plr->thread_done == 1); + if (ret < 0) + goto out; + + ret = 0; + +out: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + return ret; +} + +static ssize_t pseudo_lock_measure_trigger(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct rdtgroup *rdtgrp = file->private_data; + size_t buf_size; + char buf[32]; + int ret; + int sel; + + buf_size = min(count, (sizeof(buf) - 1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + ret = kstrtoint(buf, 10, &sel); + if (ret == 0) { + if (sel != 1 && sel != 2 && sel != 3) + return -EINVAL; + ret = debugfs_file_get(file->f_path.dentry); + if (ret) + return ret; + ret = pseudo_lock_measure_cycles(rdtgrp, sel); + if (ret == 0) + ret = count; + 
debugfs_file_put(file->f_path.dentry); + } + + return ret; +} + +static const struct file_operations pseudo_measure_fops = { + .write = pseudo_lock_measure_trigger, + .open = simple_open, + .llseek = default_llseek, +}; + +/** + * rdtgroup_pseudo_lock_create - Create a pseudo-locked region + * @rdtgrp: resource group to which pseudo-lock region belongs + * + * Called when a resource group in the pseudo-locksetup mode receives a + * valid schemata that should be pseudo-locked. Since the resource group is + * in pseudo-locksetup mode the &struct pseudo_lock_region has already been + * allocated and initialized with the essential information. If a failure + * occurs the resource group remains in the pseudo-locksetup mode with the + * &struct pseudo_lock_region associated with it, but cleared from all + * information and ready for the user to re-attempt pseudo-locking by + * writing the schemata again. + * + * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0 + * on failure. Descriptive error will be written to last_cmd_status buffer. 
+ */ +int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) +{ + struct pseudo_lock_region *plr = rdtgrp->plr; + struct task_struct *thread; + unsigned int new_minor; + struct device *dev; + int ret; + + ret = pseudo_lock_region_alloc(plr); + if (ret < 0) + return ret; + + ret = pseudo_lock_cstates_constrain(plr); + if (ret < 0) { + ret = -EINVAL; + goto out_region; + } + + plr->thread_done = 0; + + plr->closid = rdtgrp->closid; + thread = kthread_create_on_node(resctrl_arch_pseudo_lock_fn, plr, + cpu_to_node(plr->cpu), + "pseudo_lock/%u", plr->cpu); + if (IS_ERR(thread)) { + ret = PTR_ERR(thread); + rdt_last_cmd_printf("Locking thread returned error %d\n", ret); + goto out_cstates; + } + + kthread_bind(thread, plr->cpu); + wake_up_process(thread); + + ret = wait_event_interruptible(plr->lock_thread_wq, + plr->thread_done == 1); + if (ret < 0) { + /* + * If the thread does not get on the CPU for whatever + * reason and the process which sets up the region is + * interrupted then this will leave the thread in runnable + * state and once it gets on the CPU it will dereference + * the cleared, but not freed, plr struct resulting in an + * empty pseudo-locking loop. + */ + rdt_last_cmd_puts("Locking thread interrupted\n"); + goto out_cstates; + } + + ret = pseudo_lock_minor_get(&new_minor); + if (ret < 0) { + rdt_last_cmd_puts("Unable to obtain a new minor number\n"); + goto out_cstates; + } + + /* + * Unlock access but do not release the reference. The + * pseudo-locked region will still be here on return. + * + * The mutex has to be released temporarily to avoid a potential + * deadlock with the mm->mmap_lock which is obtained in the + * device_create() and debugfs_create_dir() callpath below as well as + * before the mmap() callback is called. 
+ */ + mutex_unlock(&rdtgroup_mutex); + + if (!IS_ERR_OR_NULL(debugfs_resctrl)) { + plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name, + debugfs_resctrl); + if (!IS_ERR_OR_NULL(plr->debugfs_dir)) + debugfs_create_file("pseudo_lock_measure", 0200, + plr->debugfs_dir, rdtgrp, + &pseudo_measure_fops); + } + + dev = device_create(&pseudo_lock_class, NULL, + MKDEV(pseudo_lock_major, new_minor), + rdtgrp, "%s", rdtgrp->kn->name); + + mutex_lock(&rdtgroup_mutex); + + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + rdt_last_cmd_printf("Failed to create character device: %d\n", + ret); + goto out_debugfs; + } + + /* We released the mutex - check if group was removed while we did so */ + if (rdtgrp->flags & RDT_DELETED) { + ret = -ENODEV; + goto out_device; + } + + plr->minor = new_minor; + + rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED; + closid_free(rdtgrp->closid); + rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444); + rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444); + + ret = 0; + goto out; + +out_device: + device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor)); +out_debugfs: + debugfs_remove_recursive(plr->debugfs_dir); + pseudo_lock_minor_release(new_minor); +out_cstates: + pseudo_lock_cstates_relax(plr); +out_region: + pseudo_lock_region_clear(plr); +out: + return ret; +} + +/** + * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region + * @rdtgrp: resource group to which the pseudo-locked region belongs + * + * The removal of a pseudo-locked region can be initiated when the resource + * group is removed from user space via a "rmdir" from userspace or the + * unmount of the resctrl filesystem. On removal the resource group does + * not go back to pseudo-locksetup mode before it is removed, instead it is + * removed directly. There is thus asymmetry with the creation where the + * &struct pseudo_lock_region is removed here while it was not created in + * rdtgroup_pseudo_lock_create(). 
+ * + * Return: void + */ +void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) +{ + struct pseudo_lock_region *plr = rdtgrp->plr; + + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + /* + * Default group cannot be a pseudo-locked region so we can + * free closid here. + */ + closid_free(rdtgrp->closid); + goto free; + } + + pseudo_lock_cstates_relax(plr); + debugfs_remove_recursive(rdtgrp->plr->debugfs_dir); + device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor)); + pseudo_lock_minor_release(plr->minor); + +free: + pseudo_lock_free(rdtgrp); +} + +static int pseudo_lock_dev_open(struct inode *inode, struct file *filp) +{ + struct rdtgroup *rdtgrp; + + mutex_lock(&rdtgroup_mutex); + + rdtgrp = region_find_by_minor(iminor(inode)); + if (!rdtgrp) { + mutex_unlock(&rdtgroup_mutex); + return -ENODEV; + } + + filp->private_data = rdtgrp; + atomic_inc(&rdtgrp->waitcount); + /* Perform a non-seekable open - llseek is not supported */ + filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); + + mutex_unlock(&rdtgroup_mutex); + + return 0; +} + +static int pseudo_lock_dev_release(struct inode *inode, struct file *filp) +{ + struct rdtgroup *rdtgrp; + + mutex_lock(&rdtgroup_mutex); + rdtgrp = filp->private_data; + WARN_ON(!rdtgrp); + if (!rdtgrp) { + mutex_unlock(&rdtgroup_mutex); + return -ENODEV; + } + filp->private_data = NULL; + atomic_dec(&rdtgrp->waitcount); + mutex_unlock(&rdtgroup_mutex); + return 0; +} + +static int pseudo_lock_dev_mremap(struct vm_area_struct *area) +{ + /* Not supported */ + return -EINVAL; +} + +static const struct vm_operations_struct pseudo_mmap_ops = { + .mremap = pseudo_lock_dev_mremap, +}; + +static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) +{ + unsigned long vsize = vma->vm_end - vma->vm_start; + unsigned long off = vma->vm_pgoff << PAGE_SHIFT; + struct pseudo_lock_region *plr; + struct rdtgroup *rdtgrp; + unsigned long physical; + unsigned long psize; + + 
mutex_lock(&rdtgroup_mutex); + + rdtgrp = filp->private_data; + WARN_ON(!rdtgrp); + if (!rdtgrp) { + mutex_unlock(&rdtgroup_mutex); + return -ENODEV; + } + + plr = rdtgrp->plr; + + if (!plr->d) { + mutex_unlock(&rdtgroup_mutex); + return -ENODEV; + } + + /* + * Task is required to run with affinity to the cpus associated + * with the pseudo-locked region. If this is not the case the task + * may be scheduled elsewhere and invalidate entries in the + * pseudo-locked region. + */ + if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { + mutex_unlock(&rdtgroup_mutex); + return -EINVAL; + } + + physical = __pa(plr->kmem) >> PAGE_SHIFT; + psize = plr->size - off; + + if (off > plr->size) { + mutex_unlock(&rdtgroup_mutex); + return -ENOSPC; + } + + /* + * Ensure changes are carried directly to the memory being mapped, + * do not allow copy-on-write mapping. + */ + if (!(vma->vm_flags & VM_SHARED)) { + mutex_unlock(&rdtgroup_mutex); + return -EINVAL; + } + + if (vsize > psize) { + mutex_unlock(&rdtgroup_mutex); + return -ENOSPC; + } + + memset(plr->kmem + off, 0, vsize); + + if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff, + vsize, vma->vm_page_prot)) { + mutex_unlock(&rdtgroup_mutex); + return -EAGAIN; + } + vma->vm_ops = &pseudo_mmap_ops; + mutex_unlock(&rdtgroup_mutex); + return 0; +} + +static const struct file_operations pseudo_lock_dev_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = NULL, + .write = NULL, + .open = pseudo_lock_dev_open, + .release = pseudo_lock_dev_release, + .mmap = pseudo_lock_dev_mmap, +}; + +int rdt_pseudo_lock_init(void) +{ + int ret; + + ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops); + if (ret < 0) + return ret; + + pseudo_lock_major = ret; + + ret = class_register(&pseudo_lock_class); + if (ret) { + unregister_chrdev(pseudo_lock_major, "pseudo_lock"); + return ret; + } + + return 0; +} + +void rdt_pseudo_lock_release(void) +{ + class_unregister(&pseudo_lock_class); + 
unregister_chrdev(pseudo_lock_major, "pseudo_lock"); + pseudo_lock_major = 0; +} diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index fe3952514add..1425a33d201d 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -12,8 +12,22 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include + +#include #include #include "internal.h" @@ -22,239 +36,4219 @@ DEFINE_STATIC_KEY_FALSE(rdt_enable_key); DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key); DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key); +/* Mutex to protect rdtgroup access. */ +DEFINE_MUTEX(rdtgroup_mutex); + +static struct kernfs_root *rdt_root; +struct rdtgroup rdtgroup_default; +LIST_HEAD(rdt_all_groups); + +/* list of entries for the schemata file */ +LIST_HEAD(resctrl_schema_all); + +/* The filesystem can only be mounted once. */ +bool resctrl_mounted; + +/* Kernel fs node for "info" directory under root */ +static struct kernfs_node *kn_info; + +/* Kernel fs node for "mon_groups" directory under root */ +static struct kernfs_node *kn_mongrp; + +/* Kernel fs node for "mon_data" directory under root */ +static struct kernfs_node *kn_mondata; + /* - * This is safe against resctrl_arch_sched_in() called from __switch_to() - * because __switch_to() is executed with interrupts disabled. A local call - * from update_closid_rmid() is protected against __switch_to() because - * preemption is disabled. 
+ * Used to store the max resource name width and max resource data width + * to display the schemata in a tabular format */ -void resctrl_arch_sync_cpu_defaults(void *info) +int max_name_width, max_data_width; + +static struct seq_buf last_cmd_status; +static char last_cmd_status_buf[512]; + +static int rdtgroup_setup_root(struct rdt_fs_context *ctx); +static void rdtgroup_destroy_root(void); + +struct dentry *debugfs_resctrl; + +static bool resctrl_debug; + +void rdt_last_cmd_clear(void) { - struct resctrl_cpu_sync *r = info; + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_clear(&last_cmd_status); +} - if (r) { - this_cpu_write(pqr_state.default_closid, r->closid); - this_cpu_write(pqr_state.default_rmid, r->rmid); +void rdt_last_cmd_puts(const char *s) +{ + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_puts(&last_cmd_status, s); +} + +void rdt_last_cmd_printf(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_vprintf(&last_cmd_status, fmt, ap); + va_end(ap); +} + +void rdt_staged_configs_clear(void) +{ + enum resctrl_res_level i; + struct rdt_resource *r; + struct rdt_domain *dom; + + lockdep_assert_held(&rdtgroup_mutex); + + for (i = 0; i < RDT_NUM_RESOURCES; i++) { + r = resctrl_arch_get_resource(i); + if (!r->alloc_capable) + continue; + + list_for_each_entry(dom, &r->domains, list) + memset(dom->staged_config, 0, sizeof(dom->staged_config)); } +} - /* - * We cannot unconditionally write the MSR because the current - * executing task might have its own closid selected. Just reuse - * the context switch code. 
- */ - resctrl_arch_sched_in(current); +static bool resctrl_is_mbm_enabled(void) +{ + return (resctrl_arch_is_mbm_total_enabled() || + resctrl_arch_is_mbm_local_enabled()); } -#define INVALID_CONFIG_INDEX UINT_MAX +static bool resctrl_is_mbm_event(int e) +{ + return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && + e <= QOS_L3_MBM_LOCAL_EVENT_ID); +} -/** - * mon_event_config_index_get - get the hardware index for the - * configurable event - * @evtid: event id. +/* + * Trivial allocator for CLOSIDs. Since h/w only supports a small number, + * we can keep a bitmap of free CLOSIDs in a single integer. * - * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID - * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID - * INVALID_CONFIG_INDEX for invalid evtid + * Using a global CLOSID across all resources has some advantages and + * some drawbacks: + * + We can simply set current's closid to assign a task to a resource + * group. + * + Context switch code can avoid extra memory references deciding which + * CLOSID to load into the PQR_ASSOC MSR + * - We give up some options in configuring resource groups across multi-socket + * systems. + * - Our choices on how to configure each resource become progressively more + * limited as the number of resources grows. 
*/ -static inline unsigned int mon_event_config_index_get(u32 evtid) +static unsigned long closid_free_map; +static int closid_free_map_len; + +int closids_supported(void) { - switch (evtid) { - case QOS_L3_MBM_TOTAL_EVENT_ID: - return 0; - case QOS_L3_MBM_LOCAL_EVENT_ID: - return 1; - default: - /* Should never reach here */ - return INVALID_CONFIG_INDEX; - } + return closid_free_map_len; } -void resctrl_arch_mon_event_config_read(void *info) +static void closid_init(void) { - struct resctrl_mon_config_info *mon_info = info; - unsigned int index; - u64 msrval; + struct resctrl_schema *s; + u32 rdt_min_closid = 32; - index = mon_event_config_index_get(mon_info->evtid); - if (index == INVALID_CONFIG_INDEX) { - pr_warn_once("Invalid event id %d\n", mon_info->evtid); - return; - } - rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); + /* Compute rdt_min_closid across all resources */ + list_for_each_entry(s, &resctrl_schema_all, list) + rdt_min_closid = min(rdt_min_closid, s->num_closid); - /* Report only the valid event configuration bits */ - mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; + closid_free_map = BIT_MASK(rdt_min_closid) - 1; + + /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */ + __clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map); + closid_free_map_len = rdt_min_closid; } -void resctrl_arch_mon_event_config_write(void *info) +static int closid_alloc(void) { - struct resctrl_mon_config_info *mon_info = info; - unsigned int index; + int cleanest_closid; + u32 closid; - index = mon_event_config_index_get(mon_info->evtid); - if (index == INVALID_CONFIG_INDEX) { - pr_warn_once("Invalid event id %d\n", mon_info->evtid); - mon_info->err = -EINVAL; - return; + lockdep_assert_held(&rdtgroup_mutex); + + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && + resctrl_arch_is_llc_occupancy_enabled()) { + cleanest_closid = resctrl_find_cleanest_closid(); + if (cleanest_closid < 0) + return cleanest_closid; + closid = cleanest_closid; + 
} else { + closid = ffs(closid_free_map); + if (closid == 0) + return -ENOSPC; + closid--; } - wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); + __clear_bit(closid, &closid_free_map); - mon_info->err = 0; + return closid; } -static void l3_qos_cfg_update(void *arg) +void closid_free(int closid) { - bool *enable = arg; + lockdep_assert_held(&rdtgroup_mutex); - wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); + __set_bit(closid, &closid_free_map); } -static void l2_qos_cfg_update(void *arg) +/** + * closid_allocated - test if provided closid is in use + * @closid: closid to be tested + * + * Return: true if @closid is currently associated with a resource group, + * false if @closid is free + */ +bool closid_allocated(unsigned int closid) { - bool *enable = arg; + lockdep_assert_held(&rdtgroup_mutex); - wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); + return !test_bit(closid, &closid_free_map); } -static int set_cache_qos_cfg(int level, bool enable) +/** + * rdtgroup_mode_by_closid - Return mode of resource group with closid + * @closid: closid if the resource group + * + * Each resource group is associated with a @closid. Here the mode + * of a resource group can be queried by searching for it using its closid. 
+ * + * Return: mode as &enum rdtgrp_mode of resource group with closid @closid + */ +enum rdtgrp_mode rdtgroup_mode_by_closid(int closid) { - void (*update)(void *arg); - struct rdt_resource *r_l; - cpumask_var_t cpu_mask; - struct rdt_domain *d; - int cpu; + struct rdtgroup *rdtgrp; - /* Walking r->domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); + list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { + if (rdtgrp->closid == closid) + return rdtgrp->mode; + } - if (level == RDT_RESOURCE_L3) - update = l3_qos_cfg_update; - else if (level == RDT_RESOURCE_L2) - update = l2_qos_cfg_update; - else - return -EINVAL; + return RDT_NUM_MODES; +} - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; +static const char * const rdt_mode_str[] = { + [RDT_MODE_SHAREABLE] = "shareable", + [RDT_MODE_EXCLUSIVE] = "exclusive", + [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup", + [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked", +}; - r_l = &rdt_resources_all[level].r_resctrl; - list_for_each_entry(d, &r_l->domains, list) { - if (r_l->cache.arch_has_per_cpu_cfg) - /* Pick all the CPUs in the domain instance */ - for_each_cpu(cpu, &d->cpu_mask) - cpumask_set_cpu(cpu, cpu_mask); - else - /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); +/** + * rdtgroup_mode_str - Return the string representation of mode + * @mode: the resource group mode as &enum rdtgroup_mode + * + * Return: string representation of valid mode, "unknown" otherwise + */ +static const char *rdtgroup_mode_str(enum rdtgrp_mode mode) +{ + if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES) + return "unknown"; + + return rdt_mode_str[mode]; +} + +/* set uid and gid of rdtgroup dirs and files to that of the creator */ +static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) +{ + struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, + .ia_uid = current_fsuid(), + .ia_gid = current_fsgid(), }; + + if 
(uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && + gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) + return 0; + + return kernfs_setattr(kn, &iattr); +} + +static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft) +{ + struct kernfs_node *kn; + int ret; + + kn = __kernfs_create_file(parent_kn, rft->name, rft->mode, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + 0, rft->kf_ops, rft, NULL, NULL); + if (IS_ERR(kn)) + return PTR_ERR(kn); + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) { + kernfs_remove(kn); + return ret; } - /* Update QOS_CFG MSR on all the CPUs in cpu_mask */ - on_each_cpu_mask(cpu_mask, update, &enable, 1); + return 0; +} - free_cpumask_var(cpu_mask); +static int rdtgroup_seqfile_show(struct seq_file *m, void *arg) +{ + struct kernfs_open_file *of = m->private; + struct rftype *rft = of->kn->priv; + if (rft->seq_show) + return rft->seq_show(of, m, arg); return 0; } -/* Restore the qos cfg state when a domain comes online */ -void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) { - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rftype *rft = of->kn->priv; - if (!r->cdp_capable) - return; + if (rft->write) + return rft->write(of, buf, nbytes, off); - if (r->rid == RDT_RESOURCE_L2) - l2_qos_cfg_update(&hw_res->cdp_enabled); + return -EINVAL; +} - if (r->rid == RDT_RESOURCE_L3) - l3_qos_cfg_update(&hw_res->cdp_enabled); +static const struct kernfs_ops rdtgroup_kf_single_ops = { + .atomic_write_len = PAGE_SIZE, + .write = rdtgroup_file_write, + .seq_show = rdtgroup_seqfile_show, +}; + +static const struct kernfs_ops kf_mondata_ops = { + .atomic_write_len = PAGE_SIZE, + .seq_show = rdtgroup_mondata_show, +}; + +static bool is_cpu_list(struct kernfs_open_file *of) +{ + struct rftype *rft = of->kn->priv; + + return rft->flags & RFTYPE_FLAGS_CPUS_LIST; } -static int cdp_enable(int level) +static int rdtgroup_cpus_show(struct 
kernfs_open_file *of, + struct seq_file *s, void *v) { - struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl; - int ret; + struct rdtgroup *rdtgrp; + struct cpumask *mask; + int ret = 0; - if (!r_l->alloc_capable) - return -EINVAL; + rdtgrp = rdtgroup_kn_lock_live(of->kn); - ret = set_cache_qos_cfg(level, true); - if (!ret) - rdt_resources_all[level].cdp_enabled = true; + if (rdtgrp) { + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { + if (!rdtgrp->plr->d) { + rdt_last_cmd_clear(); + rdt_last_cmd_puts("Cache domain offline\n"); + ret = -ENODEV; + } else { + mask = &rdtgrp->plr->d->cpu_mask; + seq_printf(s, is_cpu_list(of) ? + "%*pbl\n" : "%*pb\n", + cpumask_pr_args(mask)); + } + } else { + seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n", + cpumask_pr_args(&rdtgrp->cpu_mask)); + } + } else { + ret = -ENOENT; + } + rdtgroup_kn_unlock(of->kn); return ret; } -static void cdp_disable(int level) +/* + * This is safe against resctrl_arch_sched_in() called from __switch_to() + * because __switch_to() is executed with interrupts disabled. A local call + * from update_closid_rmid() is protected against __switch_to() because + * preemption is disabled. + */ +void resctrl_arch_sync_cpu_defaults(void *info) { - struct rdt_hw_resource *r_hw = &rdt_resources_all[level]; + struct resctrl_cpu_sync *r = info; - if (r_hw->cdp_enabled) { - set_cache_qos_cfg(level, false); - r_hw->cdp_enabled = false; + if (r) { + this_cpu_write(pqr_state.default_closid, r->closid); + this_cpu_write(pqr_state.default_rmid, r->rmid); + } + + /* + * We cannot unconditionally write the MSR because the current + * executing task might have its own closid selected. Just reuse + * the context switch code. + */ + resctrl_arch_sched_in(current); +} + +/* + * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, + * + * Per task closids/rmids must have been set up before calling this function. + * @r may be NULL. 
+ */ +static void +update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) +{ + struct resctrl_cpu_sync defaults; + struct resctrl_cpu_sync *defaults_p = NULL; + + if (r) { + defaults.closid = r->closid; + defaults.rmid = r->mon.rmid; + defaults_p = &defaults; } + + on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_defaults, defaults_p, + 1); } -int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) +static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, + cpumask_var_t tmpmask) { - struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; + struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp; + struct list_head *head; - if (!hw_res->r_resctrl.cdp_capable) + /* Check whether cpus belong to parent ctrl group */ + cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); + if (!cpumask_empty(tmpmask)) { + rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n"); return -EINVAL; + } - if (enable) - return cdp_enable(l); + /* Check whether cpus are dropped from this group */ + cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); + if (!cpumask_empty(tmpmask)) { + /* Give any dropped cpus to parent rdtgroup */ + cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask); + update_closid_rmid(tmpmask, prgrp); + } - cdp_disable(l); + /* + * If we added cpus, remove them from previous group that owned them + * and update per-cpu rmid + */ + cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); + if (!cpumask_empty(tmpmask)) { + head = &prgrp->mon.crdtgrp_list; + list_for_each_entry(crgrp, head, mon.crdtgrp_list) { + if (crgrp == rdtgrp) + continue; + cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask, + tmpmask); + } + update_closid_rmid(tmpmask, rdtgrp); + } + + /* Done pushing/pulling - update this group with new mask */ + cpumask_copy(&rdtgrp->cpu_mask, newmask); return 0; } -static int reset_all_ctrls(struct rdt_resource *r) +static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m) { - struct 
rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom; - struct msr_param msr_param; - cpumask_var_t cpu_mask; - struct rdt_domain *d; - int i; + struct rdtgroup *crgrp; - /* Walking r->domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); + cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m); + /* update the child mon group masks as well*/ + list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list) + cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask); +} - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; +static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, + cpumask_var_t tmpmask, cpumask_var_t tmpmask1) +{ + struct rdtgroup *r, *crgrp; + struct list_head *head; - msr_param.res = r; - msr_param.low = 0; - msr_param.high = hw_res->num_closid; + /* Check whether cpus are dropped from this group */ + cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); + if (!cpumask_empty(tmpmask)) { + /* Can't drop from default group */ + if (rdtgrp == &rdtgroup_default) { + rdt_last_cmd_puts("Can't drop CPUs from default group\n"); + return -EINVAL; + } + + /* Give any dropped cpus to rdtgroup_default */ + cpumask_or(&rdtgroup_default.cpu_mask, + &rdtgroup_default.cpu_mask, tmpmask); + update_closid_rmid(tmpmask, &rdtgroup_default); + } /* - * Disable resource control for this resource by setting all - * CBMs in all domains to the maximum mask value. Pick one CPU - * from each domain to update the MSRs below. + * If we added cpus, remove them from previous group and + * the prev group's child groups that owned them + * and update per-cpu closid/rmid. 
*/ - list_for_each_entry(d, &r->domains, list) { - hw_dom = resctrl_to_arch_dom(d); - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); - - for (i = 0; i < hw_res->num_closid; i++) - hw_dom->ctrl_val[i] = r->default_ctrl; + cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); + if (!cpumask_empty(tmpmask)) { + list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { + if (r == rdtgrp) + continue; + cpumask_and(tmpmask1, &r->cpu_mask, tmpmask); + if (!cpumask_empty(tmpmask1)) + cpumask_rdtgrp_clear(r, tmpmask1); + } + update_closid_rmid(tmpmask, rdtgrp); } - /* Update CBM on all the CPUs in cpu_mask */ - on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); + /* Done pushing/pulling - update this group with new mask */ + cpumask_copy(&rdtgrp->cpu_mask, newmask); - free_cpumask_var(cpu_mask); + /* + * Clear child mon group masks since there is a new parent mask + * now and update the rmid for the cpus the child lost. + */ + head = &rdtgrp->mon.crdtgrp_list; + list_for_each_entry(crgrp, head, mon.crdtgrp_list) { + cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask); + update_closid_rmid(tmpmask, rdtgrp); + cpumask_clear(&crgrp->cpu_mask); + } return 0; } -void resctrl_arch_reset_resources(void) +static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r; + cpumask_var_t tmpmask, newmask, tmpmask1; + struct rdtgroup *rdtgrp; + int ret; + + if (!buf) + return -EINVAL; + + if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) + return -ENOMEM; + if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { + free_cpumask_var(tmpmask); + return -ENOMEM; + } + if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) { + free_cpumask_var(tmpmask); + free_cpumask_var(newmask); + return -ENOMEM; + } + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto unlock; + } + + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || + rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + ret = -EINVAL; + 
rdt_last_cmd_puts("Pseudo-locking in progress\n"); + goto unlock; + } + + if (is_cpu_list(of)) + ret = cpulist_parse(buf, newmask); + else + ret = cpumask_parse(buf, newmask); + + if (ret) { + rdt_last_cmd_puts("Bad CPU list/mask\n"); + goto unlock; + } + + /* check that user didn't specify any offline cpus */ + cpumask_andnot(tmpmask, newmask, cpu_online_mask); + if (!cpumask_empty(tmpmask)) { + ret = -EINVAL; + rdt_last_cmd_puts("Can only assign online CPUs\n"); + goto unlock; + } + + if (rdtgrp->type == RDTCTRL_GROUP) + ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1); + else if (rdtgrp->type == RDTMON_GROUP) + ret = cpus_mon_write(rdtgrp, newmask, tmpmask); + else + ret = -EINVAL; + +unlock: + rdtgroup_kn_unlock(of->kn); + free_cpumask_var(tmpmask); + free_cpumask_var(newmask); + free_cpumask_var(tmpmask1); + + return ret ?: nbytes; +} + +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. + * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) +{ + kernfs_put(rdtgrp->kn); + kfree(rdtgrp); +} + +static void _update_task_closid_rmid(void *task) +{ + /* + * If the task is still current on this CPU, update PQR_ASSOC MSR. + * Otherwise, the MSR is updated when the task is scheduled in. 
+ */ + if (task == current) + resctrl_arch_sched_in(task); +} + +static void update_task_closid_rmid(struct task_struct *t) +{ + if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) + smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); + else + _update_task_closid_rmid(t); +} + +static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp) +{ + u32 closid, rmid = rdtgrp->mon.rmid; + + if (rdtgrp->type == RDTCTRL_GROUP) + closid = rdtgrp->closid; + else if (rdtgrp->type == RDTMON_GROUP) + closid = rdtgrp->mon.parent->closid; + else + return false; + + return resctrl_arch_match_closid(tsk, closid) && + resctrl_arch_match_rmid(tsk, closid, rmid); +} + +static int __rdtgroup_move_task(struct task_struct *tsk, + struct rdtgroup *rdtgrp) +{ + /* If the task is already in rdtgrp, no need to move the task. */ + if (task_in_rdtgroup(tsk, rdtgrp)) + return 0; + + /* + * Set the task's closid/rmid before the PQR_ASSOC MSR can be + * updated by them. + * + * For ctrl_mon groups, move both closid and rmid. + * For monitor groups, can move the tasks only from + * their parent CTRL group. + */ + if (rdtgrp->type == RDTMON_GROUP && + !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) { + rdt_last_cmd_puts("Can't move task to different control group\n"); + return -EINVAL; + } + + if (rdtgrp->type == RDTMON_GROUP) + resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid, + rdtgrp->mon.rmid); + else + resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid, + rdtgrp->mon.rmid); + + /* + * Ensure the task's closid and rmid are written before determining if + * the task is current that will decide if it will be interrupted. + * This pairs with the full barrier between the rq->curr update and + * resctrl_arch_sched_in() during context switch. + */ + smp_mb(); + + /* + * By now, the task's closid and rmid are set. If the task is current + * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource + * group go into effect. 
If the task is not current, the MSR will be + * updated when the task is scheduled in. + */ + update_task_closid_rmid(tsk); + + return 0; +} + +static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) +{ + return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) && + resctrl_arch_match_closid(t, r->closid)); +} + +static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) +{ + return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) && + resctrl_arch_match_rmid(t, r->mon.parent->closid, + r->mon.rmid)); +} + +/** + * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group + * @r: Resource group + * + * Return: 1 if tasks have been assigned to @r, 0 otherwise + */ +int rdtgroup_tasks_assigned(struct rdtgroup *r) +{ + struct task_struct *p, *t; + int ret = 0; + + lockdep_assert_held(&rdtgroup_mutex); + + rcu_read_lock(); + for_each_process_thread(p, t) { + if (is_closid_match(t, r) || is_rmid_match(t, r)) { + ret = 1; + break; + } + } + rcu_read_unlock(); + + return ret; +} + +static int rdtgroup_task_write_permission(struct task_struct *task, + struct kernfs_open_file *of) +{ + const struct cred *tcred = get_task_cred(task); + const struct cred *cred = current_cred(); + int ret = 0; + + /* + * Even if we're attaching all tasks in the thread group, we only + * need to check permissions on one of them. 
+ */ + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) { + rdt_last_cmd_printf("No permission to move task %d\n", task->pid); + ret = -EPERM; + } + + put_cred(tcred); + return ret; +} + +static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, + struct kernfs_open_file *of) +{ + struct task_struct *tsk; + int ret; + + rcu_read_lock(); + if (pid) { + tsk = find_task_by_vpid(pid); + if (!tsk) { + rcu_read_unlock(); + rdt_last_cmd_printf("No task %d\n", pid); + return -ESRCH; + } + } else { + tsk = current; + } + + get_task_struct(tsk); + rcu_read_unlock(); + + ret = rdtgroup_task_write_permission(tsk, of); + if (!ret) + ret = __rdtgroup_move_task(tsk, rdtgrp); + + put_task_struct(tsk); + return ret; +} + +static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct rdtgroup *rdtgrp; + char *pid_str; + int ret = 0; + pid_t pid; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + rdtgroup_kn_unlock(of->kn); + return -ENOENT; + } + rdt_last_cmd_clear(); + + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || + rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + ret = -EINVAL; + rdt_last_cmd_puts("Pseudo-locking in progress\n"); + goto unlock; + } + + while (buf && buf[0] != '\0' && buf[0] != '\n') { + pid_str = strim(strsep(&buf, ",")); + + if (kstrtoint(pid_str, 0, &pid)) { + rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str); + ret = -EINVAL; + break; + } + + if (pid < 0) { + rdt_last_cmd_printf("Invalid pid %d\n", pid); + ret = -EINVAL; + break; + } + + ret = rdtgroup_move_task(pid, rdtgrp, of); + if (ret) { + rdt_last_cmd_printf("Error while processing task %d\n", pid); + break; + } + } + +unlock: + rdtgroup_kn_unlock(of->kn); + + return ret ?: nbytes; +} + +static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) +{ + struct task_struct *p, *t; + pid_t pid; + + rcu_read_lock(); + for_each_process_thread(p, t) 
{ + if (is_closid_match(t, r) || is_rmid_match(t, r)) { + pid = task_pid_vnr(t); + if (pid) + seq_printf(s, "%d\n", pid); + } + } + rcu_read_unlock(); +} + +static int rdtgroup_tasks_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) + show_rdt_tasks(rdtgrp, s); + else + ret = -ENOENT; + rdtgroup_kn_unlock(of->kn); + + return ret; +} + +static int rdtgroup_closid_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) + seq_printf(s, "%u\n", rdtgrp->closid); + else + ret = -ENOENT; + rdtgroup_kn_unlock(of->kn); + + return ret; +} + +static int rdtgroup_rmid_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) + seq_printf(s, "%u\n", rdtgrp->mon.rmid); + else + ret = -ENOENT; + rdtgroup_kn_unlock(of->kn); + + return ret; +} + +#ifdef CONFIG_PROC_CPU_RESCTRL + +/* + * A task can only be part of one resctrl control group and of one monitor + * group which is associated to that control group. + * + * 1) res: + * mon: + * + * resctrl is not available. + * + * 2) res:/ + * mon: + * + * Task is part of the root resctrl control group, and it is not associated + * to any monitor group. + * + * 3) res:/ + * mon:mon0 + * + * Task is part of the root resctrl control group and monitor group mon0. + * + * 4) res:group0 + * mon: + * + * Task is part of resctrl control group group0, and it is not associated + * to any monitor group. + * + * 5) res:group0 + * mon:mon1 + * + * Task is part of resctrl control group group0 and monitor group mon1. 
+ */ +int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk) +{ + struct rdtgroup *rdtg; + int ret = 0; + + mutex_lock(&rdtgroup_mutex); + + /* Return empty if resctrl has not been mounted. */ + if (!resctrl_mounted) { + seq_puts(s, "res:\nmon:\n"); + goto unlock; + } + + list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { + struct rdtgroup *crg; + + /* + * Task information is only relevant for shareable + * and exclusive groups. + */ + if (rdtg->mode != RDT_MODE_SHAREABLE && + rdtg->mode != RDT_MODE_EXCLUSIVE) + continue; + + if (!resctrl_arch_match_closid(tsk, rdtg->closid)) + continue; + + seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", + rdtg->kn->name); + seq_puts(s, "mon:"); + list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, + mon.crdtgrp_list) { + if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid, + crg->mon.rmid)) + continue; + seq_printf(s, "%s", crg->kn->name); + break; + } + seq_putc(s, '\n'); + goto unlock; + } + /* + * The above search should succeed. Otherwise return + * with an error. 
+ */ + ret = -ENOENT; +unlock: + mutex_unlock(&rdtgroup_mutex); + + return ret; +} +#endif + +static int rdt_last_cmd_status_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + int len; + + mutex_lock(&rdtgroup_mutex); + len = seq_buf_used(&last_cmd_status); + if (len) + seq_printf(seq, "%.*s", len, last_cmd_status_buf); + else + seq_puts(seq, "ok\n"); + mutex_unlock(&rdtgroup_mutex); + return 0; +} + +static int rdt_num_closids_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + + seq_printf(seq, "%u\n", s->num_closid); + return 0; +} + +static int rdt_default_ctrl_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + seq_printf(seq, "%x\n", r->default_ctrl); + return 0; +} + +static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + seq_printf(seq, "%u\n", r->cache.min_cbm_bits); + return 0; +} + +static int rdt_shareable_bits_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + seq_printf(seq, "%x\n", r->cache.shareable_bits); + return 0; +} + +/* + * rdt_bit_usage_show - Display current usage of resources + * + * A domain is a shared resource that can now be allocated differently. Here + * we display the current regions of the domain as an annotated bitmask. 
+ * For each domain of this resource its allocation bitmask + * is annotated as below to indicate the current usage of the corresponding bit: + * 0 - currently unused + * X - currently available for sharing and used by software and hardware + * H - currently used by hardware only but available for software use + * S - currently used and shareable by software only + * E - currently used exclusively by one resource group + * P - currently pseudo-locked by one resource group + */ +static int rdt_bit_usage_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + /* + * Use unsigned long even though only 32 bits are used to ensure + * test_bit() is used safely. + */ + unsigned long sw_shareable = 0, hw_shareable = 0; + unsigned long exclusive = 0, pseudo_locked = 0; + struct rdt_resource *r = s->res; + struct rdt_domain *dom; + int i, hwb, swb, excl, psl; + enum rdtgrp_mode mode; + bool sep = false; + u32 ctrl_val; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + hw_shareable = r->cache.shareable_bits; + list_for_each_entry(dom, &r->domains, list) { + if (sep) + seq_putc(seq, ';'); + sw_shareable = 0; + exclusive = 0; + seq_printf(seq, "%d=", dom->id); + for (i = 0; i < closids_supported(); i++) { + if (!closid_allocated(i)) + continue; + ctrl_val = resctrl_arch_get_config(r, dom, i, + s->conf_type); + mode = rdtgroup_mode_by_closid(i); + switch (mode) { + case RDT_MODE_SHAREABLE: + sw_shareable |= ctrl_val; + break; + case RDT_MODE_EXCLUSIVE: + exclusive |= ctrl_val; + break; + case RDT_MODE_PSEUDO_LOCKSETUP: + /* + * RDT_MODE_PSEUDO_LOCKSETUP is possible + * here but not included since the CBM + * associated with this CLOSID in this mode + * is not initialized and no task or cpu can be + * assigned this CLOSID. 
+ */ + break; + case RDT_MODE_PSEUDO_LOCKED: + case RDT_NUM_MODES: + WARN(1, + "invalid mode for closid %d\n", i); + break; + } + } + for (i = r->cache.cbm_len - 1; i >= 0; i--) { + pseudo_locked = dom->plr ? dom->plr->cbm : 0; + hwb = test_bit(i, &hw_shareable); + swb = test_bit(i, &sw_shareable); + excl = test_bit(i, &exclusive); + psl = test_bit(i, &pseudo_locked); + if (hwb && swb) + seq_putc(seq, 'X'); + else if (hwb && !swb) + seq_putc(seq, 'H'); + else if (!hwb && swb) + seq_putc(seq, 'S'); + else if (excl) + seq_putc(seq, 'E'); + else if (psl) + seq_putc(seq, 'P'); + else /* Unused bits remain */ + seq_putc(seq, '0'); + } + sep = true; + } + seq_putc(seq, '\n'); + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + return 0; +} + +static int rdt_min_bw_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + seq_printf(seq, "%u\n", r->membw.min_bw); + return 0; +} + +static int rdt_num_rmids_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + + seq_printf(seq, "%d\n", r->num_rmid); + + return 0; +} + +static int rdt_mon_features_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + struct mon_evt *mevt; + + list_for_each_entry(mevt, &r->evt_list, list) { + seq_printf(seq, "%s\n", mevt->name); + if (mevt->configurable) + seq_printf(seq, "%s_config\n", mevt->name); + } + + return 0; +} + +static int rdt_bw_gran_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + seq_printf(seq, "%u\n", r->membw.bw_gran); + return 0; +} + +static int rdt_delay_linear_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + 
seq_printf(seq, "%u\n", r->membw.delay_linear); + return 0; +} + +static int max_threshold_occ_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold); + + return 0; +} + +static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) + seq_puts(seq, "per-thread\n"); + else + seq_puts(seq, "max\n"); + + return 0; +} + +static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned int bytes; + int ret; + + ret = kstrtouint(buf, 0, &bytes); + if (ret) + return ret; + + if (bytes > resctrl_rmid_realloc_limit) + return -EINVAL; + + resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes); + + return nbytes; +} + +/* + * rdtgroup_mode_show - Display mode of this resource group + */ +static int rdtgroup_mode_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + rdtgroup_kn_unlock(of->kn); + return -ENOENT; + } + + seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode)); + + rdtgroup_kn_unlock(of->kn); + return 0; +} + +static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) +{ + switch (my_type) { + case CDP_CODE: + return CDP_DATA; + case CDP_DATA: + return CDP_CODE; + default: + case CDP_NONE: + return CDP_NONE; + } +} + +static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); + + return 0; +} + +/** + * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other + * @r: Resource to which domain instance @d 
belongs. + * @d: The domain instance for which @closid is being tested. + * @cbm: Capacity bitmask being tested. + * @closid: Intended closid for @cbm. + * @type: CDP type of @r. + * @exclusive: Only check if overlaps with exclusive resource groups + * + * Checks if provided @cbm intended to be used for @closid on domain + * @d overlaps with any other closids or other hardware usage associated + * with this domain. If @exclusive is true then only overlaps with + * resource groups in exclusive mode will be considered. If @exclusive + * is false then overlaps with any resource group or hardware entities + * will be considered. + * + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * + * Return: false if CBM does not overlap, true if it does. + */ +static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, + unsigned long cbm, int closid, + enum resctrl_conf_type type, bool exclusive) +{ + enum rdtgrp_mode mode; + unsigned long ctrl_b; + int i; + + /* Check for any overlap with regions used by hardware directly */ + if (!exclusive) { + ctrl_b = r->cache.shareable_bits; + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) + return true; + } + + /* Check for overlap with other resource groups */ + for (i = 0; i < closids_supported(); i++) { + ctrl_b = resctrl_arch_get_config(r, d, i, type); + mode = rdtgroup_mode_by_closid(i); + if (closid_allocated(i) && i != closid && + mode != RDT_MODE_PSEUDO_LOCKSETUP) { + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { + if (exclusive) { + if (mode == RDT_MODE_EXCLUSIVE) + return true; + continue; + } + return true; + } + } + } + + return false; +} + +/** + * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware + * @s: Schema for the resource to which domain instance @d belongs. + * @d: The domain instance for which @closid is being tested. + * @cbm: Capacity bitmask being tested. + * @closid: Intended closid for @cbm. 
+ * @exclusive: Only check if overlaps with exclusive resource groups + * + * Resources that can be allocated using a CBM can use the CBM to control + * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test + * for overlap. Overlap test is not limited to the specific resource for + * which the CBM is intended though - when dealing with CDP resources that + * share the underlying hardware the overlap check should be performed on + * the CDP resource sharing the hardware also. + * + * Refer to description of __rdtgroup_cbm_overlaps() for the details of the + * overlap test. + * + * Return: true if CBM overlap detected, false if there is no overlap + */ +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, + unsigned long cbm, int closid, bool exclusive) +{ + enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); + struct rdt_resource *r = s->res; + + if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type, + exclusive)) + return true; + + if (!resctrl_arch_get_cdp_enabled(r->rid)) + return false; + return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive); +} + +/** + * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive + * @rdtgrp: Resource group identified through its closid. + * + * An exclusive resource group implies that there should be no sharing of + * its allocated resources. At the time this group is considered to be + * exclusive this test can determine if its current schemata supports this + * setting by testing for overlap with all other resource groups. + * + * Return: true if resource group can be exclusive, false if there is overlap + * with allocations of other resource groups and thus this resource group + * cannot be exclusive. 
+ */ +static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) +{ + int closid = rdtgrp->closid; + struct resctrl_schema *s; + struct rdt_resource *r; + bool has_cache = false; + struct rdt_domain *d; + u32 ctrl; + + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; + if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA) + continue; + has_cache = true; + list_for_each_entry(d, &r->domains, list) { + ctrl = resctrl_arch_get_config(r, d, closid, + s->conf_type); + if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { + rdt_last_cmd_puts("Schemata overlaps\n"); + return false; + } + } + } + + if (!has_cache) { + rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n"); + return false; + } + + return true; +} + +/* + * rdtgroup_mode_write - Modify the resource group's mode + */ +static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct rdtgroup *rdtgrp; + enum rdtgrp_mode mode; + int ret = 0; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + buf[nbytes - 1] = '\0'; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + rdtgroup_kn_unlock(of->kn); + return -ENOENT; + } + + rdt_last_cmd_clear(); + + mode = rdtgrp->mode; + + if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) || + (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) || + (!strcmp(buf, "pseudo-locksetup") && + mode == RDT_MODE_PSEUDO_LOCKSETUP) || + (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED)) + goto out; + + if (mode == RDT_MODE_PSEUDO_LOCKED) { + rdt_last_cmd_puts("Cannot change pseudo-locked group\n"); + ret = -EINVAL; + goto out; + } + + if (!strcmp(buf, "shareable")) { + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + ret = rdtgroup_locksetup_exit(rdtgrp); + if (ret) + goto out; + } + rdtgrp->mode = 
RDT_MODE_SHAREABLE; + } else if (!strcmp(buf, "exclusive")) { + if (!rdtgroup_mode_test_exclusive(rdtgrp)) { + ret = -EINVAL; + goto out; + } + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + ret = rdtgroup_locksetup_exit(rdtgrp); + if (ret) + goto out; + } + rdtgrp->mode = RDT_MODE_EXCLUSIVE; + } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) && + !strcmp(buf, "pseudo-locksetup")) { + ret = rdtgroup_locksetup_enter(rdtgrp); + if (ret) + goto out; + rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP; + } else { + rdt_last_cmd_puts("Unknown or unsupported mode\n"); + ret = -EINVAL; + } + +out: + rdtgroup_kn_unlock(of->kn); + return ret ?: nbytes; +} + +/** + * rdtgroup_cbm_to_size - Translate CBM to size in bytes + * @r: RDT resource to which @d belongs. + * @d: RDT domain instance. + * @cbm: bitmask for which the size should be computed. + * + * The bitmask provided associated with the RDT domain instance @d will be + * translated into how many bytes it represents. The size in bytes is + * computed by first dividing the total cache size by the CBM length to + * determine how many bytes each bit in the bitmask represents. The result + * is multiplied with the number of bits set in the bitmask. + * + * @cbm is unsigned long, even if only 32 bits are used to make the + * bitmap functions work correctly. 
+ */ +unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, + struct rdt_domain *d, unsigned long cbm) +{ + struct cpu_cacheinfo *ci; + unsigned int size = 0; + int num_b, i; + + num_b = bitmap_weight(&cbm, r->cache.cbm_len); + ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); + for (i = 0; i < ci->num_leaves; i++) { + if (ci->info_list[i].level == r->cache_level) { + size = ci->info_list[i].size / r->cache.cbm_len * num_b; + break; + } + } + + return size; +} + +/* + * rdtgroup_size_show - Display size in bytes of allocated regions + * + * The "size" file mirrors the layout of the "schemata" file, printing the + * size in bytes of each region instead of the capacity bitmask. + */ +static int rdtgroup_size_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct resctrl_schema *schema; + enum resctrl_conf_type type; + struct rdtgroup *rdtgrp; + struct rdt_resource *r; + struct rdt_domain *d; + unsigned int size; + int ret = 0; + u32 closid; + bool sep; + u32 ctrl; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + rdtgroup_kn_unlock(of->kn); + return -ENOENT; + } + + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { + if (!rdtgrp->plr->d) { + rdt_last_cmd_clear(); + rdt_last_cmd_puts("Cache domain offline\n"); + ret = -ENODEV; + } else { + seq_printf(s, "%*s:", max_name_width, + rdtgrp->plr->s->name); + size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, + rdtgrp->plr->d, + rdtgrp->plr->cbm); + seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); + } + goto out; + } + + closid = rdtgrp->closid; + + list_for_each_entry(schema, &resctrl_schema_all, list) { + r = schema->res; + type = schema->conf_type; + sep = false; + seq_printf(s, "%*s:", max_name_width, schema->name); + list_for_each_entry(d, &r->domains, list) { + if (sep) + seq_putc(s, ';'); + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + size = 0; + } else { + if (is_mba_sc(r)) + ctrl = d->mbps_val[closid]; + else + ctrl = resctrl_arch_get_config(r, d, + closid, + type); + if 
(r->rid == RDT_RESOURCE_MBA || + r->rid == RDT_RESOURCE_SMBA) + size = ctrl; + else + size = rdtgroup_cbm_to_size(r, d, ctrl); + } + seq_printf(s, "%d=%u", d->id, size); + sep = true; + } + seq_putc(s, '\n'); + } + +out: + rdtgroup_kn_unlock(of->kn); + + return ret; +} + +#define INVALID_CONFIG_INDEX UINT_MAX + +/** + * mon_event_config_index_get - get the hardware index for the + * configurable event + * @evtid: event id. + * + * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID + * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID + * INVALID_CONFIG_INDEX for invalid evtid + */ +static inline unsigned int mon_event_config_index_get(u32 evtid) +{ + switch (evtid) { + case QOS_L3_MBM_TOTAL_EVENT_ID: + return 0; + case QOS_L3_MBM_LOCAL_EVENT_ID: + return 1; + default: + /* Should never reach here */ + return INVALID_CONFIG_INDEX; + } +} + +void resctrl_arch_mon_event_config_read(void *info) +{ + struct resctrl_mon_config_info *mon_info = info; + unsigned int index; + u64 msrval; + + index = mon_event_config_index_get(mon_info->evtid); + if (index == INVALID_CONFIG_INDEX) { + pr_warn_once("Invalid event id %d\n", mon_info->evtid); + return; + } + rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); + + /* Report only the valid event configuration bits */ + mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; +} + +static void mondata_config_read(struct resctrl_mon_config_info *mon_info) +{ + smp_call_function_any(&mon_info->d->cpu_mask, + resctrl_arch_mon_event_config_read, mon_info, 1); +} + +static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) +{ + struct resctrl_mon_config_info mon_info = {0}; + struct rdt_domain *dom; + bool sep = false; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + list_for_each_entry(dom, &r->domains, list) { + if (sep) + seq_puts(s, ";"); + + memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info)); + mon_info.r = r; + mon_info.d = dom; + mon_info.evtid = evtid; + mondata_config_read(&mon_info); + + seq_printf(s, 
"%d=0x%02x", dom->id, mon_info.mon_config); + sep = true; + } + seq_puts(s, "\n"); + + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return 0; +} + +static int mbm_total_bytes_config_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + + mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); + + return 0; +} + +static int mbm_local_bytes_config_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + + mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); + + return 0; +} + +void resctrl_arch_mon_event_config_write(void *info) +{ + struct resctrl_mon_config_info *mon_info = info; + unsigned int index; + + index = mon_event_config_index_get(mon_info->evtid); + if (index == INVALID_CONFIG_INDEX) { + pr_warn_once("Invalid event id %d\n", mon_info->evtid); + mon_info->err = -EINVAL; + return; + } + wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); + + mon_info->err = 0; +} + +static int mbm_config_write_domain(struct rdt_resource *r, + struct rdt_domain *d, u32 evtid, u32 val) +{ + struct resctrl_mon_config_info mon_info = {0}; + + /* + * Read the current config value first. If both are the same then + * no need to write it again. + */ + mon_info.r = r; + mon_info.d = d; + mon_info.evtid = evtid; + mondata_config_read(&mon_info); + if (mon_info.mon_config == val) + return 0; + + mon_info.mon_config = val; + + /* + * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the + * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE + * are scoped at the domain level. Writing any of these MSRs + * on one CPU is observed by all the CPUs in the domain. 
+ */ + smp_call_function_any(&d->cpu_mask, resctrl_arch_mon_event_config_write, + &mon_info, 1); + if (mon_info.err) { + rdt_last_cmd_puts("Invalid event configuration\n"); + return mon_info.err; + } + + /* + * When an Event Configuration is changed, the bandwidth counters + * for all RMIDs and Events will be cleared by the hardware. The + * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for + * every RMID on the next read to any event for every RMID. + * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62) + * cleared while it is tracked by the hardware. Clear the + * mbm_local and mbm_total counts for all the RMIDs. + */ + resctrl_arch_reset_rmid_all(r, d); + + return 0; +} + +static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) +{ + char *dom_str = NULL, *id_str; + unsigned long dom_id, val; + struct rdt_domain *d; + int err; + + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + +next: + if (!tok || tok[0] == '\0') + return 0; + + /* Start processing the strings for each domain */ + dom_str = strim(strsep(&tok, ";")); + id_str = strsep(&dom_str, "="); + + if (!id_str || kstrtoul(id_str, 10, &dom_id)) { + rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n"); + return -EINVAL; + } + + if (!dom_str || kstrtoul(dom_str, 16, &val)) { + rdt_last_cmd_puts("Non-numeric event configuration value\n"); + return -EINVAL; + } + + /* Value from user cannot be more than the supported set of events */ + if ((val & r->mbm_cfg_mask) != val) { + rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", + r->mbm_cfg_mask); + return -EINVAL; + } + + list_for_each_entry(d, &r->domains, list) { + if (d->id == dom_id) { + err = mbm_config_write_domain(r, d, evtid, val); + if (err) + return err; + goto next; + } + } + + return -EINVAL; +} + +static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + struct rdt_resource 
*r = of->kn->parent->priv; + int ret; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + buf[nbytes - 1] = '\0'; + + ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); + + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret ?: nbytes; +} + +static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + struct rdt_resource *r = of->kn->parent->priv; + int ret; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + buf[nbytes - 1] = '\0'; + + ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID); + + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret ?: nbytes; +} + +/* rdtgroup information files for one cache resource. 
*/ +static struct rftype res_common_files[] = { + { + .name = "last_cmd_status", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_last_cmd_status_show, + .fflags = RFTYPE_TOP_INFO, + }, + { + .name = "num_closids", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_num_closids_show, + .fflags = RFTYPE_CTRL_INFO, + }, + { + .name = "mon_features", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_mon_features_show, + .fflags = RFTYPE_MON_INFO, + }, + { + .name = "num_rmids", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_num_rmids_show, + .fflags = RFTYPE_MON_INFO, + }, + { + .name = "cbm_mask", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_default_ctrl_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, + }, + { + .name = "min_cbm_bits", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_min_cbm_bits_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, + }, + { + .name = "shareable_bits", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_shareable_bits_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, + }, + { + .name = "bit_usage", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_bit_usage_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, + }, + { + .name = "min_bandwidth", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_min_bw_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, + }, + { + .name = "bandwidth_gran", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_bw_gran_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, + }, + { + .name = "delay_linear", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_delay_linear_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, + }, + /* + * Platform specific which (if any) capabilities are provided by + * thread_throttle_mode. 
Defer "fflags" initialization to platform + * discovery. + */ + { + .name = "thread_throttle_mode", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_thread_throttle_mode_show, + }, + { + .name = "max_threshold_occupancy", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = max_threshold_occ_write, + .seq_show = max_threshold_occ_show, + .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE, + }, + { + .name = "mbm_total_bytes_config", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = mbm_total_bytes_config_show, + .write = mbm_total_bytes_config_write, + }, + { + .name = "mbm_local_bytes_config", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = mbm_local_bytes_config_show, + .write = mbm_local_bytes_config_write, + }, + { + .name = "cpus", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_cpus_write, + .seq_show = rdtgroup_cpus_show, + .fflags = RFTYPE_BASE, + }, + { + .name = "cpus_list", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_cpus_write, + .seq_show = rdtgroup_cpus_show, + .flags = RFTYPE_FLAGS_CPUS_LIST, + .fflags = RFTYPE_BASE, + }, + { + .name = "tasks", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_tasks_write, + .seq_show = rdtgroup_tasks_show, + .fflags = RFTYPE_BASE, + }, + { + .name = "mon_hw_id", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdtgroup_rmid_show, + .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG, + }, + { + .name = "schemata", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_schemata_write, + .seq_show = rdtgroup_schemata_show, + .fflags = RFTYPE_CTRL_BASE, + }, + { + .name = "mode", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_mode_write, + .seq_show = rdtgroup_mode_show, + .fflags = RFTYPE_CTRL_BASE, + }, + { + .name = "size", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdtgroup_size_show, + .fflags = 
RFTYPE_CTRL_BASE, + }, + { + .name = "sparse_masks", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_has_sparse_bitmasks_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, + }, + { + .name = "ctrl_hw_id", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdtgroup_closid_show, + .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG, + }, + +}; + +static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) +{ + struct rftype *rfts, *rft; + int ret, len; + + rfts = res_common_files; + len = ARRAY_SIZE(res_common_files); + + lockdep_assert_held(&rdtgroup_mutex); + + if (resctrl_debug) + fflags |= RFTYPE_DEBUG; + + for (rft = rfts; rft < rfts + len; rft++) { + if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) { + ret = rdtgroup_add_file(kn, rft); + if (ret) + goto error; + } + } + + return 0; +error: + pr_warn("Failed to add %s, err=%d\n", rft->name, ret); + while (--rft >= rfts) { + if ((fflags & rft->fflags) == rft->fflags) + kernfs_remove_by_name(kn, rft->name); + } + return ret; +} + +static struct rftype *rdtgroup_get_rftype_by_name(const char *name) +{ + struct rftype *rfts, *rft; + int len; + + rfts = res_common_files; + len = ARRAY_SIZE(res_common_files); + + for (rft = rfts; rft < rfts + len; rft++) { + if (!strcmp(rft->name, name)) + return rft; + } + + return NULL; +} + +static void thread_throttle_mode_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + struct rftype *rft; + + if (!r->alloc_capable || + r->membw.throttle_mode == THREAD_THROTTLE_UNDEFINED) + return; + + rft = rdtgroup_get_rftype_by_name("thread_throttle_mode"); + if (!rft) + return; + + rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB; +} + +void mbm_config_rftype_init(const char *config) +{ + struct rftype *rft; + + rft = rdtgroup_get_rftype_by_name(config); + if (rft) + rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE; +} + +/** + * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl 
file + * @r: The resource group with which the file is associated. + * @name: Name of the file + * + * The permissions of named resctrl file, directory, or link are modified + * to not allow read, write, or execute by any user. + * + * WARNING: This function is intended to communicate to the user that the + * resctrl file has been locked down - that it is not relevant to the + * particular state the system finds itself in. It should not be relied + * on to protect from user access because after the file's permissions + * are restricted the user can still change the permissions using chmod + * from the command line. + * + * Return: 0 on success, <0 on failure. + */ +int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name) +{ + struct iattr iattr = {.ia_valid = ATTR_MODE,}; + struct kernfs_node *kn; + int ret = 0; + + kn = kernfs_find_and_get_ns(r->kn, name, NULL); + if (!kn) + return -ENOENT; + + switch (kernfs_type(kn)) { + case KERNFS_DIR: + iattr.ia_mode = S_IFDIR; + break; + case KERNFS_FILE: + iattr.ia_mode = S_IFREG; + break; + case KERNFS_LINK: + iattr.ia_mode = S_IFLNK; + break; + } + + ret = kernfs_setattr(kn, &iattr); + kernfs_put(kn); + return ret; +} + +/** + * rdtgroup_kn_mode_restore - Restore user access to named resctrl file + * @r: The resource group with which the file is associated. + * @name: Name of the file + * @mask: Mask of permissions that should be restored + * + * Restore the permissions of the named file. If @name is a directory the + * permissions of its parent will be used. + * + * Return: 0 on success, <0 on failure. 
+ */ +int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, + umode_t mask) +{ + struct iattr iattr = {.ia_valid = ATTR_MODE,}; + struct kernfs_node *kn, *parent; + struct rftype *rfts, *rft; + int ret, len; + + rfts = res_common_files; + len = ARRAY_SIZE(res_common_files); + + for (rft = rfts; rft < rfts + len; rft++) { + if (!strcmp(rft->name, name)) + iattr.ia_mode = rft->mode & mask; + } + + kn = kernfs_find_and_get_ns(r->kn, name, NULL); + if (!kn) + return -ENOENT; + + switch (kernfs_type(kn)) { + case KERNFS_DIR: + parent = kernfs_get_parent(kn); + if (parent) { + iattr.ia_mode |= parent->mode; + kernfs_put(parent); + } + iattr.ia_mode |= S_IFDIR; + break; + case KERNFS_FILE: + iattr.ia_mode |= S_IFREG; + break; + case KERNFS_LINK: + iattr.ia_mode |= S_IFLNK; + break; + } + + ret = kernfs_setattr(kn, &iattr); + kernfs_put(kn); + return ret; +} + +static int rdtgroup_mkdir_info_resdir(void *priv, char *name, + unsigned long fflags) +{ + struct kernfs_node *kn_subdir; + int ret; + + kn_subdir = kernfs_create_dir(kn_info, name, + kn_info->mode, priv); + if (IS_ERR(kn_subdir)) + return PTR_ERR(kn_subdir); + + ret = rdtgroup_kn_set_ugid(kn_subdir); + if (ret) + return ret; + + ret = rdtgroup_add_files(kn_subdir, fflags); + if (!ret) + kernfs_activate(kn_subdir); + + return ret; +} + +static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) +{ + enum resctrl_res_level i; + struct resctrl_schema *s; + struct rdt_resource *r; + unsigned long fflags; + char name[32]; + int ret; + + /* create the directory */ + kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); + if (IS_ERR(kn_info)) + return PTR_ERR(kn_info); + + ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO); + if (ret) + goto out_destroy; + + /* loop over enabled controls, these are all alloc_capable */ + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; + fflags = r->fflags | RFTYPE_CTRL_INFO; + ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); + 
if (ret) + goto out_destroy; + } + + for (i = 0; i < RDT_NUM_RESOURCES; i++) { + r = resctrl_arch_get_resource(i); + if (!r->mon_capable) + continue; + + fflags = r->fflags | RFTYPE_MON_INFO; + sprintf(name, "%s_MON", r->name); + ret = rdtgroup_mkdir_info_resdir(r, name, fflags); + if (ret) + goto out_destroy; + } + + ret = rdtgroup_kn_set_ugid(kn_info); + if (ret) + goto out_destroy; + + kernfs_activate(kn_info); + + return 0; + +out_destroy: + kernfs_remove(kn_info); + return ret; +} + +static int +mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, + char *name, struct kernfs_node **dest_kn) +{ + struct kernfs_node *kn; + int ret; + + /* create the directory */ + kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); + if (IS_ERR(kn)) + return PTR_ERR(kn); + + if (dest_kn) + *dest_kn = kn; + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) + goto out_destroy; + + kernfs_activate(kn); + + return 0; + +out_destroy: + kernfs_remove(kn); + return ret; +} + +static void l3_qos_cfg_update(void *arg) +{ + bool *enable = arg; + + wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); +} + +static void l2_qos_cfg_update(void *arg) +{ + bool *enable = arg; + + wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? 
L2_QOS_CDP_ENABLE : 0ULL); +} + +static inline bool is_mba_linear(void) +{ + return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; +} + +static int set_cache_qos_cfg(int level, bool enable) +{ + void (*update)(void *arg); + struct rdt_resource *r_l; + cpumask_var_t cpu_mask; + struct rdt_domain *d; + int cpu; + + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + if (level == RDT_RESOURCE_L3) + update = l3_qos_cfg_update; + else if (level == RDT_RESOURCE_L2) + update = l2_qos_cfg_update; + else + return -EINVAL; + + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + + r_l = &rdt_resources_all[level].r_resctrl; + list_for_each_entry(d, &r_l->domains, list) { + if (r_l->cache.arch_has_per_cpu_cfg) + /* Pick all the CPUs in the domain instance */ + for_each_cpu(cpu, &d->cpu_mask) + cpumask_set_cpu(cpu, cpu_mask); + else + /* Pick one CPU from each domain instance to update MSR */ + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + } + + /* Update QOS_CFG MSR on all the CPUs in cpu_mask */ + on_each_cpu_mask(cpu_mask, update, &enable, 1); + + free_cpumask_var(cpu_mask); + + return 0; +} + +/* Restore the qos cfg state when a domain comes online */ +void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + + if (!r->cdp_capable) + return; + + if (r->rid == RDT_RESOURCE_L2) + l2_qos_cfg_update(&hw_res->cdp_enabled); + + if (r->rid == RDT_RESOURCE_L3) + l3_qos_cfg_update(&hw_res->cdp_enabled); +} + +static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) +{ + u32 num_closid = resctrl_arch_get_num_closid(r); + int cpu = cpumask_any(&d->cpu_mask); + int i; + + d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), + GFP_KERNEL, cpu_to_node(cpu)); + if (!d->mbps_val) + return -ENOMEM; + + for (i = 0; i < num_closid; i++) + d->mbps_val[i] = MBA_MAX_MBPS; + + return 0; +} + +static void 
mba_sc_domain_destroy(struct rdt_resource *r, + struct rdt_domain *d) +{ + kfree(d->mbps_val); + d->mbps_val = NULL; +} + +/* + * MBA software controller is supported only if + * MBM is supported and MBA is in linear scale. + */ +static bool supports_mba_mbps(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + + return (resctrl_arch_is_mbm_local_enabled() && + r->alloc_capable && is_mba_linear()); +} + +/* + * Enable or disable the MBA software controller + * which helps user specify bandwidth in MBps. + */ +static int set_mba_sc(bool mba_sc) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + u32 num_closid = resctrl_arch_get_num_closid(r); + struct rdt_domain *d; + int i; + + if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) + return -EINVAL; + + r->membw.mba_sc = mba_sc; + + list_for_each_entry(d, &r->domains, list) { + for (i = 0; i < num_closid; i++) + d->mbps_val[i] = MBA_MAX_MBPS; + } + + return 0; +} + +static int cdp_enable(int level) +{ + struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl; + int ret; + + if (!r_l->alloc_capable) + return -EINVAL; + + ret = set_cache_qos_cfg(level, true); + if (!ret) + rdt_resources_all[level].cdp_enabled = true; + + return ret; +} + +static void cdp_disable(int level) +{ + struct rdt_hw_resource *r_hw = &rdt_resources_all[level]; + + if (r_hw->cdp_enabled) { + set_cache_qos_cfg(level, false); + r_hw->cdp_enabled = false; + } +} + +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) +{ + struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; + + if (!hw_res->r_resctrl.cdp_capable) + return -EINVAL; + + if (enable) + return cdp_enable(l); + + cdp_disable(l); + + return 0; +} + +/* + * We don't allow rdtgroup directories to be created anywhere + * except the root directory. 
Thus when looking for the rdtgroup + * structure for a kernfs node we are either looking at a directory, + * in which case the rdtgroup structure is pointed at by the "priv" + * field, otherwise we have a file, and need only look to the parent + * to find the rdtgroup. + */ +static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) +{ + if (kernfs_type(kn) == KERNFS_DIR) { + /* + * All the resource directories use "kn->priv" + * to point to the "struct rdtgroup" for the + * resource. "info" and its subdirectories don't + * have rdtgroup structures, so return NULL here. + */ + if (kn == kn_info || kn->parent == kn_info) + return NULL; + else + return kn->priv; + } else { + return kn->parent->priv; + } +} + +static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn) +{ + atomic_inc(&rdtgrp->waitcount); + kernfs_break_active_protection(kn); +} + +static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn) +{ + if (atomic_dec_and_test(&rdtgrp->waitcount) && + (rdtgrp->flags & RDT_DELETED)) { + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || + rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) + rdtgroup_pseudo_lock_remove(rdtgrp); + kernfs_unbreak_active_protection(kn); + rdtgroup_remove(rdtgrp); + } else { + kernfs_unbreak_active_protection(kn); + } +} + +struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) +{ + struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); + + if (!rdtgrp) + return NULL; + + rdtgroup_kn_get(rdtgrp, kn); + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + /* Was this group deleted while we waited? 
*/ + if (rdtgrp->flags & RDT_DELETED) + return NULL; + + return rdtgrp; +} + +void rdtgroup_kn_unlock(struct kernfs_node *kn) +{ + struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); + + if (!rdtgrp) + return; + + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + rdtgroup_kn_put(rdtgrp, kn); +} + +static int mkdir_mondata_all(struct kernfs_node *parent_kn, + struct rdtgroup *prgrp, + struct kernfs_node **mon_data_kn); + +static void rdt_disable_ctx(void) +{ + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); + set_mba_sc(false); + + resctrl_debug = false; +} + +static int rdt_enable_ctx(struct rdt_fs_context *ctx) +{ + int ret = 0; + + if (ctx->enable_cdpl2) { + ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true); + if (ret) + goto out_done; + } + + if (ctx->enable_cdpl3) { + ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true); + if (ret) + goto out_cdpl2; + } + + if (ctx->enable_mba_mbps) { + ret = set_mba_sc(true); + if (ret) + goto out_cdpl3; + } + + if (ctx->enable_debug) + resctrl_debug = true; + + return 0; + +out_cdpl3: + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); +out_cdpl2: + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); +out_done: + return ret; +} + +static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type) +{ + struct resctrl_schema *s; + const char *suffix = ""; + int ret, cl; + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + s->res = r; + s->num_closid = resctrl_arch_get_num_closid(r); + if (resctrl_arch_get_cdp_enabled(r->rid)) + s->num_closid /= 2; + + s->conf_type = type; + switch (type) { + case CDP_CODE: + suffix = "CODE"; + break; + case CDP_DATA: + suffix = "DATA"; + break; + case CDP_NONE: + suffix = ""; + break; + } + + ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix); + if (ret >= sizeof(s->name)) { + kfree(s); + return -EINVAL; + } + + cl = strlen(s->name); + + /* + * If CDP is 
supported by this resource, but not enabled, + * include the suffix. This ensures the tabular format of the + * schemata file does not change between mounts of the filesystem. + */ + if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid)) + cl += 4; + + if (cl > max_name_width) + max_name_width = cl; + + /* + * Choose a width for the resource data based on the resource that has + * widest name and cbm. + */ + max_data_width = max(max_data_width, r->data_width); + + INIT_LIST_HEAD(&s->list); + list_add(&s->list, &resctrl_schema_all); + + return 0; +} + +static int schemata_list_create(void) +{ + enum resctrl_res_level i; + struct rdt_resource *r; + int ret = 0; + + for (i = 0; i < RDT_NUM_RESOURCES; i++) { + r = resctrl_arch_get_resource(i); + if (!r->alloc_capable) + continue; + + if (resctrl_arch_get_cdp_enabled(r->rid)) { + ret = schemata_list_add(r, CDP_CODE); + if (ret) + break; + + ret = schemata_list_add(r, CDP_DATA); + } else { + ret = schemata_list_add(r, CDP_NONE); + } + + if (ret) + break; + } + + return ret; +} + +static void schemata_list_destroy(void) +{ + struct resctrl_schema *s, *tmp; + + list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) { + list_del(&s->list); + kfree(s); + } +} + +static int rdt_get_tree(struct fs_context *fc) +{ + struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_fs_context *ctx = rdt_fc2context(fc); + unsigned long flags = RFTYPE_CTRL_BASE; + struct rdt_domain *dom; + int ret; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + /* + * resctrl file system can only be mounted once. 
+ */ + if (resctrl_mounted) { + ret = -EBUSY; + goto out; + } + + ret = rdtgroup_setup_root(ctx); + if (ret) + goto out; + + ret = rdt_enable_ctx(ctx); + if (ret) + goto out_root; + + ret = schemata_list_create(); + if (ret) { + schemata_list_destroy(); + goto out_ctx; + } + + closid_init(); + + if (resctrl_arch_mon_capable()) + flags |= RFTYPE_MON; + + ret = rdtgroup_add_files(rdtgroup_default.kn, flags); + if (ret) + goto out_schemata_free; + + kernfs_activate(rdtgroup_default.kn); + + ret = rdtgroup_create_info_dir(rdtgroup_default.kn); + if (ret < 0) + goto out_schemata_free; + + if (resctrl_arch_mon_capable()) { + ret = mongroup_create_dir(rdtgroup_default.kn, + &rdtgroup_default, "mon_groups", + &kn_mongrp); + if (ret < 0) + goto out_info; + + ret = mkdir_mondata_all(rdtgroup_default.kn, + &rdtgroup_default, &kn_mondata); + if (ret < 0) + goto out_mongrp; + rdtgroup_default.mon.mon_data_kn = kn_mondata; + } + + ret = rdt_pseudo_lock_init(); + if (ret) + goto out_mondata; + + ret = kernfs_get_tree(fc); + if (ret < 0) + goto out_psl; + + if (resctrl_arch_alloc_capable()) + resctrl_arch_enable_alloc(); + if (resctrl_arch_mon_capable()) + resctrl_arch_enable_mon(); + + if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable()) + resctrl_mounted = true; + + if (resctrl_is_mbm_enabled()) { + list_for_each_entry(dom, &l3->domains, list) + mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, + RESCTRL_PICK_ANY_CPU); + } + + goto out; + +out_psl: + rdt_pseudo_lock_release(); +out_mondata: + if (resctrl_arch_mon_capable()) + kernfs_remove(kn_mondata); +out_mongrp: + if (resctrl_arch_mon_capable()) + kernfs_remove(kn_mongrp); +out_info: + kernfs_remove(kn_info); +out_schemata_free: + schemata_list_destroy(); +out_ctx: + rdt_disable_ctx(); +out_root: + rdtgroup_destroy_root(); +out: + rdt_last_cmd_clear(); + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + return ret; +} + +enum rdt_param { + Opt_cdp, + Opt_cdpl2, + Opt_mba_mbps, + Opt_debug, + 
nr__rdt_params +}; + +static const struct fs_parameter_spec rdt_fs_parameters[] = { + fsparam_flag("cdp", Opt_cdp), + fsparam_flag("cdpl2", Opt_cdpl2), + fsparam_flag("mba_MBps", Opt_mba_mbps), + fsparam_flag("debug", Opt_debug), + {} +}; + +static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, rdt_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_cdp: + ctx->enable_cdpl3 = true; + return 0; + case Opt_cdpl2: + ctx->enable_cdpl2 = true; + return 0; + case Opt_mba_mbps: + if (!supports_mba_mbps()) + return -EINVAL; + ctx->enable_mba_mbps = true; + return 0; + case Opt_debug: + ctx->enable_debug = true; + return 0; + } + + return -EINVAL; +} + +static void rdt_fs_context_free(struct fs_context *fc) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + + kernfs_free_fs_context(fc); + kfree(ctx); +} + +static const struct fs_context_operations rdt_fs_context_ops = { + .free = rdt_fs_context_free, + .parse_param = rdt_parse_param, + .get_tree = rdt_get_tree, +}; + +static int rdt_init_fs_context(struct fs_context *fc) +{ + struct rdt_fs_context *ctx; + + ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; + fc->fs_private = &ctx->kfc; + fc->ops = &rdt_fs_context_ops; + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(&init_user_ns); + fc->global = true; + return 0; +} + +static int reset_all_ctrls(struct rdt_resource *r) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_domain *hw_dom; + struct msr_param msr_param; + cpumask_var_t cpu_mask; + struct rdt_domain *d; + int i; + + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + + msr_param.res = r; + msr_param.low = 0; 
+ msr_param.high = hw_res->num_closid; + + /* + * Disable resource control for this resource by setting all + * CBMs in all domains to the maximum mask value. Pick one CPU + * from each domain to update the MSRs below. + */ + list_for_each_entry(d, &r->domains, list) { + hw_dom = resctrl_to_arch_dom(d); + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + + for (i = 0; i < hw_res->num_closid; i++) + hw_dom->ctrl_val[i] = r->default_ctrl; + } + + /* Update CBM on all the CPUs in cpu_mask */ + on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); + + free_cpumask_var(cpu_mask); + + return 0; +} + +void resctrl_arch_reset_resources(void) +{ + struct rdt_resource *r; for_each_capable_rdt_resource(r) reset_all_ctrls(r); } + +/* + * Move tasks from one to the other group. If @from is NULL, then all tasks + * in the systems are moved unconditionally (used for teardown). + * + * If @mask is not NULL the cpus on which moved tasks are running are set + * in that mask so the update smp function call is restricted to affected + * cpus. + */ +static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, + struct cpumask *mask) +{ + struct task_struct *p, *t; + + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { + if (!from || is_closid_match(t, from) || + is_rmid_match(t, from)) { + resctrl_arch_set_closid_rmid(t, to->closid, + to->mon.rmid); + + /* + * Order the closid/rmid stores above before the loads + * in task_curr(). This pairs with the full barrier + * between the rq->curr update and + * resctrl_arch_sched_in() during context switch. + */ + smp_mb(); + + /* + * If the task is on a CPU, set the CPU in the mask. + * The detection is inaccurate as tasks might move or + * schedule before the smp function call takes place. + * In such a case the function call is pointless, but + * there is no other side effect. 
+ */ + if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t)) + cpumask_set_cpu(task_cpu(t), mask); + } + } + read_unlock(&tasklist_lock); +} + +static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) +{ + struct rdtgroup *sentry, *stmp; + struct list_head *head; + + head = &rdtgrp->mon.crdtgrp_list; + list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { + free_rmid(sentry->closid, sentry->mon.rmid); + list_del(&sentry->mon.crdtgrp_list); + + if (atomic_read(&sentry->waitcount) != 0) + sentry->flags = RDT_DELETED; + else + rdtgroup_remove(sentry); + } +} + +/* + * Forcibly remove all of subdirectories under root. + */ +static void rmdir_all_sub(void) +{ + struct rdtgroup *rdtgrp, *tmp; + + /* Move all tasks to the default resource group */ + rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); + + list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { + /* Free any child rmids */ + free_all_child_rdtgrp(rdtgrp); + + /* Remove each rdtgroup other than root */ + if (rdtgrp == &rdtgroup_default) + continue; + + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || + rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) + rdtgroup_pseudo_lock_remove(rdtgrp); + + /* + * Give any CPUs back to the default group. We cannot copy + * cpu_online_mask because a CPU might have executed the + * offline callback already, but is still marked online. 
+ */ + cpumask_or(&rdtgroup_default.cpu_mask, + &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); + + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); + + kernfs_remove(rdtgrp->kn); + list_del(&rdtgrp->rdtgroup_list); + + if (atomic_read(&rdtgrp->waitcount) != 0) + rdtgrp->flags = RDT_DELETED; + else + rdtgroup_remove(rdtgrp); + } + /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ + update_closid_rmid(cpu_online_mask, &rdtgroup_default); + + kernfs_remove(kn_info); + kernfs_remove(kn_mongrp); + kernfs_remove(kn_mondata); +} + +static void rdt_kill_sb(struct super_block *sb) +{ + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_disable_ctx(); + + /* Put everything back to default values. */ + resctrl_arch_reset_resources(); + + rmdir_all_sub(); + rdt_pseudo_lock_release(); + rdtgroup_default.mode = RDT_MODE_SHAREABLE; + schemata_list_destroy(); + rdtgroup_destroy_root(); + if (resctrl_arch_alloc_capable()) + resctrl_arch_disable_alloc(); + if (resctrl_arch_mon_capable()) + resctrl_arch_disable_mon(); + resctrl_mounted = false; + kernfs_kill_sb(sb); + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); +} + +static struct file_system_type rdt_fs_type = { + .name = "resctrl", + .init_fs_context = rdt_init_fs_context, + .parameters = rdt_fs_parameters, + .kill_sb = rdt_kill_sb, +}; + +static int mon_addfile(struct kernfs_node *parent_kn, const char *name, + void *priv) +{ + struct kernfs_node *kn; + int ret = 0; + + kn = __kernfs_create_file(parent_kn, name, 0444, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, + &kf_mondata_ops, priv, NULL, NULL); + if (IS_ERR(kn)) + return PTR_ERR(kn); + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) { + kernfs_remove(kn); + return ret; + } + + return ret; +} + +/* + * Remove all subdirectories of mon_data of ctrl_mon groups + * and monitor groups with given domain id. 
+ */ +static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + unsigned int dom_id) +{ + struct rdtgroup *prgrp, *crgrp; + char name[32]; + + list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { + sprintf(name, "mon_%s_%02d", r->name, dom_id); + kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); + + list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) + kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); + } +} + +static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, + struct rdt_domain *d, + struct rdt_resource *r, struct rdtgroup *prgrp) +{ + union mon_data_bits priv; + struct kernfs_node *kn; + struct mon_evt *mevt; + struct rmid_read rr; + char name[32]; + int ret; + + sprintf(name, "mon_%s_%02d", r->name, d->id); + /* create the directory */ + kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); + if (IS_ERR(kn)) + return PTR_ERR(kn); + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) + goto out_destroy; + + if (WARN_ON(list_empty(&r->evt_list))) { + ret = -EPERM; + goto out_destroy; + } + + priv.u.rid = r->rid; + priv.u.domid = d->id; + list_for_each_entry(mevt, &r->evt_list, list) { + priv.u.evtid = mevt->evtid; + ret = mon_addfile(kn, mevt->name, priv.priv); + if (ret) + goto out_destroy; + + if (resctrl_is_mbm_event(mevt->evtid)) + mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); + } + kernfs_activate(kn); + return 0; + +out_destroy: + kernfs_remove(kn); + return ret; +} + +/* + * Add all subdirectories of mon_data for "ctrl_mon" groups + * and "monitor" groups with given domain id. 
+ */ +static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + struct rdt_domain *d) +{ + struct kernfs_node *parent_kn; + struct rdtgroup *prgrp, *crgrp; + struct list_head *head; + + list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { + parent_kn = prgrp->mon.mon_data_kn; + mkdir_mondata_subdir(parent_kn, d, r, prgrp); + + head = &prgrp->mon.crdtgrp_list; + list_for_each_entry(crgrp, head, mon.crdtgrp_list) { + parent_kn = crgrp->mon.mon_data_kn; + mkdir_mondata_subdir(parent_kn, d, r, crgrp); + } + } +} + +static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, + struct rdt_resource *r, + struct rdtgroup *prgrp) +{ + struct rdt_domain *dom; + int ret; + + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + list_for_each_entry(dom, &r->domains, list) { + ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); + if (ret) + return ret; + } + + return 0; +} + +/* + * This creates a directory mon_data which contains the monitored data. + * + * mon_data has one directory for each domain which are named + * in the format mon__. For ex: A mon_data + * with L3 domain looks as below: + * ./mon_data: + * mon_L3_00 + * mon_L3_01 + * mon_L3_02 + * ... + * + * Each domain directory has one file per event: + * ./mon_L3_00/: + * llc_occupancy + * + */ +static int mkdir_mondata_all(struct kernfs_node *parent_kn, + struct rdtgroup *prgrp, + struct kernfs_node **dest_kn) +{ + enum resctrl_res_level i; + struct rdt_resource *r; + struct kernfs_node *kn; + int ret; + + /* + * Create the mon_data directory first. + */ + ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); + if (ret) + return ret; + + if (dest_kn) + *dest_kn = kn; + + /* + * Create the subdirectories for each domain. 
Note that all events + * in a domain like L3 are grouped into a resource whose domain is L3 + */ + for (i = 0; i < RDT_NUM_RESOURCES; i++) { + r = resctrl_arch_get_resource(i); + if (!r->mon_capable) + continue; + + ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); + if (ret) + goto out_destroy; + } + + return 0; + +out_destroy: + kernfs_remove(kn); + return ret; +} + +/** + * cbm_ensure_valid - Enforce validity on provided CBM + * @_val: Candidate CBM + * @r: RDT resource to which the CBM belongs + * + * The provided CBM represents all cache portions available for use. This + * may be represented by a bitmap that does not consist of contiguous ones + * and thus be an invalid CBM. + * Here the provided CBM is forced to be a valid CBM by only considering + * the first set of contiguous bits as valid and clearing all bits. + * The intention here is to provide a valid default CBM with which a new + * resource group is initialized. The user can follow this with a + * modification to the CBM if the default does not satisfy the + * requirements. + */ +static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) +{ + unsigned int cbm_len = r->cache.cbm_len; + unsigned long first_bit, zero_bit; + unsigned long val = _val; + + if (!val) + return 0; + + first_bit = find_first_bit(&val, cbm_len); + zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); + + /* Clear any remaining bits to ensure contiguous region */ + bitmap_clear(&val, zero_bit, cbm_len - zero_bit); + return (u32)val; +} + +/* + * Initialize cache resources per RDT domain + * + * Set the RDT domain up to start off with all usable allocations. That is, + * all shareable and unused bits. All-zero CBM is invalid. 
+ */ +static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, + u32 closid) +{ + enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); + enum resctrl_conf_type t = s->conf_type; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; + u32 used_b = 0, unused_b = 0; + unsigned long tmp_cbm; + enum rdtgrp_mode mode; + u32 peer_ctl, ctrl_val; + int i; + + cfg = &d->staged_config[t]; + cfg->have_new_ctrl = false; + cfg->new_ctrl = r->cache.shareable_bits; + used_b = r->cache.shareable_bits; + for (i = 0; i < closids_supported(); i++) { + if (closid_allocated(i) && i != closid) { + mode = rdtgroup_mode_by_closid(i); + if (mode == RDT_MODE_PSEUDO_LOCKSETUP) + /* + * ctrl values for locksetup aren't relevant + * until the schemata is written, and the mode + * becomes RDT_MODE_PSEUDO_LOCKED. + */ + continue; + /* + * If CDP is active include peer domain's + * usage to ensure there is no overlap + * with an exclusive group. + */ + if (resctrl_arch_get_cdp_enabled(r->rid)) + peer_ctl = resctrl_arch_get_config(r, d, i, + peer_type); + else + peer_ctl = 0; + ctrl_val = resctrl_arch_get_config(r, d, i, + s->conf_type); + used_b |= ctrl_val | peer_ctl; + if (mode == RDT_MODE_SHAREABLE) + cfg->new_ctrl |= ctrl_val | peer_ctl; + } + } + if (d->plr && d->plr->cbm > 0) + used_b |= d->plr->cbm; + unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); + unused_b &= BIT_MASK(r->cache.cbm_len) - 1; + cfg->new_ctrl |= unused_b; + /* + * Force the initial CBM to be valid, user can + * modify the CBM based on system availability. + */ + cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r); + /* + * Assign the u32 CBM to an unsigned long to ensure that + * bitmap_weight() does not access out-of-bound memory. 
+ */ + tmp_cbm = cfg->new_ctrl; + if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { + rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); + return -ENOSPC; + } + cfg->have_new_ctrl = true; + + return 0; +} + +/* + * Initialize cache resources with default values. + * + * A new RDT group is being created on an allocation capable (CAT) + * supporting system. Set this group up to start off with all usable + * allocations. + * + * If there are no more shareable bits available on any domain then + * the entire allocation will fail. + */ +static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) +{ + struct rdt_domain *d; + int ret; + + list_for_each_entry(d, &s->res->domains, list) { + ret = __init_one_rdt_domain(d, s, closid); + if (ret < 0) + return ret; + } + + return 0; +} + +/* Initialize MBA resource with default values. */ +static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) +{ + struct resctrl_staged_config *cfg; + struct rdt_domain *d; + + list_for_each_entry(d, &r->domains, list) { + if (is_mba_sc(r)) { + d->mbps_val[closid] = MBA_MAX_MBPS; + continue; + } + + cfg = &d->staged_config[CDP_NONE]; + cfg->new_ctrl = r->default_ctrl; + cfg->have_new_ctrl = true; + } +} + +/* Initialize the RDT group's allocations. 
*/ +static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) +{ + struct resctrl_schema *s; + struct rdt_resource *r; + int ret = 0; + + rdt_staged_configs_clear(); + + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; + if (r->rid == RDT_RESOURCE_MBA || + r->rid == RDT_RESOURCE_SMBA) { + rdtgroup_init_mba(r, rdtgrp->closid); + if (is_mba_sc(r)) + continue; + } else { + ret = rdtgroup_init_cat(s, rdtgrp->closid); + if (ret < 0) + goto out; + } + + ret = resctrl_arch_update_domains(r, rdtgrp->closid); + if (ret < 0) { + rdt_last_cmd_puts("Failed to initialize allocations\n"); + goto out; + } + + } + + rdtgrp->mode = RDT_MODE_SHAREABLE; + +out: + rdt_staged_configs_clear(); + return ret; +} + +static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) +{ + int ret; + + if (!resctrl_arch_mon_capable()) + return 0; + + ret = alloc_rmid(rdtgrp->closid); + if (ret < 0) { + rdt_last_cmd_puts("Out of RMIDs\n"); + return ret; + } + rdtgrp->mon.rmid = ret; + + ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn); + if (ret) { + rdt_last_cmd_puts("kernfs subdir error\n"); + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); + return ret; + } + + return 0; +} + +static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) +{ + if (resctrl_arch_mon_capable()) + free_rmid(rgrp->closid, rgrp->mon.rmid); +} + +static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, + const char *name, umode_t mode, + enum rdt_group_type rtype, struct rdtgroup **r) +{ + struct rdtgroup *prdtgrp, *rdtgrp; + unsigned long files = 0; + struct kernfs_node *kn; + int ret; + + prdtgrp = rdtgroup_kn_lock_live(parent_kn); + if (!prdtgrp) { + ret = -ENODEV; + goto out_unlock; + } + + if (rtype == RDTMON_GROUP && + (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || + prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { + ret = -EINVAL; + rdt_last_cmd_puts("Pseudo-locking in progress\n"); + goto out_unlock; + } + + /* allocate the rdtgroup. 
*/ + rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); + if (!rdtgrp) { + ret = -ENOSPC; + rdt_last_cmd_puts("Kernel out of memory\n"); + goto out_unlock; + } + *r = rdtgrp; + rdtgrp->mon.parent = prdtgrp; + rdtgrp->type = rtype; + INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list); + + /* kernfs creates the directory for rdtgrp */ + kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); + if (IS_ERR(kn)) { + ret = PTR_ERR(kn); + rdt_last_cmd_puts("kernfs create error\n"); + goto out_free_rgrp; + } + rdtgrp->kn = kn; + + /* + * kernfs_remove() will drop the reference count on "kn" which + * will free it. But we still need it to stick around for the + * rdtgroup_kn_unlock(kn) call. Take one extra reference here, + * which will be dropped by kernfs_put() in rdtgroup_remove(). + */ + kernfs_get(kn); + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) { + rdt_last_cmd_puts("kernfs perm error\n"); + goto out_destroy; + } + + if (rtype == RDTCTRL_GROUP) { + files = RFTYPE_BASE | RFTYPE_CTRL; + if (resctrl_arch_mon_capable()) + files |= RFTYPE_MON; + } else { + files = RFTYPE_BASE | RFTYPE_MON; + } + + ret = rdtgroup_add_files(kn, files); + if (ret) { + rdt_last_cmd_puts("kernfs fill error\n"); + goto out_destroy; + } + + /* + * The caller unlocks the parent_kn upon success. + */ + return 0; + +out_destroy: + kernfs_put(rdtgrp->kn); + kernfs_remove(rdtgrp->kn); +out_free_rgrp: + kfree(rdtgrp); +out_unlock: + rdtgroup_kn_unlock(parent_kn); + return ret; +} + +static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) +{ + kernfs_remove(rgrp->kn); + rdtgroup_remove(rgrp); +} + +/* + * Create a monitor group under "mon_groups" directory of a control + * and monitor group(ctrl_mon). This is a resource group + * to monitor a subset of tasks and cpus in its parent ctrl_mon group. 
+ */ +static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, + const char *name, umode_t mode) +{ + struct rdtgroup *rdtgrp, *prgrp; + int ret; + + ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp); + if (ret) + return ret; + + prgrp = rdtgrp->mon.parent; + rdtgrp->closid = prgrp->closid; + + ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); + if (ret) { + mkdir_rdt_prepare_clean(rdtgrp); + goto out_unlock; + } + + kernfs_activate(rdtgrp->kn); + + /* + * Add the rdtgrp to the list of rdtgrps the parent + * ctrl_mon group has to track. + */ + list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); + +out_unlock: + rdtgroup_kn_unlock(parent_kn); + return ret; +} + +/* + * These are rdtgroups created under the root directory. Can be used + * to allocate and monitor resources. + */ +static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, + const char *name, umode_t mode) +{ + struct rdtgroup *rdtgrp; + struct kernfs_node *kn; + u32 closid; + int ret; + + ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp); + if (ret) + return ret; + + kn = rdtgrp->kn; + ret = closid_alloc(); + if (ret < 0) { + rdt_last_cmd_puts("Out of CLOSIDs\n"); + goto out_common_fail; + } + closid = ret; + ret = 0; + + rdtgrp->closid = closid; + + ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); + if (ret) + goto out_closid_free; + + kernfs_activate(rdtgrp->kn); + + ret = rdtgroup_init_alloc(rdtgrp); + if (ret < 0) + goto out_rmid_free; + + list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); + + if (resctrl_arch_mon_capable()) { + /* + * Create an empty mon_groups directory to hold the subset + * of tasks and cpus to monitor. 
+ */ + ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); + if (ret) { + rdt_last_cmd_puts("kernfs subdir error\n"); + goto out_del_list; + } + } + + goto out_unlock; + +out_del_list: + list_del(&rdtgrp->rdtgroup_list); +out_rmid_free: + mkdir_rdt_prepare_rmid_free(rdtgrp); +out_closid_free: + closid_free(closid); +out_common_fail: + mkdir_rdt_prepare_clean(rdtgrp); +out_unlock: + rdtgroup_kn_unlock(parent_kn); + return ret; +} + +/* + * We allow creating mon groups only with in a directory called "mon_groups" + * which is present in every ctrl_mon group. Check if this is a valid + * "mon_groups" directory. + * + * 1. The directory should be named "mon_groups". + * 2. The mon group itself should "not" be named "mon_groups". + * This makes sure "mon_groups" directory always has a ctrl_mon group + * as parent. + */ +static bool is_mon_groups(struct kernfs_node *kn, const char *name) +{ + return (!strcmp(kn->name, "mon_groups") && + strcmp(name, "mon_groups")); +} + +static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, + umode_t mode) +{ + /* Do not accept '\n' to avoid unparsable situation. */ + if (strchr(name, '\n')) + return -EINVAL; + + /* + * If the parent directory is the root directory and RDT + * allocation is supported, add a control and monitoring + * subdirectory + */ + if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn) + return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); + + /* + * If RDT monitoring is supported and the parent directory is a valid + * "mon_groups" directory, add a monitoring subdirectory. 
+ */ + if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name)) + return rdtgroup_mkdir_mon(parent_kn, name, mode); + + return -EPERM; +} + +static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) +{ + struct rdtgroup *prdtgrp = rdtgrp->mon.parent; + u32 closid, rmid; + int cpu; + + /* Give any tasks back to the parent group */ + rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); + + /* Update per cpu rmid of the moved CPUs first */ + closid = rdtgrp->closid; + rmid = prdtgrp->mon.rmid; + for_each_cpu(cpu, &rdtgrp->cpu_mask) + resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); + + /* + * Update the MSR on moved CPUs and CPUs which have moved + * task running on them. + */ + cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); + update_closid_rmid(tmpmask, NULL); + + rdtgrp->flags = RDT_DELETED; + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); + + /* + * Remove the rdtgrp from the parent ctrl_mon group's list + */ + WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); + list_del(&rdtgrp->mon.crdtgrp_list); + + kernfs_remove(rdtgrp->kn); + + return 0; +} + +static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) +{ + rdtgrp->flags = RDT_DELETED; + list_del(&rdtgrp->rdtgroup_list); + + kernfs_remove(rdtgrp->kn); + return 0; +} + +static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) +{ + u32 closid, rmid; + int cpu; + + /* Give any tasks back to the default group */ + rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); + + /* Give any CPUs back to the default group */ + cpumask_or(&rdtgroup_default.cpu_mask, + &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); + + /* Update per cpu closid and rmid of the moved CPUs first */ + closid = rdtgroup_default.closid; + rmid = rdtgroup_default.mon.rmid; + for_each_cpu(cpu, &rdtgrp->cpu_mask) + resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); + + /* + * Update the MSR on moved CPUs and CPUs which have moved + * task running on them. 
+ */ + cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); + update_closid_rmid(tmpmask, NULL); + + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); + closid_free(rdtgrp->closid); + + rdtgroup_ctrl_remove(rdtgrp); + + /* + * Free all the child monitor group rmids. + */ + free_all_child_rdtgrp(rdtgrp); + + return 0; +} + +static int rdtgroup_rmdir(struct kernfs_node *kn) +{ + struct kernfs_node *parent_kn = kn->parent; + struct rdtgroup *rdtgrp; + cpumask_var_t tmpmask; + int ret = 0; + + if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) + return -ENOMEM; + + rdtgrp = rdtgroup_kn_lock_live(kn); + if (!rdtgrp) { + ret = -EPERM; + goto out; + } + + /* + * If the rdtgroup is a ctrl_mon group and parent directory + * is the root directory, remove the ctrl_mon group. + * + * If the rdtgroup is a mon group and parent directory + * is a valid "mon_groups" directory, remove the mon group. + */ + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && + rdtgrp != &rdtgroup_default) { + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || + rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { + ret = rdtgroup_ctrl_remove(rdtgrp); + } else { + ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); + } + } else if (rdtgrp->type == RDTMON_GROUP && + is_mon_groups(parent_kn, kn->name)) { + ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); + } else { + ret = -EPERM; + } + +out: + rdtgroup_kn_unlock(kn); + free_cpumask_var(tmpmask); + return ret; +} + +/** + * mongrp_reparent() - replace parent CTRL_MON group of a MON group + * @rdtgrp: the MON group whose parent should be replaced + * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp + * @cpus: cpumask provided by the caller for use during this call + * + * Replaces the parent CTRL_MON group for a MON group, resulting in all member + * tasks' CLOSID immediately changing to that of the new parent group. + * Monitoring data for the group is unaffected by this operation. 
+ */ +static void mongrp_reparent(struct rdtgroup *rdtgrp, + struct rdtgroup *new_prdtgrp, + cpumask_var_t cpus) +{ + struct rdtgroup *prdtgrp = rdtgrp->mon.parent; + + WARN_ON(rdtgrp->type != RDTMON_GROUP); + WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP); + + /* Nothing to do when simply renaming a MON group. */ + if (prdtgrp == new_prdtgrp) + return; + + WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); + list_move_tail(&rdtgrp->mon.crdtgrp_list, + &new_prdtgrp->mon.crdtgrp_list); + + rdtgrp->mon.parent = new_prdtgrp; + rdtgrp->closid = new_prdtgrp->closid; + + /* Propagate updated closid to all tasks in this group. */ + rdt_move_group_tasks(rdtgrp, rdtgrp, cpus); + + update_closid_rmid(cpus, NULL); +} + +static int rdtgroup_rename(struct kernfs_node *kn, + struct kernfs_node *new_parent, const char *new_name) +{ + struct rdtgroup *new_prdtgrp; + struct rdtgroup *rdtgrp; + cpumask_var_t tmpmask; + int ret; + + rdtgrp = kernfs_to_rdtgroup(kn); + new_prdtgrp = kernfs_to_rdtgroup(new_parent); + if (!rdtgrp || !new_prdtgrp) + return -ENOENT; + + /* Release both kernfs active_refs before obtaining rdtgroup mutex. */ + rdtgroup_kn_get(rdtgrp, kn); + rdtgroup_kn_get(new_prdtgrp, new_parent); + + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + /* + * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if + * either kernfs_node is a file. 
+ */ + if (kernfs_type(kn) != KERNFS_DIR || + kernfs_type(new_parent) != KERNFS_DIR) { + rdt_last_cmd_puts("Source and destination must be directories"); + ret = -EPERM; + goto out; + } + + if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) { + ret = -ENOENT; + goto out; + } + + if (rdtgrp->type != RDTMON_GROUP || !kn->parent || + !is_mon_groups(kn->parent, kn->name)) { + rdt_last_cmd_puts("Source must be a MON group\n"); + ret = -EPERM; + goto out; + } + + if (!is_mon_groups(new_parent, new_name)) { + rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n"); + ret = -EPERM; + goto out; + } + + /* + * If the MON group is monitoring CPUs, the CPUs must be assigned to the + * current parent CTRL_MON group and therefore cannot be assigned to + * the new parent, making the move illegal. + */ + if (!cpumask_empty(&rdtgrp->cpu_mask) && + rdtgrp->mon.parent != new_prdtgrp) { + rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n"); + ret = -EPERM; + goto out; + } + + /* + * Allocate the cpumask for use in mongrp_reparent() to avoid the + * possibility of failing to allocate it after kernfs_rename() has + * succeeded. + */ + if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) { + ret = -ENOMEM; + goto out; + } + + /* + * Perform all input validation and allocations needed to ensure + * mongrp_reparent() will succeed before calling kernfs_rename(), + * otherwise it would be necessary to revert this call if + * mongrp_reparent() failed. 
+ */ + ret = kernfs_rename(kn, new_parent, new_name); + if (!ret) + mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask); + + free_cpumask_var(tmpmask); + +out: + mutex_unlock(&rdtgroup_mutex); + rdtgroup_kn_put(rdtgrp, kn); + rdtgroup_kn_put(new_prdtgrp, new_parent); + return ret; +} + +static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) +{ + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) + seq_puts(seq, ",cdp"); + + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) + seq_puts(seq, ",cdpl2"); + + if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA))) + seq_puts(seq, ",mba_MBps"); + + if (resctrl_debug) + seq_puts(seq, ",debug"); + + return 0; +} + +static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { + .mkdir = rdtgroup_mkdir, + .rmdir = rdtgroup_rmdir, + .rename = rdtgroup_rename, + .show_options = rdtgroup_show_options, +}; + +static int rdtgroup_setup_root(struct rdt_fs_context *ctx) +{ + rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, + KERNFS_ROOT_CREATE_DEACTIVATED | + KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, + &rdtgroup_default); + if (IS_ERR(rdt_root)) + return PTR_ERR(rdt_root); + + ctx->kfc.root = rdt_root; + rdtgroup_default.kn = kernfs_root_to_node(rdt_root); + + return 0; +} + +static void rdtgroup_destroy_root(void) +{ + kernfs_destroy_root(rdt_root); + rdtgroup_default.kn = NULL; +} + +static void rdtgroup_setup_default(void) +{ + mutex_lock(&rdtgroup_mutex); + + rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; + rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; + rdtgroup_default.type = RDTCTRL_GROUP; + INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); + + list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); + + mutex_unlock(&rdtgroup_mutex); +} + +static void domain_destroy_mon_state(struct rdt_domain *d) +{ + bitmap_free(d->rmid_busy_llc); + kfree(d->mbm_total); + kfree(d->mbm_local); +} + +void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + 
mutex_lock(&rdtgroup_mutex); + + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) + mba_sc_domain_destroy(r, d); + + if (!r->mon_capable) + goto out_unlock; + + /* + * If resctrl is mounted, remove all the + * per domain monitor data directories. + */ + if (resctrl_mounted && resctrl_arch_mon_capable()) + rmdir_mondata_subdir_allrdtgrp(r, d->id); + + if (resctrl_is_mbm_enabled()) + cancel_delayed_work(&d->mbm_over); + if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { + /* + * When a package is going down, forcefully + * decrement rmid->ebusy. There is no way to know + * that the L3 was flushed and hence may lead to + * incorrect counts in rare scenarios, but leaving + * the RMID as busy creates RMID leaks if the + * package never comes back. + */ + __check_limbo(d, true); + cancel_delayed_work(&d->cqm_limbo); + } + + domain_destroy_mon_state(d); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); +} + +static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +{ + u32 idx_limit = resctrl_arch_system_num_rmid_idx(); + size_t tsize; + + if (resctrl_arch_is_llc_occupancy_enabled()) { + d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); + if (!d->rmid_busy_llc) + return -ENOMEM; + } + if (resctrl_arch_is_mbm_total_enabled()) { + tsize = sizeof(*d->mbm_total); + d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); + if (!d->mbm_total) { + bitmap_free(d->rmid_busy_llc); + return -ENOMEM; + } + } + if (resctrl_arch_is_mbm_local_enabled()) { + tsize = sizeof(*d->mbm_local); + d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); + if (!d->mbm_local) { + bitmap_free(d->rmid_busy_llc); + kfree(d->mbm_total); + return -ENOMEM; + } + } + + return 0; +} + +int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + int err = 0; + + mutex_lock(&rdtgroup_mutex); + + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) { + /* RDT_RESOURCE_MBA is never mon_capable */ + err = mba_sc_domain_allocate(r, d); + goto 
out_unlock; + } + + if (!r->mon_capable) + goto out_unlock; + + err = domain_setup_mon_state(r, d); + if (err) + goto out_unlock; + + if (resctrl_is_mbm_enabled()) { + INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); + mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL, + RESCTRL_PICK_ANY_CPU); + } + + if (resctrl_arch_is_llc_occupancy_enabled()) + INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); + + /* + * If the filesystem is not mounted then only the default resource group + * exists. Creation of its directories is deferred until mount time + * by rdt_get_tree() calling mkdir_mondata_all(). + * If resctrl is mounted, add per domain monitor data directories. + */ + if (resctrl_mounted && resctrl_arch_mon_capable()) + mkdir_mondata_subdir_allrdtgrp(r, d); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + + return err; +} + +void resctrl_online_cpu(unsigned int cpu) +{ + mutex_lock(&rdtgroup_mutex); + /* The CPU is set in default rdtgroup after online. */ + cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask); + mutex_unlock(&rdtgroup_mutex); +} + +static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) +{ + struct rdtgroup *cr; + + list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) { + if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) + break; + } +} + +void resctrl_offline_cpu(unsigned int cpu) +{ + struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdtgroup *rdtgrp; + struct rdt_domain *d; + + mutex_lock(&rdtgroup_mutex); + list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { + if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) { + clear_childcpus(rdtgrp, cpu); + break; + } + } + + if (!l3->mon_capable) + goto out_unlock; + + d = resctrl_get_domain_from_cpu(cpu, l3); + if (d) { + if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) { + cancel_delayed_work(&d->mbm_over); + mbm_setup_overflow_handler(d, 0, cpu); + } + if (resctrl_arch_is_llc_occupancy_enabled() && + cpu == d->cqm_work_cpu 
&& has_busy_rmid(d)) { + cancel_delayed_work(&d->cqm_limbo); + cqm_setup_limbo_handler(d, 0, cpu); + } + } + +out_unlock: + mutex_unlock(&rdtgroup_mutex); +} + +/* + * resctrl_init - resctrl filesystem initialization + * + * Setup resctrl file system including set up root, create mount point, + * register resctrl filesystem, and initialize files under root directory. + * + * Return: 0 on success or -errno + */ +int resctrl_init(void) +{ + int ret = 0; + + seq_buf_init(&last_cmd_status, last_cmd_status_buf, + sizeof(last_cmd_status_buf)); + + rdtgroup_setup_default(); + + thread_throttle_mode_init(); + + ret = resctrl_mon_resource_init(); + if (ret) + return ret; + + ret = sysfs_create_mount_point(fs_kobj, "resctrl"); + if (ret) + return ret; + + ret = register_filesystem(&rdt_fs_type); + if (ret) + goto cleanup_mountpoint; + + /* + * Adding the resctrl debugfs directory here may not be ideal since + * it would let the resctrl debugfs directory appear on the debugfs + * filesystem before the resctrl filesystem is mounted. + * It may also be ok since that would enable debugging of RDT before + * resctrl is mounted. + * The reason why the debugfs directory is created here and not in + * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and + * during the debugfs directory creation also &sb->s_type->i_mutex_key + * (the lockdep class of inode->i_rwsem). Other filesystem + * interactions (eg. SyS_getdents) have the lock ordering: + * &sb->s_type->i_mutex_key --> &mm->mmap_lock + * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex + * is taken, thus creating dependency: + * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause + * issues considering the other two lock dependencies. + * By creating the debugfs directory here we avoid a dependency + * that may cause deadlock (even though file operations cannot + * occur until the filesystem is mounted, but I do not know how to + * tell lockdep that). 
+ */ + debugfs_resctrl = debugfs_create_dir("resctrl", NULL); + + return 0; + +cleanup_mountpoint: + sysfs_remove_mount_point(fs_kobj, "resctrl"); + + return ret; +} + +void resctrl_exit(void) +{ + debugfs_remove_recursive(debugfs_resctrl); + unregister_filesystem(&rdt_fs_type); + sysfs_remove_mount_point(fs_kobj, "resctrl"); + + resctrl_mon_resource_exit(); +} diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index f5cdabe2ee9e..e69de29bb2d1 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -1,532 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Resource Director Technology(RDT) - * - Cache Allocation code. - * - * Copyright (C) 2016 Intel Corporation - * - * Authors: - * Fenghua Yu - * Tony Luck - * - * More information about RDT be found in the Intel (R) x86 Architecture - * Software Developer Manual June 2016, volume 3, section 17.17. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include "internal.h" - -struct rdt_parse_data { - struct rdtgroup *rdtgrp; - char *buf; -}; - -typedef int(ctrlval_parser_t)(struct rdt_parse_data *data, - struct resctrl_schema *s, struct rdt_domain *d); - -/* - * Check whether MBA bandwidth percentage value is correct. The value is - * checked against the minimum and max bandwidth values specified by the - * hardware. The allocated bandwidth percentage is rounded to the next - * control step available on the hardware. - */ -static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) -{ - int ret; - u32 bw; - - /* - * Only linear delay values is supported for current Intel SKUs. - */ - if (!r->membw.delay_linear && r->membw.arch_needs_linear) { - rdt_last_cmd_puts("No support for non-linear MB domains\n"); - return false; - } - - ret = kstrtou32(buf, 10, &bw); - if (ret) { - rdt_last_cmd_printf("Invalid MB value %s\n", buf); - return false; - } - - /* Nothing else to do if software controller is enabled. 
*/ - if (is_mba_sc(r)) { - *data = bw; - return true; - } - - if (bw < r->membw.min_bw || bw > r->default_ctrl) { - rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", bw, - r->membw.min_bw, r->default_ctrl); - return false; - } - - *data = roundup(bw, (unsigned long)r->membw.bw_gran); - return true; -} - -static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) -{ - struct resctrl_staged_config *cfg; - u32 closid = data->rdtgrp->closid; - struct rdt_resource *r = s->res; - u32 bw_val; - - cfg = &d->staged_config[s->conf_type]; - if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->id); - return -EINVAL; - } - - if (!bw_validate(data->buf, &bw_val, r)) - return -EINVAL; - - if (is_mba_sc(r)) { - d->mbps_val[closid] = bw_val; - return 0; - } - - cfg->new_ctrl = bw_val; - cfg->have_new_ctrl = true; - - return 0; -} - -/* - * Check whether a cache bit mask is valid. - * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID: - * - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1 - * - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1 - * - * Haswell does not support a non-contiguous 1s value and additionally - * requires at least two bits set. - * AMD allows non-contiguous bitmasks. - */ -static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) -{ - unsigned long first_bit, zero_bit, val; - unsigned int cbm_len = r->cache.cbm_len; - int ret; - - ret = kstrtoul(buf, 16, &val); - if (ret) { - rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf); - return false; - } - - if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) { - rdt_last_cmd_puts("Mask out of range\n"); - return false; - } - - first_bit = find_first_bit(&val, cbm_len); - zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); - - /* Are non-contiguous bitmasks allowed? 
*/ - if (!r->cache.arch_has_sparse_bitmasks && - (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) { - rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", - val); - return false; - } - - if ((zero_bit - first_bit) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("Need at least %d bits in the mask\n", - r->cache.min_cbm_bits); - return false; - } - - *data = val; - return true; -} - -/* - * Read one cache bit mask (hex). Check that it is valid for the current - * resource type. - */ -static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) -{ - struct rdtgroup *rdtgrp = data->rdtgrp; - struct resctrl_staged_config *cfg; - struct rdt_resource *r = s->res; - u32 cbm_val; - - cfg = &d->staged_config[s->conf_type]; - if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->id); - return -EINVAL; - } - - /* - * Cannot set up more than one pseudo-locked region in a cache - * hierarchy. - */ - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && - rdtgroup_pseudo_locked_in_hierarchy(d)) { - rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n"); - return -EINVAL; - } - - if (!cbm_validate(data->buf, &cbm_val, r)) - return -EINVAL; - - if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE || - rdtgrp->mode == RDT_MODE_SHAREABLE) && - rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) { - rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n"); - return -EINVAL; - } - - /* - * The CBM may not overlap with the CBM of another closid if - * either is exclusive. 
- */ - if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) { - rdt_last_cmd_puts("Overlaps with exclusive group\n"); - return -EINVAL; - } - - if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) { - if (rdtgrp->mode == RDT_MODE_EXCLUSIVE || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - rdt_last_cmd_puts("Overlaps with other group\n"); - return -EINVAL; - } - } - - cfg->new_ctrl = cbm_val; - cfg->have_new_ctrl = true; - - return 0; -} - -static ctrlval_parser_t *get_parser(struct rdt_resource *res) -{ - if (res->fflags & RFTYPE_RES_CACHE) - return &parse_cbm; - else - return &parse_bw; -} - -/* - * For each domain in this resource we expect to find a series of: - * id=mask - * separated by ";". The "id" is in decimal, and must match one of - * the "id"s for this resource. - */ -static int parse_line(char *line, struct resctrl_schema *s, - struct rdtgroup *rdtgrp) -{ - ctrlval_parser_t *parse_ctrlval = get_parser(s->res); - enum resctrl_conf_type t = s->conf_type; - struct resctrl_staged_config *cfg; - struct rdt_resource *r = s->res; - struct rdt_parse_data data; - char *dom = NULL, *id; - struct rdt_domain *d; - unsigned long dom_id; - - /* Walking r->domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && - (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) { - rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); - return -EINVAL; - } - -next: - if (!line || line[0] == '\0') - return 0; - dom = strsep(&line, ";"); - id = strsep(&dom, "="); - if (!dom || kstrtoul(id, 10, &dom_id)) { - rdt_last_cmd_puts("Missing '=' or non-numeric domain\n"); - return -EINVAL; - } - dom = strim(dom); - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { - data.buf = dom; - data.rdtgrp = rdtgrp; - if (parse_ctrlval(&data, s, d)) - return -EINVAL; - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - cfg = &d->staged_config[t]; - /* - * In pseudo-locking 
setup mode and just - * parsed a valid CBM that should be - * pseudo-locked. Only one locked region per - * resource group and domain so just do - * the required initialization for single - * region and return. - */ - rdtgrp->plr->s = s; - rdtgrp->plr->d = d; - rdtgrp->plr->cbm = cfg->new_ctrl; - d->plr = rdtgrp->plr; - return 0; - } - goto next; - } - } - return -EINVAL; -} - -static int rdtgroup_parse_resource(char *resname, char *tok, - struct rdtgroup *rdtgrp) -{ - struct resctrl_schema *s; - - list_for_each_entry(s, &resctrl_schema_all, list) { - if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid) - return parse_line(tok, s, rdtgrp); - } - rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname); - return -EINVAL; -} - -ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct resctrl_schema *s; - struct rdtgroup *rdtgrp; - struct rdt_resource *r; - char *tok, *resname; - int ret = 0; - - /* Valid input requires a trailing newline */ - if (nbytes == 0 || buf[nbytes - 1] != '\n') - return -EINVAL; - buf[nbytes - 1] = '\0'; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - rdtgroup_kn_unlock(of->kn); - return -ENOENT; - } - rdt_last_cmd_clear(); - - /* - * No changes to pseudo-locked region allowed. It has to be removed - * and re-created instead. 
- */ - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { - ret = -EINVAL; - rdt_last_cmd_puts("Resource group is pseudo-locked\n"); - goto out; - } - - rdt_staged_configs_clear(); - - while ((tok = strsep(&buf, "\n")) != NULL) { - resname = strim(strsep(&tok, ":")); - if (!tok) { - rdt_last_cmd_puts("Missing ':'\n"); - ret = -EINVAL; - goto out; - } - if (tok[0] == '\0') { - rdt_last_cmd_printf("Missing '%s' value\n", resname); - ret = -EINVAL; - goto out; - } - ret = rdtgroup_parse_resource(resname, tok, rdtgrp); - if (ret) - goto out; - } - - list_for_each_entry(s, &resctrl_schema_all, list) { - r = s->res; - - /* - * Writes to mba_sc resources update the software controller, - * not the control MSR. - */ - if (is_mba_sc(r)) - continue; - - ret = resctrl_arch_update_domains(r, rdtgrp->closid); - if (ret) - goto out; - } - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - /* - * If pseudo-locking fails we keep the resource group in - * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service - * active and updated for just the domain the pseudo-locked - * region was requested for. 
- */ - ret = rdtgroup_pseudo_lock_create(rdtgrp); - } - -out: - rdt_staged_configs_clear(); - rdtgroup_kn_unlock(of->kn); - return ret ?: nbytes; -} - -static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) -{ - struct rdt_resource *r = schema->res; - struct rdt_domain *dom; - bool sep = false; - u32 ctrl_val; - - /* Walking r->domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); - - seq_printf(s, "%*s:", max_name_width, schema->name); - list_for_each_entry(dom, &r->domains, list) { - if (sep) - seq_puts(s, ";"); - - if (is_mba_sc(r)) - ctrl_val = dom->mbps_val[closid]; - else - ctrl_val = resctrl_arch_get_config(r, dom, closid, - schema->conf_type); - - seq_printf(s, r->format_str, dom->id, max_data_width, - ctrl_val); - sep = true; - } - seq_puts(s, "\n"); -} - -int rdtgroup_schemata_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct resctrl_schema *schema; - struct rdtgroup *rdtgrp; - int ret = 0; - u32 closid; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (rdtgrp) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - list_for_each_entry(schema, &resctrl_schema_all, list) { - seq_printf(s, "%s:uninitialized\n", schema->name); - } - } else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { - if (!rdtgrp->plr->d) { - rdt_last_cmd_clear(); - rdt_last_cmd_puts("Cache domain offline\n"); - ret = -ENODEV; - } else { - seq_printf(s, "%s:%d=%x\n", - rdtgrp->plr->s->res->name, - rdtgrp->plr->d->id, - rdtgrp->plr->cbm); - } - } else { - closid = rdtgrp->closid; - list_for_each_entry(schema, &resctrl_schema_all, list) { - if (closid < schema->num_closid) - show_doms(s, schema, closid); - } - } - } else { - ret = -ENOENT; - } - rdtgroup_kn_unlock(of->kn); - return ret; -} - -static int smp_mon_event_count(void *arg) -{ - mon_event_count(arg); - - return 0; -} - -void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_domain *d, struct rdtgroup *rdtgrp, - int evtid, int first) 
-{ - int cpu; - - /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); - - /* - * Setup the parameters to pass to mon_event_count() to read the data. - */ - rr->rgrp = rdtgrp; - rr->evtid = evtid; - rr->r = r; - rr->d = d; - rr->val = 0; - rr->first = first; - rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); - if (IS_ERR(rr->arch_mon_ctx)) { - rr->err = -EINVAL; - return; - } - - cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU); - - /* - * cpumask_any_housekeeping() prefers housekeeping CPUs, but - * are all the CPUs nohz_full? If yes, pick a CPU to IPI. - * MPAM's resctrl_arch_rmid_read() is unable to read the - * counters on some platforms if its called in IRQ context. - */ - if (tick_nohz_full_cpu(cpu)) - smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1); - else - smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); - - resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx); -} - -int rdtgroup_mondata_show(struct seq_file *m, void *arg) -{ - struct kernfs_open_file *of = m->private; - u32 resid, evtid, domid; - struct rdtgroup *rdtgrp; - struct rdt_resource *r; - union mon_data_bits md; - struct rdt_domain *d; - struct rmid_read rr; - int ret = 0; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - ret = -ENOENT; - goto out; - } - - md.priv = of->kn->priv; - resid = md.u.rid; - domid = md.u.domid; - evtid = md.u.evtid; - - r = resctrl_arch_get_resource(resid); - d = resctrl_arch_find_domain(r, domid); - if (IS_ERR_OR_NULL(d)) { - ret = -ENOENT; - goto out; - } - - mon_event_read(&rr, r, d, rdtgrp, evtid, false); - - if (rr.err == -EIO) - seq_puts(m, "Error\n"); - else if (rr.err == -EINVAL) - seq_puts(m, "Unavailable\n"); - else - seq_printf(m, "%llu\n", rr.val); - -out: - rdtgroup_kn_unlock(of->kn); - return ret; -} diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index f73267762a87..e69de29bb2d1 100644 --- a/fs/resctrl/internal.h +++ 
b/fs/resctrl/internal.h @@ -1,340 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _FS_RESCTRL_INTERNAL_H -#define _FS_RESCTRL_INTERNAL_H - -#include -#include -#include -#include -#include -#include - -#include - -/** - * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that - * aren't marked nohz_full - * @mask: The mask to pick a CPU from. - * @exclude_cpu:The CPU to avoid picking. - * - * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping - * CPUs that don't use nohz_full, these are preferred. Pass - * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs. - * - * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available. - */ -static inline unsigned int -cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) -{ - unsigned int cpu, hk_cpu; - - if (exclude_cpu == RESCTRL_PICK_ANY_CPU) - cpu = cpumask_any(mask); - else - cpu = cpumask_any_but(mask, exclude_cpu); - - if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) - return cpu; - - /* If the CPU picked isn't marked nohz_full nothing more needs doing. 
*/ - if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu)) - return cpu; - - /* Try to find a CPU that isn't nohz_full to use in preference */ - hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask); - if (hk_cpu == exclude_cpu) - hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask); - - if (hk_cpu < nr_cpu_ids) - cpu = hk_cpu; - - return cpu; -} - -struct rdt_fs_context { - struct kernfs_fs_context kfc; - bool enable_cdpl2; - bool enable_cdpl3; - bool enable_mba_mbps; - bool enable_debug; -}; - -static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) -{ - struct kernfs_fs_context *kfc = fc->fs_private; - - return container_of(kfc, struct rdt_fs_context, kfc); -} - -/** - * struct mon_evt - Entry in the event list of a resource - * @evtid: event id - * @name: name of the event - * @configurable: true if the event is configurable - * @list: entry in &rdt_resource->evt_list - */ -struct mon_evt { - enum resctrl_event_id evtid; - char *name; - bool configurable; - struct list_head list; -}; - -/** - * union mon_data_bits - Monitoring details for each event file - * @priv: Used to store monitoring event data in @u - * as kernfs private data - * @rid: Resource id associated with the event file - * @evtid: Event id associated with the event file - * @domid: The domain to which the event file belongs - * @u: Name of the bit fields struct - */ -union mon_data_bits { - void *priv; - struct { - unsigned int rid : 10; - enum resctrl_event_id evtid : 8; - unsigned int domid : 14; - } u; -}; - -struct rmid_read { - struct rdtgroup *rgrp; - struct rdt_resource *r; - struct rdt_domain *d; - enum resctrl_event_id evtid; - bool first; - int err; - u64 val; - void *arch_mon_ctx; -}; - -extern struct list_head resctrl_schema_all; -extern bool resctrl_mounted; - -enum rdt_group_type { - RDTCTRL_GROUP = 0, - RDTMON_GROUP, - RDT_NUM_GROUP, -}; - -/** - * enum rdtgrp_mode - Mode of a RDT resource group - * @RDT_MODE_SHAREABLE: This resource group allows sharing of 
its allocations - * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed - * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking - * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations - * allowed AND the allocations are Cache Pseudo-Locked - * @RDT_NUM_MODES: Total number of modes - * - * The mode of a resource group enables control over the allowed overlap - * between allocations associated with different resource groups (classes - * of service). User is able to modify the mode of a resource group by - * writing to the "mode" resctrl file associated with the resource group. - * - * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by - * writing the appropriate text to the "mode" file. A resource group enters - * "pseudo-locked" mode after the schemata is written while the resource - * group is in "pseudo-locksetup" mode. - */ -enum rdtgrp_mode { - RDT_MODE_SHAREABLE = 0, - RDT_MODE_EXCLUSIVE, - RDT_MODE_PSEUDO_LOCKSETUP, - RDT_MODE_PSEUDO_LOCKED, - - /* Must be last */ - RDT_NUM_MODES, -}; - -/** - * struct mongroup - store mon group's data in resctrl fs. - * @mon_data_kn: kernfs node for the mon_data directory - * @parent: parent rdtgrp - * @crdtgrp_list: child rdtgroup node list - * @rmid: rmid for this rdtgroup - */ -struct mongroup { - struct kernfs_node *mon_data_kn; - struct rdtgroup *parent; - struct list_head crdtgrp_list; - u32 rmid; -}; - -/** - * struct rdtgroup - store rdtgroup's data in resctrl file system. 
- * @kn: kernfs node - * @rdtgroup_list: linked list for all rdtgroups - * @closid: closid for this rdtgroup - * @cpu_mask: CPUs assigned to this rdtgroup - * @flags: status bits - * @waitcount: how many cpus expect to find this - * group when they acquire rdtgroup_mutex - * @type: indicates type of this rdtgroup - either - * monitor only or ctrl_mon group - * @mon: mongroup related data - * @mode: mode of resource group - * @plr: pseudo-locked region - */ -struct rdtgroup { - struct kernfs_node *kn; - struct list_head rdtgroup_list; - u32 closid; - struct cpumask cpu_mask; - int flags; - atomic_t waitcount; - enum rdt_group_type type; - struct mongroup mon; - enum rdtgrp_mode mode; - struct pseudo_lock_region *plr; -}; - -/* List of all resource groups */ -extern struct list_head rdt_all_groups; - -extern int max_name_width, max_data_width; - -/** - * struct rftype - describe each file in the resctrl file system - * @name: File name - * @mode: Access mode - * @kf_ops: File operations - * @flags: File specific RFTYPE_FLAGS_* flags - * @fflags: File specific RFTYPE_* flags - * @seq_show: Show content of the file - * @write: Write to the file - */ -struct rftype { - char *name; - umode_t mode; - const struct kernfs_ops *kf_ops; - unsigned long flags; - unsigned long fflags; - - int (*seq_show)(struct kernfs_open_file *of, - struct seq_file *sf, void *v); - /* - * write() is the generic write callback which maps directly to - * kernfs write operation and overrides all other operations. - * Maximum write size is determined by ->max_write_len. 
- */ - ssize_t (*write)(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off); -}; - -/** - * struct mbm_state - status for each MBM counter in each domain - * @prev_bw_bytes: Previous bytes value read for bandwidth calculation - * @prev_bw: The most recent bandwidth in MBps - */ -struct mbm_state { - u64 prev_bw_bytes; - u32 prev_bw; -}; - -static inline bool is_mba_sc(struct rdt_resource *r) -{ - if (!r) - r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - - /* - * The software controller support is only applicable to MBA resource. - * Make sure to check for resource type. - */ - if (r->rid != RDT_RESOURCE_MBA) - return false; - - return r->membw.mba_sc; -} - -extern struct mutex rdtgroup_mutex; -extern struct rdtgroup rdtgroup_default; -extern struct dentry *debugfs_resctrl; - -void rdt_last_cmd_clear(void); -void rdt_last_cmd_puts(const char *s); -__printf(1, 2) -void rdt_last_cmd_printf(const char *fmt, ...); - -struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn); -void rdtgroup_kn_unlock(struct kernfs_node *kn); -int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name); -int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, - umode_t mask); -ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off); -int rdtgroup_schemata_show(struct kernfs_open_file *of, - struct seq_file *s, void *v); -bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, - unsigned long cbm, int closid, bool exclusive); -unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, - unsigned long cbm); -enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); -int rdtgroup_tasks_assigned(struct rdtgroup *r); -int closids_supported(void); -void closid_free(int closid); -int alloc_rmid(u32 closid); -void free_rmid(u32 closid, u32 rmid); -void resctrl_mon_resource_exit(void); -void mon_event_count(void *info); -int rdtgroup_mondata_show(struct seq_file *m, void 
*arg); -void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_domain *d, struct rdtgroup *rdtgrp, - int evtid, int first); -int resctrl_mon_resource_init(void); -void mbm_setup_overflow_handler(struct rdt_domain *dom, - unsigned long delay_ms, - int exclude_cpu); -void mbm_handle_overflow(struct work_struct *work); -bool is_mba_sc(struct rdt_resource *r); -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, - int exclude_cpu); -void cqm_handle_limbo(struct work_struct *work); -bool has_busy_rmid(struct rdt_domain *d); -void __check_limbo(struct rdt_domain *d, bool force_free); -void mbm_config_rftype_init(const char *config); -void rdt_staged_configs_clear(void); -bool closid_allocated(unsigned int closid); -int resctrl_find_cleanest_closid(void); - -#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK -int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); -int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); -int rdt_pseudo_lock_init(void); -void rdt_pseudo_lock_release(void); -int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); -void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); -#else -static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) -{ - return -EOPNOTSUPP; -} - -static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) -{ - return -EOPNOTSUPP; -} - -static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) -{ - return false; -} - -static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) -{ - return false; -} - -static inline int rdt_pseudo_lock_init(void) { return 0; } -static inline void rdt_pseudo_lock_release(void) { } -static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) -{ - return -EOPNOTSUPP; -} - -static inline void rdtgroup_pseudo_lock_remove(struct 
rdtgroup *rdtgrp) { } -#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ - -#endif /* _FS_RESCTRL_INTERNAL_H */ diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 06f660dfd929..e69de29bb2d1 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1,843 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Resource Director Technology(RDT) - * - Monitoring code - * - * Copyright (C) 2017 Intel Corporation - * - * Author: - * Vikas Shivappa - * - * This replaces the cqm.c based on perf but we reuse a lot of - * code and datastructures originally from Peter Zijlstra and Matt Fleming. - * - * More information about RDT be found in the Intel (R) x86 Architecture - * Software Developer Manual June 2016, volume 3, section 17.17. - */ - -#include -#include -#include -#include -#include "internal.h" - -/* - * struct rmid_entry - dirty tracking for all RMID. - * @closid: The CLOSID for this entry. - * @rmid: The RMID for this entry. - * @busy: The number of domains with cached data using this RMID. - * @list: Member of the rmid_free_lru list when busy == 0. - * - * Depending on the architecture the correct monitor is accessed using - * both @closid and @rmid, or @rmid only. - * - * Take the rdtgroup_mutex when accessing. - */ -struct rmid_entry { - u32 closid; - u32 rmid; - int busy; - struct list_head list; -}; - -/* - * @rmid_free_lru - A least recently used list of free RMIDs - * These RMIDs are guaranteed to have an occupancy less than the - * threshold occupancy - */ -static LIST_HEAD(rmid_free_lru); - -/* - * @closid_num_dirty_rmid The number of dirty RMID each CLOSID has. - * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined. - * Indexed by CLOSID. Protected by rdtgroup_mutex. - */ -static u32 *closid_num_dirty_rmid; - -/* - * @rmid_limbo_count - count of currently unused but (potentially) - * dirty RMIDs. - * This counts RMIDs that no one is currently using but that - * may have a occupancy value > resctrl_rmid_realloc_threshold. 
User can - * change the threshold occupancy value. - */ -static unsigned int rmid_limbo_count; - -/* - * @rmid_entry - The entry in the limbo and free lists. - */ -static struct rmid_entry *rmid_ptrs; - -/* - * This is the threshold cache occupancy in bytes at which we will consider an - * RMID available for re-allocation. - */ -unsigned int resctrl_rmid_realloc_threshold; - -/* - * This is the maximum value for the reallocation threshold, in bytes. - */ -unsigned int resctrl_rmid_realloc_limit; - -/* - * x86 and arm64 differ in their handling of monitoring. - * x86's RMID are independent numbers, there is only one source of traffic - * with an RMID value of '1'. - * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of - * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID - * value is no longer unique. - * To account for this, resctrl uses an index. On x86 this is just the RMID, - * on arm64 it encodes the CLOSID and RMID. This gives a unique number. - * - * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code - * must accept an attempt to read every index. - */ -static inline struct rmid_entry *__rmid_entry(u32 idx) -{ - struct rmid_entry *entry; - u32 closid, rmid; - - entry = &rmid_ptrs[idx]; - resctrl_arch_rmid_idx_decode(idx, &closid, &rmid); - - WARN_ON_ONCE(entry->closid != closid); - WARN_ON_ONCE(entry->rmid != rmid); - - return entry; -} - -static void limbo_release_entry(struct rmid_entry *entry) -{ - lockdep_assert_held(&rdtgroup_mutex); - - rmid_limbo_count--; - list_add_tail(&entry->list, &rmid_free_lru); - - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) - closid_num_dirty_rmid[entry->closid]--; -} - -/* - * Check the RMIDs that are marked as busy for this domain. If the - * reported LLC occupancy is below the threshold clear the busy bit and - * decrement the count. 
If the busy count gets to zero on an RMID, we - * free the RMID - */ -void __check_limbo(struct rdt_domain *d, bool force_free) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - u32 idx_limit = resctrl_arch_system_num_rmid_idx(); - struct rmid_entry *entry; - u32 idx, cur_idx = 1; - void *arch_mon_ctx; - bool rmid_dirty; - u64 val = 0; - - arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID); - if (IS_ERR(arch_mon_ctx)) { - pr_warn_ratelimited("Failed to allocate monitor context: %ld", - PTR_ERR(arch_mon_ctx)); - return; - } - - /* - * Skip RMID 0 and start from RMID 1 and check all the RMIDs that - * are marked as busy for occupancy < threshold. If the occupancy - * is less than the threshold decrement the busy counter of the - * RMID and move it to the free list when the counter reaches 0. - */ - for (;;) { - idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx); - if (idx >= idx_limit) - break; - - entry = __rmid_entry(idx); - if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, - QOS_L3_OCCUP_EVENT_ID, &val, - arch_mon_ctx)) { - rmid_dirty = true; - } else { - rmid_dirty = (val >= resctrl_rmid_realloc_threshold); - } - - if (force_free || !rmid_dirty) { - clear_bit(idx, d->rmid_busy_llc); - if (!--entry->busy) - limbo_release_entry(entry); - } - cur_idx = idx + 1; - } - - resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx); -} - -bool has_busy_rmid(struct rdt_domain *d) -{ - u32 idx_limit = resctrl_arch_system_num_rmid_idx(); - - return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit; -} - -static struct rmid_entry *resctrl_find_free_rmid(u32 closid) -{ - struct rmid_entry *itr; - u32 itr_idx, cmp_idx; - - if (list_empty(&rmid_free_lru)) - return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC); - - list_for_each_entry(itr, &rmid_free_lru, list) { - /* - * Get the index of this free RMID, and the index it would need - * to be if it were used with this CLOSID. 
- * If the CLOSID is irrelevant on this architecture, the two - * index values are always the same on every entry and thus the - * very first entry will be returned. - */ - itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid); - cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid); - - if (itr_idx == cmp_idx) - return itr; - } - - return ERR_PTR(-ENOSPC); -} - -/** - * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated - * RMID are clean, or the CLOSID that has - * the most clean RMID. - * - * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID - * may not be able to allocate clean RMID. To avoid this the allocator will - * choose the CLOSID with the most clean RMID. - * - * When the CLOSID and RMID are independent numbers, the first free CLOSID will - * be returned. - */ -int resctrl_find_cleanest_closid(void) -{ - u32 cleanest_closid = ~0; - int i = 0; - - lockdep_assert_held(&rdtgroup_mutex); - - if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) - return -EIO; - - for (i = 0; i < closids_supported(); i++) { - int num_dirty; - - if (closid_allocated(i)) - continue; - - num_dirty = closid_num_dirty_rmid[i]; - if (num_dirty == 0) - return i; - - if (cleanest_closid == ~0) - cleanest_closid = i; - - if (num_dirty < closid_num_dirty_rmid[cleanest_closid]) - cleanest_closid = i; - } - - if (cleanest_closid == ~0) - return -ENOSPC; - - return cleanest_closid; -} - -/* - * For MPAM the RMID value is not unique, and has to be considered with - * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which - * allows all domains to be managed by a single free list. - * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler. 
- */ -int alloc_rmid(u32 closid) -{ - struct rmid_entry *entry; - - lockdep_assert_held(&rdtgroup_mutex); - - entry = resctrl_find_free_rmid(closid); - if (IS_ERR(entry)) - return PTR_ERR(entry); - - list_del(&entry->list); - return entry->rmid; -} - -static void add_rmid_to_limbo(struct rmid_entry *entry) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; - u32 idx; - - lockdep_assert_held(&rdtgroup_mutex); - - /* Walking r->domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); - - idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid); - - entry->busy = 0; - list_for_each_entry(d, &r->domains, list) { - /* - * For the first limbo RMID in the domain, - * setup up the limbo worker. - */ - if (!has_busy_rmid(d)) - cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL, - RESCTRL_PICK_ANY_CPU); - set_bit(idx, d->rmid_busy_llc); - entry->busy++; - } - - rmid_limbo_count++; - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) - closid_num_dirty_rmid[entry->closid]++; -} - -void free_rmid(u32 closid, u32 rmid) -{ - u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); - struct rmid_entry *entry; - - lockdep_assert_held(&rdtgroup_mutex); - - /* - * Do not allow the default rmid to be free'd. Comparing by index - * allows architectures that ignore the closid parameter to avoid an - * unnecessary check. 
- */ - if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, - RESCTRL_RESERVED_RMID)) - return; - - entry = __rmid_entry(idx); - - if (resctrl_arch_is_llc_occupancy_enabled()) - add_rmid_to_limbo(entry); - else - list_add_tail(&entry->list, &rmid_free_lru); -} - -static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid, - u32 rmid, enum resctrl_event_id evtid) -{ - u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); - - switch (evtid) { - case QOS_L3_MBM_TOTAL_EVENT_ID: - return &d->mbm_total[idx]; - case QOS_L3_MBM_LOCAL_EVENT_ID: - return &d->mbm_local[idx]; - default: - return NULL; - } -} - -static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) -{ - struct mbm_state *m; - u64 tval = 0; - - if (rr->first) { - resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid); - m = get_mbm_state(rr->d, closid, rmid, rr->evtid); - if (m) - memset(m, 0, sizeof(struct mbm_state)); - return 0; - } - - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid, - &tval, rr->arch_mon_ctx); - if (rr->err) - return rr->err; - - rr->val += tval; - - return 0; -} - -/* - * mbm_bw_count() - Update bw count from values previously read by - * __mon_event_count(). - * @closid: The closid used to identify the cached mbm_state. - * @rmid: The rmid used to identify the cached mbm_state. - * @rr: The struct rmid_read populated by __mon_event_count(). - * - * Supporting function to calculate the memory bandwidth - * and delta bandwidth in MBps. The chunks value previously read by - * __mon_event_count() is compared with the chunks value from the previous - * invocation. This must be called once per second to maintain values in MBps. 
- */ -static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr) -{ - u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); - struct mbm_state *m = &rr->d->mbm_local[idx]; - u64 cur_bw, bytes, cur_bytes; - - cur_bytes = rr->val; - bytes = cur_bytes - m->prev_bw_bytes; - m->prev_bw_bytes = cur_bytes; - - cur_bw = bytes / SZ_1M; - - m->prev_bw = cur_bw; -} - -/* - * This is scheduled by mon_event_read() to read the CQM/MBM counters - * on a domain. - */ -void mon_event_count(void *info) -{ - struct rdtgroup *rdtgrp, *entry; - struct rmid_read *rr = info; - struct list_head *head; - int ret; - - rdtgrp = rr->rgrp; - - ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr); - - /* - * For Ctrl groups read data from child monitor groups and - * add them together. Count events which are read successfully. - * Discard the rmid_read's reporting errors. - */ - head = &rdtgrp->mon.crdtgrp_list; - - if (rdtgrp->type == RDTCTRL_GROUP) { - list_for_each_entry(entry, head, mon.crdtgrp_list) { - if (__mon_event_count(entry->closid, entry->mon.rmid, - rr) == 0) - ret = 0; - } - } - - /* - * __mon_event_count() calls for newly created monitor groups may - * report -EINVAL/Unavailable if the monitor hasn't seen any traffic. - * Discard error if any of the monitor event reads succeeded. - */ - if (ret == 0) - rr->err = 0; -} - -/* - * Feedback loop for MBA software controller (mba_sc) - * - * mba_sc is a feedback loop where we periodically read MBM counters and - * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so - * that: - * - * current bandwidth(cur_bw) < user specified bandwidth(user_bw) - * - * This uses the MBM counters to measure the bandwidth and MBA throttle - * MSRs to control the bandwidth for a particular rdtgrp. It builds on the - * fact that resctrl rdtgroups have both monitoring and control. - * - * The frequency of the checks is 1s and we just tag along the MBM overflow - * timer. 
Having 1s interval makes the calculation of bandwidth simpler. - * - * Although MBA's goal is to restrict the bandwidth to a maximum, there may - * be a need to increase the bandwidth to avoid unnecessarily restricting - * the L2 <-> L3 traffic. - * - * Since MBA controls the L2 external bandwidth where as MBM measures the - * L3 external bandwidth the following sequence could lead to such a - * situation. - * - * Consider an rdtgroup which had high L3 <-> memory traffic in initial - * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but - * after some time rdtgroup has mostly L2 <-> L3 traffic. - * - * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its - * throttle MSRs already have low percentage values. To avoid - * unnecessarily restricting such rdtgroups, we also increase the bandwidth. - */ -static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) -{ - u32 closid, rmid, cur_msr_val, new_msr_val; - struct mbm_state *pmbm_data, *cmbm_data; - struct rdt_resource *r_mba; - struct rdt_domain *dom_mba; - u32 cur_bw, user_bw, idx; - struct list_head *head; - struct rdtgroup *entry; - - if (!resctrl_arch_is_mbm_local_enabled()) - return; - - r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - - closid = rgrp->closid; - rmid = rgrp->mon.rmid; - idx = resctrl_arch_rmid_idx_encode(closid, rmid); - pmbm_data = &dom_mbm->mbm_local[idx]; - - dom_mba = resctrl_get_domain_from_cpu(smp_processor_id(), r_mba); - if (!dom_mba) { - pr_warn_once("Failure to get domain for MBA update\n"); - return; - } - - cur_bw = pmbm_data->prev_bw; - user_bw = dom_mba->mbps_val[closid]; - - /* MBA resource doesn't support CDP */ - cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE); - - /* - * For Ctrl groups read data from child monitor groups. 
- */ - head = &rgrp->mon.crdtgrp_list; - list_for_each_entry(entry, head, mon.crdtgrp_list) { - cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; - cur_bw += cmbm_data->prev_bw; - } - - /* - * Scale up/down the bandwidth linearly for the ctrl group. The - * bandwidth step is the bandwidth granularity specified by the - * hardware. - * Always increase throttling if current bandwidth is above the - * target set by user. - * But avoid thrashing up and down on every poll by checking - * whether a decrease in throttling is likely to push the group - * back over target. E.g. if currently throttling to 30% of bandwidth - * on a system with 10% granularity steps, check whether moving to - * 40% would go past the limit by multiplying current bandwidth by - * "(30 + 10) / 30". - */ - if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) { - new_msr_val = cur_msr_val - r_mba->membw.bw_gran; - } else if (cur_msr_val < MAX_MBA_BW && - (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) { - new_msr_val = cur_msr_val + r_mba->membw.bw_gran; - } else { - return; - } - - resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); -} - -static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, - u32 closid, u32 rmid) -{ - struct rmid_read rr; - - rr.first = false; - rr.r = r; - rr.d = d; - - /* - * This is protected from concurrent reads from user - * as both the user and we hold the global mutex. 
- */ - if (resctrl_arch_is_mbm_total_enabled()) { - rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID; - rr.val = 0; - rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); - if (IS_ERR(rr.arch_mon_ctx)) { - pr_warn_ratelimited("Failed to allocate monitor context: %ld", - PTR_ERR(rr.arch_mon_ctx)); - return; - } - - __mon_event_count(closid, rmid, &rr); - - resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); - } - if (resctrl_arch_is_mbm_local_enabled()) { - rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; - rr.val = 0; - rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); - if (IS_ERR(rr.arch_mon_ctx)) { - pr_warn_ratelimited("Failed to allocate monitor context: %ld", - PTR_ERR(rr.arch_mon_ctx)); - return; - } - - __mon_event_count(closid, rmid, &rr); - - /* - * Call the MBA software controller only for the - * control groups and when user has enabled - * the software controller explicitly. - */ - if (is_mba_sc(NULL)) - mbm_bw_count(closid, rmid, &rr); - - resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); - } -} - -/* - * Handler to scan the limbo list and move the RMIDs - * to free list whose occupancy < threshold_occupancy. - */ -void cqm_handle_limbo(struct work_struct *work) -{ - unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL); - struct rdt_domain *d; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - d = container_of(work, struct rdt_domain, cqm_limbo.work); - - __check_limbo(d, false); - - if (has_busy_rmid(d)) { - d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask, - RESCTRL_PICK_ANY_CPU); - schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo, - delay); - } - - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); -} - -/** - * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this - * domain. - * @dom: The domain the limbo handler should run for. - * @delay_ms: How far in the future the handler should run. 
- * @exclude_cpu: Which CPU the handler should not run on, - * RESCTRL_PICK_ANY_CPU to pick any CPU. - */ -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, - int exclude_cpu) -{ - unsigned long delay = msecs_to_jiffies(delay_ms); - int cpu; - - cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu); - dom->cqm_work_cpu = cpu; - - if (cpu < nr_cpu_ids) - schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay); -} - -void mbm_handle_overflow(struct work_struct *work) -{ - unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL); - struct rdtgroup *prgrp, *crgrp; - struct list_head *head; - struct rdt_resource *r; - struct rdt_domain *d; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - /* - * If the filesystem has been unmounted this work no longer needs to - * run. - */ - if (!resctrl_mounted || !resctrl_arch_mon_capable()) - goto out_unlock; - - r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - d = container_of(work, struct rdt_domain, mbm_over.work); - - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - mbm_update(r, d, prgrp->closid, prgrp->mon.rmid); - - head = &prgrp->mon.crdtgrp_list; - list_for_each_entry(crgrp, head, mon.crdtgrp_list) - mbm_update(r, d, crgrp->closid, crgrp->mon.rmid); - - if (is_mba_sc(NULL)) - update_mba_bw(prgrp, d); - } - - /* - * Re-check for housekeeping CPUs. This allows the overflow handler to - * move off a nohz_full CPU quickly. - */ - d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask, - RESCTRL_PICK_ANY_CPU); - schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay); - -out_unlock: - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); -} - -/** - * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this - * domain. - * @dom: The domain the overflow handler should run for. - * @delay_ms: How far in the future the handler should run. - * @exclude_cpu: Which CPU the handler should not run on, - * RESCTRL_PICK_ANY_CPU to pick any CPU. 
- */ -void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms, - int exclude_cpu) -{ - unsigned long delay = msecs_to_jiffies(delay_ms); - int cpu; - - /* - * When a domain comes online there is no guarantee the filesystem is - * mounted. If not, there is no need to catch counter overflow. - */ - if (!resctrl_mounted || !resctrl_arch_mon_capable()) - return; - cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu); - dom->mbm_work_cpu = cpu; - - if (cpu < nr_cpu_ids) - schedule_delayed_work_on(cpu, &dom->mbm_over, delay); -} - -static int dom_data_init(struct rdt_resource *r) -{ - u32 idx_limit = resctrl_arch_system_num_rmid_idx(); - u32 num_closid = resctrl_arch_get_num_closid(r); - struct rmid_entry *entry = NULL; - int err = 0, i; - u32 idx; - - mutex_lock(&rdtgroup_mutex); - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { - u32 *tmp; - - /* - * If the architecture hasn't provided a sanitised value here, - * this may result in larger arrays than necessary. Resctrl will - * use a smaller system wide value based on the resources in - * use. - */ - tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL); - if (!tmp) { - err = -ENOMEM; - goto out_unlock; - } - - closid_num_dirty_rmid = tmp; - } - - rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL); - if (!rmid_ptrs) { - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { - kfree(closid_num_dirty_rmid); - closid_num_dirty_rmid = NULL; - } - err = -ENOMEM; - goto out_unlock; - } - - for (i = 0; i < idx_limit; i++) { - entry = &rmid_ptrs[i]; - INIT_LIST_HEAD(&entry->list); - - resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid); - list_add_tail(&entry->list, &rmid_free_lru); - } - - /* - * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and - * are always allocated. These are used for the rdtgroup_default - * control group, which will be setup later in rdtgroup_init(). 
- */ - idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, - RESCTRL_RESERVED_RMID); - entry = __rmid_entry(idx); - list_del(&entry->list); - -out_unlock: - mutex_unlock(&rdtgroup_mutex); - - return err; -} - -static void dom_data_exit(struct rdt_resource *r) -{ - if (!r->mon_capable) - return; - - mutex_lock(&rdtgroup_mutex); - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { - kfree(closid_num_dirty_rmid); - closid_num_dirty_rmid = NULL; - } - - kfree(rmid_ptrs); - rmid_ptrs = NULL; - - mutex_unlock(&rdtgroup_mutex); -} - -static struct mon_evt llc_occupancy_event = { - .name = "llc_occupancy", - .evtid = QOS_L3_OCCUP_EVENT_ID, -}; - -static struct mon_evt mbm_total_event = { - .name = "mbm_total_bytes", - .evtid = QOS_L3_MBM_TOTAL_EVENT_ID, -}; - -static struct mon_evt mbm_local_event = { - .name = "mbm_local_bytes", - .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, -}; - -/* - * Initialize the event list for the resource. - * - * Note that MBM events are also part of RDT_RESOURCE_L3 resource - * because as per the SDM the total and local memory bandwidth - * are enumerated as part of L3 monitoring. 
- */ -static void l3_mon_evt_init(struct rdt_resource *r) -{ - INIT_LIST_HEAD(&r->evt_list); - - if (resctrl_arch_is_llc_occupancy_enabled()) - list_add_tail(&llc_occupancy_event.list, &r->evt_list); - if (resctrl_arch_is_mbm_total_enabled()) - list_add_tail(&mbm_total_event.list, &r->evt_list); - if (resctrl_arch_is_mbm_local_enabled()) - list_add_tail(&mbm_local_event.list, &r->evt_list); -} - -int resctrl_mon_resource_init(void) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - int ret; - - if (!r->mon_capable) - return 0; - - ret = dom_data_init(r); - if (ret) - return ret; - - l3_mon_evt_init(r); - - if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { - mbm_total_event.configurable = true; - mbm_config_rftype_init("mbm_total_bytes_config"); - } - if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { - mbm_local_event.configurable = true; - mbm_config_rftype_init("mbm_local_bytes_config"); - } - - return 0; -} - -void resctrl_mon_resource_exit(void) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - - dom_data_exit(r); -} diff --git a/fs/resctrl/psuedo_lock.c b/fs/resctrl/psuedo_lock.c index 077c2abb6edd..e69de29bb2d1 100644 --- a/fs/resctrl/psuedo_lock.c +++ b/fs/resctrl/psuedo_lock.c @@ -1,1122 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Resource Director Technology (RDT) - * - * Pseudo-locking support built on top of Cache Allocation Technology (CAT) - * - * Copyright (C) 2018 Intel Corporation - * - * Author: Reinette Chatre - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "internal.h" - -/* - * Major number assigned to and shared by all devices exposing - * pseudo-locked regions. 
- */ -static unsigned int pseudo_lock_major; -static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0); - -static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode) -{ - const struct rdtgroup *rdtgrp; - - rdtgrp = dev_get_drvdata(dev); - if (mode) - *mode = 0600; - return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name); -} - -static const struct class pseudo_lock_class = { - .name = "pseudo_lock", - .devnode = pseudo_lock_devnode, -}; - -/** - * pseudo_lock_minor_get - Obtain available minor number - * @minor: Pointer to where new minor number will be stored - * - * A bitmask is used to track available minor numbers. Here the next free - * minor number is marked as unavailable and returned. - * - * Return: 0 on success, <0 on failure. - */ -static int pseudo_lock_minor_get(unsigned int *minor) -{ - unsigned long first_bit; - - first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS); - - if (first_bit == MINORBITS) - return -ENOSPC; - - __clear_bit(first_bit, &pseudo_lock_minor_avail); - *minor = first_bit; - - return 0; -} - -/** - * pseudo_lock_minor_release - Return minor number to available - * @minor: The minor number made available - */ -static void pseudo_lock_minor_release(unsigned int minor) -{ - __set_bit(minor, &pseudo_lock_minor_avail); -} - -/** - * region_find_by_minor - Locate a pseudo-lock region by inode minor number - * @minor: The minor number of the device representing pseudo-locked region - * - * When the character device is accessed we need to determine which - * pseudo-locked region it belongs to. This is done by matching the minor - * number of the device to the pseudo-locked region it belongs. - * - * Minor numbers are assigned at the time a pseudo-locked region is associated - * with a cache instance. - * - * Return: On success return pointer to resource group owning the pseudo-locked - * region, NULL on failure. 
- */ -static struct rdtgroup *region_find_by_minor(unsigned int minor) -{ - struct rdtgroup *rdtgrp, *rdtgrp_match = NULL; - - list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { - if (rdtgrp->plr && rdtgrp->plr->minor == minor) { - rdtgrp_match = rdtgrp; - break; - } - } - return rdtgrp_match; -} - -/** - * struct pseudo_lock_pm_req - A power management QoS request list entry - * @list: Entry within the @pm_reqs list for a pseudo-locked region - * @req: PM QoS request - */ -struct pseudo_lock_pm_req { - struct list_head list; - struct dev_pm_qos_request req; -}; - -static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr) -{ - struct pseudo_lock_pm_req *pm_req, *next; - - list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) { - dev_pm_qos_remove_request(&pm_req->req); - list_del(&pm_req->list); - kfree(pm_req); - } -} - -/** - * pseudo_lock_cstates_constrain - Restrict cores from entering C6 - * @plr: Pseudo-locked region - * - * To prevent the cache from being affected by power management entering - * C6 has to be avoided. This is accomplished by requesting a latency - * requirement lower than lowest C6 exit latency of all supported - * platforms as found in the cpuidle state tables in the intel_idle driver. - * At this time it is possible to do so with a single latency requirement - * for all supported platforms. - * - * Since Goldmont is supported, which is affected by X86_BUG_MONITOR, - * the ACPI latencies need to be considered while keeping in mind that C2 - * may be set to map to deeper sleep states. In this case the latency - * requirement needs to prevent entering C2 also. 
- * - * Return: 0 on success, <0 on failure - */ -static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr) -{ - struct pseudo_lock_pm_req *pm_req; - int cpu; - int ret; - - for_each_cpu(cpu, &plr->d->cpu_mask) { - pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL); - if (!pm_req) { - rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n"); - ret = -ENOMEM; - goto out_err; - } - ret = dev_pm_qos_add_request(get_cpu_device(cpu), - &pm_req->req, - DEV_PM_QOS_RESUME_LATENCY, - 30); - if (ret < 0) { - rdt_last_cmd_printf("Failed to add latency req CPU%d\n", - cpu); - kfree(pm_req); - ret = -1; - goto out_err; - } - list_add(&pm_req->list, &plr->pm_reqs); - } - - return 0; - -out_err: - pseudo_lock_cstates_relax(plr); - return ret; -} - -/** - * pseudo_lock_region_clear - Reset pseudo-lock region data - * @plr: pseudo-lock region - * - * All content of the pseudo-locked region is reset - any memory allocated - * freed. - * - * Return: void - */ -static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) -{ - plr->size = 0; - plr->line_size = 0; - kfree(plr->kmem); - plr->kmem = NULL; - plr->s = NULL; - if (plr->d) - plr->d->plr = NULL; - plr->d = NULL; - plr->cbm = 0; - plr->debugfs_dir = NULL; -} - -/** - * pseudo_lock_region_init - Initialize pseudo-lock region information - * @plr: pseudo-lock region - * - * Called after user provided a schemata to be pseudo-locked. From the - * schemata the &struct pseudo_lock_region is on entry already initialized - * with the resource, domain, and capacity bitmask. Here the information - * required for pseudo-locking is deduced from this data and &struct - * pseudo_lock_region initialized further. 
This information includes: - * - size in bytes of the region to be pseudo-locked - * - cache line size to know the stride with which data needs to be accessed - * to be pseudo-locked - * - a cpu associated with the cache instance on which the pseudo-locking - * flow can be executed - * - * Return: 0 on success, <0 on failure. Descriptive error will be written - * to last_cmd_status buffer. - */ -static int pseudo_lock_region_init(struct pseudo_lock_region *plr) -{ - struct cpu_cacheinfo *ci; - int ret; - int i; - - /* Pick the first cpu we find that is associated with the cache. */ - plr->cpu = cpumask_first(&plr->d->cpu_mask); - - if (!cpu_online(plr->cpu)) { - rdt_last_cmd_printf("CPU %u associated with cache not online\n", - plr->cpu); - ret = -ENODEV; - goto out_region; - } - - ci = get_cpu_cacheinfo(plr->cpu); - - plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); - - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == plr->s->res->cache_level) { - plr->line_size = ci->info_list[i].coherency_line_size; - return 0; - } - } - - ret = -1; - rdt_last_cmd_puts("Unable to determine cache line size\n"); -out_region: - pseudo_lock_region_clear(plr); - return ret; -} - -/** - * pseudo_lock_init - Initialize a pseudo-lock region - * @rdtgrp: resource group to which new pseudo-locked region will belong - * - * A pseudo-locked region is associated with a resource group. When this - * association is created the pseudo-locked region is initialized. The - * details of the pseudo-locked region are not known at this time so only - * allocation is done and association established. 
- * - * Return: 0 on success, <0 on failure - */ -static int pseudo_lock_init(struct rdtgroup *rdtgrp) -{ - struct pseudo_lock_region *plr; - - plr = kzalloc(sizeof(*plr), GFP_KERNEL); - if (!plr) - return -ENOMEM; - - init_waitqueue_head(&plr->lock_thread_wq); - INIT_LIST_HEAD(&plr->pm_reqs); - rdtgrp->plr = plr; - return 0; -} - -/** - * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked - * @plr: pseudo-lock region - * - * Initialize the details required to set up the pseudo-locked region and - * allocate the contiguous memory that will be pseudo-locked to the cache. - * - * Return: 0 on success, <0 on failure. Descriptive error will be written - * to last_cmd_status buffer. - */ -static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr) -{ - int ret; - - ret = pseudo_lock_region_init(plr); - if (ret < 0) - return ret; - - /* - * We do not yet support contiguous regions larger than - * KMALLOC_MAX_SIZE. - */ - if (plr->size > KMALLOC_MAX_SIZE) { - rdt_last_cmd_puts("Requested region exceeds maximum size\n"); - ret = -E2BIG; - goto out_region; - } - - plr->kmem = kzalloc(plr->size, GFP_KERNEL); - if (!plr->kmem) { - rdt_last_cmd_puts("Unable to allocate memory\n"); - ret = -ENOMEM; - goto out_region; - } - - ret = 0; - goto out; -out_region: - pseudo_lock_region_clear(plr); -out: - return ret; -} - -/** - * pseudo_lock_free - Free a pseudo-locked region - * @rdtgrp: resource group to which pseudo-locked region belonged - * - * The pseudo-locked region's resources have already been released, or not - * yet created at this point. Now it can be freed and disassociated from the - * resource group. 
- * - * Return: void - */ -static void pseudo_lock_free(struct rdtgroup *rdtgrp) -{ - pseudo_lock_region_clear(rdtgrp->plr); - kfree(rdtgrp->plr); - rdtgrp->plr = NULL; -} - -/** - * rdtgroup_monitor_in_progress - Test if monitoring in progress - * @rdtgrp: resource group being queried - * - * Return: 1 if monitor groups have been created for this resource - * group, 0 otherwise. - */ -static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp) -{ - return !list_empty(&rdtgrp->mon.crdtgrp_list); -} - -/** - * rdtgroup_locksetup_user_restrict - Restrict user access to group - * @rdtgrp: resource group needing access restricted - * - * A resource group used for cache pseudo-locking cannot have cpus or tasks - * assigned to it. This is communicated to the user by restricting access - * to all the files that can be used to make such changes. - * - * Permissions restored with rdtgroup_locksetup_user_restore() - * - * Return: 0 on success, <0 on failure. If a failure occurs during the - * restriction of access an attempt will be made to restore permissions but - * the state of the mode of these files will be uncertain when a failure - * occurs. 
- */ -static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp) -{ - int ret; - - ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks"); - if (ret) - return ret; - - ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus"); - if (ret) - goto err_tasks; - - ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list"); - if (ret) - goto err_cpus; - - if (resctrl_arch_mon_capable()) { - ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups"); - if (ret) - goto err_cpus_list; - } - - ret = 0; - goto out; - -err_cpus_list: - rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777); -err_cpus: - rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777); -err_tasks: - rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777); -out: - return ret; -} - -/** - * rdtgroup_locksetup_user_restore - Restore user access to group - * @rdtgrp: resource group needing access restored - * - * Restore all file access previously removed using - * rdtgroup_locksetup_user_restrict() - * - * Return: 0 on success, <0 on failure. If a failure occurs during the - * restoration of access an attempt will be made to restrict permissions - * again but the state of the mode of these files will be uncertain when - * a failure occurs. 
- */ -static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp) -{ - int ret; - - ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777); - if (ret) - return ret; - - ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777); - if (ret) - goto err_tasks; - - ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777); - if (ret) - goto err_cpus; - - if (resctrl_arch_mon_capable()) { - ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777); - if (ret) - goto err_cpus_list; - } - - ret = 0; - goto out; - -err_cpus_list: - rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list"); -err_cpus: - rdtgroup_kn_mode_restrict(rdtgrp, "cpus"); -err_tasks: - rdtgroup_kn_mode_restrict(rdtgrp, "tasks"); -out: - return ret; -} - -/** - * rdtgroup_locksetup_enter - Resource group enters locksetup mode - * @rdtgrp: resource group requested to enter locksetup mode - * - * A resource group enters locksetup mode to reflect that it would be used - * to represent a pseudo-locked region and is in the process of being set - * up to do so. A resource group used for a pseudo-locked region would - * lose the closid associated with it so we cannot allow it to have any - * tasks or cpus assigned nor permit tasks or cpus to be assigned in the - * future. Monitoring of a pseudo-locked region is not allowed either. - * - * The above and more restrictions on a pseudo-locked region are checked - * for and enforced before the resource group enters the locksetup mode. - * - * Returns: 0 if the resource group successfully entered locksetup mode, <0 - * on failure. On failure the last_cmd_status buffer is updated with text to - * communicate details of failure to the user. - */ -int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) -{ - int ret; - - /* - * The default resource group can neither be removed nor lose the - * default closid associated with it. 
- */ - if (rdtgrp == &rdtgroup_default) { - rdt_last_cmd_puts("Cannot pseudo-lock default group\n"); - return -EINVAL; - } - - /* - * Cache Pseudo-locking not supported when CDP is enabled. - * - * Some things to consider if you would like to enable this - * support (using L3 CDP as example): - * - When CDP is enabled two separate resources are exposed, - * L3DATA and L3CODE, but they are actually on the same cache. - * The implication for pseudo-locking is that if a - * pseudo-locked region is created on a domain of one - * resource (eg. L3CODE), then a pseudo-locked region cannot - * be created on that same domain of the other resource - * (eg. L3DATA). This is because the creation of a - * pseudo-locked region involves a call to wbinvd that will - * affect all cache allocations on particular domain. - * - Considering the previous, it may be possible to only - * expose one of the CDP resources to pseudo-locking and - * hide the other. For example, we could consider to only - * expose L3DATA and since the L3 cache is unified it is - * still possible to place instructions there are execute it. - * - If only one region is exposed to pseudo-locking we should - * still keep in mind that availability of a portion of cache - * for pseudo-locking should take into account both resources. - * Similarly, if a pseudo-locked region is created in one - * resource, the portion of cache used by it should be made - * unavailable to all future allocations from both resources. - */ - if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) || - resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) { - rdt_last_cmd_puts("CDP enabled\n"); - return -EINVAL; - } - - /* - * Not knowing the bits to disable prefetching implies that this - * platform does not support Cache Pseudo-Locking. 
- */ - if (resctrl_arch_get_prefetch_disable_bits() == 0) { - rdt_last_cmd_puts("Pseudo-locking not supported\n"); - return -EINVAL; - } - - if (rdtgroup_monitor_in_progress(rdtgrp)) { - rdt_last_cmd_puts("Monitoring in progress\n"); - return -EINVAL; - } - - if (rdtgroup_tasks_assigned(rdtgrp)) { - rdt_last_cmd_puts("Tasks assigned to resource group\n"); - return -EINVAL; - } - - if (!cpumask_empty(&rdtgrp->cpu_mask)) { - rdt_last_cmd_puts("CPUs assigned to resource group\n"); - return -EINVAL; - } - - if (rdtgroup_locksetup_user_restrict(rdtgrp)) { - rdt_last_cmd_puts("Unable to modify resctrl permissions\n"); - return -EIO; - } - - ret = pseudo_lock_init(rdtgrp); - if (ret) { - rdt_last_cmd_puts("Unable to init pseudo-lock region\n"); - goto out_release; - } - - /* - * If this system is capable of monitoring a rmid would have been - * allocated when the control group was created. This is not needed - * anymore when this group would be used for pseudo-locking. This - * is safe to call on platforms not capable of monitoring. - */ - free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); - - ret = 0; - goto out; - -out_release: - rdtgroup_locksetup_user_restore(rdtgrp); -out: - return ret; -} - -/** - * rdtgroup_locksetup_exit - resource group exist locksetup mode - * @rdtgrp: resource group - * - * When a resource group exits locksetup mode the earlier restrictions are - * lifted. 
- * - * Return: 0 on success, <0 on failure - */ -int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) -{ - int ret; - - if (resctrl_arch_mon_capable()) { - ret = alloc_rmid(rdtgrp->closid); - if (ret < 0) { - rdt_last_cmd_puts("Out of RMIDs\n"); - return ret; - } - rdtgrp->mon.rmid = ret; - } - - ret = rdtgroup_locksetup_user_restore(rdtgrp); - if (ret) { - free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); - return ret; - } - - pseudo_lock_free(rdtgrp); - return 0; -} - -/** - * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked - * @d: RDT domain - * @cbm: CBM to test - * - * @d represents a cache instance and @cbm a capacity bitmask that is - * considered for it. Determine if @cbm overlaps with any existing - * pseudo-locked region on @d. - * - * @cbm is unsigned long, even if only 32 bits are used, to make the - * bitmap functions work correctly. - * - * Return: true if @cbm overlaps with pseudo-locked region on @d, false - * otherwise. - */ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) -{ - unsigned int cbm_len; - unsigned long cbm_b; - - if (d->plr) { - cbm_len = d->plr->s->res->cache.cbm_len; - cbm_b = d->plr->cbm; - if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) - return true; - } - return false; -} - -/** - * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy - * @d: RDT domain under test - * - * The setup of a pseudo-locked region affects all cache instances within - * the hierarchy of the region. It is thus essential to know if any - * pseudo-locked regions exist within a cache hierarchy to prevent any - * attempts to create new pseudo-locked regions in the same hierarchy. - * - * Return: true if a pseudo-locked region exists in the hierarchy of @d or - * if it is not possible to test due to memory allocation issue, - * false otherwise. 
- */ -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) -{ - cpumask_var_t cpu_with_psl; - enum resctrl_res_level i; - struct rdt_resource *r; - struct rdt_domain *d_i; - bool ret = false; - - /* Walking r->domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); - - if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL)) - return true; - - /* - * First determine which cpus have pseudo-locked regions - * associated with them. - */ - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->alloc_capable) - continue; - - list_for_each_entry(d_i, &r->domains, list) { - if (d_i->plr) - cpumask_or(cpu_with_psl, cpu_with_psl, - &d_i->cpu_mask); - } - } - - /* - * Next test if new pseudo-locked region would intersect with - * existing region. - */ - if (cpumask_intersects(&d->cpu_mask, cpu_with_psl)) - ret = true; - - free_cpumask_var(cpu_with_psl); - return ret; -} - -/** - * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region - * @rdtgrp: Resource group to which the pseudo-locked region belongs. - * @sel: Selector of which measurement to perform on a pseudo-locked region. - * - * The measurement of latency to access a pseudo-locked region should be - * done from a cpu that is associated with that pseudo-locked region. - * Determine which cpu is associated with this region and start a thread on - * that cpu to perform the measurement, wait for that thread to complete. 
- * - * Return: 0 on success, <0 on failure - */ -static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) -{ - struct pseudo_lock_region *plr = rdtgrp->plr; - struct task_struct *thread; - unsigned int cpu; - int ret = -1; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - if (rdtgrp->flags & RDT_DELETED) { - ret = -ENODEV; - goto out; - } - - if (!plr->d) { - ret = -ENODEV; - goto out; - } - - plr->thread_done = 0; - cpu = cpumask_first(&plr->d->cpu_mask); - if (!cpu_online(cpu)) { - ret = -ENODEV; - goto out; - } - - plr->cpu = cpu; - - if (sel == 1) - thread = kthread_create_on_node(resctrl_arch_measure_cycles_lat_fn, - plr, cpu_to_node(cpu), - "pseudo_lock_measure/%u", - cpu); - else if (sel == 2) - thread = kthread_create_on_node(resctrl_arch_measure_l2_residency, - plr, cpu_to_node(cpu), - "pseudo_lock_measure/%u", - cpu); - else if (sel == 3) - thread = kthread_create_on_node(resctrl_arch_measure_l3_residency, - plr, cpu_to_node(cpu), - "pseudo_lock_measure/%u", - cpu); - else - goto out; - - if (IS_ERR(thread)) { - ret = PTR_ERR(thread); - goto out; - } - kthread_bind(thread, cpu); - wake_up_process(thread); - - ret = wait_event_interruptible(plr->lock_thread_wq, - plr->thread_done == 1); - if (ret < 0) - goto out; - - ret = 0; - -out: - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - return ret; -} - -static ssize_t pseudo_lock_measure_trigger(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct rdtgroup *rdtgrp = file->private_data; - size_t buf_size; - char buf[32]; - int ret; - int sel; - - buf_size = min(count, (sizeof(buf) - 1)); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - ret = kstrtoint(buf, 10, &sel); - if (ret == 0) { - if (sel != 1 && sel != 2 && sel != 3) - return -EINVAL; - ret = debugfs_file_get(file->f_path.dentry); - if (ret) - return ret; - ret = pseudo_lock_measure_cycles(rdtgrp, sel); - if (ret == 0) - ret = count; - 
debugfs_file_put(file->f_path.dentry); - } - - return ret; -} - -static const struct file_operations pseudo_measure_fops = { - .write = pseudo_lock_measure_trigger, - .open = simple_open, - .llseek = default_llseek, -}; - -/** - * rdtgroup_pseudo_lock_create - Create a pseudo-locked region - * @rdtgrp: resource group to which pseudo-lock region belongs - * - * Called when a resource group in the pseudo-locksetup mode receives a - * valid schemata that should be pseudo-locked. Since the resource group is - * in pseudo-locksetup mode the &struct pseudo_lock_region has already been - * allocated and initialized with the essential information. If a failure - * occurs the resource group remains in the pseudo-locksetup mode with the - * &struct pseudo_lock_region associated with it, but cleared from all - * information and ready for the user to re-attempt pseudo-locking by - * writing the schemata again. - * - * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0 - * on failure. Descriptive error will be written to last_cmd_status buffer. 
- */ -int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) -{ - struct pseudo_lock_region *plr = rdtgrp->plr; - struct task_struct *thread; - unsigned int new_minor; - struct device *dev; - int ret; - - ret = pseudo_lock_region_alloc(plr); - if (ret < 0) - return ret; - - ret = pseudo_lock_cstates_constrain(plr); - if (ret < 0) { - ret = -EINVAL; - goto out_region; - } - - plr->thread_done = 0; - - plr->closid = rdtgrp->closid; - thread = kthread_create_on_node(resctrl_arch_pseudo_lock_fn, plr, - cpu_to_node(plr->cpu), - "pseudo_lock/%u", plr->cpu); - if (IS_ERR(thread)) { - ret = PTR_ERR(thread); - rdt_last_cmd_printf("Locking thread returned error %d\n", ret); - goto out_cstates; - } - - kthread_bind(thread, plr->cpu); - wake_up_process(thread); - - ret = wait_event_interruptible(plr->lock_thread_wq, - plr->thread_done == 1); - if (ret < 0) { - /* - * If the thread does not get on the CPU for whatever - * reason and the process which sets up the region is - * interrupted then this will leave the thread in runnable - * state and once it gets on the CPU it will dereference - * the cleared, but not freed, plr struct resulting in an - * empty pseudo-locking loop. - */ - rdt_last_cmd_puts("Locking thread interrupted\n"); - goto out_cstates; - } - - ret = pseudo_lock_minor_get(&new_minor); - if (ret < 0) { - rdt_last_cmd_puts("Unable to obtain a new minor number\n"); - goto out_cstates; - } - - /* - * Unlock access but do not release the reference. The - * pseudo-locked region will still be here on return. - * - * The mutex has to be released temporarily to avoid a potential - * deadlock with the mm->mmap_lock which is obtained in the - * device_create() and debugfs_create_dir() callpath below as well as - * before the mmap() callback is called. 
- */ - mutex_unlock(&rdtgroup_mutex); - - if (!IS_ERR_OR_NULL(debugfs_resctrl)) { - plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name, - debugfs_resctrl); - if (!IS_ERR_OR_NULL(plr->debugfs_dir)) - debugfs_create_file("pseudo_lock_measure", 0200, - plr->debugfs_dir, rdtgrp, - &pseudo_measure_fops); - } - - dev = device_create(&pseudo_lock_class, NULL, - MKDEV(pseudo_lock_major, new_minor), - rdtgrp, "%s", rdtgrp->kn->name); - - mutex_lock(&rdtgroup_mutex); - - if (IS_ERR(dev)) { - ret = PTR_ERR(dev); - rdt_last_cmd_printf("Failed to create character device: %d\n", - ret); - goto out_debugfs; - } - - /* We released the mutex - check if group was removed while we did so */ - if (rdtgrp->flags & RDT_DELETED) { - ret = -ENODEV; - goto out_device; - } - - plr->minor = new_minor; - - rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED; - closid_free(rdtgrp->closid); - rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444); - rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444); - - ret = 0; - goto out; - -out_device: - device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor)); -out_debugfs: - debugfs_remove_recursive(plr->debugfs_dir); - pseudo_lock_minor_release(new_minor); -out_cstates: - pseudo_lock_cstates_relax(plr); -out_region: - pseudo_lock_region_clear(plr); -out: - return ret; -} - -/** - * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region - * @rdtgrp: resource group to which the pseudo-locked region belongs - * - * The removal of a pseudo-locked region can be initiated when the resource - * group is removed from user space via a "rmdir" from userspace or the - * unmount of the resctrl filesystem. On removal the resource group does - * not go back to pseudo-locksetup mode before it is removed, instead it is - * removed directly. There is thus asymmetry with the creation where the - * &struct pseudo_lock_region is removed here while it was not created in - * rdtgroup_pseudo_lock_create(). 
- * - * Return: void - */ -void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) -{ - struct pseudo_lock_region *plr = rdtgrp->plr; - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - /* - * Default group cannot be a pseudo-locked region so we can - * free closid here. - */ - closid_free(rdtgrp->closid); - goto free; - } - - pseudo_lock_cstates_relax(plr); - debugfs_remove_recursive(rdtgrp->plr->debugfs_dir); - device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor)); - pseudo_lock_minor_release(plr->minor); - -free: - pseudo_lock_free(rdtgrp); -} - -static int pseudo_lock_dev_open(struct inode *inode, struct file *filp) -{ - struct rdtgroup *rdtgrp; - - mutex_lock(&rdtgroup_mutex); - - rdtgrp = region_find_by_minor(iminor(inode)); - if (!rdtgrp) { - mutex_unlock(&rdtgroup_mutex); - return -ENODEV; - } - - filp->private_data = rdtgrp; - atomic_inc(&rdtgrp->waitcount); - /* Perform a non-seekable open - llseek is not supported */ - filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); - - mutex_unlock(&rdtgroup_mutex); - - return 0; -} - -static int pseudo_lock_dev_release(struct inode *inode, struct file *filp) -{ - struct rdtgroup *rdtgrp; - - mutex_lock(&rdtgroup_mutex); - rdtgrp = filp->private_data; - WARN_ON(!rdtgrp); - if (!rdtgrp) { - mutex_unlock(&rdtgroup_mutex); - return -ENODEV; - } - filp->private_data = NULL; - atomic_dec(&rdtgrp->waitcount); - mutex_unlock(&rdtgroup_mutex); - return 0; -} - -static int pseudo_lock_dev_mremap(struct vm_area_struct *area) -{ - /* Not supported */ - return -EINVAL; -} - -static const struct vm_operations_struct pseudo_mmap_ops = { - .mremap = pseudo_lock_dev_mremap, -}; - -static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) -{ - unsigned long vsize = vma->vm_end - vma->vm_start; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT; - struct pseudo_lock_region *plr; - struct rdtgroup *rdtgrp; - unsigned long physical; - unsigned long psize; - - 
mutex_lock(&rdtgroup_mutex); - - rdtgrp = filp->private_data; - WARN_ON(!rdtgrp); - if (!rdtgrp) { - mutex_unlock(&rdtgroup_mutex); - return -ENODEV; - } - - plr = rdtgrp->plr; - - if (!plr->d) { - mutex_unlock(&rdtgroup_mutex); - return -ENODEV; - } - - /* - * Task is required to run with affinity to the cpus associated - * with the pseudo-locked region. If this is not the case the task - * may be scheduled elsewhere and invalidate entries in the - * pseudo-locked region. - */ - if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { - mutex_unlock(&rdtgroup_mutex); - return -EINVAL; - } - - physical = __pa(plr->kmem) >> PAGE_SHIFT; - psize = plr->size - off; - - if (off > plr->size) { - mutex_unlock(&rdtgroup_mutex); - return -ENOSPC; - } - - /* - * Ensure changes are carried directly to the memory being mapped, - * do not allow copy-on-write mapping. - */ - if (!(vma->vm_flags & VM_SHARED)) { - mutex_unlock(&rdtgroup_mutex); - return -EINVAL; - } - - if (vsize > psize) { - mutex_unlock(&rdtgroup_mutex); - return -ENOSPC; - } - - memset(plr->kmem + off, 0, vsize); - - if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff, - vsize, vma->vm_page_prot)) { - mutex_unlock(&rdtgroup_mutex); - return -EAGAIN; - } - vma->vm_ops = &pseudo_mmap_ops; - mutex_unlock(&rdtgroup_mutex); - return 0; -} - -static const struct file_operations pseudo_lock_dev_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = NULL, - .write = NULL, - .open = pseudo_lock_dev_open, - .release = pseudo_lock_dev_release, - .mmap = pseudo_lock_dev_mmap, -}; - -int rdt_pseudo_lock_init(void) -{ - int ret; - - ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops); - if (ret < 0) - return ret; - - pseudo_lock_major = ret; - - ret = class_register(&pseudo_lock_class); - if (ret) { - unregister_chrdev(pseudo_lock_major, "pseudo_lock"); - return ret; - } - - return 0; -} - -void rdt_pseudo_lock_release(void) -{ - class_unregister(&pseudo_lock_class); - 
unregister_chrdev(pseudo_lock_major, "pseudo_lock"); - pseudo_lock_major = 0; -} diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 936fc6e47386..e69de29bb2d1 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1,4013 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * User interface for Resource Allocation in Resource Director Technology(RDT) - * - * Copyright (C) 2016 Intel Corporation - * - * Author: Fenghua Yu - * - * More information about RDT be found in the Intel (R) x86 Architecture - * Software Developer Manual. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include "internal.h" - -/* Mutex to protect rdtgroup access. */ -DEFINE_MUTEX(rdtgroup_mutex); - -static struct kernfs_root *rdt_root; -struct rdtgroup rdtgroup_default; -LIST_HEAD(rdt_all_groups); - -/* list of entries for the schemata file */ -LIST_HEAD(resctrl_schema_all); - -/* The filesystem can only be mounted once. 
*/ -bool resctrl_mounted; - -/* Kernel fs node for "info" directory under root */ -static struct kernfs_node *kn_info; - -/* Kernel fs node for "mon_groups" directory under root */ -static struct kernfs_node *kn_mongrp; - -/* Kernel fs node for "mon_data" directory under root */ -static struct kernfs_node *kn_mondata; - -/* - * Used to store the max resource name width and max resource data width - * to display the schemata in a tabular format - */ -int max_name_width, max_data_width; - -static struct seq_buf last_cmd_status; -static char last_cmd_status_buf[512]; - -static int rdtgroup_setup_root(struct rdt_fs_context *ctx); -static void rdtgroup_destroy_root(void); - -struct dentry *debugfs_resctrl; - -static bool resctrl_debug; - -void rdt_last_cmd_clear(void) -{ - lockdep_assert_held(&rdtgroup_mutex); - seq_buf_clear(&last_cmd_status); -} - -void rdt_last_cmd_puts(const char *s) -{ - lockdep_assert_held(&rdtgroup_mutex); - seq_buf_puts(&last_cmd_status, s); -} - -void rdt_last_cmd_printf(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - lockdep_assert_held(&rdtgroup_mutex); - seq_buf_vprintf(&last_cmd_status, fmt, ap); - va_end(ap); -} - -void rdt_staged_configs_clear(void) -{ - enum resctrl_res_level i; - struct rdt_resource *r; - struct rdt_domain *dom; - - lockdep_assert_held(&rdtgroup_mutex); - - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->alloc_capable) - continue; - - list_for_each_entry(dom, &r->domains, list) - memset(dom->staged_config, 0, sizeof(dom->staged_config)); - } -} - -static bool resctrl_is_mbm_enabled(void) -{ - return (resctrl_arch_is_mbm_total_enabled() || - resctrl_arch_is_mbm_local_enabled()); -} - -static bool resctrl_is_mbm_event(int e) -{ - return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_L3_MBM_LOCAL_EVENT_ID); -} - -/* - * Trivial allocator for CLOSIDs. Since h/w only supports a small number, - * we can keep a bitmap of free CLOSIDs in a single integer. 
- * - * Using a global CLOSID across all resources has some advantages and - * some drawbacks: - * + We can simply set current's closid to assign a task to a resource - * group. - * + Context switch code can avoid extra memory references deciding which - * CLOSID to load into the PQR_ASSOC MSR - * - We give up some options in configuring resource groups across multi-socket - * systems. - * - Our choices on how to configure each resource become progressively more - * limited as the number of resources grows. - */ -static unsigned long closid_free_map; -static int closid_free_map_len; - -int closids_supported(void) -{ - return closid_free_map_len; -} - -static void closid_init(void) -{ - struct resctrl_schema *s; - u32 rdt_min_closid = 32; - - /* Compute rdt_min_closid across all resources */ - list_for_each_entry(s, &resctrl_schema_all, list) - rdt_min_closid = min(rdt_min_closid, s->num_closid); - - closid_free_map = BIT_MASK(rdt_min_closid) - 1; - - /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */ - __clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map); - closid_free_map_len = rdt_min_closid; -} - -static int closid_alloc(void) -{ - int cleanest_closid; - u32 closid; - - lockdep_assert_held(&rdtgroup_mutex); - - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && - resctrl_arch_is_llc_occupancy_enabled()) { - cleanest_closid = resctrl_find_cleanest_closid(); - if (cleanest_closid < 0) - return cleanest_closid; - closid = cleanest_closid; - } else { - closid = ffs(closid_free_map); - if (closid == 0) - return -ENOSPC; - closid--; - } - __clear_bit(closid, &closid_free_map); - - return closid; -} - -void closid_free(int closid) -{ - lockdep_assert_held(&rdtgroup_mutex); - - __set_bit(closid, &closid_free_map); -} - -/** - * closid_allocated - test if provided closid is in use - * @closid: closid to be tested - * - * Return: true if @closid is currently associated with a resource group, - * false if @closid is free - */ -bool 
closid_allocated(unsigned int closid) -{ - lockdep_assert_held(&rdtgroup_mutex); - - return !test_bit(closid, &closid_free_map); -} - -/** - * rdtgroup_mode_by_closid - Return mode of resource group with closid - * @closid: closid if the resource group - * - * Each resource group is associated with a @closid. Here the mode - * of a resource group can be queried by searching for it using its closid. - * - * Return: mode as &enum rdtgrp_mode of resource group with closid @closid - */ -enum rdtgrp_mode rdtgroup_mode_by_closid(int closid) -{ - struct rdtgroup *rdtgrp; - - list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { - if (rdtgrp->closid == closid) - return rdtgrp->mode; - } - - return RDT_NUM_MODES; -} - -static const char * const rdt_mode_str[] = { - [RDT_MODE_SHAREABLE] = "shareable", - [RDT_MODE_EXCLUSIVE] = "exclusive", - [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup", - [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked", -}; - -/** - * rdtgroup_mode_str - Return the string representation of mode - * @mode: the resource group mode as &enum rdtgroup_mode - * - * Return: string representation of valid mode, "unknown" otherwise - */ -static const char *rdtgroup_mode_str(enum rdtgrp_mode mode) -{ - if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES) - return "unknown"; - - return rdt_mode_str[mode]; -} - -/* set uid and gid of rdtgroup dirs and files to that of the creator */ -static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) -{ - struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, - .ia_uid = current_fsuid(), - .ia_gid = current_fsgid(), }; - - if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && - gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) - return 0; - - return kernfs_setattr(kn, &iattr); -} - -static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft) -{ - struct kernfs_node *kn; - int ret; - - kn = __kernfs_create_file(parent_kn, rft->name, rft->mode, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, - 0, rft->kf_ops, rft, NULL, NULL); - if 
(IS_ERR(kn)) - return PTR_ERR(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) { - kernfs_remove(kn); - return ret; - } - - return 0; -} - -static int rdtgroup_seqfile_show(struct seq_file *m, void *arg) -{ - struct kernfs_open_file *of = m->private; - struct rftype *rft = of->kn->priv; - - if (rft->seq_show) - return rft->seq_show(of, m, arg); - return 0; -} - -static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) -{ - struct rftype *rft = of->kn->priv; - - if (rft->write) - return rft->write(of, buf, nbytes, off); - - return -EINVAL; -} - -static const struct kernfs_ops rdtgroup_kf_single_ops = { - .atomic_write_len = PAGE_SIZE, - .write = rdtgroup_file_write, - .seq_show = rdtgroup_seqfile_show, -}; - -static const struct kernfs_ops kf_mondata_ops = { - .atomic_write_len = PAGE_SIZE, - .seq_show = rdtgroup_mondata_show, -}; - -static bool is_cpu_list(struct kernfs_open_file *of) -{ - struct rftype *rft = of->kn->priv; - - return rft->flags & RFTYPE_FLAGS_CPUS_LIST; -} - -static int rdtgroup_cpus_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdtgroup *rdtgrp; - struct cpumask *mask; - int ret = 0; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - - if (rdtgrp) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { - if (!rdtgrp->plr->d) { - rdt_last_cmd_clear(); - rdt_last_cmd_puts("Cache domain offline\n"); - ret = -ENODEV; - } else { - mask = &rdtgrp->plr->d->cpu_mask; - seq_printf(s, is_cpu_list(of) ? - "%*pbl\n" : "%*pb\n", - cpumask_pr_args(mask)); - } - } else { - seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n", - cpumask_pr_args(&rdtgrp->cpu_mask)); - } - } else { - ret = -ENOENT; - } - rdtgroup_kn_unlock(of->kn); - - return ret; -} - -/* - * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, - * - * Per task closids/rmids must have been set up before calling this function. - * @r may be NULL. 
- */ -static void -update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) -{ - struct resctrl_cpu_sync defaults; - struct resctrl_cpu_sync *defaults_p = NULL; - - if (r) { - defaults.closid = r->closid; - defaults.rmid = r->mon.rmid; - defaults_p = &defaults; - } - - on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_defaults, defaults_p, - 1); -} - -static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, - cpumask_var_t tmpmask) -{ - struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp; - struct list_head *head; - - /* Check whether cpus belong to parent ctrl group */ - cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); - if (!cpumask_empty(tmpmask)) { - rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n"); - return -EINVAL; - } - - /* Check whether cpus are dropped from this group */ - cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); - if (!cpumask_empty(tmpmask)) { - /* Give any dropped cpus to parent rdtgroup */ - cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask); - update_closid_rmid(tmpmask, prgrp); - } - - /* - * If we added cpus, remove them from previous group that owned them - * and update per-cpu rmid - */ - cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); - if (!cpumask_empty(tmpmask)) { - head = &prgrp->mon.crdtgrp_list; - list_for_each_entry(crgrp, head, mon.crdtgrp_list) { - if (crgrp == rdtgrp) - continue; - cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask, - tmpmask); - } - update_closid_rmid(tmpmask, rdtgrp); - } - - /* Done pushing/pulling - update this group with new mask */ - cpumask_copy(&rdtgrp->cpu_mask, newmask); - - return 0; -} - -static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m) -{ - struct rdtgroup *crgrp; - - cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m); - /* update the child mon group masks as well*/ - list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list) - cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask); -} - -static 
int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, - cpumask_var_t tmpmask, cpumask_var_t tmpmask1) -{ - struct rdtgroup *r, *crgrp; - struct list_head *head; - - /* Check whether cpus are dropped from this group */ - cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); - if (!cpumask_empty(tmpmask)) { - /* Can't drop from default group */ - if (rdtgrp == &rdtgroup_default) { - rdt_last_cmd_puts("Can't drop CPUs from default group\n"); - return -EINVAL; - } - - /* Give any dropped cpus to rdtgroup_default */ - cpumask_or(&rdtgroup_default.cpu_mask, - &rdtgroup_default.cpu_mask, tmpmask); - update_closid_rmid(tmpmask, &rdtgroup_default); - } - - /* - * If we added cpus, remove them from previous group and - * the prev group's child groups that owned them - * and update per-cpu closid/rmid. - */ - cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); - if (!cpumask_empty(tmpmask)) { - list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { - if (r == rdtgrp) - continue; - cpumask_and(tmpmask1, &r->cpu_mask, tmpmask); - if (!cpumask_empty(tmpmask1)) - cpumask_rdtgrp_clear(r, tmpmask1); - } - update_closid_rmid(tmpmask, rdtgrp); - } - - /* Done pushing/pulling - update this group with new mask */ - cpumask_copy(&rdtgrp->cpu_mask, newmask); - - /* - * Clear child mon group masks since there is a new parent mask - * now and update the rmid for the cpus the child lost. 
- */ - head = &rdtgrp->mon.crdtgrp_list; - list_for_each_entry(crgrp, head, mon.crdtgrp_list) { - cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask); - update_closid_rmid(tmpmask, rdtgrp); - cpumask_clear(&crgrp->cpu_mask); - } - - return 0; -} - -static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - cpumask_var_t tmpmask, newmask, tmpmask1; - struct rdtgroup *rdtgrp; - int ret; - - if (!buf) - return -EINVAL; - - if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) - return -ENOMEM; - if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { - free_cpumask_var(tmpmask); - return -ENOMEM; - } - if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) { - free_cpumask_var(tmpmask); - free_cpumask_var(newmask); - return -ENOMEM; - } - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - ret = -ENOENT; - goto unlock; - } - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - ret = -EINVAL; - rdt_last_cmd_puts("Pseudo-locking in progress\n"); - goto unlock; - } - - if (is_cpu_list(of)) - ret = cpulist_parse(buf, newmask); - else - ret = cpumask_parse(buf, newmask); - - if (ret) { - rdt_last_cmd_puts("Bad CPU list/mask\n"); - goto unlock; - } - - /* check that user didn't specify any offline cpus */ - cpumask_andnot(tmpmask, newmask, cpu_online_mask); - if (!cpumask_empty(tmpmask)) { - ret = -EINVAL; - rdt_last_cmd_puts("Can only assign online CPUs\n"); - goto unlock; - } - - if (rdtgrp->type == RDTCTRL_GROUP) - ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1); - else if (rdtgrp->type == RDTMON_GROUP) - ret = cpus_mon_write(rdtgrp, newmask, tmpmask); - else - ret = -EINVAL; - -unlock: - rdtgroup_kn_unlock(of->kn); - free_cpumask_var(tmpmask); - free_cpumask_var(newmask); - free_cpumask_var(tmpmask1); - - return ret ?: nbytes; -} - -/** - * rdtgroup_remove - the helper to remove resource group safely - * @rdtgrp: resource group to remove - * - * On resource group creation 
via a mkdir, an extra kernfs_node reference is - * taken to ensure that the rdtgroup structure remains accessible for the - * rdtgroup_kn_unlock() calls where it is removed. - * - * Drop the extra reference here, then free the rdtgroup structure. - * - * Return: void - */ -static void rdtgroup_remove(struct rdtgroup *rdtgrp) -{ - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); -} - -static void _update_task_closid_rmid(void *task) -{ - /* - * If the task is still current on this CPU, update PQR_ASSOC MSR. - * Otherwise, the MSR is updated when the task is scheduled in. - */ - if (task == current) - resctrl_arch_sched_in(task); -} - -static void update_task_closid_rmid(struct task_struct *t) -{ - if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) - smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); - else - _update_task_closid_rmid(t); -} - -static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp) -{ - u32 closid, rmid = rdtgrp->mon.rmid; - - if (rdtgrp->type == RDTCTRL_GROUP) - closid = rdtgrp->closid; - else if (rdtgrp->type == RDTMON_GROUP) - closid = rdtgrp->mon.parent->closid; - else - return false; - - return resctrl_arch_match_closid(tsk, closid) && - resctrl_arch_match_rmid(tsk, closid, rmid); -} - -static int __rdtgroup_move_task(struct task_struct *tsk, - struct rdtgroup *rdtgrp) -{ - /* If the task is already in rdtgrp, no need to move the task. */ - if (task_in_rdtgroup(tsk, rdtgrp)) - return 0; - - /* - * Set the task's closid/rmid before the PQR_ASSOC MSR can be - * updated by them. - * - * For ctrl_mon groups, move both closid and rmid. - * For monitor groups, can move the tasks only from - * their parent CTRL group. 
- */ - if (rdtgrp->type == RDTMON_GROUP && - !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) { - rdt_last_cmd_puts("Can't move task to different control group\n"); - return -EINVAL; - } - - if (rdtgrp->type == RDTMON_GROUP) - resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid, - rdtgrp->mon.rmid); - else - resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid, - rdtgrp->mon.rmid); - - /* - * Ensure the task's closid and rmid are written before determining if - * the task is current that will decide if it will be interrupted. - * This pairs with the full barrier between the rq->curr update and - * resctrl_arch_sched_in() during context switch. - */ - smp_mb(); - - /* - * By now, the task's closid and rmid are set. If the task is current - * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource - * group go into effect. If the task is not current, the MSR will be - * updated when the task is scheduled in. - */ - update_task_closid_rmid(tsk); - - return 0; -} - -static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) -{ - return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) && - resctrl_arch_match_closid(t, r->closid)); -} - -static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) -{ - return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) && - resctrl_arch_match_rmid(t, r->mon.parent->closid, - r->mon.rmid)); -} - -/** - * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group - * @r: Resource group - * - * Return: 1 if tasks have been assigned to @r, 0 otherwise - */ -int rdtgroup_tasks_assigned(struct rdtgroup *r) -{ - struct task_struct *p, *t; - int ret = 0; - - lockdep_assert_held(&rdtgroup_mutex); - - rcu_read_lock(); - for_each_process_thread(p, t) { - if (is_closid_match(t, r) || is_rmid_match(t, r)) { - ret = 1; - break; - } - } - rcu_read_unlock(); - - return ret; -} - -static int rdtgroup_task_write_permission(struct task_struct *task, - struct 
kernfs_open_file *of) -{ - const struct cred *tcred = get_task_cred(task); - const struct cred *cred = current_cred(); - int ret = 0; - - /* - * Even if we're attaching all tasks in the thread group, we only - * need to check permissions on one of them. - */ - if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && - !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) { - rdt_last_cmd_printf("No permission to move task %d\n", task->pid); - ret = -EPERM; - } - - put_cred(tcred); - return ret; -} - -static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, - struct kernfs_open_file *of) -{ - struct task_struct *tsk; - int ret; - - rcu_read_lock(); - if (pid) { - tsk = find_task_by_vpid(pid); - if (!tsk) { - rcu_read_unlock(); - rdt_last_cmd_printf("No task %d\n", pid); - return -ESRCH; - } - } else { - tsk = current; - } - - get_task_struct(tsk); - rcu_read_unlock(); - - ret = rdtgroup_task_write_permission(tsk, of); - if (!ret) - ret = __rdtgroup_move_task(tsk, rdtgrp); - - put_task_struct(tsk); - return ret; -} - -static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct rdtgroup *rdtgrp; - char *pid_str; - int ret = 0; - pid_t pid; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - rdtgroup_kn_unlock(of->kn); - return -ENOENT; - } - rdt_last_cmd_clear(); - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - ret = -EINVAL; - rdt_last_cmd_puts("Pseudo-locking in progress\n"); - goto unlock; - } - - while (buf && buf[0] != '\0' && buf[0] != '\n') { - pid_str = strim(strsep(&buf, ",")); - - if (kstrtoint(pid_str, 0, &pid)) { - rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str); - ret = -EINVAL; - break; - } - - if (pid < 0) { - rdt_last_cmd_printf("Invalid pid %d\n", pid); - ret = -EINVAL; - break; - } - - ret = rdtgroup_move_task(pid, rdtgrp, of); - if (ret) { - rdt_last_cmd_printf("Error while processing task %d\n", pid); - 
break; - } - } - -unlock: - rdtgroup_kn_unlock(of->kn); - - return ret ?: nbytes; -} - -static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) -{ - struct task_struct *p, *t; - pid_t pid; - - rcu_read_lock(); - for_each_process_thread(p, t) { - if (is_closid_match(t, r) || is_rmid_match(t, r)) { - pid = task_pid_vnr(t); - if (pid) - seq_printf(s, "%d\n", pid); - } - } - rcu_read_unlock(); -} - -static int rdtgroup_tasks_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdtgroup *rdtgrp; - int ret = 0; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (rdtgrp) - show_rdt_tasks(rdtgrp, s); - else - ret = -ENOENT; - rdtgroup_kn_unlock(of->kn); - - return ret; -} - -static int rdtgroup_closid_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdtgroup *rdtgrp; - int ret = 0; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (rdtgrp) - seq_printf(s, "%u\n", rdtgrp->closid); - else - ret = -ENOENT; - rdtgroup_kn_unlock(of->kn); - - return ret; -} - -static int rdtgroup_rmid_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdtgroup *rdtgrp; - int ret = 0; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (rdtgrp) - seq_printf(s, "%u\n", rdtgrp->mon.rmid); - else - ret = -ENOENT; - rdtgroup_kn_unlock(of->kn); - - return ret; -} - -#ifdef CONFIG_PROC_CPU_RESCTRL - -/* - * A task can only be part of one resctrl control group and of one monitor - * group which is associated to that control group. - * - * 1) res: - * mon: - * - * resctrl is not available. - * - * 2) res:/ - * mon: - * - * Task is part of the root resctrl control group, and it is not associated - * to any monitor group. - * - * 3) res:/ - * mon:mon0 - * - * Task is part of the root resctrl control group and monitor group mon0. - * - * 4) res:group0 - * mon: - * - * Task is part of resctrl control group group0, and it is not associated - * to any monitor group. 
- * - * 5) res:group0 - * mon:mon1 - * - * Task is part of resctrl control group group0 and monitor group mon1. - */ -int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, - struct pid *pid, struct task_struct *tsk) -{ - struct rdtgroup *rdtg; - int ret = 0; - - mutex_lock(&rdtgroup_mutex); - - /* Return empty if resctrl has not been mounted. */ - if (!resctrl_mounted) { - seq_puts(s, "res:\nmon:\n"); - goto unlock; - } - - list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { - struct rdtgroup *crg; - - /* - * Task information is only relevant for shareable - * and exclusive groups. - */ - if (rdtg->mode != RDT_MODE_SHAREABLE && - rdtg->mode != RDT_MODE_EXCLUSIVE) - continue; - - if (!resctrl_arch_match_closid(tsk, rdtg->closid)) - continue; - - seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", - rdtg->kn->name); - seq_puts(s, "mon:"); - list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, - mon.crdtgrp_list) { - if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid, - crg->mon.rmid)) - continue; - seq_printf(s, "%s", crg->kn->name); - break; - } - seq_putc(s, '\n'); - goto unlock; - } - /* - * The above search should succeed. Otherwise return - * with an error. 
- */ - ret = -ENOENT; -unlock: - mutex_unlock(&rdtgroup_mutex); - - return ret; -} -#endif - -static int rdt_last_cmd_status_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - int len; - - mutex_lock(&rdtgroup_mutex); - len = seq_buf_used(&last_cmd_status); - if (len) - seq_printf(seq, "%.*s", len, last_cmd_status_buf); - else - seq_puts(seq, "ok\n"); - mutex_unlock(&rdtgroup_mutex); - return 0; -} - -static int rdt_num_closids_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - - seq_printf(seq, "%u\n", s->num_closid); - return 0; -} - -static int rdt_default_ctrl_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - - seq_printf(seq, "%x\n", r->default_ctrl); - return 0; -} - -static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - - seq_printf(seq, "%u\n", r->cache.min_cbm_bits); - return 0; -} - -static int rdt_shareable_bits_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - - seq_printf(seq, "%x\n", r->cache.shareable_bits); - return 0; -} - -/* - * rdt_bit_usage_show - Display current usage of resources - * - * A domain is a shared resource that can now be allocated differently. Here - * we display the current regions of the domain as an annotated bitmask. 
- * For each domain of this resource its allocation bitmask - * is annotated as below to indicate the current usage of the corresponding bit: - * 0 - currently unused - * X - currently available for sharing and used by software and hardware - * H - currently used by hardware only but available for software use - * S - currently used and shareable by software only - * E - currently used exclusively by one resource group - * P - currently pseudo-locked by one resource group - */ -static int rdt_bit_usage_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - /* - * Use unsigned long even though only 32 bits are used to ensure - * test_bit() is used safely. - */ - unsigned long sw_shareable = 0, hw_shareable = 0; - unsigned long exclusive = 0, pseudo_locked = 0; - struct rdt_resource *r = s->res; - struct rdt_domain *dom; - int i, hwb, swb, excl, psl; - enum rdtgrp_mode mode; - bool sep = false; - u32 ctrl_val; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - hw_shareable = r->cache.shareable_bits; - list_for_each_entry(dom, &r->domains, list) { - if (sep) - seq_putc(seq, ';'); - sw_shareable = 0; - exclusive = 0; - seq_printf(seq, "%d=", dom->id); - for (i = 0; i < closids_supported(); i++) { - if (!closid_allocated(i)) - continue; - ctrl_val = resctrl_arch_get_config(r, dom, i, - s->conf_type); - mode = rdtgroup_mode_by_closid(i); - switch (mode) { - case RDT_MODE_SHAREABLE: - sw_shareable |= ctrl_val; - break; - case RDT_MODE_EXCLUSIVE: - exclusive |= ctrl_val; - break; - case RDT_MODE_PSEUDO_LOCKSETUP: - /* - * RDT_MODE_PSEUDO_LOCKSETUP is possible - * here but not included since the CBM - * associated with this CLOSID in this mode - * is not initialized and no task or cpu can be - * assigned this CLOSID. 
- */ - break; - case RDT_MODE_PSEUDO_LOCKED: - case RDT_NUM_MODES: - WARN(1, - "invalid mode for closid %d\n", i); - break; - } - } - for (i = r->cache.cbm_len - 1; i >= 0; i--) { - pseudo_locked = dom->plr ? dom->plr->cbm : 0; - hwb = test_bit(i, &hw_shareable); - swb = test_bit(i, &sw_shareable); - excl = test_bit(i, &exclusive); - psl = test_bit(i, &pseudo_locked); - if (hwb && swb) - seq_putc(seq, 'X'); - else if (hwb && !swb) - seq_putc(seq, 'H'); - else if (!hwb && swb) - seq_putc(seq, 'S'); - else if (excl) - seq_putc(seq, 'E'); - else if (psl) - seq_putc(seq, 'P'); - else /* Unused bits remain */ - seq_putc(seq, '0'); - } - sep = true; - } - seq_putc(seq, '\n'); - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - return 0; -} - -static int rdt_min_bw_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - - seq_printf(seq, "%u\n", r->membw.min_bw); - return 0; -} - -static int rdt_num_rmids_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%d\n", r->num_rmid); - - return 0; -} - -static int rdt_mon_features_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - struct mon_evt *mevt; - - list_for_each_entry(mevt, &r->evt_list, list) { - seq_printf(seq, "%s\n", mevt->name); - if (mevt->configurable) - seq_printf(seq, "%s_config\n", mevt->name); - } - - return 0; -} - -static int rdt_bw_gran_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - - seq_printf(seq, "%u\n", r->membw.bw_gran); - return 0; -} - -static int rdt_delay_linear_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - - 
seq_printf(seq, "%u\n", r->membw.delay_linear); - return 0; -} - -static int max_threshold_occ_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold); - - return 0; -} - -static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - - if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) - seq_puts(seq, "per-thread\n"); - else - seq_puts(seq, "max\n"); - - return 0; -} - -static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - unsigned int bytes; - int ret; - - ret = kstrtouint(buf, 0, &bytes); - if (ret) - return ret; - - if (bytes > resctrl_rmid_realloc_limit) - return -EINVAL; - - resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes); - - return nbytes; -} - -/* - * rdtgroup_mode_show - Display mode of this resource group - */ -static int rdtgroup_mode_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdtgroup *rdtgrp; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - rdtgroup_kn_unlock(of->kn); - return -ENOENT; - } - - seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode)); - - rdtgroup_kn_unlock(of->kn); - return 0; -} - -static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) -{ - switch (my_type) { - case CDP_CODE: - return CDP_DATA; - case CDP_DATA: - return CDP_CODE; - default: - case CDP_NONE: - return CDP_NONE; - } -} - -static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct resctrl_schema *s = of->kn->parent->priv; - struct rdt_resource *r = s->res; - - seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); - - return 0; -} - -/** - * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other - * @r: Resource to which domain instance @d 
belongs. - * @d: The domain instance for which @closid is being tested. - * @cbm: Capacity bitmask being tested. - * @closid: Intended closid for @cbm. - * @type: CDP type of @r. - * @exclusive: Only check if overlaps with exclusive resource groups - * - * Checks if provided @cbm intended to be used for @closid on domain - * @d overlaps with any other closids or other hardware usage associated - * with this domain. If @exclusive is true then only overlaps with - * resource groups in exclusive mode will be considered. If @exclusive - * is false then overlaps with any resource group or hardware entities - * will be considered. - * - * @cbm is unsigned long, even if only 32 bits are used, to make the - * bitmap functions work correctly. - * - * Return: false if CBM does not overlap, true if it does. - */ -static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - unsigned long cbm, int closid, - enum resctrl_conf_type type, bool exclusive) -{ - enum rdtgrp_mode mode; - unsigned long ctrl_b; - int i; - - /* Check for any overlap with regions used by hardware directly */ - if (!exclusive) { - ctrl_b = r->cache.shareable_bits; - if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) - return true; - } - - /* Check for overlap with other resource groups */ - for (i = 0; i < closids_supported(); i++) { - ctrl_b = resctrl_arch_get_config(r, d, i, type); - mode = rdtgroup_mode_by_closid(i); - if (closid_allocated(i) && i != closid && - mode != RDT_MODE_PSEUDO_LOCKSETUP) { - if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { - if (exclusive) { - if (mode == RDT_MODE_EXCLUSIVE) - return true; - continue; - } - return true; - } - } - } - - return false; -} - -/** - * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware - * @s: Schema for the resource to which domain instance @d belongs. - * @d: The domain instance for which @closid is being tested. - * @cbm: Capacity bitmask being tested. - * @closid: Intended closid for @cbm. 
- * @exclusive: Only check if overlaps with exclusive resource groups - * - * Resources that can be allocated using a CBM can use the CBM to control - * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test - * for overlap. Overlap test is not limited to the specific resource for - * which the CBM is intended though - when dealing with CDP resources that - * share the underlying hardware the overlap check should be performed on - * the CDP resource sharing the hardware also. - * - * Refer to description of __rdtgroup_cbm_overlaps() for the details of the - * overlap test. - * - * Return: true if CBM overlap detected, false if there is no overlap - */ -bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, - unsigned long cbm, int closid, bool exclusive) -{ - enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); - struct rdt_resource *r = s->res; - - if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type, - exclusive)) - return true; - - if (!resctrl_arch_get_cdp_enabled(r->rid)) - return false; - return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive); -} - -/** - * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive - * @rdtgrp: Resource group identified through its closid. - * - * An exclusive resource group implies that there should be no sharing of - * its allocated resources. At the time this group is considered to be - * exclusive this test can determine if its current schemata supports this - * setting by testing for overlap with all other resource groups. - * - * Return: true if resource group can be exclusive, false if there is overlap - * with allocations of other resource groups and thus this resource group - * cannot be exclusive. 
- */ -static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) -{ - int closid = rdtgrp->closid; - struct resctrl_schema *s; - struct rdt_resource *r; - bool has_cache = false; - struct rdt_domain *d; - u32 ctrl; - - /* Walking r->domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); - - list_for_each_entry(s, &resctrl_schema_all, list) { - r = s->res; - if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA) - continue; - has_cache = true; - list_for_each_entry(d, &r->domains, list) { - ctrl = resctrl_arch_get_config(r, d, closid, - s->conf_type); - if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { - rdt_last_cmd_puts("Schemata overlaps\n"); - return false; - } - } - } - - if (!has_cache) { - rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n"); - return false; - } - - return true; -} - -/* - * rdtgroup_mode_write - Modify the resource group's mode - */ -static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct rdtgroup *rdtgrp; - enum rdtgrp_mode mode; - int ret = 0; - - /* Valid input requires a trailing newline */ - if (nbytes == 0 || buf[nbytes - 1] != '\n') - return -EINVAL; - buf[nbytes - 1] = '\0'; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - rdtgroup_kn_unlock(of->kn); - return -ENOENT; - } - - rdt_last_cmd_clear(); - - mode = rdtgrp->mode; - - if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) || - (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) || - (!strcmp(buf, "pseudo-locksetup") && - mode == RDT_MODE_PSEUDO_LOCKSETUP) || - (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED)) - goto out; - - if (mode == RDT_MODE_PSEUDO_LOCKED) { - rdt_last_cmd_puts("Cannot change pseudo-locked group\n"); - ret = -EINVAL; - goto out; - } - - if (!strcmp(buf, "shareable")) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - ret = rdtgroup_locksetup_exit(rdtgrp); - if (ret) - goto out; - } - rdtgrp->mode = 
RDT_MODE_SHAREABLE; - } else if (!strcmp(buf, "exclusive")) { - if (!rdtgroup_mode_test_exclusive(rdtgrp)) { - ret = -EINVAL; - goto out; - } - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - ret = rdtgroup_locksetup_exit(rdtgrp); - if (ret) - goto out; - } - rdtgrp->mode = RDT_MODE_EXCLUSIVE; - } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) && - !strcmp(buf, "pseudo-locksetup")) { - ret = rdtgroup_locksetup_enter(rdtgrp); - if (ret) - goto out; - rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP; - } else { - rdt_last_cmd_puts("Unknown or unsupported mode\n"); - ret = -EINVAL; - } - -out: - rdtgroup_kn_unlock(of->kn); - return ret ?: nbytes; -} - -/** - * rdtgroup_cbm_to_size - Translate CBM to size in bytes - * @r: RDT resource to which @d belongs. - * @d: RDT domain instance. - * @cbm: bitmask for which the size should be computed. - * - * The bitmask provided associated with the RDT domain instance @d will be - * translated into how many bytes it represents. The size in bytes is - * computed by first dividing the total cache size by the CBM length to - * determine how many bytes each bit in the bitmask represents. The result - * is multiplied with the number of bits set in the bitmask. - * - * @cbm is unsigned long, even if only 32 bits are used to make the - * bitmap functions work correctly. 
- */ -unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, unsigned long cbm) -{ - struct cpu_cacheinfo *ci; - unsigned int size = 0; - int num_b, i; - - num_b = bitmap_weight(&cbm, r->cache.cbm_len); - ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == r->cache_level) { - size = ci->info_list[i].size / r->cache.cbm_len * num_b; - break; - } - } - - return size; -} - -/* - * rdtgroup_size_show - Display size in bytes of allocated regions - * - * The "size" file mirrors the layout of the "schemata" file, printing the - * size in bytes of each region instead of the capacity bitmask. - */ -static int rdtgroup_size_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct resctrl_schema *schema; - enum resctrl_conf_type type; - struct rdtgroup *rdtgrp; - struct rdt_resource *r; - struct rdt_domain *d; - unsigned int size; - int ret = 0; - u32 closid; - bool sep; - u32 ctrl; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - rdtgroup_kn_unlock(of->kn); - return -ENOENT; - } - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { - if (!rdtgrp->plr->d) { - rdt_last_cmd_clear(); - rdt_last_cmd_puts("Cache domain offline\n"); - ret = -ENODEV; - } else { - seq_printf(s, "%*s:", max_name_width, - rdtgrp->plr->s->name); - size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, - rdtgrp->plr->d, - rdtgrp->plr->cbm); - seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); - } - goto out; - } - - closid = rdtgrp->closid; - - list_for_each_entry(schema, &resctrl_schema_all, list) { - r = schema->res; - type = schema->conf_type; - sep = false; - seq_printf(s, "%*s:", max_name_width, schema->name); - list_for_each_entry(d, &r->domains, list) { - if (sep) - seq_putc(s, ';'); - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - size = 0; - } else { - if (is_mba_sc(r)) - ctrl = d->mbps_val[closid]; - else - ctrl = resctrl_arch_get_config(r, d, - closid, - type); - if 
(r->rid == RDT_RESOURCE_MBA || - r->rid == RDT_RESOURCE_SMBA) - size = ctrl; - else - size = rdtgroup_cbm_to_size(r, d, ctrl); - } - seq_printf(s, "%d=%u", d->id, size); - sep = true; - } - seq_putc(s, '\n'); - } - -out: - rdtgroup_kn_unlock(of->kn); - - return ret; -} - -static void mondata_config_read(struct resctrl_mon_config_info *mon_info) -{ - smp_call_function_any(&mon_info->d->cpu_mask, - resctrl_arch_mon_event_config_read, mon_info, 1); -} - -static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) -{ - struct resctrl_mon_config_info mon_info = {0}; - struct rdt_domain *dom; - bool sep = false; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - list_for_each_entry(dom, &r->domains, list) { - if (sep) - seq_puts(s, ";"); - - memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info)); - mon_info.r = r; - mon_info.d = dom; - mon_info.evtid = evtid; - mondata_config_read(&mon_info); - - seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config); - sep = true; - } - seq_puts(s, "\n"); - - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - - return 0; -} - -static int mbm_total_bytes_config_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); - - return 0; -} - -static int mbm_local_bytes_config_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); - - return 0; -} - -static int mbm_config_write_domain(struct rdt_resource *r, - struct rdt_domain *d, u32 evtid, u32 val) -{ - struct resctrl_mon_config_info mon_info = {0}; - - /* - * Read the current config value first. If both are the same then - * no need to write it again. 
- */ - mon_info.r = r; - mon_info.d = d; - mon_info.evtid = evtid; - mondata_config_read(&mon_info); - if (mon_info.mon_config == val) - return 0; - - mon_info.mon_config = val; - - /* - * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the - * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE - * are scoped at the domain level. Writing any of these MSRs - * on one CPU is observed by all the CPUs in the domain. - */ - smp_call_function_any(&d->cpu_mask, resctrl_arch_mon_event_config_write, - &mon_info, 1); - if (mon_info.err) { - rdt_last_cmd_puts("Invalid event configuration\n"); - return mon_info.err; - } - - /* - * When an Event Configuration is changed, the bandwidth counters - * for all RMIDs and Events will be cleared by the hardware. The - * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for - * every RMID on the next read to any event for every RMID. - * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62) - * cleared while it is tracked by the hardware. Clear the - * mbm_local and mbm_total counts for all the RMIDs. 
- */ - resctrl_arch_reset_rmid_all(r, d); - - return 0; -} - -static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) -{ - char *dom_str = NULL, *id_str; - unsigned long dom_id, val; - struct rdt_domain *d; - int err; - - /* Walking r->domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); - -next: - if (!tok || tok[0] == '\0') - return 0; - - /* Start processing the strings for each domain */ - dom_str = strim(strsep(&tok, ";")); - id_str = strsep(&dom_str, "="); - - if (!id_str || kstrtoul(id_str, 10, &dom_id)) { - rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n"); - return -EINVAL; - } - - if (!dom_str || kstrtoul(dom_str, 16, &val)) { - rdt_last_cmd_puts("Non-numeric event configuration value\n"); - return -EINVAL; - } - - /* Value from user cannot be more than the supported set of events */ - if ((val & r->mbm_cfg_mask) != val) { - rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", - r->mbm_cfg_mask); - return -EINVAL; - } - - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { - err = mbm_config_write_domain(r, d, evtid, val); - if (err) - return err; - goto next; - } - } - - return -EINVAL; -} - -static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, - loff_t off) -{ - struct rdt_resource *r = of->kn->parent->priv; - int ret; - - /* Valid input requires a trailing newline */ - if (nbytes == 0 || buf[nbytes - 1] != '\n') - return -EINVAL; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - rdt_last_cmd_clear(); - - buf[nbytes - 1] = '\0'; - - ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); - - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - - return ret ?: nbytes; -} - -static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, - loff_t off) -{ - struct rdt_resource *r = of->kn->parent->priv; - int ret; - - /* Valid input requires a trailing newline */ - if 
(nbytes == 0 || buf[nbytes - 1] != '\n') - return -EINVAL; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - rdt_last_cmd_clear(); - - buf[nbytes - 1] = '\0'; - - ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID); - - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - - return ret ?: nbytes; -} - -/* rdtgroup information files for one cache resource. */ -static struct rftype res_common_files[] = { - { - .name = "last_cmd_status", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_last_cmd_status_show, - .fflags = RFTYPE_TOP_INFO, - }, - { - .name = "num_closids", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_num_closids_show, - .fflags = RFTYPE_CTRL_INFO, - }, - { - .name = "mon_features", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_mon_features_show, - .fflags = RFTYPE_MON_INFO, - }, - { - .name = "num_rmids", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_num_rmids_show, - .fflags = RFTYPE_MON_INFO, - }, - { - .name = "cbm_mask", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_default_ctrl_show, - .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "min_cbm_bits", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_min_cbm_bits_show, - .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "shareable_bits", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_shareable_bits_show, - .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "bit_usage", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_bit_usage_show, - .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "min_bandwidth", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_min_bw_show, - .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, - }, - { - .name = "bandwidth_gran", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = 
rdt_bw_gran_show, - .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, - }, - { - .name = "delay_linear", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_delay_linear_show, - .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, - }, - /* - * Platform specific which (if any) capabilities are provided by - * thread_throttle_mode. Defer "fflags" initialization to platform - * discovery. - */ - { - .name = "thread_throttle_mode", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_thread_throttle_mode_show, - }, - { - .name = "max_threshold_occupancy", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = max_threshold_occ_write, - .seq_show = max_threshold_occ_show, - .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "mbm_total_bytes_config", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = mbm_total_bytes_config_show, - .write = mbm_total_bytes_config_write, - }, - { - .name = "mbm_local_bytes_config", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = mbm_local_bytes_config_show, - .write = mbm_local_bytes_config_write, - }, - { - .name = "cpus", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_cpus_write, - .seq_show = rdtgroup_cpus_show, - .fflags = RFTYPE_BASE, - }, - { - .name = "cpus_list", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_cpus_write, - .seq_show = rdtgroup_cpus_show, - .flags = RFTYPE_FLAGS_CPUS_LIST, - .fflags = RFTYPE_BASE, - }, - { - .name = "tasks", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_tasks_write, - .seq_show = rdtgroup_tasks_show, - .fflags = RFTYPE_BASE, - }, - { - .name = "mon_hw_id", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdtgroup_rmid_show, - .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG, - }, - { - .name = "schemata", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_schemata_write, - .seq_show = rdtgroup_schemata_show, - 
.fflags = RFTYPE_CTRL_BASE, - }, - { - .name = "mode", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_mode_write, - .seq_show = rdtgroup_mode_show, - .fflags = RFTYPE_CTRL_BASE, - }, - { - .name = "size", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdtgroup_size_show, - .fflags = RFTYPE_CTRL_BASE, - }, - { - .name = "sparse_masks", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_has_sparse_bitmasks_show, - .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "ctrl_hw_id", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdtgroup_closid_show, - .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG, - }, - -}; - -static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) -{ - struct rftype *rfts, *rft; - int ret, len; - - rfts = res_common_files; - len = ARRAY_SIZE(res_common_files); - - lockdep_assert_held(&rdtgroup_mutex); - - if (resctrl_debug) - fflags |= RFTYPE_DEBUG; - - for (rft = rfts; rft < rfts + len; rft++) { - if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) { - ret = rdtgroup_add_file(kn, rft); - if (ret) - goto error; - } - } - - return 0; -error: - pr_warn("Failed to add %s, err=%d\n", rft->name, ret); - while (--rft >= rfts) { - if ((fflags & rft->fflags) == rft->fflags) - kernfs_remove_by_name(kn, rft->name); - } - return ret; -} - -static struct rftype *rdtgroup_get_rftype_by_name(const char *name) -{ - struct rftype *rfts, *rft; - int len; - - rfts = res_common_files; - len = ARRAY_SIZE(res_common_files); - - for (rft = rfts; rft < rfts + len; rft++) { - if (!strcmp(rft->name, name)) - return rft; - } - - return NULL; -} - -static void thread_throttle_mode_init(void) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - struct rftype *rft; - - if (!r->alloc_capable || - r->membw.throttle_mode == THREAD_THROTTLE_UNDEFINED) - return; - - rft = rdtgroup_get_rftype_by_name("thread_throttle_mode"); - if (!rft) - 
return; - - rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB; -} - -void mbm_config_rftype_init(const char *config) -{ - struct rftype *rft; - - rft = rdtgroup_get_rftype_by_name(config); - if (rft) - rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE; -} - -/** - * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file - * @r: The resource group with which the file is associated. - * @name: Name of the file - * - * The permissions of named resctrl file, directory, or link are modified - * to not allow read, write, or execute by any user. - * - * WARNING: This function is intended to communicate to the user that the - * resctrl file has been locked down - that it is not relevant to the - * particular state the system finds itself in. It should not be relied - * on to protect from user access because after the file's permissions - * are restricted the user can still change the permissions using chmod - * from the command line. - * - * Return: 0 on success, <0 on failure. - */ -int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name) -{ - struct iattr iattr = {.ia_valid = ATTR_MODE,}; - struct kernfs_node *kn; - int ret = 0; - - kn = kernfs_find_and_get_ns(r->kn, name, NULL); - if (!kn) - return -ENOENT; - - switch (kernfs_type(kn)) { - case KERNFS_DIR: - iattr.ia_mode = S_IFDIR; - break; - case KERNFS_FILE: - iattr.ia_mode = S_IFREG; - break; - case KERNFS_LINK: - iattr.ia_mode = S_IFLNK; - break; - } - - ret = kernfs_setattr(kn, &iattr); - kernfs_put(kn); - return ret; -} - -/** - * rdtgroup_kn_mode_restore - Restore user access to named resctrl file - * @r: The resource group with which the file is associated. - * @name: Name of the file - * @mask: Mask of permissions that should be restored - * - * Restore the permissions of the named file. If @name is a directory the - * permissions of its parent will be used. - * - * Return: 0 on success, <0 on failure. 
- */ -int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, - umode_t mask) -{ - struct iattr iattr = {.ia_valid = ATTR_MODE,}; - struct kernfs_node *kn, *parent; - struct rftype *rfts, *rft; - int ret, len; - - rfts = res_common_files; - len = ARRAY_SIZE(res_common_files); - - for (rft = rfts; rft < rfts + len; rft++) { - if (!strcmp(rft->name, name)) - iattr.ia_mode = rft->mode & mask; - } - - kn = kernfs_find_and_get_ns(r->kn, name, NULL); - if (!kn) - return -ENOENT; - - switch (kernfs_type(kn)) { - case KERNFS_DIR: - parent = kernfs_get_parent(kn); - if (parent) { - iattr.ia_mode |= parent->mode; - kernfs_put(parent); - } - iattr.ia_mode |= S_IFDIR; - break; - case KERNFS_FILE: - iattr.ia_mode |= S_IFREG; - break; - case KERNFS_LINK: - iattr.ia_mode |= S_IFLNK; - break; - } - - ret = kernfs_setattr(kn, &iattr); - kernfs_put(kn); - return ret; -} - -static int rdtgroup_mkdir_info_resdir(void *priv, char *name, - unsigned long fflags) -{ - struct kernfs_node *kn_subdir; - int ret; - - kn_subdir = kernfs_create_dir(kn_info, name, - kn_info->mode, priv); - if (IS_ERR(kn_subdir)) - return PTR_ERR(kn_subdir); - - ret = rdtgroup_kn_set_ugid(kn_subdir); - if (ret) - return ret; - - ret = rdtgroup_add_files(kn_subdir, fflags); - if (!ret) - kernfs_activate(kn_subdir); - - return ret; -} - -static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) -{ - enum resctrl_res_level i; - struct resctrl_schema *s; - struct rdt_resource *r; - unsigned long fflags; - char name[32]; - int ret; - - /* create the directory */ - kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); - if (IS_ERR(kn_info)) - return PTR_ERR(kn_info); - - ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO); - if (ret) - goto out_destroy; - - /* loop over enabled controls, these are all alloc_capable */ - list_for_each_entry(s, &resctrl_schema_all, list) { - r = s->res; - fflags = r->fflags | RFTYPE_CTRL_INFO; - ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); - 
if (ret) - goto out_destroy; - } - - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->mon_capable) - continue; - - fflags = r->fflags | RFTYPE_MON_INFO; - sprintf(name, "%s_MON", r->name); - ret = rdtgroup_mkdir_info_resdir(r, name, fflags); - if (ret) - goto out_destroy; - } - - ret = rdtgroup_kn_set_ugid(kn_info); - if (ret) - goto out_destroy; - - kernfs_activate(kn_info); - - return 0; - -out_destroy: - kernfs_remove(kn_info); - return ret; -} - -static int -mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, - char *name, struct kernfs_node **dest_kn) -{ - struct kernfs_node *kn; - int ret; - - /* create the directory */ - kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - if (dest_kn) - *dest_kn = kn; - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) - goto out_destroy; - - kernfs_activate(kn); - - return 0; - -out_destroy: - kernfs_remove(kn); - return ret; -} - -static inline bool is_mba_linear(void) -{ - return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; -} - -static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) -{ - u32 num_closid = resctrl_arch_get_num_closid(r); - int cpu = cpumask_any(&d->cpu_mask); - int i; - - d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), - GFP_KERNEL, cpu_to_node(cpu)); - if (!d->mbps_val) - return -ENOMEM; - - for (i = 0; i < num_closid; i++) - d->mbps_val[i] = MBA_MAX_MBPS; - - return 0; -} - -static void mba_sc_domain_destroy(struct rdt_resource *r, - struct rdt_domain *d) -{ - kfree(d->mbps_val); - d->mbps_val = NULL; -} - -/* - * MBA software controller is supported only if - * MBM is supported and MBA is in linear scale. 
- */ -static bool supports_mba_mbps(void) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - - return (resctrl_arch_is_mbm_local_enabled() && - r->alloc_capable && is_mba_linear()); -} - -/* - * Enable or disable the MBA software controller - * which helps user specify bandwidth in MBps. - */ -static int set_mba_sc(bool mba_sc) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - u32 num_closid = resctrl_arch_get_num_closid(r); - struct rdt_domain *d; - int i; - - if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) - return -EINVAL; - - r->membw.mba_sc = mba_sc; - - list_for_each_entry(d, &r->domains, list) { - for (i = 0; i < num_closid; i++) - d->mbps_val[i] = MBA_MAX_MBPS; - } - - return 0; -} - -/* - * We don't allow rdtgroup directories to be created anywhere - * except the root directory. Thus when looking for the rdtgroup - * structure for a kernfs node we are either looking at a directory, - * in which case the rdtgroup structure is pointed at by the "priv" - * field, otherwise we have a file, and need only look to the parent - * to find the rdtgroup. - */ -static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) -{ - if (kernfs_type(kn) == KERNFS_DIR) { - /* - * All the resource directories use "kn->priv" - * to point to the "struct rdtgroup" for the - * resource. "info" and its subdirectories don't - * have rdtgroup structures, so return NULL here. 
- */ - if (kn == kn_info || kn->parent == kn_info) - return NULL; - else - return kn->priv; - } else { - return kn->parent->priv; - } -} - -static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn) -{ - atomic_inc(&rdtgrp->waitcount); - kernfs_break_active_protection(kn); -} - -static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn) -{ - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) - rdtgroup_pseudo_lock_remove(rdtgrp); - kernfs_unbreak_active_protection(kn); - rdtgroup_remove(rdtgrp); - } else { - kernfs_unbreak_active_protection(kn); - } -} - -struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) -{ - struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); - - if (!rdtgrp) - return NULL; - - rdtgroup_kn_get(rdtgrp, kn); - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - /* Was this group deleted while we waited? 
*/ - if (rdtgrp->flags & RDT_DELETED) - return NULL; - - return rdtgrp; -} - -void rdtgroup_kn_unlock(struct kernfs_node *kn) -{ - struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); - - if (!rdtgrp) - return; - - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - - rdtgroup_kn_put(rdtgrp, kn); -} - -static int mkdir_mondata_all(struct kernfs_node *parent_kn, - struct rdtgroup *prgrp, - struct kernfs_node **mon_data_kn); - -static void rdt_disable_ctx(void) -{ - resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); - resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); - set_mba_sc(false); - - resctrl_debug = false; -} - -static int rdt_enable_ctx(struct rdt_fs_context *ctx) -{ - int ret = 0; - - if (ctx->enable_cdpl2) { - ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true); - if (ret) - goto out_done; - } - - if (ctx->enable_cdpl3) { - ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true); - if (ret) - goto out_cdpl2; - } - - if (ctx->enable_mba_mbps) { - ret = set_mba_sc(true); - if (ret) - goto out_cdpl3; - } - - if (ctx->enable_debug) - resctrl_debug = true; - - return 0; - -out_cdpl3: - resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); -out_cdpl2: - resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); -out_done: - return ret; -} - -static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type) -{ - struct resctrl_schema *s; - const char *suffix = ""; - int ret, cl; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - - s->res = r; - s->num_closid = resctrl_arch_get_num_closid(r); - if (resctrl_arch_get_cdp_enabled(r->rid)) - s->num_closid /= 2; - - s->conf_type = type; - switch (type) { - case CDP_CODE: - suffix = "CODE"; - break; - case CDP_DATA: - suffix = "DATA"; - break; - case CDP_NONE: - suffix = ""; - break; - } - - ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix); - if (ret >= sizeof(s->name)) { - kfree(s); - return -EINVAL; - } - - cl = strlen(s->name); - - /* - * If CDP is 
supported by this resource, but not enabled, - * include the suffix. This ensures the tabular format of the - * schemata file does not change between mounts of the filesystem. - */ - if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid)) - cl += 4; - - if (cl > max_name_width) - max_name_width = cl; - - /* - * Choose a width for the resource data based on the resource that has - * widest name and cbm. - */ - max_data_width = max(max_data_width, r->data_width); - - INIT_LIST_HEAD(&s->list); - list_add(&s->list, &resctrl_schema_all); - - return 0; -} - -static int schemata_list_create(void) -{ - enum resctrl_res_level i; - struct rdt_resource *r; - int ret = 0; - - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->alloc_capable) - continue; - - if (resctrl_arch_get_cdp_enabled(r->rid)) { - ret = schemata_list_add(r, CDP_CODE); - if (ret) - break; - - ret = schemata_list_add(r, CDP_DATA); - } else { - ret = schemata_list_add(r, CDP_NONE); - } - - if (ret) - break; - } - - return ret; -} - -static void schemata_list_destroy(void) -{ - struct resctrl_schema *s, *tmp; - - list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) { - list_del(&s->list); - kfree(s); - } -} - -static int rdt_get_tree(struct fs_context *fc) -{ - struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_fs_context *ctx = rdt_fc2context(fc); - unsigned long flags = RFTYPE_CTRL_BASE; - struct rdt_domain *dom; - int ret; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - /* - * resctrl file system can only be mounted once. 
- */ - if (resctrl_mounted) { - ret = -EBUSY; - goto out; - } - - ret = rdtgroup_setup_root(ctx); - if (ret) - goto out; - - ret = rdt_enable_ctx(ctx); - if (ret) - goto out_root; - - ret = schemata_list_create(); - if (ret) { - schemata_list_destroy(); - goto out_ctx; - } - - closid_init(); - - if (resctrl_arch_mon_capable()) - flags |= RFTYPE_MON; - - ret = rdtgroup_add_files(rdtgroup_default.kn, flags); - if (ret) - goto out_schemata_free; - - kernfs_activate(rdtgroup_default.kn); - - ret = rdtgroup_create_info_dir(rdtgroup_default.kn); - if (ret < 0) - goto out_schemata_free; - - if (resctrl_arch_mon_capable()) { - ret = mongroup_create_dir(rdtgroup_default.kn, - &rdtgroup_default, "mon_groups", - &kn_mongrp); - if (ret < 0) - goto out_info; - - ret = mkdir_mondata_all(rdtgroup_default.kn, - &rdtgroup_default, &kn_mondata); - if (ret < 0) - goto out_mongrp; - rdtgroup_default.mon.mon_data_kn = kn_mondata; - } - - ret = rdt_pseudo_lock_init(); - if (ret) - goto out_mondata; - - ret = kernfs_get_tree(fc); - if (ret < 0) - goto out_psl; - - if (resctrl_arch_alloc_capable()) - resctrl_arch_enable_alloc(); - if (resctrl_arch_mon_capable()) - resctrl_arch_enable_mon(); - - if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable()) - resctrl_mounted = true; - - if (resctrl_is_mbm_enabled()) { - list_for_each_entry(dom, &l3->domains, list) - mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, - RESCTRL_PICK_ANY_CPU); - } - - goto out; - -out_psl: - rdt_pseudo_lock_release(); -out_mondata: - if (resctrl_arch_mon_capable()) - kernfs_remove(kn_mondata); -out_mongrp: - if (resctrl_arch_mon_capable()) - kernfs_remove(kn_mongrp); -out_info: - kernfs_remove(kn_info); -out_schemata_free: - schemata_list_destroy(); -out_ctx: - rdt_disable_ctx(); -out_root: - rdtgroup_destroy_root(); -out: - rdt_last_cmd_clear(); - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - return ret; -} - -enum rdt_param { - Opt_cdp, - Opt_cdpl2, - Opt_mba_mbps, - Opt_debug, - 
nr__rdt_params -}; - -static const struct fs_parameter_spec rdt_fs_parameters[] = { - fsparam_flag("cdp", Opt_cdp), - fsparam_flag("cdpl2", Opt_cdpl2), - fsparam_flag("mba_MBps", Opt_mba_mbps), - fsparam_flag("debug", Opt_debug), - {} -}; - -static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) -{ - struct rdt_fs_context *ctx = rdt_fc2context(fc); - struct fs_parse_result result; - int opt; - - opt = fs_parse(fc, rdt_fs_parameters, param, &result); - if (opt < 0) - return opt; - - switch (opt) { - case Opt_cdp: - ctx->enable_cdpl3 = true; - return 0; - case Opt_cdpl2: - ctx->enable_cdpl2 = true; - return 0; - case Opt_mba_mbps: - if (!supports_mba_mbps()) - return -EINVAL; - ctx->enable_mba_mbps = true; - return 0; - case Opt_debug: - ctx->enable_debug = true; - return 0; - } - - return -EINVAL; -} - -static void rdt_fs_context_free(struct fs_context *fc) -{ - struct rdt_fs_context *ctx = rdt_fc2context(fc); - - kernfs_free_fs_context(fc); - kfree(ctx); -} - -static const struct fs_context_operations rdt_fs_context_ops = { - .free = rdt_fs_context_free, - .parse_param = rdt_parse_param, - .get_tree = rdt_get_tree, -}; - -static int rdt_init_fs_context(struct fs_context *fc) -{ - struct rdt_fs_context *ctx; - - ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; - fc->fs_private = &ctx->kfc; - fc->ops = &rdt_fs_context_ops; - put_user_ns(fc->user_ns); - fc->user_ns = get_user_ns(&init_user_ns); - fc->global = true; - return 0; -} - -/* - * Move tasks from one to the other group. If @from is NULL, then all tasks - * in the systems are moved unconditionally (used for teardown). - * - * If @mask is not NULL the cpus on which moved tasks are running are set - * in that mask so the update smp function call is restricted to affected - * cpus. 
- */ -static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, - struct cpumask *mask) -{ - struct task_struct *p, *t; - - read_lock(&tasklist_lock); - for_each_process_thread(p, t) { - if (!from || is_closid_match(t, from) || - is_rmid_match(t, from)) { - resctrl_arch_set_closid_rmid(t, to->closid, - to->mon.rmid); - - /* - * Order the closid/rmid stores above before the loads - * in task_curr(). This pairs with the full barrier - * between the rq->curr update and - * resctrl_arch_sched_in() during context switch. - */ - smp_mb(); - - /* - * If the task is on a CPU, set the CPU in the mask. - * The detection is inaccurate as tasks might move or - * schedule before the smp function call takes place. - * In such a case the function call is pointless, but - * there is no other side effect. - */ - if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t)) - cpumask_set_cpu(task_cpu(t), mask); - } - } - read_unlock(&tasklist_lock); -} - -static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) -{ - struct rdtgroup *sentry, *stmp; - struct list_head *head; - - head = &rdtgrp->mon.crdtgrp_list; - list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { - free_rmid(sentry->closid, sentry->mon.rmid); - list_del(&sentry->mon.crdtgrp_list); - - if (atomic_read(&sentry->waitcount) != 0) - sentry->flags = RDT_DELETED; - else - rdtgroup_remove(sentry); - } -} - -/* - * Forcibly remove all of subdirectories under root. 
- */ -static void rmdir_all_sub(void) -{ - struct rdtgroup *rdtgrp, *tmp; - - /* Move all tasks to the default resource group */ - rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); - - list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { - /* Free any child rmids */ - free_all_child_rdtgrp(rdtgrp); - - /* Remove each rdtgroup other than root */ - if (rdtgrp == &rdtgroup_default) - continue; - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) - rdtgroup_pseudo_lock_remove(rdtgrp); - - /* - * Give any CPUs back to the default group. We cannot copy - * cpu_online_mask because a CPU might have executed the - * offline callback already, but is still marked online. - */ - cpumask_or(&rdtgroup_default.cpu_mask, - &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); - - free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); - - kernfs_remove(rdtgrp->kn); - list_del(&rdtgrp->rdtgroup_list); - - if (atomic_read(&rdtgrp->waitcount) != 0) - rdtgrp->flags = RDT_DELETED; - else - rdtgroup_remove(rdtgrp); - } - /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ - update_closid_rmid(cpu_online_mask, &rdtgroup_default); - - kernfs_remove(kn_info); - kernfs_remove(kn_mongrp); - kernfs_remove(kn_mondata); -} - -static void rdt_kill_sb(struct super_block *sb) -{ - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - rdt_disable_ctx(); - - /* Put everything back to default values. 
*/ - resctrl_arch_reset_resources(); - - rmdir_all_sub(); - rdt_pseudo_lock_release(); - rdtgroup_default.mode = RDT_MODE_SHAREABLE; - schemata_list_destroy(); - rdtgroup_destroy_root(); - if (resctrl_arch_alloc_capable()) - resctrl_arch_disable_alloc(); - if (resctrl_arch_mon_capable()) - resctrl_arch_disable_mon(); - resctrl_mounted = false; - kernfs_kill_sb(sb); - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); -} - -static struct file_system_type rdt_fs_type = { - .name = "resctrl", - .init_fs_context = rdt_init_fs_context, - .parameters = rdt_fs_parameters, - .kill_sb = rdt_kill_sb, -}; - -static int mon_addfile(struct kernfs_node *parent_kn, const char *name, - void *priv) -{ - struct kernfs_node *kn; - int ret = 0; - - kn = __kernfs_create_file(parent_kn, name, 0444, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, - &kf_mondata_ops, priv, NULL, NULL); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) { - kernfs_remove(kn); - return ret; - } - - return ret; -} - -/* - * Remove all subdirectories of mon_data of ctrl_mon groups - * and monitor groups with given domain id. 
- */ -static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - unsigned int dom_id) -{ - struct rdtgroup *prgrp, *crgrp; - char name[32]; - - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - sprintf(name, "mon_%s_%02d", r->name, dom_id); - kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); - - list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) - kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); - } -} - -static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, - struct rdt_domain *d, - struct rdt_resource *r, struct rdtgroup *prgrp) -{ - union mon_data_bits priv; - struct kernfs_node *kn; - struct mon_evt *mevt; - struct rmid_read rr; - char name[32]; - int ret; - - sprintf(name, "mon_%s_%02d", r->name, d->id); - /* create the directory */ - kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) - goto out_destroy; - - if (WARN_ON(list_empty(&r->evt_list))) { - ret = -EPERM; - goto out_destroy; - } - - priv.u.rid = r->rid; - priv.u.domid = d->id; - list_for_each_entry(mevt, &r->evt_list, list) { - priv.u.evtid = mevt->evtid; - ret = mon_addfile(kn, mevt->name, priv.priv); - if (ret) - goto out_destroy; - - if (resctrl_is_mbm_event(mevt->evtid)) - mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); - } - kernfs_activate(kn); - return 0; - -out_destroy: - kernfs_remove(kn); - return ret; -} - -/* - * Add all subdirectories of mon_data for "ctrl_mon" groups - * and "monitor" groups with given domain id. 
- */ -static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d) -{ - struct kernfs_node *parent_kn; - struct rdtgroup *prgrp, *crgrp; - struct list_head *head; - - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - parent_kn = prgrp->mon.mon_data_kn; - mkdir_mondata_subdir(parent_kn, d, r, prgrp); - - head = &prgrp->mon.crdtgrp_list; - list_for_each_entry(crgrp, head, mon.crdtgrp_list) { - parent_kn = crgrp->mon.mon_data_kn; - mkdir_mondata_subdir(parent_kn, d, r, crgrp); - } - } -} - -static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, - struct rdt_resource *r, - struct rdtgroup *prgrp) -{ - struct rdt_domain *dom; - int ret; - - /* Walking r->domains, ensure it can't race with cpuhp */ - lockdep_assert_cpus_held(); - - list_for_each_entry(dom, &r->domains, list) { - ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); - if (ret) - return ret; - } - - return 0; -} - -/* - * This creates a directory mon_data which contains the monitored data. - * - * mon_data has one directory for each domain which are named - * in the format mon__. For ex: A mon_data - * with L3 domain looks as below: - * ./mon_data: - * mon_L3_00 - * mon_L3_01 - * mon_L3_02 - * ... - * - * Each domain directory has one file per event: - * ./mon_L3_00/: - * llc_occupancy - * - */ -static int mkdir_mondata_all(struct kernfs_node *parent_kn, - struct rdtgroup *prgrp, - struct kernfs_node **dest_kn) -{ - enum resctrl_res_level i; - struct rdt_resource *r; - struct kernfs_node *kn; - int ret; - - /* - * Create the mon_data directory first. - */ - ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); - if (ret) - return ret; - - if (dest_kn) - *dest_kn = kn; - - /* - * Create the subdirectories for each domain. 
Note that all events - * in a domain like L3 are grouped into a resource whose domain is L3 - */ - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->mon_capable) - continue; - - ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); - if (ret) - goto out_destroy; - } - - return 0; - -out_destroy: - kernfs_remove(kn); - return ret; -} - -/** - * cbm_ensure_valid - Enforce validity on provided CBM - * @_val: Candidate CBM - * @r: RDT resource to which the CBM belongs - * - * The provided CBM represents all cache portions available for use. This - * may be represented by a bitmap that does not consist of contiguous ones - * and thus be an invalid CBM. - * Here the provided CBM is forced to be a valid CBM by only considering - * the first set of contiguous bits as valid and clearing all bits. - * The intention here is to provide a valid default CBM with which a new - * resource group is initialized. The user can follow this with a - * modification to the CBM if the default does not satisfy the - * requirements. - */ -static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) -{ - unsigned int cbm_len = r->cache.cbm_len; - unsigned long first_bit, zero_bit; - unsigned long val = _val; - - if (!val) - return 0; - - first_bit = find_first_bit(&val, cbm_len); - zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); - - /* Clear any remaining bits to ensure contiguous region */ - bitmap_clear(&val, zero_bit, cbm_len - zero_bit); - return (u32)val; -} - -/* - * Initialize cache resources per RDT domain - * - * Set the RDT domain up to start off with all usable allocations. That is, - * all shareable and unused bits. All-zero CBM is invalid. 
- */ -static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, - u32 closid) -{ - enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); - enum resctrl_conf_type t = s->conf_type; - struct resctrl_staged_config *cfg; - struct rdt_resource *r = s->res; - u32 used_b = 0, unused_b = 0; - unsigned long tmp_cbm; - enum rdtgrp_mode mode; - u32 peer_ctl, ctrl_val; - int i; - - cfg = &d->staged_config[t]; - cfg->have_new_ctrl = false; - cfg->new_ctrl = r->cache.shareable_bits; - used_b = r->cache.shareable_bits; - for (i = 0; i < closids_supported(); i++) { - if (closid_allocated(i) && i != closid) { - mode = rdtgroup_mode_by_closid(i); - if (mode == RDT_MODE_PSEUDO_LOCKSETUP) - /* - * ctrl values for locksetup aren't relevant - * until the schemata is written, and the mode - * becomes RDT_MODE_PSEUDO_LOCKED. - */ - continue; - /* - * If CDP is active include peer domain's - * usage to ensure there is no overlap - * with an exclusive group. - */ - if (resctrl_arch_get_cdp_enabled(r->rid)) - peer_ctl = resctrl_arch_get_config(r, d, i, - peer_type); - else - peer_ctl = 0; - ctrl_val = resctrl_arch_get_config(r, d, i, - s->conf_type); - used_b |= ctrl_val | peer_ctl; - if (mode == RDT_MODE_SHAREABLE) - cfg->new_ctrl |= ctrl_val | peer_ctl; - } - } - if (d->plr && d->plr->cbm > 0) - used_b |= d->plr->cbm; - unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); - unused_b &= BIT_MASK(r->cache.cbm_len) - 1; - cfg->new_ctrl |= unused_b; - /* - * Force the initial CBM to be valid, user can - * modify the CBM based on system availability. - */ - cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r); - /* - * Assign the u32 CBM to an unsigned long to ensure that - * bitmap_weight() does not access out-of-bound memory. 
- */ - tmp_cbm = cfg->new_ctrl; - if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); - return -ENOSPC; - } - cfg->have_new_ctrl = true; - - return 0; -} - -/* - * Initialize cache resources with default values. - * - * A new RDT group is being created on an allocation capable (CAT) - * supporting system. Set this group up to start off with all usable - * allocations. - * - * If there are no more shareable bits available on any domain then - * the entire allocation will fail. - */ -static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) -{ - struct rdt_domain *d; - int ret; - - list_for_each_entry(d, &s->res->domains, list) { - ret = __init_one_rdt_domain(d, s, closid); - if (ret < 0) - return ret; - } - - return 0; -} - -/* Initialize MBA resource with default values. */ -static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) -{ - struct resctrl_staged_config *cfg; - struct rdt_domain *d; - - list_for_each_entry(d, &r->domains, list) { - if (is_mba_sc(r)) { - d->mbps_val[closid] = MBA_MAX_MBPS; - continue; - } - - cfg = &d->staged_config[CDP_NONE]; - cfg->new_ctrl = r->default_ctrl; - cfg->have_new_ctrl = true; - } -} - -/* Initialize the RDT group's allocations. 
*/ -static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) -{ - struct resctrl_schema *s; - struct rdt_resource *r; - int ret = 0; - - rdt_staged_configs_clear(); - - list_for_each_entry(s, &resctrl_schema_all, list) { - r = s->res; - if (r->rid == RDT_RESOURCE_MBA || - r->rid == RDT_RESOURCE_SMBA) { - rdtgroup_init_mba(r, rdtgrp->closid); - if (is_mba_sc(r)) - continue; - } else { - ret = rdtgroup_init_cat(s, rdtgrp->closid); - if (ret < 0) - goto out; - } - - ret = resctrl_arch_update_domains(r, rdtgrp->closid); - if (ret < 0) { - rdt_last_cmd_puts("Failed to initialize allocations\n"); - goto out; - } - - } - - rdtgrp->mode = RDT_MODE_SHAREABLE; - -out: - rdt_staged_configs_clear(); - return ret; -} - -static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) -{ - int ret; - - if (!resctrl_arch_mon_capable()) - return 0; - - ret = alloc_rmid(rdtgrp->closid); - if (ret < 0) { - rdt_last_cmd_puts("Out of RMIDs\n"); - return ret; - } - rdtgrp->mon.rmid = ret; - - ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn); - if (ret) { - rdt_last_cmd_puts("kernfs subdir error\n"); - free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); - return ret; - } - - return 0; -} - -static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) -{ - if (resctrl_arch_mon_capable()) - free_rmid(rgrp->closid, rgrp->mon.rmid); -} - -static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, - const char *name, umode_t mode, - enum rdt_group_type rtype, struct rdtgroup **r) -{ - struct rdtgroup *prdtgrp, *rdtgrp; - unsigned long files = 0; - struct kernfs_node *kn; - int ret; - - prdtgrp = rdtgroup_kn_lock_live(parent_kn); - if (!prdtgrp) { - ret = -ENODEV; - goto out_unlock; - } - - if (rtype == RDTMON_GROUP && - (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || - prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { - ret = -EINVAL; - rdt_last_cmd_puts("Pseudo-locking in progress\n"); - goto out_unlock; - } - - /* allocate the rdtgroup. 
*/ - rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); - if (!rdtgrp) { - ret = -ENOSPC; - rdt_last_cmd_puts("Kernel out of memory\n"); - goto out_unlock; - } - *r = rdtgrp; - rdtgrp->mon.parent = prdtgrp; - rdtgrp->type = rtype; - INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list); - - /* kernfs creates the directory for rdtgrp */ - kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); - if (IS_ERR(kn)) { - ret = PTR_ERR(kn); - rdt_last_cmd_puts("kernfs create error\n"); - goto out_free_rgrp; - } - rdtgrp->kn = kn; - - /* - * kernfs_remove() will drop the reference count on "kn" which - * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn) call. Take one extra reference here, - * which will be dropped by kernfs_put() in rdtgroup_remove(). - */ - kernfs_get(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) { - rdt_last_cmd_puts("kernfs perm error\n"); - goto out_destroy; - } - - if (rtype == RDTCTRL_GROUP) { - files = RFTYPE_BASE | RFTYPE_CTRL; - if (resctrl_arch_mon_capable()) - files |= RFTYPE_MON; - } else { - files = RFTYPE_BASE | RFTYPE_MON; - } - - ret = rdtgroup_add_files(kn, files); - if (ret) { - rdt_last_cmd_puts("kernfs fill error\n"); - goto out_destroy; - } - - /* - * The caller unlocks the parent_kn upon success. - */ - return 0; - -out_destroy: - kernfs_put(rdtgrp->kn); - kernfs_remove(rdtgrp->kn); -out_free_rgrp: - kfree(rdtgrp); -out_unlock: - rdtgroup_kn_unlock(parent_kn); - return ret; -} - -static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) -{ - kernfs_remove(rgrp->kn); - rdtgroup_remove(rgrp); -} - -/* - * Create a monitor group under "mon_groups" directory of a control - * and monitor group(ctrl_mon). This is a resource group - * to monitor a subset of tasks and cpus in its parent ctrl_mon group. 
- */ -static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, - const char *name, umode_t mode) -{ - struct rdtgroup *rdtgrp, *prgrp; - int ret; - - ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp); - if (ret) - return ret; - - prgrp = rdtgrp->mon.parent; - rdtgrp->closid = prgrp->closid; - - ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); - if (ret) { - mkdir_rdt_prepare_clean(rdtgrp); - goto out_unlock; - } - - kernfs_activate(rdtgrp->kn); - - /* - * Add the rdtgrp to the list of rdtgrps the parent - * ctrl_mon group has to track. - */ - list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); - -out_unlock: - rdtgroup_kn_unlock(parent_kn); - return ret; -} - -/* - * These are rdtgroups created under the root directory. Can be used - * to allocate and monitor resources. - */ -static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, - const char *name, umode_t mode) -{ - struct rdtgroup *rdtgrp; - struct kernfs_node *kn; - u32 closid; - int ret; - - ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp); - if (ret) - return ret; - - kn = rdtgrp->kn; - ret = closid_alloc(); - if (ret < 0) { - rdt_last_cmd_puts("Out of CLOSIDs\n"); - goto out_common_fail; - } - closid = ret; - ret = 0; - - rdtgrp->closid = closid; - - ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp); - if (ret) - goto out_closid_free; - - kernfs_activate(rdtgrp->kn); - - ret = rdtgroup_init_alloc(rdtgrp); - if (ret < 0) - goto out_rmid_free; - - list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); - - if (resctrl_arch_mon_capable()) { - /* - * Create an empty mon_groups directory to hold the subset - * of tasks and cpus to monitor. 
- */ - ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); - if (ret) { - rdt_last_cmd_puts("kernfs subdir error\n"); - goto out_del_list; - } - } - - goto out_unlock; - -out_del_list: - list_del(&rdtgrp->rdtgroup_list); -out_rmid_free: - mkdir_rdt_prepare_rmid_free(rdtgrp); -out_closid_free: - closid_free(closid); -out_common_fail: - mkdir_rdt_prepare_clean(rdtgrp); -out_unlock: - rdtgroup_kn_unlock(parent_kn); - return ret; -} - -/* - * We allow creating mon groups only with in a directory called "mon_groups" - * which is present in every ctrl_mon group. Check if this is a valid - * "mon_groups" directory. - * - * 1. The directory should be named "mon_groups". - * 2. The mon group itself should "not" be named "mon_groups". - * This makes sure "mon_groups" directory always has a ctrl_mon group - * as parent. - */ -static bool is_mon_groups(struct kernfs_node *kn, const char *name) -{ - return (!strcmp(kn->name, "mon_groups") && - strcmp(name, "mon_groups")); -} - -static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, - umode_t mode) -{ - /* Do not accept '\n' to avoid unparsable situation. */ - if (strchr(name, '\n')) - return -EINVAL; - - /* - * If the parent directory is the root directory and RDT - * allocation is supported, add a control and monitoring - * subdirectory - */ - if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn) - return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); - - /* - * If RDT monitoring is supported and the parent directory is a valid - * "mon_groups" directory, add a monitoring subdirectory. 
- */ - if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name)) - return rdtgroup_mkdir_mon(parent_kn, name, mode); - - return -EPERM; -} - -static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) -{ - struct rdtgroup *prdtgrp = rdtgrp->mon.parent; - u32 closid, rmid; - int cpu; - - /* Give any tasks back to the parent group */ - rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); - - /* Update per cpu rmid of the moved CPUs first */ - closid = rdtgrp->closid; - rmid = prdtgrp->mon.rmid; - for_each_cpu(cpu, &rdtgrp->cpu_mask) - resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); - - /* - * Update the MSR on moved CPUs and CPUs which have moved - * task running on them. - */ - cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); - update_closid_rmid(tmpmask, NULL); - - rdtgrp->flags = RDT_DELETED; - free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); - - /* - * Remove the rdtgrp from the parent ctrl_mon group's list - */ - WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); - list_del(&rdtgrp->mon.crdtgrp_list); - - kernfs_remove(rdtgrp->kn); - - return 0; -} - -static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) -{ - rdtgrp->flags = RDT_DELETED; - list_del(&rdtgrp->rdtgroup_list); - - kernfs_remove(rdtgrp->kn); - return 0; -} - -static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) -{ - u32 closid, rmid; - int cpu; - - /* Give any tasks back to the default group */ - rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); - - /* Give any CPUs back to the default group */ - cpumask_or(&rdtgroup_default.cpu_mask, - &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); - - /* Update per cpu closid and rmid of the moved CPUs first */ - closid = rdtgroup_default.closid; - rmid = rdtgroup_default.mon.rmid; - for_each_cpu(cpu, &rdtgrp->cpu_mask) - resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); - - /* - * Update the MSR on moved CPUs and CPUs which have moved - * task running on them. 
- */ - cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); - update_closid_rmid(tmpmask, NULL); - - free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); - closid_free(rdtgrp->closid); - - rdtgroup_ctrl_remove(rdtgrp); - - /* - * Free all the child monitor group rmids. - */ - free_all_child_rdtgrp(rdtgrp); - - return 0; -} - -static int rdtgroup_rmdir(struct kernfs_node *kn) -{ - struct kernfs_node *parent_kn = kn->parent; - struct rdtgroup *rdtgrp; - cpumask_var_t tmpmask; - int ret = 0; - - if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) - return -ENOMEM; - - rdtgrp = rdtgroup_kn_lock_live(kn); - if (!rdtgrp) { - ret = -EPERM; - goto out; - } - - /* - * If the rdtgroup is a ctrl_mon group and parent directory - * is the root directory, remove the ctrl_mon group. - * - * If the rdtgroup is a mon group and parent directory - * is a valid "mon_groups" directory, remove the mon group. - */ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && - rdtgrp != &rdtgroup_default) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { - ret = rdtgroup_ctrl_remove(rdtgrp); - } else { - ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); - } - } else if (rdtgrp->type == RDTMON_GROUP && - is_mon_groups(parent_kn, kn->name)) { - ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); - } else { - ret = -EPERM; - } - -out: - rdtgroup_kn_unlock(kn); - free_cpumask_var(tmpmask); - return ret; -} - -/** - * mongrp_reparent() - replace parent CTRL_MON group of a MON group - * @rdtgrp: the MON group whose parent should be replaced - * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp - * @cpus: cpumask provided by the caller for use during this call - * - * Replaces the parent CTRL_MON group for a MON group, resulting in all member - * tasks' CLOSID immediately changing to that of the new parent group. - * Monitoring data for the group is unaffected by this operation. 
- */ -static void mongrp_reparent(struct rdtgroup *rdtgrp, - struct rdtgroup *new_prdtgrp, - cpumask_var_t cpus) -{ - struct rdtgroup *prdtgrp = rdtgrp->mon.parent; - - WARN_ON(rdtgrp->type != RDTMON_GROUP); - WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP); - - /* Nothing to do when simply renaming a MON group. */ - if (prdtgrp == new_prdtgrp) - return; - - WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); - list_move_tail(&rdtgrp->mon.crdtgrp_list, - &new_prdtgrp->mon.crdtgrp_list); - - rdtgrp->mon.parent = new_prdtgrp; - rdtgrp->closid = new_prdtgrp->closid; - - /* Propagate updated closid to all tasks in this group. */ - rdt_move_group_tasks(rdtgrp, rdtgrp, cpus); - - update_closid_rmid(cpus, NULL); -} - -static int rdtgroup_rename(struct kernfs_node *kn, - struct kernfs_node *new_parent, const char *new_name) -{ - struct rdtgroup *new_prdtgrp; - struct rdtgroup *rdtgrp; - cpumask_var_t tmpmask; - int ret; - - rdtgrp = kernfs_to_rdtgroup(kn); - new_prdtgrp = kernfs_to_rdtgroup(new_parent); - if (!rdtgrp || !new_prdtgrp) - return -ENOENT; - - /* Release both kernfs active_refs before obtaining rdtgroup mutex. */ - rdtgroup_kn_get(rdtgrp, kn); - rdtgroup_kn_get(new_prdtgrp, new_parent); - - mutex_lock(&rdtgroup_mutex); - - rdt_last_cmd_clear(); - - /* - * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if - * either kernfs_node is a file. 
- */ - if (kernfs_type(kn) != KERNFS_DIR || - kernfs_type(new_parent) != KERNFS_DIR) { - rdt_last_cmd_puts("Source and destination must be directories"); - ret = -EPERM; - goto out; - } - - if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) { - ret = -ENOENT; - goto out; - } - - if (rdtgrp->type != RDTMON_GROUP || !kn->parent || - !is_mon_groups(kn->parent, kn->name)) { - rdt_last_cmd_puts("Source must be a MON group\n"); - ret = -EPERM; - goto out; - } - - if (!is_mon_groups(new_parent, new_name)) { - rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n"); - ret = -EPERM; - goto out; - } - - /* - * If the MON group is monitoring CPUs, the CPUs must be assigned to the - * current parent CTRL_MON group and therefore cannot be assigned to - * the new parent, making the move illegal. - */ - if (!cpumask_empty(&rdtgrp->cpu_mask) && - rdtgrp->mon.parent != new_prdtgrp) { - rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n"); - ret = -EPERM; - goto out; - } - - /* - * Allocate the cpumask for use in mongrp_reparent() to avoid the - * possibility of failing to allocate it after kernfs_rename() has - * succeeded. - */ - if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) { - ret = -ENOMEM; - goto out; - } - - /* - * Perform all input validation and allocations needed to ensure - * mongrp_reparent() will succeed before calling kernfs_rename(), - * otherwise it would be necessary to revert this call if - * mongrp_reparent() failed. 
- */ - ret = kernfs_rename(kn, new_parent, new_name); - if (!ret) - mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask); - - free_cpumask_var(tmpmask); - -out: - mutex_unlock(&rdtgroup_mutex); - rdtgroup_kn_put(rdtgrp, kn); - rdtgroup_kn_put(new_prdtgrp, new_parent); - return ret; -} - -static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) -{ - if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) - seq_puts(seq, ",cdp"); - - if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) - seq_puts(seq, ",cdpl2"); - - if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA))) - seq_puts(seq, ",mba_MBps"); - - if (resctrl_debug) - seq_puts(seq, ",debug"); - - return 0; -} - -static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { - .mkdir = rdtgroup_mkdir, - .rmdir = rdtgroup_rmdir, - .rename = rdtgroup_rename, - .show_options = rdtgroup_show_options, -}; - -static int rdtgroup_setup_root(struct rdt_fs_context *ctx) -{ - rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, - KERNFS_ROOT_CREATE_DEACTIVATED | - KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, - &rdtgroup_default); - if (IS_ERR(rdt_root)) - return PTR_ERR(rdt_root); - - ctx->kfc.root = rdt_root; - rdtgroup_default.kn = kernfs_root_to_node(rdt_root); - - return 0; -} - -static void rdtgroup_destroy_root(void) -{ - kernfs_destroy_root(rdt_root); - rdtgroup_default.kn = NULL; -} - -static void rdtgroup_setup_default(void) -{ - mutex_lock(&rdtgroup_mutex); - - rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; - rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; - rdtgroup_default.type = RDTCTRL_GROUP; - INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); - - list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); - - mutex_unlock(&rdtgroup_mutex); -} - -static void domain_destroy_mon_state(struct rdt_domain *d) -{ - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); -} - -void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) -{ - 
mutex_lock(&rdtgroup_mutex); - - if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) - mba_sc_domain_destroy(r, d); - - if (!r->mon_capable) - goto out_unlock; - - /* - * If resctrl is mounted, remove all the - * per domain monitor data directories. - */ - if (resctrl_mounted && resctrl_arch_mon_capable()) - rmdir_mondata_subdir_allrdtgrp(r, d->id); - - if (resctrl_is_mbm_enabled()) - cancel_delayed_work(&d->mbm_over); - if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { - /* - * When a package is going down, forcefully - * decrement rmid->ebusy. There is no way to know - * that the L3 was flushed and hence may lead to - * incorrect counts in rare scenarios, but leaving - * the RMID as busy creates RMID leaks if the - * package never comes back. - */ - __check_limbo(d, true); - cancel_delayed_work(&d->cqm_limbo); - } - - domain_destroy_mon_state(d); - -out_unlock: - mutex_unlock(&rdtgroup_mutex); -} - -static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) -{ - u32 idx_limit = resctrl_arch_system_num_rmid_idx(); - size_t tsize; - - if (resctrl_arch_is_llc_occupancy_enabled()) { - d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); - if (!d->rmid_busy_llc) - return -ENOMEM; - } - if (resctrl_arch_is_mbm_total_enabled()) { - tsize = sizeof(*d->mbm_total); - d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); - if (!d->mbm_total) { - bitmap_free(d->rmid_busy_llc); - return -ENOMEM; - } - } - if (resctrl_arch_is_mbm_local_enabled()) { - tsize = sizeof(*d->mbm_local); - d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); - if (!d->mbm_local) { - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - return -ENOMEM; - } - } - - return 0; -} - -int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) -{ - int err = 0; - - mutex_lock(&rdtgroup_mutex); - - if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) { - /* RDT_RESOURCE_MBA is never mon_capable */ - err = mba_sc_domain_allocate(r, d); - goto 
out_unlock; - } - - if (!r->mon_capable) - goto out_unlock; - - err = domain_setup_mon_state(r, d); - if (err) - goto out_unlock; - - if (resctrl_is_mbm_enabled()) { - INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); - mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL, - RESCTRL_PICK_ANY_CPU); - } - - if (resctrl_arch_is_llc_occupancy_enabled()) - INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); - - /* - * If the filesystem is not mounted then only the default resource group - * exists. Creation of its directories is deferred until mount time - * by rdt_get_tree() calling mkdir_mondata_all(). - * If resctrl is mounted, add per domain monitor data directories. - */ - if (resctrl_mounted && resctrl_arch_mon_capable()) - mkdir_mondata_subdir_allrdtgrp(r, d); - -out_unlock: - mutex_unlock(&rdtgroup_mutex); - - return err; -} - -void resctrl_online_cpu(unsigned int cpu) -{ - mutex_lock(&rdtgroup_mutex); - /* The CPU is set in default rdtgroup after online. */ - cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask); - mutex_unlock(&rdtgroup_mutex); -} - -static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) -{ - struct rdtgroup *cr; - - list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) { - if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) - break; - } -} - -void resctrl_offline_cpu(unsigned int cpu) -{ - struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdtgroup *rdtgrp; - struct rdt_domain *d; - - mutex_lock(&rdtgroup_mutex); - list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { - if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) { - clear_childcpus(rdtgrp, cpu); - break; - } - } - - if (!l3->mon_capable) - goto out_unlock; - - d = resctrl_get_domain_from_cpu(cpu, l3); - if (d) { - if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) { - cancel_delayed_work(&d->mbm_over); - mbm_setup_overflow_handler(d, 0, cpu); - } - if (resctrl_arch_is_llc_occupancy_enabled() && - cpu == d->cqm_work_cpu 
&& has_busy_rmid(d)) { - cancel_delayed_work(&d->cqm_limbo); - cqm_setup_limbo_handler(d, 0, cpu); - } - } - -out_unlock: - mutex_unlock(&rdtgroup_mutex); -} - -/* - * resctrl_init - resctrl filesystem initialization - * - * Setup resctrl file system including set up root, create mount point, - * register resctrl filesystem, and initialize files under root directory. - * - * Return: 0 on success or -errno - */ -int resctrl_init(void) -{ - int ret = 0; - - seq_buf_init(&last_cmd_status, last_cmd_status_buf, - sizeof(last_cmd_status_buf)); - - rdtgroup_setup_default(); - - thread_throttle_mode_init(); - - ret = resctrl_mon_resource_init(); - if (ret) - return ret; - - ret = sysfs_create_mount_point(fs_kobj, "resctrl"); - if (ret) - return ret; - - ret = register_filesystem(&rdt_fs_type); - if (ret) - goto cleanup_mountpoint; - - /* - * Adding the resctrl debugfs directory here may not be ideal since - * it would let the resctrl debugfs directory appear on the debugfs - * filesystem before the resctrl filesystem is mounted. - * It may also be ok since that would enable debugging of RDT before - * resctrl is mounted. - * The reason why the debugfs directory is created here and not in - * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and - * during the debugfs directory creation also &sb->s_type->i_mutex_key - * (the lockdep class of inode->i_rwsem). Other filesystem - * interactions (eg. SyS_getdents) have the lock ordering: - * &sb->s_type->i_mutex_key --> &mm->mmap_lock - * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex - * is taken, thus creating dependency: - * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause - * issues considering the other two lock dependencies. - * By creating the debugfs directory here we avoid a dependency - * that may cause deadlock (even though file operations cannot - * occur until the filesystem is mounted, but I do not know how to - * tell lockdep that). 
- */ - debugfs_resctrl = debugfs_create_dir("resctrl", NULL); - - return 0; - -cleanup_mountpoint: - sysfs_remove_mount_point(fs_kobj, "resctrl"); - - return ret; -} - -void resctrl_exit(void) -{ - debugfs_remove_recursive(debugfs_resctrl); - unregister_filesystem(&rdt_fs_type); - sysfs_remove_mount_point(fs_kobj, "resctrl"); - - resctrl_mon_resource_exit(); -} -- Gitee From f31b0c111cf519cf549ba6b97d84fd94c683a7a9 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:16 -0800 Subject: [PATCH 126/158] anolis: Revert "x86/resctrl: Move the filesystem bits to headers visible to fs/resctrl" ANBZ: #31045 This reverts commit 10882daf38a125ba19fac73b845603b7b2993226. Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 3 --- arch/x86/kernel/cpu/resctrl/core.c | 5 ---- arch/x86/kernel/cpu/resctrl/internal.h | 36 ++++++++++++++++++++++++++ include/linux/resctrl.h | 3 --- include/linux/resctrl_types.h | 30 --------------------- 5 files changed, 36 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 746431c66fc4..491342f56811 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -218,9 +218,6 @@ int resctrl_arch_measure_l2_residency(void *_plr); int resctrl_arch_measure_l3_residency(void *_plr); void resctrl_cpu_detect(struct cpuinfo_x86 *c); -bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); -int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); - #else static inline void resctrl_arch_sched_in(struct task_struct *tsk) {} diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 87cd508588e5..948b4d409cd9 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -306,11 +306,6 @@ static void rdt_get_cdp_l2_config(void) rdt_get_cdp_config(RDT_RESOURCE_L2); } -bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) -{ - return rdt_resources_all[l].cdp_enabled; 
-} - static void mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) { diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 0f7e3f10941b..56218193a8ba 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -15,6 +15,12 @@ #define L2_QOS_CDP_ENABLE 0x01ULL +#define CQM_LIMBOCHECK_INTERVAL 1000 + +#define MBM_CNTR_WIDTH_BASE 24 +#define MBM_OVERFLOW_INTERVAL 1000 +#define MAX_MBA_BW 100u +#define MBA_IS_LINEAR 0x4 #define MBM_CNTR_WIDTH_OFFSET_AMD 20 #define RMID_VAL_ERROR BIT_ULL(63) @@ -204,6 +210,29 @@ struct rdtgroup { struct pseudo_lock_region *plr; }; +/* rdtgroup.flags */ +#define RDT_DELETED 1 + +/* rftype.flags */ +#define RFTYPE_FLAGS_CPUS_LIST 1 + +/* + * Define the file type flags for base and info directories. + */ +#define RFTYPE_INFO BIT(0) +#define RFTYPE_BASE BIT(1) +#define RFTYPE_CTRL BIT(4) +#define RFTYPE_MON BIT(5) +#define RFTYPE_TOP BIT(6) +#define RFTYPE_RES_CACHE BIT(8) +#define RFTYPE_RES_MB BIT(9) +#define RFTYPE_DEBUG BIT(10) +#define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) +#define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON) +#define RFTYPE_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP) +#define RFTYPE_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) +#define RFTYPE_MON_BASE (RFTYPE_BASE | RFTYPE_MON) + /* List of all resource groups */ extern struct list_head rdt_all_groups; @@ -341,6 +370,13 @@ static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res) return &hw_res->r_resctrl; } +static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) +{ + return rdt_resources_all[l].cdp_enabled; +} + +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); + /* * To return the common struct rdt_resource, which is contained in struct * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource. 
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 00cc0457af50..f786ffceeda3 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -41,9 +41,6 @@ int proc_resctrl_show(struct seq_file *m, */ #define RESCTRL_MAX_CBM 32 -extern unsigned int resctrl_rmid_realloc_limit; -extern unsigned int resctrl_rmid_realloc_threshold; - /** * struct pseudo_lock_region - pseudo-lock region information * @s: Resctrl schema for the resource to which this diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index fe0b10b589c0..4788bd95dac6 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -7,36 +7,6 @@ #ifndef __LINUX_RESCTRL_TYPES_H #define __LINUX_RESCTRL_TYPES_H -#define CQM_LIMBOCHECK_INTERVAL 1000 - -#define MBM_CNTR_WIDTH_BASE 24 -#define MBM_OVERFLOW_INTERVAL 1000 -#define MAX_MBA_BW 100u -#define MBA_IS_LINEAR 0x4 - -/* rdtgroup.flags */ -#define RDT_DELETED 1 - -/* rftype.flags */ -#define RFTYPE_FLAGS_CPUS_LIST 1 - -/* - * Define the file type flags for base and info directories. - */ -#define RFTYPE_INFO BIT(0) -#define RFTYPE_BASE BIT(1) -#define RFTYPE_CTRL BIT(4) -#define RFTYPE_MON BIT(5) -#define RFTYPE_TOP BIT(6) -#define RFTYPE_RES_CACHE BIT(8) -#define RFTYPE_RES_MB BIT(9) -#define RFTYPE_DEBUG BIT(10) -#define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) -#define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON) -#define RFTYPE_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP) -#define RFTYPE_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) -#define RFTYPE_MON_BASE (RFTYPE_BASE | RFTYPE_MON) - /* Reads to Local DRAM Memory */ #define READS_TO_LOCAL_MEM BIT(0) -- Gitee From c89755d7cdcbaa6d4e6774015de5ce2197f1bca1 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:16 -0800 Subject: [PATCH 127/158] anolis: Revert "fs/resctrl: Add boiler plate for external resctrl code" ANBZ: #31045 This reverts commit cb1246ff7714e6460d3c237168ee204e31bc4737. 
Signed-off-by: Jason Zeng --- MAINTAINERS | 1 - arch/Kconfig | 8 -------- arch/x86/Kconfig | 10 +++++++--- fs/Kconfig | 1 - fs/Makefile | 1 - fs/resctrl/Kconfig | 23 ----------------------- fs/resctrl/Makefile | 3 --- fs/resctrl/ctrlmondata.c | 0 fs/resctrl/internal.h | 0 fs/resctrl/monitor.c | 0 fs/resctrl/psuedo_lock.c | 0 fs/resctrl/rdtgroup.c | 0 include/linux/resctrl.h | 4 ---- 13 files changed, 7 insertions(+), 44 deletions(-) delete mode 100644 fs/resctrl/Kconfig delete mode 100644 fs/resctrl/Makefile delete mode 100644 fs/resctrl/ctrlmondata.c delete mode 100644 fs/resctrl/internal.h delete mode 100644 fs/resctrl/monitor.c delete mode 100644 fs/resctrl/psuedo_lock.c delete mode 100644 fs/resctrl/rdtgroup.c diff --git a/MAINTAINERS b/MAINTAINERS index 2def3d5f43b3..cde0fe482aae 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18204,7 +18204,6 @@ S: Supported F: Documentation/arch/x86/resctrl* F: arch/x86/include/asm/resctrl.h F: arch/x86/kernel/cpu/resctrl/ -F: fs/resctrl/ F: include/linux/resctrl*.h F: tools/testing/selftests/resctrl/ diff --git a/arch/Kconfig b/arch/Kconfig index b2ccbaa3780e..ad97c9f158e0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1341,14 +1341,6 @@ config STRICT_MODULE_RWX config ARCH_HAS_PHYS_TO_DMA bool -config ARCH_HAS_CPU_RESCTRL - bool - help - The 'resctrl' filesystem allows CPU controls of shared resources - such as caches and memory bandwidth to be configured. An architecture - selects this if it provides the arch-specific hooks for the filesystem - and needs the per-task CLOSID/RMID properties. 
- config HAVE_ARCH_COMPILER_H bool help diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc84cef105cf..ac2e95c097c3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -492,10 +492,8 @@ config GOLDFISH config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) - depends on MISC_FILESYSTEMS select KERNFS - select ARCH_HAS_CPU_RESCTRL - select RESCTRL_FS + select PROC_CPU_RESCTRL if PROC_FS select RESCTRL_FS_PSEUDO_LOCK help Enable x86 CPU resource control support. @@ -513,6 +511,12 @@ config X86_CPU_RESCTRL Say N if unsure. +config RESCTRL_FS_PSEUDO_LOCK + bool + help + Software mechanism to pin data in a cache portion using + micro-architecture specific knowledge. + if X86_32 config X86_BIGSMP bool "Support for big SMP systems with more than 8 CPUs" diff --git a/fs/Kconfig b/fs/Kconfig index 32d4f37da0d6..c7090d46e7d6 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -332,7 +332,6 @@ source "fs/omfs/Kconfig" source "fs/hpfs/Kconfig" source "fs/qnx4/Kconfig" source "fs/qnx6/Kconfig" -source "fs/resctrl/Kconfig" source "fs/romfs/Kconfig" source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index b62375770dee..f9541f40be4e 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -129,4 +129,3 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ obj-$(CONFIG_EROFS_FS) += erofs/ obj-$(CONFIG_VBOXSF_FS) += vboxsf/ obj-$(CONFIG_ZONEFS_FS) += zonefs/ -obj-$(CONFIG_RESCTRL_FS) += resctrl/ diff --git a/fs/resctrl/Kconfig b/fs/resctrl/Kconfig deleted file mode 100644 index 36a1ddbe6c21..000000000000 --- a/fs/resctrl/Kconfig +++ /dev/null @@ -1,23 +0,0 @@ -config RESCTRL_FS - bool "CPU Resource Control Filesystem (resctrl)" - depends on ARCH_HAS_CPU_RESCTRL - select KERNFS - select PROC_CPU_RESCTRL if PROC_FS - help - Resctrl is a filesystem interface - to control allocation and - monitoring of system resources - used by the CPUs. 
- -config RESCTRL_FS_PSEUDO_LOCK - bool - help - Software mechanism to pin data in a cache portion using - micro-architecture specific knowledge. - -config RESCTRL_RMID_DEPENDS_ON_CLOSID - bool - help - Enable by the architecture when the RMID values depend on the CLOSID. - This causes the closid allocator to search for CLOSID with clean - RMID. diff --git a/fs/resctrl/Makefile b/fs/resctrl/Makefile deleted file mode 100644 index 10fcfb0fdb10..000000000000 --- a/fs/resctrl/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_RESCTRL_FS) += rdtgroup.o ctrlmondata.o monitor.o -obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += psuedo_lock.o diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/fs/resctrl/psuedo_lock.c b/fs/resctrl/psuedo_lock.c deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index f786ffceeda3..5da55e58f229 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -8,10 +8,6 @@ #include #include -#ifdef CONFIG_ARCH_HAS_CPU_RESCTRL -#include -#endif - /* CLOSID, RMID value used by the default control group */ #define RESCTRL_RESERVED_CLOSID 0 #define RESCTRL_RESERVED_RMID 0 -- Gitee From 8345729f994308af4d8d197d1a8514f6a30b9586 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Thu, 12 Mar 2026 21:10:41 -0700 Subject: [PATCH 128/158] anolis: remove resctrl config files ANBZ: #31045 Remove config files: anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS 
anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_RESCTRL_FS anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_RESCTRL_FS anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PROC_CPU_RESCTRL anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PROC_CPU_RESCTRL anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_HAS_CPU_RESCTRL anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_HAS_CPU_RESCTRL anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID Signed-off-by: Jason Zeng --- anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS | 1 - anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS | 1 - anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS | 1 - anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_RESCTRL_FS | 1 - anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_RESCTRL_FS | 1 - .../L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID | 1 - anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL | 1 - anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL | 1 - anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL | 1 - anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL | 1 - anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL | 1 - anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL | 1 - anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_HAS_CPU_RESCTRL | 1 - anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PROC_CPU_RESCTRL | 1 - anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_HAS_CPU_RESCTRL | 1 - anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PROC_CPU_RESCTRL | 1 - 16 files changed, 16 deletions(-) delete mode 100644 
anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS delete mode 100644 anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS delete mode 100644 anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS delete mode 100644 anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_RESCTRL_FS delete mode 100644 anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_RESCTRL_FS delete mode 100644 anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID delete mode 100644 anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL delete mode 100644 anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL delete mode 100644 anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL delete mode 100644 anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL delete mode 100644 anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL delete mode 100644 anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL delete mode 100644 anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_HAS_CPU_RESCTRL delete mode 100644 anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PROC_CPU_RESCTRL delete mode 100644 anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_HAS_CPU_RESCTRL delete mode 100644 anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PROC_CPU_RESCTRL diff --git a/anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS deleted file mode 100644 index 4d7ca33dcdc4..000000000000 --- a/anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS +++ /dev/null @@ -1 +0,0 @@ -CONFIG_RESCTRL_FS=y diff --git a/anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS deleted file mode 100644 index 2147c5f9ea80..000000000000 --- a/anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_RESCTRL_FS is not set diff --git a/anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS deleted file mode 100644 index 
2147c5f9ea80..000000000000 --- a/anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_RESCTRL_FS is not set diff --git a/anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_RESCTRL_FS deleted file mode 100644 index 2147c5f9ea80..000000000000 --- a/anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_RESCTRL_FS +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_RESCTRL_FS is not set diff --git a/anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_RESCTRL_FS deleted file mode 100644 index 2147c5f9ea80..000000000000 --- a/anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_RESCTRL_FS +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_RESCTRL_FS is not set diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID deleted file mode 100644 index 8cddb03cb135..000000000000 --- a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID +++ /dev/null @@ -1 +0,0 @@ -CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID=y diff --git a/anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL deleted file mode 100644 index 6cd1474b8d32..000000000000 --- a/anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -CONFIG_ARCH_HAS_CPU_RESCTRL=y diff --git a/anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL deleted file mode 100644 index 8f76128c1853..000000000000 --- a/anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -CONFIG_PROC_CPU_RESCTRL=y diff --git a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL deleted file mode 100644 index dd3c6353e127..000000000000 --- 
a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_ARCH_HAS_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL deleted file mode 100644 index b4dd102b0a4e..000000000000 --- a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_PROC_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL deleted file mode 100644 index dd3c6353e127..000000000000 --- a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_ARCH_HAS_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL deleted file mode 100644 index b4dd102b0a4e..000000000000 --- a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_PROC_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_HAS_CPU_RESCTRL deleted file mode 100644 index dd3c6353e127..000000000000 --- a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_HAS_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_ARCH_HAS_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PROC_CPU_RESCTRL deleted file mode 100644 index b4dd102b0a4e..000000000000 --- a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PROC_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_PROC_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_HAS_CPU_RESCTRL deleted file mode 100644 index dd3c6353e127..000000000000 --- 
a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_HAS_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_ARCH_HAS_CPU_RESCTRL is not set diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PROC_CPU_RESCTRL deleted file mode 100644 index b4dd102b0a4e..000000000000 --- a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PROC_CPU_RESCTRL +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_PROC_CPU_RESCTRL is not set -- Gitee From dfca6918b2ce15b626228c08810031c17a35c726 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:16 -0800 Subject: [PATCH 129/158] anolis: Revert "x86/resctrl: Drop __init/__exit on assorted symbols" ANBZ: #31045 This reverts commit 63f229916df9dba486b09df0d2fc7ac2c10b8199. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/internal.h | 2 +- arch/x86/kernel/cpu/resctrl/monitor.c | 4 ++-- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 8 ++++---- include/linux/resctrl.h | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 56218193a8ba..3a3962736061 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -457,7 +457,7 @@ void closid_free(int closid); int alloc_rmid(u32 closid); void free_rmid(u32 closid, u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); -void resctrl_mon_resource_exit(void); +void __exit resctrl_mon_resource_exit(void); bool rdt_cpu_has(int flag); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 7e6fca138cb7..8b316d9acc3b 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -954,7 +954,7 @@ static int dom_data_init(struct rdt_resource *r) return err; } -static void dom_data_exit(struct rdt_resource *r) +static void __exit 
dom_data_exit(struct rdt_resource *r) { if (!r->mon_capable) return; @@ -1076,7 +1076,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) return 0; } -void resctrl_mon_resource_exit(void) +void __exit resctrl_mon_resource_exit(void) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 1425a33d201d..218aebd6387f 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2069,7 +2069,7 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name) return NULL; } -static void thread_throttle_mode_init(void) +static void __init thread_throttle_mode_init(void) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); struct rftype *rft; @@ -3997,7 +3997,7 @@ static void rdtgroup_destroy_root(void) rdtgroup_default.kn = NULL; } -static void rdtgroup_setup_default(void) +static void __init rdtgroup_setup_default(void) { mutex_lock(&rdtgroup_mutex); @@ -4190,7 +4190,7 @@ void resctrl_offline_cpu(unsigned int cpu) * * Return: 0 on success or -errno */ -int resctrl_init(void) +int __init resctrl_init(void) { int ret = 0; @@ -4244,7 +4244,7 @@ int resctrl_init(void) return ret; } -void resctrl_exit(void) +void __exit resctrl_exit(void) { debugfs_remove_recursive(debugfs_resctrl); unregister_filesystem(&rdt_fs_type); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 5da55e58f229..f463fb949677 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -393,7 +393,7 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d); extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -int resctrl_init(void); -void resctrl_exit(void); +int __init resctrl_init(void); +void __exit resctrl_exit(void); #endif /* _RESCTRL_H */ -- Gitee From a4d5b37df737f11f80c8450a4dc37161b6b77392 Mon Sep 17 00:00:00 2001 
From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:16 -0800 Subject: [PATCH 130/158] anolis: Revert "x86/resctrl: Rename resctrl_sched_in() to begin resctrl_arch_" ANBZ: #31045 This reverts commit e029a786d38c46e82e97761f1a8d73bc4220d309. Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 4 ++-- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 ++++++------ arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 491342f56811..9940398e367e 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -177,7 +177,7 @@ static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored, return READ_ONCE(tsk->rmid) == rmid; } -static inline void resctrl_arch_sched_in(struct task_struct *tsk) +static inline void resctrl_sched_in(struct task_struct *tsk) { if (static_branch_likely(&rdt_enable_key)) __resctrl_sched_in(tsk); @@ -220,7 +220,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c); #else -static inline void resctrl_arch_sched_in(struct task_struct *tsk) {} +static inline void resctrl_sched_in(struct task_struct *tsk) {} static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {} #endif /* CONFIG_X86_CPU_RESCTRL */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 218aebd6387f..085fb9c2333a 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -359,7 +359,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, } /* - * This is safe against resctrl_arch_sched_in() called from __switch_to() + * This is safe against resctrl_sched_in() called from __switch_to() * because __switch_to() is executed with interrupts disabled. A local call * from update_closid_rmid() is protected against __switch_to() because * preemption is disabled. 
@@ -378,7 +378,7 @@ void resctrl_arch_sync_cpu_defaults(void *info) * executing task might have its own closid selected. Just reuse * the context switch code. */ - resctrl_arch_sched_in(current); + resctrl_sched_in(current); } /* @@ -605,7 +605,7 @@ static void _update_task_closid_rmid(void *task) * Otherwise, the MSR is updated when the task is scheduled in. */ if (task == current) - resctrl_arch_sched_in(task); + resctrl_sched_in(task); } static void update_task_closid_rmid(struct task_struct *t) @@ -663,7 +663,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk, * Ensure the task's closid and rmid are written before determining if * the task is current that will decide if it will be interrupted. * This pairs with the full barrier between the rq->curr update and - * resctrl_arch_sched_in() during context switch. + * resctrl_sched_in() during context switch. */ smp_mb(); @@ -2946,8 +2946,8 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, /* * Order the closid/rmid stores above before the loads * in task_curr(). This pairs with the full barrier - * between the rq->curr update and - * resctrl_arch_sched_in() during context switch. + * between the rq->curr update and resctrl_sched_in() + * during context switch. */ smp_mb(); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 619560eb9f94..708c87b88cc1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -212,7 +212,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) switch_fpu_finish(); /* Load the Intel cache allocation PQR MSR. 
*/ - resctrl_arch_sched_in(next_p); + resctrl_sched_in(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index fbadfa84a02d..5fee1e0c42ca 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -667,7 +667,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) } /* Load the Intel cache allocation PQR MSR. */ - resctrl_arch_sched_in(next_p); + resctrl_sched_in(next_p); return prev_p; } -- Gitee From 6c97adc2d48957f1ae2ecad23e0ca1e3b93536d7 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:16 -0800 Subject: [PATCH 131/158] anolis: Revert "x86/resctrl: Describe resctrl's bitmap size assumptions" ANBZ: #31045 This reverts commit 716a62ea17c67137417bff48aabac6d604ac3fd0. Signed-off-by: Jason Zeng --- include/linux/resctrl.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index f463fb949677..84420253dc05 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -26,17 +26,6 @@ int proc_resctrl_show(struct seq_file *m, /* max value for struct rdt_domain's mbps_val */ #define MBA_MAX_MBPS U32_MAX -/* - * Resctrl uses a u32 as a closid bitmap. The maximum closid is 32. - */ -#define RESCTRL_MAX_CLOSID 32 - -/* - * Resctrl uses u32 to hold the user-space config. The maximum bitmap size is - * 32. - */ -#define RESCTRL_MAX_CBM 32 - /** * struct pseudo_lock_region - pseudo-lock region information * @s: Resctrl schema for the resource to which this -- Gitee From 247e9db82baa0fb1370a9d46a0898fa438d327af Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:16 -0800 Subject: [PATCH 132/158] anolis: Revert "x86/resctrl: Claim get_domain_from_cpu() for resctrl" ANBZ: #31045 This reverts commit 37f40a165c75ef29f37bf83602f8a4bd5b887215. 
Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 15 ++++++++++++++- arch/x86/kernel/cpu/resctrl/internal.h | 1 + arch/x86/kernel/cpu/resctrl/monitor.c | 2 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +- include/linux/resctrl.h | 19 ------------------- 5 files changed, 17 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 948b4d409cd9..1aa6f8244cd1 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -355,6 +355,19 @@ cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); } +struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) +{ + struct rdt_domain *d; + + list_for_each_entry(d, &r->domains, list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->cpu_mask)) + return d; + } + + return NULL; +} + u32 resctrl_arch_get_num_closid(struct rdt_resource *r) { return resctrl_to_arch_res(r)->num_closid; @@ -368,7 +381,7 @@ void rdt_ctrl_update(void *arg) int cpu = smp_processor_id(); struct rdt_domain *d; - d = resctrl_get_domain_from_cpu(cpu, r); + d = get_domain_from_cpu(cpu, r); if (d) { hw_res->msr_update(d, m, r); return; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 3a3962736061..e849d4407769 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -452,6 +452,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); +struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); int closids_supported(void); void closid_free(int closid); int alloc_rmid(u32 closid); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 
8b316d9acc3b..287fb0a5f060 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -676,7 +676,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) idx = resctrl_arch_rmid_idx_encode(closid, rmid); pmbm_data = &dom_mbm->mbm_local[idx]; - dom_mba = resctrl_get_domain_from_cpu(smp_processor_id(), r_mba); + dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba); if (!dom_mba) { pr_warn_once("Failure to get domain for MBA update\n"); return; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 085fb9c2333a..702a94fad6db 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -4165,7 +4165,7 @@ void resctrl_offline_cpu(unsigned int cpu) if (!l3->mon_capable) goto out_unlock; - d = resctrl_get_domain_from_cpu(cpu, l3); + d = get_domain_from_cpu(cpu, l3); if (d) { if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 84420253dc05..73c111963433 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -2,7 +2,6 @@ #ifndef _RESCTRL_H #define _RESCTRL_H -#include #include #include #include @@ -274,24 +273,6 @@ static inline u32 resctrl_get_config_index(u32 closid, } } -/* - * Caller must hold the cpuhp read lock to prevent the struct rdt_domain being - * freed. - */ -static inline struct rdt_domain * -resctrl_get_domain_from_cpu(int cpu, struct rdt_resource *r) -{ - struct rdt_domain *d; - - list_for_each_entry_rcu(d, &r->domains, list) { - /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->cpu_mask)) - return d; - } - - return NULL; -} - /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. 
-- Gitee From 48907b2572c79e5d179c0a9b412720967d61ca3d Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:16 -0800 Subject: [PATCH 133/158] anolis: Revert "x86/resctrl: Move get_config_index() to a header" ANBZ: #31045 This reverts commit 054a6ee7fd7c867564c6a3957fbb29e238bfeef4. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 19 ++++++++++++++++--- include/linux/resctrl.h | 15 --------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 4d91a8abb24d..ad9e5516e607 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -282,6 +282,19 @@ static int parse_line(char *line, struct resctrl_schema *s, return -EINVAL; } +static u32 get_config_index(u32 closid, enum resctrl_conf_type type) +{ + switch (type) { + default: + case CDP_NONE: + return closid; + case CDP_CODE: + return closid * 2 + 1; + case CDP_DATA: + return closid * 2; + } +} + static bool apply_config(struct rdt_hw_domain *hw_dom, struct resctrl_staged_config *cfg, u32 idx, cpumask_var_t cpu_mask) @@ -303,7 +316,7 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - u32 idx = resctrl_get_config_index(closid, t); + u32 idx = get_config_index(closid, t); struct msr_param msr_param; if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) @@ -343,7 +356,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) if (!cfg->have_new_ctrl) continue; - idx = resctrl_get_config_index(closid, t); + idx = get_config_index(closid, t); if (!apply_config(hw_dom, cfg, idx, cpu_mask)) continue; @@ -468,7 +481,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type) { struct rdt_hw_domain *hw_dom = 
resctrl_to_arch_dom(d); - u32 idx = resctrl_get_config_index(closid, type); + u32 idx = get_config_index(closid, type); return hw_dom->ctrl_val[idx]; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 73c111963433..3de5bc63ace0 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -258,21 +258,6 @@ bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); void resctrl_arch_mon_event_config_write(void *info); void resctrl_arch_mon_event_config_read(void *info); -/* For use by arch code to remap resctrl's smaller CDP CLOSID range */ -static inline u32 resctrl_get_config_index(u32 closid, - enum resctrl_conf_type type) -{ - switch (type) { - default: - case CDP_NONE: - return closid; - case CDP_CODE: - return (closid * 2) + 1; - case CDP_DATA: - return (closid * 2); - } -} - /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. -- Gitee From 5f0d5e6bea61fdcb316297f411079b64def76635 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:16 -0800 Subject: [PATCH 134/158] anolis: Revert "x86/resctrl: Move thread_throttle_mode_init() to be managed by resctrl" ANBZ: #31045 This reverts commit 946f4fbe455b8e056f2b43df2f5e7687a161a4c9. 
Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 1 + arch/x86/kernel/cpu/resctrl/internal.h | 1 + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 9 +-------- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 1aa6f8244cd1..a320e3d1c2c9 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -227,6 +227,7 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD; else r->membw.throttle_mode = THREAD_THROTTLE_MAX; + thread_throttle_mode_init(); r->alloc_capable = true; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index e849d4407769..be4e8f31b127 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -478,6 +478,7 @@ void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); +void __init thread_throttle_mode_init(void); void mbm_config_rftype_init(const char *config); void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 702a94fad6db..9275d6f8a74e 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2069,15 +2069,10 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name) return NULL; } -static void __init thread_throttle_mode_init(void) +void __init thread_throttle_mode_init(void) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); struct rftype *rft; - if (!r->alloc_capable || - r->membw.throttle_mode == THREAD_THROTTLE_UNDEFINED) - return; - rft = rdtgroup_get_rftype_by_name("thread_throttle_mode"); if (!rft) return; @@ -4199,8 
+4194,6 @@ int __init resctrl_init(void) rdtgroup_setup_default(); - thread_throttle_mode_init(); - ret = resctrl_mon_resource_init(); if (ret) return ret; -- Gitee From 818acaef80ea36d14e9e29a945f4a2064cc2bda0 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 135/158] anolis: Revert "x86/resctrl: Make resctrl_arch_pseudo_lock_fn() take a plr" ANBZ: #31045 This reverts commit 9285fdfd2679a6dc79de41b905efc5781a1ed07c. Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 2 +- arch/x86/kernel/cpu/resctrl/internal.h | 37 +++++++++++++++++++++ arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 13 ++++---- include/linux/resctrl.h | 39 ----------------------- 4 files changed, 44 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 9940398e367e..a88af68f9fe2 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -212,7 +212,7 @@ static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx) { }; u64 resctrl_arch_get_prefetch_disable_bits(void); -int resctrl_arch_pseudo_lock_fn(void *_plr); +int resctrl_arch_pseudo_lock_fn(void *_rdtgrp); int resctrl_arch_measure_cycles_lat_fn(void *_plr); int resctrl_arch_measure_l2_residency(void *_plr); int resctrl_arch_measure_l3_residency(void *_plr); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index be4e8f31b127..d6db15839dc4 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -182,6 +182,43 @@ struct mongroup { u32 rmid; }; +/** + * struct pseudo_lock_region - pseudo-lock region information + * @s: Resctrl schema for the resource to which this + * pseudo-locked region belongs + * @d: RDT domain to which this pseudo-locked region + * belongs + * @cbm: bitmask of the pseudo-locked region + * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread + * completion + 
* @thread_done: variable used by waitqueue to test if pseudo-locking + * thread completed + * @cpu: core associated with the cache on which the setup code + * will be run + * @line_size: size of the cache lines + * @size: size of pseudo-locked region in bytes + * @kmem: the kernel memory associated with pseudo-locked region + * @minor: minor number of character device associated with this + * region + * @debugfs_dir: pointer to this region's directory in the debugfs + * filesystem + * @pm_reqs: Power management QoS requests related to this region + */ +struct pseudo_lock_region { + struct resctrl_schema *s; + struct rdt_domain *d; + u32 cbm; + wait_queue_head_t lock_thread_wq; + int thread_done; + int cpu; + unsigned int line_size; + unsigned int size; + void *kmem; + unsigned int minor; + struct dentry *debugfs_dir; + struct list_head pm_reqs; +}; + /** * struct rdtgroup - store rdtgroup's data in resctrl file system. * @kn: kernfs node diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index ba51ab1f70e6..5a66e3b2c2ea 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -416,7 +416,7 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) /** * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache - * @_plr: the pseudo-lock region descriptor + * @_rdtgrp: resource group to which pseudo-lock region belongs * * This is the core pseudo-locking flow. * @@ -433,9 +433,10 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) * * Return: 0. Waiter on waitqueue will be woken on completion. 
*/ -int resctrl_arch_pseudo_lock_fn(void *_plr) +int resctrl_arch_pseudo_lock_fn(void *_rdtgrp) { - struct pseudo_lock_region *plr = _plr; + struct rdtgroup *rdtgrp = _rdtgrp; + struct pseudo_lock_region *plr = rdtgrp->plr; u32 rmid_p, closid_p; unsigned long i; u64 saved_msr; @@ -495,8 +496,7 @@ int resctrl_arch_pseudo_lock_fn(void *_plr) * pseudo-locked followed by reading of kernel memory to load it * into the cache. */ - __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, plr->closid); - + __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, rdtgrp->closid); /* * Cache was flushed earlier. Now access kernel memory to read it * into cache region associated with just activated plr->closid. @@ -1327,8 +1327,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) plr->thread_done = 0; - plr->closid = rdtgrp->closid; - thread = kthread_create_on_node(resctrl_arch_pseudo_lock_fn, plr, + thread = kthread_create_on_node(resctrl_arch_pseudo_lock_fn, rdtgrp, cpu_to_node(plr->cpu), "pseudo_lock/%u", plr->cpu); if (IS_ERR(thread)) { diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 3de5bc63ace0..6705d7960dfd 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -25,45 +25,6 @@ int proc_resctrl_show(struct seq_file *m, /* max value for struct rdt_domain's mbps_val */ #define MBA_MAX_MBPS U32_MAX -/** - * struct pseudo_lock_region - pseudo-lock region information - * @s: Resctrl schema for the resource to which this - * pseudo-locked region belongs - * @closid: The closid that this pseudo-locked region uses - * @d: RDT domain to which this pseudo-locked region - * belongs - * @cbm: bitmask of the pseudo-locked region - * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread - * completion - * @thread_done: variable used by waitqueue to test if pseudo-locking - * thread completed - * @cpu: core associated with the cache on which the setup code - * will be run - * @line_size: size of the cache lines - * @size: size of pseudo-locked region in bytes - * 
@kmem: the kernel memory associated with pseudo-locked region - * @minor: minor number of character device associated with this - * region - * @debugfs_dir: pointer to this region's directory in the debugfs - * filesystem - * @pm_reqs: Power management QoS requests related to this region - */ -struct pseudo_lock_region { - struct resctrl_schema *s; - u32 closid; - struct rdt_domain *d; - u32 cbm; - wait_queue_head_t lock_thread_wq; - int thread_done; - int cpu; - unsigned int line_size; - unsigned int size; - void *kmem; - unsigned int minor; - struct dentry *debugfs_dir; - struct list_head pm_reqs; -}; - /** * struct resctrl_staged_config - parsed configuration to be applied * @new_ctrl: new ctrl value to be loaded -- Gitee From 567e922629850d5c9a754df82a3e9d7a0276e754 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 136/158] anolis: Revert "x86/resctrl: Make prefetch_disable_bits belong to the arch code" ANBZ: #31045 This reverts commit 24788aacf473a7230bc215ee915cc721d742168c. 
Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 5a66e3b2c2ea..856beb6f668b 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -85,8 +85,6 @@ static const struct class pseudo_lock_class = { */ u64 resctrl_arch_get_prefetch_disable_bits(void) { - prefetch_disable_bits = 0; - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) return 0; @@ -102,8 +100,7 @@ u64 resctrl_arch_get_prefetch_disable_bits(void) * 3 DCU IP Prefetcher Disable (R/W) * 63:4 Reserved */ - prefetch_disable_bits = 0xF; - break; + return 0xF; case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_ATOM_GOLDMONT_PLUS: /* @@ -114,11 +111,10 @@ u64 resctrl_arch_get_prefetch_disable_bits(void) * 2 DCU Hardware Prefetcher Disable (R/W) * 63:3 Reserved */ - prefetch_disable_bits = 0x5; - break; + return 0x5; } - return prefetch_disable_bits; + return 0; } /** @@ -719,7 +715,8 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) * Not knowing the bits to disable prefetching implies that this * platform does not support Cache Pseudo-Locking. */ - if (resctrl_arch_get_prefetch_disable_bits() == 0) { + prefetch_disable_bits = resctrl_arch_get_prefetch_disable_bits(); + if (prefetch_disable_bits == 0) { rdt_last_cmd_puts("Pseudo-locking not supported\n"); return -EINVAL; } -- Gitee From 010aedbb0f2758913d5dfcafda58d433db65d2a1 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 137/158] anolis: Revert "x86/resctrl: Allow an architecture to disable pseudo lock" ANBZ: #31045 This reverts commit fa816439275b4f01b895892d4052426e4a9ce871. 
Signed-off-by: Jason Zeng --- arch/x86/Kconfig | 7 ---- arch/x86/kernel/cpu/resctrl/Makefile | 5 ++- arch/x86/kernel/cpu/resctrl/internal.h | 48 +++++--------------------- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 3 +- 4 files changed, 11 insertions(+), 52 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac2e95c097c3..890c59602fc0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -494,7 +494,6 @@ config X86_CPU_RESCTRL depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) select KERNFS select PROC_CPU_RESCTRL if PROC_FS - select RESCTRL_FS_PSEUDO_LOCK help Enable x86 CPU resource control support. @@ -511,12 +510,6 @@ config X86_CPU_RESCTRL Say N if unsure. -config RESCTRL_FS_PSEUDO_LOCK - bool - help - Software mechanism to pin data in a cache portion using - micro-architecture specific knowledge. - if X86_32 config X86_BIGSMP bool "Support for big SMP systems with more than 8 CPUs" diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile index 0c13b0befd8a..4a06c37b9cf1 100644 --- a/arch/x86/kernel/cpu/resctrl/Makefile +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o -obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o -obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o +obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o +obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o CFLAGS_pseudo_lock.o = -I$(src) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index d6db15839dc4..238b81d3f64a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -489,6 +489,14 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); +int rdtgroup_locksetup_enter(struct rdtgroup 
*rdtgrp); +int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); +int rdt_pseudo_lock_init(void); +void rdt_pseudo_lock_release(void); +int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); +void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); int closids_supported(void); void closid_free(int closid); @@ -521,44 +529,4 @@ void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); -#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK -int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); -int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); -int rdt_pseudo_lock_init(void); -void rdt_pseudo_lock_release(void); -int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); -void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); -#else -static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) -{ - return -EOPNOTSUPP; -} - -static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) -{ - return -EOPNOTSUPP; -} - -static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) -{ - return false; -} - -static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) -{ - return false; -} - -static inline int rdt_pseudo_lock_init(void) { return 0; } -static inline void rdt_pseudo_lock_release(void) { } -static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) -{ - return -EOPNOTSUPP; -} - -static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { } -#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ - #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git 
a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 9275d6f8a74e..56a0bfdc11f7 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1452,8 +1452,7 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, goto out; } rdtgrp->mode = RDT_MODE_EXCLUSIVE; - } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) && - !strcmp(buf, "pseudo-locksetup")) { + } else if (!strcmp(buf, "pseudo-locksetup")) { ret = rdtgroup_locksetup_enter(rdtgrp); if (ret) goto out; -- Gitee From 62507c3631f5abc1b0599f15dd2070f55dacc94a Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Thu, 12 Mar 2026 20:17:15 -0700 Subject: [PATCH 138/158] anolis: remove config file for CONFIG_RESCTRL_FS_PSEUDO_LOCK ANBZ: #31045 Remove config file: anolis/configs/L1-RECOMMEND/x86/CONFIG_RESCTRL_FS_PSEUDO_LOCK Signed-off-by: Jason Zeng --- anolis/configs/L1-RECOMMEND/x86/CONFIG_RESCTRL_FS_PSEUDO_LOCK | 1 - 1 file changed, 1 deletion(-) delete mode 100644 anolis/configs/L1-RECOMMEND/x86/CONFIG_RESCTRL_FS_PSEUDO_LOCK diff --git a/anolis/configs/L1-RECOMMEND/x86/CONFIG_RESCTRL_FS_PSEUDO_LOCK b/anolis/configs/L1-RECOMMEND/x86/CONFIG_RESCTRL_FS_PSEUDO_LOCK deleted file mode 100644 index 731e801f5294..000000000000 --- a/anolis/configs/L1-RECOMMEND/x86/CONFIG_RESCTRL_FS_PSEUDO_LOCK +++ /dev/null @@ -1 +0,0 @@ -CONFIG_RESCTRL_FS_PSEUDO_LOCK=y -- Gitee From 24152f666f7e99d30a4a276055822360d6067c5d Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 139/158] anolis: Revert "x86/resctrl: Add resctrl_arch_ prefix to pseudo lock functions" ANBZ: #31045 This reverts commit 46675abeaadea4d56bca73b2a9ed9bcbdaf7d8c4. 
Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 5 ---- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 36 +++++++++++------------ 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index a88af68f9fe2..50407e83d0ca 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -211,11 +211,6 @@ static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx) { }; -u64 resctrl_arch_get_prefetch_disable_bits(void); -int resctrl_arch_pseudo_lock_fn(void *_rdtgrp); -int resctrl_arch_measure_cycles_lat_fn(void *_plr); -int resctrl_arch_measure_l2_residency(void *_plr); -int resctrl_arch_measure_l3_residency(void *_plr); void resctrl_cpu_detect(struct cpuinfo_x86 *c); #else diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 856beb6f668b..f2315a50ea4f 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -62,8 +62,7 @@ static const struct class pseudo_lock_class = { }; /** - * resctrl_arch_get_prefetch_disable_bits - prefetch disable bits of supported - * platforms + * get_prefetch_disable_bits - prefetch disable bits of supported platforms * @void: It takes no parameters. * * Capture the list of platforms that have been validated to support @@ -77,13 +76,13 @@ static const struct class pseudo_lock_class = { * in the SDM. * * When adding a platform here also add support for its cache events to - * resctrl_arch_measure_l*_residency() + * measure_cycles_perf_fn() * * Return: * If platform is supported, the bits to disable hardware prefetchers, 0 * if platform is not supported. 
*/ -u64 resctrl_arch_get_prefetch_disable_bits(void) +static u64 get_prefetch_disable_bits(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) @@ -411,7 +410,7 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) } /** - * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache + * pseudo_lock_fn - Load kernel memory into cache * @_rdtgrp: resource group to which pseudo-lock region belongs * * This is the core pseudo-locking flow. @@ -429,7 +428,7 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) * * Return: 0. Waiter on waitqueue will be woken on completion. */ -int resctrl_arch_pseudo_lock_fn(void *_rdtgrp) +static int pseudo_lock_fn(void *_rdtgrp) { struct rdtgroup *rdtgrp = _rdtgrp; struct pseudo_lock_region *plr = rdtgrp->plr; @@ -715,7 +714,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) * Not knowing the bits to disable prefetching implies that this * platform does not support Cache Pseudo-Locking. */ - prefetch_disable_bits = resctrl_arch_get_prefetch_disable_bits(); + prefetch_disable_bits = get_prefetch_disable_bits(); if (prefetch_disable_bits == 0) { rdt_last_cmd_puts("Pseudo-locking not supported\n"); return -EINVAL; @@ -880,8 +879,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) } /** - * resctrl_arch_measure_cycles_lat_fn - Measure cycle latency to read - * pseudo-locked memory + * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory * @_plr: pseudo-lock region to measure * * There is no deterministic way to test if a memory region is cached. One @@ -894,7 +892,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) * * Return: 0. Waiter on waitqueue will be woken on completion. 
*/ -int resctrl_arch_measure_cycles_lat_fn(void *_plr) +static int measure_cycles_lat_fn(void *_plr) { struct pseudo_lock_region *plr = _plr; u32 saved_low, saved_high; @@ -1078,7 +1076,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, return 0; } -int resctrl_arch_measure_l2_residency(void *_plr) +static int measure_l2_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1116,7 +1114,7 @@ int resctrl_arch_measure_l2_residency(void *_plr) return 0; } -int resctrl_arch_measure_l3_residency(void *_plr) +static int measure_l3_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1214,18 +1212,18 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) plr->cpu = cpu; if (sel == 1) - thread = kthread_create_on_node(resctrl_arch_measure_cycles_lat_fn, - plr, cpu_to_node(cpu), + thread = kthread_create_on_node(measure_cycles_lat_fn, plr, + cpu_to_node(cpu), "pseudo_lock_measure/%u", cpu); else if (sel == 2) - thread = kthread_create_on_node(resctrl_arch_measure_l2_residency, - plr, cpu_to_node(cpu), + thread = kthread_create_on_node(measure_l2_residency, plr, + cpu_to_node(cpu), "pseudo_lock_measure/%u", cpu); else if (sel == 3) - thread = kthread_create_on_node(resctrl_arch_measure_l3_residency, - plr, cpu_to_node(cpu), + thread = kthread_create_on_node(measure_l3_residency, plr, + cpu_to_node(cpu), "pseudo_lock_measure/%u", cpu); else @@ -1324,7 +1322,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) plr->thread_done = 0; - thread = kthread_create_on_node(resctrl_arch_pseudo_lock_fn, rdtgrp, + thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp, cpu_to_node(plr->cpu), "pseudo_lock/%u", plr->cpu); if (IS_ERR(thread)) { -- Gitee From aa17d0c9be10a0f348fb21cb0835daddfc6a0ca2 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 140/158] anolis: Revert "x86/resctrl: Allow 
resctrl_arch_mon_event_config_write() to return an error" ANBZ: #31045 This reverts commit cb2184614818e523e1637ff60e77b2f9f6045330. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 20 ++++---------------- include/linux/resctrl.h | 1 - 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 56a0bfdc11f7..3d3a839eba6b 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1685,16 +1685,13 @@ void resctrl_arch_mon_event_config_write(void *info) index = mon_event_config_index_get(mon_info->evtid); if (index == INVALID_CONFIG_INDEX) { pr_warn_once("Invalid event id %d\n", mon_info->evtid); - mon_info->err = -EINVAL; return; } wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); - - mon_info->err = 0; } -static int mbm_config_write_domain(struct rdt_resource *r, - struct rdt_domain *d, u32 evtid, u32 val) +static void mbm_config_write_domain(struct rdt_resource *r, + struct rdt_domain *d, u32 evtid, u32 val) { struct resctrl_mon_config_info mon_info = {0}; @@ -1707,7 +1704,7 @@ static int mbm_config_write_domain(struct rdt_resource *r, mon_info.evtid = evtid; mondata_config_read(&mon_info); if (mon_info.mon_config == val) - return 0; + return; mon_info.mon_config = val; @@ -1719,10 +1716,6 @@ static int mbm_config_write_domain(struct rdt_resource *r, */ smp_call_function_any(&d->cpu_mask, resctrl_arch_mon_event_config_write, &mon_info, 1); - if (mon_info.err) { - rdt_last_cmd_puts("Invalid event configuration\n"); - return mon_info.err; - } /* * When an Event Configuration is changed, the bandwidth counters @@ -1734,8 +1727,6 @@ static int mbm_config_write_domain(struct rdt_resource *r, * mbm_local and mbm_total counts for all the RMIDs. 
*/ resctrl_arch_reset_rmid_all(r, d); - - return 0; } static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) @@ -1743,7 +1734,6 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) char *dom_str = NULL, *id_str; unsigned long dom_id, val; struct rdt_domain *d; - int err; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); @@ -1775,9 +1765,7 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) list_for_each_entry(d, &r->domains, list) { if (d->id == dom_id) { - err = mbm_config_write_domain(r, d, evtid, val); - if (err) - return err; + mbm_config_write_domain(r, d, evtid, val); goto next; } } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 6705d7960dfd..8a7367d1ce45 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -200,7 +200,6 @@ struct resctrl_mon_config_info { struct rdt_domain *d; u32 evtid; u32 mon_config; - int err; }; /* -- Gitee From 3aab0c15f60f044724af0d415490a0f6c0da05a1 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 141/158] anolis: Revert "x86/resctrl: Move mbm_cfg_mask to struct rdt_resource" ANBZ: #31045 This reverts commit 7d1348ba72bccf70e3f76fab947927af2d4a3dc9. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/internal.h | 3 +++ arch/x86/kernel/cpu/resctrl/monitor.c | 2 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 5 +++-- include/linux/resctrl.h | 3 --- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 238b81d3f64a..46370eafb00f 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -371,6 +371,8 @@ struct msr_param { * @msr_update: Function pointer to update QOS MSRs * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. 
+ * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth + * Monitoring Event Configuration (BMEC) is supported. * @cdp_enabled: CDP state of this resource * * Members of this structure are either private to the architecture @@ -385,6 +387,7 @@ struct rdt_hw_resource { struct rdt_resource *r); unsigned int mon_scale; unsigned int mbm_width; + unsigned int mbm_cfg_mask; bool cdp_enabled; }; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 287fb0a5f060..ccb85c61b43b 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1068,7 +1068,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) /* Detect list of bandwidth sources that can be tracked */ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); - r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; + hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } r->mon_capable = true; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 3d3a839eba6b..e76018687117 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1731,6 +1731,7 @@ static void mbm_config_write_domain(struct rdt_resource *r, static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); char *dom_str = NULL, *id_str; unsigned long dom_id, val; struct rdt_domain *d; @@ -1757,9 +1758,9 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) } /* Value from user cannot be more than the supported set of events */ - if ((val & r->mbm_cfg_mask) != val) { + if ((val & hw_res->mbm_cfg_mask) != val) { rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", - r->mbm_cfg_mask); + hw_res->mbm_cfg_mask); return -EINVAL; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 8a7367d1ce45..975b80102fbe 100644 --- a/include/linux/resctrl.h +++ 
b/include/linux/resctrl.h @@ -140,8 +140,6 @@ struct resctrl_membw { * @format_str: Per resource format string to show domain value * @evt_list: List of monitoring events * @fflags: flags to choose base and info files - * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth - * Monitoring Event Configuration (BMEC) is supported. * @cdp_capable: Is the CDP feature available on this resource */ struct rdt_resource { @@ -159,7 +157,6 @@ struct rdt_resource { const char *format_str; struct list_head evt_list; unsigned long fflags; - unsigned int mbm_cfg_mask; bool cdp_capable; }; -- Gitee From b16569fdede3733ecd269414eb0c7f96084550ad Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 142/158] anolis: Revert "x86/resctrl: Change mon_event_config_{read,write}() to be arch helpers" ANBZ: #31045 This reverts commit e4dc4973daef1f2cedb66fb526cdf377e5ce73a8. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 34 +++++++++++++------------- include/linux/resctrl.h | 9 ------- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e76018687117..2d6f4e0d3656 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1580,6 +1580,11 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, return ret; } +struct mon_config_info { + u32 evtid; + u32 mon_config; +}; + #define INVALID_CONFIG_INDEX UINT_MAX /** @@ -1604,9 +1609,9 @@ static inline unsigned int mon_event_config_index_get(u32 evtid) } } -void resctrl_arch_mon_event_config_read(void *info) +static void mon_event_config_read(void *info) { - struct resctrl_mon_config_info *mon_info = info; + struct mon_config_info *mon_info = info; unsigned int index; u64 msrval; @@ -1621,15 +1626,14 @@ void resctrl_arch_mon_event_config_read(void *info) mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; } -static void 
mondata_config_read(struct resctrl_mon_config_info *mon_info) +static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info) { - smp_call_function_any(&mon_info->d->cpu_mask, - resctrl_arch_mon_event_config_read, mon_info, 1); + smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1); } static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { - struct resctrl_mon_config_info mon_info = {0}; + struct mon_config_info mon_info = {0}; struct rdt_domain *dom; bool sep = false; @@ -1640,11 +1644,9 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid if (sep) seq_puts(s, ";"); - memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info)); - mon_info.r = r; - mon_info.d = dom; + memset(&mon_info, 0, sizeof(struct mon_config_info)); mon_info.evtid = evtid; - mondata_config_read(&mon_info); + mondata_config_read(dom, &mon_info); seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config); sep = true; @@ -1677,9 +1679,9 @@ static int mbm_local_bytes_config_show(struct kernfs_open_file *of, return 0; } -void resctrl_arch_mon_event_config_write(void *info) +static void mon_event_config_write(void *info) { - struct resctrl_mon_config_info *mon_info = info; + struct mon_config_info *mon_info = info; unsigned int index; index = mon_event_config_index_get(mon_info->evtid); @@ -1693,16 +1695,14 @@ void resctrl_arch_mon_event_config_write(void *info) static void mbm_config_write_domain(struct rdt_resource *r, struct rdt_domain *d, u32 evtid, u32 val) { - struct resctrl_mon_config_info mon_info = {0}; + struct mon_config_info mon_info = {0}; /* * Read the current config value first. If both are the same then * no need to write it again. 
*/ - mon_info.r = r; - mon_info.d = d; mon_info.evtid = evtid; - mondata_config_read(&mon_info); + mondata_config_read(d, &mon_info); if (mon_info.mon_config == val) return; @@ -1714,7 +1714,7 @@ static void mbm_config_write_domain(struct rdt_resource *r, * are scoped at the domain level. Writing any of these MSRs * on one CPU is observed by all the CPUs in the domain. */ - smp_call_function_any(&d->cpu_mask, resctrl_arch_mon_event_config_write, + smp_call_function_any(&d->cpu_mask, mon_event_config_write, &mon_info, 1); /* diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 975b80102fbe..bfc63e8219e5 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -192,13 +192,6 @@ struct resctrl_cpu_sync { u32 rmid; }; -struct resctrl_mon_config_info { - struct rdt_resource *r; - struct rdt_domain *d; - u32 evtid; - u32 mon_config; -}; - /* * Update and re-load this CPUs defaults. Called via IPI, takes a pointer to * struct resctrl_cpu_sync, or NULL. @@ -212,8 +205,6 @@ struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); -void resctrl_arch_mon_event_config_write(void *info); -void resctrl_arch_mon_event_config_read(void *info); /* * Update the ctrl_val and apply this config right now. -- Gitee From 98e747401a7e3a8be18f09e8ce48b72dd9cde5fb Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 143/158] anolis: Revert "x86/resctrl: Add resctrl_arch_is_evt_configurable() to abstract BMEC" ANBZ: #31045 This reverts commit 74891eb97fc52327d7cca15339198b54158bb9ce. 
Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 19 ++----------------- arch/x86/kernel/cpu/resctrl/internal.h | 4 ++-- arch/x86/kernel/cpu/resctrl/monitor.c | 18 +++++++++--------- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +- include/linux/resctrl.h | 2 -- 5 files changed, 14 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index a320e3d1c2c9..c23d5da137cd 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -667,7 +667,7 @@ struct rdt_options { bool force_off, force_on; }; -static struct rdt_options rdt_options[] __ro_after_init = { +static struct rdt_options rdt_options[] __initdata = { RDT_OPT(RDT_FLAG_CMT, "cmt", X86_FEATURE_CQM_OCCUP_LLC), RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL), RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL), @@ -707,7 +707,7 @@ static int __init set_rdt_options(char *str) } __setup("rdt", set_rdt_options); -bool rdt_cpu_has(int flag) +bool __init rdt_cpu_has(int flag) { bool ret = boot_cpu_has(flag); struct rdt_options *o; @@ -727,21 +727,6 @@ bool rdt_cpu_has(int flag) return ret; } -bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) -{ - if (!rdt_cpu_has(X86_FEATURE_BMEC)) - return false; - - switch (evt) { - case QOS_L3_MBM_TOTAL_EVENT_ID: - return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL); - case QOS_L3_MBM_LOCAL_EVENT_ID: - return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL); - default: - return false; - } -} - static __init bool get_mem_config(void) { struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA]; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 46370eafb00f..edbccc79246f 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -507,7 +507,7 @@ int alloc_rmid(u32 closid); void free_rmid(u32 closid, u32 rmid); int rdt_get_mon_l3_config(struct 
rdt_resource *r); void __exit resctrl_mon_resource_exit(void); -bool rdt_cpu_has(int flag); +bool __init rdt_cpu_has(int flag); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, @@ -527,7 +527,7 @@ bool has_busy_rmid(struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); void __init thread_throttle_mode_init(void); -void mbm_config_rftype_init(const char *config); +void __init mbm_config_rftype_init(const char *config); void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index ccb85c61b43b..c9be2d0819c0 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1019,15 +1019,6 @@ int resctrl_mon_resource_init(void) l3_mon_evt_init(r); - if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { - mbm_total_event.configurable = true; - mbm_config_rftype_init("mbm_total_bytes_config"); - } - if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { - mbm_local_event.configurable = true; - mbm_config_rftype_init("mbm_local_bytes_config"); - } - return 0; } @@ -1069,6 +1060,15 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) /* Detect list of bandwidth sources that can be tracked */ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; + + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { + mbm_total_event.configurable = true; + mbm_config_rftype_init("mbm_total_bytes_config"); + } + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { + mbm_local_event.configurable = true; + mbm_config_rftype_init("mbm_local_bytes_config"); + } } r->mon_capable = true; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c 
b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2d6f4e0d3656..8285b916289c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2068,7 +2068,7 @@ void __init thread_throttle_mode_init(void) rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB; } -void mbm_config_rftype_init(const char *config) +void __init mbm_config_rftype_init(const char *config) { struct rftype *rft; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index bfc63e8219e5..b0ee7256e095 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -204,8 +204,6 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); -bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); - /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. -- Gitee From 65772bc12a2346457b657c6d707a4e4e285e8382 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 144/158] anolis: Revert "x86/resctrl: Export the is_mbm_*_enabled() helpers to asm/resctrl.h" ANBZ: #31045 This reverts commit cfefad27a56d4595c818c167344eef6753eb306d. 
Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 17 ----------- arch/x86/kernel/cpu/resctrl/core.c | 4 +-- arch/x86/kernel/cpu/resctrl/internal.h | 27 +++++++++++++++++ arch/x86/kernel/cpu/resctrl/monitor.c | 18 ++++++------ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 40 +++++++++----------------- 5 files changed, 52 insertions(+), 54 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 50407e83d0ca..5f6a5375bb4a 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -7,7 +7,6 @@ #include #include #include -#include /* * This value can never be a valid CLOSID, and is used when mapping a @@ -44,7 +43,6 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); extern bool rdt_alloc_capable; extern bool rdt_mon_capable; -extern unsigned int rdt_mon_features; DECLARE_STATIC_KEY_FALSE(rdt_enable_key); DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); @@ -84,21 +82,6 @@ static inline void resctrl_arch_disable_mon(void) static_branch_dec_cpuslocked(&rdt_enable_key); } -static inline bool resctrl_arch_is_llc_occupancy_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); -} - -static inline bool resctrl_arch_is_mbm_total_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); -} - -static inline bool resctrl_arch_is_mbm_local_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); -} - /* * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR * diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index c23d5da137cd..7242d15d4806 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -481,13 +481,13 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) { size_t tsize; - if (resctrl_arch_is_mbm_total_enabled()) { + if (is_mbm_total_enabled()) { tsize = sizeof(*hw_dom->arch_mbm_total); hw_dom->arch_mbm_total = 
kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_total) return -ENOMEM; } - if (resctrl_arch_is_mbm_local_enabled()) { + if (is_mbm_local_enabled()) { tsize = sizeof(*hw_dom->arch_mbm_local); hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_local) { diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index edbccc79246f..01fcd4ef26ca 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -130,6 +130,7 @@ struct rmid_read { void *arch_mon_ctx; }; +extern unsigned int rdt_mon_features; extern struct list_head resctrl_schema_all; extern bool resctrl_mounted; @@ -359,6 +360,32 @@ struct msr_param { u32 high; }; +static inline bool is_llc_occupancy_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); +} + +static inline bool is_mbm_total_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); +} + +static inline bool is_mbm_local_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); +} + +static inline bool is_mbm_enabled(void) +{ + return (is_mbm_total_enabled() || is_mbm_local_enabled()); +} + +static inline bool is_mbm_event(int e) +{ + return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && + e <= QOS_L3_MBM_LOCAL_EVENT_ID); +} + /** * struct rdt_hw_resource - arch private attributes of a resctrl resource * @r_resctrl: Attributes of the resource used directly by resctrl. 
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c9be2d0819c0..2a1cbd4de6ee 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -251,11 +251,11 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d) { struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - if (resctrl_arch_is_mbm_total_enabled()) + if (is_mbm_total_enabled()) memset(hw_dom->arch_mbm_total, 0, sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); - if (resctrl_arch_is_mbm_local_enabled()) + if (is_mbm_local_enabled()) memset(hw_dom->arch_mbm_local, 0, sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); } @@ -514,7 +514,7 @@ void free_rmid(u32 closid, u32 rmid) entry = __rmid_entry(idx); - if (resctrl_arch_is_llc_occupancy_enabled()) + if (is_llc_occupancy_enabled()) add_rmid_to_limbo(entry); else list_add_tail(&entry->list, &rmid_free_lru); @@ -666,7 +666,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) struct list_head *head; struct rdtgroup *entry; - if (!resctrl_arch_is_mbm_local_enabled()) + if (!is_mbm_local_enabled()) return; r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); @@ -735,7 +735,7 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, * This is protected from concurrent reads from user * as both the user and we hold the global mutex. 
*/ - if (resctrl_arch_is_mbm_total_enabled()) { + if (is_mbm_total_enabled()) { rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID; rr.val = 0; rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); @@ -749,7 +749,7 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); } - if (resctrl_arch_is_mbm_local_enabled()) { + if (is_mbm_local_enabled()) { rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; rr.val = 0; rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); @@ -997,11 +997,11 @@ static void l3_mon_evt_init(struct rdt_resource *r) { INIT_LIST_HEAD(&r->evt_list); - if (resctrl_arch_is_llc_occupancy_enabled()) + if (is_llc_occupancy_enabled()) list_add_tail(&llc_occupancy_event.list, &r->evt_list); - if (resctrl_arch_is_mbm_total_enabled()) + if (is_mbm_total_enabled()) list_add_tail(&mbm_total_event.list, &r->evt_list); - if (resctrl_arch_is_mbm_local_enabled()) + if (is_mbm_local_enabled()) list_add_tail(&mbm_local_event.list, &r->evt_list); } diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 8285b916289c..3f16e7854411 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -114,18 +114,6 @@ void rdt_staged_configs_clear(void) } } -static bool resctrl_is_mbm_enabled(void) -{ - return (resctrl_arch_is_mbm_total_enabled() || - resctrl_arch_is_mbm_local_enabled()); -} - -static bool resctrl_is_mbm_event(int e) -{ - return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_L3_MBM_LOCAL_EVENT_ID); -} - /* * Trivial allocator for CLOSIDs. Since h/w only supports a small number, * we can keep a bitmap of free CLOSIDs in a single integer. 
@@ -173,7 +161,7 @@ static int closid_alloc(void) lockdep_assert_held(&rdtgroup_mutex); if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && - resctrl_arch_is_llc_occupancy_enabled()) { + is_llc_occupancy_enabled()) { cleanest_closid = resctrl_find_cleanest_closid(); if (cleanest_closid < 0) return cleanest_closid; @@ -2382,7 +2370,7 @@ static bool supports_mba_mbps(void) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - return (resctrl_arch_is_mbm_local_enabled() && + return (is_mbm_local_enabled() && r->alloc_capable && is_mba_linear()); } @@ -2750,7 +2738,7 @@ static int rdt_get_tree(struct fs_context *fc) if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable()) resctrl_mounted = true; - if (resctrl_is_mbm_enabled()) { + if (is_mbm_enabled()) { list_for_each_entry(dom, &l3->domains, list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); @@ -3119,7 +3107,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (ret) goto out_destroy; - if (resctrl_is_mbm_event(mevt->evtid)) + if (is_mbm_event(mevt->evtid)) mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); } kernfs_activate(kn); @@ -4018,9 +4006,9 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) if (resctrl_mounted && resctrl_arch_mon_capable()) rmdir_mondata_subdir_allrdtgrp(r, d->id); - if (resctrl_is_mbm_enabled()) + if (is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); - if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { + if (is_llc_occupancy_enabled() && has_busy_rmid(d)) { /* * When a package is going down, forcefully * decrement rmid->ebusy. 
There is no way to know @@ -4044,12 +4032,12 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize; - if (resctrl_arch_is_llc_occupancy_enabled()) { + if (is_llc_occupancy_enabled()) { d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); if (!d->rmid_busy_llc) return -ENOMEM; } - if (resctrl_arch_is_mbm_total_enabled()) { + if (is_mbm_total_enabled()) { tsize = sizeof(*d->mbm_total); d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_total) { @@ -4057,7 +4045,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) return -ENOMEM; } } - if (resctrl_arch_is_mbm_local_enabled()) { + if (is_mbm_local_enabled()) { tsize = sizeof(*d->mbm_local); d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_local) { @@ -4089,13 +4077,13 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) if (err) goto out_unlock; - if (resctrl_is_mbm_enabled()) { + if (is_mbm_enabled()) { INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); } - if (resctrl_arch_is_llc_occupancy_enabled()) + if (is_llc_occupancy_enabled()) INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); /* @@ -4150,12 +4138,12 @@ void resctrl_offline_cpu(unsigned int cpu) d = get_domain_from_cpu(cpu, l3); if (d) { - if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) { + if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); mbm_setup_overflow_handler(d, 0, cpu); } - if (resctrl_arch_is_llc_occupancy_enabled() && - cpu == d->cqm_work_cpu && has_busy_rmid(d)) { + if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu && + has_busy_rmid(d)) { cancel_delayed_work(&d->cqm_limbo); cqm_setup_limbo_handler(d, 0, cpu); } -- Gitee From fabed29d79e0cb5aa37dd5769a18c8d785046ff9 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 
-0800 Subject: [PATCH 145/158] anolis: Revert "x86/resctrl: Stop using the for_each_*_rdt_resource() walkers" ANBZ: #31045 This reverts commit 4e298aef7263f11d34142733dac1f84600421a65. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 7 +----- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 30 ++++------------------- 2 files changed, 6 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index f2315a50ea4f..884b88e25141 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -840,7 +840,6 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) { cpumask_var_t cpu_with_psl; - enum resctrl_res_level i; struct rdt_resource *r; struct rdt_domain *d_i; bool ret = false; @@ -855,11 +854,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) * First determine which cpus have pseudo-locked regions * associated with them. */ - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->alloc_capable) - continue; - + for_each_alloc_capable_rdt_resource(r) { list_for_each_entry(d_i, &r->domains, list) { if (d_i->plr) cpumask_or(cpu_with_psl, cpu_with_psl, diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 3f16e7854411..e736e4d20f63 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -98,17 +98,12 @@ void rdt_last_cmd_printf(const char *fmt, ...) 
void rdt_staged_configs_clear(void) { - enum resctrl_res_level i; struct rdt_resource *r; struct rdt_domain *dom; lockdep_assert_held(&rdtgroup_mutex); - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->alloc_capable) - continue; - + for_each_alloc_capable_rdt_resource(r) { list_for_each_entry(dom, &r->domains, list) memset(dom->staged_config, 0, sizeof(dom->staged_config)); } @@ -2186,7 +2181,6 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name, static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) { - enum resctrl_res_level i; struct resctrl_schema *s; struct rdt_resource *r; unsigned long fflags; @@ -2211,12 +2205,8 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->mon_capable) - continue; - - fflags = r->fflags | RFTYPE_MON_INFO; + for_each_mon_capable_rdt_resource(r) { + fflags = r->fflags | RFTYPE_MON_INFO; sprintf(name, "%s_MON", r->name); ret = rdtgroup_mkdir_info_resdir(r, name, fflags); if (ret) @@ -2625,15 +2615,10 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type static int schemata_list_create(void) { - enum resctrl_res_level i; struct rdt_resource *r; int ret = 0; - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->alloc_capable) - continue; - + for_each_alloc_capable_rdt_resource(r) { if (resctrl_arch_get_cdp_enabled(r->rid)) { ret = schemata_list_add(r, CDP_CODE); if (ret) @@ -3181,7 +3166,6 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, struct kernfs_node **dest_kn) { - enum resctrl_res_level i; struct rdt_resource *r; struct kernfs_node *kn; int ret; @@ -3200,11 +3184,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, * Create the subdirectories for each domain. 
Note that all events * in a domain like L3 are grouped into a resource whose domain is L3 */ - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - r = resctrl_arch_get_resource(i); - if (!r->mon_capable) - continue; - + for_each_mon_capable_rdt_resource(r) { ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); if (ret) goto out_destroy; -- Gitee From 2f5f823e02a3603bd3bd591ac2c771172dc40e94 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 146/158] anolis: Revert "x86/resctrl: Move max_{name,data}_width into resctrl code" ANBZ: #31045 This reverts commit f850b8a72a994977c823b6b906df2278f017c1ef. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 22 ++++++++++++++++++++++ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 ------------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 7242d15d4806..bafd289dd4b0 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -44,6 +44,12 @@ static DEFINE_MUTEX(domain_list_lock); */ DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state); +/* + * Used to store the max resource name width and max resource data width + * to display the schemata in a tabular format + */ +int max_name_width, max_data_width; + /* * Global boolean for rdt_alloc which is true if any * resource allocation is enabled. @@ -642,6 +648,20 @@ static int resctrl_arch_offline_cpu(unsigned int cpu) return 0; } +/* + * Choose a width for the resource name and resource data based on the + * resource that has widest name and cbm. 
+ */ +static __init void rdt_init_padding(void) +{ + struct rdt_resource *r; + + for_each_alloc_capable_rdt_resource(r) { + if (r->data_width > max_data_width) + max_data_width = r->data_width; + } +} + enum { RDT_FLAG_CMT, RDT_FLAG_MBM_TOTAL, @@ -943,6 +963,8 @@ static int __init resctrl_arch_late_init(void) if (!get_rdt_resources()) return -ENODEV; + rdt_init_padding(); + state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/resctrl/cat:online:", resctrl_arch_online_cpu, diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e736e4d20f63..6cf4ebe9c058 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -58,12 +58,6 @@ static struct kernfs_node *kn_mongrp; /* Kernel fs node for "mon_data" directory under root */ static struct kernfs_node *kn_mondata; -/* - * Used to store the max resource name width and max resource data width - * to display the schemata in a tabular format - */ -int max_name_width, max_data_width; - static struct seq_buf last_cmd_status; static char last_cmd_status_buf[512]; @@ -2601,12 +2595,6 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type if (cl > max_name_width) max_name_width = cl; - /* - * Choose a width for the resource data based on the resource that has - * widest name and cbm. - */ - max_data_width = max(max_data_width, r->data_width); - INIT_LIST_HEAD(&s->list); list_add(&s->list, &resctrl_schema_all); -- Gitee From 392946c63d75b31e58bdbbcbc9b0bc6021bed5b9 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 147/158] anolis: Revert "x86/resctrl: Move monitor exit work to a restrl exit call" ANBZ: #31045 This reverts commit bc54ca1adf49160278c3912d3312d2a97aaa299d. 
Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 5 +++++ arch/x86/kernel/cpu/resctrl/internal.h | 2 +- arch/x86/kernel/cpu/resctrl/monitor.c | 12 ++++-------- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 -- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index bafd289dd4b0..e5143f8af919 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -992,9 +992,14 @@ late_initcall(resctrl_arch_late_init); static void __exit resctrl_arch_exit(void) { + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + cpuhp_remove_state(rdt_online); resctrl_exit(); + + if (r->mon_capable) + rdt_put_mon_l3_config(); } __exitcall(resctrl_arch_exit); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 01fcd4ef26ca..7a0c74779c53 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -533,7 +533,7 @@ void closid_free(int closid); int alloc_rmid(u32 closid); void free_rmid(u32 closid, u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); -void __exit resctrl_mon_resource_exit(void); +void __exit rdt_put_mon_l3_config(void); bool __init rdt_cpu_has(int flag); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 2a1cbd4de6ee..929ec1430b45 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -954,12 +954,10 @@ static int dom_data_init(struct rdt_resource *r) return err; } -static void __exit dom_data_exit(struct rdt_resource *r) +static void __exit dom_data_exit(void) { - if (!r->mon_capable) - return; - mutex_lock(&rdtgroup_mutex); + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { kfree(closid_num_dirty_rmid); closid_num_dirty_rmid = NULL; @@ -1076,11 
+1074,9 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) return 0; } -void __exit resctrl_mon_resource_exit(void) +void __exit rdt_put_mon_l3_config(void) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - - dom_data_exit(r); + dom_data_exit(); } void __init intel_rdt_mbm_apply_quirk(void) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6cf4ebe9c058..7a9696f53f2b 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -4186,6 +4186,4 @@ void __exit resctrl_exit(void) debugfs_remove_recursive(debugfs_resctrl); unregister_filesystem(&rdt_fs_type); sysfs_remove_mount_point(fs_kobj, "resctrl"); - - resctrl_mon_resource_exit(); } -- Gitee From 30e4d76ca9beeea23c4811cd2f24ef9fce88ca3c Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 148/158] anolis: Revert "x86/resctrl: Move monitor init work to a resctrl init call" ANBZ: #31045 This reverts commit f000abf1c27606112309393c6868c2b553562124. 
Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/internal.h | 1 - arch/x86/kernel/cpu/resctrl/monitor.c | 24 +++++++----------------- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 4 ---- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 7a0c74779c53..031948322eab 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -540,7 +540,6 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_domain *d, struct rdtgroup *rdtgrp, int evtid, int first); -int resctrl_mon_resource_init(void); void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms, int exclude_cpu); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 929ec1430b45..06565153ceb2 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1003,28 +1003,12 @@ static void l3_mon_evt_init(struct rdt_resource *r) list_add_tail(&mbm_local_event.list, &r->evt_list); } -int resctrl_mon_resource_init(void) -{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - int ret; - - if (!r->mon_capable) - return 0; - - ret = dom_data_init(r); - if (ret) - return ret; - - l3_mon_evt_init(r); - - return 0; -} - int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int threshold; + int ret; resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; @@ -1052,6 +1036,10 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) */ resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold); + ret = dom_data_init(r); + if (ret) + return ret; + if 
(rdt_cpu_has(X86_FEATURE_BMEC)) { u32 eax, ebx, ecx, edx; @@ -1069,6 +1057,8 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) } } + l3_mon_evt_init(r); + r->mon_capable = true; return 0; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 7a9696f53f2b..13c24cb18d76 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -4138,10 +4138,6 @@ int __init resctrl_init(void) rdtgroup_setup_default(); - ret = resctrl_mon_resource_init(); - if (ret) - return ret; - ret = sysfs_create_mount_point(fs_kobj, "resctrl"); if (ret) return ret; -- Gitee From e09654f371cb8314e4a6e70dae809eeef8796d52 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 149/158] anolis: Revert "x86/resctrl: Add a resctrl helper to reset all the resources" ANBZ: #31045 This reverts commit e460a81ab44a15d58743f9f2de0c0ae22eef3bf0. Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 2 -- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 16 +++++----------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 5f6a5375bb4a..f61382258743 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -15,8 +15,6 @@ */ #define X86_RESCTRL_EMPTY_CLOSID ((u32)~0) -void resctrl_arch_reset_resources(void); - /** * struct resctrl_pqr_state - State cache for the PQR MSR * @cur_rmid: The cached Resource Monitoring ID diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 13c24cb18d76..1a49c9918f8d 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2859,14 +2859,6 @@ static int reset_all_ctrls(struct rdt_resource *r) return 0; } -void resctrl_arch_reset_resources(void) -{ - struct rdt_resource *r; - - for_each_capable_rdt_resource(r) - reset_all_ctrls(r); -} - /* * Move tasks from 
one to the other group. If @from is NULL, then all tasks * in the systems are moved unconditionally (used for teardown). @@ -2976,14 +2968,16 @@ static void rmdir_all_sub(void) static void rdt_kill_sb(struct super_block *sb) { + struct rdt_resource *r; + cpus_read_lock(); mutex_lock(&rdtgroup_mutex); rdt_disable_ctx(); - /* Put everything back to default values. */ - resctrl_arch_reset_resources(); - + /*Put everything back to default values. */ + for_each_alloc_capable_rdt_resource(r) + reset_all_ctrls(r); rmdir_all_sub(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; -- Gitee From 92c42c3bcc418032f2f011a020b00a74951aa6b7 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 150/158] anolis: Revert "x86/resctrl: Move resctrl types to a separate header" ANBZ: #31045 This reverts commit f662c9134df0ff54993bfab8bfbf273f306d6796. Signed-off-by: Jason Zeng --- MAINTAINERS | 1 - arch/x86/kernel/cpu/resctrl/internal.h | 24 +++++++++ include/linux/resctrl.h | 35 ++++++++++++- include/linux/resctrl_types.h | 68 -------------------------- 4 files changed, 58 insertions(+), 70 deletions(-) delete mode 100644 include/linux/resctrl_types.h diff --git a/MAINTAINERS b/MAINTAINERS index cde0fe482aae..85d4f1549af6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18204,7 +18204,6 @@ S: Supported F: Documentation/arch/x86/resctrl* F: arch/x86/include/asm/resctrl.h F: arch/x86/kernel/cpu/resctrl/ -F: include/linux/resctrl*.h F: tools/testing/selftests/resctrl/ READ-COPY UPDATE (RCU) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 031948322eab..32ade929ea1b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -32,6 +32,30 @@ */ #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) +/* Reads to Local DRAM Memory */ +#define READS_TO_LOCAL_MEM BIT(0) + +/* Reads to Remote DRAM Memory */ +#define READS_TO_REMOTE_MEM 
BIT(1) + +/* Non-Temporal Writes to Local Memory */ +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) + +/* Non-Temporal Writes to Remote Memory */ +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) + +/* Reads to Local Memory the system identifies as "Slow Memory" */ +#define READS_TO_LOCAL_S_MEM BIT(4) + +/* Reads to Remote Memory the system identifies as "Slow Memory" */ +#define READS_TO_REMOTE_S_MEM BIT(5) + +/* Dirty Victims to All Types of Memory */ +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) + +/* Max event bits supported */ +#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) + /** * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that * aren't marked nohz_full diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b0ee7256e095..c5fcbb524136 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -5,7 +5,6 @@ #include #include #include -#include /* CLOSID, RMID value used by the default control group */ #define RESCTRL_RESERVED_CLOSID 0 @@ -25,6 +24,40 @@ int proc_resctrl_show(struct seq_file *m, /* max value for struct rdt_domain's mbps_val */ #define MBA_MAX_MBPS U32_MAX +/** + * enum resctrl_conf_type - The type of configuration. + * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. + * @CDP_CODE: Configuration applies to instruction fetches. + * @CDP_DATA: Configuration applies to reads and writes. + */ +enum resctrl_conf_type { + CDP_NONE, + CDP_CODE, + CDP_DATA, +}; + +enum resctrl_res_level { + RDT_RESOURCE_L3, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + +#define CDP_NUM_TYPES (CDP_DATA + 1) + +/* + * Event IDs, the values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. 
+ */ +enum resctrl_event_id { + QOS_L3_OCCUP_EVENT_ID = 0x01, + QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, + QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +}; + /** * struct resctrl_staged_config - parsed configuration to be applied * @new_ctrl: new ctrl value to be loaded diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h deleted file mode 100644 index 4788bd95dac6..000000000000 --- a/include/linux/resctrl_types.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2024 Arm Ltd. - * Based on arch/x86/kernel/cpu/resctrl/internal.h - */ - -#ifndef __LINUX_RESCTRL_TYPES_H -#define __LINUX_RESCTRL_TYPES_H - -/* Reads to Local DRAM Memory */ -#define READS_TO_LOCAL_MEM BIT(0) - -/* Reads to Remote DRAM Memory */ -#define READS_TO_REMOTE_MEM BIT(1) - -/* Non-Temporal Writes to Local Memory */ -#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) - -/* Non-Temporal Writes to Remote Memory */ -#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) - -/* Reads to Local Memory the system identifies as "Slow Memory" */ -#define READS_TO_LOCAL_S_MEM BIT(4) - -/* Reads to Remote Memory the system identifies as "Slow Memory" */ -#define READS_TO_REMOTE_S_MEM BIT(5) - -/* Dirty Victims to All Types of Memory */ -#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) - -/* Max event bits supported */ -#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) - -/** - * enum resctrl_conf_type - The type of configuration. - * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. - * @CDP_CODE: Configuration applies to instruction fetches. - * @CDP_DATA: Configuration applies to reads and writes. 
- */ -enum resctrl_conf_type { - CDP_NONE, - CDP_CODE, - CDP_DATA, -}; - -enum resctrl_res_level { - RDT_RESOURCE_L3, - RDT_RESOURCE_L2, - RDT_RESOURCE_MBA, - RDT_RESOURCE_SMBA, - - /* Must be the last */ - RDT_NUM_RESOURCES, -}; - -#define CDP_NUM_TYPES (CDP_DATA + 1) - -/* - * Event IDs, the values match those used to program IA32_QM_EVTSEL before - * reading IA32_QM_CTR on RDT systems. - */ -enum resctrl_event_id { - QOS_L3_OCCUP_EVENT_ID = 0x01, - QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, - QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, -}; - -#endif /* __LINUX_RESCTRL_TYPES_H */ -- Gitee From ad375c744e062fd4540b7ceb577ef960f605ba89 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 151/158] anolis: Revert "x86/resctrl: Wrap resctrl_arch_find_domain() around rdt_find_domain()" ANBZ: #31045 This reverts commit 3e28a3f9a8bb8afe523319c14b1828d7373a677f. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 9 ++------- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 2 +- arch/x86/kernel/cpu/resctrl/internal.h | 2 ++ include/linux/resctrl.h | 2 -- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e5143f8af919..fd7305ae2121 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -405,8 +405,8 @@ void rdt_ctrl_update(void *arg) * caller, return the first domain whose id is bigger than the input id. * The domain list is sorted by id in ascending order. 
*/ -static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, - struct list_head **pos) +struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, + struct list_head **pos) { struct rdt_domain *d; struct list_head *l; @@ -430,11 +430,6 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, return NULL; } -struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id) -{ - return rdt_find_domain(r, id, NULL); -} - static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index ad9e5516e607..077b4cbaa074 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -623,7 +623,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) evtid = md.u.evtid; r = resctrl_arch_get_resource(resid); - d = resctrl_arch_find_domain(r, domid); + d = rdt_find_domain(r, domid, NULL); if (IS_ERR_OR_NULL(d)) { ret = -ENOENT; goto out; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 32ade929ea1b..7c073298aabf 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -533,6 +533,8 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn); int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name); int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, umode_t mask); +struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, + struct list_head **pos); ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); int rdtgroup_schemata_show(struct kernfs_open_file *of, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index c5fcbb524136..f6a4b75f8122 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -233,8 +233,6 @@ void 
resctrl_arch_sync_cpu_defaults(void *info); /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); - -struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); /* -- Gitee From 5193d8ae762dca0e90466de1c50de05b450b1665 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:17 -0800 Subject: [PATCH 152/158] anolis: Revert "x86/resctrl: Export resctrl fs's init function" ANBZ: #31045 This reverts commit 150ff37f24ddca2d34dff83890f7c432600fdeef. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 12 ++++++------ arch/x86/kernel/cpu/resctrl/internal.h | 3 +++ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 8 ++++---- include/linux/resctrl.h | 3 --- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index fd7305ae2121..ad5ab8cfa455 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -942,7 +942,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) } } -static int __init resctrl_arch_late_init(void) +static int __init resctrl_late_init(void) { struct rdt_resource *r; int state, ret; @@ -967,7 +967,7 @@ static int __init resctrl_arch_late_init(void) if (state < 0) return state; - ret = resctrl_init(); + ret = rdtgroup_init(); if (ret) { cpuhp_remove_state(state); return ret; @@ -983,18 +983,18 @@ static int __init resctrl_arch_late_init(void) return 0; } -late_initcall(resctrl_arch_late_init); +late_initcall(resctrl_late_init); -static void __exit resctrl_arch_exit(void) +static void __exit resctrl_exit(void) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; cpuhp_remove_state(rdt_online); - resctrl_exit(); + rdtgroup_exit(); if (r->mon_capable) rdt_put_mon_l3_config(); } -__exitcall(resctrl_arch_exit); +__exitcall(resctrl_exit); diff --git 
a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 7c073298aabf..9048bd32e86f 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -300,6 +300,9 @@ extern struct list_head rdt_all_groups; extern int max_name_width, max_data_width; +int __init rdtgroup_init(void); +void __exit rdtgroup_exit(void); + /** * struct rftype - describe each file in the resctrl file system * @name: File name diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 1a49c9918f8d..18f097fce51e 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -4116,14 +4116,14 @@ void resctrl_offline_cpu(unsigned int cpu) } /* - * resctrl_init - resctrl filesystem initialization + * rdtgroup_init - rdtgroup initialization * * Setup resctrl file system including set up root, create mount point, - * register resctrl filesystem, and initialize files under root directory. + * register rdtgroup filesystem, and initialize files under root directory. 
* * Return: 0 on success or -errno */ -int __init resctrl_init(void) +int __init rdtgroup_init(void) { int ret = 0; @@ -4171,7 +4171,7 @@ int __init resctrl_init(void) return ret; } -void __exit resctrl_exit(void) +void __exit rdtgroup_exit(void) { debugfs_remove_recursive(debugfs_resctrl); unregister_filesystem(&rdt_fs_type); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index f6a4b75f8122..2b79e4159507 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -325,7 +325,4 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d); extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -int __init resctrl_init(void); -void __exit resctrl_exit(void); - #endif /* _RESCTRL_H */ -- Gitee From af4238d98bd473e135f62f97fb65cd04efaed78e Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:18 -0800 Subject: [PATCH 153/158] anolis: Revert "x86/resctrl: Remove rdtgroup from update_cpu_closid_rmid()" ANBZ: #31045 This reverts commit 9cdf4c8262b44a75da0e99fd94aa90b07cb1af17. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 19 ++++--------------- include/linux/resctrl.h | 11 ----------- 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 18f097fce51e..5d2c1ce5b6b1 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -341,13 +341,13 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, * from update_closid_rmid() is protected against __switch_to() because * preemption is disabled. 
*/ -void resctrl_arch_sync_cpu_defaults(void *info) +static void update_cpu_closid_rmid(void *info) { - struct resctrl_cpu_sync *r = info; + struct rdtgroup *r = info; if (r) { this_cpu_write(pqr_state.default_closid, r->closid); - this_cpu_write(pqr_state.default_rmid, r->rmid); + this_cpu_write(pqr_state.default_rmid, r->mon.rmid); } /* @@ -362,22 +362,11 @@ void resctrl_arch_sync_cpu_defaults(void *info) * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, * * Per task closids/rmids must have been set up before calling this function. - * @r may be NULL. */ static void update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) { - struct resctrl_cpu_sync defaults; - struct resctrl_cpu_sync *defaults_p = NULL; - - if (r) { - defaults.closid = r->closid; - defaults.rmid = r->mon.rmid; - defaults_p = &defaults; - } - - on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_defaults, defaults_p, - 1); + on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1); } static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 2b79e4159507..6e87bc95f5ea 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -220,17 +220,6 @@ struct resctrl_schema { u32 num_closid; }; -struct resctrl_cpu_sync { - u32 closid; - u32 rmid; -}; - -/* - * Update and re-load this CPUs defaults. Called via IPI, takes a pointer to - * struct resctrl_cpu_sync, or NULL. 
- */ -void resctrl_arch_sync_cpu_defaults(void *info); - /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); -- Gitee From 6292a151ee48c7591863478e9571cb566336e328 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:18 -0800 Subject: [PATCH 154/158] anolis: Revert "x86/resctrl: Add helper for setting CPU default properties" ANBZ: #31045 This reverts commit e1345d13f3428c782b44589fff7341233305494c. Signed-off-by: Jason Zeng --- arch/x86/include/asm/resctrl.h | 14 +++----------- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 15 +++++---------- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index f61382258743..12dbd2588ca7 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -4,9 +4,8 @@ #ifdef CONFIG_X86_CPU_RESCTRL -#include -#include #include +#include /* * This value can never be a valid CLOSID, and is used when mapping a @@ -97,8 +96,8 @@ static inline void resctrl_arch_disable_mon(void) static inline void __resctrl_sched_in(struct task_struct *tsk) { struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); - u32 closid = READ_ONCE(state->default_closid); - u32 rmid = READ_ONCE(state->default_rmid); + u32 closid = state->default_closid; + u32 rmid = state->default_rmid; u32 tmp; /* @@ -133,13 +132,6 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) return val * scale; } -static inline void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, - u32 rmid) -{ - WRITE_ONCE(per_cpu(pqr_state.default_closid, cpu), closid); - WRITE_ONCE(per_cpu(pqr_state.default_rmid, cpu), rmid); -} - static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid) { diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c 
b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 5d2c1ce5b6b1..45372b6a6215 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -3623,18 +3623,14 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { struct rdtgroup *prdtgrp = rdtgrp->mon.parent; - u32 closid, rmid; int cpu; /* Give any tasks back to the parent group */ rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); /* Update per cpu rmid of the moved CPUs first */ - closid = rdtgrp->closid; - rmid = prdtgrp->mon.rmid; for_each_cpu(cpu, &rdtgrp->cpu_mask) - resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); - + per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; /* * Update the MSR on moved CPUs and CPUs which have moved * task running on them. @@ -3667,7 +3663,6 @@ static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { - u32 closid, rmid; int cpu; /* Give any tasks back to the default group */ @@ -3678,10 +3673,10 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); /* Update per cpu closid and rmid of the moved CPUs first */ - closid = rdtgroup_default.closid; - rmid = rdtgroup_default.mon.rmid; - for_each_cpu(cpu, &rdtgrp->cpu_mask) - resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); + for_each_cpu(cpu, &rdtgrp->cpu_mask) { + per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; + per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; + } /* * Update the MSR on moved CPUs and CPUs which have moved -- Gitee From 8d9db1bf6f34ca0df230468c44c9bdb144ce827f Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:18 -0800 Subject: [PATCH 155/158] anolis: Revert "x86/resctrl: Move ctrlval string parsing policy away from the arch code" ANBZ: #31045 
This reverts commit 77777ccf69365818c8dd201a5ec867e87c5cdad8. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 4 ++++ arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 28 ++++------------------- arch/x86/kernel/cpu/resctrl/internal.h | 10 ++++++++ include/linux/resctrl.h | 7 ++++++ 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index ad5ab8cfa455..0b38d92d1249 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -75,6 +75,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "L3", .cache_level = 3, .domains = domain_init(RDT_RESOURCE_L3), + .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, @@ -88,6 +89,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "L2", .cache_level = 2, .domains = domain_init(RDT_RESOURCE_L2), + .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, @@ -101,6 +103,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "MB", .cache_level = 3, .domains = domain_init(RDT_RESOURCE_MBA), + .parse_ctrlval = parse_bw, .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, @@ -112,6 +115,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "SMBA", .cache_level = 3, .domains = domain_init(RDT_RESOURCE_SMBA), + .parse_ctrlval = parse_bw, .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 077b4cbaa074..e2634a3263a6 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -23,15 +23,6 @@ #include "internal.h" -struct rdt_parse_data { - struct rdtgroup *rdtgrp; - char *buf; -}; - -typedef int (ctrlval_parser_t)(struct rdt_parse_data *data, - struct resctrl_schema *s, - struct rdt_domain *d); - /* * Check whether MBA bandwidth percentage value is correct. 
The value is * checked against the minimum and max bandwidth values specified by the @@ -73,8 +64,8 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } -static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) +int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_domain *d) { struct resctrl_staged_config *cfg; u32 closid = data->rdtgrp->closid; @@ -152,8 +143,8 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) * Read one cache bit mask (hex). Check that it is valid for the current * resource type. */ -static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) +int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_domain *d) { struct rdtgroup *rdtgrp = data->rdtgrp; struct resctrl_staged_config *cfg; @@ -209,14 +200,6 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, return 0; } -static ctrlval_parser_t *get_parser(struct rdt_resource *res) -{ - if (res->fflags & RFTYPE_RES_CACHE) - return &parse_cbm; - else - return &parse_bw; -} - /* * For each domain in this resource we expect to find a series of: * id=mask @@ -226,7 +209,6 @@ static ctrlval_parser_t *get_parser(struct rdt_resource *res) static int parse_line(char *line, struct resctrl_schema *s, struct rdtgroup *rdtgrp) { - ctrlval_parser_t *parse_ctrlval = get_parser(s->res); enum resctrl_conf_type t = s->conf_type; struct resctrl_staged_config *cfg; struct rdt_resource *r = s->res; @@ -258,7 +240,7 @@ static int parse_line(char *line, struct resctrl_schema *s, if (d->id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; - if (parse_ctrlval(&data, s, d)) + if (r->parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { cfg = &d->staged_config[t]; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 
9048bd32e86f..65990def6c79 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -413,6 +413,11 @@ static inline bool is_mbm_event(int e) e <= QOS_L3_MBM_LOCAL_EVENT_ID); } +struct rdt_parse_data { + struct rdtgroup *rdtgrp; + char *buf; +}; + /** * struct rdt_hw_resource - arch private attributes of a resctrl resource * @r_resctrl: Attributes of the resource used directly by resctrl. @@ -450,6 +455,11 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r return container_of(r, struct rdt_hw_resource, r_resctrl); } +int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_domain *d); +int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_domain *d); + extern struct mutex rdtgroup_mutex; extern struct rdt_hw_resource rdt_resources_all[]; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 6e87bc95f5ea..168cc9510069 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -157,6 +157,9 @@ struct resctrl_membw { u32 *mb_map; }; +struct rdt_parse_data; +struct resctrl_schema; + /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource @@ -171,6 +174,7 @@ struct resctrl_membw { * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. 
* @format_str: Per resource format string to show domain value + * @parse_ctrlval: Per resource function pointer to parse control values * @evt_list: List of monitoring events * @fflags: flags to choose base and info files * @cdp_capable: Is the CDP feature available on this resource @@ -188,6 +192,9 @@ struct rdt_resource { int data_width; u32 default_ctrl; const char *format_str; + int (*parse_ctrlval)(struct rdt_parse_data *data, + struct resctrl_schema *s, + struct rdt_domain *d); struct list_head evt_list; unsigned long fflags; bool cdp_capable; -- Gitee From d6a0667f6a5813c483ac4e6a239b5f5fded20f0c Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:18 -0800 Subject: [PATCH 156/158] anolis: Revert "x86/resctrl: Add a helper to avoid reaching into the arch code resource list" ANBZ: #31045 This reverts commit 7fe72eac6e5f65dd5824baf5a7e7109622c2dc8f. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/core.c | 10 +--------- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 2 +- arch/x86/kernel/cpu/resctrl/internal.h | 10 ++++++++++ arch/x86/kernel/cpu/resctrl/monitor.c | 8 ++++---- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 15 ++++++++------- include/linux/resctrl.h | 17 ----------------- 6 files changed, 24 insertions(+), 38 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 0b38d92d1249..cab67782f19d 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -122,14 +122,6 @@ struct rdt_hw_resource rdt_resources_all[] = { }, }; -struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) -{ - if (l >= RDT_NUM_RESOURCES) - return NULL; - - return &rdt_resources_all[l].r_resctrl; -} - /* * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs * as they do not have CPUID enumeration support for Cache allocation. 
@@ -177,7 +169,7 @@ static inline void cache_alloc_hsw_probe(void) bool is_mba_sc(struct rdt_resource *r) { if (!r) - r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc; /* * The software controller support is only applicable to MBA resource. diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index e2634a3263a6..8bd717222f0c 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -604,7 +604,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) domid = md.u.domid; evtid = md.u.evtid; - r = resctrl_arch_get_resource(resid); + r = &rdt_resources_all[resid].r_resctrl; d = rdt_find_domain(r, domid, NULL); if (IS_ERR_OR_NULL(d)) { ret = -ENOENT; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 65990def6c79..c99f26ebe7a6 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -466,6 +466,16 @@ extern struct rdt_hw_resource rdt_resources_all[]; extern struct rdtgroup rdtgroup_default; extern struct dentry *debugfs_resctrl; +enum resctrl_res_level { + RDT_RESOURCE_L3, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 06565153ceb2..c34a35ec0f03 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -321,7 +321,7 @@ static void limbo_release_entry(struct rmid_entry *entry) */ void __check_limbo(struct rdt_domain *d, bool force_free) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; u32 
idx_limit = resctrl_arch_system_num_rmid_idx(); struct rmid_entry *entry; u32 idx, cur_idx = 1; @@ -467,7 +467,7 @@ int alloc_rmid(u32 closid) static void add_rmid_to_limbo(struct rmid_entry *entry) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; struct rdt_domain *d; u32 idx; @@ -669,7 +669,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) if (!is_mbm_local_enabled()) return; - r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; closid = rgrp->closid; rmid = rgrp->mon.rmid; @@ -839,7 +839,7 @@ void mbm_handle_overflow(struct work_struct *work) if (!resctrl_mounted || !resctrl_arch_mon_capable()) goto out_unlock; - r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; d = container_of(work, struct rdt_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 45372b6a6215..1767c1affa60 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2253,7 +2253,7 @@ static void l2_qos_cfg_update(void *arg) static inline bool is_mba_linear(void) { - return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; + return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; } static int set_cache_qos_cfg(int level, bool enable) @@ -2341,7 +2341,7 @@ static void mba_sc_domain_destroy(struct rdt_resource *r, */ static bool supports_mba_mbps(void) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; return (is_mbm_local_enabled() && r->alloc_capable && is_mba_linear()); @@ -2353,7 +2353,7 @@ static bool supports_mba_mbps(void) */ static int set_mba_sc(bool mba_sc) 
{ - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; u32 num_closid = resctrl_arch_get_num_closid(r); struct rdt_domain *d; int i; @@ -2625,10 +2625,10 @@ static void schemata_list_destroy(void) static int rdt_get_tree(struct fs_context *fc) { - struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_fs_context *ctx = rdt_fc2context(fc); unsigned long flags = RFTYPE_CTRL_BASE; struct rdt_domain *dom; + struct rdt_resource *r; int ret; cpus_read_lock(); @@ -2701,7 +2701,8 @@ static int rdt_get_tree(struct fs_context *fc) resctrl_mounted = true; if (is_mbm_enabled()) { - list_for_each_entry(dom, &l3->domains, list) + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + list_for_each_entry(dom, &r->domains, list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); } @@ -3877,7 +3878,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) seq_puts(seq, ",cdpl2"); - if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA))) + if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) seq_puts(seq, ",mba_MBps"); if (resctrl_debug) @@ -4067,7 +4068,7 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) void resctrl_offline_cpu(unsigned int cpu) { - struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; struct rdtgroup *rdtgrp; struct rdt_domain *d; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 168cc9510069..a365f67131ec 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -36,16 +36,6 @@ enum resctrl_conf_type { CDP_DATA, }; -enum resctrl_res_level { - RDT_RESOURCE_L3, - RDT_RESOURCE_L2, - RDT_RESOURCE_MBA, - RDT_RESOURCE_SMBA, - - /* Must be the last */ - RDT_NUM_RESOURCES, -}; - #define CDP_NUM_TYPES 
(CDP_DATA + 1) /* @@ -200,13 +190,6 @@ struct rdt_resource { bool cdp_capable; }; -/* - * Get the resource that exists at this level. If the level is not supported - * a dummy/not-capable resource can be returned. Levels >= RDT_NUM_RESOURCES - * will return NULL. - */ -struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l); - /** * struct resctrl_schema - configuration abilities of a resource presented to * user-space -- Gitee From 14dc31e67957674074d04eef26fdb56a58dc7c71 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:52:18 -0800 Subject: [PATCH 157/158] anolis: Revert "x86/resctrl: Fix allocation of cleanest CLOSID on platforms with no monitors" ANBZ: #31045 This reverts commit 2e35947feee6230863211caf0e366798b9fa7e6a. Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 1767c1affa60..011e17efb1a6 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -149,8 +149,7 @@ static int closid_alloc(void) lockdep_assert_held(&rdtgroup_mutex); - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && - is_llc_occupancy_enabled()) { + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { cleanest_closid = resctrl_find_cleanest_closid(); if (cleanest_closid < 0) return cleanest_closid; -- Gitee From eb9b03b144f933d91780b91f2398b2166d5c0ac4 Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Sun, 25 Jan 2026 23:55:27 -0800 Subject: [PATCH 158/158] anolis: Revert "anolis: x86/resctrl: Add Hygon QoS support" ANBZ: #31045 This reverts commit b97a96c905dccb5bdf3e050a182411f9153a8b1b. 
Signed-off-by: Jason Zeng --- arch/x86/kernel/cpu/hygon.c | 2 -- arch/x86/kernel/cpu/resctrl/core.c | 10 +++------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 6c5a17d7a4a3..1491a111e619 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "cpu.h" @@ -121,7 +120,6 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c) x86_amd_ls_cfg_ssbd_mask = 1ULL << 10; } } - resctrl_cpu_detect(c); } static void init_hygon_cap(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index cab67782f19d..4edc2b9d53c4 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -747,8 +747,7 @@ static __init bool get_mem_config(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) return __get_mem_config_intel(&hw_res->r_resctrl); - else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return __rdt_get_mem_config_amd(&hw_res->r_resctrl); return false; @@ -899,8 +898,7 @@ static __init void rdt_init_res_defs(void) { if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) rdt_init_res_defs_intel(); - else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) rdt_init_res_defs_amd(); } @@ -931,9 +929,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) c->x86_cache_occ_scale = ebx; c->x86_cache_mbm_width_offset = eax & 0xff; - if ((c->x86_vendor == X86_VENDOR_AMD || - c->x86_vendor == X86_VENDOR_HYGON) && - !c->x86_cache_mbm_width_offset) + if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset) c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD; } } -- Gitee