diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e0193a18f4fad70bd61b187cf1f9c5434b8a4494..99d2596e0174d13ea9ac5f64ad3fbacccec87391 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5538,7 +5538,7 @@
 	rdt=		[HW,X86,RDT]
 			Turn on/off individual RDT features. List is:
 			cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
-			mba, smba, bmec, abmc, sdciae.
+			mba, smba, bmec.
 			E.g. to turn on cmt and turn off mba use:
 				rdt=cmt,!mba
 
diff --git a/Documentation/arch/arm64/cpu-feature-registers.rst b/Documentation/arch/arm64/cpu-feature-registers.rst
index 253e9743de2f96de515ee844f7b3e3a671a368b1..44f9bd78539d3603bc9a31b19c90cc6f7c96c14c 100644
--- a/Documentation/arch/arm64/cpu-feature-registers.rst
+++ b/Documentation/arch/arm64/cpu-feature-registers.rst
@@ -152,8 +152,6 @@ infrastructure:
      +------------------------------+---------+---------+
      | DIT                          | [51-48] |    y    |
      +------------------------------+---------+---------+
-     | MPAM                         | [43-40] |    n    |
-     +------------------------------+---------+---------+
      | SVE                          | [35-32] |    y    |
      +------------------------------+---------+---------+
      | GIC                          | [27-24] |    n    |
diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst
index 86adf6840bcd80e74ebb41d8cb712fc09208bc1a..a6279df64a9db8aa69dd08d8643e9cc9b7c42da7 100644
--- a/Documentation/arch/x86/resctrl.rst
+++ b/Documentation/arch/x86/resctrl.rst
@@ -17,18 +17,16 @@ AMD refers to this feature as AMD Platform Quality of Service(AMD QoS).
 This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo
 flag bits:
 
-=============================================================== ================================
-RDT (Resource Director Technology) Allocation			"rdt_a"
-CAT (Cache Allocation Technology)				"cat_l3", "cat_l2"
-CDP (Code and Data Prioritization)				"cdp_l3", "cdp_l2"
-CQM (Cache QoS Monitoring)					"cqm_llc", "cqm_occup_llc"
-MBM (Memory Bandwidth Monitoring)				"cqm_mbm_total", "cqm_mbm_local"
-MBA (Memory Bandwidth Allocation)				"mba"
-SMBA (Slow Memory Bandwidth Allocation)				""
-BMEC (Bandwidth Monitoring Event Configuration)			""
-ABMC (Assignable Bandwidth Monitoring Counters)			""
-SDCIAE (Smart Data Cache Injection Allocation Enforcement)	""
-=============================================================== ================================
+===============================================	================================
+RDT (Resource Director Technology) Allocation	"rdt_a"
+CAT (Cache Allocation Technology)		"cat_l3", "cat_l2"
+CDP (Code and Data Prioritization)		"cdp_l3", "cdp_l2"
+CQM (Cache QoS Monitoring)			"cqm_llc", "cqm_occup_llc"
+MBM (Memory Bandwidth Monitoring)		"cqm_mbm_total", "cqm_mbm_local"
+MBA (Memory Bandwidth Allocation)		"mba"
+SMBA (Slow Memory Bandwidth Allocation)		""
+BMEC (Bandwidth Monitoring Event Configuration)	""
+===============================================	================================
 
 Historically, new features were made visible by default in /proc/cpuinfo. This
 resulted in the feature flags becoming hard to parse by humans. Adding a new
@@ -73,11 +71,6 @@ The 'info' directory contains information about the enabled
 resources. Each resource has its own subdirectory. The subdirectory
 names reflect the resource names.
 
-Most of the files in the resource's subdirectory are read-only, and
-describe properties of the resource. Resources that support global
-configuration options also include writable files that can be used
-to modify those settings.
-
 Each subdirectory contains the following files with respect to
 allocation:
 
@@ -142,77 +135,6 @@ related to allocation:
 			"1":
 			      Non-contiguous 1s value in CBM is supported.
 
-"io_alloc":
-		"io_alloc" enables system software to configure the portion of
-		the cache allocated for I/O traffic. File may only exist if the
-		system supports this feature on some of its cache resources.
-
-			"disabled":
-			      Resource supports "io_alloc" but the feature is disabled.
-			      Portions of cache used for allocation of I/O traffic cannot
-			      be configured.
-			"enabled":
-			      Portions of cache used for allocation of I/O traffic
-			      can be configured using "io_alloc_cbm".
-			"not supported":
-			      Support not available for this resource.
-
-		The feature can be modified by writing to the interface, for example:
-
-		To enable::
-
-			# echo 1 > /sys/fs/resctrl/info/L3/io_alloc
-
-		To disable::
-
-			# echo 0 > /sys/fs/resctrl/info/L3/io_alloc
-
-		The underlying implementation may reduce resources available to
-		general (CPU) cache allocation. See architecture specific notes
-		below. Depending on usage requirements the feature can be enabled
-		or disabled.
-
-		On AMD systems, io_alloc feature is supported by the L3 Smart
-		Data Cache Injection Allocation Enforcement (SDCIAE). The CLOSID for
-		io_alloc is the highest CLOSID supported by the resource. When
-		io_alloc is enabled, the highest CLOSID is dedicated to io_alloc and
-		no longer available for general (CPU) cache allocation. When CDP is
-		enabled, io_alloc routes I/O traffic using the highest CLOSID allocated
-		for the instruction cache (CDP_CODE), making this CLOSID no longer
-		available for general (CPU) cache allocation for both the CDP_CODE
-		and CDP_DATA resources.
-
-"io_alloc_cbm":
-		Capacity bitmasks that describe the portions of cache instances to
-		which I/O traffic from supported I/O devices are routed when "io_alloc"
-		is enabled.
-
-		CBMs are displayed in the following format:
-
-			<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
-
-		Example::
-
-			# cat /sys/fs/resctrl/info/L3/io_alloc_cbm
-			0=ffff;1=ffff
-
-		CBMs can be configured by writing to the interface.
-
-		Example::
-
-			# echo 1=ff > /sys/fs/resctrl/info/L3/io_alloc_cbm
-			# cat /sys/fs/resctrl/info/L3/io_alloc_cbm
-			0=ffff;1=00ff
-
-			# echo "0=ff;1=f" > /sys/fs/resctrl/info/L3/io_alloc_cbm
-			# cat /sys/fs/resctrl/info/L3/io_alloc_cbm
-			0=00ff;1=000f
-
-		When CDP is enabled "io_alloc_cbm" associated with the CDP_DATA and CDP_CODE
-		resources may reflect the same values. For example, values read from and
-		written to /sys/fs/resctrl/info/L3DATA/io_alloc_cbm may be reflected by
-		/sys/fs/resctrl/info/L3CODE/io_alloc_cbm and vice versa.
-
 Memory bandwidth(MB) subdirectory contains the following files
 with respect to allocation:
 
@@ -334,205 +256,6 @@ with the following files:
 	    # cat /sys/fs/resctrl/info/L3_MON/mbm_local_bytes_config
 	    0=0x30;1=0x30;3=0x15;4=0x15
 
-"mbm_mode":
-	Reports the list of assignable monitoring features supported. The
-	enclosed brackets indicate which feature is enabled.
-	::
-
-	  cat /sys/fs/resctrl/info/L3_MON/mbm_mode
-	  [mbm_cntr_assign]
-	  legacy
-
-	"mbm_cntr_assign":
-		AMD's ABMC feature is one of the mbm_cntr_assign mode supported.
-		The bandwidth monitoring feature on AMD system only guarantees
-		that RMIDs currently assigned to a processor will be tracked by
-		hardware. The counters of any other RMIDs which are no longer
-		being tracked will be reset to zero. The MBM event counters
-		return "Unavailable" for the RMIDs that are not tracked by
-		hardware. So, there can be only limited number of groups that can
-		give guaranteed monitoring numbers. With ever changing configurations
-		there is no way to definitely know which of these groups are being
-		tracked for certain point of time. Users do not have the option to
-		monitor a group or set of groups for certain period of time without
-		worrying about RMID being reset in between.
-
-		The ABMC feature provides an option to the user to assign a hardware
-		counter to an RMID and monitor the bandwidth as long as it is assigned.
-		The assigned RMID will be tracked by the hardware until the user
-		unassigns it manually. There is no need to worry about counters being
-		reset during this period.
-
-	"Legacy":
-		Legacy mode works without the assignment option. The monitoring works
-		as long as there are enough RMID counters available to support number
-		of monitoring groups.
-
-	* To enable ABMC feature:
-	  ::
-
-	    # echo  "mbm_cntr_assign" > /sys/fs/resctrl/info/L3_MON/mbm_mode
-
-	* To enable the legacy monitoring feature:
-	  ::
-
-	    # echo  "legacy" > /sys/fs/resctrl/info/L3_MON/mbm_mode
-
-	The MBM event counters will reset when mbm_mode is changed. Moving to
-	mbm_cntr_assign will require users to assign the counters to the events to
-	read the events. Otherwise, the MBM event counters will return "Unassigned"
-	when read.
-
-"num_mbm_cntrs":
-	The number of monitoring counters available for assignment.
-
-	Resctrl subsystem provides the interface to count maximum of two
-	MBM events per group, from a combination of total and local events.
-	Keeping the current interface, users can assign a maximum of two
-	monitoring counters per group. User will also have the option to
-	enable only one counter to the group.
-
-	With limited number of counters, system can run out of assignable counters.
-	In mbm_cntr_assign mode, the MBM event counters will return "Unassigned" if
-	the counter is not assigned to the event when read. Users need to assign a
-	counter manually to read the events.
-
-"mbm_control":
-	Reports the resctrl group and monitor status of each group.
-
-	List follows the following format:
-		"<CTRL_MON group>/<MON group>/<domain_id>=<flags>"
-
-	Format for specific type of groups:
-
-	* Default CTRL_MON group:
-		"//<domain_id>=<flags>"
-
-	* Non-default CTRL_MON group:
-		"<CTRL_MON group>//<domain_id>=<flags>"
-
-	* Child MON group of default CTRL_MON group:
-		"/<MON group>/<domain_id>=<flags>"
-
-	* Child MON group of non-default CTRL_MON group:
-		"<CTRL_MON group>/<MON group>/<domain_id>=<flags>"
-
-	Flags can be one of the following:
-	::
-
-	 t  MBM total event is enabled.
-	 l  MBM local event is enabled.
-	 tl Both total and local MBM events are enabled.
-	 _  None of the MBM events are enabled. Only works with opcode '=' for write.
-
-	Examples:
-	::
-
-	 # mkdir /sys/fs/resctrl/mon_groups/child_default_mon_grp
-	 # mkdir /sys/fs/resctrl/non_default_ctrl_mon_grp
-	 # mkdir /sys/fs/resctrl/non_default_ctrl_mon_grp/mon_groups/child_non_default_mon_grp
-
-	 # cat /sys/fs/resctrl/info/L3_MON/mbm_control
-	 non_default_ctrl_mon_grp//0=tl;1=tl;
-	 non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=tl;
-	 //0=tl;1=tl;
-	 /child_default_mon_grp/0=tl;1=tl;
-
-	 There are four resctrl groups. All the groups have total and local MBM events
-	 enabled on domain 0 and 1.
-
-	Assignment state can be updated by writing to the interface.
-
-	Format is similar to the list format with addition of opcode for the
-	assignment operation.
-
-		"<CTRL_MON group>/<MON group>/<domain_id><opcode><flags>"
-
-	Format for each type of groups:
-
-        * Default CTRL_MON group:
-                "//<domain_id><opcode><flags>"
-
-        * Non-default CTRL_MON group:
-                "<CTRL_MON group>//<domain_id><opcode><flags>"
-
-        * Child MON group of default CTRL_MON group:
-                "/<MON group>/<domain_id><opcode><flags>"
-
-        * Child MON group of non-default CTRL_MON group:
-                "<CTRL_MON group>/<MON group>/<domain_id><opcode><flags>"
-
-	Domain_id '*' wil apply the flags on all the domains.
-
-	Opcode can be one of the following:
-	::
-
-	 = Update the assignment to match the MBM event.
-	 + Assign a MBM event.
-	 - Unassign a MBM event.
-
-	Examples:
-	::
-
-	  Initial group status:
-	  # cat /sys/fs/resctrl/info/L3_MON/mbm_control
-	  non_default_ctrl_mon_grp//0=tl;1=tl;
-	  non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=tl;
-	  //0=tl;1=tl;
-	  /child_default_mon_grp/0=tl;1=tl;
-
-	  To update the default group to assign only total MBM event on domain 0:
-	  # echo "//0=t" > /sys/fs/resctrl/info/L3_MON/mbm_control
-
-	  Assignment status after the update:
-	  # cat /sys/fs/resctrl/info/L3_MON/mbm_control
-	  non_default_ctrl_mon_grp//0=tl;1=tl;
-	  non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=tl;
-	  //0=t;1=tl;
-	  /child_default_mon_grp/0=tl;1=tl;
-
-	  To update the MON group child_default_mon_grp to remove total MBM event on domain 1:
-	  # echo "/child_default_mon_grp/1-t" > /sys/fs/resctrl/info/L3_MON/mbm_control
-
-	  Assignment status after the update:
-	  $ cat /sys/fs/resctrl/info/L3_MON/mbm_control
-	  non_default_ctrl_mon_grp//0=tl;1=tl;
-	  non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=tl;
-	  //0=t;1=tl;
-	  /child_default_mon_grp/0=tl;1=l;
-
-	  To update the MON group non_default_ctrl_mon_grp/child_non_default_mon_grp to
-	  unassign both local and total MBM events on domain 1:
-	  # echo "non_default_ctrl_mon_grp/child_non_default_mon_grp/1=_" >
-			/sys/fs/resctrl/info/L3_MON/mbm_control
-
-	  Assignment status after the update:
-	  non_default_ctrl_mon_grp//0=tl;1=tl;
-	  non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=_;
-	  //0=t;1=tl;
-	  /child_default_mon_grp/0=tl;1=l;
-
-	  To update the default group to add a local MBM event domain 0.
-	  # echo "//0+l" > /sys/fs/resctrl/info/L3_MON/mbm_control
-
-	  Assignment status after the update:
-	  # cat /sys/fs/resctrl/info/L3_MON/mbm_control
-	  non_default_ctrl_mon_grp//0=tl;1=tl;
-	  non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=_;
-	  //0=tl;1=tl;
-	  /child_default_mon_grp/0=tl;1=l;
-
-	  To update the non default CTRL_MON group non_default_ctrl_mon_grp to unassign all
-	  the MBM events on all the domains.
-	  # echo "non_default_ctrl_mon_grp//*=_" > /sys/fs/resctrl/info/L3_MON/mbm_control
-
-	  Assignment status after the update:
-	  #cat /sys/fs/resctrl/info/L3_MON/mbm_control
-	  non_default_ctrl_mon_grp//0=_;1=_;
-	  non_default_ctrl_mon_grp/child_non_default_mon_grp/0=tl;1=_;
-	  //0=tl;1=tl;
-	  /child_default_mon_grp/0=tl;1=l;
-
 "max_threshold_occupancy":
 		Read/write file provides the largest value (in
 		bytes) at which a previously used LLC_occupancy
diff --git a/Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml b/Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml
deleted file mode 100644
index 9d542ecb1a7d6c3e3ec820179de29f1dd4259bb4..0000000000000000000000000000000000000000
--- a/Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml
+++ /dev/null
@@ -1,227 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/arm/arm,mpam-msc.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Arm Memory System Resource Partitioning and Monitoring (MPAM)
-
-description: |
-  The Arm MPAM specification can be found here:
-
-  https://developer.arm.com/documentation/ddi0598/latest
-
-maintainers:
-  - Rob Herring <robh@kernel.org>
-
-properties:
-  compatible:
-    items:
-      - const: arm,mpam-msc                   # Further details are discoverable
-      - const: arm,mpam-memory-controller-msc
-
-  reg:
-    maxItems: 1
-    description: A memory region containing registers as defined in the MPAM
-      specification.
-
-  interrupts:
-    minItems: 1
-    items:
-      - description: error (optional)
-      - description: overflow (optional, only for monitoring)
-
-  interrupt-names:
-    oneOf:
-      - items:
-          - enum: [ error, overflow ]
-      - items:
-          - const: error
-          - const: overflow
-
-  arm,not-ready-us:
-    description: The maximum time in microseconds for monitoring data to be
-      accurate after a settings change. For more information, see the
-      Not-Ready (NRDY) bit description in the MPAM specification.
-
-  numa-node-id: true # see NUMA binding
-
-  '#address-cells':
-    const: 1
-
-  '#size-cells':
-    const: 0
-
-patternProperties:
-  '^ris@[0-9a-f]$':
-    type: object
-    additionalProperties: false
-    description: |
-      RIS nodes for each RIS in an MSC. These nodes are required for each RIS
-      implementing known MPAM controls
-
-    properties:
-      compatible:
-        enum:
-            # Bulk storage for cache
-          - arm,mpam-cache
-            # Memory bandwidth
-          - arm,mpam-memory
-
-      reg:
-        minimum: 0
-        maximum: 0xf
-
-      cpus:
-        $ref: '/schemas/types.yaml#/definitions/phandle-array'
-        description:
-          Phandle(s) to the CPU node(s) this RIS belongs to. By default, the parent
-          device's affinity is used.
-
-      arm,mpam-device:
-        $ref: '/schemas/types.yaml#/definitions/phandle'
-        description:
-          By default, the MPAM enabled device associated with a RIS is the MSC's
-          parent node. It is possible for each RIS to be associated with different
-          devices in which case 'arm,mpam-device' should be used.
-
-    required:
-      - compatible
-      - reg
-
-required:
-  - compatible
-  - reg
-
-dependencies:
-  interrupts: [ interrupt-names ]
-
-additionalProperties: false
-
-examples:
-  - |
-    /*
-    cpus {
-        cpu@0 {
-            next-level-cache = <&L2_0>;
-        };
-        cpu@100 {
-            next-level-cache = <&L2_1>;
-        };
-    };
-    */
-    L2_0: cache-controller-0 {
-        compatible = "cache";
-        cache-level = <2>;
-        cache-unified;
-        next-level-cache = <&L3>;
-
-    };
-
-    L2_1: cache-controller-1 {
-        compatible = "cache";
-        cache-level = <2>;
-        cache-unified;
-        next-level-cache = <&L3>;
-
-    };
-
-    L3: cache-controller@30000000 {
-        compatible = "arm,dsu-l3-cache", "cache";
-        cache-level = <3>;
-        cache-unified;
-
-        ranges = <0x0 0x30000000 0x800000>;
-        #address-cells = <1>;
-        #size-cells = <1>;
-
-        msc@10000 {
-            compatible = "arm,mpam-msc";
-
-            /* CPU affinity implied by parent cache node's  */
-            reg = <0x10000 0x2000>;
-            interrupts = <1>, <2>;
-            interrupt-names = "error", "overflow";
-            arm,not-ready-us = <1>;
-        };
-    };
-
-    mem: memory-controller@20000 {
-        compatible = "foo,a-memory-controller";
-        reg = <0x20000 0x1000>;
-
-        #address-cells = <1>;
-        #size-cells = <1>;
-        ranges;
-
-        msc@21000 {
-            compatible = "arm,mpam-memory-controller-msc", "arm,mpam-msc";
-            reg = <0x21000 0x1000>;
-            interrupts = <3>;
-            interrupt-names = "error";
-            arm,not-ready-us = <1>;
-            numa-node-id = <1>;
-        };
-    };
-
-    iommu@40000 {
-        reg = <0x40000 0x1000>;
-
-        ranges;
-        #address-cells = <1>;
-        #size-cells = <1>;
-
-        msc@41000 {
-            compatible = "arm,mpam-msc";
-            reg = <0 0x1000>;
-            interrupts = <5>, <6>;
-            interrupt-names = "error", "overflow";
-            arm,not-ready-us = <1>;
-
-            #address-cells = <1>;
-            #size-cells = <0>;
-
-            ris@2 {
-                compatible = "arm,mpam-cache";
-                reg = <0>;
-                // TODO: How to map to device(s)?
-            };
-        };
-    };
-
-    msc@80000 {
-        compatible = "foo,a-standalone-msc";
-        reg = <0x80000 0x1000>;
-
-        clocks = <&clks 123>;
-
-        ranges;
-        #address-cells = <1>;
-        #size-cells = <1>;
-
-        msc@10000 {
-            compatible = "arm,mpam-msc";
-
-            reg = <0x10000 0x2000>;
-            interrupts = <7>;
-            interrupt-names = "overflow";
-            arm,not-ready-us = <1>;
-
-            #address-cells = <1>;
-            #size-cells = <0>;
-
-            ris@0 {
-                compatible = "arm,mpam-cache";
-                reg = <0>;
-                arm,mpam-device = <&L2_0>;
-            };
-
-            ris@1 {
-                compatible = "arm,mpam-memory";
-                reg = <1>;
-                arm,mpam-device = <&mem>;
-            };
-        };
-    };
-
-...
diff --git a/MAINTAINERS b/MAINTAINERS
index 2def3d5f43b36253ecc6a02708dbed6d1e3b0a18..85d4f1549af6654a2926e99ca92be419a49196b1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18204,8 +18204,6 @@ S:	Supported
 F:	Documentation/arch/x86/resctrl*
 F:	arch/x86/include/asm/resctrl.h
 F:	arch/x86/kernel/cpu/resctrl/
-F:	fs/resctrl/
-F:	include/linux/resctrl*.h
 F:	tools/testing/selftests/resctrl/
 
 READ-COPY UPDATE (RCU)
diff --git a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM b/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM
deleted file mode 100644
index 45957b7b4ea21fbd49e5e57512cad210afbdd776..0000000000000000000000000000000000000000
--- a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM64_MPAM
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_ARM64_MPAM=y
diff --git a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_CPU_RESCTRL b/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_CPU_RESCTRL
deleted file mode 100644
index b1c35e9ba99b60049d152fff1275430ce1c9203d..0000000000000000000000000000000000000000
--- a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_ARM_CPU_RESCTRL=y
diff --git a/anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS
deleted file mode 100644
index 4d7ca33dcdc4fbeef2e86bef8a544c1656d266f5..0000000000000000000000000000000000000000
--- a/anolis/configs/L0-MANDATORY/default/CONFIG_RESCTRL_FS
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_RESCTRL_FS=y
diff --git a/anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS
deleted file mode 100644
index 2147c5f9ea80a3481004517c46838b71eb591a6f..0000000000000000000000000000000000000000
--- a/anolis/configs/L0-MANDATORY/loongarch/CONFIG_RESCTRL_FS
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_RESCTRL_FS is not set
diff --git a/anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS
deleted file mode 100644
index 2147c5f9ea80a3481004517c46838b71eb591a6f..0000000000000000000000000000000000000000
--- a/anolis/configs/L0-MANDATORY/riscv/CONFIG_RESCTRL_FS
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_RESCTRL_FS is not set
diff --git a/anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_RESCTRL_FS
deleted file mode 100644
index 2147c5f9ea80a3481004517c46838b71eb591a6f..0000000000000000000000000000000000000000
--- a/anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_RESCTRL_FS
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_RESCTRL_FS is not set
diff --git a/anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_RESCTRL_FS b/anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_RESCTRL_FS
deleted file mode 100644
index 2147c5f9ea80a3481004517c46838b71eb591a6f..0000000000000000000000000000000000000000
--- a/anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_RESCTRL_FS
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_RESCTRL_FS is not set
diff --git a/anolis/configs/L1-RECOMMEND/x86/CONFIG_RESCTRL_FS_PSEUDO_LOCK b/anolis/configs/L1-RECOMMEND/x86/CONFIG_RESCTRL_FS_PSEUDO_LOCK
deleted file mode 100644
index 731e801f5294b607e96d27ab9be36f6134fd66dd..0000000000000000000000000000000000000000
--- a/anolis/configs/L1-RECOMMEND/x86/CONFIG_RESCTRL_FS_PSEUDO_LOCK
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_RESCTRL_FS_PSEUDO_LOCK=y
diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM
deleted file mode 100644
index e93cbd36cedc1b687c9c2d09124388fc11798580..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ACPI_MPAM
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_ACPI_MPAM=y
diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID
deleted file mode 100644
index 8cddb03cb13512f3d86acc0333c8bd8e45932152..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID=y
diff --git a/anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL
deleted file mode 100644
index 6cd1474b8d3299bc79e4f29e3de07765d80b1727..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/default/CONFIG_ARCH_HAS_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_ARCH_HAS_CPU_RESCTRL=y
diff --git a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL
deleted file mode 100644
index dd3c6353e127c4229b2241ccd4e97277eb3f5cf1..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_ARCH_HAS_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_ARCH_HAS_CPU_RESCTRL is not set
diff --git a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL
deleted file mode 100644
index b4dd102b0a4e8c58211a42e75fc8a3660ebb1e76..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/loongarch/CONFIG_PROC_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_PROC_CPU_RESCTRL is not set
diff --git a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL
deleted file mode 100644
index dd3c6353e127c4229b2241ccd4e97277eb3f5cf1..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_ARCH_HAS_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_ARCH_HAS_CPU_RESCTRL is not set
diff --git a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL
deleted file mode 100644
index b4dd102b0a4e8c58211a42e75fc8a3660ebb1e76..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/riscv/CONFIG_PROC_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_PROC_CPU_RESCTRL is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_HAS_CPU_RESCTRL
deleted file mode 100644
index dd3c6353e127c4229b2241ccd4e97277eb3f5cf1..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_HAS_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_ARCH_HAS_CPU_RESCTRL is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PROC_CPU_RESCTRL
deleted file mode 100644
index b4dd102b0a4e8c58211a42e75fc8a3660ebb1e76..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PROC_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_PROC_CPU_RESCTRL is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_HAS_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_HAS_CPU_RESCTRL
deleted file mode 100644
index dd3c6353e127c4229b2241ccd4e97277eb3f5cf1..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_HAS_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_ARCH_HAS_CPU_RESCTRL is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PROC_CPU_RESCTRL
deleted file mode 100644
index b4dd102b0a4e8c58211a42e75fc8a3660ebb1e76..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PROC_CPU_RESCTRL
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_PROC_CPU_RESCTRL is not set
diff --git a/anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL b/anolis/configs/L2-OPTIONAL/x86/CONFIG_PROC_CPU_RESCTRL
similarity index 100%
rename from anolis/configs/L2-OPTIONAL/default/CONFIG_PROC_CPU_RESCTRL
rename to anolis/configs/L2-OPTIONAL/x86/CONFIG_PROC_CPU_RESCTRL
diff --git a/arch/Kconfig b/arch/Kconfig
index b2ccbaa3780e9177e7ecac9916c233e66e564513..ad97c9f158e0f388a69917180c0291d1589ff7e4 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1341,14 +1341,6 @@ config STRICT_MODULE_RWX
 config ARCH_HAS_PHYS_TO_DMA
 	bool
 
-config ARCH_HAS_CPU_RESCTRL
-	bool
-	help
-	  The 'resctrl' filesystem allows CPU controls of shared resources
-	  such as caches and memory bandwidth to be configured. An architecture
-	  selects this if it provides the arch-specific hooks for the filesystem
-	  and needs the per-task CLOSID/RMID properties.
-
 config HAVE_ARCH_COMPILER_H
 	bool
 	help
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 592ea627edb397b34ed0a0557a0b998caad2bcb5..acedee7c16678002ff786e83027ec9a6e4a3f3cd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2066,27 +2066,7 @@ config ARM64_TLB_RANGE
 	  The feature introduces new assembly instructions, and they were
 	  support when binutils >= 2.30.
 
-config ARM64_MPAM
-	bool "Enable support for MPAM"
-	select ACPI_MPAM if ACPI
-	select ARCH_HAS_CPU_RESCTRL
-	select RESCTRL_FS
-	help
-	  Memory Partitioning and Monitoring is an optional extension
-	  that allows the CPUs to mark load and store transactions with
-	  labels for partition-id and performance-monitoring-group.
-	  System components, such as the caches, can use the partition-id
-	  to apply a performance policy. MPAM monitors can use the
-	  partition-id and performance-monitoring-group to measure the
-	  cache occupancy or data throughput.
-
-	  Use of this extension requires CPU support, support in the
-	  memory system components (MSC), and a description from firmware
-	  of where the MSC are in the address space.
-
-	  MPAM is exposed to user-space via the resctrl pseudo filesystem.
-
-endmenu
+endmenu # "ARMv8.4 architectural features"
 
 menu "ARMv8.5 architectural features"
 
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 385a6c162c32a992bbdb5885a61581b36577d50a..4dd7dce1917bc5d93217123462c56cb3ce945c6c 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -619,13 +619,6 @@ static inline bool id_aa64pfr1_sme(u64 pfr1)
 	return val > 0;
 }
 
-static inline bool id_aa64pfr0_mpam(u64 pfr0)
-{
-	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT);
-
-	return val > 0;
-}
-
 static inline bool id_aa64pfr1_mte(u64 pfr1)
 {
 	u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT);
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index d93c2b5b0b59de1a6c7849c225a4fa948396e5a6..44336899ca278a643d49741b6439c842d00848da 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -274,21 +274,6 @@
 	msr	spsr_el2, x0
 .endm
 
-.macro __init_el2_mpam
-#ifdef CONFIG_ARM64_MPAM
-	/* Memory Partioning And Monitoring: disable EL2 traps */
-	mrs	x1, id_aa64pfr0_el1
-	ubfx	x0, x1, #ID_AA64PFR0_EL1_MPAM_SHIFT, #4
-	cbz	x0, .Lskip_mpam_\@		// skip if no MPAM
-	msr_s	SYS_MPAM2_EL2, xzr		// use the default partition
-						// and disable lower traps
-	mrs_s	x0, SYS_MPAMIDR_EL1
-	tbz	x0, #17, .Lskip_mpam_\@		// skip if no MPAMHCR reg
-	msr_s	SYS_MPAMHCR_EL2, xzr		// clear TRAP_MPAMIDR_EL1 -> EL2
-.Lskip_mpam_\@:
-#endif /* CONFIG_ARM64_MPAM */
-.endm
-
 /**
  * Initialize EL2 registers to sane values. This should be called early on all
  * cores that were booted in EL2. Note that everything gets initialised as
@@ -307,7 +292,6 @@
 	__init_el2_stage2
 	__init_el2_gicv3
 	__init_el2_hstr
-	__init_el2_mpam
 	__init_el2_nvhe_idregs
 	__init_el2_cptr
 	__init_el2_fgt
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 96c7aad7cc43ab0cc5c2a828b3f968b48a0b0697..c11010965b8e06e20bffffb8d627817eb5893e75 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -108,7 +108,6 @@
 
 #define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En)
 #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
-#define MPAMHCR_HOST_FLAGS	0
 
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_RES1		((1U << 31) | (1 << 23))
diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h
deleted file mode 100644
index 9abe1fe58c34317c16bbc3f4aa6da2fabad74639..0000000000000000000000000000000000000000
--- a/arch/arm64/include/asm/mpam.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2021 Arm Ltd. */
-
-#ifndef __ASM__MPAM_H
-#define __ASM__MPAM_H
-
-#include <linux/arm_mpam.h>
-#include <linux/bitops.h>
-#include <linux/bitfield.h>
-#include <linux/init.h>
-#include <linux/jump_label.h>
-#include <linux/percpu.h>
-#include <linux/sched.h>
-
-#include <asm/cpucaps.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
-
-/* CPU Registers */
-#define MPAM_SYSREG_EN			BIT_ULL(63)
-#define MPAM_SYSREG_TRAP_IDR		BIT_ULL(58)
-#define MPAM_SYSREG_TRAP_MPAM0_EL1	BIT_ULL(49)
-#define MPAM_SYSREG_TRAP_MPAM1_EL1	BIT_ULL(48)
-#define MPAM_SYSREG_PMG_D		GENMASK(47, 40)
-#define MPAM_SYSREG_PMG_I		GENMASK(39, 32)
-#define MPAM_SYSREG_PARTID_D		GENMASK(31, 16)
-#define MPAM_SYSREG_PARTID_I		GENMASK(15, 0)
-
-#define MPAMIDR_PMG_MAX			GENMASK(40, 32)
-#define MPAMIDR_PMG_MAX_SHIFT		32
-#define MPAMIDR_PMG_MAX_LEN		8
-#define MPAMIDR_VPMR_MAX		GENMASK(20, 18)
-#define MPAMIDR_VPMR_MAX_SHIFT		18
-#define MPAMIDR_VPMR_MAX_LEN		3
-#define MPAMIDR_HAS_HCR			BIT(17)
-#define MPAMIDR_HAS_HCR_SHIFT		17
-#define MPAMIDR_PARTID_MAX		GENMASK(15, 0)
-#define MPAMIDR_PARTID_MAX_SHIFT	0
-#define MPAMIDR_PARTID_MAX_LEN		15
-
-#define MPAMHCR_EL0_VPMEN		BIT_ULL(0)
-#define MPAMHCR_EL1_VPMEN		BIT_ULL(1)
-#define MPAMHCR_GSTAPP_PLK		BIT_ULL(8)
-#define MPAMHCR_TRAP_MPAMIDR		BIT_ULL(31)
-
-/* Properties of the VPM registers */
-#define MPAM_VPM_NUM_REGS		8
-#define MPAM_VPM_PARTID_LEN		16
-#define MPAM_VPM_PARTID_MASK		0xffff
-#define MPAM_VPM_REG_LEN		64
-#define MPAM_VPM_PARTIDS_PER_REG	(MPAM_VPM_REG_LEN / MPAM_VPM_PARTID_LEN)
-#define MPAM_VPM_MAX_PARTID		(MPAM_VPM_NUM_REGS * MPAM_VPM_PARTIDS_PER_REG)
-
-
-DECLARE_STATIC_KEY_FALSE(arm64_mpam_has_hcr);
-DECLARE_STATIC_KEY_FALSE(mpam_enabled);
-DECLARE_PER_CPU(u64, arm64_mpam_default);
-DECLARE_PER_CPU(u64, arm64_mpam_current);
-
-/* check whether all CPUs have MPAM support */
-static __always_inline bool mpam_cpus_have_feature(void)
-{
-	if (IS_ENABLED(CONFIG_ARM64_MPAM))
-		return cpus_have_final_cap(ARM64_MPAM);
-	return false;
-}
-
-/* check whether all CPUs have MPAM virtualisation support */
-static __always_inline bool mpam_cpus_have_mpam_hcr(void)
-{
-	if (IS_ENABLED(CONFIG_ARM64_MPAM))
-		return static_branch_unlikely(&arm64_mpam_has_hcr);
-	return false;
-}
-
-/* enable MPAM virtualisation support */
-static inline void __init __enable_mpam_hcr(void)
-{
-	if (IS_ENABLED(CONFIG_ARM64_MPAM))
-		static_branch_enable(&arm64_mpam_has_hcr);
-}
-
-/*
- * The resctrl filesystem writes to the partid/pmg values for threads and CPUs,
- * which may race with reads in __mpam_sched_in(). Ensure only one of the old
- * or new values are used. Particular care should be taken with the pmg field
- * as __mpam_sched_in() may read a partid and pmg that don't match, causing
- * this value to be stored with cache allocations, despite being considered
- * 'free' by resctrl.
- *
- * A value in struct thread_info is used instead of struct task_struct as the
- * cpu's u64 register format is used, but struct task_struct has two u32'.
- */
- static inline void mpam_set_cpu_defaults(int cpu, u16 partid_d, u16 partid_i,
-					  u8 pmg_d, u8 pmg_i)
-{
-	u64 default_val;
-
-	default_val = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid_d);
-	default_val |= FIELD_PREP(MPAM_SYSREG_PARTID_I, partid_i);
-	default_val |= FIELD_PREP(MPAM_SYSREG_PMG_D, pmg_d);
-	default_val |= FIELD_PREP(MPAM_SYSREG_PMG_I, pmg_i);
-
-	WRITE_ONCE(per_cpu(arm64_mpam_default, cpu), default_val);
-}
-
-static inline void mpam_set_task_partid_pmg(struct task_struct *tsk,
-					    u16 partid_d, u16 partid_i,
-					    u8 pmg_d, u8 pmg_i)
-{
-#ifdef CONFIG_ARM64_MPAM
-	u64 regval;
-
-	regval = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid_d);
-	regval |= FIELD_PREP(MPAM_SYSREG_PARTID_I, partid_i);
-	regval |= FIELD_PREP(MPAM_SYSREG_PMG_D, pmg_d);
-	regval |= FIELD_PREP(MPAM_SYSREG_PMG_I, pmg_i);
-
-	WRITE_ONCE(task_thread_info(tsk)->mpam_partid_pmg, regval);
-#endif
-}
-
-static inline u64 mpam_get_regval(struct task_struct *tsk)
-{
-#ifdef CONFIG_ARM64_MPAM
-	return READ_ONCE(task_thread_info(tsk)->mpam_partid_pmg);
-#else
-	return 0;
-#endif
-}
-
-static inline void resctrl_arch_set_rmid(struct task_struct *tsk, u32 rmid)
-{
-#ifdef CONFIG_ARM64_MPAM
-	u64 regval = mpam_get_regval(tsk);
-
-	regval &= ~MPAM_SYSREG_PMG_D;
-	regval &= ~MPAM_SYSREG_PMG_I;
-	regval |= FIELD_PREP(MPAM_SYSREG_PMG_D, rmid);
-	regval |= FIELD_PREP(MPAM_SYSREG_PMG_I, rmid);
-
-	WRITE_ONCE(task_thread_info(tsk)->mpam_partid_pmg, regval);
-#endif
-}
-
-static inline void mpam_thread_switch(struct task_struct *tsk)
-{
-	u64 oldregval;
-	int cpu = smp_processor_id();
-	u64 regval = mpam_get_regval(tsk);
-
-	if (!IS_ENABLED(CONFIG_ARM64_MPAM) ||
-	    !static_branch_likely(&mpam_enabled))
-		return;
-
-	if (regval == READ_ONCE(mpam_resctrl_default_group))
-		regval = READ_ONCE(per_cpu(arm64_mpam_default, cpu));
-
-	oldregval = READ_ONCE(per_cpu(arm64_mpam_current, cpu));
-	if (oldregval == regval)
-		return;
-
-	/* Synchronising this write is left until the ERET to EL0 */
-	write_sysreg_s(regval, SYS_MPAM0_EL1);
-	WRITE_ONCE(per_cpu(arm64_mpam_current, cpu), regval);
-}
-#endif /* __ASM__MPAM_H */
diff --git a/arch/arm64/include/asm/resctrl.h b/arch/arm64/include/asm/resctrl.h
deleted file mode 100644
index b506e95cf6e374690a71e0f1d142fe0032ae6e45..0000000000000000000000000000000000000000
--- a/arch/arm64/include/asm/resctrl.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/arm_mpam.h>
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 752e45c29afaf62b36bee74e450e83f4a3b9355c..e4bb2c2a136a8205b8f06050e22614003fc1438d 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -503,13 +503,6 @@
 #define SYS_MAIR_EL2			sys_reg(3, 4, 10, 2, 0)
 #define SYS_AMAIR_EL2			sys_reg(3, 4, 10, 3, 0)
 
-#define SYS_MPAMHCR_EL2			sys_reg(3, 4, 10, 4, 0)
-#define SYS_MPAMVPMV_EL2		sys_reg(3, 4, 10, 4, 1)
-#define SYS_MPAM2_EL2			sys_reg(3, 4, 10, 5, 0)
-
-#define __VPMn_op2(n)			((n) & 0x7)
-#define SYS_MPAM_VPMn_EL2(n)		sys_reg(3, 4, 10, 6, __VPMn_op2(n))
-
 #define SYS_VBAR_EL2			sys_reg(3, 4, 12, 0, 0)
 #define SYS_RVBAR_EL2			sys_reg(3, 4, 12, 0, 1)
 #define SYS_RMR_EL2			sys_reg(3, 4, 12, 0, 2)
@@ -574,7 +567,6 @@
 #define SYS_TFSR_EL12			sys_reg(3, 5, 5, 6, 0)
 #define SYS_MAIR_EL12			sys_reg(3, 5, 10, 2, 0)
 #define SYS_AMAIR_EL12			sys_reg(3, 5, 10, 3, 0)
-#define SYS_MPAM1_EL12			sys_reg(3, 5, 10, 5, 0)
 #define SYS_VBAR_EL12			sys_reg(3, 5, 12, 0, 0)
 #define SYS_CNTKCTL_EL12		sys_reg(3, 5, 14, 1, 0)
 #define SYS_CNTP_TVAL_EL02		sys_reg(3, 5, 14, 2, 0)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f36803eb2ebd277006f46446936b10e63405fdc5..872b8e810a3774f8a8c662d546e5eb0abc7743ab 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,9 +41,6 @@ struct thread_info {
 #ifdef CONFIG_SHADOW_CALL_STACK
 	void			*scs_base;
 	void			*scs_sp;
-#endif
-#ifdef CONFIG_ARM64_MPAM
-	u64			mpam_partid_pmg;
 #endif
 	u32			cpu;
 };
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index e9827519f567097b8cd0e27fb5a6dbd115e2ce92..e3bab76856a391ad83c3b6269b9d9638e0a09829 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -67,13 +67,11 @@ obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
 obj-$(CONFIG_KEXEC_FILE)		+= machine_kexec_file.o kexec_image.o
 obj-$(CONFIG_ARM64_RELOC_TEST)		+= arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
-
 obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 obj-$(CONFIG_CRASH_CORE)		+= crash_core.o
 obj-$(CONFIG_ARM_SDE_INTERFACE)		+= sdei.o
 obj-$(CONFIG_SDEI_WATCHDOG)		+= watchdog_sdei.o
 obj-$(CONFIG_ARM64_PTR_AUTH)		+= pointer_auth.o
-obj-$(CONFIG_ARM64_MPAM)		+= mpam.o
 obj-$(CONFIG_ARM64_MTE)			+= mte.o
 obj-y					+= vdso-wrap.o
 obj-$(CONFIG_COMPAT_VDSO)		+= vdso32-wrap.o
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6b50132aed89bc4c77658d3c4bb614f7f384825b..c214639da4a4789f64146ed565c79d6507a1b8ef 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2814,15 +2814,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_enable_fpmr,
 		ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP)
 	},
-#ifdef CONFIG_ARM64_MPAM
-	{
-		.desc = "Memory Partitioning And Monitoring",
-		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
-		.capability = ARM64_MPAM,
-		.matches = has_cpuid_feature,
-		ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, MPAM, 1)
-	},
-#endif
 #ifdef CONFIG_ARM64_POE
 	{
 		.desc = "Stage-1 Permission Overlay Extension (S1POE)",
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 6999668e9ecf0811258a02acefabde99543bae49..35f3c795951373549faad3ed2d1bd30ad427eb78 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -64,12 +64,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
 /* Vectors installed by hyp-init on reset HVC. */
 KVM_NVHE_ALIAS(__hyp_stub_vectors);
 
-/* Additional static keys for cpufeatures */
-#ifdef CONFIG_ARM64_MPAM
-KVM_NVHE_ALIAS(arm64_mpam_has_hcr);
-KVM_NVHE_ALIAS(mpam_enabled);
-#endif
-
 /* Static keys which are set if a vGIC trap should be handled in hyp. */
 KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
 KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c
deleted file mode 100644
index 190a0aa4f9c0050f2f1e6fad5e5d1fccbbcd62e0..0000000000000000000000000000000000000000
--- a/arch/arm64/kernel/mpam.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2021 Arm Ltd. */
-
-#include <asm/mpam.h>
-
-#include <linux/arm_mpam.h>
-#include <linux/jump_label.h>
-#include <linux/percpu.h>
-
-DEFINE_STATIC_KEY_FALSE(arm64_mpam_has_hcr);
-EXPORT_SYMBOL_FOR_KVM(arm64_mpam_has_hcr);
-DEFINE_STATIC_KEY_FALSE(mpam_enabled);
-EXPORT_SYMBOL_FOR_KVM(mpam_enabled);
-DEFINE_PER_CPU(u64, arm64_mpam_default);
-DEFINE_PER_CPU(u64, arm64_mpam_current);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c3b83023851f5ac6d02fd05505a2e9c058de75b9..a44f0e97f6d7cd766185d65fdc602e87bd403ee1 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -49,7 +49,6 @@
 #include <asm/exec.h>
 #include <asm/fpsimd.h>
 #include <asm/mmu_context.h>
-#include <asm/mpam.h>
 #include <asm/mte.h>
 #include <asm/processor.h>
 #include <asm/pointer_auth.h>
@@ -578,12 +577,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	if (prev->thread.sctlr_user != next->thread.sctlr_user)
 		update_sctlr_el1(next->thread.sctlr_user);
 
-	/*
-	 * MPAM thread switch happens after the DSB to ensure prev's accesses
-	 * use prev's MPAM settings.
-	 */
-	mpam_thread_switch(next);
-
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index d274244e1b44097758922e47dc025db1ebbe073c..2b6c9724d55fdd3ee99b21f9234cd0c1fcbfe941 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -27,7 +27,6 @@
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_nested.h>
-#include <asm/mpam.h>
 #include <asm/fpsimd.h>
 #include <asm/debug-monitors.h>
 #include <asm/processor.h>
@@ -173,35 +172,6 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 	write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
 }
 
-static inline void  __activate_traps_mpam(struct kvm_vcpu *vcpu)
-{
-	u64 r = MPAM_SYSREG_TRAP_MPAM0_EL1 | MPAM_SYSREG_TRAP_MPAM1_EL1;
-
-	if (!mpam_cpus_have_feature() || !static_branch_likely(&mpam_enabled))
-		return;
-
-	/* trap guest access to MPAMIDR_EL1 */
-	if (mpam_cpus_have_mpam_hcr()) {
-		write_sysreg_s(MPAMHCR_TRAP_MPAMIDR, SYS_MPAMHCR_EL2);
-	} else {
-		/* From v1.1 TIDR can trap MPAMIDR, set it unconditionally */
-		r |= MPAM_SYSREG_TRAP_IDR;
-	}
-
-	write_sysreg_s(r, SYS_MPAM2_EL2);
-}
-
-static inline void __deactivate_traps_mpam(void)
-{
-	if (!mpam_cpus_have_feature() || !static_branch_likely(&mpam_enabled))
-		return;
-
-	write_sysreg_s(0, SYS_MPAM2_EL2);
-
-	if (mpam_cpus_have_mpam_hcr())
-		write_sysreg_s(MPAMHCR_HOST_FLAGS, SYS_MPAMHCR_EL2);
-}
-
 static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
 {
 	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
@@ -242,7 +212,6 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
 	}
 
 	__activate_traps_hfgxtr(vcpu);
-	__activate_traps_mpam(vcpu);
 }
 
 static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -262,7 +231,6 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
 		write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
 
 	__deactivate_traps_hfgxtr(vcpu);
-	__deactivate_traps_mpam();
 }
 
 static inline void ___activate_traps(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 8e99f66b377bd37f3622ae9b5c9bf881517659b8..bb6b571ec627dede466c5fa1d05785a9c9f78764 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -15,7 +15,6 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
-#include <asm/mpam.h>
 
 static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
@@ -244,32 +243,4 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
 		write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
 }
 
-/*
- * The _EL0 value was written by the host's context switch, copy this into the
- * guest's EL1.
- */
-static inline void __mpam_guest_load(void)
-{
-	if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature() &&
-	    static_branch_likely(&mpam_enabled))
-		write_sysreg_el1(read_sysreg_s(SYS_MPAM0_EL1), SYS_MPAM1);
-}
-
-/*
- * Copy the _EL2 register back to _EL1, clearing any trap bits EL2 may have set.
- * nVHE world-switch copies the _EL1 register to _EL2. A VHE host writes to the
- * _EL2 register as it is aliased by the hardware when TGE is set.
- */
-static inline void __mpam_guest_put(void)
-{
-	u64 val, mask = MPAM_SYSREG_PMG_D | MPAM_SYSREG_PMG_I |
-			MPAM_SYSREG_PARTID_D | MPAM_SYSREG_PARTID_I;
-
-	if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature() &&
-	    static_branch_likely(&mpam_enabled)) {
-		val = FIELD_GET(mask, read_sysreg_s(SYS_MPAM2_EL2));
-		write_sysreg_el1(val, SYS_MPAM1);
-	}
-}
-
 #endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 024a7871892f4ff81f0447c535e40eae46daf6d2..f1970816fa436d8f2b1d67517840de4bdd73b9c6 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -282,14 +282,6 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	return __fixup_guest_exit(vcpu, exit_code, handlers);
 }
 
-/* Use the host thread's partid and pmg for world switch */
-static void __mpam_copy_el1_to_el2(void)
-{
-	if (IS_ENABLED(CONFIG_ARM64_MPAM) && mpam_cpus_have_feature() &&
-	    static_branch_likely(&mpam_enabled))
-		write_sysreg_s(read_sysreg_s(SYS_MPAM1_EL1), SYS_MPAM2_EL2);
-}
-
 /* Switch to the guest for legacy non-VHE systems */
 int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 {
@@ -299,8 +291,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	bool pmu_switch_needed;
 	u64 exit_code;
 
-	__mpam_copy_el1_to_el2();
-
 	/*
 	 * Having IRQs masked via PMR when entering the guest means the GIC
 	 * will not signal the CPU of interrupts of lower priority, and the
@@ -360,7 +350,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	__timer_enable_traps(vcpu);
 
 	__debug_switch_to_guest(vcpu);
-	__mpam_guest_load();
 
 	do {
 		/* Jump in the fire! */
@@ -371,7 +360,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	__sysreg_save_state_nvhe(guest_ctxt);
 	__sysreg32_save_state(vcpu);
-	__mpam_guest_put();
 	__timer_disable_traps(vcpu);
 	__hyp_vgic_save_state(vcpu);
 
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 71af2f8c7568eeb68e603ba2e00a32cfdb0ee16f..ae763061909ac4cf3619f11ce9d2f7b289f330b2 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -90,7 +90,6 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
 	__sysreg32_restore_state(vcpu);
 	__sysreg_restore_user_state(guest_ctxt);
 	__sysreg_restore_el1_state(guest_ctxt);
-	__mpam_guest_load();
 
 	vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
 }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 7738226ef8cf12e56e7577d6aa386bb5c620f531..623737f65228389b3088a667eb7e738c80aa50ad 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -418,31 +418,6 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-static bool trap_mpam(struct kvm_vcpu *vcpu,
-		      struct sys_reg_params *p,
-		      const struct sys_reg_desc *r)
-{
-	u64 aa64pfr0_el1 = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1);
-
-	/*
-	 * What did we expose to the guest?
-	 * Earlier guests may have seen the ID bits, which can't be removed
-	 * without breaking migration, but MPAMIDR_EL1 can advertise all-zeroes,
-	 * indicating there are zero PARTID/PMG supported by the CPU, allowing
-	 * the other two trapped registers (MPAM1_EL1 and MPAM0_EL1) to be
-	 * treated as RAZ/WI.
-	 * Emulating MPAM1_EL1 as RAZ/WI means the guest sees the MPAMEN bit
-	 * as clear, and realises MPAM isn't usable on this CPU.
-	 */
-	if (FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, aa64pfr0_el1)) {
-		p->regval = 0;
-		return true;
-	}
-
-	kvm_inject_undefined(vcpu);
-	return false;
-}
-
 static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
 			   struct sys_reg_params *p,
 			   const struct sys_reg_desc *r)
@@ -1341,36 +1316,6 @@ static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
 	return arm64_ftr_safe_value(&kvm_ftr, new, cur);
 }
 
-static u64 kvm_arm64_ftr_max(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
-{
-	u64 pfr0, val = rd->reset(vcpu, rd);
-	u32 field, id = reg_to_encoding(rd);
-
-	/*
-	 * Some values may reset to a lower value than can be supported,
-	 * get the maximum feature value.
-	 */
-	switch (id) {
-	case SYS_ID_AA64PFR0_EL1:
-		pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
-
-		/*
-		 * MPAM resets to 0, but migration of MPAM=1 guests is needed.
-		 * See trap_mpam() for more.
-		 */
-		field = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT);
-		if (field == ID_AA64PFR0_EL1_MPAM_1) {
-			val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
-			val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, ID_AA64PFR0_EL1_MPAM_1);
-		}
-
-		break;
-	}
-
-	return val;
-}
-
 /**
  * arm64_check_features() - Check if a feature register value constitutes
  * a subset of features indicated by the idreg's KVM sanitised limit.
@@ -1391,7 +1336,8 @@ static int arm64_check_features(struct kvm_vcpu *vcpu,
 	const struct arm64_ftr_bits *ftrp = NULL;
 	u32 id = reg_to_encoding(rd);
 	u64 writable_mask = rd->val;
-	u64 limit, mask = 0;
+	u64 limit = rd->reset(vcpu, rd);
+	u64 mask = 0;
 	u32 midr = read_cpuid_id();
 
 	/*
@@ -1406,7 +1352,6 @@ static int arm64_check_features(struct kvm_vcpu *vcpu,
 	if (!ftr_reg)
 		return -EINVAL;
 
-	limit = kvm_arm64_ftr_max(vcpu, rd);
 	ftrp = ftr_reg->ftr_bits;
 
 	for (; ftrp && ftrp->width; ftrp++) {
@@ -1624,8 +1569,7 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
 	/*
 	 * MPAM is disabled by default as KVM also needs a set of PARTID to
 	 * program the MPAMVPMx_EL2 PARTID remapping registers with. But some
-	 * older kernels let the guest see the ID bit. Turning it on causes
-	 * the registers to be emulated as RAZ/WI. See trap_mpam() for more.
+	 * older kernels let the guest see the ID bit.
 	 */
 	val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
 
@@ -2393,11 +2337,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_LOREA_EL1), trap_loregion },
 	{ SYS_DESC(SYS_LORN_EL1), trap_loregion },
 	{ SYS_DESC(SYS_LORC_EL1), trap_loregion },
-	{ SYS_DESC(SYS_MPAMIDR_EL1), trap_mpam },
 	{ SYS_DESC(SYS_LORID_EL1), trap_loregion },
 
-	{ SYS_DESC(SYS_MPAM1_EL1), trap_mpam },
-	{ SYS_DESC(SYS_MPAM0_EL1), trap_mpam },
 	{ SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 },
 	{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
 
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 9946e38b2b6587017f332dcda17ab6753de4678f..0f68f9430e44f068a7bc0b1d4243fcac330dbdb8 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -59,7 +59,6 @@ HW_DBM
 KVM_HVHE
 KVM_PROTECTED_MODE
 MISMATCHED_CACHE_TYPE
-MPAM
 MTE
 MTE_ASYMM
 SME
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 4ff32f97fcc1cb0cc98bd84cb3bb8a61399df5c5..d32d591859bf25640a613313e9267390674fa866 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -3182,22 +3182,6 @@ Res0	1
 Field	0	EN
 EndSysreg
 
-Sysreg	MPAMIDR_EL1	3	0	10	4	4
-Res0	63:62
-Field	61	HAS_SDEFLT
-Field	60	HAS_FORCE_NS
-Field	59	SP4
-Field	58	HAS_TIDR
-Field	57	HAS_ALTSP
-Res0	56:40
-Field	39:32	PMG_MAX
-Res0	31:21
-Field	20:18	VPMR_MAX
-Field	17	HAS_HCR
-Res0	16
-Field	15:0	PARTID_MAX
-EndSysreg
-
 Sysreg	LORID_EL1	3	0	10	4	7
 Res0	63:24
 Field	23:16	LD
@@ -3205,22 +3189,6 @@ Res0	15:8
 Field	7:0	LR
 EndSysreg
 
-Sysreg	MPAM1_EL1	3	0	10	5	0
-Res0	63:48
-Field	47:40	PMG_D
-Field	39:32	PMG_I
-Field	31:16	PARTID_D
-Field	15:0	PARTID_I
-EndSysreg
-
-Sysreg	MPAM0_EL1	3	0	10	5	1
-Res0	63:48
-Field	47:40	PMG_D
-Field	39:32	PMG_I
-Field	31:16	PARTID_D
-Field	15:0	PARTID_I
-EndSysreg
-
 Sysreg	ISR_EL1	3	0	12	1	0
 Res0	63:11
 Field	10	IS
@@ -3234,7 +3202,6 @@ EndSysreg
 Sysreg	ICC_NMIAR1_EL1	3	0	12	9	5
 Res0	63:24
 Field	23:0	INTID
-
 EndSysreg
 
 Sysreg	TRBLIMITR_EL1	3	0	9	11	0
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc84cef105cfc0944e7162ec4e148aee3760b1cf..890c59602fc0480c9e467e6c0a296c5aefccf5df 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -492,11 +492,8 @@ config GOLDFISH
 config X86_CPU_RESCTRL
 	bool "x86 CPU resource control support"
 	depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
-	depends on MISC_FILESYSTEMS
 	select KERNFS
-	select ARCH_HAS_CPU_RESCTRL
-	select RESCTRL_FS
-	select RESCTRL_FS_PSEUDO_LOCK
+	select PROC_CPU_RESCTRL		if PROC_FS
 	help
 	  Enable x86 CPU resource control support.
 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5172ff7e623fb83b67038e62e7e4196b63d0a13f..fdac919c8cc568a8c7b2dbca34ba723cbddcbe01 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -511,12 +511,9 @@
 #define X86_FEATURE_TSA_SQ_NO          (21*32+11) /* "" AMD CPU not vulnerable to TSA-SQ */
 #define X86_FEATURE_TSA_L1_NO          (21*32+12) /* "" AMD CPU not vulnerable to TSA-L1 */
 #define X86_FEATURE_CLEAR_CPU_BUF_VM   (21*32+13) /* "" Clear CPU buffers using VERW before VMRUN */
-#define X86_FEATURE_ABMC		(21*32+ 6) /* "" Assignable Bandwidth Monitoring Counters */
 #define X86_FEATURE_AMD_FAST_CPPC	(21*32 + 5) /* Fast CPPC */
 #define X86_FEATURE_AMD_WORKLOAD_CLASS	(21*32 + 7) /* Workload Classification */
 
-#define X86_FEATURE_SDCIAE		(21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */
-
 /*
  * BUG word(s)
  */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 39d111515bd03539ba541b394e32922a1a6a36d8..47edd53ae5b8bcc94720a48cc6bc1fec425e67e5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1194,9 +1194,6 @@
 #define MSR_IA32_MBA_BW_BASE		0xc0000200
 #define MSR_IA32_SMBA_BW_BASE		0xc0000280
 #define MSR_IA32_EVT_CFG_BASE		0xc0000400
-#define MSR_IA32_L3_QOS_EXT_CFG		0xc00003ff
-#define MSR_IA32_L3_QOS_ABMC_CFG	0xc00003fd
-#define MSR_IA32_L3_QOS_ABMC_DSC	0xc00003fe
 
 /* MSR_IA32_VMX_MISC bits */
 #define MSR_IA32_VMX_MISC_INTEL_PT                 (1ULL << 14)
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 7ea7c3d7b5bebd31a912495649ef0768b895b6b0..12dbd2588ca7ccdaa1ea641327b5cbf866f50a68 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -4,10 +4,8 @@
 
 #ifdef CONFIG_X86_CPU_RESCTRL
 
-#include <linux/jump_label.h>
-#include <linux/percpu.h>
 #include <linux/sched.h>
-#include <linux/resctrl_types.h>
+#include <linux/jump_label.h>
 
 /*
  * This value can never be a valid CLOSID, and is used when mapping a
@@ -16,8 +14,6 @@
  */
 #define X86_RESCTRL_EMPTY_CLOSID         ((u32)~0)
 
-void resctrl_arch_reset_resources(void);
-
 /**
  * struct resctrl_pqr_state - State cache for the PQR MSR
  * @cur_rmid:		The cached Resource Monitoring ID
@@ -44,7 +40,6 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
 
 extern bool rdt_alloc_capable;
 extern bool rdt_mon_capable;
-extern unsigned int rdt_mon_features;
 
 DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
@@ -84,26 +79,6 @@ static inline void resctrl_arch_disable_mon(void)
 	static_branch_dec_cpuslocked(&rdt_enable_key);
 }
 
-static inline bool resctrl_arch_is_llc_occupancy_enabled(void)
-{
-	return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
-}
-
-static inline bool resctrl_arch_is_mbm_total_enabled(void)
-{
-	return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
-}
-
-static inline bool resctrl_arch_is_mbm_local_enabled(void)
-{
-	return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
-}
-
-static inline bool resctrl_arch_is_mbm_bps_enabled(void)
-{
-	return false;
-}
-
 /*
  * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
  *
@@ -121,8 +96,8 @@ static inline bool resctrl_arch_is_mbm_bps_enabled(void)
 static inline void __resctrl_sched_in(struct task_struct *tsk)
 {
 	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
-	u32 closid = READ_ONCE(state->default_closid);
-	u32 rmid = READ_ONCE(state->default_rmid);
+	u32 closid = state->default_closid;
+	u32 rmid = state->default_rmid;
 	u32 tmp;
 
 	/*
@@ -157,13 +132,6 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
 	return val * scale;
 }
 
-static inline void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid,
-							    u32 rmid)
-{
-	WRITE_ONCE(per_cpu(pqr_state.default_closid, cpu), closid);
-	WRITE_ONCE(per_cpu(pqr_state.default_rmid, cpu), rmid);
-}
-
 static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk,
 						u32 closid, u32 rmid)
 {
@@ -182,7 +150,7 @@ static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored,
 	return READ_ONCE(tsk->rmid) == rmid;
 }
 
-static inline void resctrl_arch_sched_in(struct task_struct *tsk)
+static inline void resctrl_sched_in(struct task_struct *tsk)
 {
 	if (static_branch_likely(&rdt_enable_key))
 		__resctrl_sched_in(tsk);
@@ -216,35 +184,11 @@ static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid
 static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
 					     void *ctx) { };
 
-int resctrl_arch_mbm_cntr_assign_enable(void);
-void resctrl_arch_mbm_cntr_assign_configure(void);
-void resctrl_arch_mbm_cntr_assign_disable(void);
-bool resctrl_arch_get_mbm_cntr_assign_enable(void);
-
-void resctrl_arch_event_config_set(void *info);
-u32 resctrl_arch_event_config_get(void *d, enum resctrl_event_id eventid);
-
-int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid,
-			     u32 rmid, u32 cntr_id, u32 closid, bool assign);
-
-u64 resctrl_arch_get_prefetch_disable_bits(void);
-int resctrl_arch_pseudo_lock_fn(void *_plr);
-int resctrl_arch_measure_cycles_lat_fn(void *_plr);
-int resctrl_arch_measure_l2_residency(void *_plr);
-int resctrl_arch_measure_l3_residency(void *_plr);
 void resctrl_cpu_detect(struct cpuinfo_x86 *c);
 
-bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l);
-int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
-
-bool resctrl_arch_is_hwdrc_mb_capable(void);
-int resctrl_arch_set_hwdrc_enabled(enum resctrl_res_level l, bool hwdrc_mb);
-
-bool resctrl_arch_get_abmc_enabled(void);
-
 #else
 
-static inline void resctrl_arch_sched_in(struct task_struct *tsk) {}
+static inline void resctrl_sched_in(struct task_struct *tsk) {}
 static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
 
 #endif /* CONFIG_X86_CPU_RESCTRL */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 8be2847473c4ce0e07bc17b7b61528ebcf4f4dbd..7d8733874218d0fd6e0853a216659446ad96803d 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -70,10 +70,6 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_CQM_MBM_LOCAL,		X86_FEATURE_CQM_LLC   },
 	{ X86_FEATURE_BMEC,			X86_FEATURE_CQM_MBM_TOTAL   },
 	{ X86_FEATURE_BMEC,			X86_FEATURE_CQM_MBM_LOCAL   },
-	{ X86_FEATURE_ABMC,			X86_FEATURE_CQM_MBM_TOTAL   },
-	{ X86_FEATURE_ABMC,			X86_FEATURE_CQM_MBM_LOCAL   },
-	{ X86_FEATURE_ABMC,			X86_FEATURE_BMEC      },
-	{ X86_FEATURE_SDCIAE,			X86_FEATURE_CAT_L3    },
 	{ X86_FEATURE_AVX512_BF16,		X86_FEATURE_AVX512VL  },
 	{ X86_FEATURE_AVX512_FP16,		X86_FEATURE_AVX512BW  },
 	{ X86_FEATURE_ENQCMD,			X86_FEATURE_XSAVES    },
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 6c5a17d7a4a3554600fe6975d537750017d78ebd..1491a111e619610739a69671436bdcbeab48c424 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -18,7 +18,6 @@
 #include <asm/page.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <asm/resctrl.h>
 
 #include "cpu.h"
 
@@ -121,7 +120,6 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c)
 			x86_amd_ls_cfg_ssbd_mask = 1ULL << 10;
 		}
 	}
-	resctrl_cpu_detect(c);
 }
 
 static void init_hygon_cap(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 0c13b0befd8a9b76fe4753b8ee230113e5f3bf54..4a06c37b9cf11bef1b00b546e3c2344691e6e18a 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_X86_CPU_RESCTRL)		+= core.o rdtgroup.o monitor.o
-obj-$(CONFIG_X86_CPU_RESCTRL)		+= ctrlmondata.o
-obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK)	+= pseudo_lock.o
+obj-$(CONFIG_X86_CPU_RESCTRL)	+= core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_CPU_RESCTRL)	+= ctrlmondata.o pseudo_lock.o
 CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 9f1a439be5abe1a7e0a95dafc2f5ee37ee2e2602..4edc2b9d53c41c6659e69aa26b16eb6e863b0ec7 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -44,6 +44,12 @@ static DEFINE_MUTEX(domain_list_lock);
  */
 DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
 
+/*
+ * Maximum resource name width and maximum resource data width, used to
+ * align the columns when the schemata is displayed in a tabular format.
+ */
+int max_name_width, max_data_width;
+
 /*
  * Global boolean for rdt_alloc which is true if any
  * resource allocation is enabled.
@@ -69,6 +75,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 			.name			= "L3",
 			.cache_level		= 3,
 			.domains		= domain_init(RDT_RESOURCE_L3),
+			.parse_ctrlval		= parse_cbm,
 			.format_str		= "%d=%0*x",
 			.fflags			= RFTYPE_RES_CACHE,
 		},
@@ -82,6 +89,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 			.name			= "L2",
 			.cache_level		= 2,
 			.domains		= domain_init(RDT_RESOURCE_L2),
+			.parse_ctrlval		= parse_cbm,
 			.format_str		= "%d=%0*x",
 			.fflags			= RFTYPE_RES_CACHE,
 		},
@@ -95,6 +103,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
 			.name			= "MB",
 			.cache_level		= 3,
 			.domains		= domain_init(RDT_RESOURCE_MBA),
+			.parse_ctrlval		= parse_bw,
 			.format_str		= "%d=%*u",
 			.fflags			= RFTYPE_RES_MB,
 		},
@@ -106,20 +115,13 @@ struct rdt_hw_resource rdt_resources_all[] = {
 			.name			= "SMBA",
 			.cache_level		= 3,
 			.domains		= domain_init(RDT_RESOURCE_SMBA),
+			.parse_ctrlval		= parse_bw,
 			.format_str		= "%d=%*u",
 			.fflags			= RFTYPE_RES_MB,
 		},
 	},
 };
 
-struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
-{
-	if (l >= RDT_NUM_RESOURCES)
-		return NULL;
-
-	return &rdt_resources_all[l].r_resctrl;
-}
-
 /*
  * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
  * as they do not have CPUID enumeration support for Cache allocation.
@@ -164,6 +166,21 @@ static inline void cache_alloc_hsw_probe(void)
 	rdt_alloc_capable = true;
 }
 
+bool is_mba_sc(struct rdt_resource *r)
+{
+	/* No resource given: report the MBA resource's own mba_sc flag. */
+	if (!r)
+		return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;
+
+	/*
+	 * The software controller only applies to the MBA resource type.
+	 */
+	if (r->rid == RDT_RESOURCE_MBA)
+		return r->membw.mba_sc;
+
+	return false;
+}
+
 /*
  * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
  * exposed to user interface and the h/w understandable delay values.
@@ -212,6 +229,7 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r)
 		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
 	else
 		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
+	thread_throttle_mode_init();
 
 	r->alloc_capable = true;
 
@@ -281,11 +299,6 @@ static void rdt_get_cdp_config(int level)
 	rdt_resources_all[level].r_resctrl.cdp_capable = true;
 }
 
-static void rdt_set_io_alloc_capable(struct rdt_resource *r)
-{
-	r->cache.io_alloc_capable = true;
-}
-
 static void rdt_get_cdp_l3_config(void)
 {
 	rdt_get_cdp_config(RDT_RESOURCE_L3);
@@ -296,11 +309,6 @@ static void rdt_get_cdp_l2_config(void)
 	rdt_get_cdp_config(RDT_RESOURCE_L2);
 }
 
-bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
-{
-	return rdt_resources_all[l].cdp_enabled;
-}
-
 static void
 mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
 {
@@ -350,6 +358,19 @@ cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
 		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
 }
 
+struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
+{
+	struct rdt_domain *dom;
+
+	/* Hand back the first domain of @r whose cpu_mask has @cpu set. */
+	list_for_each_entry(dom, &r->domains, list) {
+		if (!cpumask_test_cpu(cpu, &dom->cpu_mask))
+			continue;
+		return dom;
+	}
+	return NULL;
+}
+
 u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
 {
 	return resctrl_to_arch_res(r)->num_closid;
@@ -363,7 +384,7 @@ void rdt_ctrl_update(void *arg)
 	int cpu = smp_processor_id();
 	struct rdt_domain *d;
 
-	d = resctrl_get_domain_from_cpu(cpu, r);
+	d = get_domain_from_cpu(cpu, r);
 	if (d) {
 		hw_res->msr_update(d, m, r);
 		return;
@@ -380,8 +401,8 @@ void rdt_ctrl_update(void *arg)
  * caller, return the first domain whose id is bigger than the input id.
  * The domain list is sorted by id in ascending order.
  */
-static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
-					  struct list_head **pos)
+struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+				   struct list_head **pos)
 {
 	struct rdt_domain *d;
 	struct list_head *l;
@@ -405,11 +426,6 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
 	return NULL;
 }
 
-struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id)
-{
-	return rdt_find_domain(r, id, NULL);
-}
-
 static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
 {
 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
@@ -462,13 +478,13 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
 {
 	size_t tsize;
 
-	if (resctrl_arch_is_mbm_total_enabled()) {
+	if (is_mbm_total_enabled()) {
 		tsize = sizeof(*hw_dom->arch_mbm_total);
 		hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
 		if (!hw_dom->arch_mbm_total)
 			return -ENOMEM;
 	}
-	if (resctrl_arch_is_mbm_local_enabled()) {
+	if (is_mbm_local_enabled()) {
 		tsize = sizeof(*hw_dom->arch_mbm_local);
 		hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
 		if (!hw_dom->arch_mbm_local) {
@@ -502,7 +518,6 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 	struct rdt_domain *d;
 	int err;
 
-	BUG_ON(id > NR_CPUS);
 	lockdep_assert_held(&domain_list_lock);
 
 	d = rdt_find_domain(r, id, &add_pos);
@@ -515,7 +530,6 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 		cpumask_set_cpu(cpu, &d->cpu_mask);
 		if (r->cache.arch_has_per_cpu_cfg)
 			rdt_domain_reconfigure_cdp(r);
-		resctrl_arch_mbm_cntr_assign_configure();
 		return;
 	}
 
@@ -525,8 +539,6 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 
 	d = &hw_dom->d_resctrl;
 	d->id = id;
-	r->rdt_domain_list[id] = d;
-
 	cpumask_set_cpu(cpu, &d->cpu_mask);
 
 	rdt_domain_reconfigure_cdp(r);
@@ -536,10 +548,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 		return;
 	}
 
-	resctrl_mbm_evt_config_init(hw_dom);
-	resctrl_arch_mbm_cntr_assign_configure();
-
-	if (r->mon_capable && arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) {
+	if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
 		domain_free(hw_dom);
 		return;
 	}
@@ -560,7 +569,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 	struct rdt_hw_domain *hw_dom;
 	struct rdt_domain *d;
 
-	BUG_ON(id > NR_CPUS);
 	lockdep_assert_held(&domain_list_lock);
 
 	d = rdt_find_domain(r, id, NULL);
@@ -582,7 +590,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		 */
 		if (d->plr)
 			d->plr->d = NULL;
-		r->rdt_domain_list[id] = NULL;
 		domain_free(hw_dom);
 
 		return;
@@ -632,6 +639,20 @@ static int resctrl_arch_offline_cpu(unsigned int cpu)
 	return 0;
 }
 
+/*
+ * Record in max_data_width the largest data_width found among the
+ * alloc capable resources. max_name_width is not updated here.
+ */
+static __init void rdt_init_padding(void)
+{
+	struct rdt_resource *r;
+
+	for_each_alloc_capable_rdt_resource(r) {
+		if (r->data_width > max_data_width)
+			max_data_width = r->data_width;
+	}
+}
+
 enum {
 	RDT_FLAG_CMT,
 	RDT_FLAG_MBM_TOTAL,
@@ -643,8 +664,6 @@ enum {
 	RDT_FLAG_MBA,
 	RDT_FLAG_SMBA,
 	RDT_FLAG_BMEC,
-	RDT_FLAG_ABMC,
-	RDT_FLAG_SDCIAE,
 };
 
 #define RDT_OPT(idx, n, f)	\
@@ -659,7 +678,7 @@ struct rdt_options {
 	bool	force_off, force_on;
 };
 
-static struct rdt_options rdt_options[]  __ro_after_init = {
+static struct rdt_options rdt_options[]  __initdata = {
 	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
 	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
 	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
@@ -670,8 +689,6 @@ static struct rdt_options rdt_options[]  __ro_after_init = {
 	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
 	RDT_OPT(RDT_FLAG_SMBA,	    "smba",	X86_FEATURE_SMBA),
 	RDT_OPT(RDT_FLAG_BMEC,	    "bmec",	X86_FEATURE_BMEC),
-	RDT_OPT(RDT_FLAG_ABMC,	    "abmc",	X86_FEATURE_ABMC),
-	RDT_OPT(RDT_FLAG_SDCIAE,    "sdciae",	X86_FEATURE_SDCIAE),
 };
 #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
 
@@ -701,7 +718,7 @@ static int __init set_rdt_options(char *str)
 }
 __setup("rdt", set_rdt_options);
 
-bool rdt_cpu_has(int flag)
+bool __init rdt_cpu_has(int flag)
 {
 	bool ret = boot_cpu_has(flag);
 	struct rdt_options *o;
@@ -721,21 +738,6 @@ bool rdt_cpu_has(int flag)
 	return ret;
 }
 
-bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
-{
-	if (!rdt_cpu_has(X86_FEATURE_BMEC))
-		return false;
-
-	switch (evt) {
-	case QOS_L3_MBM_TOTAL_EVENT_ID:
-		return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL);
-	case QOS_L3_MBM_LOCAL_EVENT_ID:
-		return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL);
-	default:
-		return false;
-	}
-}
-
 static __init bool get_mem_config(void)
 {
 	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];
@@ -745,8 +747,7 @@ static __init bool get_mem_config(void)
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
 		return __get_mem_config_intel(&hw_res->r_resctrl);
-	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
-		 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
 		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
 
 	return false;
@@ -781,8 +782,6 @@ static __init bool get_rdt_alloc_resources(void)
 		rdt_get_cache_alloc_cfg(1, r);
 		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
 			rdt_get_cdp_l3_config();
-		if (rdt_cpu_has(X86_FEATURE_SDCIAE))
-			rdt_set_io_alloc_capable(r);
 		ret = true;
 	}
 	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
@@ -899,8 +898,7 @@ static __init void rdt_init_res_defs(void)
 {
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
 		rdt_init_res_defs_intel();
-	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
-		 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
 		rdt_init_res_defs_amd();
 }
 
@@ -931,23 +929,12 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)
 		c->x86_cache_occ_scale = ebx;
 		c->x86_cache_mbm_width_offset = eax & 0xff;
 
-		if (!c->x86_cache_mbm_width_offset) {
-			switch (c->x86_vendor) {
-			case X86_VENDOR_AMD:
-				c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
-				break;
-			case X86_VENDOR_HYGON:
-				c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_HYGON;
-				break;
-			default:
-				/* Leave c->x86_cache_mbm_width_offset as 0 */
-				break;
-			}
-		}
+		if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
+			c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
 	}
 }
 
-static int __init resctrl_arch_late_init(void)
+static int __init resctrl_late_init(void)
 {
 	struct rdt_resource *r;
 	int state, ret;
@@ -963,6 +950,8 @@ static int __init resctrl_arch_late_init(void)
 	if (!get_rdt_resources())
 		return -ENODEV;
 
+	rdt_init_padding();
+
 	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 				  "x86/resctrl/cat:online:",
 				  resctrl_arch_online_cpu,
@@ -970,7 +959,7 @@ static int __init resctrl_arch_late_init(void)
 	if (state < 0)
 		return state;
 
-	ret = resctrl_init();
+	ret = rdtgroup_init();
 	if (ret) {
 		cpuhp_remove_state(state);
 		return ret;
@@ -986,13 +975,18 @@ static int __init resctrl_arch_late_init(void)
 	return 0;
 }
 
-late_initcall(resctrl_arch_late_init);
+late_initcall(resctrl_late_init);
 
-static void __exit resctrl_arch_exit(void)
+static void __exit resctrl_exit(void)
 {
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
 	cpuhp_remove_state(rdt_online);
 
-	resctrl_exit();
+	rdtgroup_exit();
+
+	if (r->mon_capable)
+		rdt_put_mon_l3_config();
 }
 
-__exitcall(resctrl_arch_exit);
+__exitcall(resctrl_exit);
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index b62792ad80acbf081e85da3a07610419796a2563..8bd717222f0ce3429ed207000d43bf46dbf6f3f6 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -23,6 +23,260 @@
 
 #include "internal.h"
 
+/*
+ * Parse @buf as a decimal MB value and store the result in @data. With
+ * the software controller active the value is taken as is; otherwise it
+ * must lie in [min_bw, default_ctrl] and is rounded up to a multiple of
+ * bw_gran. Returns true on success, false after rdt_last_cmd_*() output.
+ */
+static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
+{
+	int ret;
+	u32 bw;
+
+	/*
+	 * Only linear delay values are supported for current Intel SKUs.
+	 */
+	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
+		rdt_last_cmd_puts("No support for non-linear MB domains\n");
+		return false;
+	}
+
+	ret = kstrtou32(buf, 10, &bw);
+	if (ret) {
+		rdt_last_cmd_printf("Invalid MB value %s\n", buf);
+		return false;
+	}
+
+	/* Nothing else to do if software controller is enabled. */
+	if (is_mba_sc(r)) {
+		*data = bw;
+		return true;
+	}
+
+	if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+		rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
+				    bw, r->membw.min_bw, r->default_ctrl);
+		return false;
+	}
+
+	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
+	return true;
+}
+
+int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
+	     struct rdt_domain *d)
+{
+	struct resctrl_staged_config *staged = &d->staged_config[s->conf_type];
+	u32 closid = data->rdtgrp->closid;
+	struct rdt_resource *res = s->res;
+	u32 new_bw;
+
+	/* have_new_ctrl already set: this domain was staged earlier. */
+	if (staged->have_new_ctrl) {
+		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+		return -EINVAL;
+	}
+
+	if (!bw_validate(data->buf, &new_bw, res))
+		return -EINVAL;
+
+	if (!is_mba_sc(res)) {
+		staged->new_ctrl = new_bw;
+		staged->have_new_ctrl = true;
+		return 0;
+	}
+
+	/* Software controller: record per closid, stage nothing. */
+	d->mbps_val[closid] = new_bw;
+	return 0;
+}
+
+/*
+ * Parse @buf as a hex cache bit mask, validate it and store it in @data.
+ * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID:
+ *   - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1
+ *   - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1
+ * Haswell does not support a non-contiguous 1s value and additionally
+ * requires at least two bits set. AMD allows non-contiguous bitmasks.
+ * Returns true if the mask is accepted, false after the reason has been
+ * passed to rdt_last_cmd_*().
+ */
+static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
+{
+	unsigned long first_bit, zero_bit, val;
+	unsigned int cbm_len = r->cache.cbm_len;
+	int ret;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret) {
+		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
+		return false;
+	}
+
+	if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) {
+		rdt_last_cmd_puts("Mask out of range\n");
+		return false;
+	}
+
+	first_bit = find_first_bit(&val, cbm_len);
+	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
+
+	/* A set bit at or past zero_bit means the 1s are not contiguous. */
+	if (!r->cache.arch_has_sparse_bitmasks &&
+	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
+		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
+		return false;
+	}
+
+	if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
+		rdt_last_cmd_printf("Need at least %d bits in the mask\n",
+				    r->cache.min_cbm_bits);
+		return false;
+	}
+
+	*data = val;
+	return true;
+}
+
+/*
+ * Read one cache bit mask (hex) for domain @d, check it against the group
+ * mode and stage it in @d. Returns 0 on success, -EINVAL otherwise.
+ */
+int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
+	      struct rdt_domain *d)
+{
+	struct rdtgroup *rdtgrp = data->rdtgrp;
+	struct resctrl_staged_config *cfg;
+	struct rdt_resource *r = s->res;
+	u32 cbm_val;
+
+	cfg = &d->staged_config[s->conf_type];
+	if (cfg->have_new_ctrl) {
+		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+		return -EINVAL;
+	}
+
+	/*
+	 * Cannot set up more than one pseudo-locked region in a cache
+	 * hierarchy.
+	 */
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
+	    rdtgroup_pseudo_locked_in_hierarchy(d)) {
+		rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
+		return -EINVAL;
+	}
+
+	if (!cbm_validate(data->buf, &cbm_val, r))
+		return -EINVAL;
+
+	if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
+	     rdtgrp->mode == RDT_MODE_SHAREABLE) &&
+	    rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
+		rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * The CBM may not overlap with the CBM of another closid if
+	 * either is exclusive.
+	 */
+	if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) {
+		rdt_last_cmd_puts("Overlaps with exclusive group\n");
+		return -EINVAL;
+	}
+
+	if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) {
+		if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
+		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+			rdt_last_cmd_puts("Overlaps with other group\n");
+			return -EINVAL;
+		}
+	}
+
+	cfg->new_ctrl = cbm_val;
+	cfg->have_new_ctrl = true;
+
+	return 0;
+}
+
+/*
+ * For each domain in this resource we expect to find a series of:
+ *	id=mask
+ * separated by ";". The "id" is in decimal, and must match one of
+ * the "id"s for this resource. Returns 0 on success, else -EINVAL.
+ */
+static int parse_line(char *line, struct resctrl_schema *s,
+		      struct rdtgroup *rdtgrp)
+{
+	enum resctrl_conf_type t = s->conf_type;
+	struct resctrl_staged_config *cfg;
+	struct rdt_resource *r = s->res;
+	struct rdt_parse_data data;
+	char *dom = NULL, *id;
+	struct rdt_domain *d;
+	unsigned long dom_id;
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
+	    (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
+		rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
+		return -EINVAL;
+	}
+
+next:
+	if (!line || line[0] == '\0')
+		return 0;
+	dom = strsep(&line, ";");
+	id = strsep(&dom, "=");
+	if (!dom || kstrtoul(id, 10, &dom_id)) {
+		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
+		return -EINVAL;
+	}
+	dom = strim(dom);
+	list_for_each_entry(d, &r->domains, list) {
+		if (d->id == dom_id) {
+			data.buf = dom;
+			data.rdtgrp = rdtgrp;
+			if (r->parse_ctrlval(&data, s, d))
+				return -EINVAL;
+			if (rdtgrp->mode ==  RDT_MODE_PSEUDO_LOCKSETUP) {
+				cfg = &d->staged_config[t];
+				/*
+				 * In pseudo-locking setup mode and just
+				 * parsed a valid CBM that should be
+				 * pseudo-locked. Only one locked region per
+				 * resource group and domain so just do
+				 * the required initialization for single
+				 * region and return.
+				 */
+				rdtgrp->plr->s = s;
+				rdtgrp->plr->d = d;
+				rdtgrp->plr->cbm = cfg->new_ctrl;
+				d->plr = rdtgrp->plr;
+				return 0;
+			}
+			goto next;
+		}
+	}
+	return -EINVAL;
+}
+
+static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
+{
+	/* CDP gives each closid two slots: data is even, code is odd. */
+	if (type == CDP_DATA)
+		return closid * 2;
+
+	if (type == CDP_CODE)
+		return closid * 2 + 1;
+
+	/* CDP_NONE, like any unrecognised type, maps straight to closid. */
+	return closid;
+}
+
 static bool apply_config(struct rdt_hw_domain *hw_dom,
 			 struct resctrl_staged_config *cfg, u32 idx,
 			 cpumask_var_t cpu_mask)
@@ -44,7 +298,7 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
 {
 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
-	u32 idx = resctrl_get_config_index(closid, t);
+	u32 idx = get_config_index(closid, t);
 	struct msr_param msr_param;
 
 	if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
@@ -84,7 +338,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
 			if (!cfg->have_new_ctrl)
 				continue;
 
-			idx = resctrl_get_config_index(closid, t);
+			idx = get_config_index(closid, t);
 			if (!apply_config(hw_dom, cfg, idx, cpu_mask))
 				continue;
 
@@ -111,51 +365,262 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
 	return 0;
 }
 
+static int rdtgroup_parse_resource(char *resname, char *tok,
+				   struct rdtgroup *rdtgrp)
+{
+	struct resctrl_schema *s;
+
+	list_for_each_entry(s, &resctrl_schema_all, list) {
+		if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
+			return parse_line(tok, s, rdtgrp);
+	}
+	rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
+	return -EINVAL;
+}
+
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct resctrl_schema *s;
+	struct rdtgroup *rdtgrp;
+	struct rdt_resource *r;
+	char *tok, *resname;
+	int ret = 0;
+
+	/* Valid input requires a trailing newline */
+	if (nbytes == 0 || buf[nbytes - 1] != '\n')
+		return -EINVAL;
+	buf[nbytes - 1] = '\0';
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp) {
+		rdtgroup_kn_unlock(of->kn);
+		return -ENOENT;
+	}
+	rdt_last_cmd_clear();
+
+	/*
+	 * No changes to pseudo-locked region allowed. It has to be removed
+	 * and re-created instead.
+	 */
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+		ret = -EINVAL;
+		rdt_last_cmd_puts("Resource group is pseudo-locked\n");
+		goto out;
+	}
+
+	rdt_staged_configs_clear();
+
+	while ((tok = strsep(&buf, "\n")) != NULL) {
+		resname = strim(strsep(&tok, ":"));
+		if (!tok) {
+			rdt_last_cmd_puts("Missing ':'\n");
+			ret = -EINVAL;
+			goto out;
+		}
+		if (tok[0] == '\0') {
+			rdt_last_cmd_printf("Missing '%s' value\n", resname);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
+		if (ret)
+			goto out;
+	}
+
+	list_for_each_entry(s, &resctrl_schema_all, list) {
+		r = s->res;
+
+		/*
+		 * Writes to mba_sc resources update the software controller,
+		 * not the control MSR.
+		 */
+		if (is_mba_sc(r))
+			continue;
+
+		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
+		if (ret)
+			goto out;
+	}
+
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+		/*
+		 * If pseudo-locking fails we keep the resource group in
+		 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
+		 * active and updated for just the domain the pseudo-locked
+		 * region was requested for.
+		 */
+		ret = rdtgroup_pseudo_lock_create(rdtgrp);
+	}
+
+out:
+	rdt_staged_configs_clear();
+	rdtgroup_kn_unlock(of->kn);
+	return ret ?: nbytes;
+}
+
 u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
 			    u32 closid, enum resctrl_conf_type type)
 {
 	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
-	u32 idx = resctrl_get_config_index(closid, type);
+	u32 idx = get_config_index(closid, type);
 
 	return hw_dom->ctrl_val[idx];
 }
 
-bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r)
+static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
 {
-	return resctrl_to_arch_res(r)->sdciae_enabled;
+	struct rdt_resource *r = schema->res;
+	struct rdt_domain *dom;
+	bool sep = false;
+	u32 ctrl_val;
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+	seq_printf(s, "%*s:", max_name_width, schema->name);
+	list_for_each_entry(dom, &r->domains, list) {
+		if (sep)
+			seq_puts(s, ";");
+
+		if (is_mba_sc(r))
+			ctrl_val = dom->mbps_val[closid];
+		else
+			ctrl_val = resctrl_arch_get_config(r, dom, closid,
+							   schema->conf_type);
+
+		seq_printf(s, r->format_str, dom->id, max_data_width,
+			   ctrl_val);
+		sep = true;
+	}
+	seq_puts(s, "\n");
 }
 
-static void resctrl_sdciae_set_one_amd(void *arg)
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+			   struct seq_file *s, void *v)
 {
-	bool *enable = arg;
+	struct resctrl_schema *schema;
+	struct rdtgroup *rdtgrp;
+	int ret = 0;
+	u32 closid;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (rdtgrp) {
+		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+			list_for_each_entry(schema, &resctrl_schema_all, list) {
+				seq_printf(s, "%s:uninitialized\n", schema->name);
+			}
+		} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+			if (!rdtgrp->plr->d) {
+				rdt_last_cmd_clear();
+				rdt_last_cmd_puts("Cache domain offline\n");
+				ret = -ENODEV;
+			} else {
+				seq_printf(s, "%s:%d=%x\n",
+					   rdtgrp->plr->s->res->name,
+					   rdtgrp->plr->d->id,
+					   rdtgrp->plr->cbm);
+			}
+		} else {
+			closid = rdtgrp->closid;
+			list_for_each_entry(schema, &resctrl_schema_all, list) {
+				if (closid < schema->num_closid)
+					show_doms(s, schema, closid);
+			}
+		}
+	} else {
+		ret = -ENOENT;
+	}
+	rdtgroup_kn_unlock(of->kn);
+	return ret;
+}
 
-	if (*enable)
-		msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT);
-	else
-		msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT);
+static int smp_mon_event_count(void *arg)
+{
+	mon_event_count(arg);
+
+	return 0;
 }
 
-static void _resctrl_sdciae_enable(struct rdt_resource *r, bool enable)
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
+		    int evtid, int first)
 {
-	struct rdt_domain *d;
+	int cpu;
 
-	/* Walking r->ctrl_domains, ensure it can't race with cpuhp */
+	/* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
 	lockdep_assert_cpus_held();
 
-	/* Update MSR_IA32_L3_QOS_EXT_CFG MSR on all the CPUs in all domains */
-	list_for_each_entry(d, &r->domains, list)
-		on_each_cpu_mask(&d->cpu_mask, resctrl_sdciae_set_one_amd, &enable, 1);
+	/*
+	 * Setup the parameters to pass to mon_event_count() to read the data.
+	 */
+	rr->rgrp = rdtgrp;
+	rr->evtid = evtid;
+	rr->r = r;
+	rr->d = d;
+	rr->val = 0;
+	rr->first = first;
+	rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
+	if (IS_ERR(rr->arch_mon_ctx)) {
+		rr->err = -EINVAL;
+		return;
+	}
+
+	cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
+
+	/*
+	 * cpumask_any_housekeeping() prefers housekeeping CPUs, but
+	 * are all the CPUs nohz_full? If yes, pick a CPU to IPI.
+	 * MPAM's resctrl_arch_rmid_read() is unable to read the
+	 * counters on some platforms if its called in IRQ context.
+	 */
+	if (tick_nohz_full_cpu(cpu))
+		smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+	else
+		smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
+
+	resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
 }
 
-int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable)
+int rdtgroup_mondata_show(struct seq_file *m, void *arg)
 {
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	struct kernfs_open_file *of = m->private;
+	u32 resid, evtid, domid;
+	struct rdtgroup *rdtgrp;
+	struct rdt_resource *r;
+	union mon_data_bits md;
+	struct rdt_domain *d;
+	struct rmid_read rr;
+	int ret = 0;
 
-	if (hw_res->r_resctrl.cache.io_alloc_capable &&
-	    hw_res->sdciae_enabled != enable) {
-		_resctrl_sdciae_enable(r, enable);
-		hw_res->sdciae_enabled = enable;
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp) {
+		ret = -ENOENT;
+		goto out;
 	}
 
-	return 0;
+	md.priv = of->kn->priv;
+	resid = md.u.rid;
+	domid = md.u.domid;
+	evtid = md.u.evtid;
+
+	r = &rdt_resources_all[resid].r_resctrl;
+	d = rdt_find_domain(r, domid, NULL);
+	if (IS_ERR_OR_NULL(d)) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	mon_event_read(&rr, r, d, rdtgrp, evtid, false);
+
+	if (rr.err == -EIO)
+		seq_puts(m, "Error\n");
+	else if (rr.err == -EINVAL)
+		seq_puts(m, "Unavailable\n");
+	else
+		seq_printf(m, "%llu\n", rr.val);
+
+out:
+	rdtgroup_kn_unlock(of->kn);
+	return ret;
 }
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 41c912aa859cb8d80bc932a937c98e8d624cab1b..c99f26ebe7a6537a7cd43274701ac4f489648081 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -10,29 +10,21 @@
 #include <linux/tick.h>
 
 #include <asm/resctrl.h>
-#include <asm/intel-family.h>
-
-/* Memory bandwidth HWDRC */
-#define HWDRC_MSR_OS_MAILBOX_INTERFACE	0xb0
-#define HWDRC_MSR_OS_MAILBOX_DATA	0xb1
-#define HWDRC_MSR_OS_MAILBOX_BUSY_BIT	BIT_ULL(31)
-#define HWDRC_COMMAND_MEM_CLOS_EN	0xd0
-#define HWDRC_SUB_COMMAND_MEM_CLOS_EN	0x54
-#define HWDRC_MEMCLOS_AVAILABLE		BIT_ULL(0)
-#define HWDRC_OS_MAILBOX_RETRY_COUNT	30
 
 #define L3_QOS_CDP_ENABLE		0x01ULL
 
 #define L2_QOS_CDP_ENABLE		0x01ULL
 
-#define MBM_CNTR_WIDTH_OFFSET_AMD	20
+#define CQM_LIMBOCHECK_INTERVAL	1000
 
-/* Hygon MBM counter width as an offset from MBM_CNTR_WIDTH_BASE */
-#define MBM_CNTR_WIDTH_OFFSET_HYGON	8
+#define MBM_CNTR_WIDTH_BASE		24
+#define MBM_OVERFLOW_INTERVAL		1000
+#define MAX_MBA_BW			100u
+#define MBA_IS_LINEAR			0x4
+#define MBM_CNTR_WIDTH_OFFSET_AMD	20
 
 #define RMID_VAL_ERROR			BIT_ULL(63)
 #define RMID_VAL_UNAVAIL		BIT_ULL(62)
-
 /*
  * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
  * data to be returned. The counter width is discovered from the hardware
@@ -40,8 +32,314 @@
  */
 #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
 
-/* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
-#define ABMC_ENABLE_BIT			0
+/* Reads to Local DRAM Memory */
+#define READS_TO_LOCAL_MEM		BIT(0)
+
+/* Reads to Remote DRAM Memory */
+#define READS_TO_REMOTE_MEM		BIT(1)
+
+/* Non-Temporal Writes to Local Memory */
+#define NON_TEMP_WRITE_TO_LOCAL_MEM	BIT(2)
+
+/* Non-Temporal Writes to Remote Memory */
+#define NON_TEMP_WRITE_TO_REMOTE_MEM	BIT(3)
+
+/* Reads to Local Memory the system identifies as "Slow Memory" */
+#define READS_TO_LOCAL_S_MEM		BIT(4)
+
+/* Reads to Remote Memory the system identifies as "Slow Memory" */
+#define READS_TO_REMOTE_S_MEM		BIT(5)
+
+/* Dirty Victims to All Types of Memory */
+#define DIRTY_VICTIMS_TO_ALL_MEM	BIT(6)
+
+/* Max event bits supported */
+#define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
+
+/**
+ * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
+ *			        aren't marked nohz_full
+ * @mask:	The mask to pick a CPU from.
+ * @exclude_cpu:The CPU to avoid picking.
+ *
+ * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
+ * CPUs that don't use nohz_full, these are preferred. Pass
+ * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
+ *
+ * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
+ */
+static inline unsigned int
+cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
+{
+	unsigned int cpu, hk_cpu;
+
+	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
+		cpu = cpumask_any(mask);
+	else
+		cpu = cpumask_any_but(mask, exclude_cpu);
+
+	if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
+		return cpu;
+
+	/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
+	if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu))
+		return cpu;
+
+	/* Prefer a CPU that isn't nohz_full, stepping over @exclude_cpu. */
+	hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
+	if (hk_cpu == exclude_cpu)
+		hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
+
+	if (hk_cpu < nr_cpu_ids)
+		cpu = hk_cpu;
+
+	return cpu;
+}
+
+struct rdt_fs_context {
+	struct kernfs_fs_context	kfc;
+	bool				enable_cdpl2;
+	bool				enable_cdpl3;
+	bool				enable_mba_mbps;
+	bool				enable_debug;
+};
+
+static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
+{
+	struct kernfs_fs_context *kernfs_ctx = fc->fs_private;
+
+	return container_of(kernfs_ctx, struct rdt_fs_context, kfc);
+}
+
+/**
+ * struct mon_evt - Entry in the event list of a resource
+ * @evtid:		event id
+ * @name:		name of the event
+ * @configurable:	true if the event is configurable
+ * @list:		entry in &rdt_resource->evt_list
+ */
+struct mon_evt {
+	enum resctrl_event_id	evtid;
+	char			*name;
+	bool			configurable;
+	struct list_head	list;
+};
+
+/**
+ * union mon_data_bits - Monitoring details for each event file
+ * @priv:              Used to store monitoring event data in @u
+ *                     as kernfs private data
+ * @rid:               Resource id associated with the event file
+ * @evtid:             Event id associated with the event file
+ * @domid:             The domain to which the event file belongs
+ * @u:                 Name of the bit fields struct
+ */
+union mon_data_bits {
+	void *priv;
+	struct {
+		unsigned int rid		: 10;
+		enum resctrl_event_id evtid	: 8;
+		unsigned int domid		: 14;
+	} u;
+};
+
+struct rmid_read {
+	struct rdtgroup		*rgrp;
+	struct rdt_resource	*r;
+	struct rdt_domain	*d;
+	enum resctrl_event_id	evtid;
+	bool			first;
+	int			err;
+	u64			val;
+	void			*arch_mon_ctx;
+};
+
+extern unsigned int rdt_mon_features;
+extern struct list_head resctrl_schema_all;
+extern bool resctrl_mounted;
+
+enum rdt_group_type {
+	RDTCTRL_GROUP = 0,
+	RDTMON_GROUP,
+	RDT_NUM_GROUP,
+};
+
+/**
+ * enum rdtgrp_mode - Mode of a RDT resource group
+ * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
+ * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
+ * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
+ * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
+ *                          allowed AND the allocations are Cache Pseudo-Locked
+ * @RDT_NUM_MODES: Total number of modes
+ *
+ * The mode of a resource group enables control over the allowed overlap
+ * between allocations associated with different resource groups (classes
+ * of service). User is able to modify the mode of a resource group by
+ * writing to the "mode" resctrl file associated with the resource group.
+ *
+ * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
+ * writing the appropriate text to the "mode" file. A resource group enters
+ * "pseudo-locked" mode after the schemata is written while the resource
+ * group is in "pseudo-locksetup" mode.
+ */
+enum rdtgrp_mode {
+	RDT_MODE_SHAREABLE = 0,
+	RDT_MODE_EXCLUSIVE,
+	RDT_MODE_PSEUDO_LOCKSETUP,
+	RDT_MODE_PSEUDO_LOCKED,
+
+	/* Must be last */
+	RDT_NUM_MODES,
+};
+
+/**
+ * struct mongroup - store mon group's data in resctrl fs.
+ * @mon_data_kn:		kernfs node for the mon_data directory
+ * @parent:			parent rdtgrp
+ * @crdtgrp_list:		child rdtgroup node list
+ * @rmid:			rmid for this rdtgroup
+ */
+struct mongroup {
+	struct kernfs_node	*mon_data_kn;
+	struct rdtgroup		*parent;
+	struct list_head	crdtgrp_list;
+	u32			rmid;
+};
+
+/**
+ * struct pseudo_lock_region - pseudo-lock region information
+ * @s:			Resctrl schema for the resource to which this
+ *			pseudo-locked region belongs
+ * @d:			RDT domain to which this pseudo-locked region
+ *			belongs
+ * @cbm:		bitmask of the pseudo-locked region
+ * @lock_thread_wq:	waitqueue used to wait on the pseudo-locking thread
+ *			completion
+ * @thread_done:	variable used by waitqueue to test if pseudo-locking
+ *			thread completed
+ * @cpu:		core associated with the cache on which the setup code
+ *			will be run
+ * @line_size:		size of the cache lines
+ * @size:		size of pseudo-locked region in bytes
+ * @kmem:		the kernel memory associated with pseudo-locked region
+ * @minor:		minor number of character device associated with this
+ *			region
+ * @debugfs_dir:	pointer to this region's directory in the debugfs
+ *			filesystem
+ * @pm_reqs:		Power management QoS requests related to this region
+ */
+struct pseudo_lock_region {
+	struct resctrl_schema	*s;
+	struct rdt_domain	*d;
+	u32			cbm;
+	wait_queue_head_t	lock_thread_wq;
+	int			thread_done;
+	int			cpu;
+	unsigned int		line_size;
+	unsigned int		size;
+	void			*kmem;
+	unsigned int		minor;
+	struct dentry		*debugfs_dir;
+	struct list_head	pm_reqs;
+};
+
+/**
+ * struct rdtgroup - store rdtgroup's data in resctrl file system.
+ * @kn:				kernfs node
+ * @rdtgroup_list:		linked list for all rdtgroups
+ * @closid:			closid for this rdtgroup
+ * @cpu_mask:			CPUs assigned to this rdtgroup
+ * @flags:			status bits
+ * @waitcount:			how many cpus expect to find this
+ *				group when they acquire rdtgroup_mutex
+ * @type:			indicates type of this rdtgroup - either
+ *				monitor only or ctrl_mon group
+ * @mon:			mongroup related data
+ * @mode:			mode of resource group
+ * @plr:			pseudo-locked region
+ */
+struct rdtgroup {
+	struct kernfs_node		*kn;
+	struct list_head		rdtgroup_list;
+	u32				closid;
+	struct cpumask			cpu_mask;
+	int				flags;
+	atomic_t			waitcount;
+	enum rdt_group_type		type;
+	struct mongroup			mon;
+	enum rdtgrp_mode		mode;
+	struct pseudo_lock_region	*plr;
+};
+
+/* rdtgroup.flags */
+#define	RDT_DELETED		1
+
+/* rftype.flags */
+#define RFTYPE_FLAGS_CPUS_LIST	1
+
+/*
+ * Define the file type flags for base and info directories.
+ */
+#define RFTYPE_INFO			BIT(0)
+#define RFTYPE_BASE			BIT(1)
+#define RFTYPE_CTRL			BIT(4)
+#define RFTYPE_MON			BIT(5)
+#define RFTYPE_TOP			BIT(6)
+#define RFTYPE_RES_CACHE		BIT(8)
+#define RFTYPE_RES_MB			BIT(9)
+#define RFTYPE_DEBUG			BIT(10)
+#define RFTYPE_CTRL_INFO		(RFTYPE_INFO | RFTYPE_CTRL)
+#define RFTYPE_MON_INFO			(RFTYPE_INFO | RFTYPE_MON)
+#define RFTYPE_TOP_INFO			(RFTYPE_INFO | RFTYPE_TOP)
+#define RFTYPE_CTRL_BASE		(RFTYPE_BASE | RFTYPE_CTRL)
+#define RFTYPE_MON_BASE			(RFTYPE_BASE | RFTYPE_MON)
+
+/* List of all resource groups */
+extern struct list_head rdt_all_groups;
+
+extern int max_name_width, max_data_width;
+
+int __init rdtgroup_init(void);
+void __exit rdtgroup_exit(void);
+
+/**
+ * struct rftype - describe each file in the resctrl file system
+ * @name:	File name
+ * @mode:	Access mode
+ * @kf_ops:	File operations
+ * @flags:	File specific RFTYPE_FLAGS_* flags
+ * @fflags:	File specific RFTYPE_* flags
+ * @seq_show:	Show content of the file
+ * @write:	Write to the file
+ */
+struct rftype {
+	char			*name;
+	umode_t			mode;
+	const struct kernfs_ops	*kf_ops;
+	unsigned long		flags;
+	unsigned long		fflags;
+
+	int (*seq_show)(struct kernfs_open_file *of,
+			struct seq_file *sf, void *v);
+	/*
+	 * write() is the generic write callback which maps directly to
+	 * kernfs write operation and overrides all other operations.
+	 * Maximum write size is determined by ->max_write_len.
+	 */
+	ssize_t (*write)(struct kernfs_open_file *of,
+			 char *buf, size_t nbytes, loff_t off);
+};
+
+/**
+ * struct mbm_state - status for each MBM counter in each domain
+ * @prev_bw_bytes: Previous bytes value read for bandwidth calculation
+ * @prev_bw:	The most recent bandwidth in MBps
+ */
+struct mbm_state {
+	u64	prev_bw_bytes;
+	u32	prev_bw;
+};
 
 /**
  * struct arch_mbm_state - values used to compute resctrl_arch_rmid_read()s
@@ -55,9 +353,6 @@ struct arch_mbm_state {
 	u64	prev_msr;
 };
 
-/* Setting bit 1 in MSR_IA32_L3_QOS_EXT_CFG enables the SDCIAE feature. */
-#define SDCIAE_ENABLE_BIT		1
-
 /**
  * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share
  *			  a resource
@@ -65,8 +360,6 @@ struct arch_mbm_state {
  * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
  * @arch_mbm_total:	arch private state for MBM total bandwidth
  * @arch_mbm_local:	arch private state for MBM local bandwidth
- * @mbm_total_cfg:	MBM total bandwidth configuration
- * @mbm_local_cfg:	MBM local bandwidth configuration
  *
  * Members of this structure are accessed via helpers that provide abstraction.
  */
@@ -75,8 +368,6 @@ struct rdt_hw_domain {
 	u32				*ctrl_val;
 	struct arch_mbm_state		*arch_mbm_total;
 	struct arch_mbm_state		*arch_mbm_local;
-	u32				mbm_total_cfg;
-	u32				mbm_local_cfg;
 };
 
 static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
@@ -96,6 +387,37 @@ struct msr_param {
 	u32			high;
 };
 
+static inline bool is_llc_occupancy_enabled(void)
+{
+	return !!(rdt_mon_features & (1U << QOS_L3_OCCUP_EVENT_ID));
+}
+
+static inline bool is_mbm_total_enabled(void)
+{
+	return !!(rdt_mon_features & (1U << QOS_L3_MBM_TOTAL_EVENT_ID));
+}
+
+static inline bool is_mbm_local_enabled(void)
+{
+	return !!(rdt_mon_features & (1U << QOS_L3_MBM_LOCAL_EVENT_ID));
+}
+
+static inline bool is_mbm_enabled(void)
+{
+	return is_mbm_local_enabled() || is_mbm_total_enabled();
+}
+
+static inline bool is_mbm_event(int e)
+{
+	return !(e < QOS_L3_MBM_TOTAL_EVENT_ID ||
+		 e > QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
+struct rdt_parse_data {
+	struct rdtgroup		*rdtgrp;
+	char			*buf;
+};
+
 /**
  * struct rdt_hw_resource - arch private attributes of a resctrl resource
  * @r_resctrl:		Attributes of the resource used directly by resctrl.
@@ -108,9 +430,9 @@ struct msr_param {
  * @msr_update:		Function pointer to update QOS MSRs
  * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
  * @mbm_width:		Monitor width, to detect and correct for overflow.
+ * @mbm_cfg_mask:	Bandwidth sources that can be tracked when Bandwidth
+ *			Monitoring Event Configuration (BMEC) is supported.
  * @cdp_enabled:	CDP state of this resource
- * @mbm_cntr_assign_enabled:	ABMC feature is enabled
- * @sdciae_enabled:	SDCIAE feature (backing "io_alloc") is enabled.
  *
  * Members of this structure are either private to the architecture
  * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
@@ -124,9 +446,8 @@ struct rdt_hw_resource {
 				 struct rdt_resource *r);
 	unsigned int		mon_scale;
 	unsigned int		mbm_width;
+	unsigned int		mbm_cfg_mask;
 	bool			cdp_enabled;
-	bool			mbm_cntr_assign_enabled;
-	bool			sdciae_enabled;
 };
 
 static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
@@ -134,7 +455,26 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r
 	return container_of(r, struct rdt_hw_resource, r_resctrl);
 }
 
+int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
+	      struct rdt_domain *d);
+int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
+	     struct rdt_domain *d);
+
+extern struct mutex rdtgroup_mutex;
+
 extern struct rdt_hw_resource rdt_resources_all[];
+extern struct rdtgroup rdtgroup_default;
+extern struct dentry *debugfs_resctrl;
+
+enum resctrl_res_level {
+	RDT_RESOURCE_L3,
+	RDT_RESOURCE_L2,
+	RDT_RESOURCE_MBA,
+	RDT_RESOURCE_SMBA,
+
+	/* Must be the last */
+	RDT_NUM_RESOURCES,
+};
 
 static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
 {
@@ -144,6 +484,13 @@ static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
 	return &hw_res->r_resctrl;
 }
 
+static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
+{
+	return rdt_resources_all[l].cdp_enabled;
+}
+
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
+
 /*
  * To return the common struct rdt_resource, which is contained in struct
  * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource.
@@ -198,36 +545,66 @@ union cpuid_0x10_x_edx {
 	unsigned int full;
 };
 
-/*
- * ABMC counters can be configured by writing to L3_QOS_ABMC_CFG.
- * @bw_type		: Bandwidth configuration(supported by BMEC)
- *			  tracked by the @cntr_id.
- * @bw_src		: Bandwidth source (RMID or CLOSID).
- * @reserved1		: Reserved.
- * @is_clos		: @bw_src field is a CLOSID (not an RMID).
- * @cntr_id		: Counter identifier.
- * @reserved		: Reserved.
- * @cntr_en		: Tracking enable bit.
- * @cfg_en		: Configuration enable bit.
- */
-union l3_qos_abmc_cfg {
-	struct {
-		unsigned long	bw_type	:32,
-				bw_src	:12,
-				reserved1: 3,
-				is_clos	: 1,
-				cntr_id	: 5,
-				reserved : 9,
-				cntr_en	: 1,
-				cfg_en	: 1;
-	} split;
-	unsigned long full;
-};
+void rdt_last_cmd_clear(void);
+void rdt_last_cmd_puts(const char *s);
+__printf(1, 2)
+void rdt_last_cmd_printf(const char *fmt, ...);
 
 void rdt_ctrl_update(void *arg);
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
+void rdtgroup_kn_unlock(struct kernfs_node *kn);
+int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
+int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
+			     umode_t mask);
+struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+				   struct list_head **pos);
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off);
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+			   struct seq_file *s, void *v);
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
+			   unsigned long cbm, int closid, bool exclusive);
+unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
+				  unsigned long cbm);
+enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
+int rdtgroup_tasks_assigned(struct rdtgroup *r);
+int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
+int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
+int rdt_pseudo_lock_init(void);
+void rdt_pseudo_lock_release(void);
+int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
+void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
+struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
+int closids_supported(void);
+void closid_free(int closid);
+int alloc_rmid(u32 closid);
+void free_rmid(u32 closid, u32 rmid);
 int rdt_get_mon_l3_config(struct rdt_resource *r);
-bool rdt_cpu_has(int flag);
+void __exit rdt_put_mon_l3_config(void);
+bool __init rdt_cpu_has(int flag);
+void mon_event_count(void *info);
+int rdtgroup_mondata_show(struct seq_file *m, void *arg);
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
+		    int evtid, int first);
+void mbm_setup_overflow_handler(struct rdt_domain *dom,
+				unsigned long delay_ms,
+				int exclude_cpu);
+void mbm_handle_overflow(struct work_struct *work);
 void __init intel_rdt_mbm_apply_quirk(void);
+bool is_mba_sc(struct rdt_resource *r);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+			     int exclude_cpu);
+void cqm_handle_limbo(struct work_struct *work);
+bool has_busy_rmid(struct rdt_domain *d);
+void __check_limbo(struct rdt_domain *d, bool force_free);
 void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
-void resctrl_mbm_evt_config_init(struct rdt_hw_domain *hw_dom);
+void __init thread_throttle_mode_init(void);
+void __init mbm_config_rftype_init(const char *config);
+void rdt_staged_configs_clear(void);
+bool closid_allocated(unsigned int closid);
+int resctrl_find_cleanest_closid(void);
+
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 0ed28ef8c25a791733325b56cb6fd90100b33d7e..c34a35ec0f031a188fc29424bdef31cd54fa597d 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -25,6 +25,53 @@
 
 #include "internal.h"
 
+/**
+ * struct rmid_entry - dirty tracking for all RMID.
+ * @closid:	The CLOSID for this entry.
+ * @rmid:	The RMID for this entry.
+ * @busy:	The number of domains with cached data using this RMID.
+ * @list:	Member of the rmid_free_lru list when busy == 0.
+ *
+ * Depending on the architecture the correct monitor is accessed using
+ * both @closid and @rmid, or @rmid only.
+ *
+ * Take the rdtgroup_mutex when accessing.
+ */
+struct rmid_entry {
+	u32				closid;
+	u32				rmid;
+	int				busy;
+	struct list_head		list;
+};
+
+/*
+ * @rmid_free_lru - A least recently used list of free RMIDs
+ *     These RMIDs are guaranteed to have an occupancy less than the
+ *     threshold occupancy
+ */
+static LIST_HEAD(rmid_free_lru);
+
+/*
+ * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
+ *     Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
+ *     Indexed by CLOSID. Protected by rdtgroup_mutex.
+ */
+static u32 *closid_num_dirty_rmid;
+
+/*
+ * @rmid_limbo_count - count of currently unused but (potentially)
+ *     dirty RMIDs.
+ *     This counts RMIDs that no one is currently using but that
+ *     may have an occupancy value > resctrl_rmid_realloc_threshold. User can
+ *     change the threshold occupancy value.
+ */
+static unsigned int rmid_limbo_count;
+
+/*
+ * @rmid_ptrs - Array of rmid_entry, the entries in the limbo and free lists.
+ */
+static struct rmid_entry	*rmid_ptrs;
+
 /*
  * Global boolean for rdt_monitor which is true if any
  * resource monitoring is enabled.
@@ -36,6 +83,17 @@ bool rdt_mon_capable;
  */
 unsigned int rdt_mon_features;
 
+/*
+ * This is the threshold cache occupancy in bytes at which we will consider an
+ * RMID available for re-allocation.
+ */
+unsigned int resctrl_rmid_realloc_threshold;
+
+/*
+ * This is the maximum value for the reallocation threshold, in bytes.
+ */
+unsigned int resctrl_rmid_realloc_limit;
+
 #define CF(cf)	((unsigned long)(1048576 * (cf) + 0.5))
 
 /*
@@ -99,6 +157,33 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
 	return val;
 }
 
+/*
+ * x86 and arm64 differ in their handling of monitoring.
+ * x86's RMID are independent numbers, there is only one source of traffic
+ * with an RMID value of '1'.
+ * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
+ * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
+ * value is no longer unique.
+ * To account for this, resctrl uses an index. On x86 this is just the RMID,
+ * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
+ *
+ * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
+ * must accept an attempt to read every index.
+ */
+static inline struct rmid_entry *__rmid_entry(u32 idx)
+{
+	struct rmid_entry *found = &rmid_ptrs[idx];
+	u32 want_closid, want_rmid;
+
+	/* The entry must describe the same CLOSID/RMID that @idx decodes to. */
+	resctrl_arch_rmid_idx_decode(idx, &want_closid, &want_rmid);
+
+	WARN_ON_ONCE(found->closid != want_closid);
+	WARN_ON_ONCE(found->rmid != want_rmid);
+
+	return found;
+}
+
 static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
 {
 	u64 msr_val;
@@ -134,8 +219,6 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
 		return &hw_dom->arch_mbm_total[rmid];
 	case QOS_L3_MBM_LOCAL_EVENT_ID:
 		return &hw_dom->arch_mbm_local[rmid];
-	default:
-		break;
 	}
 
 	/* Never expect to get here */
@@ -168,13 +251,13 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d)
 {
 	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
 
-	if (resctrl_arch_is_mbm_total_enabled())
+	if (is_mbm_total_enabled())
 		memset(hw_dom->arch_mbm_total, 0,
-		       sizeof(*hw_dom->arch_mbm_total) * r->mon.num_rmid);
+		       sizeof(*hw_dom->arch_mbm_total) * r->num_rmid);
 
-	if (resctrl_arch_is_mbm_local_enabled())
+	if (is_mbm_local_enabled())
 		memset(hw_dom->arch_mbm_local, 0,
-		       sizeof(*hw_dom->arch_mbm_local) * r->mon.num_rmid);
+		       sizeof(*hw_dom->arch_mbm_local) * r->num_rmid);
 }
 
 static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
@@ -219,16 +302,717 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
 	return 0;
 }
 
+static void limbo_release_entry(struct rmid_entry *entry)
+{
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	/* Leaving limbo: queue the entry at the tail of the free LRU. */
+	list_add_tail(&entry->list, &rmid_free_lru);
+	rmid_limbo_count--;
+	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+		closid_num_dirty_rmid[entry->closid]--;
+}
+
+/*
+ * Check the RMIDs that are marked as busy for domain @d. If the reported
+ * LLC occupancy is below resctrl_rmid_realloc_threshold, or @force_free is
+ * set, clear the busy bit and decrement the RMID's busy count. When the
+ * busy count reaches zero the RMID is released to the free list.
+ */
+void __check_limbo(struct rdt_domain *d, bool force_free)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+	struct rmid_entry *entry;
+	u32 idx, cur_idx = 1;
+	void *arch_mon_ctx;
+	bool rmid_dirty;
+	u64 val = 0;
+
+	arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
+	if (IS_ERR(arch_mon_ctx)) {
+		pr_warn_ratelimited("Failed to allocate monitor context: %ld\n",
+				    PTR_ERR(arch_mon_ctx));
+		return;
+	}
+
+	/*
+	 * Skip index 0 and walk every index marked busy in this domain. An
+	 * RMID whose occupancy read fails is treated as still dirty. Clean
+	 * (or force-freed) RMIDs drop one busy count and move to the free
+	 * list when that count reaches 0.
+	 */
+	for (;;) {
+		idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
+		if (idx >= idx_limit)
+			break;
+
+		entry = __rmid_entry(idx);
+		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
+					   QOS_L3_OCCUP_EVENT_ID, &val,
+					   arch_mon_ctx)) {
+			rmid_dirty = true;
+		} else {
+			rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
+		}
+
+		if (force_free || !rmid_dirty) {
+			clear_bit(idx, d->rmid_busy_llc);
+			if (!--entry->busy)
+				limbo_release_entry(entry);
+		}
+		cur_idx = idx + 1;
+	}
+
+	resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
+}
+
+bool has_busy_rmid(struct rdt_domain *d)
+{
+	u32 nr_idx = resctrl_arch_system_num_rmid_idx();
+
+	return nr_idx != find_first_bit(d->rmid_busy_llc, nr_idx);
+}
+
+static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
+{
+	struct rmid_entry *pos;
+	u32 own_idx, req_idx;
+
+	if (list_empty(&rmid_free_lru))
+		return ERR_PTR(rmid_limbo_count ? -EBUSY : -ENOSPC);
+
+	list_for_each_entry(pos, &rmid_free_lru, list) {
+		/*
+		 * Compare the index this free entry really has with the
+		 * index its RMID would produce when paired with @closid.
+		 * A match means the entry is usable by @closid. Where the
+		 * architecture ignores the CLOSID, both indices are equal
+		 * for every entry, so the first entry is returned.
+		 */
+		own_idx = resctrl_arch_rmid_idx_encode(pos->closid, pos->rmid);
+		req_idx = resctrl_arch_rmid_idx_encode(closid, pos->rmid);
+
+		if (own_idx == req_idx)
+			return pos;
+	}
+
+	return ERR_PTR(-ENOSPC);
+}
+
+/**
+ * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
+ *                                  RMID are clean, or the CLOSID that has
+ *                                  the most clean RMID.
+ *
+ * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
+ * may not be able to allocate clean RMID. To avoid this the allocator will
+ * choose the CLOSID with the most clean RMID.
+ *
+ * When the CLOSID and RMID are independent numbers, the first free CLOSID will
+ * be returned.
+ */
+int resctrl_find_cleanest_closid(void)
+{
+	u32 best = ~0;
+	int closid;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+		return -EIO;
+
+	for (closid = 0; closid < closids_supported(); closid++) {
+		int num_dirty;
+
+		if (closid_allocated(closid))
+			continue;
+
+		/* A free CLOSID with no dirty RMID cannot be beaten. */
+		num_dirty = closid_num_dirty_rmid[closid];
+		if (!num_dirty)
+			return closid;
+
+		/* Keep the first candidate until a less dirty one shows up. */
+		if (best == ~0 || num_dirty < closid_num_dirty_rmid[best])
+			best = closid;
+	}
+
+	/* No unallocated CLOSID was seen at all. */
+	if (best == ~0)
+		return -ENOSPC;
+
+	return best;
+}
+
+/*
+ * For MPAM the RMID value is not unique, and has to be considered with
+ * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
+ * allows all domains to be managed by a single free list.
+ * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
+ */
+int alloc_rmid(u32 closid)
+{
+	struct rmid_entry *free_entry;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	free_entry = resctrl_find_free_rmid(closid);
+	if (IS_ERR(free_entry))
+		return PTR_ERR(free_entry);
+
+	list_del(&free_entry->list);
+	return free_entry->rmid;
+}
+
+static void add_rmid_to_limbo(struct rmid_entry *entry)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	struct rdt_domain *d;
+	u32 idx;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
+
+	entry->busy = 0;
+	list_for_each_entry(d, &r->domains, list) {
+		/*
+		 * For the first limbo RMID in the domain,
+		 * set up the limbo worker.
+		 */
+		if (!has_busy_rmid(d))
+			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
+						RESCTRL_PICK_ANY_CPU);
+		set_bit(idx, d->rmid_busy_llc);
+		entry->busy++;
+	}
+
+	rmid_limbo_count++;
+	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+		closid_num_dirty_rmid[entry->closid]++;
+}
+
+void free_rmid(u32 closid, u32 rmid)
+{
+	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+	struct rmid_entry *freed;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	/*
+	 * The reserved (default) RMID is never freed. Indices are compared
+	 * rather than raw values so that architectures which ignore the
+	 * closid parameter need no extra check.
+	 */
+	if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
+						RESCTRL_RESERVED_RMID))
+		return;
+
+	freed = __rmid_entry(idx);
+
+	if (!is_llc_occupancy_enabled())
+		list_add_tail(&freed->list, &rmid_free_lru);
+	else
+		add_rmid_to_limbo(freed);
+}
+
+static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
+				       u32 rmid, enum resctrl_event_id evtid)
+{
+	u32 slot = resctrl_arch_rmid_idx_encode(closid, rmid);
+
+	/* Only the two MBM events have per-index state in the domain. */
+	if (evtid == QOS_L3_MBM_TOTAL_EVENT_ID)
+		return &d->mbm_total[slot];
+
+	if (evtid == QOS_L3_MBM_LOCAL_EVENT_ID)
+		return &d->mbm_local[slot];
+
+	return NULL;
+}
+
+static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
+{
+	struct mbm_state *state;
+	u64 count = 0;
+
+	if (!rr->first) {
+		rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
+						 rr->evtid, &count,
+						 rr->arch_mon_ctx);
+		if (rr->err)
+			return rr->err;
+		rr->val += count;
+		return 0;
+	}
+
+	/* First read: reset the counter and clear any cached MBM state. */
+	resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
+	state = get_mbm_state(rr->d, closid, rmid, rr->evtid);
+	if (state)
+		memset(state, 0, sizeof(*state));
+	return 0;
+}
+
+/*
+ * mbm_bw_count() - Update bw count from values previously read by
+ *		    __mon_event_count().
+ * @closid:	The closid used to identify the cached mbm_state.
+ * @rmid:	The rmid used to identify the cached mbm_state.
+ * @rr:		The struct rmid_read populated by __mon_event_count().
+ *
+ * Supporting function to calculate the memory bandwidth in MBps and
+ * cache it in the mbm_state. The chunks value previously read by
+ * __mon_event_count() is compared with the chunks value from the previous
+ * invocation. This must be called once per second to maintain values in MBps.
+ */
+static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
+{
+	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+	struct mbm_state *state = &rr->d->mbm_local[idx];
+	u64 total_bytes = rr->val;
+	u64 delta_bytes;
+
+	/* Bytes seen since the previous call; remember the new total. */
+	delta_bytes = total_bytes - state->prev_bw_bytes;
+	state->prev_bw_bytes = total_bytes;
+
+	/* Invoked once per second: the delta in units of SZ_1M is the MBps. */
+	state->prev_bw = delta_bytes / SZ_1M;
+}
+
+/*
+ * This is scheduled by mon_event_read() to read the CQM/MBM counters
+ * on a domain.
+ */
+void mon_event_count(void *info)
+{
+	struct rmid_read *rr = info;
+	struct rdtgroup *grp = rr->rgrp;
+	struct rdtgroup *child;
+	int ret;
+
+	ret = __mon_event_count(grp->closid, grp->mon.rmid, rr);
+
+	/*
+	 * A control group also accumulates the counts of its child
+	 * monitor groups into the same rmid_read. One successful read is
+	 * enough to report success; errors from the reads that failed are
+	 * discarded.
+	 */
+	if (grp->type == RDTCTRL_GROUP) {
+		list_for_each_entry(child, &grp->mon.crdtgrp_list,
+				    mon.crdtgrp_list) {
+			if (__mon_event_count(child->closid, child->mon.rmid,
+					      rr))
+				continue;
+			ret = 0;
+		}
+	}
+
+	/*
+	 * __mon_event_count() on a newly created monitor group may report
+	 * -EINVAL/Unavailable while the monitor has not seen any traffic.
+	 * Do not let that mask the data: clear the error if at least one
+	 * of the reads succeeded.
+	 */
+	if (!ret)
+		rr->err = 0;
+}
+
+/*
+ * Feedback loop for MBA software controller (mba_sc)
+ *
+ * mba_sc is a feedback loop where we periodically read MBM counters and
+ * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
+ * that:
+ *
+ *   current bandwidth(cur_bw) < user specified bandwidth(user_bw)
+ *
+ * This uses the MBM counters to measure the bandwidth and MBA throttle
+ * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
+ * fact that resctrl rdtgroups have both monitoring and control.
+ *
+ * The frequency of the checks is 1s and we just tag along the MBM overflow
+ * timer. Having 1s interval makes the calculation of bandwidth simpler.
+ *
+ * Although MBA's goal is to restrict the bandwidth to a maximum, there may
+ * be a need to increase the bandwidth to avoid unnecessarily restricting
+ * the L2 <-> L3 traffic.
+ *
+ * Since MBA controls the L2 external bandwidth whereas MBM measures the
+ * L3 external bandwidth the following sequence could lead to such a
+ * situation.
+ *
+ * Consider an rdtgroup which had high L3 <-> memory traffic in initial
+ * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
+ * after some time rdtgroup has mostly L2 <-> L3 traffic.
+ *
+ * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
+ * throttle MSRs already have low percentage values.  To avoid
+ * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
+ */
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
+{
+	u32 closid, rmid, cur_msr_val, new_msr_val;
+	struct mbm_state *pmbm_data, *cmbm_data;
+	struct rdt_resource *r_mba;
+	struct rdt_domain *dom_mba;
+	u32 cur_bw, user_bw, idx;
+	struct list_head *head;
+	struct rdtgroup *entry;
+
+	if (!is_mbm_local_enabled())
+		return;
+
+	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+
+	closid = rgrp->closid;
+	rmid = rgrp->mon.rmid;
+	idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+	pmbm_data = &dom_mbm->mbm_local[idx];
+
+	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
+	if (!dom_mba) {
+		pr_warn_once("Failure to get domain for MBA update\n");
+		return;
+	}
+
+	cur_bw = pmbm_data->prev_bw;
+	user_bw = dom_mba->mbps_val[closid];
+
+	/* MBA resource doesn't support CDP */
+	cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
+
+	/*
+	 * For Ctrl groups read data from child monitor groups.
+	 */
+	head = &rgrp->mon.crdtgrp_list;
+	list_for_each_entry(entry, head, mon.crdtgrp_list) {
+		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+		cur_bw += cmbm_data->prev_bw;
+	}
+
+	/*
+	 * Scale up/down the bandwidth linearly for the ctrl group.  The
+	 * bandwidth step is the bandwidth granularity specified by the
+	 * hardware.
+	 * Always increase throttling if current bandwidth is above the
+	 * target set by user.
+	 * But avoid thrashing up and down on every poll by checking
+	 * whether a decrease in throttling is likely to push the group
+	 * back over target. E.g. if currently throttling to 30% of bandwidth
+	 * on a system with 10% granularity steps, check whether moving to
+	 * 40% would go past the limit by multiplying current bandwidth by
+	 * "(30 + 10) / 30".
+	 */
+	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
+		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
+	} else if (cur_msr_val < MAX_MBA_BW &&
+		   (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
+		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
+	} else {
+		return;
+	}
+
+	resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
+}
+
+static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
+		       u32 closid, u32 rmid)
+{
+	struct rmid_read rr;
+
+	rr.first = false;
+	rr.r = r;
+	rr.d = d;
+
+	/*
+	 * Read each enabled MBM event for this closid/rmid. Protected from
+	 * concurrent user reads: both they and we hold the global mutex.
+	 */
+	if (is_mbm_total_enabled()) {
+		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
+		rr.val = 0;
+		rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+		if (IS_ERR(rr.arch_mon_ctx)) {
+			pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+					    PTR_ERR(rr.arch_mon_ctx));
+			return;
+		}
+
+		__mon_event_count(closid, rmid, &rr);
+
+		resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
+	}
+	if (is_mbm_local_enabled()) {
+		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
+		rr.val = 0;
+		rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+		if (IS_ERR(rr.arch_mon_ctx)) {
+			pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+					    PTR_ERR(rr.arch_mon_ctx));
+			return;
+		}
+
+		__mon_event_count(closid, rmid, &rr);
+
+		/*
+		 * Only when the user has explicitly enabled the MBA
+		 * software controller: hand the local event data read
+		 * above to mbm_bw_count() for this closid/rmid.
+		 */
+		if (is_mba_sc(NULL))
+			mbm_bw_count(closid, rmid, &rr);
+
+		resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
+	}
+}
+
+/*
+ * Work handler: scan the domain's limbo list and move RMIDs with occupancy
+ * < threshold_occupancy to the free list. Re-armed while RMIDs stay busy.
+ */
+void cqm_handle_limbo(struct work_struct *work)
+{
+	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
+	struct rdt_domain *d;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+
+	d = container_of(work, struct rdt_domain, cqm_limbo.work);
+
+	__check_limbo(d, false);
+
+	if (has_busy_rmid(d)) {
+		d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+							   RESCTRL_PICK_ANY_CPU);
+		schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
+					 delay);
+	}
+
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+}
+
+/**
+ * cqm_setup_limbo_handler() - Arm the limbo handler for a domain.
+ * @dom:           Domain whose limbo work is scheduled.
+ * @delay_ms:      Delay, in milliseconds, before the handler runs.
+ * @exclude_cpu:   CPU the handler must not be placed on, or
+ *		   RESCTRL_PICK_ANY_CPU when any CPU will do.
+ *
+ * Nothing is scheduled if the CPU picked is not below nr_cpu_ids.
+ */
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+			     int exclude_cpu)
+{
+	int target = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
+
+	dom->cqm_work_cpu = target;
+	if (target >= nr_cpu_ids)
+		return;
+
+	schedule_delayed_work_on(target, &dom->cqm_limbo, msecs_to_jiffies(delay_ms));
+}
+
+void mbm_handle_overflow(struct work_struct *work)
+{
+	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
+	struct rdtgroup *prgrp, *crgrp;
+	struct list_head *head;
+	struct rdt_resource *r;
+	struct rdt_domain *d;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+
+	/*
+	 * If the filesystem has been unmounted (or monitoring is not
+	 * supported) this work no longer needs to run or be re-armed.
+	 */
+	if (!resctrl_mounted || !resctrl_arch_mon_capable())
+		goto out_unlock;
+
+	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	d = container_of(work, struct rdt_domain, mbm_over.work);
+
+	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+		mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
+
+		head = &prgrp->mon.crdtgrp_list;
+		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
+			mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);
+
+		if (is_mba_sc(NULL))
+			update_mba_bw(prgrp, d);
+	}
+
+	/*
+	 * Re-arm, re-checking for housekeeping CPUs. This allows the overflow
+	 * handler to move off a nohz_full CPU quickly.
+	 */
+	d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+						   RESCTRL_PICK_ANY_CPU);
+	schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
+
+out_unlock:
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+}
+
+/**
+ * mbm_setup_overflow_handler() - Arm the MBM overflow handler for a domain.
+ * @dom:           Domain whose overflow work is scheduled.
+ * @delay_ms:      Delay, in milliseconds, before the handler runs.
+ * @exclude_cpu:   CPU the handler must not be placed on, or
+ *		   RESCTRL_PICK_ANY_CPU when any CPU will do.
+ */
+void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+				int exclude_cpu)
+{
+	int target;
+
+	/*
+	 * A domain can come online while the filesystem is not mounted, in
+	 * which case nobody needs counter overflow to be caught.
+	 */
+	if (!resctrl_mounted || !resctrl_arch_mon_capable())
+		return;
+
+	target = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
+	dom->mbm_work_cpu = target;
+	if (target >= nr_cpu_ids)
+		return;
+
+	schedule_delayed_work_on(target, &dom->mbm_over, msecs_to_jiffies(delay_ms));
+}
+
+static int dom_data_init(struct rdt_resource *r)
+{
+	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+	u32 num_closid = resctrl_arch_get_num_closid(r);
+	struct rmid_entry *entry = NULL;
+	int err = 0, i;
+	u32 idx;
+
+	mutex_lock(&rdtgroup_mutex);
+	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+		u32 *tmp;
+
+		/*
+		 * One u32 per CLOSID. If the architecture hasn't provided a
+		 * sanitised num_closid, this array may be larger than
+		 * necessary. Resctrl will use a smaller system wide value
+		 * based on the resources in use.
+		 */
+		tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
+		if (!tmp) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		closid_num_dirty_rmid = tmp;
+	}
+
+	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
+	if (!rmid_ptrs) {
+		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+			kfree(closid_num_dirty_rmid);
+			closid_num_dirty_rmid = NULL;
+		}
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
+	for (i = 0; i < idx_limit; i++) {
+		entry = &rmid_ptrs[i];
+		INIT_LIST_HEAD(&entry->list);
+
+		resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
+		list_add_tail(&entry->list, &rmid_free_lru);
+	}
+
+	/*
+	 * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
+	 * are always allocated, so take their entry off the free list. They
+	 * are used for rdtgroup_default, set up later in rdtgroup_init().
+	 */
+	idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
+					   RESCTRL_RESERVED_RMID);
+	entry = __rmid_entry(idx);
+	list_del(&entry->list);
+
+out_unlock:
+	mutex_unlock(&rdtgroup_mutex);
+
+	return err;
+}
+
+/* Free the RMID bookkeeping arrays while holding rdtgroup_mutex. */
+static void __exit dom_data_exit(void)
+{
+	mutex_lock(&rdtgroup_mutex);
+
+	kfree(rmid_ptrs);
+	rmid_ptrs = NULL;
+	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+		kfree(closid_num_dirty_rmid);
+		closid_num_dirty_rmid = NULL;
+	}
+
+	mutex_unlock(&rdtgroup_mutex);
+}
+
+static struct mon_evt llc_occupancy_event = {
+	.name		= "llc_occupancy",
+	.evtid		= QOS_L3_OCCUP_EVENT_ID,
+};
+
+static struct mon_evt mbm_total_event = {
+	.name		= "mbm_total_bytes",
+	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
+};
+
+static struct mon_evt mbm_local_event = {
+	.name		= "mbm_local_bytes",
+	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
+};
+
+/*
+ * Initialize the event list for the resource: each static event above
+ * is added only if it is enabled.
+ * Note that MBM events are also part of RDT_RESOURCE_L3 resource
+ * because as per the SDM the total and local memory bandwidth
+ * are enumerated as part of L3 monitoring.
+ */
+static void l3_mon_evt_init(struct rdt_resource *r)
+{
+	INIT_LIST_HEAD(&r->evt_list);
+
+	if (is_llc_occupancy_enabled())
+		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
+	if (is_mbm_total_enabled())
+		list_add_tail(&mbm_total_event.list, &r->evt_list);
+	if (is_mbm_local_enabled())
+		list_add_tail(&mbm_local_event.list, &r->evt_list);
+}
+
 int __init rdt_get_mon_l3_config(struct rdt_resource *r)
 {
 	unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 	unsigned int threshold;
-	u32 eax, ebx, ecx, edx;
+	int ret;
 
 	resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
 	hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
-	r->mon.num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
 	hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
 
 	if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
@@ -243,22 +1027,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
 	 *
 	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
 	 */
-	threshold = resctrl_rmid_realloc_limit / r->mon.num_rmid;
-
-	if (rdt_cpu_has(X86_FEATURE_ABMC)) {
-		r->mon.mbm_cntr_assignable = true;
-		/*
-		 * Query CPUID_Fn80000020_EBX_x05 for number of
-		 * ABMC counters.
-		 */
-		cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx);
-		r->mon.num_mbm_cntrs = (ebx & 0xFFFF) + 1;
-		if (WARN_ON(r->mon.num_mbm_cntrs > 64))
-			r->mon.num_mbm_cntrs = 64;
-
-		resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO);
-		resctrl_file_fflags_init("mbm_control", RFTYPE_MON_INFO);
-	}
+	threshold = resctrl_rmid_realloc_limit / r->num_rmid;
 
 	/*
 	 * Because num_rmid may not be a power of two, round the value
@@ -267,43 +1036,37 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
 	 */
 	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold);
 
+	ret = dom_data_init(r);
+	if (ret)
+		return ret;
+
 	if (rdt_cpu_has(X86_FEATURE_BMEC)) {
 		u32 eax, ebx, ecx, edx;
 
 		/* Detect list of bandwidth sources that can be tracked */
 		cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
-		r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
+		hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
+
+		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
+			mbm_total_event.configurable = true;
+			mbm_config_rftype_init("mbm_total_bytes_config");
+		}
+		if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
+			mbm_local_event.configurable = true;
+			mbm_config_rftype_init("mbm_local_bytes_config");
+		}
 	}
 
+	l3_mon_evt_init(r);
+
 	r->mon_capable = true;
 
 	return 0;
 }
 
-void resctrl_mbm_evt_config_init(struct rdt_hw_domain *hw_dom)
+void __exit rdt_put_mon_l3_config(void)
 {
-	unsigned int index;
-	u64 msrval;
-
-	/*
-	 * Read the configuration registers QOS_EVT_CFG_n, where <n> is
-	 * the BMEC event number (EvtID).
-	 */
-	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
-		index = mon_event_config_index_get(QOS_L3_MBM_TOTAL_EVENT_ID);
-		rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
-		hw_dom->mbm_total_cfg = msrval & MAX_EVT_CONFIG_BITS;
-	} else {
-		hw_dom->mbm_total_cfg = INVALID_CONFIG_VALUE;
-	}
-
-	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
-		index = mon_event_config_index_get(QOS_L3_MBM_LOCAL_EVENT_ID);
-		rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
-		hw_dom->mbm_local_cfg = msrval & MAX_EVT_CONFIG_BITS;
-	} else {
-		hw_dom->mbm_total_cfg = INVALID_CONFIG_VALUE;
-	}
+	dom_data_exit();
 }
 
 void __init intel_rdt_mbm_apply_quirk(void)
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index ba1596afee107f65f784ee004a86d0faabf68eb4..884b88e2514130a89762a9cb93203e558e47425b 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -39,9 +39,30 @@
  */
 static u64 prefetch_disable_bits;
 
+/*
+ * Major number shared by all devices exposing pseudo-locked regions, and
+ * the bitmap of minor numbers still available (a set bit means free).
+ */
+static unsigned int pseudo_lock_major;
+static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);
+
+static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
+{
+	const struct rdtgroup *rdtgrp;
+
+	rdtgrp = dev_get_drvdata(dev);
+	if (mode)
+		*mode = 0600;
+	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
+}
+
+static const struct class pseudo_lock_class = {
+	.name = "pseudo_lock",
+	.devnode = pseudo_lock_devnode,
+};
+
 /**
- * resctrl_arch_get_prefetch_disable_bits - prefetch disable bits of supported
- *                                          platforms
+ * get_prefetch_disable_bits - prefetch disable bits of supported platforms
  * @void: It takes no parameters.
  *
  * Capture the list of platforms that have been validated to support
@@ -55,16 +76,14 @@ static u64 prefetch_disable_bits;
  * in the SDM.
  *
  * When adding a platform here also add support for its cache events to
- * resctrl_arch_measure_l*_residency()
+ * measure_cycles_perf_fn()
  *
  * Return:
  * If platform is supported, the bits to disable hardware prefetchers, 0
  * if platform is not supported.
  */
-u64 resctrl_arch_get_prefetch_disable_bits(void)
+static u64 get_prefetch_disable_bits(void)
 {
-	prefetch_disable_bits = 0;
-
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
 	    boot_cpu_data.x86 != 6)
 		return 0;
@@ -80,8 +99,7 @@ u64 resctrl_arch_get_prefetch_disable_bits(void)
 		 * 3    DCU IP Prefetcher Disable (R/W)
 		 * 63:4 Reserved
 		 */
-		prefetch_disable_bits = 0xF;
-		break;
+		return 0xF;
 	case INTEL_FAM6_ATOM_GOLDMONT:
 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 		/*
@@ -92,16 +110,308 @@ u64 resctrl_arch_get_prefetch_disable_bits(void)
 		 * 2     DCU Hardware Prefetcher Disable (R/W)
 		 * 63:3  Reserved
 		 */
-		prefetch_disable_bits = 0x5;
-		break;
+		return 0x5;
+	}
+
+	return 0;
+}
+
+/**
+ * pseudo_lock_minor_get - Obtain available minor number
+ * @minor: Pointer to where new minor number will be stored
+ *
+ * Available minor numbers are the set bits of pseudo_lock_minor_avail.
+ * The lowest available one is marked as unavailable by clearing its bit
+ * and is handed to the caller through @minor.
+ *
+ * Return: 0 on success, -ENOSPC if no minor number is available.
+ */
+static int pseudo_lock_minor_get(unsigned int *minor)
+{
+	unsigned long bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);
+
+	/* A result equal to the search size means no bit was set. */
+	if (bit == MINORBITS)
+		return -ENOSPC;
+
+	*minor = bit;
+	__clear_bit(bit, &pseudo_lock_minor_avail);
+
+	return 0;
+}
+
+/**
+ * pseudo_lock_minor_release - Return minor number to the available pool
+ * @minor: The minor number made available again
+ */
+static void pseudo_lock_minor_release(unsigned int minor)
+{
+	__set_bit(minor, &pseudo_lock_minor_avail);
+}
+
+/**
+ * region_find_by_minor - Locate a pseudo-lock region by inode minor number
+ * @minor: The minor number of the device representing pseudo-locked region
+ *
+ * An access to the character device has to be mapped back to the
+ * pseudo-locked region behind it. Every resource group is checked for a
+ * region whose minor number equals @minor.
+ *
+ * Minor numbers are assigned at the time a pseudo-locked region is associated
+ * with a cache instance.
+ *
+ * Return: On success return pointer to resource group owning the pseudo-locked
+ *         region, NULL on failure.
+ */
+static struct rdtgroup *region_find_by_minor(unsigned int minor)
+{
+	struct rdtgroup *grp;
+
+	list_for_each_entry(grp, &rdt_all_groups, rdtgroup_list) {
+		if (!grp->plr || grp->plr->minor != minor)
+			continue;
+		return grp;
+	}
+
+	return NULL;
+}
+
+/**
+ * struct pseudo_lock_pm_req - A power management QoS request list entry
+ * @list:	Entry within the @pm_reqs list of a &struct pseudo_lock_region
+ * @req:	PM QoS request, removed again by pseudo_lock_cstates_relax()
+ */
+struct pseudo_lock_pm_req {
+	struct list_head list;
+	struct dev_pm_qos_request req;
+};
+
+static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
+{
+	struct pseudo_lock_pm_req *pm_req, *next;
+
+	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
+		dev_pm_qos_remove_request(&pm_req->req);
+		list_del(&pm_req->list);
+		kfree(pm_req);
+	}
+}
+
+/**
+ * pseudo_lock_cstates_constrain - Restrict cores from entering C6
+ * @plr: Pseudo-locked region
+ *
+ * To prevent the cache from being affected by power management entering
+ * C6 has to be avoided. This is accomplished by requesting a latency
+ * requirement lower than lowest C6 exit latency of all supported
+ * platforms as found in the cpuidle state tables in the intel_idle driver.
+ * At this time it is possible to do so with a single latency requirement
+ * for all supported platforms.
+ *
+ * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
+ * the ACPI latencies need to be considered while keeping in mind that C2
+ * may be set to map to deeper sleep states. In this case the latency
+ * requirement needs to prevent entering C2 also.
+ *
+ * Return: 0 on success, <0 on failure with all added requests removed
+ */
+static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
+{
+	struct pseudo_lock_pm_req *pm_req;
+	int cpu;
+	int ret;
+
+	for_each_cpu(cpu, &plr->d->cpu_mask) {
+		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
+		if (!pm_req) {
+			rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
+			ret = -ENOMEM;
+			goto out_err;
+		}
+		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
+					     &pm_req->req,
+					     DEV_PM_QOS_RESUME_LATENCY,
+					     30);
+		if (ret < 0) {
+			rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
+					    cpu);
+			kfree(pm_req);
+			ret = -1;
+			goto out_err;
+		}
+		list_add(&pm_req->list, &plr->pm_reqs);
+	}
+
+	return 0;
+
+out_err:
+	pseudo_lock_cstates_relax(plr);
+	return ret;
+}
+
+/**
+ * pseudo_lock_region_clear - Reset pseudo-lock region data
+ * @plr: pseudo-lock region
+ *
+ * All content of the pseudo-locked region is reset - any memory allocated
+ * is freed and the domain's back-pointer to the region is cleared.
+ *
+ * Return: void
+ */
+static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
+{
+	plr->size = 0;
+	plr->line_size = 0;
+	kfree(plr->kmem);
+	plr->kmem = NULL;
+	plr->s = NULL;
+	if (plr->d)
+		plr->d->plr = NULL;
+	plr->d = NULL;
+	plr->cbm = 0;
+	plr->debugfs_dir = NULL;
+}
+
+/**
+ * pseudo_lock_region_init - Initialize pseudo-lock region information
+ * @plr: pseudo-lock region
+ *
+ * Called after user provided a schemata to be pseudo-locked. From the
+ * schemata the &struct pseudo_lock_region is on entry already initialized
+ * with the resource, domain, and capacity bitmask. Here the information
+ * required for pseudo-locking is deduced from this data and &struct
+ * pseudo_lock_region initialized further. This information includes:
+ * - size in bytes of the region to be pseudo-locked
+ * - cache line size to know the stride with which data needs to be accessed
+ *   to be pseudo-locked
+ * - a cpu associated with the cache instance on which the pseudo-locking
+ *   flow can be executed
+ *
+ * Return: 0 on success, <0 on failure, in which case @plr is cleared and a
+ * descriptive error is written to the last_cmd_status buffer.
+ */
+static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
+{
+	struct cpu_cacheinfo *ci;
+	int ret;
+	int i;
+
+	/* Pick the first cpu we find that is associated with the cache. */
+	plr->cpu = cpumask_first(&plr->d->cpu_mask);
+
+	if (!cpu_online(plr->cpu)) {
+		rdt_last_cmd_printf("CPU %u associated with cache not online\n",
+				    plr->cpu);
+		ret = -ENODEV;
+		goto out_region;
+	}
+
+	ci = get_cpu_cacheinfo(plr->cpu);
+
+	plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
+
+	for (i = 0; i < ci->num_leaves; i++) {
+		if (ci->info_list[i].level == plr->s->res->cache_level) {
+			plr->line_size = ci->info_list[i].coherency_line_size;
+			return 0;
+		}
 	}
 
-	return prefetch_disable_bits;
+	ret = -1;
+	rdt_last_cmd_puts("Unable to determine cache line size\n");
+out_region:
+	pseudo_lock_region_clear(plr);
+	return ret;
 }
 
 /**
- * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache
- * @_plr: the pseudo-lock region descriptor
+ * pseudo_lock_init - Initialize a pseudo-lock region
+ * @rdtgrp: resource group to which new pseudo-locked region will belong
+ *
+ * A pseudo-locked region is associated with a resource group. When this
+ * association is created the pseudo-locked region is initialized. The
+ * details of the pseudo-locked region are not known at this time so only
+ * allocation is done and association established.
+ *
+ * Return: 0 on success, -ENOMEM if the region cannot be allocated
+ */
+static int pseudo_lock_init(struct rdtgroup *rdtgrp)
+{
+	struct pseudo_lock_region *plr;
+
+	plr = kzalloc(sizeof(*plr), GFP_KERNEL);
+	if (!plr)
+		return -ENOMEM;
+
+	init_waitqueue_head(&plr->lock_thread_wq);
+	INIT_LIST_HEAD(&plr->pm_reqs);
+	rdtgrp->plr = plr;
+	return 0;
+}
+
+
+/**
+ * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
+ * @plr: pseudo-lock region
+ *
+ * Fill in the remaining region details via pseudo_lock_region_init(), then
+ * allocate the zeroed, contiguous buffer that will be pseudo-locked to the
+ * cache.
+ *
+ * If the size check or the allocation fails, the region is reset with
+ * pseudo_lock_region_clear() before returning.
+ *
+ * Return: 0 on success, <0 on failure.  Descriptive error will be written
+ * to last_cmd_status buffer.
+ */
+static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
+{
+	int err = pseudo_lock_region_init(plr);
+
+	if (err < 0)
+		return err;
+
+	/*
+	 * We do not yet support contiguous regions larger than
+	 * KMALLOC_MAX_SIZE.
+	 */
+	if (plr->size > KMALLOC_MAX_SIZE) {
+		rdt_last_cmd_puts("Requested region exceeds maximum size\n");
+		err = -E2BIG;
+		goto out_clear;
+	}
+
+	plr->kmem = kzalloc(plr->size, GFP_KERNEL);
+	if (plr->kmem)
+		return 0;
+
+	rdt_last_cmd_puts("Unable to allocate memory\n");
+	err = -ENOMEM;
+
+out_clear:
+	pseudo_lock_region_clear(plr);
+	return err;
+}
+
+/**
+ * pseudo_lock_free - Free a pseudo-locked region
+ * @rdtgrp: resource group to which pseudo-locked region belonged
+ *
+ * The pseudo-locked region's resources have already been released, or not
+ * yet created at this point. Now it is reset, freed and disassociated from
+ * the resource group by setting @rdtgrp->plr to NULL.
+ *
+ * Return: void
+ */
+static void pseudo_lock_free(struct rdtgroup *rdtgrp)
+{
+	pseudo_lock_region_clear(rdtgrp->plr);
+	kfree(rdtgrp->plr);
+	rdtgrp->plr = NULL;
+}
+
+/**
+ * pseudo_lock_fn - Load kernel memory into cache
+ * @_rdtgrp: resource group to which pseudo-lock region belongs
  *
  * This is the core pseudo-locking flow.
  *
@@ -118,9 +428,10 @@ u64 resctrl_arch_get_prefetch_disable_bits(void)
  *
  * Return: 0. Waiter on waitqueue will be woken on completion.
  */
-int resctrl_arch_pseudo_lock_fn(void *_plr)
+static int pseudo_lock_fn(void *_rdtgrp)
 {
-	struct pseudo_lock_region *plr = _plr;
+	struct rdtgroup *rdtgrp = _rdtgrp;
+	struct pseudo_lock_region *plr = rdtgrp->plr;
 	u32 rmid_p, closid_p;
 	unsigned long i;
 	u64 saved_msr;
@@ -180,8 +491,7 @@ int resctrl_arch_pseudo_lock_fn(void *_plr)
 	 * pseudo-locked followed by reading of kernel memory to load it
 	 * into the cache.
 	 */
-	__wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, plr->closid);
-
+	__wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
 	/*
 	 * Cache was flushed earlier. Now access kernel memory to read it
 	 * into cache region associated with just activated plr->closid.
@@ -229,8 +539,342 @@ int resctrl_arch_pseudo_lock_fn(void *_plr)
 }
 
 /**
- * resctrl_arch_measure_cycles_lat_fn - Measure cycle latency to read
- *                                      pseudo-locked memory
+ * rdtgroup_monitor_in_progress - Test if monitoring in progress
+ * @rdtgrp: resource group being queried
+ *
+ * Return: 1 if the group's list of child monitor groups is not empty,
+ * 0 otherwise.
+ */
+static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
+{
+	return !list_empty(&rdtgrp->mon.crdtgrp_list);
+}
+
+
+/**
+ * rdtgroup_locksetup_user_restrict - Restrict user access to group
+ * @rdtgrp: resource group needing access restricted
+ *
+ * A resource group used for cache pseudo-locking cannot have cpus or tasks
+ * assigned to it. This is communicated to the user by restricting access
+ * to all the files that can be used to make such changes. These are, in
+ * the order they are restricted: "tasks", "cpus", "cpus_list" and, when
+ * the architecture is monitoring capable, "mon_groups".
+ *
+ * Permissions restored with rdtgroup_locksetup_user_restore()
+ *
+ * Return: 0 on success, <0 on failure. If a failure occurs during the
+ * restriction of access an attempt will be made to restore permissions but
+ * the state of the mode of these files will be uncertain when a failure
+ * occurs.
+ */
+static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
+{
+	int err;
+
+	err = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
+	if (err)
+		return err;
+
+	err = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
+	if (err)
+		goto undo_tasks;
+
+	err = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
+	if (err)
+		goto undo_cpus;
+
+	/* "mon_groups" is only touched on monitoring capable systems. */
+	if (!resctrl_arch_mon_capable())
+		return 0;
+
+	err = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
+	if (!err)
+		return 0;
+
+	/* Unwind in reverse order; results of the restores are ignored. */
+	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
+undo_cpus:
+	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
+undo_tasks:
+	rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
+	return err;
+}
+
+/**
+ * rdtgroup_locksetup_user_restore - Restore user access to group
+ * @rdtgrp: resource group needing access restored
+ *
+ * Restore all file access previously removed using
+ * rdtgroup_locksetup_user_restrict(). The files are handled in this
+ * order: "tasks", "cpus", "cpus_list" and, when the architecture is
+ * monitoring capable, "mon_groups".
+ *
+ * Return: 0 on success, <0 on failure.  If a failure occurs during the
+ * restoration of access an attempt will be made to restrict permissions
+ * again but the state of the mode of these files will be uncertain when
+ * a failure occurs.
+ */
+static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
+{
+	int err;
+
+	err = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
+	if (err)
+		return err;
+
+	err = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
+	if (err)
+		goto undo_tasks;
+
+	err = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
+	if (err)
+		goto undo_cpus;
+
+	/* "mon_groups" is only touched on monitoring capable systems. */
+	if (!resctrl_arch_mon_capable())
+		return 0;
+
+	err = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
+	if (!err)
+		return 0;
+
+	/* Unwind in reverse order; results of the restricts are ignored. */
+	rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
+undo_cpus:
+	rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
+undo_tasks:
+	rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
+	return err;
+}
+
+/**
+ * rdtgroup_locksetup_enter - Resource group enters locksetup mode
+ * @rdtgrp: resource group requested to enter locksetup mode
+ *
+ * A resource group enters locksetup mode to reflect that it would be used
+ * to represent a pseudo-locked region and is in the process of being set
+ * up to do so. A resource group used for a pseudo-locked region would
+ * lose the closid associated with it so we cannot allow it to have any
+ * tasks or cpus assigned nor permit tasks or cpus to be assigned in the
+ * future. Monitoring of a pseudo-locked region is not allowed either.
+ *
+ * The above and more restrictions on a pseudo-locked region are checked
+ * for and enforced before the resource group enters the locksetup mode.
+ *
+ * Return: 0 if the resource group successfully entered locksetup mode, <0
+ * on failure. On failure the last_cmd_status buffer is updated with text to
+ * communicate details of failure to the user.
+ */
+int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
+{
+	int ret;
+
+	/*
+	 * The default resource group can neither be removed nor lose the
+	 * default closid associated with it.
+	 */
+	if (rdtgrp == &rdtgroup_default) {
+		rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Cache Pseudo-locking not supported when CDP is enabled.
+	 *
+	 * Some things to consider if you would like to enable this
+	 * support (using L3 CDP as example):
+	 * - When CDP is enabled two separate resources are exposed,
+	 *   L3DATA and L3CODE, but they are actually on the same cache.
+	 *   The implication for pseudo-locking is that if a
+	 *   pseudo-locked region is created on a domain of one
+	 *   resource (eg. L3CODE), then a pseudo-locked region cannot
+	 *   be created on that same domain of the other resource
+	 *   (eg. L3DATA). This is because the creation of a
+	 *   pseudo-locked region involves a call to wbinvd that will
+	 *   affect all cache allocations on particular domain.
+	 * - Considering the previous, it may be possible to only
+	 *   expose one of the CDP resources to pseudo-locking and
+	 *   hide the other. For example, we could consider to only
+	 *   expose L3DATA and since the L3 cache is unified it is
+	 *   still possible to place instructions there and execute them.
+	 * - If only one region is exposed to pseudo-locking we should
+	 *   still keep in mind that availability of a portion of cache
+	 *   for pseudo-locking should take into account both resources.
+	 *   Similarly, if a pseudo-locked region is created in one
+	 *   resource, the portion of cache used by it should be made
+	 *   unavailable to all future allocations from both resources.
+	 */
+	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
+	    resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
+		rdt_last_cmd_puts("CDP enabled\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Not knowing the bits to disable prefetching implies that this
+	 * platform does not support Cache Pseudo-Locking.
+	 */
+	prefetch_disable_bits = get_prefetch_disable_bits();
+	if (prefetch_disable_bits == 0) {
+		rdt_last_cmd_puts("Pseudo-locking not supported\n");
+		return -EINVAL;
+	}
+
+	if (rdtgroup_monitor_in_progress(rdtgrp)) {
+		rdt_last_cmd_puts("Monitoring in progress\n");
+		return -EINVAL;
+	}
+
+	if (rdtgroup_tasks_assigned(rdtgrp)) {
+		rdt_last_cmd_puts("Tasks assigned to resource group\n");
+		return -EINVAL;
+	}
+
+	if (!cpumask_empty(&rdtgrp->cpu_mask)) {
+		rdt_last_cmd_puts("CPUs assigned to resource group\n");
+		return -EINVAL;
+	}
+
+	if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
+		rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
+		return -EIO;
+	}
+
+	ret = pseudo_lock_init(rdtgrp);
+	if (ret) {
+		rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
+		goto out_release;
+	}
+
+	/*
+	 * If this system is capable of monitoring a rmid would have been
+	 * allocated when the control group was created. This is not needed
+	 * anymore when this group would be used for pseudo-locking. This
+	 * is safe to call on platforms not capable of monitoring.
+	 */
+	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+
+	ret = 0;
+	goto out;
+
+out_release:
+	rdtgroup_locksetup_user_restore(rdtgrp);
+out:
+	return ret;
+}
+
+/**
+ * rdtgroup_locksetup_exit - resource group exits locksetup mode
+ * @rdtgrp: resource group
+ *
+ * When a resource group exits locksetup mode the earlier restrictions are
+ * lifted and, if monitoring is supported, a new RMID is allocated to it.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
+{
+	int ret;
+
+	if (resctrl_arch_mon_capable()) {
+		ret = alloc_rmid(rdtgrp->closid);
+		if (ret < 0) {
+			rdt_last_cmd_puts("Out of RMIDs\n");
+			return ret;
+		}
+		rdtgrp->mon.rmid = ret;
+	}
+
+	ret = rdtgroup_locksetup_user_restore(rdtgrp);
+	if (ret) {
+		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+		return ret;
+	}
+
+	pseudo_lock_free(rdtgrp);
+	return 0;
+}
+
+/**
+ * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
+ * @d: RDT domain
+ * @cbm: CBM to test
+ *
+ * @d represents a cache instance and @cbm a capacity bitmask that is
+ * considered for it. Check @cbm against the bitmask of the pseudo-locked
+ * region on @d, if @d has one.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
+ * Return: true if @cbm overlaps with pseudo-locked region on @d, false
+ * otherwise.
+ */
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
+{
+	unsigned long locked_cbm;
+	unsigned int nbits;
+
+	/* No pseudo-locked region on this domain, so nothing to overlap. */
+	if (!d->plr)
+		return false;
+
+	nbits = d->plr->s->res->cache.cbm_len;
+	locked_cbm = d->plr->cbm;
+	return bitmap_intersects(&cbm, &locked_cbm, nbits);
+}
+
+/**
+ * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
+ * @d: RDT domain under test
+ *
+ * Setting up a pseudo-locked region affects all cache instances within the
+ * hierarchy of the region. New pseudo-locked regions must therefore not be
+ * created in a hierarchy that already contains one, which is what this
+ * helper lets callers find out. The caller must hold the cpus lock.
+ *
+ * Return: true if a pseudo-locked region exists in the hierarchy of @d or
+ *         if it is not possible to test due to memory allocation issue,
+ *         false otherwise.
+ */
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
+{
+	cpumask_var_t plr_cpus;
+	struct rdt_resource *r;
+	struct rdt_domain *dom;
+	bool found;
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+	/* Without a scratch mask no test is possible: report "in use". */
+	if (!zalloc_cpumask_var(&plr_cpus, GFP_KERNEL))
+		return true;
+
+	/*
+	 * Collect the CPUs of every domain, across all alloc capable
+	 * resources, that currently hosts a pseudo-locked region.
+	 */
+	for_each_alloc_capable_rdt_resource(r) {
+		list_for_each_entry(dom, &r->domains, list) {
+			if (!dom->plr)
+				continue;
+			cpumask_or(plr_cpus, plr_cpus, &dom->cpu_mask);
+		}
+	}
+
+	/*
+	 * @d is in the hierarchy of an existing region if it shares
+	 * at least one CPU with those domains.
+	 */
+	found = cpumask_intersects(&d->cpu_mask, plr_cpus);
+
+	free_cpumask_var(plr_cpus);
+	return found;
+}
+
+/**
+ * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory
  * @_plr: pseudo-lock region to measure
  *
  * There is no deterministic way to test if a memory region is cached. One
@@ -243,7 +887,7 @@ int resctrl_arch_pseudo_lock_fn(void *_plr)
  *
  * Return: 0. Waiter on waitqueue will be woken on completion.
  */
-int resctrl_arch_measure_cycles_lat_fn(void *_plr)
+static int measure_cycles_lat_fn(void *_plr)
 {
 	struct pseudo_lock_region *plr = _plr;
 	u32 saved_low, saved_high;
@@ -427,7 +1071,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr,
 	return 0;
 }
 
-int resctrl_arch_measure_l2_residency(void *_plr)
+static int measure_l2_residency(void *_plr)
 {
 	struct pseudo_lock_region *plr = _plr;
 	struct residency_counts counts = {0};
@@ -465,7 +1109,7 @@ int resctrl_arch_measure_l2_residency(void *_plr)
 	return 0;
 }
 
-int resctrl_arch_measure_l3_residency(void *_plr)
+static int measure_l3_residency(void *_plr)
 {
 	struct pseudo_lock_region *plr = _plr;
 	struct residency_counts counts = {0};
@@ -520,3 +1164,441 @@ int resctrl_arch_measure_l3_residency(void *_plr)
 	wake_up_interruptible(&plr->lock_thread_wq);
 	return 0;
 }
+
+/**
+ * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
+ * @rdtgrp: Resource group to which the pseudo-locked region belongs.
+ * @sel: Selector of which measurement to perform on a pseudo-locked region:
+ *       1 for cycle latency, 2 for L2 residency, 3 for L3 residency.
+ *
+ * The measurement of latency to access a pseudo-locked region should be
+ * done from a cpu that is associated with that pseudo-locked region.
+ * Determine which cpu is associated with this region and start a thread on
+ * that cpu to perform the measurement, wait for that thread to complete.
+ *
+ * Return: 0 on success, -EINVAL if @sel is not a known measurement, -ENODEV
+ * if the group was deleted, its domain is gone or the chosen cpu is offline,
+ * otherwise the error from creating the thread or from an interrupted wait.
+ */
+static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
+{
+	struct pseudo_lock_region *plr = rdtgrp->plr;
+	struct task_struct *thread;
+	int (*measure_fn)(void *);
+	unsigned int cpu;
+	int ret;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+
+	if (rdtgrp->flags & RDT_DELETED) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (!plr->d) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	plr->thread_done = 0;
+	cpu = cpumask_first(&plr->d->cpu_mask);
+	if (!cpu_online(cpu)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	plr->cpu = cpu;
+
+	switch (sel) {
+	case 1:
+		measure_fn = measure_cycles_lat_fn;
+		break;
+	case 2:
+		measure_fn = measure_l2_residency;
+		break;
+	case 3:
+		measure_fn = measure_l3_residency;
+		break;
+	default:
+		/* Unknown selector: a proper errno rather than a bare -1. */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	thread = kthread_create_on_node(measure_fn, plr, cpu_to_node(cpu),
+					"pseudo_lock_measure/%u", cpu);
+	if (IS_ERR(thread)) {
+		ret = PTR_ERR(thread);
+		goto out;
+	}
+	kthread_bind(thread, cpu);
+	wake_up_process(thread);
+
+	ret = wait_event_interruptible(plr->lock_thread_wq,
+				       plr->thread_done == 1);
+
+out:
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+	return ret;
+}
+
+static ssize_t pseudo_lock_measure_trigger(struct file *file,
+					   const char __user *user_buf,
+					   size_t count, loff_t *ppos)
+{
+	struct rdtgroup *rdtgrp = file->private_data;
+	char buf[32];
+	size_t len;
+	int sel, ret;
+
+	len = min(count, (sizeof(buf) - 1));
+	if (copy_from_user(buf, user_buf, len))
+		return -EFAULT;
+	buf[len] = '\0';
+
+	ret = kstrtoint(buf, 10, &sel);
+	if (ret)
+		return ret;
+	if (sel < 1 || sel > 3)
+		return -EINVAL;
+
+	ret = debugfs_file_get(file->f_path.dentry);
+	if (ret)
+		return ret;
+	ret = pseudo_lock_measure_cycles(rdtgrp, sel);
+	debugfs_file_put(file->f_path.dentry);
+	if (!ret)
+		ret = count;
+
+	return ret;
+}
+
+static const struct file_operations pseudo_measure_fops = {
+	.write = pseudo_lock_measure_trigger,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
+/**
+ * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
+ * @rdtgrp: resource group to which pseudo-lock region belongs
+ *
+ * Called when a resource group in the pseudo-locksetup mode receives a
+ * valid schemata that should be pseudo-locked. Since the resource group is
+ * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
+ * allocated and initialized with the essential information. If a failure
+ * occurs the resource group remains in the pseudo-locksetup mode with the
+ * &struct pseudo_lock_region associated with it, but cleared from all
+ * information and ready for the user to re-attempt pseudo-locking by
+ * writing the schemata again.
+ *
+ * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
+ * on failure. Descriptive error will be written to last_cmd_status buffer.
+ */
+int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
+{
+	struct pseudo_lock_region *plr = rdtgrp->plr;
+	struct task_struct *thread;
+	unsigned int new_minor;
+	struct device *dev;
+	int ret;
+
+	ret = pseudo_lock_region_alloc(plr);
+	if (ret < 0)
+		return ret;
+
+	ret = pseudo_lock_cstates_constrain(plr);
+	if (ret < 0) {
+		ret = -EINVAL;
+		goto out_region;
+	}
+
+	plr->thread_done = 0;
+
+	thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
+					cpu_to_node(plr->cpu),
+					"pseudo_lock/%u", plr->cpu);
+	if (IS_ERR(thread)) {
+		ret = PTR_ERR(thread);
+		rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
+		goto out_cstates;
+	}
+
+	kthread_bind(thread, plr->cpu);
+	wake_up_process(thread);
+
+	ret = wait_event_interruptible(plr->lock_thread_wq,
+				       plr->thread_done == 1);
+	if (ret < 0) {
+		/*
+		 * If the thread does not get on the CPU for whatever
+		 * reason and the process which sets up the region is
+		 * interrupted then this will leave the thread in runnable
+		 * state and once it gets on the CPU it will dereference
+		 * the cleared, but not freed, plr struct resulting in an
+		 * empty pseudo-locking loop.
+		 */
+		rdt_last_cmd_puts("Locking thread interrupted\n");
+		goto out_cstates;
+	}
+
+	ret = pseudo_lock_minor_get(&new_minor);
+	if (ret < 0) {
+		rdt_last_cmd_puts("Unable to obtain a new minor number\n");
+		goto out_cstates;
+	}
+
+	/*
+	 * Unlock access but do not release the reference. The
+	 * pseudo-locked region will still be here on return.
+	 *
+	 * The mutex has to be released temporarily to avoid a potential
+	 * deadlock with the mm->mmap_lock which is obtained in the
+	 * device_create() and debugfs_create_dir() callpath below as well as
+	 * before the mmap() callback is called.
+	 */
+	mutex_unlock(&rdtgroup_mutex);
+
+	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
+		plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name,
+						      debugfs_resctrl);
+		if (!IS_ERR_OR_NULL(plr->debugfs_dir))
+			debugfs_create_file("pseudo_lock_measure", 0200,
+					    plr->debugfs_dir, rdtgrp,
+					    &pseudo_measure_fops);
+	}
+
+	dev = device_create(&pseudo_lock_class, NULL,
+			    MKDEV(pseudo_lock_major, new_minor),
+			    rdtgrp, "%s", rdtgrp->kn->name);
+
+	mutex_lock(&rdtgroup_mutex);
+
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		rdt_last_cmd_printf("Failed to create character device: %d\n",
+				    ret);
+		goto out_debugfs;
+	}
+
+	/* We released the mutex - check if group was removed while we did so */
+	if (rdtgrp->flags & RDT_DELETED) {
+		ret = -ENODEV;
+		goto out_device;
+	}
+
+	plr->minor = new_minor;
+
+	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
+	closid_free(rdtgrp->closid);
+	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
+	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);
+
+	ret = 0;
+	goto out;
+
+out_device:
+	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
+out_debugfs:
+	debugfs_remove_recursive(plr->debugfs_dir);
+	pseudo_lock_minor_release(new_minor);
+out_cstates:
+	pseudo_lock_cstates_relax(plr);
+out_region:
+	pseudo_lock_region_clear(plr);
+out:
+	return ret;
+}
+
+/**
+ * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
+ * @rdtgrp: resource group to which the pseudo-locked region belongs
+ *
+ * The removal of a pseudo-locked region can be initiated when the resource
+ * group is removed via a "rmdir" from user space or the
+ * unmount of the resctrl filesystem. On removal the resource group does
+ * not go back to pseudo-locksetup mode before it is removed, instead it is
+ * removed directly. There is thus asymmetry with the creation where the
+ * &struct pseudo_lock_region is removed here while it was not created in
+ * rdtgroup_pseudo_lock_create().
+ *
+ * Return: void
+ */
+void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
+{
+	struct pseudo_lock_region *plr = rdtgrp->plr;
+
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+		/*
+		 * Default group cannot be a pseudo-locked region so we can
+		 * free closid here.
+		 */
+		closid_free(rdtgrp->closid);
+		goto free;
+	}
+
+	pseudo_lock_cstates_relax(plr);
+	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
+	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
+	pseudo_lock_minor_release(plr->minor);
+
+free:
+	pseudo_lock_free(rdtgrp);
+}
+
+static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
+{
+	struct rdtgroup *rdtgrp;
+
+	mutex_lock(&rdtgroup_mutex);
+
+	rdtgrp = region_find_by_minor(iminor(inode));
+	if (!rdtgrp) {
+		mutex_unlock(&rdtgroup_mutex);
+		return -ENODEV;
+	}
+
+	filp->private_data = rdtgrp;
+	atomic_inc(&rdtgrp->waitcount);
+	/* Perform a non-seekable open - llseek is not supported */
+	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
+
+	mutex_unlock(&rdtgroup_mutex);
+
+	return 0;
+}
+
+static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
+{
+	struct rdtgroup *rdtgrp;
+	int ret = -ENODEV;
+
+	mutex_lock(&rdtgroup_mutex);
+	rdtgrp = filp->private_data;
+	/* An open file without a group is unexpected: warn and fail. */
+	if (!WARN_ON(!rdtgrp)) {
+		filp->private_data = NULL;
+		atomic_dec(&rdtgrp->waitcount);
+		ret = 0;
+	}
+	mutex_unlock(&rdtgroup_mutex);
+	return ret;
+}
+
+static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
+{
+	/* Not supported */
+	return -EINVAL;
+}
+
+static const struct vm_operations_struct pseudo_mmap_ops = {
+	.mremap = pseudo_lock_dev_mremap,
+};
+
+static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	unsigned long vsize = vma->vm_end - vma->vm_start;
+	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+	struct pseudo_lock_region *plr;
+	struct rdtgroup *rdtgrp;
+	unsigned long pfn;
+	int ret;
+
+	mutex_lock(&rdtgroup_mutex);
+	rdtgrp = filp->private_data;
+	if (WARN_ON(!rdtgrp)) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+
+	plr = rdtgrp->plr;
+	if (!plr->d) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+
+	/*
+	 * Task is required to run with affinity to the cpus associated
+	 * with the pseudo-locked region. If this is not the case the task
+	 * may be scheduled elsewhere and invalidate entries in the
+	 * pseudo-locked region.
+	 */
+	if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	/* The requested offset must not lie beyond the region. */
+	if (off > plr->size) {
+		ret = -ENOSPC;
+		goto unlock;
+	}
+
+	/*
+	 * Ensure changes are carried directly to the memory being mapped,
+	 * do not allow copy-on-write mapping.
+	 */
+	if (!(vma->vm_flags & VM_SHARED)) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	/* The mapping must fit in what remains of the region after the offset. */
+	if (vsize > plr->size - off) {
+		ret = -ENOSPC;
+		goto unlock;
+	}
+
+	memset(plr->kmem + off, 0, vsize);
+
+	pfn = (__pa(plr->kmem) >> PAGE_SHIFT) + vma->vm_pgoff;
+	if (remap_pfn_range(vma, vma->vm_start, pfn, vsize,
+			    vma->vm_page_prot)) {
+		ret = -EAGAIN;
+		goto unlock;
+	}
+
+	vma->vm_ops = &pseudo_mmap_ops;
+	ret = 0;
+unlock:
+	mutex_unlock(&rdtgroup_mutex);
+	return ret;
+}
+
+static const struct file_operations pseudo_lock_dev_fops = {
+	.owner =	THIS_MODULE,
+	.llseek =	no_llseek,
+	.read =		NULL,
+	.write =	NULL,
+	.open =		pseudo_lock_dev_open,
+	.release =	pseudo_lock_dev_release,
+	.mmap =		pseudo_lock_dev_mmap,
+};
+
+int rdt_pseudo_lock_init(void)
+{
+	int ret;
+
+	ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
+	if (ret < 0)
+		return ret;
+
+	pseudo_lock_major = ret;
+
+	ret = class_register(&pseudo_lock_class);
+	if (ret) {
+		unregister_chrdev(pseudo_lock_major, "pseudo_lock");
+		return ret;
+	}
+
+	return 0;
+}
+
+void rdt_pseudo_lock_release(void)
+{
+	class_unregister(&pseudo_lock_class);
+	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
+	pseudo_lock_major = 0;
+}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 50a49a1ea604d842cedb551a310f2e3c893e0c5c..011e17efb1a66e5b003fee07b53c13633f9f0f58 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -12,8 +12,22 @@
 
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
+#include <linux/cacheinfo.h>
 #include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/fs_parser.h>
+#include <linux/sysfs.h>
+#include <linux/kernfs.h>
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
 #include <linux/slab.h>
+#include <linux/task_work.h>
+#include <linux/user_namespace.h>
+
+#include <uapi/linux/magic.h>
 
 #include <asm/resctrl.h>
 #include "internal.h"
@@ -22,19 +36,317 @@ DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
 DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
 DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
 
+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(rdtgroup_mutex);
+
+static struct kernfs_root *rdt_root;
+struct rdtgroup rdtgroup_default;
+LIST_HEAD(rdt_all_groups);
+
+/* list of entries for the schemata file */
+LIST_HEAD(resctrl_schema_all);
+
+/* The filesystem can only be mounted once. */
+bool resctrl_mounted;
+
+/* Kernel fs node for "info" directory under root */
+static struct kernfs_node *kn_info;
+
+/* Kernel fs node for "mon_groups" directory under root */
+static struct kernfs_node *kn_mongrp;
+
+/* Kernel fs node for "mon_data" directory under root */
+static struct kernfs_node *kn_mondata;
+
+static struct seq_buf last_cmd_status;
+static char last_cmd_status_buf[512];
+
+static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
+static void rdtgroup_destroy_root(void);
+
+struct dentry *debugfs_resctrl;
+
+static bool resctrl_debug;
+
+void rdt_last_cmd_clear(void)
+{
+	lockdep_assert_held(&rdtgroup_mutex);
+	seq_buf_clear(&last_cmd_status);
+}
+
+void rdt_last_cmd_puts(const char *s)
+{
+	lockdep_assert_held(&rdtgroup_mutex);
+	seq_buf_puts(&last_cmd_status, s);
+}
+
+void rdt_last_cmd_printf(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	lockdep_assert_held(&rdtgroup_mutex);
+	seq_buf_vprintf(&last_cmd_status, fmt, ap);
+	va_end(ap);
+}
+
+void rdt_staged_configs_clear(void)
+{
+	struct rdt_resource *r;
+	struct rdt_domain *dom;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	for_each_alloc_capable_rdt_resource(r) {
+		list_for_each_entry(dom, &r->domains, list)
+			memset(dom->staged_config, 0, sizeof(dom->staged_config));
+	}
+}
+
+/*
+ * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
+ * we can keep a bitmap of free CLOSIDs in a single integer.
+ *
+ * Using a global CLOSID across all resources has some advantages and
+ * some drawbacks:
+ * + We can simply set current's closid to assign a task to a resource
+ *   group.
+ * + Context switch code can avoid extra memory references deciding which
+ *   CLOSID to load into the PQR_ASSOC MSR
+ * - We give up some options in configuring resource groups across multi-socket
+ *   systems.
+ * - Our choices on how to configure each resource become progressively more
+ *   limited as the number of resources grows.
+ */
+static unsigned long closid_free_map;
+static int closid_free_map_len;
+
+int closids_supported(void)
+{
+	return closid_free_map_len;
+}
+
+static void closid_init(void)
+{
+	struct resctrl_schema *s;
+	u32 rdt_min_closid = 32;
+
+	/* Compute rdt_min_closid across all resources */
+	list_for_each_entry(s, &resctrl_schema_all, list)
+		rdt_min_closid = min(rdt_min_closid, s->num_closid);
+
+	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
+
+	/* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
+	__clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
+	closid_free_map_len = rdt_min_closid;
+}
+
+static int closid_alloc(void)
+{
+	int cleanest_closid;
+	u32 closid;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+		cleanest_closid = resctrl_find_cleanest_closid();
+		if (cleanest_closid < 0)
+			return cleanest_closid;
+		closid = cleanest_closid;
+	} else {
+		closid = ffs(closid_free_map);
+		if (closid == 0)
+			return -ENOSPC;
+		closid--;
+	}
+	__clear_bit(closid, &closid_free_map);
+
+	return closid;
+}
+
+void closid_free(int closid)
+{
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	__set_bit(closid, &closid_free_map);
+}
+
+/**
+ * closid_allocated - test if provided closid is in use
+ * @closid: closid to be tested
+ *
+ * Return: true if @closid is currently associated with a resource group,
+ * false if @closid is free
+ */
+bool closid_allocated(unsigned int closid)
+{
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	return !test_bit(closid, &closid_free_map);
+}
+
+/**
+ * rdtgroup_mode_by_closid - Return mode of resource group with closid
+ * @closid: closid of the resource group
+ *
+ * Each resource group is associated with a @closid. Here the mode
+ * of a resource group can be queried by searching for it using its closid.
+ *
+ * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
+ */
+enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
+{
+	struct rdtgroup *rdtgrp;
+
+	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+		if (rdtgrp->closid == closid)
+			return rdtgrp->mode;
+	}
+
+	return RDT_NUM_MODES;
+}
+
+static const char * const rdt_mode_str[] = {
+	[RDT_MODE_SHAREABLE]		= "shareable",
+	[RDT_MODE_EXCLUSIVE]		= "exclusive",
+	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
+	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
+};
+
+/**
+ * rdtgroup_mode_str - Return the string representation of mode
+ * @mode: the resource group mode as &enum rdtgrp_mode
+ *
+ * Return: string representation of valid mode, "unknown" otherwise
+ */
+static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
+{
+	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
+		return "unknown";
+
+	return rdt_mode_str[mode];
+}
+
+/* set uid and gid of rdtgroup dirs and files to that of the creator */
+static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
+{
+	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
+				.ia_uid = current_fsuid(),
+				.ia_gid = current_fsgid(), };
+
+	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
+	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
+		return 0;
+
+	return kernfs_setattr(kn, &iattr);
+}
+
+static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
+{
+	struct kernfs_node *kn;
+	int ret;
+
+	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  0, rft->kf_ops, rft, NULL, NULL);
+	if (IS_ERR(kn))
+		return PTR_ERR(kn);
+
+	ret = rdtgroup_kn_set_ugid(kn);
+	if (ret) {
+		kernfs_remove(kn);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
+{
+	struct kernfs_open_file *of = m->private;
+	struct rftype *rft = of->kn->priv;
+
+	if (rft->seq_show)
+		return rft->seq_show(of, m, arg);
+	return 0;
+}
+
+static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
+				   size_t nbytes, loff_t off)
+{
+	struct rftype *rft = of->kn->priv;
+
+	if (rft->write)
+		return rft->write(of, buf, nbytes, off);
+
+	return -EINVAL;
+}
+
+static const struct kernfs_ops rdtgroup_kf_single_ops = {
+	.atomic_write_len	= PAGE_SIZE,
+	.write			= rdtgroup_file_write,
+	.seq_show		= rdtgroup_seqfile_show,
+};
+
+static const struct kernfs_ops kf_mondata_ops = {
+	.atomic_write_len	= PAGE_SIZE,
+	.seq_show		= rdtgroup_mondata_show,
+};
+
+static bool is_cpu_list(struct kernfs_open_file *of)
+{
+	struct rftype *rft = of->kn->priv;
+
+	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
+}
+
+static int rdtgroup_cpus_show(struct kernfs_open_file *of,
+			      struct seq_file *s, void *v)
+{
+	struct rdtgroup *rdtgrp;
+	struct cpumask *mask;
+	int ret = 0;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	/* A pseudo-locked group shows the CPUs of its region's domain. */
+	if (rdtgrp->mode != RDT_MODE_PSEUDO_LOCKED) {
+		mask = &rdtgrp->cpu_mask;
+	} else if (rdtgrp->plr->d) {
+		mask = &rdtgrp->plr->d->cpu_mask;
+	} else {
+		rdt_last_cmd_clear();
+		rdt_last_cmd_puts("Cache domain offline\n");
+		ret = -ENODEV;
+		goto unlock;
+	}
+
+	seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
+		   cpumask_pr_args(mask));
+unlock:
+	rdtgroup_kn_unlock(of->kn);
+
+	return ret;
+}
+
 /*
- * This is safe against resctrl_arch_sched_in() called from __switch_to()
+ * This is safe against resctrl_sched_in() called from __switch_to()
  * because __switch_to() is executed with interrupts disabled. A local call
  * from update_closid_rmid() is protected against __switch_to() because
  * preemption is disabled.
  */
-void resctrl_arch_sync_cpu_defaults(void *info)
+static void update_cpu_closid_rmid(void *info)
 {
-	struct resctrl_cpu_sync *r = info;
+	struct rdtgroup *r = info;
 
 	if (r) {
 		this_cpu_write(pqr_state.default_closid, r->closid);
-		this_cpu_write(pqr_state.default_rmid, r->rmid);
+		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
 	}
 
 	/*
@@ -42,501 +354,3810 @@ void resctrl_arch_sync_cpu_defaults(void *info)
 	 * executing task might have its own closid selected. Just reuse
 	 * the context switch code.
 	 */
-	resctrl_arch_sched_in(current);
+	resctrl_sched_in(current);
 }
 
-void resctrl_arch_mon_event_config_read(void *info)
+/*
+ * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
+ *
+ * Per task closids/rmids must have been set up before calling this function.
+ */
+static void
+update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
 {
-	struct resctrl_mon_config_info *mon_info = info;
-	unsigned int index;
-	u64 msrval;
+	on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1);
+}
 
-	index = mon_event_config_index_get(mon_info->evtid);
-	if (index == INVALID_CONFIG_INDEX) {
-		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
-		return;
+static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+			  cpumask_var_t tmpmask)
+{
+	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
+	struct list_head *head;
+
+	/* Check whether cpus belong to parent ctrl group */
+	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
+	if (!cpumask_empty(tmpmask)) {
+		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
+		return -EINVAL;
 	}
-	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
 
-	/* Report only the valid event configuration bits */
-	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
+	/* Check whether cpus are dropped from this group */
+	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
+	if (!cpumask_empty(tmpmask)) {
+		/* Give any dropped cpus to parent rdtgroup */
+		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
+		update_closid_rmid(tmpmask, prgrp);
+	}
+
+	/*
+	 * If we added cpus, remove them from previous group that owned them
+	 * and update per-cpu rmid
+	 */
+	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
+	if (!cpumask_empty(tmpmask)) {
+		head = &prgrp->mon.crdtgrp_list;
+		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+			if (crgrp == rdtgrp)
+				continue;
+			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
+				       tmpmask);
+		}
+		update_closid_rmid(tmpmask, rdtgrp);
+	}
+
+	/* Done pushing/pulling - update this group with new mask */
+	cpumask_copy(&rdtgrp->cpu_mask, newmask);
+
+	return 0;
 }
 
-void resctrl_arch_mon_event_config_write(void *info)
+static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
 {
-	struct resctrl_mon_config_info *mon_info = info;
-	unsigned int index;
+	struct rdtgroup *crgrp;
 
-	index = mon_event_config_index_get(mon_info->evtid);
-	if (index == INVALID_CONFIG_INDEX) {
-		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
-		mon_info->err = -EINVAL;
-		return;
+	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
+	/* update the child mon group masks as well*/
+	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
+		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
+}
+
+static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
+{
+	struct rdtgroup *r, *crgrp;
+	struct list_head *head;
+
+	/* Check whether cpus are dropped from this group */
+	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
+	if (!cpumask_empty(tmpmask)) {
+		/* Can't drop from default group */
+		if (rdtgrp == &rdtgroup_default) {
+			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
+			return -EINVAL;
+		}
+
+		/* Give any dropped cpus to rdtgroup_default */
+		cpumask_or(&rdtgroup_default.cpu_mask,
+			   &rdtgroup_default.cpu_mask, tmpmask);
+		update_closid_rmid(tmpmask, &rdtgroup_default);
+	}
+
+	/*
+	 * If we added cpus, remove them from previous group and
+	 * the prev group's child groups that owned them
+	 * and update per-cpu closid/rmid.
+	 */
+	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
+	if (!cpumask_empty(tmpmask)) {
+		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
+			if (r == rdtgrp)
+				continue;
+			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
+			if (!cpumask_empty(tmpmask1))
+				cpumask_rdtgrp_clear(r, tmpmask1);
+		}
+		update_closid_rmid(tmpmask, rdtgrp);
+	}
+
+	/* Done pushing/pulling - update this group with new mask */
+	cpumask_copy(&rdtgrp->cpu_mask, newmask);
+
+	/*
+	 * Clear child mon group masks since there is a new parent mask
+	 * now and update the rmid for the cpus the child lost.
+	 */
+	head = &rdtgrp->mon.crdtgrp_list;
+	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
+		update_closid_rmid(tmpmask, rdtgrp);
+		cpumask_clear(&crgrp->cpu_mask);
 	}
-	wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
 
-	mon_info->err = 0;
+	return 0;
 }
 
-static void l3_qos_cfg_update(void *arg)
+static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
+				   char *buf, size_t nbytes, loff_t off)
 {
-	bool *enable = arg;
+	cpumask_var_t tmpmask, newmask, tmpmask1;
+	struct rdtgroup *rdtgrp;
+	int ret;
 
-	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
+	if (!buf)
+		return -EINVAL;
+	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+		return -ENOMEM;
+	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
+		free_cpumask_var(tmpmask);
+		return -ENOMEM;
+	}
+	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
+		free_cpumask_var(tmpmask);
+		free_cpumask_var(newmask);
+		return -ENOMEM;
+	}
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	/* Drop stale status text so only this write's errors are reported. */
+	rdt_last_cmd_clear();
+
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
+	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+		ret = -EINVAL;
+		rdt_last_cmd_puts("Pseudo-locking in progress\n");
+		goto unlock;
+	}
+
+	if (is_cpu_list(of))
+		ret = cpulist_parse(buf, newmask);
+	else
+		ret = cpumask_parse(buf, newmask);
+	if (ret) {
+		rdt_last_cmd_puts("Bad CPU list/mask\n");
+		goto unlock;
+	}
+
+	/* check that user didn't specify any offline cpus */
+	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
+	if (!cpumask_empty(tmpmask)) {
+		ret = -EINVAL;
+		rdt_last_cmd_puts("Can only assign online CPUs\n");
+		goto unlock;
+	}
+
+	if (rdtgrp->type == RDTCTRL_GROUP)
+		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
+	else if (rdtgrp->type == RDTMON_GROUP)
+		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
+	else
+		ret = -EINVAL;
+
+unlock:
+	rdtgroup_kn_unlock(of->kn);
+	free_cpumask_var(tmpmask);
+	free_cpumask_var(newmask);
+	free_cpumask_var(tmpmask1);
+	return ret ?: nbytes;
 }
 
-static void l2_qos_cfg_update(void *arg)
+/**
+ * rdtgroup_remove - the helper to remove resource group safely
+ * @rdtgrp: resource group to remove
+ *
+ * On resource group creation via a mkdir, an extra kernfs_node reference is
+ * taken to ensure that the rdtgroup structure remains accessible for the
+ * rdtgroup_kn_unlock() calls where it is removed.
+ *
+ * Drop the extra reference here, then free the rdtgroup structure.
+ *
+ * Return: void
+ */
+static void rdtgroup_remove(struct rdtgroup *rdtgrp)
+{
+	kernfs_put(rdtgrp->kn);
+	kfree(rdtgrp);
+}
+
+static void _update_task_closid_rmid(void *task)
+{
+	/*
+	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
+	 * Otherwise, the MSR is updated when the task is scheduled in.
+	 */
+	if (task == current)
+		resctrl_sched_in(task);
+}
+
+static void update_task_closid_rmid(struct task_struct *t)
+{
+	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
+		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
+	else
+		_update_task_closid_rmid(t);
+}
+
+static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
+{
+	bool is_ctrl = rdtgrp->type == RDTCTRL_GROUP;
+	u32 closid;
+
+	if (!is_ctrl && rdtgrp->type != RDTMON_GROUP)
+		return false;
+
+	/* Monitor groups are matched against their parent's closid. */
+	closid = is_ctrl ? rdtgrp->closid : rdtgrp->mon.parent->closid;
+
+	return resctrl_arch_match_closid(tsk, closid) &&
+	       resctrl_arch_match_rmid(tsk, closid, rdtgrp->mon.rmid);
+}
+
+static int __rdtgroup_move_task(struct task_struct *tsk,
+				struct rdtgroup *rdtgrp)
+{
+	/* If the task is already in rdtgrp, no need to move the task. */
+	if (task_in_rdtgroup(tsk, rdtgrp))
+		return 0;
+
+	/*
+	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
+	 * updated by them.
+	 *
+	 * For ctrl_mon groups, move both closid and rmid.
+	 * For monitor groups, can move the tasks only from
+	 * their parent CTRL group.
+	 */
+	if (rdtgrp->type == RDTMON_GROUP &&
+	    !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
+		rdt_last_cmd_puts("Can't move task to different control group\n");
+		return -EINVAL;
+	}
+
+	if (rdtgrp->type == RDTMON_GROUP)
+		resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
+					     rdtgrp->mon.rmid);
+	else
+		resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
+					     rdtgrp->mon.rmid);
+
+	/*
+	 * Ensure the task's closid and rmid are written before determining if
+	 * the task is current that will decide if it will be interrupted.
+	 * This pairs with the full barrier between the rq->curr update and
+	 * resctrl_sched_in() during context switch.
+	 */
+	smp_mb();
+
+	/*
+	 * By now, the task's closid and rmid are set. If the task is current
+	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
+	 * group go into effect. If the task is not current, the MSR will be
+	 * updated when the task is scheduled in.
+	 */
+	update_task_closid_rmid(tsk);
+
+	return 0;
+}
+
+static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
+{
+	return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
+		resctrl_arch_match_closid(t, r->closid));
+}
+
+static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
+{
+	return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
+		resctrl_arch_match_rmid(t, r->mon.parent->closid,
+					r->mon.rmid));
+}
+
+/**
+ * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
+ * @r: Resource group
+ *
+ * Return: 1 if tasks have been assigned to @r, 0 otherwise
+ */
+int rdtgroup_tasks_assigned(struct rdtgroup *r)
+{
+	struct task_struct *p, *t;
+	int ret = 0;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	rcu_read_lock();
+	for_each_process_thread(p, t) {
+		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
+			ret = 1;
+			goto unlock;	/* double loop: "break" only leaves the inner one */
+		}
+	}
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+
+static int rdtgroup_task_write_permission(struct task_struct *task,
+					  struct kernfs_open_file *of)
+{
+	const struct cred *target = get_task_cred(task);
+	const struct cred *writer = current_cred();
+	bool allowed;
+
+	/*
+	 * The writer's effective uid must be GLOBAL_ROOT_UID or equal the
+	 * real or saved uid of the task being moved.
+	 */
+	allowed = uid_eq(writer->euid, GLOBAL_ROOT_UID) ||
+		  uid_eq(writer->euid, target->uid) ||
+		  uid_eq(writer->euid, target->suid);
+	if (!allowed)
+		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
+
+	/* Release the credentials obtained with get_task_cred(). */
+	put_cred(target);
+	return allowed ? 0 : -EPERM;
+}
+
+static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
+			      struct kernfs_open_file *of)
+{
+	struct task_struct *target;
+	int err;
+
+	/* A pid of 0 selects the writing task itself. */
+	rcu_read_lock();
+	target = pid ? find_task_by_vpid(pid) : current;
+	if (!target) {
+		rcu_read_unlock();
+		rdt_last_cmd_printf("No task %d\n", pid);
+		return -ESRCH;
+	}
+
+	/* Take a task reference before leaving the RCU read-side section. */
+	get_task_struct(target);
+	rcu_read_unlock();
+
+	err = rdtgroup_task_write_permission(target, of);
+	if (err)
+		goto put;
+
+	err = __rdtgroup_move_task(target, rdtgrp);
+put:
+	put_task_struct(target);
+	return err;
+}
+
+static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
+{
+	struct rdtgroup *rdtgrp;
+	char *token;
+	int err = 0;
+	pid_t pid;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp) {
+		err = -ENOENT;
+		goto unlock;
+	}
+	rdt_last_cmd_clear();
+
+	/* Refuse task moves while the group is pseudo-locked or in locksetup. */
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
+	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+		rdt_last_cmd_puts("Pseudo-locking in progress\n");
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	/* @buf is a comma separated pid list; stop at the first failure. */
+	while (buf && buf[0] != '\0' && buf[0] != '\n') {
+		token = strim(strsep(&buf, ","));
+
+		err = -EINVAL;
+		if (kstrtoint(token, 0, &pid)) {
+			rdt_last_cmd_printf("Task list parsing error pid %s\n", token);
+			break;
+		}
+
+		if (pid < 0) {
+			rdt_last_cmd_printf("Invalid pid %d\n", pid);
+			break;
+		}
+
+		err = rdtgroup_move_task(pid, rdtgrp, of);
+		if (err) {
+			rdt_last_cmd_printf("Error while processing task %d\n", pid);
+			break;
+		}
+	}
+
+unlock:
+	rdtgroup_kn_unlock(of->kn);
+	return err ?: nbytes;
+}
+
+static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
+{
+	struct task_struct *proc, *thread;
+	pid_t vpid;
+
+	rcu_read_lock();
+	for_each_process_thread(proc, thread) {
+		if (!is_closid_match(thread, r) && !is_rmid_match(thread, r))
+			continue;
+		vpid = task_pid_vnr(thread);
+		if (vpid)
+			seq_printf(s, "%d\n", vpid);
+	}
+	rcu_read_unlock();
+}
+
+static int rdtgroup_tasks_show(struct kernfs_open_file *of,
+			       struct seq_file *s, void *v)
+{
+	struct rdtgroup *grp = rdtgroup_kn_lock_live(of->kn);
+
+	/*
+	 * rdtgroup_kn_unlock() is called whether or not a group was found;
+	 * a missing group is reported as -ENOENT.
+	 */
+	if (grp)
+		show_rdt_tasks(grp, s);
+	rdtgroup_kn_unlock(of->kn);
+
+	return grp ? 0 : -ENOENT;
+}
+
+static int rdtgroup_closid_show(struct kernfs_open_file *of,
+				struct seq_file *s, void *v)
+{
+	struct rdtgroup *rdtgrp;
+	int ret = -ENOENT;
+
+	/* Print the CLOSID only when a group was found and locked. */
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (rdtgrp) {
+		seq_printf(s, "%u\n", rdtgrp->closid);
+		ret = 0;
+	}
+	rdtgroup_kn_unlock(of->kn);
+	return ret;
+}
+
+static int rdtgroup_rmid_show(struct kernfs_open_file *of,
+			      struct seq_file *s, void *v)
+{
+	struct rdtgroup *grp = rdtgroup_kn_lock_live(of->kn);
+	int ret = 0;
+
+	/* No group for this node: nothing to print, report -ENOENT. */
+	if (!grp)
+		ret = -ENOENT;
+	else
+		seq_printf(s, "%u\n", grp->mon.rmid);
+
+	rdtgroup_kn_unlock(of->kn);
+	return ret;
+}
+
+#ifdef CONFIG_PROC_CPU_RESCTRL
+
+/*
+ * A task can only be part of one resctrl control group and of one monitor
+ * group which is associated with that control group.
+ *
+ * 1)   res:
+ *      mon:
+ *
+ *    resctrl is not mounted.
+ *
+ * 2)   res:/
+ *      mon:
+ *
+ *    Task is part of the root resctrl control group, and it is not associated
+ *    with any monitor group.
+ *
+ * 3)  res:/
+ *     mon:mon0
+ *
+ *    Task is part of the root resctrl control group and monitor group mon0.
+ *
+ * 4)  res:group0
+ *     mon:
+ *
+ *    Task is part of resctrl control group group0, and it is not associated
+ *    with any monitor group.
+ *
+ * 5) res:group0
+ *    mon:mon1
+ *
+ *    Task is part of resctrl control group group0 and monitor group mon1.
+ */
+int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
+		      struct pid *pid, struct task_struct *tsk)
+{
+	struct rdtgroup *rdtg;
+	int ret = 0;
+
+	mutex_lock(&rdtgroup_mutex);
+
+	/* Return empty if resctrl has not been mounted. */
+	if (!resctrl_mounted) {
+		seq_puts(s, "res:\nmon:\n");
+		goto unlock;
+	}
+
+	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
+		struct rdtgroup *crg;
+
+		/*
+		 * Task information is only relevant for shareable
+		 * and exclusive groups.
+		 */
+		if (rdtg->mode != RDT_MODE_SHAREABLE &&
+		    rdtg->mode != RDT_MODE_EXCLUSIVE)
+			continue;
+
+		if (!resctrl_arch_match_closid(tsk, rdtg->closid))
+			continue;
+
+		seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
+			   rdtg->kn->name);
+		seq_puts(s, "mon:");
+		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
+				    mon.crdtgrp_list) {
+			if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
+						     crg->mon.rmid))
+				continue;
+			seq_printf(s, "%s", crg->kn->name);
+			break;
+		}
+		seq_putc(s, '\n');
+		goto unlock;
+	}
+	/*
+	 * No control group matched the task's CLOSID. The search above
+	 * is expected to succeed, so report this as an error.
+	 */
+	ret = -ENOENT;
+unlock:
+	mutex_unlock(&rdtgroup_mutex);
+
+	return ret;
+}
+#endif
+
+static int rdt_last_cmd_status_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
+{
+	int used;
+
+	mutex_lock(&rdtgroup_mutex);
+	used = seq_buf_used(&last_cmd_status);
+	/* An empty status buffer is reported as "ok". */
+	if (!used)
+		seq_puts(seq, "ok\n");
+	else
+		seq_printf(seq, "%.*s", used, last_cmd_status_buf);
+	mutex_unlock(&rdtgroup_mutex);
+	return 0;
+}
+
+/* Print the num_closid value of the schema stored in the parent node. */
+static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *schema = of->kn->parent->priv;
+
+	seq_printf(seq, "%u\n", schema->num_closid);
+	return 0;
+}
+
+/* Print, in hex, the default control value of the schema's resource. */
+static int rdt_default_ctrl_show(struct kernfs_open_file *of,
+				 struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *schema = of->kn->parent->priv;
+
+	seq_printf(seq, "%x\n", schema->res->default_ctrl);
+	return 0;
+}
+
+/* Print the cache.min_cbm_bits value of the schema's resource. */
+static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
+				 struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *schema = of->kn->parent->priv;
+
+	seq_printf(seq, "%u\n", schema->res->cache.min_cbm_bits);
+	return 0;
+}
+
+/* Print, in hex, the cache.shareable_bits mask of the schema's resource. */
+static int rdt_shareable_bits_show(struct kernfs_open_file *of,
+				   struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *schema = of->kn->parent->priv;
+
+	seq_printf(seq, "%x\n", schema->res->cache.shareable_bits);
+	return 0;
+}
+
+/*
+ * rdt_bit_usage_show - Display current usage of resources
+ *
+ * A domain is a shared resource that can now be allocated differently. Here
+ * we display the current regions of the domain as an annotated bitmask.
+ * For each domain of this resource its allocation bitmask
+ * is annotated as below to indicate the current usage of the corresponding bit:
+ *   0 - currently unused
+ *   X - currently available for sharing and used by software and hardware
+ *   H - currently used by hardware only but available for software use
+ *   S - currently used and shareable by software only
+ *   E - currently used exclusively by one resource group
+ *   P - currently pseudo-locked by one resource group
+ */
+static int rdt_bit_usage_show(struct kernfs_open_file *of,
+			      struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *s = of->kn->parent->priv;
+	/*
+	 * Use unsigned long even though only 32 bits are used to ensure
+	 * test_bit() is used safely.
+	 */
+	unsigned long sw_shareable = 0, hw_shareable = 0;
+	unsigned long exclusive = 0, pseudo_locked = 0;
+	struct rdt_resource *r = s->res;
+	struct rdt_domain *dom;
+	int i, hwb, swb, excl, psl;
+	enum rdtgrp_mode mode;
+	bool sep = false;
+	u32 ctrl_val;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+	hw_shareable = r->cache.shareable_bits;
+	list_for_each_entry(dom, &r->domains, list) {
+		if (sep)
+			seq_putc(seq, ';');
+		sw_shareable = 0;
+		exclusive = 0;
+		seq_printf(seq, "%d=", dom->id);
+		for (i = 0; i < closids_supported(); i++) {
+			if (!closid_allocated(i))
+				continue;
+			ctrl_val = resctrl_arch_get_config(r, dom, i,
+							   s->conf_type);
+			mode = rdtgroup_mode_by_closid(i);
+			switch (mode) {
+			case RDT_MODE_SHAREABLE:
+				sw_shareable |= ctrl_val;
+				break;
+			case RDT_MODE_EXCLUSIVE:
+				exclusive |= ctrl_val;
+				break;
+			case RDT_MODE_PSEUDO_LOCKSETUP:
+			/*
+			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
+			 * here but not included since the CBM
+			 * associated with this CLOSID in this mode
+			 * is not initialized and no task or cpu can be
+			 * assigned this CLOSID.
+			 */
+				break;
+			case RDT_MODE_PSEUDO_LOCKED:
+			case RDT_NUM_MODES:
+				WARN(1,
+				     "invalid mode for closid %d\n", i);
+				break;
+			}
+		}
+		/* The pseudo-locked mask is per domain: compute it once, not per bit. */
+		pseudo_locked = dom->plr ? dom->plr->cbm : 0;
+		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
+			hwb = test_bit(i, &hw_shareable);
+			swb = test_bit(i, &sw_shareable);
+			excl = test_bit(i, &exclusive);
+			psl = test_bit(i, &pseudo_locked);
+			if (hwb && swb)
+				seq_putc(seq, 'X');
+			else if (hwb && !swb)
+				seq_putc(seq, 'H');
+			else if (!hwb && swb)
+				seq_putc(seq, 'S');
+			else if (excl)
+				seq_putc(seq, 'E');
+			else if (psl)
+				seq_putc(seq, 'P');
+			else /* Unused bits remain */
+				seq_putc(seq, '0');
+		}
+		sep = true;
+	}
+	seq_putc(seq, '\n');
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+	return 0;
+}
+
+/* Print the membw.min_bw value of the schema's resource. */
+static int rdt_min_bw_show(struct kernfs_open_file *of,
+			   struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *schema = of->kn->parent->priv;
+
+	seq_printf(seq, "%u\n", schema->res->membw.min_bw);
+	return 0;
+}
+
+/* Print the num_rmid value of the resource stored in the parent node. */
+static int rdt_num_rmids_show(struct kernfs_open_file *of,
+			      struct seq_file *seq, void *v)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+
+	seq_printf(seq, "%d\n", res->num_rmid);
+	return 0;
+}
+
+/* List each monitor event, plus "<event>_config" for configurable ones. */
+static int rdt_mon_features_show(struct kernfs_open_file *of,
+				 struct seq_file *seq, void *v)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+	struct mon_evt *evt;
+
+	list_for_each_entry(evt, &res->evt_list, list) {
+		seq_printf(seq, "%s\n", evt->name);
+		if (evt->configurable)
+			seq_printf(seq, "%s_config\n", evt->name);
+	}
+	return 0;
+}
+
+/* Print the membw.bw_gran value of the schema's resource. */
+static int rdt_bw_gran_show(struct kernfs_open_file *of,
+			    struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *schema = of->kn->parent->priv;
+
+	seq_printf(seq, "%u\n", schema->res->membw.bw_gran);
+	return 0;
+}
+
+/* Print the membw.delay_linear value of the schema's resource. */
+static int rdt_delay_linear_show(struct kernfs_open_file *of,
+				 struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *schema = of->kn->parent->priv;
+
+	seq_printf(seq, "%u\n", schema->res->membw.delay_linear);
+	return 0;
+}
+
+/* Print the current value of resctrl_rmid_realloc_threshold. */
+static int max_threshold_occ_show(struct kernfs_open_file *of,
+				  struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
+	return 0;
+}
+
+/* Report "per-thread" or "max" according to the resource's throttle mode. */
+static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
+					 struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *schema = of->kn->parent->priv;
+	bool per_thread;
+
+	per_thread = schema->res->membw.throttle_mode ==
+		     THREAD_THROTTLE_PER_THREAD;
+	seq_puts(seq, per_thread ? "per-thread\n" : "max\n");
+
+	return 0;
+}
+
+/* Set resctrl_rmid_realloc_threshold from a user supplied byte count. */
+static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
+				       char *buf, size_t nbytes, loff_t off)
+{
+	unsigned int bytes;
+	int err = kstrtouint(buf, 0, &bytes);
+
+	if (err)
+		return err;
+	/* Values above resctrl_rmid_realloc_limit are rejected. */
+	if (bytes > resctrl_rmid_realloc_limit)
+		return -EINVAL;
+
+	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
+
+	return nbytes;
+}
+
+/*
+ * rdtgroup_mode_show - Display mode of this resource group
+ * Return: 0, or -ENOENT if rdtgroup_kn_lock_live() finds no group.
+ */
+static int rdtgroup_mode_show(struct kernfs_open_file *of,
+			      struct seq_file *s, void *v)
+{
+	struct rdtgroup *grp = rdtgroup_kn_lock_live(of->kn);
+	int ret = -ENOENT;
+
+	/* Only a group that could be locked has a mode to print. */
+	if (grp) {
+		seq_printf(s, "%s\n", rdtgroup_mode_str(grp->mode));
+		ret = 0;
+	}
+
+	rdtgroup_kn_unlock(of->kn);
+	return ret;
+}
+
+/*
+ * Map a CDP configuration type to its peer: CODE <-> DATA, anything else NONE.
+ */
+static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
+{
+	if (my_type == CDP_CODE)
+		return CDP_DATA;
+	if (my_type == CDP_DATA)
+		return CDP_CODE;
+
+	return CDP_NONE;
+}
+
+/* Print the cache.arch_has_sparse_bitmasks flag of the schema's resource. */
+static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
+					struct seq_file *seq, void *v)
+{
+	struct resctrl_schema *schema = of->kn->parent->priv;
+
+	seq_printf(seq, "%u\n", schema->res->cache.arch_has_sparse_bitmasks);
+
+	return 0;
+}
+
+/**
+ * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with others
+ * @r: Resource to which domain instance @d belongs.
+ * @d: The domain instance for which @closid is being tested.
+ * @cbm: Capacity bitmask being tested.
+ * @closid: Intended closid for @cbm.
+ * @type: CDP type of @r.
+ * @exclusive: Only check if overlaps with exclusive resource groups
+ *
+ * Checks if provided @cbm intended to be used for @closid on domain
+ * @d overlaps with any other closids or other hardware usage associated
+ * with this domain. If @exclusive is true then only overlaps with
+ * resource groups in exclusive mode will be considered. If @exclusive
+ * is false then overlaps with any resource group or hardware entities
+ * will be considered.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
+ * Return: false if CBM does not overlap, true if it does.
+ */
+static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+				    unsigned long cbm, int closid,
+				    enum resctrl_conf_type type, bool exclusive)
+{
+	enum rdtgrp_mode mode;
+	unsigned long ctrl_b;
+	int i;
+
+	/* Check for any overlap with regions used by hardware directly */
+	if (!exclusive) {
+		ctrl_b = r->cache.shareable_bits;
+		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
+			return true;
+	}
+
+	/* Check for overlap with other resource groups */
+	for (i = 0; i < closids_supported(); i++) {
+		/* Skip CLOSIDs that are unallocated or @closid itself. */
+		if (!closid_allocated(i) || i == closid)
+			continue;
+		/* A group in pseudo-locksetup mode is not considered. */
+		mode = rdtgroup_mode_by_closid(i);
+		if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
+			continue;
+		ctrl_b = resctrl_arch_get_config(r, d, i, type);
+		if (!bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
+			continue;
+		/* With @exclusive only exclusive groups count as overlap. */
+		if (!exclusive || mode == RDT_MODE_EXCLUSIVE)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
+ * @s: Schema for the resource to which domain instance @d belongs.
+ * @d: The domain instance for which @closid is being tested.
+ * @cbm: Capacity bitmask being tested.
+ * @closid: Intended closid for @cbm.
+ * @exclusive: Only check if overlaps with exclusive resource groups
+ *
+ * Resources that can be allocated using a CBM can use the CBM to control
+ * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
+ * for overlap. Overlap test is not limited to the specific resource for
+ * which the CBM is intended though - when dealing with CDP resources that
+ * share the underlying hardware the overlap check should be performed on
+ * the CDP resource sharing the hardware also.
+ *
+ * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
+ * overlap test.
+ *
+ * Return: true if CBM overlap detected, false if there is no overlap
+ */
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
+			   unsigned long cbm, int closid, bool exclusive)
+{
+	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
+	struct rdt_resource *r = s->res;
+
+	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
+				    exclusive))
+		return true;
+
+	if (!resctrl_arch_get_cdp_enabled(r->rid))
+		return false;
+	return  __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
+}
+
+/**
+ * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
+ * @rdtgrp: Resource group identified through its closid.
+ *
+ * An exclusive resource group implies that there should be no sharing of
+ * its allocated resources. At the time this group is considered to be
+ * exclusive this test can determine if its current schemata supports this
+ * setting by testing for overlap with all other resource groups.
+ *
+ * Return: true if resource group can be exclusive, false if there is overlap
+ * with allocations of other resource groups and thus this resource group
+ * cannot be exclusive.
+ */
+static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
+{
+	struct resctrl_schema *schema;
+	int closid = rdtgrp->closid;
+	struct rdt_resource *res;
+	bool has_cache = false;
+	struct rdt_domain *dom;
+	u32 cbm;
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+	list_for_each_entry(schema, &resctrl_schema_all, list) {
+		res = schema->res;
+		/* MBA and SMBA schemata are not part of the overlap test. */
+		if (res->rid == RDT_RESOURCE_MBA || res->rid == RDT_RESOURCE_SMBA)
+			continue;
+		has_cache = true;
+		list_for_each_entry(dom, &res->domains, list) {
+			cbm = resctrl_arch_get_config(res, dom, closid,
+						      schema->conf_type);
+			if (!rdtgroup_cbm_overlaps(schema, dom, cbm, closid, false))
+				continue;
+			rdt_last_cmd_puts("Schemata overlaps\n");
+			return false;
+		}
+	}
+
+	if (has_cache)
+		return true;
+
+	rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
+	return false;
+}
+
+/*
+ * rdtgroup_mode_write - Modify the resource group's mode
+ */
+static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
+				   char *buf, size_t nbytes, loff_t off)
+{
+	struct rdtgroup *rdtgrp;
+	enum rdtgrp_mode mode;
+	int ret = 0;
+
+	/* Valid input requires a trailing newline, replaced by NUL below */
+	if (nbytes == 0 || buf[nbytes - 1] != '\n')
+		return -EINVAL;
+	buf[nbytes - 1] = '\0';
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp) {
+		rdtgroup_kn_unlock(of->kn);
+		return -ENOENT;
+	}
+
+	rdt_last_cmd_clear();
+
+	mode = rdtgrp->mode;
+
+	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
+	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
+	    (!strcmp(buf, "pseudo-locksetup") &&
+	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
+	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
+		goto out;
+
+	if (mode == RDT_MODE_PSEUDO_LOCKED) {
+		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!strcmp(buf, "shareable")) {
+		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+			ret = rdtgroup_locksetup_exit(rdtgrp);
+			if (ret)
+				goto out;
+		}
+		rdtgrp->mode = RDT_MODE_SHAREABLE;
+	} else if (!strcmp(buf, "exclusive")) {
+		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
+			ret = -EINVAL;
+			goto out;
+		}
+		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+			ret = rdtgroup_locksetup_exit(rdtgrp);
+			if (ret)
+				goto out;
+		}
+		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
+	} else if (!strcmp(buf, "pseudo-locksetup")) {
+		ret = rdtgroup_locksetup_enter(rdtgrp);
+		if (ret)
+			goto out;
+		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
+	} else {
+		rdt_last_cmd_puts("Unknown or unsupported mode\n");
+		ret = -EINVAL;
+	}
+
+out:
+	rdtgroup_kn_unlock(of->kn);
+	return ret ?: nbytes;
+}
+
+/**
+ * rdtgroup_cbm_to_size - Translate CBM to size in bytes
+ * @r: RDT resource to which @d belongs.
+ * @d: RDT domain instance.
+ * @cbm: bitmask for which the size should be computed.
+ *
+ * The bitmask provided associated with the RDT domain instance @d will be
+ * translated into how many bytes it represents. The size in bytes is
+ * computed by first dividing the total cache size by the CBM length to
+ * determine how many bytes each bit in the bitmask represents. The result
+ * is multiplied with the number of bits set in the bitmask.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ */
+unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
+				  struct rdt_domain *d, unsigned long cbm)
+{
+	int nr_bits = bitmap_weight(&cbm, r->cache.cbm_len);
+	struct cpu_cacheinfo *ci;
+	int i;
+
+	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
+	for (i = 0; i < ci->num_leaves; i++) {
+		if (ci->info_list[i].level != r->cache_level)
+			continue;
+		/* Bytes per CBM bit times the number of bits set. */
+		return ci->info_list[i].size / r->cache.cbm_len * nr_bits;
+	}
+
+	/* No cache leaf at the resource's cache level. */
+	return 0;
+}
+
+/*
+ * rdtgroup_size_show - Display size in bytes of allocated regions
+ *
+ * The "size" file mirrors the layout of the "schemata" file, printing the
+ * size in bytes of each cache region; MBA/SMBA print the control value.
+ */
+static int rdtgroup_size_show(struct kernfs_open_file *of,
+			      struct seq_file *s, void *v)
+{
+	struct resctrl_schema *schema;
+	enum resctrl_conf_type type;
+	struct rdtgroup *rdtgrp;
+	struct rdt_resource *r;
+	struct rdt_domain *d;
+	unsigned int size;
+	int ret = 0;
+	u32 closid;
+	bool sep;
+	u32 ctrl;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp) {
+		rdtgroup_kn_unlock(of->kn);
+		return -ENOENT;
+	}
+
+	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+		if (!rdtgrp->plr->d) {
+			rdt_last_cmd_clear();
+			rdt_last_cmd_puts("Cache domain offline\n");
+			ret = -ENODEV;
+		} else {
+			seq_printf(s, "%*s:", max_name_width,
+				   rdtgrp->plr->s->name);
+			size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
+						    rdtgrp->plr->d,
+						    rdtgrp->plr->cbm);
+			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+		}
+		goto out;
+	}
+
+	closid = rdtgrp->closid;
+
+	list_for_each_entry(schema, &resctrl_schema_all, list) {
+		r = schema->res;
+		type = schema->conf_type;
+		sep = false;
+		seq_printf(s, "%*s:", max_name_width, schema->name);
+		list_for_each_entry(d, &r->domains, list) {
+			if (sep)
+				seq_putc(s, ';');
+			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+				size = 0;
+			} else {
+				if (is_mba_sc(r))
+					ctrl = d->mbps_val[closid];
+				else
+					ctrl = resctrl_arch_get_config(r, d,
+								       closid,
+								       type);
+				if (r->rid == RDT_RESOURCE_MBA ||
+				    r->rid == RDT_RESOURCE_SMBA)
+					size = ctrl;
+				else
+					size = rdtgroup_cbm_to_size(r, d, ctrl);
+			}
+			seq_printf(s, "%d=%u", d->id, size);
+			sep = true;
+		}
+		seq_putc(s, '\n');
+	}
+
+out:
+	rdtgroup_kn_unlock(of->kn);
+
+	return ret;
+}
+
+struct mon_config_info {
+	u32 evtid;		/* event whose configuration is read or written */
+	u32 mon_config;		/* configuration value read from or written to the MSR */
+};
+
+#define INVALID_CONFIG_INDEX   UINT_MAX
+
+/**
+ * mon_event_config_index_get - get the hardware index for the
+ *                              configurable event
+ * @evtid: event id.
+ *
+ * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
+ *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
+ *         INVALID_CONFIG_INDEX for invalid evtid
+ */
+static inline unsigned int mon_event_config_index_get(u32 evtid)
+{
+	unsigned int index = INVALID_CONFIG_INDEX;
+
+	if (evtid == QOS_L3_MBM_TOTAL_EVENT_ID)
+		index = 0;
+	else if (evtid == QOS_L3_MBM_LOCAL_EVENT_ID)
+		index = 1;
+
+	/* Any other event id is not expected and stays invalid. */
+	return index;
+}
+
+/* Read the event's configuration MSR into the mon_config_info at @info. */
+static void mon_event_config_read(void *info)
+{
+	struct mon_config_info *mon_info = info;
+	unsigned int index;
+	u64 msrval;
+
+	index = mon_event_config_index_get(mon_info->evtid);
+	if (index == INVALID_CONFIG_INDEX) {
+		pr_warn_once("Invalid event id %u\n", mon_info->evtid);
+		return;
+	}
+	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
+	/* Report only the valid event configuration bits */
+	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
+}
+
+static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
+{
+	smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1); /* 1: wait */
+}
+
+static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
+{
+	struct mon_config_info cfg;
+	struct rdt_domain *dom;
+	bool first = true;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+
+	/* Emit "<domain id>=<config>" per domain, separated by ';'. */
+	list_for_each_entry(dom, &r->domains, list) {
+		if (!first)
+			seq_puts(s, ";");
+		first = false;
+
+		/* Every field except evtid starts at zero for each domain. */
+		cfg = (struct mon_config_info){ .evtid = evtid };
+		mondata_config_read(dom, &cfg);
+		seq_printf(s, "%d=0x%02x", dom->id, cfg.mon_config);
+	}
+	seq_puts(s, "\n");
+
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+
+	return 0;
+}
+
+/* Show the per-domain configuration of QOS_L3_MBM_TOTAL_EVENT_ID. */
+static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
+				       struct seq_file *seq, void *v)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+
+	/* mbm_config_show() only ever returns 0, so pass its result on. */
+	return mbm_config_show(seq, res, QOS_L3_MBM_TOTAL_EVENT_ID);
+}
+
+/* Show the per-domain configuration of QOS_L3_MBM_LOCAL_EVENT_ID. */
+static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
+				       struct seq_file *seq, void *v)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+
+	/* mbm_config_show() only ever returns 0, so pass its result on. */
+	return mbm_config_show(seq, res, QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
+/* Write @info's mon_config to the event's configuration MSR. */
+static void mon_event_config_write(void *info)
+{
+	struct mon_config_info *mon_info = info;
+	unsigned int index = mon_event_config_index_get(mon_info->evtid);
+
+	if (index == INVALID_CONFIG_INDEX) {
+		pr_warn_once("Invalid event id %u\n", mon_info->evtid);
+		return;
+	}
+	wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
+}
+
+static void mbm_config_write_domain(struct rdt_resource *r,
+				    struct rdt_domain *d, u32 evtid, u32 val)
+{
+	struct mon_config_info mon_info = {0};
+
+	/*
+	 * Read the current config value first. If both are the same then
+	 * no need to write it again.
+	 */
+	mon_info.evtid = evtid;
+	mondata_config_read(d, &mon_info);
+	if (mon_info.mon_config == val)
+		return;
+
+	mon_info.mon_config = val;
+
+	/*
+	 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
+	 * domain. The MSRs offset from MSR_IA32_EVT_CFG_BASE
+	 * are scoped at the domain level. Writing any of these MSRs
+	 * on one CPU is observed by all the CPUs in the domain.
+	 */
+	smp_call_function_any(&d->cpu_mask, mon_event_config_write,
+			      &mon_info, 1);
+
+	/*
+	 * When an Event Configuration is changed, the bandwidth counters
+	 * for all RMIDs and Events will be cleared by the hardware. The
+	 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
+	 * every RMID on the next read to any event for every RMID.
+	 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
+	 * cleared while it is tracked by the hardware. Clear the
+	 * mbm_local and mbm_total counts for all the RMIDs.
+	 */
+	resctrl_arch_reset_rmid_all(r, d);
+}
+
+static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
+{
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	char *dom_str = NULL, *id_str;
+	unsigned long dom_id, val;
+	struct rdt_domain *d;
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+next:
+	if (!tok || tok[0] == '\0')
+		return 0;
+
+	/* Start processing the strings for each domain */
+	dom_str = strim(strsep(&tok, ";"));
+	id_str = strsep(&dom_str, "=");
+	if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
+		rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
+		return -EINVAL;
+	}
+	if (!dom_str || kstrtoul(dom_str, 16, &val)) {
+		rdt_last_cmd_puts("Non-numeric event configuration value\n");
+		return -EINVAL;
+	}
+
+	/* Value from user cannot be more than the supported set of events */
+	if ((val & hw_res->mbm_cfg_mask) != val) {
+		rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
+				    hw_res->mbm_cfg_mask);
+		return -EINVAL;
+	}
+
+	list_for_each_entry(d, &r->domains, list) {
+		if (d->id == dom_id) {
+			mbm_config_write_domain(r, d, evtid, val);
+			goto next;
+		}
+	}
+
+	/* Unknown domain: leave a reason in last_cmd_status like the errors above. */
+	rdt_last_cmd_printf("Unknown domain id %lu\n", dom_id);
+	return -EINVAL;
+}
+
+static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes,
+					    loff_t off)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+	int err;
+
+	/* Valid input requires a trailing newline */
+	if (!nbytes || buf[nbytes - 1] != '\n')
+		return -EINVAL;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+	rdt_last_cmd_clear();
+
+	/* Replace the newline with a NUL before handing @buf to the parser. */
+	buf[nbytes - 1] = '\0';
+	err = mon_config_write(res, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
+
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+	if (err)
+		return err;
+	return nbytes;
+}
+
+static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes,
+					    loff_t off)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+	int err;
+
+	/* Valid input requires a trailing newline */
+	if (!nbytes || buf[nbytes - 1] != '\n')
+		return -EINVAL;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+
+	rdt_last_cmd_clear();
+
+	/* Replace the newline with a NUL before handing @buf to the parser. */
+	buf[nbytes - 1] = '\0';
+	err = mon_config_write(res, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
+
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+
+	return err ? err : nbytes;
+}
+
+/* resctrl file definitions: info files and resource group files (see each entry's fflags). */
+static struct rftype res_common_files[] = {
+	{
+		.name		= "last_cmd_status",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_last_cmd_status_show,
+		.fflags		= RFTYPE_TOP_INFO,
+	},
+	{
+		.name		= "num_closids",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_num_closids_show,
+		.fflags		= RFTYPE_CTRL_INFO,
+	},
+	{
+		.name		= "mon_features",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_mon_features_show,
+		.fflags		= RFTYPE_MON_INFO,
+	},
+	{
+		.name		= "num_rmids",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_num_rmids_show,
+		.fflags		= RFTYPE_MON_INFO,
+	},
+	{
+		.name		= "cbm_mask",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_default_ctrl_show,
+		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+	},
+	{
+		.name		= "min_cbm_bits",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_min_cbm_bits_show,
+		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+	},
+	{
+		.name		= "shareable_bits",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_shareable_bits_show,
+		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+	},
+	{
+		.name		= "bit_usage",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_bit_usage_show,
+		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+	},
+	{
+		.name		= "min_bandwidth",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_min_bw_show,
+		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
+	},
+	{
+		.name		= "bandwidth_gran",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_bw_gran_show,
+		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
+	},
+	{
+		.name		= "delay_linear",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_delay_linear_show,
+		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
+	},
+	/*
+	 * Platform specific which (if any) capabilities are provided by
+	 * thread_throttle_mode. Defer "fflags" initialization to platform
+	 * discovery.
+	 */
+	{
+		.name		= "thread_throttle_mode",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_thread_throttle_mode_show,
+	},
+	{
+		.name		= "max_threshold_occupancy",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= max_threshold_occ_write,
+		.seq_show	= max_threshold_occ_show,
+		.fflags		= RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
+	},
+	{
+		.name		= "mbm_total_bytes_config",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= mbm_total_bytes_config_show,
+		.write		= mbm_total_bytes_config_write,
+	},
+	{
+		.name		= "mbm_local_bytes_config",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= mbm_local_bytes_config_show,
+		.write		= mbm_local_bytes_config_write,
+	},
+	{
+		.name		= "cpus",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_cpus_write,
+		.seq_show	= rdtgroup_cpus_show,
+		.fflags		= RFTYPE_BASE,
+	},
+	{
+		.name		= "cpus_list",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_cpus_write,
+		.seq_show	= rdtgroup_cpus_show,
+		.flags		= RFTYPE_FLAGS_CPUS_LIST,
+		.fflags		= RFTYPE_BASE,
+	},
+	{
+		.name		= "tasks",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_tasks_write,
+		.seq_show	= rdtgroup_tasks_show,
+		.fflags		= RFTYPE_BASE,
+	},
+	{
+		.name		= "mon_hw_id",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdtgroup_rmid_show,
+		.fflags		= RFTYPE_MON_BASE | RFTYPE_DEBUG,
+	},
+	{
+		.name		= "schemata",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_schemata_write,
+		.seq_show	= rdtgroup_schemata_show,
+		.fflags		= RFTYPE_CTRL_BASE,
+	},
+	{
+		.name		= "mode",
+		.mode		= 0644,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.write		= rdtgroup_mode_write,
+		.seq_show	= rdtgroup_mode_show,
+		.fflags		= RFTYPE_CTRL_BASE,
+	},
+	{
+		.name		= "size",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdtgroup_size_show,
+		.fflags		= RFTYPE_CTRL_BASE,
+	},
+	{
+		.name		= "sparse_masks",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_has_sparse_bitmasks_show,
+		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+	},
+	{
+		.name		= "ctrl_hw_id",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdtgroup_closid_show,
+		.fflags		= RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
+	},
+
+};
+
+/*
+ * Populate @kn with the files from res_common_files that apply to it.
+ *
+ * An entry is added when its fflags are non-zero and every bit of them is
+ * present in @fflags. RFTYPE_DEBUG is OR-ed into @fflags when resctrl_debug
+ * is set, so debug-only entries are included in that case.
+ *
+ * Must be called with rdtgroup_mutex held.
+ *
+ * Return: 0 on success. On failure the error from rdtgroup_add_file() is
+ * returned after the files added by this call have been removed again.
+ */
+static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
+{
+	struct rftype *rfts, *rft;
+	int ret, len;
+
+	rfts = res_common_files;
+	len = ARRAY_SIZE(res_common_files);
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	if (resctrl_debug)
+		fflags |= RFTYPE_DEBUG;
+
+	for (rft = rfts; rft < rfts + len; rft++) {
+		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
+			ret = rdtgroup_add_file(kn, rft);
+			if (ret)
+				goto error;
+		}
+	}
+
+	return 0;
+error:
+	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
+	/*
+	 * Undo the entries preceding the one that failed. Use the same
+	 * predicate as the add loop so that only files this call created
+	 * are removed, and step the cursor back inside the loop body so it
+	 * never points before the start of the array.
+	 */
+	while (rft > rfts) {
+		rft--;
+		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags))
+			kernfs_remove_by_name(kn, rft->name);
+	}
+	return ret;
+}
+
+/*
+ * Find the res_common_files entry whose name equals @name.
+ *
+ * Return: pointer to the first matching entry, or NULL when there is none.
+ */
+static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(res_common_files); idx++) {
+		struct rftype *entry = &res_common_files[idx];
+
+		if (strcmp(entry->name, name) == 0)
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Give the "thread_throttle_mode" entry of res_common_files its fflags
+ * (RFTYPE_CTRL_INFO | RFTYPE_RES_MB). Does nothing when the entry is
+ * not found.
+ */
+void __init thread_throttle_mode_init(void)
+{
+	struct rftype *entry;
+
+	entry = rdtgroup_get_rftype_by_name("thread_throttle_mode");
+	if (entry)
+		entry->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB;
+}
+
+/*
+ * Give the res_common_files entry named @config its fflags
+ * (RFTYPE_MON_INFO | RFTYPE_RES_CACHE). Unknown names are ignored.
+ */
+void __init mbm_config_rftype_init(const char *config)
+{
+	struct rftype *entry = rdtgroup_get_rftype_by_name(config);
+
+	if (!entry)
+		return;
+
+	entry->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE;
+}
+
+/**
+ * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
+ * @r: The resource group with which the file is associated.
+ * @name: Name of the file
+ *
+ * The mode of the named resctrl file, directory, or link is replaced by
+ * its bare file-type bits, so no read, write, or execute permission
+ * remains for any user.
+ *
+ * WARNING: This only signals to the user that the resctrl file is locked
+ * down because it is not relevant to the current state of the system. It
+ * is not a protection mechanism: after the permissions are restricted the
+ * user can still change them again with chmod from the command line.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
+{
+	struct iattr attr = {.ia_valid = ATTR_MODE,};
+	struct kernfs_node *target;
+	int err;
+
+	target = kernfs_find_and_get_ns(r->kn, name, NULL);
+	if (!target)
+		return -ENOENT;
+
+	/* Keep only the file-type bits; all permission bits stay clear. */
+	switch (kernfs_type(target)) {
+	case KERNFS_DIR:
+		attr.ia_mode = S_IFDIR;
+		break;
+	case KERNFS_FILE:
+		attr.ia_mode = S_IFREG;
+		break;
+	case KERNFS_LINK:
+		attr.ia_mode = S_IFLNK;
+		break;
+	}
+
+	err = kernfs_setattr(target, &attr);
+	/* Drop the reference taken by kernfs_find_and_get_ns(). */
+	kernfs_put(target);
+	return err;
+}
+
+/**
+ * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
+ * @r: The resource group with which the file is associated.
+ * @name: Name of the file
+ * @mask: Mask of permissions that should be restored
+ *
+ * Restore the permissions of the named file: the mode recorded for @name
+ * in res_common_files, limited to the bits in @mask. If @name has no entry
+ * in that table the starting permissions are zero. If @name is a directory
+ * the mode of its parent is OR-ed in as well.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
+			     umode_t mask)
+{
+	struct iattr iattr = {.ia_valid = ATTR_MODE,};
+	struct kernfs_node *kn, *parent;
+	struct rftype *rft;
+	int ret;
+
+	/*
+	 * Use the shared table lookup instead of an open-coded scan.
+	 * iattr.ia_mode remains zero when @name is not in the table.
+	 */
+	rft = rdtgroup_get_rftype_by_name(name);
+	if (rft)
+		iattr.ia_mode = rft->mode & mask;
+
+	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
+	if (!kn)
+		return -ENOENT;
+
+	switch (kernfs_type(kn)) {
+	case KERNFS_DIR:
+		parent = kernfs_get_parent(kn);
+		if (parent) {
+			iattr.ia_mode |= parent->mode;
+			kernfs_put(parent);
+		}
+		iattr.ia_mode |= S_IFDIR;
+		break;
+	case KERNFS_FILE:
+		iattr.ia_mode |= S_IFREG;
+		break;
+	case KERNFS_LINK:
+		iattr.ia_mode |= S_IFLNK;
+		break;
+	}
+
+	ret = kernfs_setattr(kn, &iattr);
+	/* Drop the reference taken by kernfs_find_and_get_ns(). */
+	kernfs_put(kn);
+	return ret;
+}
+
+/*
+ * Create the sub-directory @name below kn_info, with @priv as its private
+ * data, and populate it with the res_common_files entries selected by
+ * @fflags. The directory is activated only once all files were added.
+ *
+ * On failure the partially built directory is left in place; this
+ * function does not remove it.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
+				      unsigned long fflags)
+{
+	struct kernfs_node *subdir;
+	int err;
+
+	subdir = kernfs_create_dir(kn_info, name, kn_info->mode, priv);
+	if (IS_ERR(subdir))
+		return PTR_ERR(subdir);
+
+	err = rdtgroup_kn_set_ugid(subdir);
+	if (err)
+		return err;
+
+	err = rdtgroup_add_files(subdir, fflags);
+	if (err)
+		return err;
+
+	kernfs_activate(subdir);
+	return 0;
+}
+
+/*
+ * Create the "info" directory below @parent_kn and fill it:
+ *  - the top-level info files (RFTYPE_TOP_INFO),
+ *  - one sub-directory per schema on resctrl_schema_all, named after the
+ *    schema, holding that resource's control info files,
+ *  - one "<resource>_MON" sub-directory per monitoring capable resource.
+ *
+ * The created node is stored in the file-scope kn_info. On any failure
+ * the whole "info" directory is removed again.
+ *
+ * Return: 0 on success, <0 on failure (-EINVAL if a "<resource>_MON" name
+ * does not fit the local name buffer).
+ */
+static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
+{
+	struct resctrl_schema *s;
+	struct rdt_resource *r;
+	unsigned long fflags;
+	char name[32];
+	int ret;
+
+	/* create the directory */
+	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
+	if (IS_ERR(kn_info))
+		return PTR_ERR(kn_info);
+
+	ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
+	if (ret)
+		goto out_destroy;
+
+	/* loop over enabled controls, these are all alloc_capable */
+	list_for_each_entry(s, &resctrl_schema_all, list) {
+		r = s->res;
+		fflags = r->fflags | RFTYPE_CTRL_INFO;
+		ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
+		if (ret)
+			goto out_destroy;
+	}
+
+	for_each_mon_capable_rdt_resource(r) {
+		fflags = r->fflags | RFTYPE_MON_INFO;
+		/*
+		 * Bound the write to the stack buffer and treat truncation
+		 * as an error, matching how schemata_list_add() builds its
+		 * schema names.
+		 */
+		ret = snprintf(name, sizeof(name), "%s_MON", r->name);
+		if (ret >= sizeof(name)) {
+			ret = -EINVAL;
+			goto out_destroy;
+		}
+		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
+		if (ret)
+			goto out_destroy;
+	}
+
+	ret = rdtgroup_kn_set_ugid(kn_info);
+	if (ret)
+		goto out_destroy;
+
+	kernfs_activate(kn_info);
+
+	return 0;
+
+out_destroy:
+	kernfs_remove(kn_info);
+	return ret;
+}
+
+/*
+ * Create and activate the directory @name below @parent_kn, using the
+ * parent's mode and @prgrp as private data.
+ *
+ * When @dest_kn is not NULL the new node is stored there right after
+ * creation, i.e. before the ownership update that can still fail; on
+ * that failure the node is removed but *@dest_kn is not reset.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int
+mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
+		    char *name, struct kernfs_node **dest_kn)
+{
+	struct kernfs_node *dir;
+	int err;
+
+	dir = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	if (dest_kn)
+		*dest_kn = dir;
+
+	err = rdtgroup_kn_set_ugid(dir);
+	if (err) {
+		kernfs_remove(dir);
+		return err;
+	}
+
+	kernfs_activate(dir);
+	return 0;
+}
+
+/*
+ * Write MSR_IA32_L3_QOS_CFG on the current CPU: L3_QOS_CDP_ENABLE when
+ * the bool pointed to by @arg is true, zero otherwise.
+ */
+static void l3_qos_cfg_update(void *arg)
+{
+	bool *cdp_on = arg;
+
+	if (*cdp_on)
+		wrmsrl(MSR_IA32_L3_QOS_CFG, L3_QOS_CDP_ENABLE);
+	else
+		wrmsrl(MSR_IA32_L3_QOS_CFG, 0ULL);
+}
+
+/*
+ * Write MSR_IA32_L2_QOS_CFG on the current CPU: L2_QOS_CDP_ENABLE when
+ * the bool pointed to by @arg is true, zero otherwise.
+ */
+static void l2_qos_cfg_update(void *arg)
+{
+	bool *cdp_on = arg;
+
+	if (*cdp_on)
+		wrmsrl(MSR_IA32_L2_QOS_CFG, L2_QOS_CDP_ENABLE);
+	else
+		wrmsrl(MSR_IA32_L2_QOS_CFG, 0ULL);
+}
+
+/* Report the membw.delay_linear setting of the MBA resource. */
+static inline bool is_mba_linear(void)
+{
+	struct rdt_resource *mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+
+	return mba->membw.delay_linear;
+}
+
+/*
+ * Program the CDP enable state into the L2 or L3 QOS_CFG MSR for every
+ * domain of the selected cache resource.
+ *
+ * @level:  RDT_RESOURCE_L3 or RDT_RESOURCE_L2; any other value is rejected.
+ * @enable: state passed to the per-CPU MSR update callback.
+ *
+ * The caller must hold the CPU hotplug lock.
+ *
+ * Return: 0 on success, -EINVAL for an unsupported @level, -ENOMEM when the
+ * temporary cpumask cannot be allocated.
+ */
+static int set_cache_qos_cfg(int level, bool enable)
+{
+	void (*update)(void *arg);
+	struct rdt_resource *r_l;
+	cpumask_var_t cpu_mask;
+	struct rdt_domain *d;
+	int cpu;
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+	if (level == RDT_RESOURCE_L3)
+		update = l3_qos_cfg_update;
+	else if (level == RDT_RESOURCE_L2)
+		update = l2_qos_cfg_update;
+	else
+		return -EINVAL;
+
+	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	/* Collect the CPUs on which the MSR has to be written. */
+	r_l = &rdt_resources_all[level].r_resctrl;
+	list_for_each_entry(d, &r_l->domains, list) {
+		if (r_l->cache.arch_has_per_cpu_cfg)
+			/* Pick all the CPUs in the domain instance */
+			for_each_cpu(cpu, &d->cpu_mask)
+				cpumask_set_cpu(cpu, cpu_mask);
+		else
+			/* Pick one CPU from each domain instance to update MSR */
+			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+	}
+
+	/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
+	on_each_cpu_mask(cpu_mask, update, &enable, 1);
+
+	free_cpumask_var(cpu_mask);
+
+	return 0;
+}
+
+/*
+ * Re-apply the recorded CDP state of @r to the QOS_CFG MSR of the current
+ * CPU; used when a domain comes online. Only CDP capable L2 and L3
+ * resources are handled, everything else is a no-op.
+ */
+void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
+{
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+	if (!r->cdp_capable)
+		return;
+
+	switch (r->rid) {
+	case RDT_RESOURCE_L2:
+		l2_qos_cfg_update(&hw_res->cdp_enabled);
+		break;
+	case RDT_RESOURCE_L3:
+		l3_qos_cfg_update(&hw_res->cdp_enabled);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Allocate @d->mbps_val with one entry per CLOSID of @r, on the NUMA node
+ * of a CPU from the domain, and set every entry to MBA_MAX_MBPS.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails.
+ */
+static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
+{
+	u32 num_closid = resctrl_arch_get_num_closid(r);
+	int node = cpu_to_node(cpumask_any(&d->cpu_mask));
+	u32 idx;
+
+	d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
+				   GFP_KERNEL, node);
+	if (!d->mbps_val)
+		return -ENOMEM;
+
+	for (idx = 0; idx < num_closid; idx++)
+		d->mbps_val[idx] = MBA_MAX_MBPS;
+
+	return 0;
+}
+
+/*
+ * Free the per-CLOSID mbps_val array of @d and clear the pointer so it
+ * cannot be used or freed again. @r is not used.
+ */
+static void mba_sc_domain_destroy(struct rdt_resource *r,
+				  struct rdt_domain *d)
+{
+	kfree(d->mbps_val);
+	d->mbps_val = NULL;
+}
+
+/*
+ * The MBA software controller can only be used when local MBM is
+ * enabled, the MBA resource is allocation capable, and MBA uses a
+ * linear scale.
+ */
+static bool supports_mba_mbps(void)
+{
+	struct rdt_resource *mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+
+	if (!is_mbm_local_enabled())
+		return false;
+
+	if (!mba->alloc_capable)
+		return false;
+
+	return is_mba_linear();
+}
+
+/*
+ * Enable or disable the MBA software controller
+ * which helps user specify bandwidth in MBps.
+ *
+ * On a state change every per-CLOSID mbps_val entry of every MBA domain
+ * is reset to MBA_MAX_MBPS.
+ *
+ * Return: 0 on success, -EINVAL when the software controller is not
+ * supported or is already in the requested state.
+ */
+static int set_mba_sc(bool mba_sc)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+	u32 num_closid = resctrl_arch_get_num_closid(r);
+	struct rdt_domain *d;
+	int i;
+
+	/* Requesting the state that is already in effect is an error too */
+	if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
+		return -EINVAL;
+
+	r->membw.mba_sc = mba_sc;
+
+	list_for_each_entry(d, &r->domains, list) {
+		for (i = 0; i < num_closid; i++)
+			d->mbps_val[i] = MBA_MAX_MBPS;
+	}
+
+	return 0;
+}
+
+/*
+ * Turn CDP on for the cache resource @level. The cdp_enabled flag is set
+ * only after the QOS_CFG update succeeded.
+ *
+ * Return: 0 on success, -EINVAL if the resource is not allocation
+ * capable, otherwise the error from set_cache_qos_cfg().
+ */
+static int cdp_enable(int level)
+{
+	struct rdt_resource *res = &rdt_resources_all[level].r_resctrl;
+	int err;
+
+	if (!res->alloc_capable)
+		return -EINVAL;
+
+	err = set_cache_qos_cfg(level, true);
+	if (err)
+		return err;
+
+	rdt_resources_all[level].cdp_enabled = true;
+	return 0;
+}
+
+/*
+ * Turn CDP off for the cache resource @level if it is currently on. The
+ * result of the QOS_CFG update is not checked; the flag is cleared
+ * regardless.
+ */
+static void cdp_disable(int level)
+{
+	struct rdt_hw_resource *hw_res = &rdt_resources_all[level];
+
+	if (!hw_res->cdp_enabled)
+		return;
+
+	set_cache_qos_cfg(level, false);
+	hw_res->cdp_enabled = false;
+}
+
+/*
+ * Enable or disable CDP on resource @l.
+ *
+ * Return: -EINVAL if the resource is not CDP capable; for @enable the
+ * result of cdp_enable(); 0 after a disable request.
+ */
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
+{
+	if (!rdt_resources_all[l].r_resctrl.cdp_capable)
+		return -EINVAL;
+
+	if (!enable) {
+		cdp_disable(l);
+		return 0;
+	}
+
+	return cdp_enable(l);
+}
+
+/*
+ * rdtgroup directories can only be created in the root directory, so the
+ * rdtgroup structure for a kernfs node is found in one of two places:
+ * for a directory it is the node's own "priv" field, for a file it is
+ * the "priv" field of the parent directory.
+ */
+static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
+{
+	/* Files: the owning group hangs off the parent directory. */
+	if (kernfs_type(kn) != KERNFS_DIR)
+		return kn->parent->priv;
+
+	/*
+	 * "info" and its subdirectories are the only directories without
+	 * an rdtgroup structure behind them.
+	 */
+	if (kn == kn_info || kn->parent == kn_info)
+		return NULL;
+
+	/* Every other directory keeps its "struct rdtgroup" in "priv". */
+	return kn->priv;
+}
+
+/*
+ * Register a waiter on @rdtgrp by incrementing its waitcount, then break
+ * kernfs active protection on @kn. Undone by rdtgroup_kn_put().
+ */
+static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
+{
+	atomic_inc(&rdtgrp->waitcount);
+	kernfs_break_active_protection(kn);
+}
+
+/*
+ * Drop the waiter reference taken by rdtgroup_kn_get() and restore kernfs
+ * active protection on @kn.
+ *
+ * If this was the last waiter and the group is flagged RDT_DELETED, the
+ * group is torn down here: its pseudo-lock state is removed when the
+ * group is in a pseudo-lock mode, then rdtgroup_remove() is called.
+ */
+static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
+{
+	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+	    (rdtgrp->flags & RDT_DELETED)) {
+		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
+			rdtgroup_pseudo_lock_remove(rdtgrp);
+		kernfs_unbreak_active_protection(kn);
+		rdtgroup_remove(rdtgrp);
+	} else {
+		kernfs_unbreak_active_protection(kn);
+	}
+}
+
+/*
+ * Look up the rdtgroup behind @kn and lock it for use.
+ *
+ * When @kn maps to a group, a waiter reference is taken and then the CPU
+ * hotplug read lock and rdtgroup_mutex are acquired, in that order.
+ *
+ * Return: the group, or NULL in two distinct cases:
+ *  - @kn has no rdtgroup: nothing was acquired;
+ *  - the group was flagged RDT_DELETED while waiting: the reference and
+ *    both locks are still held.
+ * rdtgroup_kn_unlock() on the same @kn releases whatever was acquired
+ * here in either case.
+ */
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
+{
+	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
+
+	if (!rdtgrp)
+		return NULL;
+
+	rdtgroup_kn_get(rdtgrp, kn);
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+
+	/* Was this group deleted while we waited? */
+	if (rdtgrp->flags & RDT_DELETED)
+		return NULL;
+
+	return rdtgrp;
+}
+
+/*
+ * Counterpart of rdtgroup_kn_lock_live(): release rdtgroup_mutex and the
+ * CPU hotplug read lock, then drop the waiter reference on the group
+ * behind @kn. Does nothing when @kn has no rdtgroup.
+ */
+void rdtgroup_kn_unlock(struct kernfs_node *kn)
+{
+	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
+
+	if (!rdtgrp)
+		return;
+
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+
+	/* May tear the group down if it was deleted and we were the last waiter */
+	rdtgroup_kn_put(rdtgrp, kn);
+}
+
+/* Forward declaration: defined further down, but needed by rdt_get_tree(). */
+static int mkdir_mondata_all(struct kernfs_node *parent_kn,
+			     struct rdtgroup *prgrp,
+			     struct kernfs_node **mon_data_kn);
+
+/*
+ * Switch off everything rdt_enable_ctx() may have switched on: CDP on L3
+ * and L2, the MBA software controller, and debug mode. Return values are
+ * ignored, so features that were never enabled are simply skipped.
+ */
+static void rdt_disable_ctx(void)
+{
+	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
+	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
+	set_mba_sc(false);
+
+	resctrl_debug = false;
+}
+
+/*
+ * Apply the options collected in @ctx: L2 CDP, L3 CDP, the MBA software
+ * controller and debug mode, in that order.
+ *
+ * If a step fails, CDP is switched off again for the levels handled
+ * before it.
+ *
+ * Return: 0 on success, otherwise the error of the failing step.
+ */
+static int rdt_enable_ctx(struct rdt_fs_context *ctx)
+{
+	int ret = 0;
+
+	if (ctx->enable_cdpl2) {
+		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
+		if (ret)
+			goto out_done;
+	}
+
+	if (ctx->enable_cdpl3) {
+		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
+		if (ret)
+			goto out_cdpl2;
+	}
+
+	if (ctx->enable_mba_mbps) {
+		ret = set_mba_sc(true);
+		if (ret)
+			goto out_cdpl3;
+	}
+
+	if (ctx->enable_debug)
+		resctrl_debug = true;
+
+	return 0;
+
+	/* Unwind in reverse order of enabling */
+out_cdpl3:
+	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
+out_cdpl2:
+	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
+out_done:
+	return ret;
+}
+
+/*
+ * Allocate a schema for resource @r with configuration type @type and add
+ * it to resctrl_schema_all.
+ *
+ * The schema name is the resource name followed by "CODE", "DATA" or
+ * nothing, depending on @type. The number of CLOSIDs is halved when CDP
+ * is enabled on the resource. max_name_width is raised if this schema
+ * needs a wider column.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails, -EINVAL if the
+ * name does not fit into the schema's name buffer.
+ */
+static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
+{
+	struct resctrl_schema *s;
+	const char *suffix = "";
+	int ret, cl;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	s->res = r;
+	s->num_closid = resctrl_arch_get_num_closid(r);
+	if (resctrl_arch_get_cdp_enabled(r->rid))
+		s->num_closid /= 2;
+
+	s->conf_type = type;
+	switch (type) {
+	case CDP_CODE:
+		suffix = "CODE";
+		break;
+	case CDP_DATA:
+		suffix = "DATA";
+		break;
+	case CDP_NONE:
+		suffix = "";
+		break;
+	}
+
+	/* A return value >= the buffer size means the name was truncated */
+	ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
+	if (ret >= sizeof(s->name)) {
+		kfree(s);
+		return -EINVAL;
+	}
+
+	cl = strlen(s->name);
+
+	/*
+	 * If CDP is supported by this resource, but not enabled,
+	 * include the suffix. This ensures the tabular format of the
+	 * schemata file does not change between mounts of the filesystem.
+	 */
+	if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
+		cl += 4;
+
+	if (cl > max_name_width)
+		max_name_width = cl;
+
+	INIT_LIST_HEAD(&s->list);
+	list_add(&s->list, &resctrl_schema_all);
+
+	return 0;
+}
+
+/*
+ * Build resctrl_schema_all: every allocation capable resource gets a
+ * CODE and a DATA schema when CDP is enabled on it, a single plain schema
+ * otherwise. Stops at the first failure; schemas added up to that point
+ * stay on the list.
+ *
+ * Return: 0 on success, otherwise the error from schemata_list_add().
+ */
+static int schemata_list_create(void)
+{
+	struct rdt_resource *r;
+	int err;
+
+	for_each_alloc_capable_rdt_resource(r) {
+		if (!resctrl_arch_get_cdp_enabled(r->rid)) {
+			err = schemata_list_add(r, CDP_NONE);
+		} else {
+			err = schemata_list_add(r, CDP_CODE);
+			if (!err)
+				err = schemata_list_add(r, CDP_DATA);
+		}
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Unlink and free every schema on resctrl_schema_all. */
+static void schemata_list_destroy(void)
+{
+	struct resctrl_schema *s, *tmp;
+
+	/* _safe variant: entries are freed while walking the list */
+	list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
+		list_del(&s->list);
+		kfree(s);
+	}
+}
+
+/*
+ * fs_context ->get_tree() handler: mount the resctrl filesystem.
+ *
+ * Runs with the CPU hotplug read lock and rdtgroup_mutex held for the
+ * whole setup. The steps are: set up the kernfs root, apply the mount
+ * options, build the schema list, populate the default group, create the
+ * "info" directory and (when monitoring is available) "mon_groups" and
+ * "mon_data", initialise pseudo-locking and finally obtain the kernfs
+ * tree. On failure the steps already done are undone in reverse order.
+ *
+ * Return: 0 on success, -EBUSY if already mounted, otherwise the error of
+ * the failing step.
+ */
+static int rdt_get_tree(struct fs_context *fc)
+{
+	struct rdt_fs_context *ctx = rdt_fc2context(fc);
+	unsigned long flags = RFTYPE_CTRL_BASE;
+	struct rdt_domain *dom;
+	struct rdt_resource *r;
+	int ret;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+	/*
+	 * resctrl file system can only be mounted once.
+	 */
+	if (resctrl_mounted) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ret = rdtgroup_setup_root(ctx);
+	if (ret)
+		goto out;
+
+	ret = rdt_enable_ctx(ctx);
+	if (ret)
+		goto out_root;
+
+	ret = schemata_list_create();
+	if (ret) {
+		/* A partially built list has to be freed here */
+		schemata_list_destroy();
+		goto out_ctx;
+	}
+
+	closid_init();
+
+	if (resctrl_arch_mon_capable())
+		flags |= RFTYPE_MON;
+
+	ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
+	if (ret)
+		goto out_schemata_free;
+
+	kernfs_activate(rdtgroup_default.kn);
+
+	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
+	if (ret < 0)
+		goto out_schemata_free;
+
+	if (resctrl_arch_mon_capable()) {
+		ret = mongroup_create_dir(rdtgroup_default.kn,
+					  &rdtgroup_default, "mon_groups",
+					  &kn_mongrp);
+		if (ret < 0)
+			goto out_info;
+
+		ret = mkdir_mondata_all(rdtgroup_default.kn,
+					&rdtgroup_default, &kn_mondata);
+		if (ret < 0)
+			goto out_mongrp;
+		rdtgroup_default.mon.mon_data_kn = kn_mondata;
+	}
+
+	ret = rdt_pseudo_lock_init();
+	if (ret)
+		goto out_mondata;
+
+	ret = kernfs_get_tree(fc);
+	if (ret < 0)
+		goto out_psl;
+
+	if (resctrl_arch_alloc_capable())
+		resctrl_arch_enable_alloc();
+	if (resctrl_arch_mon_capable())
+		resctrl_arch_enable_mon();
+
+	if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
+		resctrl_mounted = true;
+
+	if (is_mbm_enabled()) {
+		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+		list_for_each_entry(dom, &r->domains, list)
+			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
+						   RESCTRL_PICK_ANY_CPU);
+	}
+
+	/* Success: skip the unwind labels, ret is 0 from kernfs_get_tree() */
+	goto out;
+
+out_psl:
+	rdt_pseudo_lock_release();
+out_mondata:
+	if (resctrl_arch_mon_capable())
+		kernfs_remove(kn_mondata);
+out_mongrp:
+	if (resctrl_arch_mon_capable())
+		kernfs_remove(kn_mongrp);
+out_info:
+	kernfs_remove(kn_info);
+out_schemata_free:
+	schemata_list_destroy();
+out_ctx:
+	rdt_disable_ctx();
+out_root:
+	rdtgroup_destroy_root();
+out:
+	/* Reached on success and on failure alike */
+	rdt_last_cmd_clear();
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+	return ret;
+}
+
+/* Identifiers of the resctrl mount options, see rdt_fs_parameters[]. */
+enum rdt_param {
+	Opt_cdp,
+	Opt_cdpl2,
+	Opt_mba_mbps,
+	Opt_debug,
+	nr__rdt_params
+};
+
+/*
+ * Mount option table: maps each option string to its enum rdt_param
+ * value. All options are plain flags. Terminated by an empty entry.
+ */
+static const struct fs_parameter_spec rdt_fs_parameters[] = {
+	fsparam_flag("cdp",		Opt_cdp),
+	fsparam_flag("cdpl2",		Opt_cdpl2),
+	fsparam_flag("mba_MBps",	Opt_mba_mbps),
+	fsparam_flag("debug",		Opt_debug),
+	{}
+};
+
+/*
+ * fs_context ->parse_param() handler: record a single mount option in
+ * the resctrl fs context. Nothing is enabled here; the recorded options
+ * are applied when the tree is set up.
+ *
+ * Return: 0 on success, the negative result of fs_parse() if the
+ * parameter is not recognised, -EINVAL for "mba_MBps" when the MBA
+ * software controller is not supported.
+ */
+static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct rdt_fs_context *ctx = rdt_fc2context(fc);
+	struct fs_parse_result result;
+	int opt;
+
+	opt = fs_parse(fc, rdt_fs_parameters, param, &result);
+	if (opt < 0)
+		return opt;
+
+	switch (opt) {
+	case Opt_cdp:
+		ctx->enable_cdpl3 = true;
+		break;
+	case Opt_cdpl2:
+		ctx->enable_cdpl2 = true;
+		break;
+	case Opt_mba_mbps:
+		if (!supports_mba_mbps())
+			return -EINVAL;
+		ctx->enable_mba_mbps = true;
+		break;
+	case Opt_debug:
+		ctx->enable_debug = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * fs_context ->free() handler: release the kernfs part of the context,
+ * then the rdt_fs_context allocated in rdt_init_fs_context().
+ */
+static void rdt_fs_context_free(struct fs_context *fc)
+{
+	struct rdt_fs_context *ctx = rdt_fc2context(fc);
+
+	kernfs_free_fs_context(fc);
+	kfree(ctx);
+}
+
+/* fs_context callbacks installed by rdt_init_fs_context(). */
+static const struct fs_context_operations rdt_fs_context_ops = {
+	.free		= rdt_fs_context_free,
+	.parse_param	= rdt_parse_param,
+	.get_tree	= rdt_get_tree,
+};
+
+/*
+ * file_system_type ->init_fs_context() handler: allocate a zeroed
+ * rdt_fs_context and attach it, together with the resctrl callbacks, to
+ * @fc.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails.
+ */
+static int rdt_init_fs_context(struct fs_context *fc)
+{
+	struct rdt_fs_context *ctx;
+
+	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
+	fc->fs_private = &ctx->kfc;
+	fc->ops = &rdt_fs_context_ops;
+	/* Replace the context's user namespace with init_user_ns */
+	put_user_ns(fc->user_ns);
+	fc->user_ns = get_user_ns(&init_user_ns);
+	fc->global = true;
+	return 0;
+}
+
+/*
+ * Reset every control value of resource @r to r->default_ctrl in all of
+ * its domains, then have one CPU per domain run rdt_ctrl_update() for the
+ * whole CLOSID range [0, num_closid).
+ *
+ * The caller must hold the CPU hotplug lock.
+ *
+ * Return: 0 on success, -ENOMEM if the temporary cpumask cannot be
+ * allocated.
+ */
+static int reset_all_ctrls(struct rdt_resource *r)
+{
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	struct rdt_hw_domain *hw_dom;
+	struct msr_param msr_param;
+	cpumask_var_t cpu_mask;
+	struct rdt_domain *d;
+	int i;
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	msr_param.res = r;
+	msr_param.low = 0;
+	msr_param.high = hw_res->num_closid;
+
+	/*
+	 * Disable resource control for this resource by setting all
+	 * CBMs in all domains to the maximum mask value. Pick one CPU
+	 * from each domain to update the MSRs below.
+	 */
+	list_for_each_entry(d, &r->domains, list) {
+		hw_dom = resctrl_to_arch_dom(d);
+		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+
+		for (i = 0; i < hw_res->num_closid; i++)
+			hw_dom->ctrl_val[i] = r->default_ctrl;
+	}
+
+	/* Update CBM on all the CPUs in cpu_mask */
+	on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1);
+
+	free_cpumask_var(cpu_mask);
+
+	return 0;
+}
+
+/*
+ * Move tasks from one to the other group. If @from is NULL, then all tasks
+ * in the systems are moved unconditionally (used for teardown).
+ *
+ * A task belongs to @from when either its closid or its rmid matches that
+ * group. Moved tasks get both the closid and the rmid of @to.
+ *
+ * If @mask is not NULL the cpus on which moved tasks are running are set
+ * in that mask so the update smp function call is restricted to affected
+ * cpus.
+ *
+ * The task list is walked under read_lock(&tasklist_lock).
+ */
+static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
+				 struct cpumask *mask)
+{
+	struct task_struct *p, *t;
+
+	read_lock(&tasklist_lock);
+	for_each_process_thread(p, t) {
+		if (!from || is_closid_match(t, from) ||
+		    is_rmid_match(t, from)) {
+			resctrl_arch_set_closid_rmid(t, to->closid,
+						     to->mon.rmid);
+
+			/*
+			 * Order the closid/rmid stores above before the loads
+			 * in task_curr(). This pairs with the full barrier
+			 * between the rq->curr update and resctrl_sched_in()
+			 * during context switch.
+			 */
+			smp_mb();
+
+			/*
+			 * If the task is on a CPU, set the CPU in the mask.
+			 * The detection is inaccurate as tasks might move or
+			 * schedule before the smp function call takes place.
+			 * In such a case the function call is pointless, but
+			 * there is no other side effect.
+			 */
+			if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
+				cpumask_set_cpu(task_cpu(t), mask);
+		}
+	}
+	read_unlock(&tasklist_lock);
+}
+
+/*
+ * Release every child group on @rdtgrp's mon.crdtgrp_list: free its RMID
+ * and unlink it. A child that still has waiters is only flagged
+ * RDT_DELETED (overwriting its other flags); a child without waiters is
+ * removed right away.
+ */
+static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
+{
+	struct rdtgroup *sentry, *stmp;
+	struct list_head *head;
+
+	head = &rdtgrp->mon.crdtgrp_list;
+	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
+		free_rmid(sentry->closid, sentry->mon.rmid);
+		list_del(&sentry->mon.crdtgrp_list);
+
+		/* Waiters present: defer the removal to the last waiter */
+		if (atomic_read(&sentry->waitcount) != 0)
+			sentry->flags = RDT_DELETED;
+		else
+			rdtgroup_remove(sentry);
+	}
+}
+
+/*
+ * Forcibly remove all of subdirectories under root.
+ *
+ * All tasks and the CPUs of the removed groups are handed back to the
+ * default group. The child groups of the default group are freed as
+ * well, but the default group itself is kept. Finally the "info",
+ * "mon_groups" and "mon_data" nodes of the root are removed.
+ */
+static void rmdir_all_sub(void)
+{
+	struct rdtgroup *rdtgrp, *tmp;
+
+	/* Move all tasks to the default resource group */
+	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
+
+	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
+		/* Free any child rmids */
+		free_all_child_rdtgrp(rdtgrp);
+
+		/* Remove each rdtgroup other than root */
+		if (rdtgrp == &rdtgroup_default)
+			continue;
+
+		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
+			rdtgroup_pseudo_lock_remove(rdtgrp);
+
+		/*
+		 * Give any CPUs back to the default group. We cannot copy
+		 * cpu_online_mask because a CPU might have executed the
+		 * offline callback already, but is still marked online.
+		 */
+		cpumask_or(&rdtgroup_default.cpu_mask,
+			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
+
+		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+
+		kernfs_remove(rdtgrp->kn);
+		list_del(&rdtgrp->rdtgroup_list);
+
+		/* Waiters present: defer the removal to the last waiter */
+		if (atomic_read(&rdtgrp->waitcount) != 0)
+			rdtgrp->flags = RDT_DELETED;
+		else
+			rdtgroup_remove(rdtgrp);
+	}
+	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
+	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
+
+	kernfs_remove(kn_info);
+	kernfs_remove(kn_mongrp);
+	kernfs_remove(kn_mondata);
+}
+
+/*
+ * file_system_type ->kill_sb() handler: unmount resctrl.
+ *
+ * Under the CPU hotplug read lock and rdtgroup_mutex this disables the
+ * mount options, resets all controls to their defaults, removes all
+ * groups and directories, releases pseudo-locking, frees the schema list
+ * and the kernfs root, disables allocation/monitoring and clears
+ * resctrl_mounted before the superblock is killed.
+ */
+static void rdt_kill_sb(struct super_block *sb)
+{
+	struct rdt_resource *r;
+
+	cpus_read_lock();
+	mutex_lock(&rdtgroup_mutex);
+
+	rdt_disable_ctx();
+
+	/* Put everything back to default values. */
+	for_each_alloc_capable_rdt_resource(r)
+		reset_all_ctrls(r);
+	rmdir_all_sub();
+	rdt_pseudo_lock_release();
+	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
+	schemata_list_destroy();
+	rdtgroup_destroy_root();
+	if (resctrl_arch_alloc_capable())
+		resctrl_arch_disable_alloc();
+	if (resctrl_arch_mon_capable())
+		resctrl_arch_disable_mon();
+	resctrl_mounted = false;
+	kernfs_kill_sb(sb);
+	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
+}
+
+/* The "resctrl" filesystem type: context setup, mount options, teardown. */
+static struct file_system_type rdt_fs_type = {
+	.name			= "resctrl",
+	.init_fs_context	= rdt_init_fs_context,
+	.parameters		= rdt_fs_parameters,
+	.kill_sb		= rdt_kill_sb,
+};
+
+/*
+ * Create the read-only (0444) monitoring file @name below @parent_kn,
+ * backed by kf_mondata_ops with @priv as private data, and set its
+ * ownership. The file is removed again if setting the ownership fails.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
+		       void *priv)
+{
+	struct kernfs_node *file;
+	int err;
+
+	file = __kernfs_create_file(parent_kn, name, 0444,
+				    GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
+				    &kf_mondata_ops, priv, NULL, NULL);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	err = rdtgroup_kn_set_ugid(file);
+	if (err)
+		kernfs_remove(file);
+
+	return err;
+}
+
+/*
+ * Remove all subdirectories of mon_data of ctrl_mon groups
+ * and monitor groups with given domain id.
+ *
+ * The directory is named "mon_<resource name>_<domain id>", the same
+ * format mkdir_mondata_subdir() uses when creating it.
+ */
+static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+					   unsigned int dom_id)
+{
+	struct rdtgroup *prgrp, *crgrp;
+	char name[32];
+
+	/* The name is the same for every group: build it once up front */
+	sprintf(name, "mon_%s_%02d", r->name, dom_id);
+
+	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
+
+		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
+			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
+	}
+}
+
+/*
+ * Create the directory "mon_<resource name>_<domain id>" below @parent_kn
+ * for domain @d of resource @r, with one file per event on r->evt_list.
+ *
+ * Each file's private data encodes the resource id, domain id and event
+ * id. The directory is activated only after all files were added and is
+ * removed again on any failure.
+ *
+ * Return: 0 on success, -EPERM if the resource has no events, otherwise
+ * the error of the failing step.
+ */
+static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
+				struct rdt_domain *d,
+				struct rdt_resource *r, struct rdtgroup *prgrp)
+{
+	union mon_data_bits priv;
+	struct kernfs_node *kn;
+	struct mon_evt *mevt;
+	struct rmid_read rr;
+	char name[32];
+	int ret;
+
+	sprintf(name, "mon_%s_%02d", r->name, d->id);
+	/* create the directory */
+	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+	if (IS_ERR(kn))
+		return PTR_ERR(kn);
+
+	ret = rdtgroup_kn_set_ugid(kn);
+	if (ret)
+		goto out_destroy;
+
+	if (WARN_ON(list_empty(&r->evt_list))) {
+		ret = -EPERM;
+		goto out_destroy;
+	}
+
+	priv.u.rid = r->rid;
+	priv.u.domid = d->id;
+	list_for_each_entry(mevt, &r->evt_list, list) {
+		priv.u.evtid = mevt->evtid;
+		ret = mon_addfile(kn, mevt->name, priv.priv);
+		if (ret)
+			goto out_destroy;
+
+		/*
+		 * NOTE(review): the trailing "true" presumably marks this as
+		 * the first read of the MBM counter - confirm against
+		 * mon_event_read().
+		 */
+		if (is_mbm_event(mevt->evtid))
+			mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
+	}
+	kernfs_activate(kn);
+	return 0;
+
+out_destroy:
+	kernfs_remove(kn);
+	return ret;
+}
+
+/*
+ * Add all subdirectories of mon_data for "ctrl_mon" groups
+ * and "monitor" groups with given domain id.
+ *
+ * The result of each mkdir_mondata_subdir() call is ignored, so a failure
+ * for one group does not stop the remaining groups from being handled.
+ */
+static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+					   struct rdt_domain *d)
+{
+	struct kernfs_node *parent_kn;
+	struct rdtgroup *prgrp, *crgrp;
+	struct list_head *head;
+
+	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+		parent_kn = prgrp->mon.mon_data_kn;
+		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
+
+		/* Then the monitor groups that are children of this group */
+		head = &prgrp->mon.crdtgrp_list;
+		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+			parent_kn = crgrp->mon.mon_data_kn;
+			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
+		}
+	}
+}
+
+/*
+ * Create a mon_data subdirectory below @parent_kn for every domain of
+ * resource @r. Stops at the first failure; subdirectories created up to
+ * that point are not removed here.
+ *
+ * The caller must hold the CPU hotplug lock.
+ *
+ * Return: 0 on success, otherwise the error from mkdir_mondata_subdir().
+ */
+static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
+				       struct rdt_resource *r,
+				       struct rdtgroup *prgrp)
+{
+	struct rdt_domain *dom;
+	int ret;
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+	list_for_each_entry(dom, &r->domains, list) {
+		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * This creates a directory mon_data which contains the monitored data.
+ *
+ * mon_data has one directory for each domain which are named
+ * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
+ * with L3 domain looks as below:
+ * ./mon_data:
+ * mon_L3_00
+ * mon_L3_01
+ * mon_L3_02
+ * ...
+ *
+ * Each domain directory has one file per event:
+ * ./mon_L3_00/:
+ * llc_occupancy
+ *
+ * When @dest_kn is not NULL the mon_data node is stored there before the
+ * domain subdirectories are created; if creating them fails, mon_data is
+ * removed again but *@dest_kn is not reset.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mkdir_mondata_all(struct kernfs_node *parent_kn,
+			     struct rdtgroup *prgrp,
+			     struct kernfs_node **dest_kn)
+{
+	struct rdt_resource *r;
+	struct kernfs_node *kn;
+	int ret;
+
+	/*
+	 * Create the mon_data directory first.
+	 */
+	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
+	if (ret)
+		return ret;
+
+	if (dest_kn)
+		*dest_kn = kn;
+
+	/*
+	 * Create the subdirectories for each domain. Note that all events
+	 * in a domain like L3 are grouped into a resource whose domain is L3
+	 */
+	for_each_mon_capable_rdt_resource(r) {
+		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
+		if (ret)
+			goto out_destroy;
+	}
+
+	return 0;
+
+out_destroy:
+	kernfs_remove(kn);
+	return ret;
+}
+
+/**
+ * cbm_ensure_valid - Enforce validity on provided CBM
+ * @_val:	Candidate CBM
+ * @r:		RDT resource to which the CBM belongs
+ *
+ * The provided CBM represents all cache portions available for use. This
+ * may be represented by a bitmap that does not consist of contiguous ones
+ * and thus be an invalid CBM.
+ * Here the provided CBM is forced to be a valid CBM by only considering
+ * the first set of contiguous bits as valid and clearing all other bits
+ * above it, up to the CBM length of @r.
+ * The intention here is to provide a valid default CBM with which a new
+ * resource group is initialized. The user can follow this with a
+ * modification to the CBM if the default does not satisfy the
+ * requirements.
+ *
+ * Return: The adjusted CBM, or 0 if @_val is 0.
+ */
+static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
+{
+	unsigned int cbm_len = r->cache.cbm_len;
+	unsigned long first_bit, zero_bit;
+	/* The bitmap helpers below operate on unsigned long */
+	unsigned long val = _val;
+
+	if (!val)
+		return 0;
+
+	/* Locate the lowest run of ones: [first_bit, zero_bit) */
+	first_bit = find_first_bit(&val, cbm_len);
+	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
+
+	/* Clear any remaining bits to ensure contiguous region */
+	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+	return (u32)val;
+}
+
+/*
+ * Initialize cache resources per RDT domain
+ *
+ * Set the RDT domain up to start off with all usable allocations. That is,
+ * all shareable and unused bits. All-zero CBM is invalid.
+ *
+ * The result is stored in the staged config of @d for the schema's
+ * configuration type; have_new_ctrl is set only on success.
+ *
+ * Return: 0 on success, -ENOSPC if the resulting CBM has fewer bits set
+ * than the resource's min_cbm_bits.
+ */
+static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
+				 u32 closid)
+{
+	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
+	enum resctrl_conf_type t = s->conf_type;
+	struct resctrl_staged_config *cfg;
+	struct rdt_resource *r = s->res;
+	u32 used_b = 0, unused_b = 0;
+	unsigned long tmp_cbm;
+	enum rdtgrp_mode mode;
+	u32 peer_ctl, ctrl_val;
+	int i;
+
+	cfg = &d->staged_config[t];
+	cfg->have_new_ctrl = false;
+	cfg->new_ctrl = r->cache.shareable_bits;
+	used_b = r->cache.shareable_bits;
+	/* Gather the bits used by every other allocated CLOSID */
+	for (i = 0; i < closids_supported(); i++) {
+		if (closid_allocated(i) && i != closid) {
+			mode = rdtgroup_mode_by_closid(i);
+			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
+				/*
+				 * ctrl values for locksetup aren't relevant
+				 * until the schemata is written, and the mode
+				 * becomes RDT_MODE_PSEUDO_LOCKED.
+				 */
+				continue;
+			/*
+			 * If CDP is active include peer domain's
+			 * usage to ensure there is no overlap
+			 * with an exclusive group.
+			 */
+			if (resctrl_arch_get_cdp_enabled(r->rid))
+				peer_ctl = resctrl_arch_get_config(r, d, i,
+								   peer_type);
+			else
+				peer_ctl = 0;
+			ctrl_val = resctrl_arch_get_config(r, d, i,
+							   s->conf_type);
+			used_b |= ctrl_val | peer_ctl;
+			/* Bits of shareable groups may be reused by the new group */
+			if (mode == RDT_MODE_SHAREABLE)
+				cfg->new_ctrl |= ctrl_val | peer_ctl;
+		}
+	}
+	/* A pseudo-locked region in this domain counts as used */
+	if (d->plr && d->plr->cbm > 0)
+		used_b |= d->plr->cbm;
+	/* Unused bits: the complement of used_b within the CBM length */
+	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
+	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
+	cfg->new_ctrl |= unused_b;
+	/*
+	 * Force the initial CBM to be valid, user can
+	 * modify the CBM based on system availability.
+	 */
+	cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
+	/*
+	 * Assign the u32 CBM to an unsigned long to ensure that
+	 * bitmap_weight() does not access out-of-bound memory.
+	 */
+	tmp_cbm = cfg->new_ctrl;
+	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
+		rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
+		return -ENOSPC;
+	}
+	cfg->have_new_ctrl = true;
+
+	return 0;
+}
+
+/*
+ * Initialize cache resources with default values.
+ *
+ * A new RDT group is being created on a system that supports cache
+ * allocation (CAT). Set this group up to start off with all usable
+ * allocations.
+ *
+ * If there are no more shareable bits available on any domain then
+ * the entire allocation will fail.
+ */
+static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
+{
+	struct rdt_domain *d;
+	int ret;
+
+	list_for_each_entry(d, &s->res->domains, list) {
+		ret = __init_one_rdt_domain(d, s, closid);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Initialize MBA resource with default values. */
+static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
+{
+	struct resctrl_staged_config *cfg;
+	struct rdt_domain *d;
+
+	list_for_each_entry(d, &r->domains, list) {
+		if (is_mba_sc(r)) {
+			d->mbps_val[closid] = MBA_MAX_MBPS;
+			continue;
+		}
+
+		cfg = &d->staged_config[CDP_NONE];
+		cfg->new_ctrl = r->default_ctrl;
+		cfg->have_new_ctrl = true;
+	}
+}
+
+/* Initialize the RDT group's allocations. */
+static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
+{
+	struct resctrl_schema *s;
+	struct rdt_resource *r;
+	int ret = 0;
+
+	rdt_staged_configs_clear();
+
+	list_for_each_entry(s, &resctrl_schema_all, list) {
+		r = s->res;
+		if (r->rid == RDT_RESOURCE_MBA ||
+		    r->rid == RDT_RESOURCE_SMBA) {
+			rdtgroup_init_mba(r, rdtgrp->closid);
+			if (is_mba_sc(r))
+				continue;
+		} else {
+			ret = rdtgroup_init_cat(s, rdtgrp->closid);
+			if (ret < 0)
+				goto out;
+		}
+
+		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
+		if (ret < 0) {
+			rdt_last_cmd_puts("Failed to initialize allocations\n");
+			goto out;
+		}
+
+	}
+
+	rdtgrp->mode = RDT_MODE_SHAREABLE;
+
+out:
+	rdt_staged_configs_clear();
+	return ret;
+}
+
+static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
+{
+	int ret;
+
+	if (!resctrl_arch_mon_capable())
+		return 0;
+
+	ret = alloc_rmid(rdtgrp->closid);
+	if (ret < 0) {
+		rdt_last_cmd_puts("Out of RMIDs\n");
+		return ret;
+	}
+	rdtgrp->mon.rmid = ret;
+
+	ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
+	if (ret) {
+		rdt_last_cmd_puts("kernfs subdir error\n");
+		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
+{
+	if (resctrl_arch_mon_capable())
+		free_rmid(rgrp->closid, rgrp->mon.rmid);
+}
+
+static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
+			     const char *name, umode_t mode,
+			     enum rdt_group_type rtype, struct rdtgroup **r)
+{
+	struct rdtgroup *prdtgrp, *rdtgrp;
+	unsigned long files = 0;
+	struct kernfs_node *kn;
+	int ret;
+
+	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
+	if (!prdtgrp) {
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	if (rtype == RDTMON_GROUP &&
+	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
+		ret = -EINVAL;
+		rdt_last_cmd_puts("Pseudo-locking in progress\n");
+		goto out_unlock;
+	}
+
+	/* allocate the rdtgroup. */
+	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
+	if (!rdtgrp) {
+		ret = -ENOSPC;
+		rdt_last_cmd_puts("Kernel out of memory\n");
+		goto out_unlock;
+	}
+	*r = rdtgrp;
+	rdtgrp->mon.parent = prdtgrp;
+	rdtgrp->type = rtype;
+	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
+
+	/* kernfs creates the directory for rdtgrp */
+	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
+	if (IS_ERR(kn)) {
+		ret = PTR_ERR(kn);
+		rdt_last_cmd_puts("kernfs create error\n");
+		goto out_free_rgrp;
+	}
+	rdtgrp->kn = kn;
+
+	/*
+	 * kernfs_remove() will drop the reference count on "kn" which
+	 * will free it. But we still need it to stick around for the
+	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
+	 * which will be dropped by kernfs_put() in rdtgroup_remove().
+	 */
+	kernfs_get(kn);
+
+	ret = rdtgroup_kn_set_ugid(kn);
+	if (ret) {
+		rdt_last_cmd_puts("kernfs perm error\n");
+		goto out_destroy;
+	}
+
+	if (rtype == RDTCTRL_GROUP) {
+		files = RFTYPE_BASE | RFTYPE_CTRL;
+		if (resctrl_arch_mon_capable())
+			files |= RFTYPE_MON;
+	} else {
+		files = RFTYPE_BASE | RFTYPE_MON;
+	}
+
+	ret = rdtgroup_add_files(kn, files);
+	if (ret) {
+		rdt_last_cmd_puts("kernfs fill error\n");
+		goto out_destroy;
+	}
+
+	/*
+	 * The caller unlocks the parent_kn upon success.
+	 */
+	return 0;
+
+out_destroy:
+	kernfs_put(rdtgrp->kn);
+	kernfs_remove(rdtgrp->kn);
+out_free_rgrp:
+	kfree(rdtgrp);
+out_unlock:
+	rdtgroup_kn_unlock(parent_kn);
+	return ret;
+}
+
+static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
+{
+	kernfs_remove(rgrp->kn);
+	rdtgroup_remove(rgrp);
+}
+
+/*
+ * Create a monitor group under the "mon_groups" directory of a control
+ * and monitor group (ctrl_mon). This is a resource group
+ * to monitor a subset of tasks and CPUs in its parent ctrl_mon group.
+ */
+static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
+			      const char *name, umode_t mode)
+{
+	struct rdtgroup *rdtgrp, *prgrp;
+	int ret;
+
+	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
+	if (ret)
+		return ret;
+
+	prgrp = rdtgrp->mon.parent;
+	rdtgrp->closid = prgrp->closid;
+
+	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+	if (ret) {
+		mkdir_rdt_prepare_clean(rdtgrp);
+		goto out_unlock;
+	}
+
+	kernfs_activate(rdtgrp->kn);
+
+	/*
+	 * Add the rdtgrp to the list of rdtgrps the parent
+	 * ctrl_mon group has to track.
+	 */
+	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
+
+out_unlock:
+	rdtgroup_kn_unlock(parent_kn);
+	return ret;
+}
+
+/*
+ * These are rdtgroups created under the root directory. They can be used
+ * to allocate and monitor resources.
+ */
+static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
+				   const char *name, umode_t mode)
+{
+	struct rdtgroup *rdtgrp;
+	struct kernfs_node *kn;
+	u32 closid;
+	int ret;
+
+	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
+	if (ret)
+		return ret;
+
+	kn = rdtgrp->kn;
+	ret = closid_alloc();
+	if (ret < 0) {
+		rdt_last_cmd_puts("Out of CLOSIDs\n");
+		goto out_common_fail;
+	}
+	closid = ret;
+	ret = 0;
+
+	rdtgrp->closid = closid;
+
+	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+	if (ret)
+		goto out_closid_free;
+
+	kernfs_activate(rdtgrp->kn);
+
+	ret = rdtgroup_init_alloc(rdtgrp);
+	if (ret < 0)
+		goto out_rmid_free;
+
+	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
+
+	if (resctrl_arch_mon_capable()) {
+		/*
+		 * Create an empty mon_groups directory to hold the subset
+		 * of tasks and cpus to monitor.
+		 */
+		ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
+		if (ret) {
+			rdt_last_cmd_puts("kernfs subdir error\n");
+			goto out_del_list;
+		}
+	}
+
+	goto out_unlock;
+
+out_del_list:
+	list_del(&rdtgrp->rdtgroup_list);
+out_rmid_free:
+	mkdir_rdt_prepare_rmid_free(rdtgrp);
+out_closid_free:
+	closid_free(closid);
+out_common_fail:
+	mkdir_rdt_prepare_clean(rdtgrp);
+out_unlock:
+	rdtgroup_kn_unlock(parent_kn);
+	return ret;
+}
+
+/*
+ * We allow creating mon groups only within a directory called "mon_groups"
+ * which is present in every ctrl_mon group. Check if this is a valid
+ * "mon_groups" directory.
+ *
+ * 1. The directory should be named "mon_groups".
+ * 2. The mon group itself should "not" be named "mon_groups".
+ *   This makes sure "mon_groups" directory always has a ctrl_mon group
+ *   as parent.
+ */
+static bool is_mon_groups(struct kernfs_node *kn, const char *name)
+{
+	return (!strcmp(kn->name, "mon_groups") &&
+		strcmp(name, "mon_groups"));
+}
+
+static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
+			  umode_t mode)
+{
+	/* Do not accept '\n' to avoid unparsable situation. */
+	if (strchr(name, '\n'))
+		return -EINVAL;
+
+	/*
+	 * If the parent directory is the root directory and RDT
+	 * allocation is supported, add a control and monitoring
+	 * subdirectory
+	 */
+	if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
+		return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
+
+	/*
+	 * If RDT monitoring is supported and the parent directory is a valid
+	 * "mon_groups" directory, add a monitoring subdirectory.
+	 */
+	if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name))
+		return rdtgroup_mkdir_mon(parent_kn, name, mode);
+
+	return -EPERM;
+}
+
+static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
+{
+	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+	int cpu;
+
+	/* Give any tasks back to the parent group */
+	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
+
+	/* Update per cpu rmid of the moved CPUs first */
+	for_each_cpu(cpu, &rdtgrp->cpu_mask)
+		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
+	/*
+	 * Update the MSR on moved CPUs and CPUs which have a moved
+	 * task running on them.
+	 */
+	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
+	update_closid_rmid(tmpmask, NULL);
+
+	rdtgrp->flags = RDT_DELETED;
+	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+
+	/*
+	 * Remove the rdtgrp from the parent ctrl_mon group's list
+	 */
+	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
+	list_del(&rdtgrp->mon.crdtgrp_list);
+
+	kernfs_remove(rdtgrp->kn);
+
+	return 0;
+}
+
+static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
 {
-	bool *enable = arg;
+	rdtgrp->flags = RDT_DELETED;
+	list_del(&rdtgrp->rdtgroup_list);
 
-	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
+	kernfs_remove(rdtgrp->kn);
+	return 0;
 }
 
-static int set_cache_qos_cfg(int level, bool enable)
+static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
 {
-	void (*update)(void *arg);
-	struct rdt_resource *r_l;
-	cpumask_var_t cpu_mask;
-	struct rdt_domain *d;
 	int cpu;
 
-	/* Walking r->domains, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-	if (level == RDT_RESOURCE_L3)
-		update = l3_qos_cfg_update;
-	else if (level == RDT_RESOURCE_L2)
-		update = l2_qos_cfg_update;
-	else
-		return -EINVAL;
+	/* Give any tasks back to the default group */
+	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
 
-	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
-		return -ENOMEM;
+	/* Give any CPUs back to the default group */
+	cpumask_or(&rdtgroup_default.cpu_mask,
+		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
 
-	r_l = &rdt_resources_all[level].r_resctrl;
-	list_for_each_entry(d, &r_l->domains, list) {
-		if (r_l->cache.arch_has_per_cpu_cfg)
-			/* Pick all the CPUs in the domain instance */
-			for_each_cpu(cpu, &d->cpu_mask)
-				cpumask_set_cpu(cpu, cpu_mask);
-		else
-			/* Pick one CPU from each domain instance to update MSR */
-			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+	/* Update per cpu closid and rmid of the moved CPUs first */
+	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
+		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
+		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
 	}
 
-	/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
-	on_each_cpu_mask(cpu_mask, update, &enable, 1);
+	/*
+	 * Update the MSR on moved CPUs and CPUs which have a moved
+	 * task running on them.
+	 */
+	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
+	update_closid_rmid(tmpmask, NULL);
 
-	free_cpumask_var(cpu_mask);
+	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+	closid_free(rdtgrp->closid);
+
+	rdtgroup_ctrl_remove(rdtgrp);
+
+	/*
+	 * Free all the child monitor group rmids.
+	 */
+	free_all_child_rdtgrp(rdtgrp);
 
 	return 0;
 }
 
-/* Restore the qos cfg state when a domain comes online */
-void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
+static int rdtgroup_rmdir(struct kernfs_node *kn)
 {
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	struct kernfs_node *parent_kn = kn->parent;
+	struct rdtgroup *rdtgrp;
+	cpumask_var_t tmpmask;
+	int ret = 0;
 
-	if (!r->cdp_capable)
-		return;
+	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+		return -ENOMEM;
 
-	if (r->rid == RDT_RESOURCE_L2)
-		l2_qos_cfg_update(&hw_res->cdp_enabled);
+	rdtgrp = rdtgroup_kn_lock_live(kn);
+	if (!rdtgrp) {
+		ret = -EPERM;
+		goto out;
+	}
 
-	if (r->rid == RDT_RESOURCE_L3)
-		l3_qos_cfg_update(&hw_res->cdp_enabled);
+	/*
+	 * If the rdtgroup is a ctrl_mon group and parent directory
+	 * is the root directory, remove the ctrl_mon group.
+	 *
+	 * If the rdtgroup is a mon group and parent directory
+	 * is a valid "mon_groups" directory, remove the mon group.
+	 */
+	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
+	    rdtgrp != &rdtgroup_default) {
+		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+			ret = rdtgroup_ctrl_remove(rdtgrp);
+		} else {
+			ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
+		}
+	} else if (rdtgrp->type == RDTMON_GROUP &&
+		 is_mon_groups(parent_kn, kn->name)) {
+		ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
+	} else {
+		ret = -EPERM;
+	}
+
+out:
+	rdtgroup_kn_unlock(kn);
+	free_cpumask_var(tmpmask);
+	return ret;
 }
 
-static int cdp_enable(int level)
+/**
+ * mongrp_reparent() - replace parent CTRL_MON group of a MON group
+ * @rdtgrp:		the MON group whose parent should be replaced
+ * @new_prdtgrp:	replacement parent CTRL_MON group for @rdtgrp
+ * @cpus:		cpumask provided by the caller for use during this call
+ *
+ * Replaces the parent CTRL_MON group for a MON group, resulting in all member
+ * tasks' CLOSID immediately changing to that of the new parent group.
+ * Monitoring data for the group is unaffected by this operation.
+ */
+static void mongrp_reparent(struct rdtgroup *rdtgrp,
+			    struct rdtgroup *new_prdtgrp,
+			    cpumask_var_t cpus)
 {
-	struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
-	int ret;
+	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
 
-	if (!r_l->alloc_capable)
-		return -EINVAL;
+	WARN_ON(rdtgrp->type != RDTMON_GROUP);
+	WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
 
-	ret = set_cache_qos_cfg(level, true);
-	if (!ret)
-		rdt_resources_all[level].cdp_enabled = true;
+	/* Nothing to do when simply renaming a MON group. */
+	if (prdtgrp == new_prdtgrp)
+		return;
 
-	return ret;
-}
+	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
+	list_move_tail(&rdtgrp->mon.crdtgrp_list,
+		       &new_prdtgrp->mon.crdtgrp_list);
 
-static void cdp_disable(int level)
-{
-	struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
+	rdtgrp->mon.parent = new_prdtgrp;
+	rdtgrp->closid = new_prdtgrp->closid;
 
-	if (r_hw->cdp_enabled) {
-		set_cache_qos_cfg(level, false);
-		r_hw->cdp_enabled = false;
-	}
+	/* Propagate updated closid to all tasks in this group. */
+	rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
+
+	update_closid_rmid(cpus, NULL);
 }
 
-int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
+static int rdtgroup_rename(struct kernfs_node *kn,
+			   struct kernfs_node *new_parent, const char *new_name)
 {
-	struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
-
-	if (!hw_res->r_resctrl.cdp_capable)
-		return -EINVAL;
+	struct rdtgroup *new_prdtgrp;
+	struct rdtgroup *rdtgrp;
+	cpumask_var_t tmpmask;
+	int ret;
 
-	if (enable)
-		return cdp_enable(l);
+	rdtgrp = kernfs_to_rdtgroup(kn);
+	new_prdtgrp = kernfs_to_rdtgroup(new_parent);
+	if (!rdtgrp || !new_prdtgrp)
+		return -ENOENT;
 
-	cdp_disable(l);
+	/* Release both kernfs active_refs before obtaining rdtgroup mutex. */
+	rdtgroup_kn_get(rdtgrp, kn);
+	rdtgroup_kn_get(new_prdtgrp, new_parent);
 
-	return 0;
-}
+	mutex_lock(&rdtgroup_mutex);
 
-/*
- * Workaround to detect if memory bandwidth HWDRC feature is capable.
- *
- * CPUID for memory bandwidth HWDRC feature is not exposed by H/W.
- * Check presence of HWDRC OS mailbox MSRs. Read out the discovery bit of
- * HWDRC OS mailbox data which indicates if the feature is capable.
- */
-bool resctrl_arch_is_hwdrc_mb_capable(void)
-{
-	u32 retries;
-	u64 data;
-	int status;
+	rdt_last_cmd_clear();
 
-	/* Only enable memory bandwidth HWDRC after ICELAKE server */
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-	    boot_cpu_data.x86_model < INTEL_FAM6_ICELAKE_X) {
-		pr_debug("HWDRC: Not support until ICELAKE server\n");
+	/*
+	 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
+	 * either kernfs_node is a file.
+	 */
+	if (kernfs_type(kn) != KERNFS_DIR ||
+	    kernfs_type(new_parent) != KERNFS_DIR) {
+		rdt_last_cmd_puts("Source and destination must be directories\n");
+		ret = -EPERM;
 		goto out;
 	}
 
-	/* Check presence of mailbox MSRs */
-	if (rdmsrl_safe(HWDRC_MSR_OS_MAILBOX_INTERFACE, &data)) {
-		pr_debug("HWDRC: Can't access OS mailbox interface MSR\n");
+	if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
+		ret = -ENOENT;
 		goto out;
 	}
 
-	if (rdmsrl_safe(HWDRC_MSR_OS_MAILBOX_DATA, &data)) {
-		pr_debug("HWDRC: Can't access OS mailbox data MSR\n");
+	if (rdtgrp->type != RDTMON_GROUP || !kn->parent ||
+	    !is_mon_groups(kn->parent, kn->name)) {
+		rdt_last_cmd_puts("Source must be a MON group\n");
+		ret = -EPERM;
 		goto out;
 	}
 
-	/* Poll for run_busy bit == 0 */
-	status = -EBUSY;
-	retries = HWDRC_OS_MAILBOX_RETRY_COUNT;
-	do {
-		rdmsrl(HWDRC_MSR_OS_MAILBOX_INTERFACE, data);
-		if (!(data & HWDRC_MSR_OS_MAILBOX_BUSY_BIT)) {
-			status = 0;
-			break;
-		}
-	} while (--retries);
-
-	if (status)
+	if (!is_mon_groups(new_parent, new_name)) {
+		rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
+		ret = -EPERM;
 		goto out;
+	}
 
-	/* Write command register: 0x800054d0 */
-	data = HWDRC_MSR_OS_MAILBOX_BUSY_BIT |
-		HWDRC_SUB_COMMAND_MEM_CLOS_EN << 8 |
-		HWDRC_COMMAND_MEM_CLOS_EN;
-	pr_debug("HWDRC: Write command register: 0x%llx\n", data);
-	if (wrmsrl_safe(HWDRC_MSR_OS_MAILBOX_INTERFACE, data)) {
-		pr_debug("HWDRC: Write command register 0x%llx failed!\n", data);
+	/*
+	 * If the MON group is monitoring CPUs, the CPUs must be assigned to the
+	 * current parent CTRL_MON group and therefore cannot be assigned to
+	 * the new parent, making the move illegal.
+	 */
+	if (!cpumask_empty(&rdtgrp->cpu_mask) &&
+	    rdtgrp->mon.parent != new_prdtgrp) {
+		rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
+		ret = -EPERM;
 		goto out;
 	}
 
-	/* Poll for run_busy bit == 0 */
-	retries = HWDRC_OS_MAILBOX_RETRY_COUNT;
-	do {
-		rdmsrl(HWDRC_MSR_OS_MAILBOX_INTERFACE, data);
-		if (!(data & HWDRC_MSR_OS_MAILBOX_BUSY_BIT)) {
-			rdmsrl(HWDRC_MSR_OS_MAILBOX_DATA, data);
-			pr_debug("HWDRC: Read MEM_CLOS_EN data: 0x%llx\n", data);
+	/*
+	 * Allocate the cpumask for use in mongrp_reparent() to avoid the
+	 * possibility of failing to allocate it after kernfs_rename() has
+	 * succeeded.
+	 */
+	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
-			/* Feature capability bit is set */
-			if (data & HWDRC_MEMCLOS_AVAILABLE) {
-				pr_debug("HWDRC: Memory bandwidth HWDRC is capable\n");
-				return true;
-			}
+	/*
+	 * Perform all input validation and allocations needed to ensure
+	 * mongrp_reparent() will succeed before calling kernfs_rename(),
+	 * otherwise it would be necessary to revert this call if
+	 * mongrp_reparent() failed.
+	 */
+	ret = kernfs_rename(kn, new_parent, new_name);
+	if (!ret)
+		mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
 
-			/* Feature capability bit is not set */
-			break;
-		}
-	} while (--retries);
+	free_cpumask_var(tmpmask);
 
 out:
-	pr_debug("HWDRC: Memory bandwidth HWDRC is not capable\n");
-	return false;
+	mutex_unlock(&rdtgroup_mutex);
+	rdtgroup_kn_put(rdtgrp, kn);
+	rdtgroup_kn_put(new_prdtgrp, new_parent);
+	return ret;
 }
 
-static void mba_enable(enum resctrl_res_level l)
+static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
 {
-	struct rdt_hw_resource *r_hw = &rdt_resources_all[l];
-	struct rdt_resource *r = &r_hw->r_resctrl;
+	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
+		seq_puts(seq, ",cdp");
 
-	r->alloc_capable = true;
+	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
+		seq_puts(seq, ",cdpl2");
+
+	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
+		seq_puts(seq, ",mba_MBps");
+
+	if (resctrl_debug)
+		seq_puts(seq, ",debug");
+
+	return 0;
 }
 
-static void mba_disable(enum resctrl_res_level l)
+static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
+	.mkdir		= rdtgroup_mkdir,
+	.rmdir		= rdtgroup_rmdir,
+	.rename		= rdtgroup_rename,
+	.show_options	= rdtgroup_show_options,
+};
+
+static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
 {
-	struct rdt_hw_resource *r_hw = &rdt_resources_all[l];
-	struct rdt_resource *r = &r_hw->r_resctrl;
+	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
+				      KERNFS_ROOT_CREATE_DEACTIVATED |
+				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
+				      &rdtgroup_default);
+	if (IS_ERR(rdt_root))
+		return PTR_ERR(rdt_root);
+
+	ctx->kfc.root = rdt_root;
+	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
 
-	r->alloc_capable = false;
+	return 0;
 }
 
-/*
- * Currently memory bandwidth HWDRC feature is enabled or disabled by the user
- * outside of the scope of the resctrl filesystem. When memory bandwidth HWDRC
- * is enabled, it takes over MBA hooks in resctrl for memory bandwidth
- * throttling.
- *
- * Set memory bandwidth HWDRC enabled in resctrl so that the user who enables
- * memory bandwidth HWDRC can make sure that resctrl doesn't provide any hooks
- * to control MBA.
- *
- * Set memory bandwidth HWDRC disabled in resctrl, MBA is enabled by default.
- */
-int resctrl_arch_set_hwdrc_enabled(enum resctrl_res_level l, bool hwdrc_mb)
+static void rdtgroup_destroy_root(void)
 {
-	struct rdt_resource *r = &rdt_resources_all[l].r_resctrl;
+	kernfs_destroy_root(rdt_root);
+	rdtgroup_default.kn = NULL;
+}
 
-	if (!resctrl_arch_is_hwdrc_mb_capable() || hwdrc_mb == r->membw.hwdrc_mb)
-		return -EINVAL;
+static void __init rdtgroup_setup_default(void)
+{
+	mutex_lock(&rdtgroup_mutex);
 
-	/* MBA and memory bandwidth HWDRC features are mutually exclusive */
-	if (hwdrc_mb)
-		mba_disable(l);
-	else
-		mba_enable(l);
+	rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
+	rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
+	rdtgroup_default.type = RDTCTRL_GROUP;
+	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
 
-	r->membw.hwdrc_mb = hwdrc_mb;
+	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
 
-	return 0;
+	mutex_unlock(&rdtgroup_mutex);
 }
 
-/*
- * Update L3_QOS_EXT_CFG MSR on all the CPUs associated with the resource.
- */
-static void resctrl_abmc_set_one_amd(void *arg)
+static void domain_destroy_mon_state(struct rdt_domain *d)
 {
-	bool *enable = arg;
-
-	if (*enable)
-		msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT);
-	else
-		msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT);
+	bitmap_free(d->rmid_busy_llc);
+	kfree(d->mbm_total);
+	kfree(d->mbm_local);
 }
 
-static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable)
+void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
 {
-	struct rdt_domain *d;
+	mutex_lock(&rdtgroup_mutex);
+
+	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
+		mba_sc_domain_destroy(r, d);
+
+	if (!r->mon_capable)
+		goto out_unlock;
 
 	/*
-	 * Hardware counters will reset after switching the monitor mode.
-	 * Reset the architectural state so that reading of hardware
-	 * counter is not considered as an overflow in the next update.
+	 * If resctrl is mounted, remove all the
+	 * per domain monitor data directories.
 	 */
-	list_for_each_entry(d, &r->domains, list) {
-		on_each_cpu_mask(&d->cpu_mask,
-				 resctrl_abmc_set_one_amd, &enable, 1);
-		resctrl_arch_reset_rmid_all(r, d);
+	if (resctrl_mounted && resctrl_arch_mon_capable())
+		rmdir_mondata_subdir_allrdtgrp(r, d->id);
+
+	if (is_mbm_enabled())
+		cancel_delayed_work(&d->mbm_over);
+	if (is_llc_occupancy_enabled() && has_busy_rmid(d)) {
+		/*
+		 * When a package is going down, forcefully
+		 * decrement rmid->ebusy. There is no way to know
+		 * that the L3 was flushed and hence may lead to
+		 * incorrect counts in rare scenarios, but leaving
+		 * the RMID as busy creates RMID leaks if the
+		 * package never comes back.
+		 */
+		__check_limbo(d, true);
+		cancel_delayed_work(&d->cqm_limbo);
 	}
-}
 
-bool resctrl_arch_get_abmc_enabled(void)
-{
-	return rdt_resources_all[RDT_RESOURCE_L3].mbm_cntr_assign_enabled;
+	domain_destroy_mon_state(d);
+
+out_unlock:
+	mutex_unlock(&rdtgroup_mutex);
 }
 
-int resctrl_arch_mbm_cntr_assign_enable(void)
+static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
 {
-	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
-
-	lockdep_assert_held(&rdtgroup_mutex);
+	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+	size_t tsize;
 
-	if (r->mon.mbm_cntr_assignable && !hw_res->mbm_cntr_assign_enabled) {
-		_resctrl_abmc_enable(r, true);
-		hw_res->mbm_cntr_assign_enabled = true;
+	if (is_llc_occupancy_enabled()) {
+		d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
+		if (!d->rmid_busy_llc)
+			return -ENOMEM;
+	}
+	if (is_mbm_total_enabled()) {
+		tsize = sizeof(*d->mbm_total);
+		d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
+		if (!d->mbm_total) {
+			bitmap_free(d->rmid_busy_llc);
+			return -ENOMEM;
+		}
+	}
+	if (is_mbm_local_enabled()) {
+		tsize = sizeof(*d->mbm_local);
+		d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
+		if (!d->mbm_local) {
+			bitmap_free(d->rmid_busy_llc);
+			kfree(d->mbm_total);
+			return -ENOMEM;
+		}
 	}
 
 	return 0;
 }
 
-void resctrl_arch_mbm_cntr_assign_configure(void)
+int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
 {
-	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
-	bool enable = true;
+	int err = 0;
 
 	mutex_lock(&rdtgroup_mutex);
 
-	if (r->mon.mbm_cntr_assignable) {
-		if (!hw_res->mbm_cntr_assign_enabled)
-			hw_res->mbm_cntr_assign_enabled = true;
-		resctrl_abmc_set_one_amd(&enable);
+	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
+		/* RDT_RESOURCE_MBA is never mon_capable */
+		err = mba_sc_domain_allocate(r, d);
+		goto out_unlock;
 	}
 
-	mutex_unlock(&rdtgroup_mutex);
-}
-
-void resctrl_arch_mbm_cntr_assign_disable(void)
-{
-	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	if (!r->mon_capable)
+		goto out_unlock;
 
-	lockdep_assert_held(&rdtgroup_mutex);
+	err = domain_setup_mon_state(r, d);
+	if (err)
+		goto out_unlock;
 
-	if (hw_res->mbm_cntr_assign_enabled) {
-		_resctrl_abmc_enable(r, false);
-		hw_res->mbm_cntr_assign_enabled = false;
+	if (is_mbm_enabled()) {
+		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
+		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
+					   RESCTRL_PICK_ANY_CPU);
 	}
-}
 
-bool resctrl_arch_get_mbm_cntr_assign_enable(void)
-{
-	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	if (is_llc_occupancy_enabled())
+		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
 
-	return hw_res->mbm_cntr_assign_enabled;
-}
+	/*
+	 * If the filesystem is not mounted then only the default resource group
+	 * exists. Creation of its directories is deferred until mount time
+	 * by rdt_get_tree() calling mkdir_mondata_all().
+	 * If resctrl is mounted, add per domain monitor data directories.
+	 */
+	if (resctrl_mounted && resctrl_arch_mon_capable())
+		mkdir_mondata_subdir_allrdtgrp(r, d);
 
-static void rdtgroup_abmc_cfg(void *info)
-{
-	u64 *msrval = info;
+out_unlock:
+	mutex_unlock(&rdtgroup_mutex);
 
-	wrmsrl(MSR_IA32_L3_QOS_ABMC_CFG, *msrval);
+	return err;
 }
 
-/*
- * Send an IPI to the domain to assign the counter id to RMID.
- */
-int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid,
-			     u32 rmid, u32 cntr_id, u32 closid, bool assign)
+void resctrl_online_cpu(unsigned int cpu)
 {
-	struct rdt_domain *d = dom;
-	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
-	union l3_qos_abmc_cfg abmc_cfg = { 0 };
-	struct arch_mbm_state *arch_mbm;
+	mutex_lock(&rdtgroup_mutex);
+	/* The CPU is set in default rdtgroup after online. */
+	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
+	mutex_unlock(&rdtgroup_mutex);
+}
 
-	abmc_cfg.split.cfg_en = 1;
-	abmc_cfg.split.cntr_en = assign ? 1 : 0;
-	abmc_cfg.split.cntr_id = cntr_id;
-	abmc_cfg.split.bw_src = rmid;
+static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
+{
+	struct rdtgroup *cr;
 
-	/* Update the event configuration from the domain */
-	if (evtid == QOS_L3_MBM_TOTAL_EVENT_ID) {
-		abmc_cfg.split.bw_type = hw_dom->mbm_total_cfg;
-		arch_mbm = &hw_dom->arch_mbm_total[rmid];
-	} else {
-		abmc_cfg.split.bw_type = hw_dom->mbm_local_cfg;
-		arch_mbm = &hw_dom->arch_mbm_local[rmid];
+	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
+		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
+			break;
 	}
-
-	smp_call_function_any(&d->cpu_mask, rdtgroup_abmc_cfg, &abmc_cfg, 1);
-
-	/*
-	 * Reset the architectural state so that reading of hardware
-	 * counter is not considered as an overflow in next update.
-	 */
-	if (arch_mbm)
-		memset(arch_mbm, 0, sizeof(struct arch_mbm_state));
-
-	return 0;
 }
 
-static int reset_all_ctrls(struct rdt_resource *r)
+void resctrl_offline_cpu(unsigned int cpu)
 {
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
-	struct rdt_hw_domain *hw_dom;
-	struct msr_param msr_param;
-	cpumask_var_t cpu_mask;
+	struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	struct rdtgroup *rdtgrp;
 	struct rdt_domain *d;
-	int i;
-
-	/* Walking r->domains, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
-		return -ENOMEM;
-
-	msr_param.res = r;
-	msr_param.low = 0;
-	msr_param.high = hw_res->num_closid;
-
-	/*
-	 * Disable resource control for this resource by setting all
-	 * CBMs in all domains to the maximum mask value. Pick one CPU
-	 * from each domain to update the MSRs below.
-	 */
-	list_for_each_entry(d, &r->domains, list) {
-		hw_dom = resctrl_to_arch_dom(d);
-		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
 
-		for (i = 0; i < hw_res->num_closid; i++)
-			hw_dom->ctrl_val[i] = r->default_ctrl;
+	mutex_lock(&rdtgroup_mutex);
+	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
+			clear_childcpus(rdtgrp, cpu);
+			break;
+		}
 	}
 
-	/* Update CBM on all the CPUs in cpu_mask */
-	on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1);
-
-	free_cpumask_var(cpu_mask);
-
-	return 0;
-}
+	if (!l3->mon_capable)
+		goto out_unlock;
 
-void resctrl_arch_reset_resources(void)
-{
-	struct rdt_resource *r;
+	d = get_domain_from_cpu(cpu, l3);
+	if (d) {
+		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+			cancel_delayed_work(&d->mbm_over);
+			mbm_setup_overflow_handler(d, 0, cpu);
+		}
+		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+		    has_busy_rmid(d)) {
+			cancel_delayed_work(&d->cqm_limbo);
+			cqm_setup_limbo_handler(d, 0, cpu);
+		}
+	}
 
-	for_each_capable_rdt_resource(r)
-		reset_all_ctrls(r);
+out_unlock:
+	mutex_unlock(&rdtgroup_mutex);
 }
 
-u32 resctrl_arch_event_config_get(void *dom, enum resctrl_event_id eventid)
+/*
+ * rdtgroup_init - rdtgroup initialization
+ *
+ * Set up the resctrl file system: set up the root, create the mount point,
+ * register the rdtgroup filesystem and initialize files under root directory.
+ *
+ * Return: 0 on success or -errno
+ */
+int __init rdtgroup_init(void)
 {
-	struct rdt_domain *d = dom;
-	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+	int ret = 0;
 
-	switch (eventid) {
-	case QOS_L3_OCCUP_EVENT_ID:
-	case QOS_MC_MBM_BPS_EVENT_ID:
-		break;
-	case QOS_L3_MBM_TOTAL_EVENT_ID:
-		return hw_dom->mbm_total_cfg;
-	case QOS_L3_MBM_LOCAL_EVENT_ID:
-		return hw_dom->mbm_local_cfg;
-	}
+	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
+		     sizeof(last_cmd_status_buf));
 
-	/* Never expect to get here */
-	WARN_ON_ONCE(1);
+	rdtgroup_setup_default();
 
-	return INVALID_CONFIG_VALUE;
-}
+	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
+	if (ret)
+		return ret;
 
-void resctrl_arch_event_config_set(void *info)
-{
-	struct resctrl_mon_config_info *mon_info = info;
-	struct rdt_hw_domain *hw_dom;
-	unsigned int index;
+	ret = register_filesystem(&rdt_fs_type);
+	if (ret)
+		goto cleanup_mountpoint;
 
-	index = mon_event_config_index_get(mon_info->evtid);
-	if (index == INVALID_CONFIG_INDEX)
-		return;
+	/*
+	 * Adding the resctrl debugfs directory here may not be ideal since
+	 * it would let the resctrl debugfs directory appear on the debugfs
+	 * filesystem before the resctrl filesystem is mounted.
+	 * It may also be ok since that would enable debugging of RDT before
+	 * resctrl is mounted.
+	 * The reason why the debugfs directory is created here and not in
+	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
+	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
+	 * (the lockdep class of inode->i_rwsem). Other filesystem
+	 * interactions (e.g. SyS_getdents) have the lock ordering:
+	 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
+	 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
+	 * is taken, thus creating dependency:
+	 * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
+	 * issues considering the other two lock dependencies.
+	 * By creating the debugfs directory here we avoid a dependency
+	 * that may cause deadlock (even though file operations cannot
+	 * occur until the filesystem is mounted, but I do not know how to
+	 * tell lockdep that).
+	 */
+	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
 
-	wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
+	return 0;
 
-	hw_dom = resctrl_to_arch_dom(mon_info->d);
+cleanup_mountpoint:
+	sysfs_remove_mount_point(fs_kobj, "resctrl");
 
-	switch (mon_info->evtid) {
-	case QOS_L3_OCCUP_EVENT_ID:
-	case QOS_MC_MBM_BPS_EVENT_ID:
-		break;
-	case QOS_L3_MBM_TOTAL_EVENT_ID:
-		hw_dom->mbm_total_cfg = mon_info->mon_config;
-		break;
-	case QOS_L3_MBM_LOCAL_EVENT_ID:
-		hw_dom->mbm_local_cfg =  mon_info->mon_config;
-		break;
-	}
+	return ret;
+}
 
-	mon_info->err = 0;
+void __exit rdtgroup_exit(void)
+{
+	debugfs_remove_recursive(debugfs_resctrl);
+	unregister_filesystem(&rdt_fs_type);
+	sysfs_remove_mount_point(fs_kobj, "resctrl");
 }
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0773ce13715fe5fffbe45f88040a4c37f68bf556..e1d8419f6e0878f2057dcc6ba35a7f4d4caf857f 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -52,11 +52,9 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_BMEC,		CPUID_EBX,  3, 0x80000020, 0 },
 	{ X86_FEATURE_TSA_SQ_NO,	CPUID_ECX,  1, 0x80000021, 0 },
 	{ X86_FEATURE_TSA_L1_NO,	CPUID_ECX,  2, 0x80000021, 0 },
-	{ X86_FEATURE_ABMC,		CPUID_EBX,  5, 0x80000020, 0 },
 	{ X86_FEATURE_AMD_WORKLOAD_CLASS,	CPUID_EAX,  22, 0x80000021, 0 },
 	{ X86_FEATURE_PERFMON_V2,	CPUID_EAX,  0, 0x80000022, 0 },
 	{ X86_FEATURE_AMD_LBR_V2,	CPUID_EAX,  1, 0x80000022, 0 },
-	{ X86_FEATURE_SDCIAE,			CPUID_EBX,  6, 0x80000020, 0 },
 	{ X86_FEATURE_AMD_LBR_PMC_FREEZE,	CPUID_EAX,  2, 0x80000022, 0 },
 	{ 0, 0, 0, 0, 0 }
 };
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 619560eb9f94c049fdcbb1a2e4dcb48938fff674..708c87b88cc150ee64145de90de938e0482afa3f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -212,7 +212,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	switch_fpu_finish();
 
 	/* Load the Intel cache allocation PQR MSR. */
-	resctrl_arch_sched_in(next_p);
+	resctrl_sched_in(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index fbadfa84a02d6ac1b5d816c30f61873f17329cbc..5fee1e0c42ca1df61a83cc1fc9b5db2dfa7b5e50 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -667,7 +667,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	}
 
 	/* Load the Intel cache allocation PQR MSR. */
-	resctrl_arch_sched_in(next_p);
+	resctrl_sched_in(next_p);
 
 	return prev_p;
 }
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
index f2fd79f22e7d836b07401eb055725b2632ba624c..b3ed6212244c1e5405008355b7d0878252564251 100644
--- a/drivers/acpi/arm64/Kconfig
+++ b/drivers/acpi/arm64/Kconfig
@@ -21,6 +21,3 @@ config ACPI_AGDI
 
 config ACPI_APMT
 	bool
-
-config ACPI_MPAM
-	bool
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index d8bd6066a4c67545ad0e75eb68ef27170b0d810e..726944648c9bcefa5d0462d855e72556513c15d9 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -4,6 +4,5 @@ obj-$(CONFIG_ACPI_IORT) 	+= iort.o
 obj-$(CONFIG_ACPI_GTDT) 	+= gtdt.o
 obj-$(CONFIG_ACPI_APMT) 	+= apmt.o
 obj-$(CONFIG_ARM_AMBA)		+= amba.o
-obj-$(CONFIG_ACPI_MPAM) 	+= mpam.o
 obj-y				+= dma.o init.o
 obj-y				+= thermal_cpufreq.o
diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c
deleted file mode 100644
index 153ef041abf0ca1e037b91dde21b86a62ba903ba..0000000000000000000000000000000000000000
--- a/drivers/acpi/arm64/mpam.c
+++ /dev/null
@@ -1,451 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2022 Arm Ltd.
-
-/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */
-
-#define pr_fmt(fmt) "ACPI MPAM: " fmt
-
-#include <linux/acpi.h>
-#include <linux/arm_mpam.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/platform_device.h>
-
-#include <acpi/processor.h>
-
-#include <asm/mpam.h>
-
-/* Flags for acpi_table_mpam_msc.*_interrupt_flags */
-#define ACPI_MPAM_MSC_IRQ_MODE_EDGE                    1
-#define ACPI_MPAM_MSC_IRQ_TYPE_MASK                    (3<<1)
-#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED                   0
-#define ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER (1<<3)
-#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID               (1<<4)
-
-int ddrc_freq;
-
-/* Use OEM info in MPAM ACPI table to distinguish different machine types */
-struct acpi_mpam_machine_oem_info {
-	enum mpam_machine_type type;
-	char signature[ACPI_NAMESEG_SIZE + 1];
-	u8 revision;
-	char oem_id[ACPI_OEM_ID_SIZE + 1];
-	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
-	u32 oem_revision;
-};
-
-static struct acpi_mpam_machine_oem_info acpi_mpam_machines[MPAM_NUM_MACHINE_TYPES] = {
-	[MPAM_YITIAN710] = {
-		.signature = "YMPM",
-		.revision = 0,
-		.oem_id = "PTG   ",
-		.oem_table_id = "PTG01   ",
-		.oem_revision = 0,
-	},
-};
-
-static bool frob_irq(struct platform_device *pdev, int intid, u32 flags,
-		     int *irq, u32 processor_container_uid)
-{
-	int sense;
-
-	if (!intid)
-		return false;
-
-	/* 0 in this field indicates a wired interrupt */
-	if (flags & ACPI_MPAM_MSC_IRQ_TYPE_MASK)
-		return false;
-
-	if (flags & ACPI_MPAM_MSC_IRQ_MODE_EDGE)
-		sense = ACPI_EDGE_SENSITIVE;
-	else
-		sense = ACPI_LEVEL_SENSITIVE;
-
-	/*
-	 * If the GSI is in the GIC's PPI range, try and create a partitioned
-	 * percpu interrupt.
-	 */
-	if (16 <= intid && intid < 32 && processor_container_uid != ~0) {
-		pr_err_once("Partitioned interrupts not supported\n");
-		return false;
-	} else {
-		*irq = acpi_register_gsi(&pdev->dev, intid, sense,
-					 ACPI_ACTIVE_HIGH);
-	}
-	if (*irq <= 0) {
-		pr_err_once("Failed to register interrupt 0x%x with ACPI\n",
-			    intid);
-		return false;
-	}
-
-	return true;
-}
-
-static void acpi_mpam_parse_irqs(struct platform_device *pdev,
-				 struct acpi_mpam_msc_node *tbl_msc,
-				 struct resource *res, int *res_idx)
-{
-	u32 flags, aff = ~0;
-	int irq;
-
-	flags = tbl_msc->overflow_interrupt_flags;
-	if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID &&
-	    flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER)
-		aff = tbl_msc->overflow_interrupt_affinity;
-	if (frob_irq(pdev, tbl_msc->overflow_interrupt, flags, &irq, aff)) {
-		res[*res_idx].start = irq;
-		res[*res_idx].end = irq;
-		res[*res_idx].flags = IORESOURCE_IRQ;
-		res[*res_idx].name = "overflow";
-
-		(*res_idx)++;
-	}
-
-	flags = tbl_msc->error_interrupt_flags;
-	if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID &&
-	    flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER)
-		aff = tbl_msc->error_interrupt_affinity;
-	else
-		aff = ~0;
-	if (frob_irq(pdev, tbl_msc->error_interrupt, flags, &irq, aff)) {
-		res[*res_idx].start = irq;
-		res[*res_idx].end = irq;
-		res[*res_idx].flags = IORESOURCE_IRQ;
-		res[*res_idx].name = "error";
-
-		(*res_idx)++;
-	}
-}
-
-static int acpi_mpam_parse_resource(struct mpam_msc *msc,
-				    struct acpi_mpam_resource_node *res)
-{
-	u32 cache_id;
-	int level;
-
-	switch (res->locator_type) {
-	case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE:
-		cache_id = res->locator.cache_locator.cache_reference;
-		if (mpam_current_machine == MPAM_YITIAN710) {
-			/*
-			 * YITIAN710's BIOS doesn't support find level from
-			 * cache id. Since it only supports L3 cache, use a
-			 * fixed value, 3.
-			 */
-			level = 3;
-		} else {
-			level = find_acpi_cache_level_from_id(cache_id);
-			if (level < 0) {
-				pr_err_once("Bad level for cache with id %u\n", cache_id);
-				return level;
-			}
-		}
-		return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE,
-				       level, cache_id);
-	case ACPI_MPAM_LOCATION_TYPE_MEMORY:
-		if (mpam_current_machine == MPAM_YITIAN710)
-			ddrc_freq = res->locator.memory_locator.reserved;
-		return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_MEMORY,
-				       255, res->locator.memory_locator.proximity_domain);
-	default:
-		/* These get discovered later and treated as unknown */
-		return 0;
-	}
-}
-
-int acpi_mpam_parse_resources(struct mpam_msc *msc,
-			      struct acpi_mpam_msc_node *tbl_msc)
-{
-	int i, err;
-	struct acpi_mpam_resource_node *resources;
-
-	resources = (struct acpi_mpam_resource_node *)(tbl_msc + 1);
-	for (i = 0; i < tbl_msc->num_resouce_nodes; i++) {
-		err = acpi_mpam_parse_resource(msc, &resources[i]);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc,
-				     struct platform_device *pdev,
-				     u32 *acpi_id)
-{
-	bool acpi_id_valid = false;
-	struct acpi_device *buddy;
-	char hid[16], uid[16];
-	int err;
-
-	memset(&hid, 0, sizeof(hid));
-	memcpy(hid, &tbl_msc->hardware_id_linked_device,
-	       sizeof(tbl_msc->hardware_id_linked_device));
-
-	if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) {
-		*acpi_id = tbl_msc->instance_id_linked_device;
-		acpi_id_valid = true;
-	}
-
-	err = snprintf(uid, sizeof(uid), "%u",
-		       tbl_msc->instance_id_linked_device);
-	if (err < 0 || err >= sizeof(uid))
-		return acpi_id_valid;
-
-	buddy = acpi_dev_get_first_match_dev(hid, uid, -1);
-	if (buddy) {
-		device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS);
-	}
-
-	return acpi_id_valid;
-}
-
-static int decode_interface_type(struct acpi_mpam_msc_node *tbl_msc,
-				 enum mpam_msc_iface *iface)
-{
-	switch (tbl_msc->interface_type){
-	case 0:
-		*iface = MPAM_IFACE_MMIO;
-		return 0;
-	case 1:
-		*iface = MPAM_IFACE_PCC;
-		return 0;
-	default:
-		return -EINVAL;
-	}
-}
-
-static int __init _parse_table(struct acpi_table_header *table)
-{
-	char *table_end, *table_offset = (char *)(table + 1);
-	struct property_entry props[4]; /* needs a sentinel */
-	struct acpi_mpam_msc_node *tbl_msc;
-	int next_res, next_prop, err = 0;
-	struct acpi_device *companion;
-	struct platform_device *pdev;
-	enum mpam_msc_iface iface;
-	struct resource res[3];
-	char uid[16];
-	u32 acpi_id;
-	int msc_num = 0;
-
-	table_end = (char *)table + table->length;
-
-	while (table_offset < table_end) {
-		tbl_msc = (struct acpi_mpam_msc_node *)table_offset;
-		table_offset += tbl_msc->length;
-
-		/*
-		 * If any of the reserved fields are set, make no attempt to
-		 * parse the msc structure. This will prevent the driver from
-		 * probing all the MSC, meaning it can't discover the system
-		 * wide supported partid and pmg ranges. This avoids whatever
-		 * this MSC is truncating the partids and creating a screaming
-		 * error interrupt.
-		 */
-		if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2)
-			continue;
-
-		if (decode_interface_type(tbl_msc, &iface))
-			continue;
-
-		next_res = 0;
-		next_prop = 0;
-		memset(res, 0, sizeof(res));
-		memset(props, 0, sizeof(props));
-
-		/*
-		 * Use an extra msc_num instead of msc->identifier, since MSC
-		 * nodes with different types in MPAM ACPI table may have the
-		 * same id value.
-		 */
-		pdev = platform_device_alloc("mpam_msc", msc_num++);
-		if (IS_ERR(pdev)) {
-			err = PTR_ERR(pdev);
-			break;
-		}
-
-		if (tbl_msc->length < sizeof(*tbl_msc)) {
-			err = -EINVAL;
-			break;
-		}
-
-		/* Some power management is described in the namespace: */
-		err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier);
-		if (err > 0 && err < sizeof(uid)) {
-			companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1);
-			if (companion)
-				ACPI_COMPANION_SET(&pdev->dev, companion);
-		}
-
-		if (iface == MPAM_IFACE_MMIO) {
-			res[next_res].name = "MPAM:MSC";
-			res[next_res].start = tbl_msc->base_address;
-			res[next_res].end = tbl_msc->base_address + tbl_msc->mmio_size - 1;
-			res[next_res].flags = IORESOURCE_MEM;
-			next_res++;
-		} else if (iface == MPAM_IFACE_PCC) {
-			props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel",
-								tbl_msc->base_address);
-			next_prop++;
-		}
-
-		acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res);
-		err = platform_device_add_resources(pdev, res, next_res);
-		if (err)
-			break;
-
-		props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us",
-							tbl_msc->max_nrdy_usec);
-
-		/*
-		 * The MSC's CPU affinity is described via its linked power
-		 * management device, but only if it points at a Processor or
-		 * Processor Container.
-		 */
-		if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id)) {
-			props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity",
-								acpi_id);
-		}
-
-		err = device_create_managed_software_node(&pdev->dev, props,
-							  NULL);
-		if (err)
-			break;
-
-		/* Come back later if you want the RIS too */
-		err = platform_device_add_data(pdev, tbl_msc, tbl_msc->length);
-		if (err)
-			break;
-
-		platform_device_add(pdev);
-	}
-
-	if (err)
-		platform_device_put(pdev);
-
-	return err;
-}
-
-static struct acpi_table_header *get_table(void)
-{
-	struct acpi_table_header *table;
-	enum mpam_machine_type mtype;
-	acpi_status status;
-
-	if (acpi_disabled || !mpam_cpus_have_feature())
-		return NULL;
-
-	mtype = acpi_mpam_get_machine_type();
-
-	if (mtype != MPAM_DEFAULT_MACHINE)
-		status = acpi_get_table(acpi_mpam_machines[mtype].signature, 0, &table);
-	else
-		status = acpi_get_table(ACPI_SIG_MPAM, 0, &table);
-	if (ACPI_FAILURE(status))
-		return NULL;
-
-	if (mtype == MPAM_DEFAULT_MACHINE && table->revision != 1)
-		return NULL;
-
-	/*
-	 * Kunpeng's MPAM ACPI adopts an older version of MPAM ACPI, so
-	 * this MPAM ACPI driver is not suitable for Kunpeng platform.
-	 * Skip it.
-	 */
-	if (!strncmp(table->oem_id, "HISI", 4)) {
-		acpi_put_table(table);
-		return NULL;
-	}
-
-	return table;
-}
-
-
-
-static int __init acpi_mpam_parse(void)
-{
-	struct acpi_table_header *mpam;
-	int err;
-
-	mpam = get_table();
-	if (!mpam)
-		return 0;
-
-	err = _parse_table(mpam);
-	acpi_put_table(mpam);
-
-	return err;
-}
-
-static int _count_msc(struct acpi_table_header *table)
-{
-	char *table_end, *table_offset = (char *)(table + 1);
-	struct acpi_mpam_msc_node *tbl_msc;
-	int ret = 0;
-
-	tbl_msc = (struct acpi_mpam_msc_node *)table_offset;
-	table_end = (char *)table + table->length;
-
-	while (table_offset < table_end) {
-		if (tbl_msc->length < sizeof(*tbl_msc))
-			return -EINVAL;
-
-		ret++;
-
-		table_offset += tbl_msc->length;
-		tbl_msc = (struct acpi_mpam_msc_node *)table_offset;
-	}
-
-	return ret;
-}
-
-
-int acpi_mpam_count_msc(void)
-{
-	struct acpi_table_header *mpam;
-	int ret;
-
-	mpam = get_table();
-	if (!mpam)
-		return 0;
-
-	ret = _count_msc(mpam);
-	acpi_put_table(mpam);
-
-	return ret;
-}
-
-enum mpam_machine_type acpi_mpam_get_machine_type(void)
-{
-	struct acpi_table_header *table;
-	enum mpam_machine_type ret;
-	acpi_status status;
-	int i;
-
-	ret = MPAM_DEFAULT_MACHINE;
-
-	for (i = MPAM_DEFAULT_MACHINE + 1; i < MPAM_NUM_MACHINE_TYPES; i++) {
-		status = acpi_get_table(acpi_mpam_machines[i].signature, 0, &table);
-		if (ACPI_FAILURE(status))
-			continue;
-
-		if (!memcmp(acpi_mpam_machines[i].oem_id, table->oem_id, ACPI_OEM_ID_SIZE) &&
-		    !memcmp(acpi_mpam_machines[i].oem_table_id, table->oem_table_id,
-		    ACPI_OEM_TABLE_ID_SIZE) &&
-		    acpi_mpam_machines[i].oem_revision == table->oem_revision) {
-			ret = i;
-		}
-
-		acpi_put_table(table);
-	}
-
-	return ret;
-}
-
-/*
- * Call after ACPI devices have been created, which happens behind acpi_scan_init()
- * called from subsys_initcall(). PCC requires the mailbox driver, which is
- * initialised from postcore_initcall().
- */
-subsys_initcall_sync(acpi_mpam_parse);
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index eea6ea5fcdcad0a09c1ed18f9a491b20151bcc69..54676e3d82dd598a76bfbec10440eae57483365a 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -21,8 +21,6 @@
 #include <linux/cacheinfo.h>
 #include <acpi/processor.h>
 
-typedef int (*acpi_pptt_cpu_callback_t)(struct acpi_pptt_processor *, void *);
-
 static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr,
 							u32 pptt_ref)
 {
@@ -183,10 +181,9 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr,
  * levels and split cache levels (data/instruction).
  * @table_hdr: Pointer to the head of the PPTT table
  * @cpu_node: processor node we wish to count caches for
- * @levels: Number of levels if success. (*levels) should be initialized by
- *          the caller with the value to be used as the starting level.
+ * @levels: Number of levels if success.
  * @split_levels:	Number of split cache levels (data/instruction) if
- *			success. Can be NULL.
+ *			success. Can be NULL.
  *
  * Given a processor node containing a processing unit, walk into it and count
  * how many levels exist solely for it, and then walk up each level until we hit
@@ -301,177 +298,6 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he
 	return NULL;
 }
 
-/*
- * acpi_pptt_find_cache_backwards() - Given a PPTT cache find a processor node
- * that points to it. This lets us find a cacheinfo node by fw_token, but
- * is totally broken as many processor node may point at the same PPTT
- * cache indicating different instances of the cache. (e.g. all the L1
- * caches are the same shape, but they aren't the same cache).
- * This only works if you cooked your PPTT table to look like this.
- */
-struct acpi_pptt_processor *
-acpi_pptt_find_cache_backwards(struct acpi_table_header *table_hdr,
-			       struct acpi_pptt_cache *cache)
-{
-	struct acpi_pptt_processor *cpu_node;
-	struct acpi_subtable_header *entry;
-	struct acpi_subtable_header *res;
-	unsigned long table_end;
-	u32 proc_sz;
-	int i;
-
-	table_end = (unsigned long)table_hdr + table_hdr->length;
-	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
-			     sizeof(struct acpi_table_pptt));
-	proc_sz = sizeof(struct acpi_pptt_processor *);
-
-	/* find the processor structure which points at  with this cpuid */
-	while ((unsigned long)entry + proc_sz < table_end) {
-		if (entry->length == 0) {
-			pr_warn("Invalid zero length subtable\n");
-			break;
-		}
-
-		cpu_node = (struct acpi_pptt_processor *)entry;
-		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
-				     entry->length);
-
-		if (cpu_node->header.type != ACPI_PPTT_TYPE_PROCESSOR)
-			continue;
-
-		for (i = 0; i < cpu_node->number_of_priv_resources; i++) {
-			res = acpi_get_pptt_resource(table_hdr, cpu_node, i);
-			if (&cache->header == res)
-				return cpu_node;
-		}
-	}
-
-	return NULL;
-}
-
-/* parent_node points into the table, but the table isn't provided. */
-static void acpi_pptt_get_child_cpus(struct acpi_pptt_processor *parent_node,
-				     cpumask_t *cpus)
-{
-	struct acpi_pptt_processor *cpu_node;
-	struct acpi_table_header *table_hdr;
-	acpi_status status;
-	u32 acpi_id;
-	int cpu;
-
-	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table_hdr);
-	if (ACPI_FAILURE(status))
-		return;
-
-	for_each_possible_cpu(cpu) {
-		acpi_id = get_acpi_id_for_cpu(cpu);
-		cpu_node = acpi_find_processor_node(table_hdr, acpi_id);
-
-		while (cpu_node) {
-			if (cpu_node == parent_node) {
-				cpumask_set_cpu(cpu, cpus);
-				break;
-			}
-			cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
-		}
-	}
-
-	acpi_put_table(table_hdr);
-
-	return;
-}
-
-/**
- * acpi_pptt_for_each_container() - Iterate over all processor containers
- *
- * Not all 'Processor' entries in the PPTT are either a CPU or a Processor
- * Container, they may exist purely to describe a Private resource. CPUs
- * have to be leaves, so a Processor Container is a non-leaf that has the
- * 'ACPI Processor ID valid' flag set.
- *
- * Return: 0 for a complete walk, or the first non-zero value from the callback
- *         that stopped the walk.
- */
-int acpi_pptt_for_each_container(acpi_pptt_cpu_callback_t callback, void *arg)
-{
-	struct acpi_pptt_processor *cpu_node;
-	struct acpi_table_header *table_hdr;
-	struct acpi_subtable_header *entry;
-	bool leaf_flag, has_leaf_flag = false;
-	unsigned long table_end;
-	acpi_status status;
-	u32 proc_sz;
-	int ret = 0;
-
-	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table_hdr);
-	if (ACPI_FAILURE(status))
-		return 0;
-
-	if (table_hdr->revision > 1)
-		has_leaf_flag = true;
-
-	table_end = (unsigned long)table_hdr + table_hdr->length;
-	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
-			     sizeof(struct acpi_table_pptt));
-	proc_sz = sizeof(struct acpi_pptt_processor);
-	while ((unsigned long)entry + proc_sz < table_end) {
-		cpu_node = (struct acpi_pptt_processor *)entry;
-		if (entry->type == ACPI_PPTT_TYPE_PROCESSOR &&
-		    cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID)
-		{
-			leaf_flag = cpu_node->flags & ACPI_PPTT_ACPI_LEAF_NODE;
-			if ((has_leaf_flag && !leaf_flag) ||
-			    (!has_leaf_flag && !acpi_pptt_leaf_node(table_hdr, cpu_node)))
-			{
-				ret = callback(cpu_node, arg);
-				if (ret)
-					break;
-			}
-		}
-		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
-				     entry->length);
-	}
-
-	acpi_put_table(table_hdr);
-
-	return ret;
-}
-
-struct __cpus_from_container_arg {
-	u32 acpi_cpu_id;
-	cpumask_t *cpus;
-};
-
-static int __cpus_from_container(struct acpi_pptt_processor *container, void *arg)
-{
-	struct __cpus_from_container_arg *params = arg;
-
-	if (container->acpi_processor_id == params->acpi_cpu_id)
-		acpi_pptt_get_child_cpus(container, params->cpus);
-
-	return 0;
-}
-
-/**
- * acpi_pptt_get_cpus_from_container() - Populate a cpumask with all CPUs in a
- * 					 processor containers
- *
- * Find the specified Processor Container, and fill cpus with all the cpus
- * below it.
- *
- * Return: 0 for a complete walk, or an error if the mask is incomplete.
- */
-int acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus)
-{
-	struct __cpus_from_container_arg params;
-
-	params.acpi_cpu_id = acpi_cpu_id;
-	params.cpus = cpus;
-
-	cpumask_clear(cpus);
-	return acpi_pptt_for_each_container(&__cpus_from_container, &params);
-}
-
 static u8 acpi_cache_type(enum cache_type type)
 {
 	switch (type) {
@@ -991,215 +817,3 @@ int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
 	return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
 					  ACPI_PPTT_ACPI_IDENTICAL);
 }
-
-
-/**
- * find_acpi_cache_level_from_id() - Get the level of the specified cache
- * @cache_id: The id field of the unified cache
- *
- * Determine the level relative to any CPU for the unified cache identified by
- * cache_id. This allows the property to be found even if the CPUs are offline.
- *
- * The returned level can be used to group unified caches that are peers.
- *
- * The PPTT table must be rev 3 or later,
- *
- * If one CPUs L2 is shared with another as L3, this function will return
- * and unpredictable value.
- *
- * Return: -ENOENT if the PPTT doesn't exist, or the cache cannot be found.
- * Otherwise returns a value which represents the level of the specified cache.
- */
-int find_acpi_cache_level_from_id(u32 cache_id)
-{
-	u32 acpi_cpu_id;
-	acpi_status status;
-	int level, cpu, num_levels;
-	struct acpi_pptt_cache *cache;
-	struct acpi_table_header *table;
-	struct acpi_pptt_cache_v1* cache_v1;
-	struct acpi_pptt_processor *cpu_node;
-
-	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
-	if (ACPI_FAILURE(status)) {
-		acpi_pptt_warn_missing();
-		return -ENOENT;
-	}
-
-	if (table->revision < 3) {
-		acpi_put_table(table);
-		return -ENOENT;
-	}
-
-	/*
-	 * If we found the cache first, we'd still need to walk from each CPU
-	 * to find the level...
-	 */
-	for_each_possible_cpu(cpu) {
-
-		num_levels = 0;
-		acpi_cpu_id = get_acpi_id_for_cpu(cpu);
-		cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
-		if (!cpu_node)
-			break;
-		acpi_count_levels(table, cpu_node, &num_levels, NULL);
-
-		/* Start at 1 for L1 */
-		for (level = 1; level <= num_levels; level++) {
-			cache = acpi_find_cache_node(table, acpi_cpu_id,
-						     ACPI_PPTT_CACHE_TYPE_UNIFIED,
-						     level, &cpu_node);
-			if (!cache)
-				continue;
-
-			cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1,
-						cache,
-						sizeof(struct acpi_pptt_cache));
-
-			if (cache->flags & ACPI_PPTT_CACHE_ID_VALID &&
-			    cache_v1->cache_id == cache_id) {
-				acpi_put_table(table);
-				return level;
-			}
-		}
-	}
-
-	acpi_put_table(table);
-	return -ENOENT;
-}
-
-/**
- * acpi_pptt_get_cpumask_from_cache_id() - Get the cpus associated with the
- * 					   specified cache
- * @cache_id: The id field of the unified cache
- * @cpus: Where to buidl the cpumask
- *
- * Determine which CPUs are below this cache in the PPTT. This allows the property
- * to be found even if the CPUs are offline.
- *
- * The PPTT table must be rev 3 or later,
- *
- * Return: -ENOENT if the PPTT doesn't exist, or the cache cannot be found.
- * Otherwise returns 0 and sets the cpus in the provided cpumask.
- */
-int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus)
-{
-	u32 acpi_cpu_id;
-	acpi_status status;
-	int level, cpu, num_levels;
-	struct acpi_pptt_cache *cache;
-	struct acpi_table_header *table;
-	struct acpi_pptt_cache_v1* cache_v1;
-	struct acpi_pptt_processor *cpu_node;
-
-	cpumask_clear(cpus);
-
-	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
-	if (ACPI_FAILURE(status)) {
-		acpi_pptt_warn_missing();
-		return -ENOENT;
-	}
-
-	if (table->revision < 3) {
-		acpi_put_table(table);
-		return -ENOENT;
-	}
-
-	/*
-	 * If we found the cache first, we'd still need to walk from each cpu.
-	 */
-	for_each_possible_cpu(cpu) {
-
-		num_levels = 0;
-		acpi_cpu_id = get_acpi_id_for_cpu(cpu);
-		cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
-		if (!cpu_node)
-			break;
-		acpi_count_levels(table, cpu_node, &num_levels, NULL);
-
-		/* Start at 1 for L1 */
-		for (level = 1; level <= num_levels; level++) {
-			cache = acpi_find_cache_node(table, acpi_cpu_id,
-						     ACPI_PPTT_CACHE_TYPE_UNIFIED,
-						     level, &cpu_node);
-			if (!cache)
-				continue;
-
-			cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1,
-						cache,
-						sizeof(struct acpi_pptt_cache));
-
-			if (cache->flags & ACPI_PPTT_CACHE_ID_VALID &&
-			    cache_v1->cache_id == cache_id) {
-				cpumask_set_cpu(cpu, cpus);
-			}
-		}
-	}
-
-	acpi_put_table(table);
-	return 0;
-}
-
-/**
- * acpi_pptt_get_cpumask_from_cache_id_and_level() - Get the cpus associated with the
- *						     cache specified by id and level
- * @cache_id: The id field of the unified cache
- * @cache_level: The level of the unified cache
- * @cpus: Where to buidl the cpumask
- *
- * Determine which CPUs are below this cache in the PPTT. This allows the property
- * to be found even if the CPUs are offline.
- *
- * The PPTT table must be rev 3 or later,
- *
- * Return: -ENOENT if the PPTT doesn't exist, or the cache cannot be found.
- * Otherwise returns 0 and sets the cpus in the provided cpumask.
- */
-int acpi_pptt_get_cpumask_from_cache_id_and_level(u32 cache_id, u32 cache_level,
-						  cpumask_t *cpus)
-{
-	u32 acpi_cpu_id;
-	acpi_status status;
-	int cpu;
-	struct acpi_table_header *table;
-	struct acpi_pptt_cache *cache_node;
-	struct acpi_pptt_processor *cpu_node;
-
-	cpumask_clear(cpus);
-
-	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
-	if (ACPI_FAILURE(status)) {
-		acpi_pptt_warn_missing();
-		return -ENOENT;
-	}
-
-	/*
-	 * FIXME: Since this function does not actually use the cache id in the
-	 * PPTT table, we downgrade the revision requirement.
-	 */
-	if (table->revision < 2) {
-		acpi_put_table(table);
-		return -ENOENT;
-	}
-
-	for_each_possible_cpu(cpu) {
-		acpi_cpu_id = get_acpi_id_for_cpu(cpu);
-		cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
-		if (!cpu_node)
-			continue;
-
-		cache_node = acpi_find_cache_node(table, acpi_cpu_id,
-						  ACPI_PPTT_CACHE_TYPE_UNIFIED,
-						  cache_level, &cpu_node);
-
-		if (!cache_node)
-			continue;
-
-		cpu_node = acpi_pptt_find_cache_backwards(table, cache_node);
-		if (cpu_node->acpi_processor_id == cache_id)
-			cpumask_set_cpu(cpu, cpus);
-	}
-
-	acpi_put_table(table);
-	return 0;
-}
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 8961e4101a1d044cef058be6d73e35332624361a..6b4c292f989d2f0753fd699132402e6b0a243467 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -431,7 +431,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = {
 	ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT,
 	ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT,
 	ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT, ACPI_SIG_AGDI,
-	ACPI_SIG_NBFT, ACPI_SIG_MPAM };
+	ACPI_SIG_NBFT };
 
 #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
 
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 24200cb949ccd57a86bd4a3714c716c92d58de30..0fbb1575e297ab90bcbb4ced9858bedd257abf07 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -183,38 +183,6 @@ static bool cache_node_is_unified(struct cacheinfo *this_leaf,
 	return of_property_read_bool(np, "cache-unified");
 }
 
-unsigned long cache_of_get_id(struct device_node *np)
-{
-	struct device_node *cpu;
-	unsigned long min_id = ~0UL;
-
-	for_each_of_cpu_node(cpu) {
-		struct device_node *cache_node = cpu;
-		u64 id = of_get_cpu_hwid(cache_node, 0);
-
-		while ((cache_node = of_find_next_cache_node(cache_node))) {
-			if ((cache_node == np) && (id < min_id)) {
-				min_id = id;
-				of_node_put(cache_node);
-				break;
-			}
-			of_node_put(cache_node);
-		}
-	}
-
-	return min_id;
-}
-
-static void cache_of_set_id(struct cacheinfo *this_leaf, struct device_node *np)
-{
-	unsigned long id = cache_of_get_id(np);
-
-	if (id != ~0UL) {
-		this_leaf->id = id;
-		this_leaf->attributes |= CACHE_ID;
-	}
-}
-
 static void cache_of_set_props(struct cacheinfo *this_leaf,
 			       struct device_node *np)
 {
@@ -230,7 +198,6 @@ static void cache_of_set_props(struct cacheinfo *this_leaf,
 	cache_get_line_size(this_leaf, np);
 	cache_nr_sets(this_leaf, np);
 	cache_associativity(this_leaf);
-	cache_of_set_id(this_leaf, np);
 }
 
 static int cache_setup_of_node(unsigned int cpu)
@@ -655,19 +622,13 @@ static ssize_t file_name##_show(struct device *dev,		\
 	return sysfs_emit(buf, "%u\n", this_leaf->object);	\
 }
 
+show_one(id, id);
 show_one(level, level);
 show_one(coherency_line_size, coherency_line_size);
 show_one(number_of_sets, number_of_sets);
 show_one(physical_line_partition, physical_line_partition);
 show_one(ways_of_associativity, ways_of_associativity);
 
-static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
-
-	return sysfs_emit(buf, "%lu\n", this_leaf->id);
-}
-
 static ssize_t size_show(struct device *dev,
 			 struct device_attribute *attr, char *buf)
 {
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 2f022d1fb052b039fd31ae97fedb7ff8795901b5..77aa37de59880f33eeb35f43747333dd5a34e2d5 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -2435,7 +2435,6 @@ static int arm_cmn_probe(struct platform_device *pdev)
 	struct arm_cmn *cmn;
 	const char *name;
 	static atomic_t id;
-	struct resource *cfg;
 	int err, rootnode, this_id;
 
 	cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL);
@@ -2451,16 +2450,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
 		rootnode = arm_cmn600_acpi_probe(pdev, cmn);
 	} else {
 		rootnode = 0;
-
-		/*
-		 * Avoid registering resources as the PMUs registers are
-		 * scattered through CMN, and may appear either side of
-		 * registers for other 'devices'. (e.g. the MPAM MSC controls).
-		 */
-		cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		if (!cfg)
-			return -EINVAL;
-		cmn->base = devm_ioremap(&pdev->dev, cfg->start, resource_size(cfg));
+		cmn->base = devm_platform_ioremap_resource(pdev, 0);
 		if (IS_ERR(cmn->base))
 			return PTR_ERR(cmn->base);
 		if (cmn->part == PART_CMN600)
diff --git a/drivers/platform/Kconfig b/drivers/platform/Kconfig
index f26534a4a83b5aedb85c616f90db739a0ceaba4a..868b20361769c37048ef58392d9aae43abcaf021 100644
--- a/drivers/platform/Kconfig
+++ b/drivers/platform/Kconfig
@@ -9,8 +9,6 @@ source "drivers/platform/chrome/Kconfig"
 
 source "drivers/platform/mellanox/Kconfig"
 
-source "drivers/platform/mpam/Kconfig"
-
 source "drivers/platform/olpc/Kconfig"
 
 source "drivers/platform/surface/Kconfig"
diff --git a/drivers/platform/Makefile b/drivers/platform/Makefile
index 54ee16e4e4d8a96e7795dfe152827693b862ad56..8296d4c41eb7706133d909b215e6f059996256f1 100644
--- a/drivers/platform/Makefile
+++ b/drivers/platform/Makefile
@@ -12,4 +12,3 @@ obj-$(CONFIG_OLPC_EC)		+= olpc/
 obj-$(CONFIG_GOLDFISH)		+= goldfish/
 obj-$(CONFIG_CHROME_PLATFORMS)	+= chrome/
 obj-$(CONFIG_SURFACE_PLATFORMS)	+= surface/
-obj-$(CONFIG_ARM_CPU_RESCTRL)	+= mpam/
diff --git a/drivers/platform/mpam/Kconfig b/drivers/platform/mpam/Kconfig
deleted file mode 100644
index 75f5b2454fbe45b9531e6a680ecdc55df166200b..0000000000000000000000000000000000000000
--- a/drivers/platform/mpam/Kconfig
+++ /dev/null
@@ -1,8 +0,0 @@
-# Confusingly, this is everything but the CPU bits of MPAM. CPU here means
-# CPU resources, not containers or cgroups etc.
-config ARM_CPU_RESCTRL
-	bool
-	default y
-	depends on ARM64 && ARCH_HAS_CPU_RESCTRL
-	depends on MISC_FILESYSTEMS
-	select RESCTRL_RMID_DEPENDS_ON_CLOSID
diff --git a/drivers/platform/mpam/Makefile b/drivers/platform/mpam/Makefile
deleted file mode 100644
index 37693be531c34151078e6e6055d0bb4eea9099bf..0000000000000000000000000000000000000000
--- a/drivers/platform/mpam/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_ARM_CPU_RESCTRL) += mpam_devices.o mpam_resctrl.o
diff --git a/drivers/platform/mpam/mpam_devices.c b/drivers/platform/mpam/mpam_devices.c
deleted file mode 100644
index c7dfc1e17ed69f1b94381a23986ed75431335abd..0000000000000000000000000000000000000000
--- a/drivers/platform/mpam/mpam_devices.c
+++ /dev/null
@@ -1,2529 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2022 Arm Ltd.
-
-#define pr_fmt(fmt) "mpam: " fmt
-
-#include <linux/acpi.h>
-#include <linux/atomic.h>
-#include <linux/arm_mpam.h>
-#include <linux/bitfield.h>
-#include <linux/bitmap.h>
-#include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/irqdesc.h>
-#include <linux/list.h>
-#include <linux/lockdep.h>
-#include <linux/mutex.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
-#include <linux/platform_device.h>
-#include <linux/printk.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/workqueue.h>
-
-#include <acpi/pcc.h>
-
-#include <asm/mpam.h>
-
-#include "mpam_internal.h"
-
-extern int ddrc_freq;
-
-/*
- * mpam_list_lock protects the SRCU lists when writing. Once the
- * mpam_enabled key is enabled these lists are read-only,
- * unless the error interrupt disables the driver.
- */
-static DEFINE_MUTEX(mpam_list_lock);
-static LIST_HEAD(mpam_all_msc);
-
-struct srcu_struct mpam_srcu;
-
-enum mpam_machine_type mpam_current_machine;
-
-/* MPAM isn't available until all the MSC have been probed. */
-static u32 mpam_num_msc;
-
-static int mpam_cpuhp_state;
-static DEFINE_MUTEX(mpam_cpuhp_state_lock);
-
-/*
- * The smallest common values for any CPU or MSC in the system.
- * Generating traffic outside this range will result in screaming interrupts.
- */
-u16 mpam_partid_max;
-u8 mpam_pmg_max;
-static bool partid_max_init, partid_max_published;
-static DEFINE_SPINLOCK(partid_max_lock);
-
-/*
- * mpam is enabled once all devices have been probed from CPU online callbacks,
- * scheduled via this work_struct. If access to an MSC depends on a CPU that
- * was not brought online at boot, this can happen surprisingly late.
- */
-static DECLARE_WORK(mpam_enable_work, &mpam_enable);
-
-/*
- * All mpam error interrupts indicate a software bug. On receipt, disable the
- * driver.
- */
-static DECLARE_WORK(mpam_broken_work, &mpam_disable);
-
-/*
- * An MSC is a container for resources, each identified by their RIS index.
- * Components are a group of RIS that control the same thing.
- * Classes are the set components of the same type.
- *
- * e.g. The set of RIS that make up the L2 are a component. These are sometimes
- * termed slices. They should be configured as if they were one MSC.
- *
- * e.g. The SoC probably has more than one L2, each attached to a distinct set
- * of CPUs. All the L2 components are grouped as a class.
- *
- * When creating an MSC, struct mpam_msc is added to the all mpam_all_msc list,
- * then linked via struct mpam_ris to a component and a class.
- * The same MSC may exist under different class->component paths, but the RIS
- * index will be unique.
- */
-LIST_HEAD(mpam_classes);
-
-static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg)
-{
-	WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz);
-	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
-
-	return readl_relaxed(msc->mapped_hwpage + reg);
-}
-
-static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val)
-{
-	WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz);
-	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
-
-	writel_relaxed(val, msc->mapped_hwpage + reg);
-}
-
-#define mpam_read_partsel_reg(msc, reg)			\
-({							\
-	u32 ____ret;					\
-							\
-	lockdep_assert_held_once(&msc->part_sel_lock);	\
-	____ret = __mpam_read_reg(msc, MPAMF_##reg);	\
-							\
-	____ret;					\
-})
-
-#define mpam_write_partsel_reg(msc, reg, val)			\
-({								\
-	lockdep_assert_held_once(&msc->part_sel_lock);		\
-	__mpam_write_reg(msc, MPAMCFG_##reg, val);		\
-})
-
-#define mpam_read_monsel_reg(msc, reg)			\
-({							\
-	u32 ____ret;					\
-							\
-	lockdep_assert_held_once(&msc->mon_sel_lock);	\
-	____ret = __mpam_read_reg(msc, MSMON_##reg);	\
-							\
-	____ret;					\
-})
-
-#define mpam_write_monsel_reg(msc, reg, val)			\
-({								\
-	lockdep_assert_held_once(&msc->mon_sel_lock);		\
-	__mpam_write_reg(msc, MSMON_##reg, val);		\
-})
-
-static u64 mpam_msc_read_idr(struct mpam_msc *msc)
-{
-	u64 idr_high = 0, idr_low;
-
-	lockdep_assert_held(&msc->part_sel_lock);
-
-	idr_low = mpam_read_partsel_reg(msc, IDR);
-	if (FIELD_GET(MPAMF_IDR_HAS_EXT, idr_low))
-		idr_high = mpam_read_partsel_reg(msc, IDR + 4);
-
-	return (idr_high << 32) | idr_low;
-}
-
-static void mpam_msc_zero_esr(struct mpam_msc *msc)
-{
-	writel_relaxed(0, msc->mapped_hwpage + MPAMF_ESR);
-	if (msc->has_extd_esr)
-		writel_relaxed(0, msc->mapped_hwpage + MPAMF_ESR + 4);
-}
-
-static u64 mpam_msc_read_esr(struct mpam_msc *msc)
-{
-	u64 esr_high = 0, esr_low;
-
-	esr_low = readl_relaxed(msc->mapped_hwpage + MPAMF_ESR);
-	if (msc->has_extd_esr)
-		esr_high = readl_relaxed(msc->mapped_hwpage + MPAMF_ESR + 4);
-
-	return (esr_high << 32) | esr_low;
-}
-
-static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc)
-{
-	u32 partsel;
-
-	lockdep_assert_held(&msc->part_sel_lock);
-
-	partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) |
-		  FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, partid);
-	mpam_write_partsel_reg(msc, PART_SEL, partsel);
-}
-
-int mpam_register_requestor(u16 partid_max, u8 pmg_max)
-{
-	int err = 0;
-
-	spin_lock(&partid_max_lock);
-	if (!partid_max_init) {
-		mpam_partid_max = partid_max;
-		mpam_pmg_max = pmg_max;
-		partid_max_init = true;
-	} else if (!partid_max_published) {
-		mpam_partid_max = min(mpam_partid_max, partid_max);
-		mpam_pmg_max = min(mpam_pmg_max, pmg_max);
-	} else {
-		/* New requestors can't lower the values */
-		if ((partid_max < mpam_partid_max) || (pmg_max < mpam_pmg_max))
-			err = -EBUSY;
-	}
-	spin_unlock(&partid_max_lock);
-
-	return err;
-}
-EXPORT_SYMBOL(mpam_register_requestor);
-
-static struct mpam_component *
-mpam_component_alloc(struct mpam_class *class, int id, gfp_t gfp)
-{
-	struct mpam_component *comp;
-
-	lockdep_assert_held(&mpam_list_lock);
-
-	comp = kzalloc(sizeof(*comp), gfp);
-	if (!comp)
-		return ERR_PTR(-ENOMEM);
-
-	comp->comp_id = id;
-	INIT_LIST_HEAD_RCU(&comp->ris);
-	/* affinity is updated when ris are added */
-	INIT_LIST_HEAD_RCU(&comp->class_list);
-	comp->class = class;
-
-	list_add_rcu(&comp->class_list, &class->components);
-
-	return comp;
-}
-
-static struct mpam_component *
-mpam_component_get(struct mpam_class *class, int id, bool alloc, gfp_t gfp)
-{
-	struct mpam_component *comp;
-
-	lockdep_assert_held(&mpam_list_lock);
-
-	list_for_each_entry(comp, &class->components, class_list) {
-		if (comp->comp_id == id)
-			return comp;
-	}
-
-	if (!alloc)
-		return ERR_PTR(-ENOENT);
-
-	return mpam_component_alloc(class, id, gfp);
-}
-
-static struct mpam_class *
-mpam_class_alloc(u8 level_idx, enum mpam_class_types type, gfp_t gfp)
-{
-	struct mpam_class *class;
-
-	lockdep_assert_held(&mpam_list_lock);
-
-	class = kzalloc(sizeof(*class), gfp);
-	if (!class)
-		return ERR_PTR(-ENOMEM);
-
-	INIT_LIST_HEAD_RCU(&class->components);
-	/* affinity is updated when ris are added */
-	class->level = level_idx;
-	class->type = type;
-	INIT_LIST_HEAD_RCU(&class->classes_list);
-	ida_init(&class->ida_csu_mon);
-	ida_init(&class->ida_mbwu_mon);
-
-	list_add_rcu(&class->classes_list, &mpam_classes);
-
-	return class;
-}
-
-static struct mpam_class *
-mpam_class_get(u8 level_idx, enum mpam_class_types type, bool alloc, gfp_t gfp)
-{
-	bool found = false;
-	struct mpam_class *class;
-
-	lockdep_assert_held(&mpam_list_lock);
-
-	list_for_each_entry(class, &mpam_classes, classes_list) {
-		if (class->type == type && class->level == level_idx) {
-			found = true;
-			break;
-		}
-	}
-
-	if (found)
-		return class;
-
-	if (!alloc)
-		return ERR_PTR(-ENOENT);
-
-	return mpam_class_alloc(level_idx, type, gfp);
-}
-
-static void mpam_class_destroy(struct mpam_class *class)
-{
-	lockdep_assert_held(&mpam_list_lock);
-
-	list_del_rcu(&class->classes_list);
-	synchronize_srcu(&mpam_srcu);
-	kfree(class);
-}
-
-static void mpam_comp_destroy(struct mpam_component *comp)
-{
-	struct mpam_class *class = comp->class;
-
-	lockdep_assert_held(&mpam_list_lock);
-
-	list_del_rcu(&comp->class_list);
-	synchronize_srcu(&mpam_srcu);
-	kfree(comp);
-
-	if (list_empty(&class->components))
-		mpam_class_destroy(class);
-}
-
-/* synchronise_srcu() before freeing ris */
-static void mpam_ris_destroy(struct mpam_msc_ris *ris)
-{
-	struct mpam_component *comp = ris->comp;
-	struct mpam_class *class = comp->class;
-	struct mpam_msc *msc = ris->msc;
-
-	lockdep_assert_held(&mpam_list_lock);
-	lockdep_assert_preemption_enabled();
-
-	clear_bit(ris->ris_idx, msc->ris_idxs);
-	list_del_rcu(&ris->comp_list);
-	list_del_rcu(&ris->msc_list);
-
-	cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity);
-	cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity);
-
-	if (list_empty(&comp->ris))
-		mpam_comp_destroy(comp);
-}
-
-/*
- * There are two ways of reaching a struct mpam_msc_ris. Via the
- * class->component->ris, or via the msc.
- * When destroying the msc, the other side needs unlinking and cleaning up too.
- * synchronise_srcu() before freeing msc.
- */
-static void mpam_msc_destroy(struct mpam_msc *msc)
-{
-	struct mpam_msc_ris *ris, *tmp;
-
-	lockdep_assert_held(&mpam_list_lock);
-	lockdep_assert_preemption_enabled();
-
-	list_for_each_entry_safe(ris, tmp, &msc->ris, msc_list)
-		mpam_ris_destroy(ris);
-}
-
-/*
- * The cacheinfo structures are only populated when CPUs are online.
- * This helper walks the device tree to include offline CPUs too.
- */
-static int get_cpumask_from_cache_id(u32 cache_id, u32 cache_level,
-				     cpumask_t *affinity)
-{
-	int cpu, err;
-	u32 iter_level;
-	int iter_cache_id;
-	struct device_node *iter;
-
-	if (!acpi_disabled) {
-		if (mpam_current_machine == MPAM_YITIAN710)
-			return acpi_pptt_get_cpumask_from_cache_id_and_level(
-					cache_id, cache_level, affinity);
-		return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity);
-	}
-
-	for_each_possible_cpu(cpu) {
-		iter = of_get_cpu_node(cpu, NULL);
-		if (!iter) {
-			pr_err("Failed to find cpu%d device node\n", cpu);
-			return -ENOENT;
-		}
-
-		while ((iter = of_find_next_cache_node(iter))) {
-			err = of_property_read_u32(iter, "cache-level",
-						   &iter_level);
-			if (err || (iter_level != cache_level)) {
-				of_node_put(iter);
-				continue;
-			}
-
-			/*
-			 * get_cpu_cacheinfo_id() isn't ready until sometime
-			 * during device_initcall(). Use cache_of_get_id().
-			 */
-			iter_cache_id = cache_of_get_id(iter);
-			if (cache_id == ~0UL) {
-				of_node_put(iter);
-				continue;
-			}
-
-			if (iter_cache_id == cache_id)
-				cpumask_set_cpu(cpu, affinity);
-
-			of_node_put(iter);
-		}
-	}
-
-	return 0;
-}
-
-
-/*
- * cpumask_of_node() only knows about online CPUs. This can't tell us whether
- * a class is represented on all possible CPUs.
- */
-static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		if (node_id == cpu_to_node(cpu))
-			cpumask_set_cpu(cpu, affinity);
-	}
-}
-
-static int get_cpumask_from_cache(struct device_node *cache,
-				  cpumask_t *affinity)
-{
-	int err;
-	u32 cache_level;
-	int cache_id;
-
-	err = of_property_read_u32(cache, "cache-level", &cache_level);
-	if (err) {
-		pr_err("Failed to read cache-level from cache node\n");
-		return -ENOENT;
-	}
-
-	cache_id = cache_of_get_id(cache);
-	if (cache_id == ~0UL) {
-		pr_err("Failed to calculate cache-id from cache node\n");
-		return -ENOENT;
-	}
-
-	return get_cpumask_from_cache_id(cache_id, cache_level, affinity);
-}
-
-static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity,
-				 enum mpam_class_types type,
-				 struct mpam_class *class,
-				 struct mpam_component *comp)
-{
-	int err;
-
-	switch (type) {
-	case MPAM_CLASS_CACHE:
-		err = get_cpumask_from_cache_id(comp->comp_id, class->level,
-						affinity);
-		if (err)
-			return err;
-
-		if (cpumask_empty(affinity))
-			pr_warn_once("%s no CPUs associated with cache node",
-				     dev_name(&msc->pdev->dev));
-
-		break;
-	case MPAM_CLASS_MEMORY:
-		get_cpumask_from_node_id(comp->comp_id, affinity);
-		if (cpumask_empty(affinity))
-			pr_warn_once("%s no CPUs associated with memory node",
-				     dev_name(&msc->pdev->dev));
-		break;
-	case MPAM_CLASS_UNKNOWN:
-		return 0;
-	}
-
-	cpumask_and(affinity, affinity, &msc->accessibility);
-
-	return 0;
-}
-
-static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx,
-				  enum mpam_class_types type, u8 class_id,
-				  int component_id, gfp_t gfp)
-{
-	int err;
-	struct mpam_msc_ris *ris;
-	struct mpam_class *class;
-	struct mpam_component *comp;
-
-	lockdep_assert_held(&mpam_list_lock);
-
-	if (test_and_set_bit(ris_idx, msc->ris_idxs))
-		return -EBUSY;
-
-	ris = devm_kzalloc(&msc->pdev->dev, sizeof(*ris), gfp);
-	if (!ris)
-		return -ENOMEM;
-
-	class = mpam_class_get(class_id, type, true, gfp);
-	if (IS_ERR(class))
-		return PTR_ERR(class);
-
-	comp = mpam_component_get(class, component_id, true, gfp);
-	if (IS_ERR(comp)) {
-		if (list_empty(&class->components))
-			mpam_class_destroy(class);
-		return PTR_ERR(comp);
-	}
-
-	err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp);
-	if (err) {
-		if (list_empty(&class->components))
-			mpam_class_destroy(class);
-		return err;
-	}
-
-	ris->ris_idx = ris_idx;
-	INIT_LIST_HEAD_RCU(&ris->comp_list);
-	INIT_LIST_HEAD_RCU(&ris->msc_list);
-	ris->msc = msc;
-	ris->comp = comp;
-
-	cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity);
-	cpumask_or(&class->affinity, &class->affinity, &ris->affinity);
-	list_add_rcu(&ris->comp_list, &comp->ris);
-	list_add_rcu(&ris->msc_list, &msc->ris);
-
-	return 0;
-}
-
-int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
-		    enum mpam_class_types type, u8 class_id, int component_id)
-{
-	int err;
-
-	mutex_lock(&mpam_list_lock);
-	err = mpam_ris_create_locked(msc, ris_idx, type, class_id,
-				     component_id, GFP_KERNEL);
-	mutex_unlock(&mpam_list_lock);
-
-	return err;
-}
-
-static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc,
-						   u8 ris_idx)
-{
-	int err;
-	struct mpam_msc_ris *ris, *found = ERR_PTR(-ENOENT);
-
-	lockdep_assert_held(&mpam_list_lock);
-
-	if (!test_bit(ris_idx, msc->ris_idxs)) {
-		err = mpam_ris_create_locked(msc, ris_idx, MPAM_CLASS_UNKNOWN,
-					     0, 0, GFP_ATOMIC);
-		if (err)
-			return ERR_PTR(err);
-	}
-
-	list_for_each_entry(ris, &msc->ris, msc_list) {
-		if (ris->ris_idx == ris_idx) {
-			found = ris;
-			break;
-		}
-	}
-
-	return found;
-}
-
-static void mpam_ris_hw_probe(struct mpam_msc_ris *ris)
-{
-	int err;
-	struct mpam_msc *msc = ris->msc;
-	struct mpam_props *props = &ris->props;
-	struct mpam_class *class = ris->comp->class;
-
-	lockdep_assert_held(&msc->lock);
-	lockdep_assert_held(&msc->part_sel_lock);
-
-	/* Cache Capacity Partitioning */
-	if (FIELD_GET(MPAMF_IDR_HAS_CCAP_PART, ris->idr)) {
-		u32 ccap_features = mpam_read_partsel_reg(msc, CCAP_IDR);
-
-		props->cmax_wd = FIELD_GET(MPAMF_CCAP_IDR_CMAX_WD, ccap_features);
-		if (props->cmax_wd)
-			mpam_set_feature(mpam_feat_ccap_part, props);
-	}
-
-	/* Cache Portion partitioning */
-	if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) {
-		u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR);
-
-		props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, cpor_features);
-		if (props->cpbm_wd)
-			mpam_set_feature(mpam_feat_cpor_part, props);
-	}
-
-	/* Memory bandwidth partitioning */
-	if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) {
-		u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR);
-
-		/* portion bitmap resolution */
-		props->mbw_pbm_bits = FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features);
-		if (props->mbw_pbm_bits &&
-		    FIELD_GET(MPAMF_MBW_IDR_HAS_PBM, mbw_features))
-			mpam_set_feature(mpam_feat_mbw_part, props);
-
-		props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features);
-		if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features))
-			mpam_set_feature(mpam_feat_mbw_max, props);
-
-		if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MIN, mbw_features))
-			mpam_set_feature(mpam_feat_mbw_min, props);
-
-		if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_PROP, mbw_features))
-			mpam_set_feature(mpam_feat_mbw_prop, props);
-	}
-
-	/* Priority partitioning */
-	if (FIELD_GET(MPAMF_IDR_HAS_PRI_PART, ris->idr)) {
-		u32 pri_features = mpam_read_partsel_reg(msc, PRI_IDR);
-
-		props->intpri_wd = FIELD_GET(MPAMF_PRI_IDR_INTPRI_WD, pri_features);
-		if (props->intpri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_INTPRI, pri_features)) {
-			mpam_set_feature(mpam_feat_intpri_part, props);
-			if (FIELD_GET(MPAMF_PRI_IDR_INTPRI_0_IS_LOW, pri_features))
-				mpam_set_feature(mpam_feat_intpri_part_0_low, props);
-		}
-
-		props->dspri_wd = FIELD_GET(MPAMF_PRI_IDR_DSPRI_WD, pri_features);
-		if (props->dspri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_DSPRI, pri_features)) {
-			mpam_set_feature(mpam_feat_dspri_part, props);
-			if (FIELD_GET(MPAMF_PRI_IDR_DSPRI_0_IS_LOW, pri_features))
-				mpam_set_feature(mpam_feat_dspri_part_0_low, props);
-		}
-	}
-
-	/* Performance Monitoring */
-	if (FIELD_GET(MPAMF_IDR_HAS_MSMON, ris->idr)) {
-		u32 msmon_features = mpam_read_partsel_reg(msc, MSMON_IDR);
-
-		if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_CSU, msmon_features)) {
-			u32 csumonidr, discard;
-
-			/*
-			 * If the firmware max-nrdy-us property is missing, the
-			 * CSU counters can't be used. Should we wait forever?
-			 */
-			err = device_property_read_u32(&msc->pdev->dev,
-						       "arm,not-ready-us",
-						       &discard);
-
-			csumonidr = mpam_read_partsel_reg(msc, CSUMON_IDR);
-			props->num_csu_mon = FIELD_GET(MPAMF_CSUMON_IDR_NUM_MON, csumonidr);
-			if (props->num_csu_mon && !err)
-				mpam_set_feature(mpam_feat_msmon_csu, props);
-			else if (props->num_csu_mon)
-				pr_err_once("Counters are not usable because not-ready timeout was not provided by firmware.");
-		}
-		if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) {
-			bool has_long;
-			u32 mbwumonidr = mpam_read_partsel_reg(msc, MBWUMON_IDR);
-
-			props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumonidr);
-			if (props->num_mbwu_mon)
-				mpam_set_feature(mpam_feat_msmon_mbwu, props);
-
-			if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumonidr))
-				mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props);
-
-			/*
-			 * Treat long counter and its extension, lwd as mutually
-			 * exclusive feature bits. Though these are dependent
-			 * fields at the implementation level, there would never
-			 * be a need for mpam_feat_msmon_mbwu_44counter (long
-			 * counter) and mpam_feat_msmon_mbwu_63counter (lwd)
-			 * bits to be set together.
-			 *
-			 * mpam_feat_msmon_mbwu isn't treated as an exclusive
-			 * bit as this feature bit would be used as the "front
-			 * facing feature bit" for any checks related to mbwu
-			 * monitors.
-			 */
-			has_long = FIELD_GET(MPAMF_MBWUMON_IDR_HAS_LONG, mbwumonidr);
-			if (props->num_mbwu_mon && has_long) {
-				if (FIELD_GET(MPAMF_MBWUMON_IDR_LWD, mbwumonidr))
-					mpam_set_feature(mpam_feat_msmon_mbwu_63counter, props);
-				else
-					mpam_set_feature(mpam_feat_msmon_mbwu_44counter, props);
-			}
-		}
-	}
-
-	if (FIELD_GET(MPAMF_IDR_HAS_IMPL_IDR, ris->idr))
-		if (mpam_current_machine == MPAM_YITIAN710 && class->type == MPAM_CLASS_MEMORY)
-			mpam_set_feature(mpam_feat_impl_msmon_mbwu, props);
-
-	/*
-	 * RIS with PARTID narrowing don't have enough storage for one
-	 * configuration per PARTID. If these are in a class we could use,
-	 * reduce the supported partid_max to match the numer of intpartid.
-	 * If the class is unknown, just ignore it.
-	 */
-	if (FIELD_GET(MPAMF_IDR_HAS_PARTID_NRW, ris->idr) &&
-	    class->type != MPAM_CLASS_UNKNOWN) {
-		u32 nrwidr = mpam_read_partsel_reg(msc, PARTID_NRW_IDR);
-		u16 partid_max = FIELD_GET(MPAMF_PARTID_NRW_IDR_INTPARTID_MAX, nrwidr);
-
-		mpam_set_feature(mpam_feat_partid_nrw, props);
-		msc->partid_max = min(msc->partid_max, partid_max);
-	}
-}
-
-static int mpam_msc_hw_probe(struct mpam_msc *msc)
-{
-	u64 idr;
-	u16 partid_max;
-	u8 ris_idx, pmg_max;
-	struct mpam_msc_ris *ris;
-
-	lockdep_assert_held(&msc->lock);
-
-	spin_lock(&msc->part_sel_lock);
-	idr = mpam_read_partsel_reg(msc, AIDR);
-	if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) {
-		pr_err_once("%s does not match MPAM architecture v1.0\n",
-			    dev_name(&msc->pdev->dev));
-		spin_unlock(&msc->part_sel_lock);
-		return -EIO;
-	}
-
-	idr = mpam_msc_read_idr(msc);
-	spin_unlock(&msc->part_sel_lock);
-
-	msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr);
-
-	/* Use these values so partid/pmg always starts with a valid value */
-	msc->partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
-	msc->pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
-
-	for (ris_idx = 0; ris_idx <= msc->ris_max; ris_idx++) {
-		spin_lock(&msc->part_sel_lock);
-		__mpam_part_sel(ris_idx, 0, msc);
-		idr = mpam_msc_read_idr(msc);
-		spin_unlock(&msc->part_sel_lock);
-
-		partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr);
-		pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr);
-		msc->partid_max = min(msc->partid_max, partid_max);
-		msc->pmg_max = min(msc->pmg_max, pmg_max);
-		msc->has_extd_esr = FIELD_GET(MPAMF_IDR_HAS_EXT_ESR, idr);
-
-		ris = mpam_get_or_create_ris(msc, ris_idx);
-		if (IS_ERR(ris)) {
-			return PTR_ERR(ris);
-		}
-		ris->idr = idr;
-
-		spin_lock(&msc->part_sel_lock);
-		__mpam_part_sel(ris_idx, 0, msc);
-		mpam_ris_hw_probe(ris);
-		spin_unlock(&msc->part_sel_lock);
-	}
-
-	spin_lock(&partid_max_lock);
-	mpam_partid_max = min(mpam_partid_max, msc->partid_max);
-	mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max);
-	spin_unlock(&partid_max_lock);
-
-	msc->probed = true;
-
-	return 0;
-}
-
-struct mon_read
-{
-	struct mpam_msc_ris		*ris;
-	struct mon_cfg			*ctx;
-	enum mpam_device_features	type;
-	u64				*val;
-	int				err;
-};
-
-static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris)
-{
-	return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) ||
-		mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props));
-}
-
-static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
-{
-	int retry = 3;
-	u32 mbwu_l_low;
-	u64 mbwu_l_high1, mbwu_l_high2;
-
-	lockdep_assert_held_once(&msc->mon_sel_lock);
-
-	WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
-	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
-
-	mbwu_l_high2 = readl_relaxed(msc->mapped_hwpage + MSMON_MBWU_L + 4);
-	do {
-		mbwu_l_high1 = mbwu_l_high2;
-		mbwu_l_low = readl_relaxed(msc->mapped_hwpage + MSMON_MBWU_L);
-		mbwu_l_high2 = readl_relaxed(msc->mapped_hwpage + MSMON_MBWU_L + 4);
-
-		retry--;
-	} while (mbwu_l_high1 != mbwu_l_high2 && retry > 0);
-
-	if (mbwu_l_high2 == mbwu_l_high1)
-		return (mbwu_l_high1 << 32) | mbwu_l_low;
-	return MSMON___NRDY_L;
-}
-
-static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
-{
-	lockdep_assert_held_once(&msc->mon_sel_lock);
-
-	WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
-	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
-
-	writel_relaxed(0, msc->mapped_hwpage + MSMON_MBWU_L);
-	writel_relaxed(0, msc->mapped_hwpage + MSMON_MBWU_L + 4);
-}
-
-static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
-				   u32 *flt_val)
-{
-	struct mon_cfg *ctx = m->ctx;
-
-	switch (m->type) {
-	case mpam_feat_msmon_csu:
-		*ctl_val = MSMON_CFG_MBWU_CTL_TYPE_CSU;
-		break;
-	case mpam_feat_msmon_mbwu:
-		*ctl_val = MSMON_CFG_MBWU_CTL_TYPE_MBWU;
-		break;
-	default:
-		return;
-	}
-
-	/*
-	 * For CSU counters its implementation-defined what happens when not
-	 * filtering by partid.
-	 */
-	*ctl_val |= MSMON_CFG_x_CTL_MATCH_PARTID;
-
-	*flt_val = FIELD_PREP(MSMON_CFG_MBWU_FLT_PARTID, ctx->partid);
-	*flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_RWBW, ctx->opts);
-	if (m->ctx->match_pmg) {
-		*ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG;
-		*flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_PMG, ctx->pmg);
-	}
-
-	if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props))
-		*flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_RWBW, ctx->opts);
-}
-
-static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
-				    u32 *flt_val)
-{
-	struct mpam_msc *msc = m->ris->msc;
-
-	switch (m->type) {
-	case mpam_feat_msmon_csu:
-		*ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL);
-		*flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT);
-		break;
-	case mpam_feat_msmon_mbwu:
-		*ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
-		*flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
-		break;
-	default:
-		return;
-	}
-}
-
-static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
-				     u32 flt_val)
-{
-	struct mpam_msc *msc = m->ris->msc;
-	struct msmon_mbwu_state *mbwu_state;
-
-	/*
-	 * Write the ctl_val with the enable bit cleared, reset the counter,
-	 * then enable counter.
-	 */
-	switch (m->type) {
-	case mpam_feat_msmon_csu:
-		mpam_write_monsel_reg(msc, CFG_CSU_FLT, flt_val);
-		mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val);
-		mpam_write_monsel_reg(msc, CSU, 0);
-		mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val|MSMON_CFG_x_CTL_EN);
-		break;
-	case mpam_feat_msmon_mbwu:
-		mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
-		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
-
-		if (mpam_ris_has_mbwu_long_counter(m->ris))
-			mpam_msc_zero_mbwu_l(m->ris->msc);
-		else
-			mpam_write_monsel_reg(msc, MBWU, 0);
-
-		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val|MSMON_CFG_x_CTL_EN);
-
-		mbwu_state = &m->ris->mbwu_state[m->ctx->mon];
-		if (mbwu_state)
-			mbwu_state->prev_val = 0;
-
-		break;
-	default:
-		return;
-	}
-}
-
-static u64 mpam_msmon_overflow_val(struct mpam_msc_ris *ris)
-{
-	/* TODO: implement scaling counters */
-	if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props))
-		return GENMASK_ULL(62, 0);
-	else if (mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props))
-		return GENMASK_ULL(43, 0);
-	else
-		return GENMASK_ULL(30, 0);
-}
-
-static void __ris_msmon_read(void *arg)
-{
-	bool nrdy = false;
-	unsigned long flags;
-	bool config_mismatch;
-	struct mon_read *m = arg;
-	u64 now, overflow_val = 0;
-	struct mon_cfg *ctx = m->ctx;
-	bool reset_on_next_read = false;
-	struct mpam_msc_ris *ris = m->ris;
-	struct mpam_msc *msc = m->ris->msc;
-	struct msmon_mbwu_state *mbwu_state;
-	u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt;
-
-	lockdep_assert_held(&msc->lock);
-
-	spin_lock_irqsave(&msc->mon_sel_lock, flags);
-	mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, ctx->mon) |
-		  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
-	mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);
-
-	if (m->type == mpam_feat_msmon_mbwu) {
-		mbwu_state = &ris->mbwu_state[ctx->mon];
-		if (mbwu_state) {
-			reset_on_next_read = mbwu_state->reset_on_next_read;
-			mbwu_state->reset_on_next_read = false;
-		}
-	}
-
-	/*
-	 * Read the existing configuration to avoid re-writing the same values.
-	 * This saves waiting for 'nrdy' on subsequent reads.
-	 */
-	read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt);
-	gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val);
-	config_mismatch = cur_flt != flt_val ||
-			  cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN);
-
-	if (config_mismatch || reset_on_next_read)
-		write_msmon_ctl_flt_vals(m, ctl_val, flt_val);
-
-	switch (m->type) {
-	case mpam_feat_msmon_csu:
-		now = mpam_read_monsel_reg(msc, CSU);
-		nrdy = now & MSMON___NRDY;
-		now = FIELD_GET(MSMON___VALUE, now);
-		break;
-	case mpam_feat_msmon_mbwu:
-		/*
-		 * If long or lwd counters are supported, use them, else revert
-		 * to the 32 bit counter.
-		 */
-		if (mpam_ris_has_mbwu_long_counter(ris)) {
-			now = mpam_msc_read_mbwu_l(msc);
-			nrdy = now & MSMON___NRDY_L;
-			if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props))
-				now = FIELD_GET(MSMON___LWD_VALUE, now);
-			else
-				now = FIELD_GET(MSMON___L_VALUE, now);
-		} else {
-			now = mpam_read_monsel_reg(msc, MBWU);
-			nrdy = now & MSMON___NRDY;
-			now = FIELD_GET(MSMON___VALUE, now);
-		}
-
-		if (nrdy)
-			break;
-
-		if (!mbwu_state)
-			break;
-
-		/* Add any pre-overflow value to the mbwu_state->val */
-		if (mbwu_state->prev_val > now)
-			overflow_val = mpam_msmon_overflow_val(ris) - mbwu_state->prev_val;
-
-		mbwu_state->prev_val = now;
-		mbwu_state->correction += overflow_val;
-
-		/* Include bandwidth consumed before the last hardware reset */
-		now += mbwu_state->correction;
-		break;
-	default:
-		return;
-	}
-	spin_unlock_irqrestore(&msc->mon_sel_lock, flags);
-
-	if (nrdy) {
-		m->err = -EBUSY;
-		return;
-	}
-
-	*(m->val) += now;
-}
-
-static void __ris_impl_msmon_read(void *arg)
-{
-	unsigned long flags;
-	struct mon_read *m = arg;
-	u64 mb_val = 0;
-	struct mon_cfg *ctx = m->ctx;
-	struct mpam_msc *msc = m->ris->msc;
-	u32 custom_reg_base_addr, cycle, val;
-
-	lockdep_assert_held(&msc->lock);
-	if (m->type != mpam_feat_impl_msmon_mbwu)
-		return;
-
-	/* Other machine can extend this function */
-	if (mpam_current_machine != MPAM_YITIAN710)
-		return;
-
-	spin_lock_irqsave(&msc->part_sel_lock, flags);
-
-	__mpam_write_reg(msc, MPAMCFG_PART_SEL, ctx->mon);
-
-	custom_reg_base_addr = __mpam_read_reg(msc, MPAMF_IMPL_IDR);
-
-	cycle = __mpam_read_reg(msc, custom_reg_base_addr + MPAMF_CUST_WINDW_OFFSET);
-	val = __mpam_read_reg(msc, custom_reg_base_addr + MPAMF_CUST_MBWC_OFFSET);
-
-	spin_unlock_irqrestore(&msc->part_sel_lock, flags);
-
-	if (val & MSMON___NRDY) {
-		m->err = -EBUSY;
-		return;
-	}
-
-	mb_val = MBWU_GET(val);
-
-	mb_val = mb_val * 32 * ddrc_freq * 1000000 / cycle; /* B/s */
-	*(m->val) += mb_val;
-}
-
-static int _msmon_read(struct mpam_component *comp, struct mon_read *arg)
-{
-	int err, idx;
-	struct mpam_msc *msc;
-	struct mpam_msc_ris *ris;
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(ris, &comp->ris, comp_list) {
-		arg->ris = ris;
-
-		msc = ris->msc;
-		mutex_lock(&msc->lock);
-		if (arg->type == mpam_feat_msmon_csu ||
-		    arg->type == mpam_feat_msmon_mbwu)
-			err = smp_call_function_any(&msc->accessibility,
-					__ris_msmon_read, arg, true);
-		else if (arg->type == mpam_feat_impl_msmon_mbwu)
-			err = smp_call_function_any(&msc->accessibility,
-					__ris_impl_msmon_read, arg, true);
-		else
-			err = -EOPNOTSUPP;
-		mutex_unlock(&msc->lock);
-		if (!err && arg->err)
-			err = arg->err;
-		if (err)
-			break;
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-
-	return err;
-}
-
-int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx,
-		    enum mpam_device_features type, u64 *val)
-{
-	int err;
-	struct mon_read arg;
-	u64 wait_jiffies = 0;
-	struct mpam_props *cprops = &comp->class->props;
-
-	might_sleep();
-
-	if (!mpam_is_enabled())
-		return -EIO;
-
-	if (!mpam_has_feature(type, cprops))
-		return -EOPNOTSUPP;
-
-	memset(&arg, 0, sizeof(arg));
-	arg.ctx = ctx;
-	arg.type = type;
-	arg.val = val;
-	*val = 0;
-
-	err = _msmon_read(comp, &arg);
-	if (err == -EBUSY)
-		wait_jiffies = usecs_to_jiffies(comp->class->nrdy_usec);
-
-	while (wait_jiffies)
-		wait_jiffies = schedule_timeout_uninterruptible(wait_jiffies);
-
-	if (err == -EBUSY) {
-		memset(&arg, 0, sizeof(arg));
-		arg.ctx = ctx;
-		arg.type = type;
-		arg.val = val;
-		*val = 0;
-
-		err = _msmon_read(comp, &arg);
-	}
-
-	return err;
-}
-
-void mpam_msmon_reset_all_mbwu(struct mpam_component *comp)
-{
-	int idx, i;
-	unsigned long flags;
-	struct mpam_msc *msc;
-	struct mpam_msc_ris *ris;
-
-	if (!mpam_is_enabled())
-		return;
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(ris, &comp->ris, comp_list) {
-		if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props))
-			continue;
-
-		msc = ris->msc;
-		spin_lock_irqsave(&msc->mon_sel_lock, flags);
-		for (i = 0; i < ris->props.num_mbwu_mon; i++) {
-			ris->mbwu_state[i].correction = 0;
-			ris->mbwu_state[i].reset_on_next_read = true;
-		}
-		spin_unlock_irqrestore(&msc->mon_sel_lock, flags);
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-}
-
-void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx)
-{
-	int idx;
-	unsigned long flags;
-	struct mpam_msc *msc;
-	struct mpam_msc_ris *ris;
-
-	if (!mpam_is_enabled())
-		return;
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(ris, &comp->ris, comp_list) {
-		if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props))
-			continue;
-
-		msc = ris->msc;
-		spin_lock_irqsave(&msc->mon_sel_lock, flags);
-		ris->mbwu_state[ctx->mon].correction = 0;
-		ris->mbwu_state[ctx->mon].reset_on_next_read = true;
-		spin_unlock_irqrestore(&msc->mon_sel_lock, flags);
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-}
-
-static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd)
-{
-	u32 num_words, msb;
-	u32 bm = ~0;
-	int i;
-
-	lockdep_assert_held(&msc->part_sel_lock);
-
-	/*
-	 * Write all ~0 to all but the last 32bit-word, which may
-	 * have fewer bits...
-	 */
-	num_words = DIV_ROUND_UP(wd, 32);
-	for (i = 0; i < num_words - 1; i++, reg += sizeof(bm))
-		__mpam_write_reg(msc, reg, bm);
-
-	/*
-	 * ....and then the last (maybe) partial 32bit word. When wd is a
-	 * multiple of 32, msb should be 31 to write a full 32bit word.
-	 */
-	msb = (wd - 1) % 32;
-	bm = GENMASK(msb , 0);
-	if (bm)
-		__mpam_write_reg(msc, reg, bm);
-}
-
-static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid,
-				      struct mpam_config *cfg)
-{
-	u32 pri_val = 0;
-	u16 cmax = MPAMCFG_CMAX_CMAX;
-	struct mpam_msc *msc = ris->msc;
-	u16 bwa_fract = MPAMCFG_MBW_MAX_MAX;
-	struct mpam_props *rprops = &ris->props;
-	u16 dspri = GENMASK(rprops->dspri_wd, 0);
-	u16 intpri = GENMASK(rprops->intpri_wd, 0);
-	u32 custom_reg_base_addr;
-
-	spin_lock(&msc->part_sel_lock);
-	__mpam_part_sel(ris->ris_idx, partid, msc);
-
-	if(mpam_has_feature(mpam_feat_partid_nrw, rprops))
-		mpam_write_partsel_reg(msc, INTPARTID,
-				      (MPAMCFG_PART_SEL_INTERNAL | partid));
-
-	if (mpam_has_feature(mpam_feat_cpor_part, rprops)) {
-		if (mpam_has_feature(mpam_feat_cpor_part, cfg))
-			mpam_write_partsel_reg(msc, CPBM, cfg->cpbm);
-		else
-			mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM,
-					      rprops->cpbm_wd);
-	}
-
-	if (mpam_has_feature(mpam_feat_mbw_part, rprops)) {
-		if (mpam_has_feature(mpam_feat_mbw_part, cfg))
-			mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm);
-		else
-			mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM,
-					      rprops->mbw_pbm_bits);
-	}
-
-	if (mpam_has_feature(mpam_feat_mbw_min, rprops))
-		mpam_write_partsel_reg(msc, MBW_MIN, 0);
-
-	if (mpam_has_feature(mpam_feat_mbw_max, rprops)) {
-		if (mpam_has_feature(mpam_feat_mbw_max, cfg))
-			mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max);
-		else
-			mpam_write_partsel_reg(msc, MBW_MAX, bwa_fract);
-	}
-
-	if (mpam_has_feature(mpam_feat_mbw_prop, rprops))
-		mpam_write_partsel_reg(msc, MBW_PROP, bwa_fract);
-
-	if (mpam_has_feature(mpam_feat_ccap_part, rprops))
-		mpam_write_partsel_reg(msc, CMAX, cmax);
-
-	if (mpam_has_feature(mpam_feat_intpri_part, rprops) ||
-	    mpam_has_feature(mpam_feat_dspri_part, rprops)) {
-		/* aces high? */
-		if (!mpam_has_feature(mpam_feat_intpri_part_0_low, rprops))
-			intpri = 0;
-		if (!mpam_has_feature(mpam_feat_dspri_part_0_low, rprops))
-			dspri = 0;
-
-		if (mpam_has_feature(mpam_feat_intpri_part, rprops))
-			pri_val |= FIELD_PREP(MPAMCFG_PRI_INTPRI, intpri);
-		if (mpam_has_feature(mpam_feat_dspri_part, rprops))
-			pri_val |= FIELD_PREP(MPAMCFG_PRI_DSPRI, dspri);
-
-		mpam_write_partsel_reg(msc, PRI, pri_val);
-	}
-
-	if (FIELD_GET(MPAMF_IDR_HAS_IMPL_IDR, ris->idr)) {
-		if (mpam_current_machine == MPAM_YITIAN710) {
-			custom_reg_base_addr = __mpam_read_reg(msc, MPAMF_IMPL_IDR);
-			__mpam_write_reg(msc, custom_reg_base_addr +
-					 MPAMF_CUST_WINDW_OFFSET,
-					 MBWU_WINWD_MAX);
-		}
-	}
-
-	spin_unlock(&msc->part_sel_lock);
-}
-
-struct reprogram_ris {
-	struct mpam_msc_ris *ris;
-	struct mpam_config *cfg;
-};
-
-/* Call with MSC lock held */
-static int mpam_reprogram_ris(void *_arg)
-{
-	u16 partid, partid_max;
-	struct reprogram_ris *arg = _arg;
-	struct mpam_msc_ris *ris = arg->ris;
-	struct mpam_config *cfg = arg->cfg;
-
-	if (ris->in_reset_state)
-		return 0;
-
-	spin_lock(&partid_max_lock);
-	partid_max = mpam_partid_max;
-	spin_unlock(&partid_max_lock);
-	for (partid = 0; partid <= partid_max; partid++)
-		mpam_reprogram_ris_partid(ris, partid, cfg);
-
-	return 0;
-}
-
-static int mpam_restore_mbwu_state(void *_ris)
-{
-	int i;
-	struct mon_read mwbu_arg;
-	struct mpam_msc_ris *ris = _ris;
-
-	for (i = 0; i < ris->props.num_mbwu_mon; i++) {
-		if (ris->mbwu_state[i].enabled) {
-			mwbu_arg.ris = ris;
-			mwbu_arg.ctx = &ris->mbwu_state[i].cfg;
-			mwbu_arg.type = mpam_feat_msmon_mbwu;
-
-			__ris_msmon_read(&mwbu_arg);
-		}
-	}
-
-	return 0;
-}
-
-static int mpam_save_mbwu_state(void *arg)
-{
-	int i;
-	u64 val;
-	struct mon_cfg *cfg;
-	unsigned long flags;
-	u32 cur_flt, cur_ctl, mon_sel;
-	struct mpam_msc_ris *ris = arg;
-	struct mpam_msc *msc = ris->msc;
-	struct msmon_mbwu_state *mbwu_state;
-
-	for (i = 0; i < ris->props.num_mbwu_mon; i++) {
-		mbwu_state = &ris->mbwu_state[i];
-		cfg = &mbwu_state->cfg;
-
-		spin_lock_irqsave(&msc->mon_sel_lock, flags);
-		mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, i) |
-			  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
-		mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);
-
-		cur_flt = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
-		cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
-		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0);
-
-		if (mpam_ris_has_mbwu_long_counter(ris)) {
-			val = mpam_msc_read_mbwu_l(msc);
-			mpam_msc_zero_mbwu_l(msc);
-		} else {
-			val = mpam_read_monsel_reg(msc, MBWU);
-			mpam_write_monsel_reg(msc, MBWU, 0);
-		}
-
-		cfg->mon = i;
-		cfg->pmg = FIELD_GET(MSMON_CFG_MBWU_FLT_PMG, cur_flt);
-		cfg->match_pmg = FIELD_GET(MSMON_CFG_x_CTL_MATCH_PMG, cur_ctl);
-		cfg->partid = FIELD_GET(MSMON_CFG_MBWU_FLT_PARTID, cur_flt);
-		mbwu_state->correction += val;
-		mbwu_state->enabled = FIELD_GET(MSMON_CFG_x_CTL_EN, cur_ctl);
-		spin_unlock_irqrestore(&msc->mon_sel_lock, flags);
-	}
-
-	return 0;
-}
-
-/*
- * Called via smp_call_on_cpu() to prevent migration, while still being
- * pre-emptible.
- */
-static int mpam_reset_ris(void *arg)
-{
-	struct mpam_msc_ris *ris = arg;
-	struct reprogram_ris reprogram_arg;
-	struct mpam_config empty_cfg = { 0 };
-
-	if (ris->in_reset_state)
-		return 0;
-
-	reprogram_arg.ris = ris;
-	reprogram_arg.cfg = &empty_cfg;
-
-	mpam_reprogram_ris(&reprogram_arg);
-
-	return 0;
-}
-
-/*
- * Get the preferred CPU for this MSC. If it is accessible from this CPU,
- * this CPU is preferred. This can be preempted/migrated, it will only result
- * in more work.
- */
-static int mpam_get_msc_preferred_cpu(struct mpam_msc *msc)
-{
-	int cpu = raw_smp_processor_id();
-
-	if (cpumask_test_cpu(cpu, &msc->accessibility))
-		return cpu;
-
-	return cpumask_first_and(&msc->accessibility, cpu_online_mask);
-}
-
-static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg)
-{
-	lockdep_assert_irqs_enabled();
-	lockdep_assert_cpus_held();
-	lockdep_assert_held(&msc->lock);
-
-	return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true);
-}
-
-static void mpam_reset_msc(struct mpam_msc *msc, bool online)
-{
-	int idx;
-	struct mpam_msc_ris *ris;
-
-	lockdep_assert_held(&msc->lock);
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(ris, &msc->ris, msc_list) {
-		mpam_touch_msc(msc, &mpam_reset_ris, ris);
-
-		/*
-		 * Set in_reset_state when coming online. The reset state
-		 * for non-zero partid may be lost while the CPUs are offline.
-		 */
-		ris->in_reset_state = online;
-
-		if (mpam_is_enabled() && !online)
-			mpam_touch_msc(msc, &mpam_save_mbwu_state, ris);
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-}
-
-static void mpam_reprogram_msc(struct mpam_msc *msc)
-{
-	int idx;
-	u16 partid;
-	bool reset;
-	struct mpam_config *cfg;
-	struct mpam_msc_ris *ris;
-
-	lockdep_assert_held(&msc->lock);
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(ris, &msc->ris, msc_list) {
-		if (!mpam_is_enabled() && !ris->in_reset_state) {
-			mpam_touch_msc(msc, &mpam_reset_ris, ris);
-			ris->in_reset_state = true;
-			continue;
-		}
-
-		reset = true;
-		for (partid = 0; partid <= mpam_partid_max; partid++) {
-			cfg = &ris->comp->cfg[partid];
-			if (cfg->features)
-				reset = false;
-
-			mpam_reprogram_ris_partid(ris, partid, cfg);
-		}
-		ris->in_reset_state = reset;
-
-		if (mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props))
-			mpam_touch_msc(msc, &mpam_restore_mbwu_state, ris);
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-}
-
-static void _enable_percpu_irq(void *_irq)
-{
-	int *irq = _irq;
-	enable_percpu_irq(*irq, IRQ_TYPE_NONE);
-}
-
-static int mpam_cpu_online(unsigned int cpu)
-{
-	int idx;
-	struct mpam_msc *msc;
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(msc, &mpam_all_msc, glbl_list) {
-		if (!cpumask_test_cpu(cpu, &msc->accessibility))
-			continue;
-
-		mutex_lock(&msc->lock);
-		if (msc->reenable_error_ppi)
-			_enable_percpu_irq(&msc->reenable_error_ppi);
-
-		if (atomic_fetch_inc(&msc->online_refs) == 0)
-			mpam_reprogram_msc(msc);
-		mutex_unlock(&msc->lock);
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-
-	if (mpam_is_enabled())
-		mpam_resctrl_online_cpu(cpu);
-
-	return 0;
-}
-
-/* Before mpam is enabled, try to probe new MSC */
-static int mpam_discovery_cpu_online(unsigned int cpu)
-{
-	int err = 0;
-	struct mpam_msc *msc;
-	bool new_device_probed = false;
-
-	if (mpam_is_enabled())
-		return 0;
-
-	mutex_lock(&mpam_list_lock);
-	list_for_each_entry(msc, &mpam_all_msc, glbl_list) {
-		if (!cpumask_test_cpu(cpu, &msc->accessibility))
-			continue;
-
-		mutex_lock(&msc->lock);
-		if (!msc->probed)
-			err = mpam_msc_hw_probe(msc);
-		mutex_unlock(&msc->lock);
-
-		if (!err)
-			new_device_probed = true;
-		else
-			break; // mpam_broken
-	}
-	mutex_unlock(&mpam_list_lock);
-
-	if (new_device_probed && !err)
-		schedule_work(&mpam_enable_work);
-
-	if (err < 0)
-		return err;
-
-	return mpam_cpu_online(cpu);
-}
-
-static int mpam_cpu_offline(unsigned int cpu)
-{
-	int idx;
-	struct mpam_msc *msc;
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(msc, &mpam_all_msc, glbl_list) {
-		if (!cpumask_test_cpu(cpu, &msc->accessibility))
-			continue;
-
-		mutex_lock(&msc->lock);
-		if (msc->reenable_error_ppi)
-			disable_percpu_irq(msc->reenable_error_ppi);
-
-		if (atomic_dec_and_test(&msc->online_refs))
-			mpam_reset_msc(msc, false);
-		mutex_unlock(&msc->lock);
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-
-	if (mpam_is_enabled())
-		mpam_resctrl_offline_cpu(cpu);
-
-	return 0;
-}
-
-static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online))
-{
-	mutex_lock(&mpam_cpuhp_state_lock);
-	mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mpam:online",
-					     online, mpam_cpu_offline);
-	if (mpam_cpuhp_state <= 0) {
-		pr_err("Failed to register cpuhp callbacks");
-		mpam_cpuhp_state = 0;
-	}
-	mutex_unlock(&mpam_cpuhp_state_lock);
-}
-
-static int __setup_ppi(struct mpam_msc *msc)
-{
-	int cpu;
-
-	msc->error_dev_id = alloc_percpu_gfp(struct mpam_msc *, GFP_KERNEL);
-	if (!msc->error_dev_id)
-		return -ENOMEM;
-
-	for_each_cpu(cpu, &msc->accessibility) {
-		struct mpam_msc *empty = *per_cpu_ptr(msc->error_dev_id, cpu);
-		if (empty != NULL) {
-			pr_err_once("%s shares PPI with %s!\n", 				    dev_name(&msc->pdev->dev),
-				    dev_name(&empty->pdev->dev));
-			return -EBUSY;
-		}
-		*per_cpu_ptr(msc->error_dev_id, cpu) = msc;
-	}
-
-	return 0;
-}
-
-static int mpam_msc_setup_error_irq(struct mpam_msc *msc)
-{
-	int irq;
-
-	irq = platform_get_irq_byname_optional(msc->pdev, "error");
-	if (irq <= 0)
-		return 0;
-
-	/* Allocate and initialise the percpu device pointer for PPI */
-	if (irq_is_percpu(irq))
-
-		return __setup_ppi(msc);
-
-	/* sanity check: shared interrupts can be routed anywhere? */
-	if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) {
-		pr_err_once("msc:%u is a private resource with a shared error interrupt",
-			    msc->id);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static enum mpam_machine_type mpam_dt_get_machine_type(void)
-{
-	/* FIXME: not supported yet */
-	return MPAM_DEFAULT_MACHINE;
-}
-
-static int mpam_dt_count_msc(void)
-{
-	int count = 0;
-	struct device_node *np;
-
-	for_each_compatible_node(np, NULL, "arm,mpam-msc")
-		count++;
-
-	return count;
-}
-
-static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np,
-				  u32 ris_idx)
-{
-	int err = 0;
-	u32 level = 0;
-	unsigned long cache_id;
-	struct device_node *cache;
-
-	do {
-		if (of_device_is_compatible(np, "arm,mpam-cache")) {
-			cache = of_parse_phandle(np, "arm,mpam-device", 0);
-			if (!cache) {
-				pr_err("Failed to read phandle\n");
-				break;
-			}
-		} else if (of_device_is_compatible(np->parent, "cache")) {
-			cache = np->parent;
-		} else {
-			/* For now, only caches are supported */
-			cache = NULL;
-			break;
-		}
-
-		err = of_property_read_u32(cache, "cache-level", &level);
-		if (err) {
-			pr_err("Failed to read cache-level\n");
-			break;
-		}
-
-		cache_id = cache_of_get_id(cache);
-		if (cache_id == ~0UL) {
-			err = -ENOENT;
-			break;
-		}
-
-		err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level,
-				      cache_id);
-	} while (0);
-	of_node_put(cache);
-
-	return err;
-}
-
-
-static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored)
-{
-	int err, num_ris = 0;
-	const u32 *ris_idx_p;
-	struct device_node *iter, *np;
-
-	np = msc->pdev->dev.of_node;
-	for_each_child_of_node(np, iter) {
-		ris_idx_p = of_get_property(iter, "reg", NULL);
-		if (ris_idx_p) {
-			num_ris++;
-			err = mpam_dt_parse_resource(msc, iter, *ris_idx_p);
-			if (err) {
-				of_node_put(iter);
-				return err;
-			}
-		}
-	}
-
-	if (!num_ris)
-		mpam_dt_parse_resource(msc, np, 0);
-
-	return err;
-}
-
-static int get_msc_affinity(struct mpam_msc *msc)
-{
-	struct device_node *parent;
-	u32 affinity_id;
-	int err;
-
-	if (!acpi_disabled) {
-		err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity",
-					       &affinity_id);
-		if (err) {
-			cpumask_copy(&msc->accessibility, cpu_possible_mask);
-			err = 0;
-		} else {
-			err = acpi_pptt_get_cpus_from_container(affinity_id,
-								&msc->accessibility);
-		}
-
-		return err;
-	}
-
-	/* This depends on the path to of_node */
-	parent = of_get_parent(msc->pdev->dev.of_node);
-	if (parent == of_root) {
-		cpumask_copy(&msc->accessibility, cpu_possible_mask);
-		err = 0;
-	} else {
-		if (of_device_is_compatible(parent, "cache")) {
-			err = get_cpumask_from_cache(parent,
-						     &msc->accessibility);
-		} else {
-			err = -EINVAL;
-			pr_err("Cannot determine accessibility of MSC: %s\n",
-			       dev_name(&msc->pdev->dev));
-		}
-	}
-	of_node_put(parent);
-
-	return err;
-}
-
-static int fw_num_msc;
-
-static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg)
-{
-	/* TODO: wake up tasks blocked on this MSC's PCC channel */
-}
-
-static int mpam_msc_drv_remove(struct platform_device *pdev)
-{
-	struct mpam_msc *msc = platform_get_drvdata(pdev);
-
-	if (!msc)
-		return 0;
-
-	mutex_lock(&mpam_list_lock);
-	mpam_num_msc--;
-	platform_set_drvdata(pdev, NULL);
-	list_del_rcu(&msc->glbl_list);
-	mpam_msc_destroy(msc);
-	synchronize_srcu(&mpam_srcu);
-	mutex_unlock(&mpam_list_lock);
-
-	return 0;
-}
-
-static int mpam_msc_drv_probe(struct platform_device *pdev)
-{
-	int err;
-	pgprot_t prot;
-	void * __iomem io;
-	struct mpam_msc *msc;
-	struct resource *msc_res;
-	void *plat_data = pdev->dev.platform_data;
-
-	mutex_lock(&mpam_list_lock);
-	do {
-		msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL);
-		if (!msc) {
-			err = -ENOMEM;
-			break;
-		}
-
-		INIT_LIST_HEAD_RCU(&msc->glbl_list);
-		msc->pdev = pdev;
-
-		err = device_property_read_u32(&pdev->dev, "arm,not-ready-us",
-					       &msc->nrdy_usec);
-		if (err) {
-			/* This will prevent CSU monitors being usable */
-			msc->nrdy_usec = 0;
-		}
-
-		err = get_msc_affinity(msc);
-		if (err)
-			break;
-		if (cpumask_empty(&msc->accessibility)) {
-			pr_err_once("msc:%u is not accessible from any CPU!",
-				    msc->id);
-			err = -EINVAL;
-			break;
-		}
-
-		mutex_init(&msc->lock);
-		msc->id = mpam_num_msc++;
-		INIT_LIST_HEAD_RCU(&msc->ris);
-		spin_lock_init(&msc->part_sel_lock);
-		spin_lock_init(&msc->mon_sel_lock);
-
-		err = mpam_msc_setup_error_irq(msc);
-		if (err) {
-			msc = ERR_PTR(err);
-			break;
-		}
-
-		if (device_property_read_u32(&pdev->dev, "pcc-channel",
-					     &msc->pcc_subspace_id))
-			msc->iface = MPAM_IFACE_MMIO;
-		else
-			msc->iface = MPAM_IFACE_PCC;
-
-		if (msc->iface == MPAM_IFACE_MMIO) {
-			io = devm_platform_get_and_ioremap_resource(pdev, 0,
-								    &msc_res);
-			if (IS_ERR(io)) {
-				pr_err("Failed to map MSC base address\n");
-				err = PTR_ERR(io);
-				break;
-			}
-			msc->mapped_hwpage_sz = msc_res->end - msc_res->start + 1;
-			msc->mapped_hwpage = io;
-			if (msc->mapped_hwpage_sz < MPAM_MIN_MMIO_SIZE) {
-				pr_err("MSC MMIO space size is too small\n");
-				err = -EINVAL;
-				break;
-			}
-		} else if (msc->iface == MPAM_IFACE_PCC) {
-			msc->pcc_cl.dev = &pdev->dev;
-			msc->pcc_cl.rx_callback = mpam_pcc_rx_callback;
-			msc->pcc_cl.tx_block = false;
-			msc->pcc_cl.tx_tout = 1000; /* 1s */
-			msc->pcc_cl.knows_txdone = false;
-
-			msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl,
-								 msc->pcc_subspace_id);
-			if (IS_ERR(msc->pcc_chan)) {
-				pr_err("Failed to request MSC PCC channel\n");
-				err = PTR_ERR(msc->pcc_chan);
-				break;
-			}
-
-			prot = __acpi_get_mem_attribute(msc->pcc_chan->shmem_base_addr);
-			io = ioremap_prot(msc->pcc_chan->shmem_base_addr,
-					  msc->pcc_chan->shmem_size, pgprot_val(prot));
-			if (IS_ERR(io)) {
-				pr_err("Failed to map MSC base address\n");
-				pcc_mbox_free_channel(msc->pcc_chan);
-				err = PTR_ERR(io);
-				break;
-			}
-
-			/* TODO: issue a read to update the registers */
-
-			msc->mapped_hwpage_sz = msc->pcc_chan->shmem_size;
-			msc->mapped_hwpage = io + sizeof(struct acpi_pcct_shared_memory);
-		}
-
-		list_add_rcu(&msc->glbl_list, &mpam_all_msc);
-		platform_set_drvdata(pdev, msc);
-	} while (0);
-	mutex_unlock(&mpam_list_lock);
-
-	if (!err) {
-		/* Create RIS entries described by firmware */
-		if (!acpi_disabled)
-			err = acpi_mpam_parse_resources(msc, plat_data);
-		else
-			err = mpam_dt_parse_resources(msc, plat_data);
-	}
-
-	if (err)
-		mpam_msc_drv_remove(pdev);
-
-	if (!err && fw_num_msc == mpam_num_msc)
-		mpam_register_cpuhp_callbacks(&mpam_discovery_cpu_online);
-
-	return err;
-}
-
-/*
- * If a resource doesn't match class feature/configuration, do the right thing.
- * For 'num' properties we can just take the minimum.
- * For properties where the mismatched unused bits would make a difference, we
- * nobble the class feature, as we can't configure all the resources.
- * e.g. The L3 cache is composed of two resources with 13 and 17 portion
- * bitmaps respectively.
- */
-static void
-__resource_props_mismatch(struct mpam_msc_ris *ris, struct mpam_class *class)
-{
-	struct mpam_props *cprops = &class->props;
-	struct mpam_props *rprops = &ris->props;
-
-	lockdep_assert_held(&mpam_list_lock); /* we modify class */
-
-	/* Clear missing features */
-	cprops->features &= rprops->features;
-
-	/* Clear incompatible features */
-	if (cprops->cpbm_wd != rprops->cpbm_wd)
-		mpam_clear_feature(mpam_feat_cpor_part, &cprops->features);
-	if (cprops->mbw_pbm_bits != rprops->mbw_pbm_bits)
-		mpam_clear_feature(mpam_feat_mbw_part, &cprops->features);
-
-	/* bwa_wd is a count of bits, fewer bits means less precision */
-	if (cprops->bwa_wd != rprops->bwa_wd)
-		cprops->bwa_wd = min(cprops->bwa_wd, rprops->bwa_wd);
-
-	/* For num properties, take the minimum */
-	if (cprops->num_csu_mon != rprops->num_csu_mon)
-		cprops->num_csu_mon = min(cprops->num_csu_mon, rprops->num_csu_mon);
-	if (cprops->num_mbwu_mon != rprops->num_mbwu_mon)
-		cprops->num_mbwu_mon = min(cprops->num_mbwu_mon, rprops->num_mbwu_mon);
-
-	if (cprops->intpri_wd != rprops->intpri_wd)
-		cprops->intpri_wd = min(cprops->intpri_wd, rprops->intpri_wd);
-	if (cprops->dspri_wd != rprops->dspri_wd)
-		cprops->dspri_wd = min(cprops->dspri_wd, rprops->dspri_wd);
-
-	/* {int,ds}pri may not have differing 0-low behaviour */
-	if (mpam_has_feature(mpam_feat_intpri_part_0_low, cprops) !=
-	    mpam_has_feature(mpam_feat_intpri_part_0_low, rprops))
-		mpam_clear_feature(mpam_feat_intpri_part, &cprops->features);
-	if (mpam_has_feature(mpam_feat_dspri_part_0_low, cprops) !=
-	    mpam_has_feature(mpam_feat_dspri_part_0_low, rprops))
-		mpam_clear_feature(mpam_feat_dspri_part, &cprops->features);
-}
-
-/*
- * Copy the first component's first resources's properties and features to the
- * class. __resource_props_mismatch() will remove conflicts.
- * It is not possible to have a class with no components, or a component with
- * no resources.
- */
-static void mpam_enable_init_class_features(struct mpam_class *class)
-{
-	struct mpam_msc_ris *ris;
-	struct mpam_component *comp;
-
-	comp = list_first_entry_or_null(&class->components,
-					struct mpam_component, class_list);
-	if (WARN_ON(!comp))
-		return;
-
-	ris = list_first_entry_or_null(&comp->ris,
-				       struct mpam_msc_ris, comp_list);
-	if (WARN_ON(!ris))
-		return;
-
-	class->props = ris->props;
-}
-
-/* Merge all the common resource features into class. */
-static void mpam_enable_merge_features(void)
-{
-	struct mpam_msc_ris *ris;
-	struct mpam_class *class;
-	struct mpam_component *comp;
-
-	lockdep_assert_held(&mpam_list_lock);
-
-	list_for_each_entry(class, &mpam_classes, classes_list) {
-		mpam_enable_init_class_features(class);
-
-		list_for_each_entry(comp, &class->components, class_list) {
-			list_for_each_entry(ris, &comp->ris, comp_list) {
-				__resource_props_mismatch(ris, class);
-
-				class->nrdy_usec = max(class->nrdy_usec,
-						     ris->msc->nrdy_usec);
-			}
-		}
-	}
-}
-
-static char *mpam_errcode_names[16] = {
-	[0] = "No error",
-	[1] = "PARTID_SEL_Range",
-	[2] = "Req_PARTID_Range",
-	[3] = "MSMONCFG_ID_RANGE",
-	[4] = "Req_PMG_Range",
-	[5] = "Monitor_Range",
-	[6] = "intPARTID_Range",
-	[7] = "Unexpected_INTERNAL",
-	[8] = "Undefined_RIS_PART_SEL",
-	[9] = "RIS_No_Control",
-	[10] = "Undefined_RIS_MON_SEL",
-	[11] = "RIS_No_Monitor",
-	[12 ... 15] = "Reserved"
-};
-
-static int mpam_enable_msc_ecr(void *_msc)
-{
-	struct mpam_msc *msc = _msc;
-
-	writel_relaxed(1, msc->mapped_hwpage + MPAMF_ECR);
-
-	return 0;
-}
-
-static int mpam_disable_msc_ecr(void *_msc)
-{
-	struct mpam_msc *msc = _msc;
-
-	writel_relaxed(0, msc->mapped_hwpage + MPAMF_ECR);
-
-	return 0;
-}
-
-static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc)
-{
-	u64 reg;
-	u16 partid;
-	u8 errcode, pmg, ris;
-
-	if (WARN_ON_ONCE(!msc) ||
-	    WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
-					   &msc->accessibility)))
-		return IRQ_NONE;
-
-	reg = mpam_msc_read_esr(msc);
-
-	errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg);
-	if (!errcode)
-		return IRQ_NONE;
-
-	/* Clear level triggered irq */
-	mpam_msc_zero_esr(msc);
-
-	partid = FIELD_GET(MPAMF_ESR_PARTID_OR_MON, reg);
-	pmg = FIELD_GET(MPAMF_ESR_PMG, reg);
-	ris = FIELD_GET(MPAMF_ESR_RIS, reg);
-
-	pr_err("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n",
-	       msc->id, mpam_errcode_names[errcode], partid, pmg, ris);
-
-	/*
-	 * To prevent this interrupt from repeatedly cancelling the scheduled
-	 * work to disable mpam, disable the error interrupt.
-	 */
-	mpam_disable_msc_ecr(msc);
-
-	schedule_work(&mpam_broken_work);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t mpam_ppi_handler(int irq, void *dev_id)
-{
-	struct mpam_msc *msc = *(struct mpam_msc **)dev_id;
-
-	return __mpam_irq_handler(irq, msc);
-}
-
-static irqreturn_t mpam_spi_handler(int irq, void *dev_id)
-{
-	struct mpam_msc *msc = dev_id;
-
-	return __mpam_irq_handler(irq, msc);
-}
-
-static int mpam_register_irqs(void)
-{
-	int err, irq;
-	struct mpam_msc *msc;
-
-	lockdep_assert_cpus_held();
-	lockdep_assert_held(&mpam_list_lock);
-
-	list_for_each_entry(msc, &mpam_all_msc, glbl_list) {
-		irq = platform_get_irq_byname_optional(msc->pdev, "error");
-		if (irq <= 0)
-			continue;
-
-		/* The MPAM spec says the interrupt can be SPI, PPI or LPI */
-		/* We anticipate sharing the interrupt with other MSCs */
-		if (irq_is_percpu(irq)) {
-			err = request_percpu_irq(irq, &mpam_ppi_handler,
-						 "mpam:msc:error",
-						 msc->error_dev_id);
-			if (err)
-				return err;
-
-			mutex_lock(&msc->lock);
-			msc->reenable_error_ppi = irq;
-			smp_call_function_many(&msc->accessibility,
-					       &_enable_percpu_irq, &irq,
-					       true);
-			mutex_unlock(&msc->lock);
-		} else {
-			err = devm_request_irq(&msc->pdev->dev, irq,
-					       &mpam_spi_handler, IRQF_SHARED,
-					       "mpam:msc:error", msc);
-			if (err)
-				return err;
-		}
-
-		mutex_lock(&msc->lock);
-		msc->error_irq_requested = true;
-		mpam_touch_msc(msc, mpam_enable_msc_ecr, msc);
-		msc->error_irq_hw_enabled = true;
-		mutex_unlock(&msc->lock);
-	}
-
-	return 0;
-}
-
-static void mpam_unregister_irqs(void)
-{
-	int irq;
-	struct mpam_msc *msc;
-
-	cpus_read_lock();
-	/* take the lock as free_irq() can sleep */
-	mutex_lock(&mpam_list_lock);
-	list_for_each_entry(msc, &mpam_all_msc, glbl_list) {
-		irq = platform_get_irq_byname_optional(msc->pdev, "error");
-		if (irq <= 0)
-			continue;
-
-		mutex_lock(&msc->lock);
-		if (msc->error_irq_hw_enabled) {
-			mpam_touch_msc(msc, mpam_disable_msc_ecr, msc);
-			msc->error_irq_hw_enabled = false;
-		}
-
-		if (msc->error_irq_requested) {
-			if (irq_is_percpu(irq)) {
-				msc->reenable_error_ppi = 0;
-				free_percpu_irq(irq, msc->error_dev_id);
-			} else {
-				devm_free_irq(&msc->pdev->dev, irq, msc);
-			}
-			msc->error_irq_requested = false;
-		}
-		mutex_unlock(&msc->lock);
-	}
-	mutex_unlock(&mpam_list_lock);
-	cpus_read_unlock();
-}
-
-static void __destroy_component_cfg(struct mpam_component *comp)
-{
-	unsigned long flags;
-	struct mpam_msc_ris *ris;
-	struct msmon_mbwu_state *mbwu_state;
-
-	kfree(comp->cfg);
-	list_for_each_entry(ris, &comp->ris, comp_list) {
-		mutex_lock(&ris->msc->lock);
-		spin_lock_irqsave(&ris->msc->mon_sel_lock, flags);
-		mbwu_state = ris->mbwu_state;
-		ris->mbwu_state = NULL;
-		spin_unlock_irqrestore(&ris->msc->mon_sel_lock, flags);
-		mutex_unlock(&ris->msc->lock);
-
-		kfree(mbwu_state);
-	}
-}
-
-static int __allocate_component_cfg(struct mpam_component *comp)
-{
-	unsigned long flags;
-	struct mpam_msc_ris *ris;
-	struct msmon_mbwu_state *mbwu_state;
-
-	if (comp->cfg)
-		return 0;
-
-	comp->cfg = kcalloc(mpam_partid_max + 1, sizeof(*comp->cfg), GFP_KERNEL);
-	if (!comp->cfg)
-		return -ENOMEM;
-
-	list_for_each_entry(ris, &comp->ris, comp_list) {
-		if (!ris->props.num_mbwu_mon)
-			continue;
-
-		mbwu_state = kcalloc(ris->props.num_mbwu_mon,
-				     sizeof(*ris->mbwu_state), GFP_KERNEL);
-		if (!mbwu_state) {
-			__destroy_component_cfg(comp);
-			return -ENOMEM;
-		}
-
-		mutex_lock(&ris->msc->lock);
-		spin_lock_irqsave(&ris->msc->mon_sel_lock, flags);
-		ris->mbwu_state = mbwu_state;
-		spin_unlock_irqrestore(&ris->msc->mon_sel_lock, flags);
-		mutex_unlock(&ris->msc->lock);
-	}
-
-	return 0;
-}
-
-static int mpam_allocate_config(void)
-{
-	int err = 0;
-	struct mpam_class *class;
-	struct mpam_component *comp;
-
-	lockdep_assert_held(&mpam_list_lock);
-
-	list_for_each_entry(class, &mpam_classes, classes_list) {
-		list_for_each_entry(comp, &class->components, class_list) {
-			err = __allocate_component_cfg(comp);
-			if (err)
-				return err;
-		}
-	}
-
-	return 0;
-}
-
-static void mpam_enable_once(void)
-{
-	int err;
-
-	/*
-	 * If all the MSC have been probed, enabling the IRQs happens next.
-	 * That involves cross-calling to a CPU that can reach the MSC, and
-	 * the locks must be taken in this order:
-	 */
-	cpus_read_lock();
-	mutex_lock(&mpam_list_lock);
-	do {
-		mpam_enable_merge_features();
-
-		err = mpam_allocate_config();
-		if (err) {
-			pr_err("Failed to allocate configuration arrays.\n");
-			break;
-		}
-
-		err = mpam_register_irqs();
-		if (err) {
-			pr_warn("Failed to register irqs: %d\n", err);
-			break;
-		}
-	} while (0);
-	mutex_unlock(&mpam_list_lock);
-	cpus_read_unlock();
-
-	if (!err) {
-		err = mpam_resctrl_setup();
-		if (err)
-			pr_err("Failed to initialise resctrl: %d\n", err);
-	}
-
-	if (err) {
-		schedule_work(&mpam_broken_work);
-		return;
-	}
-
-	mutex_lock(&mpam_cpuhp_state_lock);
-	cpuhp_remove_state(mpam_cpuhp_state);
-	mpam_cpuhp_state = 0;
-	mutex_unlock(&mpam_cpuhp_state_lock);
-
-	/*
-	 * Once the cpuhp callbacks have been changed, mpam_partid_max can no
-	 * longer change.
-	 */
-	spin_lock(&partid_max_lock);
-	partid_max_published = true;
-	spin_unlock(&partid_max_lock);
-
-	static_branch_enable(&mpam_enabled);
-	mpam_register_cpuhp_callbacks(mpam_cpu_online);
-
-	pr_info("MPAM enabled with %u partid and %u pmg\n",
-		READ_ONCE(mpam_partid_max) + 1, READ_ONCE(mpam_pmg_max) + 1);
-}
-
-void mpam_reset_class(struct mpam_class *class)
-{
-	int idx;
-	struct mpam_msc_ris *ris;
-	struct mpam_component *comp;
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(comp, &class->components, class_list) {
-		memset(comp->cfg, 0, ((mpam_partid_max + 1) * sizeof(*comp->cfg)));
-
-		list_for_each_entry_rcu(ris, &comp->ris, comp_list) {
-			mutex_lock(&ris->msc->lock);
-			mpam_touch_msc(ris->msc, mpam_reset_ris, ris);
-			mutex_unlock(&ris->msc->lock);
-			ris->in_reset_state = true;
-		}
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-}
-
-/*
- * Called in response to an error IRQ.
- * All of MPAMs errors indicate a software bug, restore any modified
- * controls to their reset values.
- */
-void mpam_disable(struct work_struct *ignored)
-{
-	int idx;
-	struct mpam_class *class;
-
-	mutex_lock(&mpam_cpuhp_state_lock);
-	if (mpam_cpuhp_state) {
-		cpuhp_remove_state(mpam_cpuhp_state);
-		mpam_cpuhp_state = 0;
-	}
-	mutex_unlock(&mpam_cpuhp_state_lock);
-
-	mpam_resctrl_exit();
-
-	static_branch_disable(&mpam_enabled);
-
-	mpam_unregister_irqs();
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(class, &mpam_classes, classes_list)
-		mpam_reset_class(class);
-	srcu_read_unlock(&mpam_srcu, idx);
-}
-
-/*
- * Enable mpam once all devices have been probed.
- * Scheduled by mpam_discovery_cpu_online() once all devices have been created.
- * Also scheduled when new devices are probed when new CPUs come online.
- */
-void mpam_enable(struct work_struct *work)
-{
-	static atomic_t once;
-	struct mpam_msc *msc;
-	bool all_devices_probed = true;
-
-	mutex_lock(&mpam_list_lock);
-	list_for_each_entry(msc, &mpam_all_msc, glbl_list) {
-		mutex_lock(&msc->lock);
-		if (!msc->probed)
-			all_devices_probed = false;
-		mutex_unlock(&msc->lock);
-
-		if (!all_devices_probed)
-			break;
-	}
-	mutex_unlock(&mpam_list_lock);
-
-	if (all_devices_probed && !atomic_fetch_inc(&once))
-		mpam_enable_once();
-}
-
-struct mpam_write_config_arg {
-	struct mpam_msc_ris *ris;
-	struct mpam_component *comp;
-	u16 partid;
-};
-
-static int __write_config(void *arg)
-{
-	struct mpam_write_config_arg *c = arg;
-
-	mpam_reprogram_ris_partid(c->ris, c->partid, &c->comp->cfg[c->partid]);
-
-	return 0;
-}
-
-/* TODO: split into write_config/sync_config */
-/* TODO: add config_dirty bitmap to drive sync_config */
-int mpam_apply_config(struct mpam_component *comp, u16 partid,
-		      struct mpam_config *cfg)
-{
-	struct mpam_write_config_arg arg;
-	struct mpam_msc_ris *ris;
-	int idx;
-
-	lockdep_assert_cpus_held();
-
-	if (!memcmp(&comp->cfg[partid], cfg, sizeof(*cfg)))
-		return 0;
-
-	comp->cfg[partid] = *cfg;
-	arg.comp = comp;
-	arg.partid = partid;
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(ris, &comp->ris, comp_list) {
-		arg.ris = ris;
-		mutex_lock(&ris->msc->lock);
-		mpam_touch_msc(ris->msc, __write_config, &arg);
-		mutex_unlock(&ris->msc->lock);
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-
-	return 0;
-}
-
-static const struct of_device_id mpam_of_match[] = {
-	{ .compatible = "arm,mpam-msc", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, mpam_of_match);
-
-static struct platform_driver mpam_msc_driver = {
-	.driver = {
-		.name = "mpam_msc",
-		.of_match_table = of_match_ptr(mpam_of_match),
-	},
-	.probe = mpam_msc_drv_probe,
-	.remove = mpam_msc_drv_remove,
-};
-
-/*
- * MSC that are hidden under caches are not created as platform devices
- * as there is no cache driver. Caches are also special-cased in
- * get_msc_affinity().
- */
-static void mpam_dt_create_foundling_msc(void)
-{
-	int err;
-	struct device_node *cache;
-
-	for_each_compatible_node(cache, NULL, "cache") {
-		err = of_platform_populate(cache, mpam_of_match, NULL, NULL);
-		if (err) {
-			pr_err("Failed to create MSC devices under caches\n");
-		}
-	}
-}
-
-static int __init arm64_mpam_register_cpus(void)
-{
-	u64 mpamidr = read_sysreg_s(SYS_MPAMIDR_EL1);
-	u16 partid_max = FIELD_GET(MPAMIDR_PARTID_MAX, mpamidr);
-	u8 pmg_max = FIELD_GET(MPAMIDR_PMG_MAX, mpamidr);
-
-	return mpam_register_requestor(partid_max, pmg_max);
-}
-
-static int mpam_on;
-
-static int __init mpam_on_setup(char *str)
-{
-	mpam_on = 1;
-	pr_info("MPAM is supported now");
-	return 1;
-}
-__setup("mpam_on", mpam_on_setup);
-
-static int __init mpam_msc_driver_init(void)
-{
-	if (!mpam_on)
-		return 0;
-
-	bool mpam_not_available = false;
-	int err;
-
-	if (!mpam_cpus_have_feature())
-		return -EOPNOTSUPP;
-
-	init_srcu_struct(&mpam_srcu);
-
-	if (!acpi_disabled)
-		mpam_current_machine = acpi_mpam_get_machine_type();
-	else
-		mpam_current_machine = mpam_dt_get_machine_type();
-
-	if (!acpi_disabled)
-		fw_num_msc = acpi_mpam_count_msc();
-	else
-		fw_num_msc = mpam_dt_count_msc();
-
-	if (fw_num_msc <= 0) {
-		pr_err("No MSC devices found in firmware\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * Access MPAM system registers after MPAM ACPI table is parsed, since
-	 * some BIOSs disable MPAM system registers accessing but export MPAM in
-	 * ID_AA64PFR0_EL1. So we can only rely on the MPAM ACPI table to
-	 * determine whether MPAM feature is enabled.
-	 */
-	err = arm64_mpam_register_cpus();
-	if (err)
-		return err;
-
-	/*
-	 * If the MPAM CPU interface is not implemented, or reserved by
-	 * firmware, there is no point touching the rest of the hardware.
-	 */
-	spin_lock(&partid_max_lock);
-	if (!partid_max_init || (!mpam_partid_max && !mpam_pmg_max))
-		mpam_not_available = true;
-	spin_unlock(&partid_max_lock);
-
-	if (mpam_not_available)
-		return 0;
-
-	if (acpi_disabled)
-		mpam_dt_create_foundling_msc();
-
-	return platform_driver_register(&mpam_msc_driver);
-}
-/* Must occur after arm64_mpam_register_cpus() from arch_initcall() */
-subsys_initcall(mpam_msc_driver_init);
diff --git a/drivers/platform/mpam/mpam_internal.h b/drivers/platform/mpam/mpam_internal.h
deleted file mode 100644
index c46af5caa90c84b0f90a14efa1ae9d9e1f76c04a..0000000000000000000000000000000000000000
--- a/drivers/platform/mpam/mpam_internal.h
+++ /dev/null
@@ -1,587 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2021 Arm Ltd.
-
-#ifndef MPAM_INTERNAL_H
-#define MPAM_INTERNAL_H
-
-#include <linux/arm_mpam.h>
-#include <linux/atomic.h>
-#include <linux/cpumask.h>
-#include <linux/io.h>
-#include <linux/jump_label.h>
-#include <linux/mailbox_client.h>
-#include <linux/mutex.h>
-#include <linux/resctrl.h>
-#include <linux/sizes.h>
-#include <linux/srcu.h>
-
-DECLARE_STATIC_KEY_FALSE(mpam_enabled);
-
-/* Value to indicate the allocated monitor is derived from the RMID index. */
-#define USE_RMID_IDX	(U16_MAX + 1)
-
-/*
- * Only these event configuration bits are supported. MPAM can't know if
- * data is being written back, these will show up as a write.
- */
-#define READS_TO_LOCAL_MEM             BIT(0)
-#define NON_TEMP_WRITE_TO_LOCAL_MEM    BIT(2)
-#define MPAM_RESTRL_EVT_CONFIG_VALID	(READS_TO_LOCAL_MEM | NON_TEMP_WRITE_TO_LOCAL_MEM)
-
-static inline bool mpam_is_enabled(void)
-{
-	return static_branch_likely(&mpam_enabled);
-}
-
-struct mpam_msc
-{
-	/* member of mpam_all_msc */
-	struct list_head        glbl_list;
-
-	int			id;
-	struct platform_device *pdev;
-
-	/* Not modified after mpam_is_enabled() becomes true */
-	enum mpam_msc_iface	iface;
-	u32			pcc_subspace_id;
-	struct mbox_client	pcc_cl;
-	struct pcc_mbox_chan	*pcc_chan;
-	u32			nrdy_usec;
-	cpumask_t		accessibility;
-	bool			has_extd_esr;
-
-	int				reenable_error_ppi;
-	struct mpam_msc * __percpu	*error_dev_id;
-
-	atomic_t		online_refs;
-
-	struct mutex		lock;
-	bool			probed;
-	bool			error_irq_requested;
-	bool			error_irq_hw_enabled;
-	u16			partid_max;
-	u8			pmg_max;
-	unsigned long		ris_idxs[128 / BITS_PER_LONG];
-	u32			ris_max;
-
-	/* mpam_msc_ris of this component */
-	struct list_head	ris;
-
-	/*
-	 * part_sel_lock protects access to the MSC hardware registers that are
-	 * affected by MPAMCFG_PART_SEL. (including the ID registers)
-	 * If needed, take msc->lock first.
-	 */
-	spinlock_t		part_sel_lock;
-	spinlock_t		mon_sel_lock;
-	void __iomem *		mapped_hwpage;
-	size_t			mapped_hwpage_sz;
-};
-
-/*
- * When we compact the supported features, we don't care what they are.
- * Storing them as a bitmap makes life easy.
- */
-typedef u32 mpam_features_t;
-
-/* Bits for mpam_features_t */
-enum mpam_device_features {
-	mpam_feat_ccap_part = 0,
-	mpam_feat_cpor_part,
-	mpam_feat_mbw_part,
-	mpam_feat_mbw_min,
-	mpam_feat_mbw_max,
-	mpam_feat_mbw_prop,
-	mpam_feat_intpri_part,
-	mpam_feat_intpri_part_0_low,
-	mpam_feat_dspri_part,
-	mpam_feat_dspri_part_0_low,
-	mpam_feat_msmon,
-	mpam_feat_msmon_csu,
-	mpam_feat_msmon_csu_capture,
-	/*
-	 * Having mpam_feat_msmon_mbwu set doesn't mean the regular 31 bit MBWU
-	 * counter would be used. The exact counter used is decided based on the
-	 * status of mpam_feat_msmon_mbwu_l/mpam_feat_msmon_mbwu_lwd as well.
-	 */
-	mpam_feat_msmon_mbwu,
-	mpam_feat_msmon_mbwu_44counter,
-	mpam_feat_msmon_mbwu_63counter,
-	mpam_feat_msmon_mbwu_capture,
-	mpam_feat_msmon_mbwu_rwbw,
-	mpam_feat_msmon_capt,
-	mpam_feat_impl_msmon_mbwu,
-	mpam_feat_partid_nrw,
-	MPAM_FEATURE_LAST,
-};
-#define MPAM_ALL_FEATURES      ((1<<MPAM_FEATURE_LAST) - 1)
-
-struct mpam_props
-{
-	mpam_features_t		features;
-
-	u16			cpbm_wd;
-	u16			mbw_pbm_bits;
-	u8			bwa_wd;
-	u16			cmax_wd;
-	u16			intpri_wd;
-	u16			dspri_wd;
-	u16			num_csu_mon;
-	u16			num_mbwu_mon;
-};
-
-#define mpam_has_feature(_feat, x)	((1<<_feat) & (x)->features)
-#define mpam_set_feature(_feat, x)	((x)->features |= (1<<_feat))
-
-static inline void mpam_clear_feature(enum mpam_device_features feat,
-				      mpam_features_t *supported)
-{
-	*supported &= ~(1<<feat);
-}
-
-struct mpam_class
-{
-	/* mpam_components in this class */
-	struct list_head	components;
-
-	cpumask_t		affinity;
-
-	struct mpam_props	props;
-	u32			nrdy_usec;
-	u8			level;
-	enum mpam_class_types	type;
-
-	/* member of mpam_classes */
-	struct list_head	classes_list;
-
-	struct ida		ida_csu_mon;
-	struct ida		ida_mbwu_mon;
-};
-
-struct mpam_config {
-	/* Which configuration values are valid. 0 is used for reset */
-	mpam_features_t		features;
-
-	u32	cpbm;
-	u32	mbw_pbm;
-	u16	mbw_max;
-};
-
-struct mpam_component
-{
-	u32			comp_id;
-
-	/* mpam_msc_ris in this component */
-	struct list_head	ris;
-
-	cpumask_t		affinity;
-
-	/*
-	 * Array of configuration values, indexed by partid.
-	 * Read from cpuhp callbacks, hold the cpuhp lock when writing.
-	 */
-	struct mpam_config	*cfg;
-
-	/* member of mpam_class:components */
-	struct list_head	class_list;
-
-	/* parent: */
-	struct mpam_class	*class;
-};
-
-/* The values for MSMON_CFG_MBWU_FLT.RWBW */
-enum mon_filter_options {
-	COUNT_BOTH	= 0,
-	COUNT_WRITE	= 1,
-	COUNT_READ	= 2,
-};
-
-struct mon_cfg {
-	u16                     mon;
-	u8                      pmg;
-	bool                    match_pmg;
-	u32                     partid;
-	enum mon_filter_options opts;
-};
-
-/*
- * Changes to enabled and cfg are protected by the msc->lock.
- * Changes to reset_on_next_read, prev_val and correction are protected by the
- * msc's mon_sel_lock.
- */
-struct msmon_mbwu_state {
-	bool		enabled;
-	bool		reset_on_next_read;
-	struct mon_cfg	cfg;
-
-	/* The value last read from the hardware. Used to detect overflow. */
-	u64		prev_val;
-
-	/*
-	 * The value to add to the new reading to account for power management,
-	 * and shifts to trigger the overflow interrupt.
-	 */
-	u64		correction;
-};
-
-struct mpam_msc_ris {
-	u8			ris_idx;
-	u64			idr;
-	struct mpam_props	props;
-	bool			in_reset_state;
-
-	cpumask_t		affinity;
-
-	/* member of mpam_component:ris */
-	struct list_head	comp_list;
-
-	/* member of mpam_msc:ris */
-	struct list_head	msc_list;
-
-	/* parents: */
-	struct mpam_msc		*msc;
-	struct mpam_component	*comp;
-
-	/* msmon mbwu configuration is preserved over reset */
-	struct msmon_mbwu_state	*mbwu_state;
-};
-
-struct mpam_resctrl_dom {
-	struct mpam_component	*comp;
-	struct rdt_domain	resctrl_dom;
-
-	u32			mbm_local_evt_cfg;
-};
-
-struct mpam_resctrl_res {
-	struct mpam_class	*class;
-	struct rdt_resource	resctrl_res;
-};
-
-static inline int mpam_alloc_csu_mon(struct mpam_class *class)
-{
-	struct mpam_props *cprops = &class->props;
-
-	if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
-		return -EOPNOTSUPP;
-
-	return ida_alloc_range(&class->ida_csu_mon, 0, cprops->num_csu_mon - 1,
-                                GFP_KERNEL);
-}
-
-static inline void mpam_free_csu_mon(struct mpam_class *class, int csu_mon)
-{
-	ida_free(&class->ida_csu_mon, csu_mon);
-}
-
-static inline int mpam_alloc_mbwu_mon(struct mpam_class *class)
-{
-	struct mpam_props *cprops = &class->props;
-
-	if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops))
-		return -EOPNOTSUPP;
-
-	return ida_alloc_range(&class->ida_mbwu_mon, 0,
-			       cprops->num_mbwu_mon - 1, GFP_KERNEL);
-}
-
-static inline void mpam_free_mbwu_mon(struct mpam_class *class, int mbwu_mon)
-{
-	ida_free(&class->ida_mbwu_mon, mbwu_mon);
-}
-
-/* List of all classes */
-extern struct list_head mpam_classes;
-extern struct srcu_struct mpam_srcu;
-
-/* System wide partid/pmg values */
-extern u16 mpam_partid_max;
-extern u8 mpam_pmg_max;
-
-/* Scheduled work callback to enable mpam once all MSC have been probed */
-void mpam_enable(struct work_struct *work);
-void mpam_disable(struct work_struct *work);
-
-void mpam_reset_class(struct mpam_class *class);
-
-int mpam_apply_config(struct mpam_component *comp, u16 partid,
-		      struct mpam_config *cfg);
-
-int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx,
-		    enum mpam_device_features, u64 *val);
-void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx);
-void mpam_msmon_reset_all_mbwu(struct mpam_component *comp);
-
-int mpam_resctrl_online_cpu(unsigned int cpu);
-int mpam_resctrl_offline_cpu(unsigned int cpu);
-
-int mpam_resctrl_setup(void);
-void mpam_resctrl_exit(void);
-
-/*
- * MPAM MSCs have the following register layout. See:
- * Arm Architecture Reference Manual Supplement - Memory System Resource
- * Partitioning and Monitoring (MPAM), for Armv8-A. DDI 0598A.a
- */
-#define MPAM_ARCHITECTURE_V1    0x10
-#define MPAM_MIN_MMIO_SIZE      0x3000
-
-/* Memory mapped control pages: */
-/* ID Register offsets in the memory mapped page */
-#define MPAMF_IDR               0x0000  /* features id register */
-#define MPAMF_MSMON_IDR         0x0080  /* performance monitoring features */
-#define MPAMF_IMPL_IDR          0x0028  /* imp-def partitioning */
-#define MPAMF_CPOR_IDR          0x0030  /* cache-portion partitioning */
-#define MPAMF_CCAP_IDR          0x0038  /* cache-capacity partitioning */
-#define MPAMF_MBW_IDR           0x0040  /* mem-bw partitioning */
-#define MPAMF_PRI_IDR           0x0048  /* priority partitioning */
-#define MPAMF_CSUMON_IDR        0x0088  /* cache-usage monitor */
-#define MPAMF_MBWUMON_IDR       0x0090  /* mem-bw usage monitor */
-#define MPAMF_PARTID_NRW_IDR    0x0050  /* partid-narrowing */
-#define MPAMF_IIDR              0x0018  /* implementer id register */
-#define MPAMF_AIDR              0x0020  /* architectural id register */
-
-/* Configuration and Status Register offsets in the memory mapped page */
-#define MPAMCFG_PART_SEL        0x0100  /* partid to configure: */
-#define MPAMCFG_CPBM            0x1000  /* cache-portion config */
-#define MPAMCFG_CMAX            0x0108  /* cache-capacity config */
-#define MPAMCFG_MBW_MIN         0x0200  /* min mem-bw config */
-#define MPAMCFG_MBW_MAX         0x0208  /* max mem-bw config */
-#define MPAMCFG_MBW_WINWD       0x0220  /* mem-bw accounting window config */
-#define MPAMCFG_MBW_PBM         0x2000  /* mem-bw portion bitmap config */
-#define MPAMCFG_PRI             0x0400  /* priority partitioning config */
-#define MPAMCFG_MBW_PROP        0x0500  /* mem-bw stride config */
-#define MPAMCFG_INTPARTID       0x0600  /* partid-narrowing config */
-
-#define MSMON_CFG_MON_SEL       0x0800  /* monitor selector */
-#define MSMON_CFG_CSU_FLT       0x0810  /* cache-usage monitor filter */
-#define MSMON_CFG_CSU_CTL       0x0818  /* cache-usage monitor config */
-#define MSMON_CFG_MBWU_FLT      0x0820  /* mem-bw monitor filter */
-#define MSMON_CFG_MBWU_CTL      0x0828  /* mem-bw monitor config */
-#define MSMON_CSU               0x0840  /* current cache-usage */
-#define MSMON_CSU_CAPTURE       0x0848  /* last cache-usage value captured */
-#define MSMON_MBWU              0x0860  /* current mem-bw usage value */
-#define MSMON_MBWU_CAPTURE      0x0868  /* last mem-bw value captured */
-#define MSMON_MBWU_L		0x0880  /* current long mem-bw usage value */
-#define MSMON_MBWU_CAPTURE_L	0x0890  /* last long mem-bw value captured */
-#define MSMON_CAPT_EVNT         0x0808  /* signal a capture event */
-#define MPAMF_ESR               0x00F8  /* error status register */
-#define MPAMF_ECR               0x00F0  /* error control register */
-
-/* MPAMF_IDR - MPAM features ID register */
-#define MPAMF_IDR_PARTID_MAX            GENMASK(15, 0)
-#define MPAMF_IDR_PMG_MAX               GENMASK(23, 16)
-#define MPAMF_IDR_HAS_CCAP_PART         BIT(24)
-#define MPAMF_IDR_HAS_CPOR_PART         BIT(25)
-#define MPAMF_IDR_HAS_MBW_PART          BIT(26)
-#define MPAMF_IDR_HAS_PRI_PART          BIT(27)
-#define MPAMF_IDR_HAS_EXT               BIT(28)
-#define MPAMF_IDR_HAS_IMPL_IDR          BIT(29)
-#define MPAMF_IDR_HAS_MSMON             BIT(30)
-#define MPAMF_IDR_HAS_PARTID_NRW        BIT(31)
-#define MPAMF_IDR_HAS_RIS               BIT(32)
-#define MPAMF_IDR_HAS_EXT_ESR           BIT(38)
-#define MPAMF_IDR_HAS_ESR               BIT(39)
-#define MPAMF_IDR_RIS_MAX               GENMASK(59, 56)
-
-
-/* MPAMF_MSMON_IDR - MPAM performance monitoring ID register */
-#define MPAMF_MSMON_IDR_MSMON_CSU               BIT(16)
-#define MPAMF_MSMON_IDR_MSMON_MBWU              BIT(17)
-#define MPAMF_MSMON_IDR_HAS_LOCAL_CAPT_EVNT     BIT(31)
-
-/* MPAMF_CPOR_IDR - MPAM features cache portion partitioning ID register */
-#define MPAMF_CPOR_IDR_CPBM_WD                  GENMASK(15, 0)
-
-/* MPAMF_CCAP_IDR - MPAM features cache capacity partitioning ID register */
-#define MPAMF_CCAP_IDR_CMAX_WD                  GENMASK(5, 0)
-
-/* MPAMF_MBW_IDR - MPAM features memory bandwidth partitioning ID register */
-#define MPAMF_MBW_IDR_BWA_WD            GENMASK(5, 0)
-#define MPAMF_MBW_IDR_HAS_MIN           BIT(10)
-#define MPAMF_MBW_IDR_HAS_MAX           BIT(11)
-#define MPAMF_MBW_IDR_HAS_PBM           BIT(12)
-#define MPAMF_MBW_IDR_HAS_PROP          BIT(13)
-#define MPAMF_MBW_IDR_WINDWR            BIT(14)
-#define MPAMF_MBW_IDR_BWPBM_WD          GENMASK(28, 16)
-
-/* MPAMF_PRI_IDR - MPAM features priority partitioning ID register */
-#define MPAMF_PRI_IDR_HAS_INTPRI        BIT(0)
-#define MPAMF_PRI_IDR_INTPRI_0_IS_LOW   BIT(1)
-#define MPAMF_PRI_IDR_INTPRI_WD         GENMASK(9, 4)
-#define MPAMF_PRI_IDR_HAS_DSPRI         BIT(16)
-#define MPAMF_PRI_IDR_DSPRI_0_IS_LOW    BIT(17)
-#define MPAMF_PRI_IDR_DSPRI_WD          GENMASK(25, 20)
-
-/* MPAMF_CSUMON_IDR - MPAM cache storage usage monitor ID register */
-#define MPAMF_CSUMON_IDR_NUM_MON        GENMASK(15, 0)
-#define MPAMF_CSUMON_IDR_HAS_CAPTURE    BIT(31)
-
-/* MPAMF_MBWUMON_IDR - MPAM memory bandwidth usage monitor ID register */
-#define MPAMF_MBWUMON_IDR_NUM_MON       GENMASK(15, 0)
-#define MPAMF_MBWUMON_IDR_HAS_RWBW      BIT(28)
-#define MPAMF_MBWUMON_IDR_LWD           BIT(29)
-#define MPAMF_MBWUMON_IDR_HAS_LONG      BIT(30)
-#define MPAMF_MBWUMON_IDR_HAS_CAPTURE   BIT(31)
-
-/* MPAMF_PARTID_NRW_IDR - MPAM PARTID narrowing ID register */
-#define MPAMF_PARTID_NRW_IDR_INTPARTID_MAX      GENMASK(15, 0)
-
-/* MPAMF_IIDR - MPAM implementation ID register */
-#define MPAMF_IIDR_PRODUCTID    GENMASK(31, 20)
-#define MPAMF_IIDR_PRODUCTID_SHIFT	20
-#define MPAMF_IIDR_VARIANT      GENMASK(19, 16)
-#define MPAMF_IIDR_VARIANT_SHIFT	16
-#define MPAMF_IIDR_REVISON      GENMASK(15, 12)
-#define MPAMF_IIDR_REVISON_SHIFT	12
-#define MPAMF_IIDR_IMPLEMENTER  GENMASK(11, 0)
-#define MPAMF_IIDR_IMPLEMENTER_SHIFT	0
-
-/* MPAMF_AIDR - MPAM architecture ID register */
-#define MPAMF_AIDR_ARCH_MAJOR_REV       GENMASK(7, 4)
-#define MPAMF_AIDR_ARCH_MINOR_REV       GENMASK(3, 0)
-
-/* MPAMCFG_PART_SEL - MPAM partition configuration selection register */
-#define MPAMCFG_PART_SEL_PARTID_SEL     GENMASK(15, 0)
-#define MPAMCFG_PART_SEL_INTERNAL       BIT(16)
-#define MPAMCFG_PART_SEL_RIS            GENMASK(27, 24)
-
-/* MPAMCFG_CMAX - MPAM cache portion bitmap partition configuration register */
-#define MPAMCFG_CMAX_CMAX               GENMASK(15, 0)
-
-/*
- * MPAMCFG_MBW_MIN - MPAM memory minimum bandwidth partitioning configuration
- *                   register
- */
-#define MPAMCFG_MBW_MIN_MIN             GENMASK(15, 0)
-
-/*
- * MPAMCFG_MBW_MAX - MPAM memory maximum bandwidth partitioning configuration
- *                   register
- */
-#define MPAMCFG_MBW_MAX_MAX             GENMASK(15, 0)
-#define MPAMCFG_MBW_MAX_HARDLIM         BIT(31)
-
-/*
- * MPAMCFG_MBW_WINWD - MPAM memory bandwidth partitioning window width
- *                     register
- */
-#define MPAMCFG_MBW_WINWD_US_FRAC       GENMASK(7, 0)
-#define MPAMCFG_MBW_WINWD_US_INT        GENMASK(23, 8)
-
-
-/* MPAMCFG_PRI - MPAM priority partitioning configuration register */
-#define MPAMCFG_PRI_INTPRI              GENMASK(15, 0)
-#define MPAMCFG_PRI_DSPRI               GENMASK(31, 16)
-
-/*
- * MPAMCFG_MBW_PROP - Memory bandwidth proportional stride partitioning
- *                    configuration register
- */
-#define MPAMCFG_MBW_PROP_STRIDEM1       GENMASK(15, 0)
-#define MPAMCFG_MBW_PROP_EN             BIT(31)
-
-/*
- * MPAMCFG_INTPARTID - MPAM internal partition narrowing configuration register
- */
-#define MPAMCFG_INTPARTID_INTPARTID     GENMASK(15, 0)
-#define MPAMCFG_INTPARTID_INTERNAL      BIT(16)
-
-/* MSMON_CFG_MON_SEL - Memory system performance monitor selection register */
-#define MSMON_CFG_MON_SEL_MON_SEL       GENMASK(7, 0)
-#define MSMON_CFG_MON_SEL_RIS           GENMASK(27, 24)
-
-/* MPAMF_ESR - MPAM Error Status Register */
-#define MPAMF_ESR_PARTID_OR_MON GENMASK(15, 0)
-#define MPAMF_ESR_PMG           GENMASK(23, 16)
-#define MPAMF_ESR_ERRCODE       GENMASK(27, 24)
-#define MPAMF_ESR_OVRWR         BIT(31)
-#define MPAMF_ESR_RIS           GENMASK(35, 32)
-
-/* MPAMF_ECR - MPAM Error Control Register */
-#define MPAMF_ECR_INTEN         BIT(0)
-
-/* Error conditions in accessing memory mapped registers */
-#define MPAM_ERRCODE_NONE                       0
-#define MPAM_ERRCODE_PARTID_SEL_RANGE           1
-#define MPAM_ERRCODE_REQ_PARTID_RANGE           2
-#define MPAM_ERRCODE_MSMONCFG_ID_RANGE          3
-#define MPAM_ERRCODE_REQ_PMG_RANGE              4
-#define MPAM_ERRCODE_MONITOR_RANGE              5
-#define MPAM_ERRCODE_INTPARTID_RANGE            6
-#define MPAM_ERRCODE_UNEXPECTED_INTERNAL        7
-
-/*
- * MSMON_CFG_CSU_FLT - Memory system performance monitor configure cache storage
- *                    usage monitor filter register
- */
-#define MSMON_CFG_CSU_FLT_PARTID       GENMASK(15, 0)
-#define MSMON_CFG_CSU_FLT_PMG          GENMASK(23, 16)
-
-/*
- * MSMON_CFG_CSU_CTL - Memory system performance monitor configure cache storage
- *                    usage monitor control register
- * MSMON_CFG_MBWU_CTL - Memory system performance monitor configure memory
- *                     bandwidth usage monitor control register
- */
-#define MSMON_CFG_x_CTL_TYPE           GENMASK(7, 0)
-#define MSMON_CFG_x_CTL_MATCH_PARTID   BIT(16)
-#define MSMON_CFG_x_CTL_MATCH_PMG      BIT(17)
-#define MSMON_CFG_x_CTL_SCLEN          BIT(19)
-#define MSMON_CFG_x_CTL_SUBTYPE        GENMASK(23, 20)
-#define MSMON_CFG_x_CTL_OFLOW_FRZ      BIT(24)
-#define MSMON_CFG_x_CTL_OFLOW_INTR     BIT(25)
-#define MSMON_CFG_x_CTL_OFLOW_STATUS   BIT(26)
-#define MSMON_CFG_x_CTL_CAPT_RESET     BIT(27)
-#define MSMON_CFG_x_CTL_CAPT_EVNT      GENMASK(30, 28)
-#define MSMON_CFG_x_CTL_EN             BIT(31)
-
-#define MSMON_CFG_MBWU_CTL_TYPE_MBWU			0x42
-#define MSMON_CFG_MBWU_CTL_TYPE_CSU			0x43
-
-#define MSMON_CFG_MBWU_CTL_SUBTYPE_NONE                 0
-#define MSMON_CFG_MBWU_CTL_SUBTYPE_READ                 1
-#define MSMON_CFG_MBWU_CTL_SUBTYPE_WRITE                2
-#define MSMON_CFG_MBWU_CTL_SUBTYPE_BOTH                 3
-
-#define MSMON_CFG_MBWU_CTL_SUBTYPE_MAX                  3
-#define MSMON_CFG_MBWU_CTL_SUBTYPE_MASK                 0x3
-
-/*
- * MSMON_CFG_MBWU_FLT - Memory system performance monitor configure memory
- *                     bandwidth usage monitor filter register
- */
-#define MSMON_CFG_MBWU_FLT_PARTID               GENMASK(15, 0)
-#define MSMON_CFG_MBWU_FLT_PMG                  GENMASK(23, 16)
-#define MSMON_CFG_MBWU_FLT_RWBW                 GENMASK(31, 30)
-
-/*
- * MSMON_CSU - Memory system performance monitor cache storage usage monitor
- *            register
- * MSMON_CSU_CAPTURE -  Memory system performance monitor cache storage usage
- *                     capture register
- * MSMON_MBWU  - Memory system performance monitor memory bandwidth usage
- *               monitor register
- * MSMON_MBWU_CAPTURE - Memory system performance monitor memory bandwidth usage
- *                     capture register
- */
-#define MSMON___VALUE          GENMASK(30, 0)
-#define MSMON___NRDY           BIT(31)
-#define MSMON___NRDY_L		BIT(63)
-#define MSMON___L_VALUE		GENMASK(43, 0)
-#define MSMON___LWD_VALUE	GENMASK(62, 0)
-
-/*
- * MSMON_CAPT_EVNT - Memory system performance monitoring capture event
- *                  generation register
- */
-#define MSMON_CAPT_EVNT_NOW    BIT(0)
-
-/* Used for PTG Yitian710 specific MB monitoring feature */
-#define MBWU_MASK GENMASK(23, 0)
-#define MBWU_WINWD_MAX GENMASK(22, 0)
-#define MBWU_GET(v) ((v) & MBWU_MASK)
-#define MPAMF_CUST_MBWC_OFFSET 0x08
-#define MPAMF_CUST_WINDW_OFFSET 0x0C
-
-#endif /* MPAM_INTERNAL_H */
diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c
deleted file mode 100644
index ee1a66a6c09924426f87e0350c3c555ff6682c39..0000000000000000000000000000000000000000
--- a/drivers/platform/mpam/mpam_resctrl.c
+++ /dev/null
@@ -1,1276 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2021 Arm Ltd.
-
-#define pr_fmt(fmt) "mpam: resctrl: " fmt
-
-#include <linux/arm_mpam.h>
-#include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/errno.h>
-#include <linux/limits.h>
-#include <linux/list.h>
-#include <linux/printk.h>
-#include <linux/rculist.h>
-#include <linux/resctrl.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/wait.h>
-
-#include <asm/mpam.h>
-
-#include "mpam_internal.h"
-
-u64 mpam_resctrl_default_group;
-
-DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters);
-
-/*
- * The classes we've picked to map to resctrl resources.
- * Class pointer may be NULL.
- */
-static struct mpam_resctrl_res mpam_resctrl_exports[RDT_NUM_RESOURCES];
-
-static bool exposed_alloc_capable;
-static bool exposed_mon_capable;
-static struct mpam_class *mbm_local_class;
-static struct mpam_class *mbm_total_class;
-static struct mpam_class *mbm_bps_class;
-
-/*
- * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM1_EL1.
- * This applies globally to all traffic the CPU generates.
- */
-static bool cdp_enabled;
-
-/*
- * If resctrl_init() succeeded, resctrl_exit() can be used to remove support
- * for the filesystem in the event of an error.
- */
-static bool resctrl_enabled;
-
-/*
- * mpam_resctrl_pick_caches() needs to know the size of the caches. cacheinfo
- * populates this from a device_initcall(). mpam_resctrl_setup() must wait.
- */
-static bool cacheinfo_ready;
-static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready);
-
-/* A dummy mon context to use when the monitors were allocated up front */
-u32 __mon_is_rmid_idx = USE_RMID_IDX;
-void *mon_is_rmid_idx = &__mon_is_rmid_idx;
-
-bool resctrl_arch_alloc_capable(void)
-{
-	return exposed_alloc_capable;
-}
-
-bool resctrl_arch_mon_capable(void)
-{
-	return exposed_mon_capable;
-}
-
-bool resctrl_arch_is_mbm_local_enabled(void)
-{
-	return mbm_local_class;
-}
-
-bool resctrl_arch_is_mbm_total_enabled(void)
-{
-	return mbm_total_class;
-}
-
-bool resctrl_arch_is_mbm_bps_enabled(void)
-{
-	return mbm_bps_class;
-}
-
-bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
-{
-	switch (rid) {
-	case RDT_RESOURCE_L2:
-	case RDT_RESOURCE_L3:
-		return cdp_enabled;
-	case RDT_RESOURCE_MBA:
-	default:
-		/*
-		 * x86's MBA control doesn't support CDP, so user-space doesn't
-		 * expect it.
-		 */
-		return false;
-	}
-}
-
-int resctrl_arch_set_cdp_enabled(enum resctrl_res_level ignored, bool enable)
-{
-	u64 regval;
-	u32 partid, partid_i, partid_d;
-
-	cdp_enabled = enable;
-
-	partid = RESCTRL_RESERVED_CLOSID;
-
-	if (enable) {
-		partid_d = resctrl_get_config_index(partid, CDP_CODE);
-		partid_i = resctrl_get_config_index(partid, CDP_DATA);
-		regval = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid_d) |
-			 FIELD_PREP(MPAM_SYSREG_PARTID_I, partid_i);
-
-	} else {
-		regval = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid) |
-			 FIELD_PREP(MPAM_SYSREG_PARTID_I, partid);
-	}
-
-	WRITE_ONCE(mpam_resctrl_default_group, regval);
-
-	return 0;
-}
-
-static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid)
-{
-	return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid);
-}
-
-/*
- * MSC may raise an error interrupt if it sees an out or range partid/pmg,
- * and go on to truncate the value. Regardless of what the hardware supports,
- * only the system wide safe value is safe to use.
- */
-u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
-{
-	return mpam_partid_max + 1;
-}
-
-u32 resctrl_arch_system_num_rmid_idx(void)
-{
-	u8 closid_shift = fls(mpam_pmg_max);
-	u32 num_partid = resctrl_arch_get_num_closid(NULL);
-
-	return num_partid << closid_shift;
-}
-
-u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
-{
-	u8 closid_shift = fls(mpam_pmg_max);
-
-	if (WARN_ON_ONCE(closid_shift > 8))
-		closid_shift = 8;
-
-	return (closid << closid_shift) | rmid;
-}
-
-void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
-{
-	u8 closid_shift = fls(mpam_pmg_max);
-	u32 pmg_mask = ~(~0 << closid_shift);
-
-	if (WARN_ON_ONCE(closid_shift > 8))
-		closid_shift = 8;
-
-	*closid = idx >> closid_shift;
-	*rmid = idx & pmg_mask;
-}
-
-void resctrl_arch_sched_in(struct task_struct *tsk)
-{
-	lockdep_assert_preemption_disabled();
-
-	mpam_thread_switch(tsk);
-}
-
-void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 pmg)
-{
-	if (WARN_ON_ONCE(closid > U16_MAX) || WARN_ON_ONCE(pmg > U8_MAX))
-		return;
-
-	if (!cdp_enabled) {
-		mpam_set_cpu_defaults(cpu, closid, closid, pmg, pmg);
-	} else {
-		/*
-		 * When CDP is enabled, resctrl halves the closid range and we
-		 * use odd/even partid for one closid.
-		 */
-		u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
-		u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
-
-		mpam_set_cpu_defaults(cpu, partid_d, partid_i, pmg, pmg);
-	}
-}
-
-void resctrl_arch_sync_cpu_defaults(void *info)
-{
-	struct resctrl_cpu_sync *r = info;
-
-	lockdep_assert_preemption_disabled();
-
-	if (r) {
-		resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(),
-							 r->closid, r->rmid);
-	}
-
-	resctrl_arch_sched_in(current);
-}
-
-void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
-{
-
-
-	if (WARN_ON_ONCE(closid > U16_MAX) || WARN_ON_ONCE(rmid > U8_MAX))
-		return;
-
-	if (!cdp_enabled) {
-		mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid);
-	} else {
-		u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
-		u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
-
-		mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid);
-	}
-}
-
-bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
-{
-	u64 regval = mpam_get_regval(tsk);
-	u32 tsk_closid = FIELD_GET(MPAM_SYSREG_PARTID_D, regval);
-
-	if (cdp_enabled)
-		tsk_closid >>= 1;
-
-	return tsk_closid == closid;
-}
-
-/* The task's pmg is not unique, the partid must be considered too */
-bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
-{
-	u64 regval = mpam_get_regval(tsk);
-	u32 tsk_closid = FIELD_GET(MPAM_SYSREG_PARTID_D, regval);
-	u32 tsk_rmid = FIELD_GET(MPAM_SYSREG_PMG_D, regval);
-
-	if (cdp_enabled)
-		tsk_closid >>= 1;
-
-	return (tsk_closid == closid) && (tsk_rmid == rmid);
-}
-
-struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
-{
-	if (l >= RDT_NUM_RESOURCES)
-		return NULL;
-
-	return &mpam_resctrl_exports[l].resctrl_res;
-}
-
-static void *resctrl_arch_mon_ctx_alloc_no_wait(struct rdt_resource *r,
-						int evtid)
-{
-	struct mpam_resctrl_res *res;
-	u32 *ret = kmalloc(sizeof(*ret), GFP_KERNEL);
-
-	if (!ret)
-		return ERR_PTR(-ENOMEM);
-
-	switch (evtid) {
-	case QOS_L3_OCCUP_EVENT_ID:
-		res = container_of(r, struct mpam_resctrl_res, resctrl_res);
-
-		*ret = mpam_alloc_csu_mon(res->class);
-		return ret;
-	case QOS_L3_MBM_LOCAL_EVENT_ID:
-	case QOS_L3_MBM_TOTAL_EVENT_ID:
-		return mon_is_rmid_idx;
-	case QOS_MC_MBM_BPS_EVENT_ID:
-		if (mpam_current_machine == MPAM_YITIAN710)
-			return mon_is_rmid_idx;
-		return ERR_PTR(-EOPNOTSUPP);
-	}
-
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid)
-{
-	DEFINE_WAIT(wait);
-	void *ret;
-
-	might_sleep();
-
-	do {
-		prepare_to_wait(&resctrl_mon_ctx_waiters, &wait,
-				TASK_INTERRUPTIBLE);
-		ret = resctrl_arch_mon_ctx_alloc_no_wait(r, evtid);
-		if (PTR_ERR(ret) == -ENOSPC)
-			schedule();
-	} while (PTR_ERR(ret) == -ENOSPC && !signal_pending(current));
-	finish_wait(&resctrl_mon_ctx_waiters, &wait);
-
-	return ret;
-}
-
-void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
-			       void *arch_mon_ctx)
-{
-	struct mpam_resctrl_res *res;
-	u32 mon = *(u32 *)arch_mon_ctx;
-
-	if (mon == USE_RMID_IDX)
-		return;
-	kfree(arch_mon_ctx);
-	arch_mon_ctx = NULL;
-
-	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
-
-	switch (evtid) {
-	case QOS_L3_OCCUP_EVENT_ID:
-		mpam_free_csu_mon(res->class, mon);
-		wake_up(&resctrl_mon_ctx_waiters);
-		return;
-	case QOS_L3_MBM_TOTAL_EVENT_ID:
-	case QOS_L3_MBM_LOCAL_EVENT_ID:
-	case QOS_MC_MBM_BPS_EVENT_ID:
-		return;
-	}
-}
-
-static enum mon_filter_options resctrl_evt_config_to_mpam(u32 local_evt_cfg)
-{
-	switch (local_evt_cfg) {
-	case READS_TO_LOCAL_MEM:
-		return COUNT_READ;
-	case NON_TEMP_WRITE_TO_LOCAL_MEM:
-		return COUNT_WRITE;
-	default:
-		return COUNT_BOTH;
-	}
-}
-
-int resctrl_arch_rmid_read(struct rdt_resource	*r, struct rdt_domain *d,
-			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
-			   u64 *val, void *arch_mon_ctx)
-{
-	int err;
-	u64 cdp_val;
-	struct mon_cfg cfg;
-	struct mpam_resctrl_dom *dom;
-	u32 mon = *(u32 *)arch_mon_ctx;
-	enum mpam_device_features type;
-
-	resctrl_arch_rmid_read_context_check();
-
-	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
-
-	switch (eventid) {
-	case QOS_L3_OCCUP_EVENT_ID:
-		type = mpam_feat_msmon_csu;
-		break;
-	case QOS_L3_MBM_LOCAL_EVENT_ID:
-	case QOS_L3_MBM_TOTAL_EVENT_ID:
-		type = mpam_feat_msmon_mbwu;
-		break;
-	case QOS_MC_MBM_BPS_EVENT_ID:
-		if (mpam_current_machine == MPAM_YITIAN710)
-			type = mpam_feat_impl_msmon_mbwu;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (mon == USE_RMID_IDX)
-		cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid);
-	else
-		cfg.mon = mon;
-
-	cfg.match_pmg = true;
-	cfg.pmg = rmid;
-	cfg.opts = resctrl_evt_config_to_mpam(dom->mbm_local_evt_cfg);
-
-	if (cdp_enabled) {
-		cfg.partid = closid << 1;
-		err = mpam_msmon_read(dom->comp, &cfg, type, val);
-		if (err)
-			return err;
-
-		cfg.partid += 1;
-		err = mpam_msmon_read(dom->comp, &cfg, type, &cdp_val);
-		if (!err)
-			*val += cdp_val;
-	} else {
-		cfg.partid = closid;
-		err = mpam_msmon_read(dom->comp, &cfg, type, val);
-	}
-
-	return err;
-}
-
-void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
-			     u32 closid, u32 rmid, enum resctrl_event_id eventid)
-{
-	struct mon_cfg cfg;
-	struct mpam_resctrl_dom *dom;
-
-	if (eventid != QOS_L3_MBM_LOCAL_EVENT_ID)
-		return;
-
-	cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid);
-	cfg.match_pmg = true;
-	cfg.pmg = rmid;
-
-	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
-
-	if (cdp_enabled) {
-		cfg.partid = closid << 1;
-		mpam_msmon_reset_mbwu(dom->comp, &cfg);
-
-		cfg.partid += 1;
-		mpam_msmon_reset_mbwu(dom->comp, &cfg);
-	} else {
-		cfg.partid = closid;
-		mpam_msmon_reset_mbwu(dom->comp, &cfg);
-	}
-}
-
-/*
- * The rmid realloc threshold should be for the smallest cache exposed to
- * resctrl.
- */
-static void update_rmid_limits(unsigned int size)
-{
-	u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx();
-
-	if (WARN_ON_ONCE(!size))
-		return;
-
-	if (resctrl_rmid_realloc_limit && size > resctrl_rmid_realloc_limit)
-		return;
-
-	resctrl_rmid_realloc_limit = size;
-	resctrl_rmid_realloc_threshold = size / num_unique_pmg;
-}
-
-static bool cache_has_usable_cpor(struct mpam_class *class)
-{
-	struct mpam_props *cprops = &class->props;
-
-	if (!mpam_has_feature(mpam_feat_cpor_part, cprops))
-		return false;
-
-	/* TODO: Scaling is not yet supported */
-	return (class->props.cpbm_wd <= RESCTRL_MAX_CBM);
-}
-
-static bool cache_has_usable_csu(struct mpam_class *class)
-{
-	struct mpam_props *cprops;
-
-	if (!class)
-		return false;
-
-	cprops = &class->props;
-
-	if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
-		return false;
-
-	/*
-	 * CSU counters settle on the value, so we can get away with
-	 * having only one.
-	 */
-	if (!cprops->num_csu_mon)
-		return false;
-
-	return (mpam_partid_max > 1) || (mpam_pmg_max != 0);
-}
-
-bool resctrl_arch_is_llc_occupancy_enabled(void)
-{
-	return cache_has_usable_csu(mpam_resctrl_exports[RDT_RESOURCE_L3].class);
-}
-
-static bool class_has_usable_mbwu(struct mpam_class *class)
-{
-	struct mpam_props *cprops = &class->props;
-
-	if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops))
-		return false;
-
-	/*
-	 * resctrl expects the bandwidth counters to be free running,
-	 * which means we need as many monitors as resctrl has
-	 * control/monitor groups.
-	 */
-	if (cprops->num_mbwu_mon < resctrl_arch_system_num_rmid_idx())
-		return false;
-
-	return (mpam_partid_max > 1) || (mpam_pmg_max != 0);
-}
-
-static bool class_has_usable_impl_mbwu(struct mpam_class *class)
-{
-	struct mpam_props *cprops = &class->props;
-
-	if (!mpam_has_feature(mpam_feat_impl_msmon_mbwu, cprops))
-		return false;
-
-	return true;
-}
-
-static bool mba_class_use_mbw_part(struct mpam_props *cprops)
-{
-	/* TODO: Scaling is not yet supported */
-	return (mpam_has_feature(mpam_feat_mbw_part, cprops) &&
-		cprops->mbw_pbm_bits < MAX_MBA_BW);
-}
-
-static bool class_has_usable_mba(struct mpam_props *cprops)
-{
-	if (mba_class_use_mbw_part(cprops) ||
-	    mpam_has_feature(mpam_feat_mbw_max, cprops))
-		return true;
-
-	return false;
-}
-
-/*
- * Calculate the percentage change from each implemented bit in the control
- * This can return 0 when BWA_WD is greater than 6. (100 / (1<<7) == 0)
- */
-static u32 get_mba_granularity(struct mpam_props *cprops)
-{
-	if (mba_class_use_mbw_part(cprops)) {
-		return MAX_MBA_BW / cprops->mbw_pbm_bits;
-	} else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
-		/*
-		 * bwa_wd is the number of bits implemented in the 0.xxx
-		 * fixed point fraction. 1 bit is 50%, 2 is 25% etc.
-		 */
-		return max_t(u32, 1, (MAX_MBA_BW / BIT(cprops->bwa_wd)));
-	}
-
-	return 0;
-}
-
-static u32 mbw_pbm_to_percent(unsigned long mbw_pbm, struct mpam_props *cprops)
-{
-	u32 bit, result = 0, granularity = get_mba_granularity(cprops);
-
-	for_each_set_bit(bit, &mbw_pbm, cprops->mbw_pbm_bits % 32) {
-		result += granularity;
-	}
-
-	return result;
-}
-
-static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops)
-{
-	u8 bit;
-	u32 divisor = 2, value = 0;
-
-	for (bit = 15; bit; bit--) {
-		if (mbw_max & BIT(bit))
-			/*
-			 * Left shift by 16 bits to preserve the precision of
-			 * the division operation.
-			 */
-			value += (MAX_MBA_BW << 16) / divisor;
-		divisor <<= 1;
-	}
-
-	/* Use the upper bound of the fixed-point fraction. */
-	value = (value + (MAX_MBA_BW << (16 - cprops->bwa_wd))) >> 16;
-
-	return value;
-}
-
-static u32 percent_to_mbw_pbm(u8 pc, struct mpam_props *cprops)
-{
-	u32 granularity = get_mba_granularity(cprops);
-	u8 num_bits = pc / granularity;
-
-	if (!num_bits)
-		return 0;
-
-	/* TODO: pick bits at random to avoid contention */
-	return (1 << num_bits) - 1;
-}
-
-static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops)
-{
-	u8 bit;
-	u32 granularity, pc_ls, divisor = 2, value = 0;
-
-	if (WARN_ON_ONCE(cprops->bwa_wd > 15))
-		return MAX_MBA_BW;
-
-	/* Set the pc value to be a multiple of granularity. */
-	granularity = get_mba_granularity(cprops);
-	pc = roundup(pc, (u8) granularity);
-	if (pc > 100)
-		pc = 100;
-
-	/*
-	 * Left shift by 16 bits to preserve the precision of the division
-	 * operation.
-	 */
-	pc_ls = (u32) pc << 16;
-
-	for (bit = 15; bit; bit--) {
-		if (pc_ls >= (MAX_MBA_BW << 16) / divisor) {
-			pc_ls -= (MAX_MBA_BW << 16) / divisor;
-			value |= BIT(bit);
-		}
-		divisor <<= 1;
-
-		if (!pc_ls || !((MAX_MBA_BW << 16) / divisor))
-			break;
-	}
-
-	value &= GENMASK(15, 15 - cprops->bwa_wd + 1);
-
-	return value;
-}
-
-/* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */
-static void mpam_resctrl_pick_caches(void)
-{
-	int idx;
-	unsigned int cache_size;
-	struct mpam_class *class;
-	struct mpam_resctrl_res *res;
-
-	lockdep_assert_cpus_held();
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(class, &mpam_classes, classes_list) {
-		struct mpam_props *cprops = &class->props;
-		bool has_cpor = cache_has_usable_cpor(class);
-
-		if (class->type != MPAM_CLASS_CACHE) {
-			pr_debug("pick_caches: Class is not a cache\n");
-			continue;
-		}
-
-		if (class->level != 2 && class->level != 3) {
-			pr_debug("pick_caches: not L2 or L3\n");
-			continue;
-		}
-
-		if (class->level == 2 && !has_cpor) {
-			pr_debug("pick_caches: L2 missing CPOR\n");
-			continue;
-		} else if (!has_cpor && !cache_has_usable_csu(class)) {
-			pr_debug("pick_caches: Cache misses CPOR and CSU\n");
-			continue;
-		}
-
-		if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
-			pr_debug("pick_caches: Class has missing CPUs\n");
-			continue;
-		}
-
-		/* Assume cache levels are the same size for all CPUs... */
-		cache_size = get_cpu_cacheinfo_size(smp_processor_id(), class->level);
-		if (!cache_size) {
-			pr_debug("pick_caches: Could not read cache size\n");
-			continue;
-		}
-
-		if (mpam_has_feature(mpam_feat_msmon_csu, cprops))
-			update_rmid_limits(cache_size);
-
-		if (class->level == 2) {
-			res = &mpam_resctrl_exports[RDT_RESOURCE_L2];
-			res->resctrl_res.name = "L2";
-		} else {
-			res = &mpam_resctrl_exports[RDT_RESOURCE_L3];
-			res->resctrl_res.name = "L3";
-		}
-		res->class = class;
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-}
-
-static void mpam_resctrl_pick_mba(void)
-{
-	struct mpam_class *class, *candidate_class = NULL;
-	struct mpam_resctrl_res *res;
-	int idx;
-
-	lockdep_assert_cpus_held();
-
-	idx = srcu_read_lock(&mpam_srcu);
-	list_for_each_entry_rcu(class, &mpam_classes, classes_list) {
-		struct mpam_props *cprops = &class->props;
-
-		if (class->level < 3)
-			continue;
-
-		if (!class_has_usable_mba(cprops))
-			continue;
-
-		if (!cpumask_equal(&class->affinity, cpu_possible_mask))
-			continue;
-
-		/*
-		 * mba_sc reads the mbm_local counter, and waggles the MBA controls.
-		 * mbm_local is implicitly part of the L3, pick a resouce to be MBA
-		 * that as close as possible to the L3.
-		 */
-		if (!candidate_class || class->level < candidate_class->level)
-			candidate_class = class;
-	}
-	srcu_read_unlock(&mpam_srcu, idx);
-
-	if (candidate_class) {
-		res = &mpam_resctrl_exports[RDT_RESOURCE_MBA];
-		res->class = candidate_class;
-		res->resctrl_res.name = "MB";
-	}
-}
-
-bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
-{
-	struct mpam_props *cprops;
-
-	switch (evt) {
-	case QOS_L3_MBM_LOCAL_EVENT_ID:
-		if (!mbm_local_class)
-			return false;
-		cprops = &mbm_local_class->props;
-
-		return mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, cprops);
-	default:
-		return false;
-	}
-}
-
-u32 resctrl_arch_event_config_get(void *info, enum resctrl_event_id eventid)
-{
-	struct mpam_resctrl_dom *dom;
-	struct resctrl_mon_config_info *mon_info = info;
-
-	dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_dom);
-	mon_info->mon_config = dom->mbm_local_evt_cfg & MAX_EVT_CONFIG_BITS;
-
-	return mon_info->mon_config;
-}
-
-void resctrl_arch_event_config_set(void *info)
-{
-	struct mpam_resctrl_dom *dom;
-	struct resctrl_mon_config_info *mon_info = info;
-
-	if (mon_info->mon_config & ~MPAM_RESTRL_EVT_CONFIG_VALID) {
-		mon_info->err = -EOPNOTSUPP;
-		return;
-	}
-
-	dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_dom);
-	dom->mbm_local_evt_cfg = mon_info->mon_config & MPAM_RESTRL_EVT_CONFIG_VALID;
-}
-
-void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d)
-{
-	struct mpam_resctrl_dom *dom;
-
-	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
-	dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID;
-	mpam_msmon_reset_all_mbwu(dom->comp);
-}
-
-static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res)
-{
-	struct mpam_class *class = res->class;
-	struct rdt_resource *r = &res->resctrl_res;
-	bool has_mbwu = class_has_usable_mbwu(class);
-
-	/* Is this one of the two well-known caches? */
-	if (res->resctrl_res.rid == RDT_RESOURCE_L2 ||
-	    res->resctrl_res.rid == RDT_RESOURCE_L3) {
-		bool has_csu = cache_has_usable_csu(class);
-
-		r->cache_level = class->level;
-
-		/* TODO: Scaling is not yet supported */
-		r->cache.cbm_len = class->props.cpbm_wd;
-		r->cache.arch_has_sparse_bitmasks = true;
-
-		/* mpam_devices will reject empty bitmaps */
-		r->cache.min_cbm_bits = 1;
-
-		/* TODO: kill these properties off as they are derivatives */
-		r->format_str = "%d=%0*x";
-		r->fflags = RFTYPE_RES_CACHE;
-		r->default_ctrl = BIT_MASK(class->props.cpbm_wd) - 1;
-		r->data_width = (class->props.cpbm_wd + 3) / 4;
-
-		/*
-		 * Which bits are shared with other ...things...
-		 * Unknown devices use partid-0 which uses all the bitmap
-		 * fields. Until we configured the SMMU and GIC not to do this
-		 * 'all the bits' is the correct answer here.
-		 */
-		r->cache.shareable_bits = r->default_ctrl;
-
-		if (mpam_has_feature(mpam_feat_cpor_part, &class->props)) {
-			r->alloc_capable = true;
-			exposed_alloc_capable = true;
-		}
-
-		/*
-		 * MBWU counters may be 'local' or 'total' depending on where
-		 * they are in the topology. Counters on caches are assumed to
-		 * be local. If it's on the memory controller, its assumed to
-		 * be global.
-		 */
-		if (has_mbwu && class->level >= 3) {
-			mbm_local_class = class;
-			r->mon_capable = true;
-		}
-
-		/*
-		 * CSU counters only make sense on a cache. The file is called
-		 * llc_occupancy, but its expected to the on the L3.
-		 */
-		if (has_csu && class->type == MPAM_CLASS_CACHE &&
-		    class->level == 3) {
-			r->mon_capable = true;
-		}
-	} else if (res->resctrl_res.rid == RDT_RESOURCE_MBA) {
-		struct mpam_props *cprops = &class->props;
-
-		/* TODO: kill these properties off as they are derivatives */
-		r->format_str = "%d=%*u";
-		r->fflags = RFTYPE_RES_MB;
-		r->default_ctrl = MAX_MBA_BW;
-		r->data_width = 3;
-
-		r->membw.delay_linear = true;
-		r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
-		r->membw.bw_gran = get_mba_granularity(cprops);
-		r->membw.min_bw = r->membw.bw_gran;
-
-		/* Round up to at least 1% */
-		if (!r->membw.bw_gran)
-			r->membw.bw_gran = 1;
-
-		if (class_has_usable_mba(cprops)) {
-			r->alloc_capable = true;
-			exposed_alloc_capable = true;
-		}
-
-		if (has_mbwu && class->type == MPAM_CLASS_MEMORY) {
-			mbm_total_class = class;
-			r->mon_capable = true;
-		} else if (class_has_usable_impl_mbwu(class)) {
-			r->mon_capable = true;
-			if (mpam_current_machine == MPAM_YITIAN710)
-				mbm_bps_class = class;
-		}
-	}
-
-	if (r->mon_capable) {
-		exposed_mon_capable = true;
-
-		/*
-		 * Unfortunately, num_rmid doesn't mean anything for
-		 * mpam, and its exposed to user-space!
-		 * num-rmid is supposed to mean the number of groups
-		 * that can be created, both control or monitor groups.
-		 * For mpam, each control group has its own pmg/rmid
-		 * space.
-		 */
-		r->mon.num_rmid = 1;
-	}
-
-	return 0;
-}
-
-int mpam_resctrl_setup(void)
-{
-	int err = 0;
-	struct mpam_resctrl_res *res;
-	enum resctrl_res_level i;
-
-	wait_event(wait_cacheinfo_ready, cacheinfo_ready);
-
-	cpus_read_lock();
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		res = &mpam_resctrl_exports[i];
-		INIT_LIST_HEAD(&res->resctrl_res.domains);
-		INIT_LIST_HEAD(&res->resctrl_res.mon.evt_list);
-		res->resctrl_res.rid = i;
-	}
-
-	mpam_resctrl_pick_caches();
-	mpam_resctrl_pick_mba();
-	/* TODO: mpam_resctrl_pick_counters(); */
-
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		res = &mpam_resctrl_exports[i];
-		if (!res->class)
-			continue;	// dummy resource
-
-		err = mpam_resctrl_resource_init(res);
-		if (err)
-			break;
-	}
-	cpus_read_unlock();
-
-	if (!err && !exposed_alloc_capable && !exposed_mon_capable)
-		err = -EOPNOTSUPP;
-
-	if (!err) {
-		if (!is_power_of_2(mpam_pmg_max + 1)) {
-			/*
-			 * If not all the partid*pmg values are valid indexes,
-			 * resctrl may allocate pmg that don't exist. This
-			 * should cause an error interrupt.
-			 */
-			pr_warn("Number of PMG is not a power of 2! resctrl may misbehave");
-		}
-
-		err = resctrl_init();
-		if (!err)
-			WRITE_ONCE(resctrl_enabled, true);
-	}
-
-	return err;
-}
-
-void mpam_resctrl_exit(void)
-{
-	if (!READ_ONCE(resctrl_enabled))
-		return;
-
-	WRITE_ONCE(resctrl_enabled, false);
-	resctrl_exit();
-}
-
-u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
-			    u32 closid, enum resctrl_conf_type type)
-{
-	u32 partid;
-	struct mpam_config *cfg;
-	struct mpam_props *cprops;
-	struct mpam_resctrl_res *res;
-	struct mpam_resctrl_dom *dom;
-	enum mpam_device_features configured_by;
-
-	lockdep_assert_cpus_held();
-
-	if (!mpam_is_enabled())
-		return r->default_ctrl;
-
-	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
-	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
-	cprops = &res->class->props;
-
-	if (mpam_resctrl_hide_cdp(r->rid))
-		partid = resctrl_get_config_index(closid, CDP_CODE);
-	else
-		partid = resctrl_get_config_index(closid, type);
-	cfg = &dom->comp->cfg[partid];
-
-	switch (r->rid) {
-	case RDT_RESOURCE_L2:
-	case RDT_RESOURCE_L3:
-		configured_by = mpam_feat_cpor_part;
-		break;
-	case RDT_RESOURCE_MBA:
-		if (mba_class_use_mbw_part(cprops)) {
-			configured_by = mpam_feat_mbw_part;
-			break;
-		} else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
-			configured_by = mpam_feat_mbw_max;
-			break;
-		}
-		fallthrough;
-	default:
-		return -EINVAL;
-	}
-
-	if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) ||
-	    !mpam_has_feature(configured_by, cfg))
-		return r->default_ctrl;
-
-	switch (configured_by) {
-	case mpam_feat_cpor_part:
-		/* TODO: Scaling is not yet supported */
-		return cfg->cpbm;
-	case mpam_feat_mbw_part:
-		/* TODO: Scaling is not yet supported */
-		return mbw_pbm_to_percent(cfg->mbw_pbm, cprops);
-	case mpam_feat_mbw_max:
-		return mbw_max_to_percent(cfg->mbw_max, cprops);
-	default:
-		return -EINVAL;
-	}
-}
-
-int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
-			    u32 closid, enum resctrl_conf_type t, u32 cfg_val)
-{
-	int err;
-	u32 partid;
-	struct mpam_config cfg;
-	struct mpam_props *cprops;
-	struct mpam_resctrl_res *res;
-	struct mpam_resctrl_dom *dom;
-
-	lockdep_assert_cpus_held();
-	lockdep_assert_irqs_enabled();
-
-	/*
-	 * NOTE: don't check the CPU as mpam_apply_config() doesn't care,
-	 * and resctrl_arch_update_domains() depends on this.
-	 */
-	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
-	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
-	cprops = &res->class->props;
-
-	partid = resctrl_get_config_index(closid, t);
-	if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r))
-		return -EINVAL;
-
-	switch (r->rid) {
-	case RDT_RESOURCE_L2:
-	case RDT_RESOURCE_L3:
-		/* TODO: Scaling is not yet supported */
-		cfg.cpbm = cfg_val;
-		mpam_set_feature(mpam_feat_cpor_part, &cfg);
-		break;
-	case RDT_RESOURCE_MBA:
-		if (mba_class_use_mbw_part(cprops)) {
-			cfg.mbw_pbm = percent_to_mbw_pbm(cfg_val, cprops);
-			mpam_set_feature(mpam_feat_mbw_part, &cfg);
-			break;
-		} else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
-			cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops);
-			mpam_set_feature(mpam_feat_mbw_max, &cfg);
-			break;
-		}
-		fallthrough;
-	default:
-		return -EINVAL;
-	}
-
-	/*
-	 * When CDP is enabled, but the resource doesn't support it, we need to
-	 * apply the same configuration to the other partid.
-	 */
-	if (mpam_resctrl_hide_cdp(r->rid)) {
-		partid = resctrl_get_config_index(closid, CDP_CODE);
-		err = mpam_apply_config(dom->comp, partid, &cfg);
-		if (err)
-			return err;
-
-		partid = resctrl_get_config_index(closid, CDP_DATA);
-		return mpam_apply_config(dom->comp, partid, &cfg);
-
-	} else {
-		return mpam_apply_config(dom->comp, partid, &cfg);
-	}
-}
-
-/* TODO: this is IPI heavy */
-int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
-{
-	int err = 0;
-	struct rdt_domain *d;
-	enum resctrl_conf_type t;
-	struct resctrl_staged_config *cfg;
-
-	lockdep_assert_cpus_held();
-	lockdep_assert_irqs_enabled();
-
-	list_for_each_entry(d, &r->domains, list) {
-		for (t = 0; t < CDP_NUM_TYPES; t++) {
-			cfg = &d->staged_config[t];
-			if (!cfg->have_new_ctrl)
-				continue;
-
-			err = resctrl_arch_update_one(r, d, closid, t,
-						      cfg->new_ctrl);
-			if (err)
-				return err;
-		}
-	}
-
-	return err;
-}
-
-void resctrl_arch_reset_resources(void)
-{
-	int i, idx;
-	struct mpam_class *class;
-	struct mpam_resctrl_res *res;
-
-	lockdep_assert_cpus_held();
-
-	if (!mpam_is_enabled())
-		return;
-
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		res = &mpam_resctrl_exports[i];
-
-		if (!res->class)
-			continue;	// dummy resource
-
-		if (!res->resctrl_res.alloc_capable)
-			continue;
-
-		idx = srcu_read_lock(&mpam_srcu);
-		list_for_each_entry_rcu(class, &mpam_classes, classes_list)
-			mpam_reset_class(class);
-		srcu_read_unlock(&mpam_srcu, idx);
-	}
-}
-
-static struct mpam_resctrl_dom *
-mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
-{
-	struct mpam_resctrl_dom *dom;
-	struct mpam_class *class = res->class;
-	struct mpam_component *comp_iter, *comp;
-
-	comp = NULL;
-	list_for_each_entry(comp_iter, &class->components, class_list) {
-		if (cpumask_test_cpu(cpu, &comp_iter->affinity)) {
-			comp = comp_iter;
-			break;
-		}
-	}
-
-	/* cpu with unknown exported component? */
-	if (WARN_ON_ONCE(!comp))
-		return ERR_PTR(-EINVAL);
-
-	dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu));
-	if (!dom)
-		return ERR_PTR(-ENOMEM);
-
-	dom->comp = comp;
-	INIT_LIST_HEAD(&dom->resctrl_dom.list);
-	dom->resctrl_dom.id = comp->comp_id;
-	dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID;
-	cpumask_set_cpu(cpu, &dom->resctrl_dom.cpu_mask);
-
-	/* TODO: this list should be sorted */
-	list_add_tail(&dom->resctrl_dom.list, &res->resctrl_res.domains);
-
-	return dom;
-}
-
-/* Like resctrl_get_domain_from_cpu(), but for offline CPUs */
-static struct mpam_resctrl_dom *
-mpam_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
-{
-	struct rdt_domain *d;
-	struct mpam_resctrl_dom *dom;
-
-	lockdep_assert_cpus_held();
-
-	list_for_each_entry(d, &res->resctrl_res.domains, list) {
-		dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
-
-		if (cpumask_test_cpu(cpu, &dom->comp->affinity))
-			return dom;
-	}
-
-	return NULL;
-}
-
-struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id)
-{
-	struct rdt_domain *d;
-	struct mpam_resctrl_dom *dom;
-
-	lockdep_assert_cpus_held();
-
-	list_for_each_entry(d, &r->domains, list) {
-		dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
-		if (dom->comp->comp_id == id)
-			return &dom->resctrl_dom;
-	}
-
-	return NULL;
-}
-
-int mpam_resctrl_online_cpu(unsigned int cpu)
-{
-	int i, err;
-	struct mpam_resctrl_dom *dom;
-	struct mpam_resctrl_res *res;
-	struct rdt_resource *r;
-
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		res = &mpam_resctrl_exports[i];
-		r = &res->resctrl_res;
-
-		if (!res->class)
-			continue;	// dummy_resource;
-
-		dom = mpam_get_domain_from_cpu(cpu, res);
-		if (dom) {
-			cpumask_set_cpu(cpu, &dom->resctrl_dom.cpu_mask);
-			continue;
-		}
-
-		dom = mpam_resctrl_alloc_domain(cpu, res);
-		if (IS_ERR(dom))
-			return PTR_ERR(dom);
-		r->rdt_domain_list[i] = &dom->resctrl_dom;
-		err = resctrl_online_domain(&res->resctrl_res, &dom->resctrl_dom);
-		if (err)
-			return err;
-	}
-
-	resctrl_online_cpu(cpu);
-	return 0;
-}
-
-int mpam_resctrl_offline_cpu(unsigned int cpu)
-{
-	int i;
-	struct rdt_domain *d;
-	struct mpam_resctrl_res *res;
-	struct mpam_resctrl_dom *dom;
-	struct rdt_resource *r;
-
-	resctrl_offline_cpu(cpu);
-
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		res = &mpam_resctrl_exports[i];
-		r = &res->resctrl_res;
-
-		if (!res->class)
-			continue;	// dummy resource
-
-		d = resctrl_get_domain_from_cpu(cpu, &res->resctrl_res);
-		dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
-
-		/* The last one standing was ahead of us... */
-		if (WARN_ON_ONCE(!d))
-			continue;
-
-		cpumask_clear_cpu(cpu, &d->cpu_mask);
-
-		if (!cpumask_empty(&d->cpu_mask))
-			continue;
-
-		resctrl_offline_domain(&res->resctrl_res, &dom->resctrl_dom);
-		list_del(&d->list);
-
-		r->rdt_domain_list[i] = NULL;
-		kfree(dom);
-	}
-
-	return 0;
-}
-
-static int __init __cacheinfo_ready(void)
-{
-	cacheinfo_ready = true;
-	wake_up(&wait_cacheinfo_ready);
-
-	return 0;
-}
-device_initcall_sync(__cacheinfo_ready);
diff --git a/fs/Kconfig b/fs/Kconfig
index 32d4f37da0d6dc55ee5476441d9d01bce1e52f66..c7090d46e7d6441ce2439e6c64dc0f6c4b0f905b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -332,7 +332,6 @@ source "fs/omfs/Kconfig"
 source "fs/hpfs/Kconfig"
 source "fs/qnx4/Kconfig"
 source "fs/qnx6/Kconfig"
-source "fs/resctrl/Kconfig"
 source "fs/romfs/Kconfig"
 source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index b62375770deed116ab26f3ce7f37198b54d09c01..f9541f40be4e08fbdee72f39e8c0c7ef856fb3f6 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -129,4 +129,3 @@ obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
 obj-$(CONFIG_EROFS_FS)		+= erofs/
 obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
 obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
-obj-$(CONFIG_RESCTRL_FS)	+= resctrl/
diff --git a/fs/resctrl/Kconfig b/fs/resctrl/Kconfig
deleted file mode 100644
index 36a1ddbe6c216faf4ca69eeff8273b2400481491..0000000000000000000000000000000000000000
--- a/fs/resctrl/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-config RESCTRL_FS
-	bool "CPU Resource Control Filesystem (resctrl)"
-	depends on ARCH_HAS_CPU_RESCTRL
-	select KERNFS
-	select PROC_CPU_RESCTRL               if PROC_FS
-	help
-	  Resctrl is a filesystem interface
-	  to control allocation and
-	  monitoring of system resources
-	  used by the CPUs.
-
-config RESCTRL_FS_PSEUDO_LOCK
-	bool
-	help
-          Software mechanism to pin data in a cache portion using
-          micro-architecture specific knowledge.
-
-config RESCTRL_RMID_DEPENDS_ON_CLOSID
-	bool
-	help
-	  Enable by the architecture when the RMID values depend on the CLOSID.
-	  This causes the closid allocator to search for CLOSID with clean
-	  RMID.
diff --git a/fs/resctrl/Makefile b/fs/resctrl/Makefile
deleted file mode 100644
index 10fcfb0fdb102a4329e2cbbfdc119e3b288d7db2..0000000000000000000000000000000000000000
--- a/fs/resctrl/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_RESCTRL_FS)		+= rdtgroup.o ctrlmondata.o monitor.o
-obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK)	+= psuedo_lock.o
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
deleted file mode 100644
index 6651e39c1cb8e24243a043653c1c59f192807634..0000000000000000000000000000000000000000
--- a/fs/resctrl/ctrlmondata.c
+++ /dev/null
@@ -1,827 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Resource Director Technology(RDT)
- * - Cache Allocation code.
- *
- * Copyright (C) 2016 Intel Corporation
- *
- * Authors:
- *    Fenghua Yu <fenghua.yu@intel.com>
- *    Tony Luck <tony.luck@intel.com>
- *
- * More information about RDT be found in the Intel (R) x86 Architecture
- * Software Developer Manual June 2016, volume 3, section 17.17.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <linux/cpu.h>
-#include <linux/kernfs.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include "internal.h"
-
-struct rdt_parse_data {
-	u32			closid;
-	enum rdtgrp_mode	mode;
-	char *buf;
-};
-
-typedef int(ctrlval_parser_t)(struct rdt_parse_data *data,
-			      struct resctrl_schema *s, struct rdt_domain *d);
-
-/*
- * Check whether MBA bandwidth percentage value is correct. The value is
- * checked against the minimum and max bandwidth values specified by the
- * hardware. The allocated bandwidth percentage is rounded to the next
- * control step available on the hardware.
- */
-static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
-{
-	int ret;
-	u32 bw;
-
-	/*
-	 * Only linear delay values is supported for current Intel SKUs.
-	 */
-	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
-		rdt_last_cmd_puts("No support for non-linear MB domains\n");
-		return false;
-	}
-
-	ret = kstrtou32(buf, 10, &bw);
-	if (ret) {
-		rdt_last_cmd_printf("Invalid MB value %s\n", buf);
-		return false;
-	}
-
-	/* Nothing else to do if software controller is enabled. */
-	if (is_mba_sc(r)) {
-		*data = bw;
-		return true;
-	}
-
-	if (bw < r->membw.min_bw || bw > r->default_ctrl) {
-		rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", bw,
-				    r->membw.min_bw, r->default_ctrl);
-		return false;
-	}
-
-	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
-	return true;
-}
-
-static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
-		    struct rdt_domain *d)
-{
-	struct resctrl_staged_config *cfg;
-	struct rdt_resource *r = s->res;
-	u32 closid = data->closid;
-	u32 bw_val;
-
-	cfg = &d->staged_config[s->conf_type];
-	if (cfg->have_new_ctrl) {
-		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
-		return -EINVAL;
-	}
-
-	if (!bw_validate(data->buf, &bw_val, r))
-		return -EINVAL;
-
-	if (is_mba_sc(r)) {
-		d->mbps_val[closid] = bw_val;
-		return 0;
-	}
-
-	cfg->new_ctrl = bw_val;
-	cfg->have_new_ctrl = true;
-
-	return 0;
-}
-
-/*
- * Check whether a cache bit mask is valid.
- * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID:
- *   - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1
- *   - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1
- *
- * Haswell does not support a non-contiguous 1s value and additionally
- * requires at least two bits set.
- * AMD allows non-contiguous bitmasks.
- */
-static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
-{
-	unsigned long first_bit, zero_bit, val;
-	unsigned int cbm_len = r->cache.cbm_len;
-	int ret;
-
-	ret = kstrtoul(buf, 16, &val);
-	if (ret) {
-		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
-		return false;
-	}
-
-	if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) {
-		rdt_last_cmd_puts("Mask out of range\n");
-		return false;
-	}
-
-	first_bit = find_first_bit(&val, cbm_len);
-	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
-
-	/* Are non-contiguous bitmasks allowed? */
-	if (!r->cache.arch_has_sparse_bitmasks &&
-	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
-		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n",
-				    val);
-		return false;
-	}
-
-	if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
-		rdt_last_cmd_printf("Need at least %d bits in the mask\n",
-				    r->cache.min_cbm_bits);
-		return false;
-	}
-
-	*data = val;
-	return true;
-}
-
-/*
- * Read one cache bit mask (hex). Check that it is valid for the current
- * resource type.
- */
-static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
-		     struct rdt_domain *d)
-{
-	enum rdtgrp_mode mode = data->mode;
-	struct resctrl_staged_config *cfg;
-	struct rdt_resource *r = s->res;
-	u32 closid = data->closid;
-	u32 cbm_val;
-
-	cfg = &d->staged_config[s->conf_type];
-	if (cfg->have_new_ctrl) {
-		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
-		return -EINVAL;
-	}
-
-	/*
-	 * Cannot set up more than one pseudo-locked region in a cache
-	 * hierarchy.
-	 */
-	if (mode == RDT_MODE_PSEUDO_LOCKSETUP &&
-	    rdtgroup_pseudo_locked_in_hierarchy(d)) {
-		rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
-		return -EINVAL;
-	}
-
-	if (!cbm_validate(data->buf, &cbm_val, r))
-		return -EINVAL;
-
-	if ((mode == RDT_MODE_EXCLUSIVE || mode == RDT_MODE_SHAREABLE) &&
-	    rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
-		rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * The CBM may not overlap with the CBM of another closid if
-	 * either is exclusive.
-	 */
-	if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, true)) {
-		rdt_last_cmd_puts("Overlaps with exclusive group\n");
-		return -EINVAL;
-	}
-
-	if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, false)) {
-		if (mode == RDT_MODE_EXCLUSIVE ||
-		    mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-			rdt_last_cmd_puts("Overlaps with other group\n");
-			return -EINVAL;
-		}
-	}
-
-	cfg->new_ctrl = cbm_val;
-	cfg->have_new_ctrl = true;
-
-	return 0;
-}
-
-static ctrlval_parser_t *get_parser(struct rdt_resource *res)
-{
-	if (res->fflags & RFTYPE_RES_CACHE)
-		return &parse_cbm;
-	else
-		return &parse_bw;
-}
-
-/*
- * For each domain in this resource we expect to find a series of:
- *	id=mask
- * separated by ";". The "id" is in decimal, and must match one of
- * the "id"s for this resource.
- */
-static int parse_line(char *line, struct resctrl_schema *s,
-		      struct rdtgroup *rdtgrp)
-{
-	ctrlval_parser_t *parse_ctrlval = get_parser(s->res);
-	enum resctrl_conf_type t = s->conf_type;
-	struct resctrl_staged_config *cfg;
-	struct rdt_resource *r = s->res;
-	struct rdt_parse_data data;
-	char *dom = NULL, *id;
-	struct rdt_domain *d;
-	unsigned long dom_id;
-
-	/* Walking r->domains, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
-	    (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
-		rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
-		return -EINVAL;
-	}
-
-next:
-	if (!line || line[0] == '\0')
-		return 0;
-	dom = strsep(&line, ";");
-	id = strsep(&dom, "=");
-	if (!dom || kstrtoul(id, 10, &dom_id)) {
-		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
-		return -EINVAL;
-	}
-	dom = strim(dom);
-	list_for_each_entry(d, &r->domains, list) {
-		if (d->id == dom_id) {
-			data.buf = dom;
-			data.closid = rdtgrp->closid;
-			data.mode = rdtgrp->mode;
-			if (parse_ctrlval(&data, s, d))
-				return -EINVAL;
-			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-				cfg = &d->staged_config[t];
-				/*
-				 * In pseudo-locking setup mode and just
-				 * parsed a valid CBM that should be
-				 * pseudo-locked. Only one locked region per
-				 * resource group and domain so just do
-				 * the required initialization for single
-				 * region and return.
-				 */
-				rdtgrp->plr->s = s;
-				rdtgrp->plr->d = d;
-				rdtgrp->plr->cbm = cfg->new_ctrl;
-				d->plr = rdtgrp->plr;
-				return 0;
-			}
-			goto next;
-		}
-	}
-	return -EINVAL;
-}
-
-static int rdtgroup_parse_resource(char *resname, char *tok,
-				   struct rdtgroup *rdtgrp)
-{
-	struct resctrl_schema *s;
-
-	list_for_each_entry(s, &resctrl_schema_all, list) {
-		if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
-			return parse_line(tok, s, rdtgrp);
-	}
-	rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
-	return -EINVAL;
-}
-
-ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
-				char *buf, size_t nbytes, loff_t off)
-{
-	struct resctrl_schema *s;
-	struct rdtgroup *rdtgrp;
-	struct rdt_resource *r;
-	char *tok, *resname;
-	int ret = 0;
-
-	/* Valid input requires a trailing newline */
-	if (nbytes == 0 || buf[nbytes - 1] != '\n')
-		return -EINVAL;
-	buf[nbytes - 1] = '\0';
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (!rdtgrp) {
-		rdtgroup_kn_unlock(of->kn);
-		return -ENOENT;
-	}
-	rdt_last_cmd_clear();
-
-	/*
-	 * No changes to pseudo-locked region allowed. It has to be removed
-	 * and re-created instead.
-	 */
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
-		ret = -EINVAL;
-		rdt_last_cmd_puts("Resource group is pseudo-locked\n");
-		goto out;
-	}
-
-	rdt_staged_configs_clear();
-
-	while ((tok = strsep(&buf, "\n")) != NULL) {
-		resname = strim(strsep(&tok, ":"));
-		if (!tok) {
-			rdt_last_cmd_puts("Missing ':'\n");
-			ret = -EINVAL;
-			goto out;
-		}
-		if (tok[0] == '\0') {
-			rdt_last_cmd_printf("Missing '%s' value\n", resname);
-			ret = -EINVAL;
-			goto out;
-		}
-		ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
-		if (ret)
-			goto out;
-	}
-
-	list_for_each_entry(s, &resctrl_schema_all, list) {
-		r = s->res;
-
-		/*
-		 * Writes to mba_sc resources update the software controller,
-		 * not the control MSR.
-		 */
-		if (is_mba_sc(r))
-			continue;
-
-		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
-		if (ret)
-			goto out;
-	}
-
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-		/*
-		 * If pseudo-locking fails we keep the resource group in
-		 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
-		 * active and updated for just the domain the pseudo-locked
-		 * region was requested for.
-		 */
-		ret = rdtgroup_pseudo_lock_create(rdtgrp);
-	}
-
-out:
-	rdt_staged_configs_clear();
-	rdtgroup_kn_unlock(of->kn);
-	return ret ?: nbytes;
-}
-
-static void show_doms(struct seq_file *s, struct resctrl_schema *schema,
-		      char *resource_name, int closid)
-{
-	struct rdt_resource *r = schema->res;
-	struct rdt_domain *dom;
-	bool sep = false;
-	u32 ctrl_val;
-
-	/* Walking r->domains, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-	if (resource_name)
-		seq_printf(s, "%*s:", max_name_width, resource_name);
-	list_for_each_entry(dom, &r->domains, list) {
-		if (sep)
-			seq_puts(s, ";");
-
-		if (is_mba_sc(r))
-			ctrl_val = dom->mbps_val[closid];
-		else
-			ctrl_val = resctrl_arch_get_config(r, dom, closid,
-							   schema->conf_type);
-
-		seq_printf(s, r->format_str, dom->id, max_data_width,
-			   ctrl_val);
-		sep = true;
-	}
-	seq_puts(s, "\n");
-}
-
-int rdtgroup_schemata_show(struct kernfs_open_file *of,
-			   struct seq_file *s, void *v)
-{
-	struct resctrl_schema *schema;
-	struct rdtgroup *rdtgrp;
-	int ret = 0;
-	u32 closid;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (rdtgrp) {
-		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-			list_for_each_entry(schema, &resctrl_schema_all, list) {
-				seq_printf(s, "%s:uninitialized\n", schema->name);
-			}
-		} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
-			if (!rdtgrp->plr->d) {
-				rdt_last_cmd_clear();
-				rdt_last_cmd_puts("Cache domain offline\n");
-				ret = -ENODEV;
-			} else {
-				seq_printf(s, "%s:%d=%x\n",
-					   rdtgrp->plr->s->res->name,
-					   rdtgrp->plr->d->id,
-					   rdtgrp->plr->cbm);
-			}
-		} else {
-			closid = rdtgrp->closid;
-			list_for_each_entry(schema, &resctrl_schema_all, list) {
-				if (closid < schema->num_closid)
-					show_doms(s, schema, schema->name, closid);
-			}
-		}
-	} else {
-		ret = -ENOENT;
-	}
-	rdtgroup_kn_unlock(of->kn);
-	return ret;
-}
-
-static int smp_mon_event_count(void *arg)
-{
-	mon_event_count(arg);
-
-	return 0;
-}
-
-void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
-		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
-		    int evtid, int first)
-{
-	int cpu;
-
-	/* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-	/*
-	 * Setup the parameters to pass to mon_event_count() to read the data.
-	 */
-	rr->rgrp = rdtgrp;
-	rr->evtid = evtid;
-	rr->r = r;
-	rr->d = d;
-	rr->val = 0;
-	rr->first = first;
-	rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
-	if (IS_ERR(rr->arch_mon_ctx)) {
-		rr->err = -EINVAL;
-		return;
-	}
-
-	cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
-
-	/*
-	 * cpumask_any_housekeeping() prefers housekeeping CPUs, but
-	 * are all the CPUs nohz_full? If yes, pick a CPU to IPI.
-	 * MPAM's resctrl_arch_rmid_read() is unable to read the
-	 * counters on some platforms if its called in IRQ context.
-	 */
-	if (tick_nohz_full_cpu(cpu))
-		smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
-	else
-		smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
-
-	resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
-}
-
-int rdtgroup_mondata_show(struct seq_file *m, void *arg)
-{
-	struct kernfs_open_file *of = m->private;
-	u32 resid, evtid, domid;
-	struct rdtgroup *rdtgrp;
-	struct rdt_resource *r;
-	union mon_data_bits md;
-	struct rdt_domain *d;
-	struct rmid_read rr;
-	int ret = 0, index;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (!rdtgrp) {
-		ret = -ENOENT;
-		goto out;
-	}
-
-	md.priv = of->kn->priv;
-	resid = md.u.rid;
-	domid = md.u.domid;
-	evtid = md.u.evtid;
-
-	r = resctrl_arch_get_resource(resid);
-	d = resctrl_arch_find_domain(r, domid);
-	if (IS_ERR_OR_NULL(d)) {
-		ret = -ENOENT;
-		goto out;
-	}
-
-	if (resctrl_arch_get_abmc_enabled() && evtid != QOS_L3_OCCUP_EVENT_ID) {
-		index = mon_event_config_index_get(evtid);
-		if (index != INVALID_CONFIG_INDEX &&
-		    rdtgrp->mon.cntr_id[index] == MON_CNTR_UNSET) {
-			rr.err = -ENOENT;
-			goto checkresult;
-		}
-	}
-
-	mon_event_read(&rr, r, d, rdtgrp, evtid, false);
-
-checkresult:
-	if (rr.err == -EIO)
-		seq_puts(m, "Error\n");
-	else if (rr.err == -EINVAL)
-		seq_puts(m, "Unavailable\n");
-	else if (rr.err == -ENOENT)
-		seq_puts(m, "Unassigned\n");
-	else
-		seq_printf(m, "%llu\n", rr.val);
-
-out:
-	rdtgroup_kn_unlock(of->kn);
-	return ret;
-}
-
-#ifdef CONFIG_X86
-int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-
-	mutex_lock(&rdtgroup_mutex);
-
-	if (r->cache.io_alloc_capable) {
-		if (resctrl_arch_get_io_alloc_enabled(r))
-			seq_puts(seq, "enabled\n");
-		else
-			seq_puts(seq, "disabled\n");
-	} else {
-		seq_puts(seq, "not supported\n");
-	}
-
-	mutex_unlock(&rdtgroup_mutex);
-
-	return 0;
-}
-
-/*
- * resctrl_io_alloc_closid_supported() - io_alloc feature utilizes the
- * highest CLOSID value to direct I/O traffic. Ensure that io_alloc_closid
- * is in the supported range.
- */
-static bool resctrl_io_alloc_closid_supported(u32 io_alloc_closid)
-{
-	return io_alloc_closid < closids_supported();
-}
-
-/*
- * Initialize io_alloc CLOSID cache resource CBM with all usable (shared
- * and unused) cache portions.
- */
-static int resctrl_io_alloc_init_cbm(struct resctrl_schema *s, u32 closid)
-{
-	enum resctrl_conf_type peer_type;
-	struct rdt_resource *r = s->res;
-	struct rdt_domain *d;
-	int ret;
-
-	rdt_staged_configs_clear();
-
-	ret = rdtgroup_init_cat(s, closid);
-	if (ret < 0)
-		goto out;
-
-	/* Keep CDP_CODE and CDP_DATA of io_alloc CLOSID's CBM in sync. */
-	if (resctrl_arch_get_cdp_enabled(r->rid)) {
-		peer_type = resctrl_peer_type(s->conf_type);
-		list_for_each_entry(d, &s->res->domains, list)
-			memcpy(&d->staged_config[peer_type],
-			       &d->staged_config[s->conf_type],
-			       sizeof(d->staged_config[0]));
-	}
-
-	ret = resctrl_arch_update_domains(r, closid);
-out:
-	rdt_staged_configs_clear();
-	return ret;
-}
-
-/*
- * resctrl_io_alloc_closid() - io_alloc feature routes I/O traffic using
- * the highest available CLOSID. Retrieve the maximum CLOSID supported by the
- * resource. Note that if Code Data Prioritization (CDP) is enabled, the number
- * of available CLOSIDs is reduced by half.
- */
-static u32 resctrl_io_alloc_closid(struct rdt_resource *r)
-{
-	if (resctrl_arch_get_cdp_enabled(r->rid))
-		return resctrl_arch_get_num_closid(r) / 2  - 1;
-	else
-		return resctrl_arch_get_num_closid(r) - 1;
-}
-
-ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf,
-			       size_t nbytes, loff_t off)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-	char const *grp_name;
-	u32 io_alloc_closid;
-	bool enable;
-	int ret;
-
-	ret = kstrtobool(buf, &enable);
-	if (ret)
-		return ret;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	rdt_last_cmd_clear();
-
-	if (!r->cache.io_alloc_capable) {
-		rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
-		ret = -ENODEV;
-		goto out_unlock;
-	}
-
-	/* If the feature is already up to date, no action is needed. */
-	if (resctrl_arch_get_io_alloc_enabled(r) == enable)
-		goto out_unlock;
-
-	io_alloc_closid = resctrl_io_alloc_closid(r);
-	if (!resctrl_io_alloc_closid_supported(io_alloc_closid)) {
-		rdt_last_cmd_printf("io_alloc CLOSID (ctrl_hw_id) %u is not available\n",
-				    io_alloc_closid);
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
-	if (enable) {
-		if (!closid_alloc_fixed(io_alloc_closid)) {
-			grp_name = rdtgroup_name_by_closid(io_alloc_closid);
-			WARN_ON_ONCE(!grp_name);
-			rdt_last_cmd_printf("CLOSID (ctrl_hw_id) %u for io_alloc is used by %s group\n",
-					    io_alloc_closid, grp_name ? grp_name : "another");
-			ret = -ENOSPC;
-			goto out_unlock;
-		}
-
-		ret = resctrl_io_alloc_init_cbm(s, io_alloc_closid);
-		if (ret) {
-			rdt_last_cmd_puts("Failed to initialize io_alloc allocations\n");
-			closid_free(io_alloc_closid);
-			goto out_unlock;
-		}
-	} else {
-		closid_free(io_alloc_closid);
-	}
-
-	ret = resctrl_arch_io_alloc_enable(r, enable);
-	if (enable && ret) {
-		rdt_last_cmd_puts("Failed to enable io_alloc feature\n");
-		closid_free(io_alloc_closid);
-	}
-
-out_unlock:
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-
-	return ret ?: nbytes;
-}
-
-int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-	int ret = 0;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	rdt_last_cmd_clear();
-
-	if (!r->cache.io_alloc_capable) {
-		rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
-		ret = -ENODEV;
-		goto out_unlock;
-	}
-
-	if (!resctrl_arch_get_io_alloc_enabled(r)) {
-		rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name);
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
-	/*
-	 * When CDP is enabled, the CBMs of the highest CLOSID of CDP_CODE and
-	 * CDP_DATA are kept in sync. As a result, the io_alloc CBMs shown for
-	 * either CDP resource are identical and accurately represent the CBMs
-	 * used for I/O.
-	 */
-	show_doms(seq, s, NULL, resctrl_io_alloc_closid(r));
-
-out_unlock:
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-	return ret;
-}
-
-static int resctrl_io_alloc_parse_line(char *line,  struct rdt_resource *r,
-				       struct resctrl_schema *s, u32 closid)
-{
-	enum resctrl_conf_type peer_type;
-	struct rdt_parse_data data;
-	struct rdt_domain *d;
-	char *dom = NULL, *id;
-	unsigned long dom_id;
-
-next:
-	if (!line || line[0] == '\0')
-		return 0;
-
-	dom = strsep(&line, ";");
-	id = strsep(&dom, "=");
-	if (!dom || kstrtoul(id, 10, &dom_id)) {
-		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
-		return -EINVAL;
-	}
-
-	dom = strim(dom);
-	list_for_each_entry(d, &r->domains, list) {
-		if (d->id == dom_id) {
-			data.buf = dom;
-			data.mode = RDT_MODE_SHAREABLE;
-			data.closid = closid;
-			if (parse_cbm(&data, s, d))
-				return -EINVAL;
-			/*
-			 * Keep io_alloc CLOSID's CBM of CDP_CODE and CDP_DATA
-			 * in sync.
-			 */
-			if (resctrl_arch_get_cdp_enabled(r->rid)) {
-				peer_type = resctrl_peer_type(s->conf_type);
-				memcpy(&d->staged_config[peer_type],
-				       &d->staged_config[s->conf_type],
-				       sizeof(d->staged_config[0]));
-			}
-			goto next;
-		}
-	}
-
-	return -EINVAL;
-}
-
-ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf,
-				   size_t nbytes, loff_t off)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-	u32 io_alloc_closid;
-	int ret = 0;
-
-	/* Valid input requires a trailing newline */
-	if (nbytes == 0 || buf[nbytes - 1] != '\n')
-		return -EINVAL;
-
-	buf[nbytes - 1] = '\0';
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-	rdt_last_cmd_clear();
-
-	if (!r->cache.io_alloc_capable) {
-		rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name);
-		ret = -ENODEV;
-		goto out_unlock;
-	}
-
-	if (!resctrl_arch_get_io_alloc_enabled(r)) {
-		rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name);
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
-	io_alloc_closid = resctrl_io_alloc_closid(r);
-
-	rdt_staged_configs_clear();
-	ret = resctrl_io_alloc_parse_line(buf, r, s, io_alloc_closid);
-	if (ret)
-		goto out_clear_configs;
-
-	ret = resctrl_arch_update_domains(r, io_alloc_closid);
-
-out_clear_configs:
-	rdt_staged_configs_clear();
-out_unlock:
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-
-	return ret ?: nbytes;
-}
-#endif
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
deleted file mode 100644
index 3283448e59559a8137021ea493e22eaebbd7c33a..0000000000000000000000000000000000000000
--- a/fs/resctrl/internal.h
+++ /dev/null
@@ -1,383 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _FS_RESCTRL_INTERNAL_H
-#define _FS_RESCTRL_INTERNAL_H
-
-#include <linux/resctrl.h>
-#include <linux/sched.h>
-#include <linux/kernfs.h>
-#include <linux/fs_context.h>
-#include <linux/jump_label.h>
-#include <linux/tick.h>
-
-#include <asm/resctrl.h>
-
-/* Maximum assignable counters per resctrl group */
-#define MAX_CNTRS			2
-
-#define MON_CNTR_UNSET			U32_MAX
-
-/**
- * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
- *			        aren't marked nohz_full
- * @mask:	The mask to pick a CPU from.
- * @exclude_cpu:The CPU to avoid picking.
- *
- * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
- * CPUs that don't use nohz_full, these are preferred. Pass
- * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
- *
- * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
- */
-static inline unsigned int
-cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
-{
-	unsigned int cpu, hk_cpu;
-
-	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
-		cpu = cpumask_any(mask);
-	else
-		cpu = cpumask_any_but(mask, exclude_cpu);
-
-	/* Only continue if tick_nohz_full_mask has been initialized. */
-	if (!tick_nohz_full_enabled())
-		return cpu;
-
-	/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
-	if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu))
-		return cpu;
-
-	/* Try to find a CPU that isn't nohz_full to use in preference */
-	hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
-	if (hk_cpu == exclude_cpu)
-		hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
-
-	if (hk_cpu < nr_cpu_ids)
-		cpu = hk_cpu;
-
-	return cpu;
-}
-
-struct rdt_fs_context {
-	struct kernfs_fs_context	kfc;
-	bool				enable_cdpl2;
-	bool				enable_cdpl3;
-	bool				enable_mba_mbps;
-	bool				enable_debug;
-	bool				enable_hwdrc_mb;
-};
-
-static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
-{
-	struct kernfs_fs_context *kfc = fc->fs_private;
-
-	return container_of(kfc, struct rdt_fs_context, kfc);
-}
-
-/**
- * struct mon_evt - Entry in the event list of a resource
- * @evtid:		event id
- * @name:		name of the event
- * @configurable:	true if the event is configurable
- * @list:		entry in &rdt_resource->mon.evt_list
- */
-struct mon_evt {
-	enum resctrl_event_id	evtid;
-	char			*name;
-	bool			configurable;
-	struct list_head	list;
-};
-
-/**
- * union mon_data_bits - Monitoring details for each event file
- * @priv:              Used to store monitoring event data in @u
- *                     as kernfs private data
- * @rid:               Resource id associated with the event file
- * @evtid:             Event id associated with the event file
- * @domid:             The domain to which the event file belongs
- * @u:                 Name of the bit fields struct
- */
-union mon_data_bits {
-	void *priv;
-	struct {
-		unsigned int rid		: 10;
-		enum resctrl_event_id evtid	: 8;
-		unsigned int domid		: 14;
-	} u;
-};
-
-struct rmid_read {
-	struct rdtgroup		*rgrp;
-	struct rdt_resource	*r;
-	struct rdt_domain	*d;
-	enum resctrl_event_id	evtid;
-	bool			first;
-	int			err;
-	u64			val;
-	void			*arch_mon_ctx;
-};
-
-extern struct list_head resctrl_schema_all;
-extern bool resctrl_mounted;
-
-enum rdt_group_type {
-	RDTCTRL_GROUP = 0,
-	RDTMON_GROUP,
-	RDT_NUM_GROUP,
-};
-
-/**
- * enum rdtgrp_mode - Mode of a RDT resource group
- * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
- * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
- * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
- * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
- *                          allowed AND the allocations are Cache Pseudo-Locked
- * @RDT_NUM_MODES: Total number of modes
- *
- * The mode of a resource group enables control over the allowed overlap
- * between allocations associated with different resource groups (classes
- * of service). User is able to modify the mode of a resource group by
- * writing to the "mode" resctrl file associated with the resource group.
- *
- * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
- * writing the appropriate text to the "mode" file. A resource group enters
- * "pseudo-locked" mode after the schemata is written while the resource
- * group is in "pseudo-locksetup" mode.
- */
-enum rdtgrp_mode {
-	RDT_MODE_SHAREABLE = 0,
-	RDT_MODE_EXCLUSIVE,
-	RDT_MODE_PSEUDO_LOCKSETUP,
-	RDT_MODE_PSEUDO_LOCKED,
-
-	/* Must be last */
-	RDT_NUM_MODES,
-};
-
-/**
- * struct mongroup - store mon group's data in resctrl fs.
- * @mon_data_kn:		kernfs node for the mon_data directory
- * @parent:			parent rdtgrp
- * @crdtgrp_list:		child rdtgroup node list
- * @rmid:			rmid for this rdtgroup
- * @cntr_id:			Counter ids for assignment
- */
-struct mongroup {
-	struct kernfs_node	*mon_data_kn;
-	struct rdtgroup		*parent;
-	struct list_head	crdtgrp_list;
-	u32			rmid;
-	u32			cntr_id[MAX_CNTRS];
-};
-
-/**
- * struct rdtgroup - store rdtgroup's data in resctrl file system.
- * @kn:				kernfs node
- * @rdtgroup_list:		linked list for all rdtgroups
- * @closid:			closid for this rdtgroup
- * @cpu_mask:			CPUs assigned to this rdtgroup
- * @flags:			status bits
- * @waitcount:			how many cpus expect to find this
- *				group when they acquire rdtgroup_mutex
- * @type:			indicates type of this rdtgroup - either
- *				monitor only or ctrl_mon group
- * @mon:			mongroup related data
- * @mode:			mode of resource group
- * @plr:			pseudo-locked region
- */
-struct rdtgroup {
-	struct kernfs_node		*kn;
-	struct list_head		rdtgroup_list;
-	u32				closid;
-	struct cpumask			cpu_mask;
-	int				flags;
-	atomic_t			waitcount;
-	enum rdt_group_type		type;
-	struct mongroup			mon;
-	enum rdtgrp_mode		mode;
-	struct pseudo_lock_region	*plr;
-};
-
-/* List of all resource groups */
-extern struct list_head rdt_all_groups;
-
-extern int max_name_width, max_data_width;
-
-/**
- * struct rftype - describe each file in the resctrl file system
- * @name:	File name
- * @mode:	Access mode
- * @kf_ops:	File operations
- * @flags:	File specific RFTYPE_FLAGS_* flags
- * @fflags:	File specific RFTYPE_* flags
- * @seq_show:	Show content of the file
- * @write:	Write to the file
- */
-struct rftype {
-	char			*name;
-	umode_t			mode;
-	const struct kernfs_ops	*kf_ops;
-	unsigned long		flags;
-	unsigned long		fflags;
-
-	int (*seq_show)(struct kernfs_open_file *of,
-			struct seq_file *sf, void *v);
-	/*
-	 * write() is the generic write callback which maps directly to
-	 * kernfs write operation and overrides all other operations.
-	 * Maximum write size is determined by ->max_write_len.
-	 */
-	ssize_t (*write)(struct kernfs_open_file *of,
-			 char *buf, size_t nbytes, loff_t off);
-};
-
-/**
- * struct mbm_state - status for each MBM counter in each domain
- * @prev_bw_bytes: Previous bytes value read for bandwidth calculation
- * @prev_bw:	The most recent bandwidth in MBps
- */
-struct mbm_state {
-	u64	prev_bw_bytes;
-	u32	prev_bw;
-};
-
-static inline  bool is_mba_sc(struct rdt_resource *r)
-{
-	if (!r)
-		r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
-
-	/*
-	 * The software controller support is only applicable to MBA resource.
-	 * Make sure to check for resource type.
-	 */
-	if (r->rid != RDT_RESOURCE_MBA)
-		return false;
-
-	return r->membw.mba_sc;
-}
-
-static inline bool is_hwdrc_enabled(struct rdt_resource *r)
-{
-	if (!r)
-		r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
-
-	return r->membw.hwdrc_mb;
-}
-
-extern struct mutex rdtgroup_mutex;
-extern struct rdtgroup rdtgroup_default;
-extern struct dentry *debugfs_resctrl;
-
-void rdt_last_cmd_clear(void);
-void rdt_last_cmd_puts(const char *s);
-__printf(1, 2)
-void rdt_last_cmd_printf(const char *fmt, ...);
-
-struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
-void rdtgroup_kn_unlock(struct kernfs_node *kn);
-int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
-int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
-			     umode_t mask);
-ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
-				char *buf, size_t nbytes, loff_t off);
-int rdtgroup_schemata_show(struct kernfs_open_file *of,
-			   struct seq_file *s, void *v);
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
-			   unsigned long cbm, int closid, bool exclusive);
-unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
-				  unsigned long cbm);
-enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
-int rdtgroup_tasks_assigned(struct rdtgroup *r);
-int closids_supported(void);
-void closid_free(int closid);
-int alloc_rmid(u32 closid);
-void free_rmid(u32 closid, u32 rmid);
-void resctrl_mon_resource_exit(void);
-void mon_event_count(void *info);
-int rdtgroup_mondata_show(struct seq_file *m, void *arg);
-void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
-		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
-		    int evtid, int first);
-int resctrl_mon_resource_init(void);
-void mbm_setup_overflow_handler(struct rdt_domain *dom,
-				unsigned long delay_ms,
-				int exclude_cpu);
-void mbm_handle_overflow(struct work_struct *work);
-bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
-			     int exclude_cpu);
-void cqm_handle_limbo(struct work_struct *work);
-bool has_busy_rmid(struct rdt_domain *d);
-void __check_limbo(struct rdt_domain *d, bool force_free);
-int mbm_cntr_alloc(struct rdt_resource *r);
-void mbm_cntr_free(u32 cntr_id);
-void rdt_staged_configs_clear(void);
-bool closid_allocated(unsigned int closid);
-
-bool closid_alloc_fixed(u32 closid);
-
-int resctrl_find_cleanest_closid(void);
-unsigned int mon_event_config_index_get(u32 evtid);
-int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid);
-int rdtgroup_alloc_cntr(struct rdtgroup *rdtgrp, int index);
-int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid);
-void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp, int index);
-
-#ifdef CONFIG_X86
-int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v);
-
-int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid);
-
-enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type);
-
-ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf,
-			       size_t nbytes, loff_t off);
-
-const char *rdtgroup_name_by_closid(u32 closid);
-int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq,
-			      void *v);
-ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf,
-				   size_t nbytes, loff_t off);
-#endif
-
-#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
-int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
-int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
-int rdt_pseudo_lock_init(void);
-void rdt_pseudo_lock_release(void);
-int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
-void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
-#else
-static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
-{
-	return false;
-}
-
-static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
-{
-	return false;
-}
-
-static inline int rdt_pseudo_lock_init(void) { return 0; }
-static inline void rdt_pseudo_lock_release(void) { }
-static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { }
-#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */
-
-#endif /* _FS_RESCTRL_INTERNAL_H */
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
deleted file mode 100644
index b6bb310bb10bdc6b7eeb3cb652d4847b72065e68..0000000000000000000000000000000000000000
--- a/fs/resctrl/monitor.c
+++ /dev/null
@@ -1,875 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Resource Director Technology(RDT)
- * - Monitoring code
- *
- * Copyright (C) 2017 Intel Corporation
- *
- * Author:
- *    Vikas Shivappa <vikas.shivappa@intel.com>
- *
- * This replaces the cqm.c based on perf but we reuse a lot of
- * code and datastructures originally from Peter Zijlstra and Matt Fleming.
- *
- * More information about RDT be found in the Intel (R) x86 Architecture
- * Software Developer Manual June 2016, volume 3, section 17.17.
- */
-
-#include <linux/cpu.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-#include "internal.h"
-
-/*
- * struct rmid_entry - dirty tracking for all RMID.
- * @closid:	The CLOSID for this entry.
- * @rmid:	The RMID for this entry.
- * @busy:	The number of domains with cached data using this RMID.
- * @list:	Member of the rmid_free_lru list when busy == 0.
- *
- * Depending on the architecture the correct monitor is accessed using
- * both @closid and @rmid, or @rmid only.
- *
- * Take the rdtgroup_mutex when accessing.
- */
-struct rmid_entry {
-	u32				closid;
-	u32				rmid;
-	int				busy;
-	struct list_head		list;
-};
-
-/*
- * @rmid_free_lru - A least recently used list of free RMIDs
- *     These RMIDs are guaranteed to have an occupancy less than the
- *     threshold occupancy
- */
-static LIST_HEAD(rmid_free_lru);
-
-/*
- * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
- *     Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
- *     Indexed by CLOSID. Protected by rdtgroup_mutex.
- */
-static u32 *closid_num_dirty_rmid;
-
-/*
- * @rmid_limbo_count - count of currently unused but (potentially)
- *     dirty RMIDs.
- *     This counts RMIDs that no one is currently using but that
- *     may have a occupancy value > resctrl_rmid_realloc_threshold. User can
- *     change the threshold occupancy value.
- */
-static unsigned int rmid_limbo_count;
-
-/*
- * @rmid_entry - The entry in the limbo and free lists.
- */
-static struct rmid_entry	*rmid_ptrs;
-
-/*
- * This is the threshold cache occupancy in bytes at which we will consider an
- * RMID available for re-allocation.
- */
-unsigned int resctrl_rmid_realloc_threshold;
-
-/*
- * This is the maximum value for the reallocation threshold, in bytes.
- */
-unsigned int resctrl_rmid_realloc_limit;
-
-/*
- * x86 and arm64 differ in their handling of monitoring.
- * x86's RMID are independent numbers, there is only one source of traffic
- * with an RMID value of '1'.
- * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
- * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
- * value is no longer unique.
- * To account for this, resctrl uses an index. On x86 this is just the RMID,
- * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
- *
- * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
- * must accept an attempt to read every index.
- */
-static inline struct rmid_entry *__rmid_entry(u32 idx)
-{
-	struct rmid_entry *entry;
-	u32 closid, rmid;
-
-	entry = &rmid_ptrs[idx];
-	resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
-
-	WARN_ON_ONCE(entry->closid != closid);
-	WARN_ON_ONCE(entry->rmid != rmid);
-
-	return entry;
-}
-
-static void limbo_release_entry(struct rmid_entry *entry)
-{
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	rmid_limbo_count--;
-	list_add_tail(&entry->list, &rmid_free_lru);
-
-	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
-		closid_num_dirty_rmid[entry->closid]--;
-}
-
-/*
- * Check the RMIDs that are marked as busy for this domain. If the
- * reported LLC occupancy is below the threshold clear the busy bit and
- * decrement the count. If the busy count gets to zero on an RMID, we
- * free the RMID
- */
-void __check_limbo(struct rdt_domain *d, bool force_free)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
-	struct rmid_entry *entry;
-	u32 idx, cur_idx = 1;
-	void *arch_mon_ctx;
-	bool rmid_dirty;
-	u64 val = 0;
-
-	arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
-	if (IS_ERR(arch_mon_ctx)) {
-		pr_warn_ratelimited("Failed to allocate monitor context: %ld",
-				    PTR_ERR(arch_mon_ctx));
-		return;
-	}
-
-	/*
-	 * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
-	 * are marked as busy for occupancy < threshold. If the occupancy
-	 * is less than the threshold decrement the busy counter of the
-	 * RMID and move it to the free list when the counter reaches 0.
-	 */
-	for (;;) {
-		idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
-		if (idx >= idx_limit)
-			break;
-
-		entry = __rmid_entry(idx);
-		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
-					   QOS_L3_OCCUP_EVENT_ID, &val,
-					   arch_mon_ctx)) {
-			rmid_dirty = true;
-		} else {
-			rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
-		}
-
-		if (force_free || !rmid_dirty) {
-			clear_bit(idx, d->rmid_busy_llc);
-			if (!--entry->busy)
-				limbo_release_entry(entry);
-		}
-		cur_idx = idx + 1;
-	}
-
-	resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
-}
-
-bool has_busy_rmid(struct rdt_domain *d)
-{
-	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
-
-	return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
-}
-
-static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
-{
-	struct rmid_entry *itr;
-	u32 itr_idx, cmp_idx;
-
-	if (list_empty(&rmid_free_lru))
-		return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);
-
-	list_for_each_entry(itr, &rmid_free_lru, list) {
-		/*
-		 * Get the index of this free RMID, and the index it would need
-		 * to be if it were used with this CLOSID.
-		 * If the CLOSID is irrelevant on this architecture, the two
-		 * index values are always the same on every entry and thus the
-		 * very first entry will be returned.
-		 */
-		itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
-		cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);
-
-		if (itr_idx == cmp_idx)
-			return itr;
-	}
-
-	return ERR_PTR(-ENOSPC);
-}
-
-/**
- * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
- *                                  RMID are clean, or the CLOSID that has
- *                                  the most clean RMID.
- *
- * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
- * may not be able to allocate clean RMID. To avoid this the allocator will
- * choose the CLOSID with the most clean RMID.
- *
- * When the CLOSID and RMID are independent numbers, the first free CLOSID will
- * be returned.
- */
-int resctrl_find_cleanest_closid(void)
-{
-	u32 cleanest_closid = ~0;
-	int i = 0;
-
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
-		return -EIO;
-
-	for (i = 0; i < closids_supported(); i++) {
-		int num_dirty;
-
-		if (closid_allocated(i))
-			continue;
-
-		num_dirty = closid_num_dirty_rmid[i];
-		if (num_dirty == 0)
-			return i;
-
-		if (cleanest_closid == ~0)
-			cleanest_closid = i;
-
-		if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
-			cleanest_closid = i;
-	}
-
-	if (cleanest_closid == ~0)
-		return -ENOSPC;
-
-	return cleanest_closid;
-}
-
-/*
- * For MPAM the RMID value is not unique, and has to be considered with
- * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
- * allows all domains to be managed by a single free list.
- * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
- */
-int alloc_rmid(u32 closid)
-{
-	struct rmid_entry *entry;
-
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	entry = resctrl_find_free_rmid(closid);
-	if (IS_ERR(entry))
-		return PTR_ERR(entry);
-
-	list_del(&entry->list);
-	return entry->rmid;
-}
-
-static void add_rmid_to_limbo(struct rmid_entry *entry)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	struct rdt_domain *d;
-	u32 idx;
-
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	/* Walking r->domains, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
-
-	entry->busy = 0;
-	list_for_each_entry(d, &r->domains, list) {
-		/*
-		 * For the first limbo RMID in the domain,
-		 * setup up the limbo worker.
-		 */
-		if (!has_busy_rmid(d))
-			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
-						RESCTRL_PICK_ANY_CPU);
-		set_bit(idx, d->rmid_busy_llc);
-		entry->busy++;
-	}
-
-	rmid_limbo_count++;
-	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
-		closid_num_dirty_rmid[entry->closid]++;
-}
-
-void free_rmid(u32 closid, u32 rmid)
-{
-	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
-	struct rmid_entry *entry;
-
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	/*
-	 * Do not allow the default rmid to be free'd. Comparing by index
-	 * allows architectures that ignore the closid parameter to avoid an
-	 * unnecessary check.
-	 */
-	if (!resctrl_arch_mon_capable() ||
-	    idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
-						RESCTRL_RESERVED_RMID))
-		return;
-
-	entry = __rmid_entry(idx);
-
-	if (resctrl_arch_is_llc_occupancy_enabled())
-		add_rmid_to_limbo(entry);
-	else
-		list_add_tail(&entry->list, &rmid_free_lru);
-}
-
-static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
-				       u32 rmid, enum resctrl_event_id evtid)
-{
-	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
-
-	switch (evtid) {
-	case QOS_L3_MBM_TOTAL_EVENT_ID:
-		return &d->mbm_total[idx];
-	case QOS_L3_MBM_LOCAL_EVENT_ID:
-		return &d->mbm_local[idx];
-	default:
-		return NULL;
-	}
-}
-
-static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
-{
-	struct mbm_state *m;
-	u64 tval = 0;
-
-	if (rr->first) {
-		resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
-		m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
-		if (m)
-			memset(m, 0, sizeof(struct mbm_state));
-		return 0;
-	}
-
-	rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
-					 &tval, rr->arch_mon_ctx);
-	if (rr->err)
-		return rr->err;
-
-	rr->val += tval;
-
-	return 0;
-}
-
-/*
- * mbm_bw_count() - Update bw count from values previously read by
- *		    __mon_event_count().
- * @closid:	The closid used to identify the cached mbm_state.
- * @rmid:	The rmid used to identify the cached mbm_state.
- * @rr:		The struct rmid_read populated by __mon_event_count().
- *
- * Supporting function to calculate the memory bandwidth
- * and delta bandwidth in MBps. The chunks value previously read by
- * __mon_event_count() is compared with the chunks value from the previous
- * invocation. This must be called once per second to maintain values in MBps.
- */
-static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
-{
-	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
-	struct mbm_state *m = &rr->d->mbm_local[idx];
-	u64 cur_bw, bytes, cur_bytes;
-
-	cur_bytes = rr->val;
-	bytes = cur_bytes - m->prev_bw_bytes;
-	m->prev_bw_bytes = cur_bytes;
-
-	cur_bw = bytes / SZ_1M;
-
-	m->prev_bw = cur_bw;
-}
-
-/*
- * This is scheduled by mon_event_read() to read the CQM/MBM counters
- * on a domain.
- */
-void mon_event_count(void *info)
-{
-	struct rdtgroup *rdtgrp, *entry;
-	struct rmid_read *rr = info;
-	struct list_head *head;
-	int ret;
-
-	rdtgrp = rr->rgrp;
-
-	ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);
-
-	/*
-	 * For Ctrl groups read data from child monitor groups and
-	 * add them together. Count events which are read successfully.
-	 * Discard the rmid_read's reporting errors.
-	 */
-	head = &rdtgrp->mon.crdtgrp_list;
-
-	if (rdtgrp->type == RDTCTRL_GROUP) {
-		list_for_each_entry(entry, head, mon.crdtgrp_list) {
-			if (__mon_event_count(entry->closid, entry->mon.rmid,
-					      rr) == 0)
-				ret = 0;
-		}
-	}
-
-	/*
-	 * __mon_event_count() calls for newly created monitor groups may
-	 * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
-	 * Discard error if any of the monitor event reads succeeded.
-	 */
-	if (ret == 0)
-		rr->err = 0;
-}
-
-/*
- * Feedback loop for MBA software controller (mba_sc)
- *
- * mba_sc is a feedback loop where we periodically read MBM counters and
- * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
- * that:
- *
- *   current bandwidth(cur_bw) < user specified bandwidth(user_bw)
- *
- * This uses the MBM counters to measure the bandwidth and MBA throttle
- * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
- * fact that resctrl rdtgroups have both monitoring and control.
- *
- * The frequency of the checks is 1s and we just tag along the MBM overflow
- * timer. Having 1s interval makes the calculation of bandwidth simpler.
- *
- * Although MBA's goal is to restrict the bandwidth to a maximum, there may
- * be a need to increase the bandwidth to avoid unnecessarily restricting
- * the L2 <-> L3 traffic.
- *
- * Since MBA controls the L2 external bandwidth where as MBM measures the
- * L3 external bandwidth the following sequence could lead to such a
- * situation.
- *
- * Consider an rdtgroup which had high L3 <-> memory traffic in initial
- * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
- * after some time rdtgroup has mostly L2 <-> L3 traffic.
- *
- * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
- * throttle MSRs already have low percentage values.  To avoid
- * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
- */
-static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
-{
-	u32 closid, rmid, cur_msr_val, new_msr_val;
-	struct mbm_state *pmbm_data, *cmbm_data;
-	struct rdt_resource *r_mba;
-	struct rdt_domain *dom_mba;
-	u32 cur_bw, user_bw, idx;
-	struct list_head *head;
-	struct rdtgroup *entry;
-
-	if (!resctrl_arch_is_mbm_local_enabled())
-		return;
-
-	r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
-
-	closid = rgrp->closid;
-	rmid = rgrp->mon.rmid;
-	idx = resctrl_arch_rmid_idx_encode(closid, rmid);
-	pmbm_data = &dom_mbm->mbm_local[idx];
-
-	dom_mba = resctrl_get_domain_from_cpu(smp_processor_id(), r_mba);
-	if (!dom_mba) {
-		pr_warn_once("Failure to get domain for MBA update\n");
-		return;
-	}
-
-	cur_bw = pmbm_data->prev_bw;
-	user_bw = dom_mba->mbps_val[closid];
-
-	/* MBA resource doesn't support CDP */
-	cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
-
-	/*
-	 * For Ctrl groups read data from child monitor groups.
-	 */
-	head = &rgrp->mon.crdtgrp_list;
-	list_for_each_entry(entry, head, mon.crdtgrp_list) {
-		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
-		cur_bw += cmbm_data->prev_bw;
-	}
-
-	/*
-	 * Scale up/down the bandwidth linearly for the ctrl group.  The
-	 * bandwidth step is the bandwidth granularity specified by the
-	 * hardware.
-	 * Always increase throttling if current bandwidth is above the
-	 * target set by user.
-	 * But avoid thrashing up and down on every poll by checking
-	 * whether a decrease in throttling is likely to push the group
-	 * back over target. E.g. if currently throttling to 30% of bandwidth
-	 * on a system with 10% granularity steps, check whether moving to
-	 * 40% would go past the limit by multiplying current bandwidth by
-	 * "(30 + 10) / 30".
-	 */
-	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
-		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
-	} else if (cur_msr_val < MAX_MBA_BW &&
-		   (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
-		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
-	} else {
-		return;
-	}
-
-	resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
-}
-
-static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
-		       u32 closid, u32 rmid)
-{
-	struct rmid_read rr;
-
-	rr.first = false;
-	rr.r = r;
-	rr.d = d;
-
-	/*
-	 * This is protected from concurrent reads from user
-	 * as both the user and we hold the global mutex.
-	 */
-	if (resctrl_arch_is_mbm_total_enabled()) {
-		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
-		rr.val = 0;
-		rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
-		if (IS_ERR(rr.arch_mon_ctx)) {
-			pr_warn_ratelimited("Failed to allocate monitor context: %ld",
-					    PTR_ERR(rr.arch_mon_ctx));
-			return;
-		}
-
-		__mon_event_count(closid, rmid, &rr);
-
-		resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
-	}
-	if (resctrl_arch_is_mbm_local_enabled()) {
-		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
-		rr.val = 0;
-		rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
-		if (IS_ERR(rr.arch_mon_ctx)) {
-			pr_warn_ratelimited("Failed to allocate monitor context: %ld",
-					    PTR_ERR(rr.arch_mon_ctx));
-			return;
-		}
-
-		__mon_event_count(closid, rmid, &rr);
-
-		/*
-		 * Call the MBA software controller only for the
-		 * control groups and when user has enabled
-		 * the software controller explicitly.
-		 */
-		if (is_mba_sc(NULL))
-			mbm_bw_count(closid, rmid, &rr);
-
-		resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
-	}
-}
-
-/*
- * Handler to scan the limbo list and move the RMIDs
- * to free list whose occupancy < threshold_occupancy.
- */
-void cqm_handle_limbo(struct work_struct *work)
-{
-	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
-	struct rdt_domain *d;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	d = container_of(work, struct rdt_domain, cqm_limbo.work);
-
-	__check_limbo(d, false);
-
-	if (has_busy_rmid(d)) {
-		d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
-							   RESCTRL_PICK_ANY_CPU);
-		schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
-					 delay);
-	}
-
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-}
-
-/**
- * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
- *                             domain.
- * @dom:           The domain the limbo handler should run for.
- * @delay_ms:      How far in the future the handler should run.
- * @exclude_cpu:   Which CPU the handler should not run on,
- *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
- */
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
-			     int exclude_cpu)
-{
-	unsigned long delay = msecs_to_jiffies(delay_ms);
-	int cpu;
-
-	cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
-	dom->cqm_work_cpu = cpu;
-
-	if (cpu < nr_cpu_ids)
-		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
-}
-
-bool is_rdt_domain_valid(struct rdt_resource *r, struct rdt_domain *d)
-{
-	int i;
-
-	for (i = 0; i < NR_CPUS; i++)
-		if (r->rdt_domain_list[i] == d)
-			return true;
-	return false;
-}
-
-void mbm_handle_overflow(struct work_struct *work)
-{
-	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
-	struct rdtgroup *prgrp, *crgrp;
-	struct list_head *head;
-	struct rdt_resource *r;
-	struct rdt_domain *d;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	/*
-	 * If the filesystem has been unmounted this work no longer needs to
-	 * run.
-	 */
-	if (!resctrl_mounted || !resctrl_arch_mon_capable())
-		goto out_unlock;
-
-	r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	d = container_of(work, struct rdt_domain, mbm_over.work);
-
-	if (!is_rdt_domain_valid(r, d))
-		goto out_unlock;
-
-	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
-		mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
-
-		head = &prgrp->mon.crdtgrp_list;
-		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
-			mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);
-
-		if (is_mba_sc(NULL))
-			update_mba_bw(prgrp, d);
-	}
-
-	/*
-	 * Re-check for housekeeping CPUs. This allows the overflow handler to
-	 * move off a nohz_full CPU quickly.
-	 */
-	d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
-						   RESCTRL_PICK_ANY_CPU);
-	schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
-
-out_unlock:
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-}
-
-/**
- * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
- *                                domain.
- * @dom:           The domain the overflow handler should run for.
- * @delay_ms:      How far in the future the handler should run.
- * @exclude_cpu:   Which CPU the handler should not run on,
- *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
- */
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
-				int exclude_cpu)
-{
-	unsigned long delay = msecs_to_jiffies(delay_ms);
-	int cpu;
-
-	/*
-	 * When a domain comes online there is no guarantee the filesystem is
-	 * mounted. If not, there is no need to catch counter overflow.
-	 */
-	if (!resctrl_mounted || !resctrl_arch_mon_capable())
-		return;
-	cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
-	dom->mbm_work_cpu = cpu;
-
-	if (cpu < nr_cpu_ids)
-		schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
-}
-
-static int dom_data_init(struct rdt_resource *r)
-{
-	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
-	u32 num_closid = resctrl_arch_get_num_closid(r);
-	struct rmid_entry *entry = NULL;
-	int err = 0, i;
-	u32 idx;
-
-	mutex_lock(&rdtgroup_mutex);
-	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
-		u32 *tmp;
-
-		/*
-		 * If the architecture hasn't provided a sanitised value here,
-		 * this may result in larger arrays than necessary. Resctrl will
-		 * use a smaller system wide value based on the resources in
-		 * use.
-		 */
-		tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
-		if (!tmp) {
-			err = -ENOMEM;
-			goto out_unlock;
-		}
-
-		closid_num_dirty_rmid = tmp;
-	}
-
-	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
-	if (!rmid_ptrs) {
-		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
-			kfree(closid_num_dirty_rmid);
-			closid_num_dirty_rmid = NULL;
-		}
-		err = -ENOMEM;
-		goto out_unlock;
-	}
-
-	for (i = 0; i < idx_limit; i++) {
-		entry = &rmid_ptrs[i];
-		INIT_LIST_HEAD(&entry->list);
-
-		resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
-		list_add_tail(&entry->list, &rmid_free_lru);
-	}
-
-	/*
-	 * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
-	 * are always allocated. These are used for the rdtgroup_default
-	 * control group, which will be setup later in rdtgroup_init().
-	 */
-	idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
-					   RESCTRL_RESERVED_RMID);
-	entry = __rmid_entry(idx);
-	list_del(&entry->list);
-
-out_unlock:
-	mutex_unlock(&rdtgroup_mutex);
-
-	return err;
-}
-
-static void dom_data_exit(struct rdt_resource *r)
-{
-	if (!r->mon_capable)
-		return;
-
-	mutex_lock(&rdtgroup_mutex);
-	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
-		kfree(closid_num_dirty_rmid);
-		closid_num_dirty_rmid = NULL;
-	}
-
-	kfree(rmid_ptrs);
-	rmid_ptrs = NULL;
-
-	mutex_unlock(&rdtgroup_mutex);
-}
-
-static struct mon_evt llc_occupancy_event = {
-	.name		= "llc_occupancy",
-	.evtid		= QOS_L3_OCCUP_EVENT_ID,
-};
-
-static struct mon_evt mbm_total_event = {
-	.name		= "mbm_total_bytes",
-	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
-};
-
-static struct mon_evt mbm_local_event = {
-	.name		= "mbm_local_bytes",
-	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
-};
-
-static struct mon_evt mbm_bps_event = {
-	.name		= "mbm_local_bytes",
-	.evtid		= QOS_MC_MBM_BPS_EVENT_ID,
-};
-
-/*
- * Initialize the event list for the resource.
- *
- * Note that MBM events are also part of RDT_RESOURCE_L3 resource
- * because as per the SDM the total and local memory bandwidth
- * are enumerated as part of L3 monitoring.
- */
-static void l3_mon_evt_init(struct rdt_resource *r)
-{
-	INIT_LIST_HEAD(&r->mon.evt_list);
-
-	if (resctrl_arch_is_llc_occupancy_enabled())
-		list_add_tail(&llc_occupancy_event.list, &r->mon.evt_list);
-	if (resctrl_arch_is_mbm_total_enabled())
-		list_add_tail(&mbm_total_event.list, &r->mon.evt_list);
-	if (resctrl_arch_is_mbm_local_enabled())
-		list_add_tail(&mbm_local_event.list, &r->mon.evt_list);
-}
-
-static void mc_mon_evt_init(struct rdt_resource *r)
-{
-	INIT_LIST_HEAD(&r->mon.evt_list);
-
-	if (resctrl_arch_is_mbm_bps_enabled())
-		list_add_tail(&mbm_bps_event.list, &r->mon.evt_list);
-}
-
-int resctrl_mon_resource_init(void)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	int ret;
-
-	if (!r->mon_capable)
-		return 0;
-
-	ret = dom_data_init(r);
-	if (ret)
-		return ret;
-
-	l3_mon_evt_init(r);
-
-	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
-		mbm_total_event.configurable = true;
-		resctrl_file_fflags_init("mbm_total_bytes_config",
-					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
-	}
-	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
-		mbm_local_event.configurable = true;
-		resctrl_file_fflags_init("mbm_local_bytes_config",
-					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
-	}
-
-	r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
-	mc_mon_evt_init(r);
-
-	return 0;
-}
-
-void resctrl_mon_resource_exit(void)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-
-	dom_data_exit(r);
-}
diff --git a/fs/resctrl/psuedo_lock.c b/fs/resctrl/psuedo_lock.c
deleted file mode 100644
index 077c2abb6edd90cad1e1e237dd4c1861cf1dc025..0000000000000000000000000000000000000000
--- a/fs/resctrl/psuedo_lock.c
+++ /dev/null
@@ -1,1122 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Resource Director Technology (RDT)
- *
- * Pseudo-locking support built on top of Cache Allocation Technology (CAT)
- *
- * Copyright (C) 2018 Intel Corporation
- *
- * Author: Reinette Chatre <reinette.chatre@intel.com>
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/debugfs.h>
-#include <linux/kthread.h>
-#include <linux/mman.h>
-#include <linux/perf_event.h>
-#include <linux/pm_qos.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-#include <asm/cacheflush.h>
-#include <asm/resctrl.h>
-#include <asm/perf_event.h>
-
-#include "internal.h"
-
-/*
- * Major number assigned to and shared by all devices exposing
- * pseudo-locked regions.
- */
-static unsigned int pseudo_lock_major;
-static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);
-
-static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
-{
-	const struct rdtgroup *rdtgrp;
-
-	rdtgrp = dev_get_drvdata(dev);
-	if (mode)
-		*mode = 0600;
-	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
-}
-
-static const struct class pseudo_lock_class = {
-	.name = "pseudo_lock",
-	.devnode = pseudo_lock_devnode,
-};
-
-/**
- * pseudo_lock_minor_get - Obtain available minor number
- * @minor: Pointer to where new minor number will be stored
- *
- * A bitmask is used to track available minor numbers. Here the next free
- * minor number is marked as unavailable and returned.
- *
- * Return: 0 on success, <0 on failure.
- */
-static int pseudo_lock_minor_get(unsigned int *minor)
-{
-	unsigned long first_bit;
-
-	first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);
-
-	if (first_bit == MINORBITS)
-		return -ENOSPC;
-
-	__clear_bit(first_bit, &pseudo_lock_minor_avail);
-	*minor = first_bit;
-
-	return 0;
-}
-
-/**
- * pseudo_lock_minor_release - Return minor number to available
- * @minor: The minor number made available
- */
-static void pseudo_lock_minor_release(unsigned int minor)
-{
-	__set_bit(minor, &pseudo_lock_minor_avail);
-}
-
-/**
- * region_find_by_minor - Locate a pseudo-lock region by inode minor number
- * @minor: The minor number of the device representing pseudo-locked region
- *
- * When the character device is accessed we need to determine which
- * pseudo-locked region it belongs to. This is done by matching the minor
- * number of the device to the pseudo-locked region it belongs.
- *
- * Minor numbers are assigned at the time a pseudo-locked region is associated
- * with a cache instance.
- *
- * Return: On success return pointer to resource group owning the pseudo-locked
- *         region, NULL on failure.
- */
-static struct rdtgroup *region_find_by_minor(unsigned int minor)
-{
-	struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;
-
-	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
-		if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
-			rdtgrp_match = rdtgrp;
-			break;
-		}
-	}
-	return rdtgrp_match;
-}
-
-/**
- * struct pseudo_lock_pm_req - A power management QoS request list entry
- * @list:	Entry within the @pm_reqs list for a pseudo-locked region
- * @req:	PM QoS request
- */
-struct pseudo_lock_pm_req {
-	struct list_head list;
-	struct dev_pm_qos_request req;
-};
-
-static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
-{
-	struct pseudo_lock_pm_req *pm_req, *next;
-
-	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
-		dev_pm_qos_remove_request(&pm_req->req);
-		list_del(&pm_req->list);
-		kfree(pm_req);
-	}
-}
-
-/**
- * pseudo_lock_cstates_constrain - Restrict cores from entering C6
- * @plr: Pseudo-locked region
- *
- * To prevent the cache from being affected by power management entering
- * C6 has to be avoided. This is accomplished by requesting a latency
- * requirement lower than lowest C6 exit latency of all supported
- * platforms as found in the cpuidle state tables in the intel_idle driver.
- * At this time it is possible to do so with a single latency requirement
- * for all supported platforms.
- *
- * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
- * the ACPI latencies need to be considered while keeping in mind that C2
- * may be set to map to deeper sleep states. In this case the latency
- * requirement needs to prevent entering C2 also.
- *
- * Return: 0 on success, <0 on failure
- */
-static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
-{
-	struct pseudo_lock_pm_req *pm_req;
-	int cpu;
-	int ret;
-
-	for_each_cpu(cpu, &plr->d->cpu_mask) {
-		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
-		if (!pm_req) {
-			rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
-			ret = -ENOMEM;
-			goto out_err;
-		}
-		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
-					     &pm_req->req,
-					     DEV_PM_QOS_RESUME_LATENCY,
-					     30);
-		if (ret < 0) {
-			rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
-					    cpu);
-			kfree(pm_req);
-			ret = -1;
-			goto out_err;
-		}
-		list_add(&pm_req->list, &plr->pm_reqs);
-	}
-
-	return 0;
-
-out_err:
-	pseudo_lock_cstates_relax(plr);
-	return ret;
-}
-
-/**
- * pseudo_lock_region_clear - Reset pseudo-lock region data
- * @plr: pseudo-lock region
- *
- * All content of the pseudo-locked region is reset - any memory allocated
- * freed.
- *
- * Return: void
- */
-static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
-{
-	plr->size = 0;
-	plr->line_size = 0;
-	kfree(plr->kmem);
-	plr->kmem = NULL;
-	plr->s = NULL;
-	if (plr->d)
-		plr->d->plr = NULL;
-	plr->d = NULL;
-	plr->cbm = 0;
-	plr->debugfs_dir = NULL;
-}
-
-/**
- * pseudo_lock_region_init - Initialize pseudo-lock region information
- * @plr: pseudo-lock region
- *
- * Called after user provided a schemata to be pseudo-locked. From the
- * schemata the &struct pseudo_lock_region is on entry already initialized
- * with the resource, domain, and capacity bitmask. Here the information
- * required for pseudo-locking is deduced from this data and &struct
- * pseudo_lock_region initialized further. This information includes:
- * - size in bytes of the region to be pseudo-locked
- * - cache line size to know the stride with which data needs to be accessed
- *   to be pseudo-locked
- * - a cpu associated with the cache instance on which the pseudo-locking
- *   flow can be executed
- *
- * Return: 0 on success, <0 on failure. Descriptive error will be written
- * to last_cmd_status buffer.
- */
-static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
-{
-	struct cpu_cacheinfo *ci;
-	int ret;
-	int i;
-
-	/* Pick the first cpu we find that is associated with the cache. */
-	plr->cpu = cpumask_first(&plr->d->cpu_mask);
-
-	if (!cpu_online(plr->cpu)) {
-		rdt_last_cmd_printf("CPU %u associated with cache not online\n",
-				    plr->cpu);
-		ret = -ENODEV;
-		goto out_region;
-	}
-
-	ci = get_cpu_cacheinfo(plr->cpu);
-
-	plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
-
-	for (i = 0; i < ci->num_leaves; i++) {
-		if (ci->info_list[i].level == plr->s->res->cache_level) {
-			plr->line_size = ci->info_list[i].coherency_line_size;
-			return 0;
-		}
-	}
-
-	ret = -1;
-	rdt_last_cmd_puts("Unable to determine cache line size\n");
-out_region:
-	pseudo_lock_region_clear(plr);
-	return ret;
-}
-
-/**
- * pseudo_lock_init - Initialize a pseudo-lock region
- * @rdtgrp: resource group to which new pseudo-locked region will belong
- *
- * A pseudo-locked region is associated with a resource group. When this
- * association is created the pseudo-locked region is initialized. The
- * details of the pseudo-locked region are not known at this time so only
- * allocation is done and association established.
- *
- * Return: 0 on success, <0 on failure
- */
-static int pseudo_lock_init(struct rdtgroup *rdtgrp)
-{
-	struct pseudo_lock_region *plr;
-
-	plr = kzalloc(sizeof(*plr), GFP_KERNEL);
-	if (!plr)
-		return -ENOMEM;
-
-	init_waitqueue_head(&plr->lock_thread_wq);
-	INIT_LIST_HEAD(&plr->pm_reqs);
-	rdtgrp->plr = plr;
-	return 0;
-}
-
-/**
- * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
- * @plr: pseudo-lock region
- *
- * Initialize the details required to set up the pseudo-locked region and
- * allocate the contiguous memory that will be pseudo-locked to the cache.
- *
- * Return: 0 on success, <0 on failure.  Descriptive error will be written
- * to last_cmd_status buffer.
- */
-static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
-{
-	int ret;
-
-	ret = pseudo_lock_region_init(plr);
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * We do not yet support contiguous regions larger than
-	 * KMALLOC_MAX_SIZE.
-	 */
-	if (plr->size > KMALLOC_MAX_SIZE) {
-		rdt_last_cmd_puts("Requested region exceeds maximum size\n");
-		ret = -E2BIG;
-		goto out_region;
-	}
-
-	plr->kmem = kzalloc(plr->size, GFP_KERNEL);
-	if (!plr->kmem) {
-		rdt_last_cmd_puts("Unable to allocate memory\n");
-		ret = -ENOMEM;
-		goto out_region;
-	}
-
-	ret = 0;
-	goto out;
-out_region:
-	pseudo_lock_region_clear(plr);
-out:
-	return ret;
-}
-
-/**
- * pseudo_lock_free - Free a pseudo-locked region
- * @rdtgrp: resource group to which pseudo-locked region belonged
- *
- * The pseudo-locked region's resources have already been released, or not
- * yet created at this point. Now it can be freed and disassociated from the
- * resource group.
- *
- * Return: void
- */
-static void pseudo_lock_free(struct rdtgroup *rdtgrp)
-{
-	pseudo_lock_region_clear(rdtgrp->plr);
-	kfree(rdtgrp->plr);
-	rdtgrp->plr = NULL;
-}
-
-/**
- * rdtgroup_monitor_in_progress - Test if monitoring in progress
- * @rdtgrp: resource group being queried
- *
- * Return: 1 if monitor groups have been created for this resource
- * group, 0 otherwise.
- */
-static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
-{
-	return !list_empty(&rdtgrp->mon.crdtgrp_list);
-}
-
-/**
- * rdtgroup_locksetup_user_restrict - Restrict user access to group
- * @rdtgrp: resource group needing access restricted
- *
- * A resource group used for cache pseudo-locking cannot have cpus or tasks
- * assigned to it. This is communicated to the user by restricting access
- * to all the files that can be used to make such changes.
- *
- * Permissions restored with rdtgroup_locksetup_user_restore()
- *
- * Return: 0 on success, <0 on failure. If a failure occurs during the
- * restriction of access an attempt will be made to restore permissions but
- * the state of the mode of these files will be uncertain when a failure
- * occurs.
- */
-static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
-{
-	int ret;
-
-	ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
-	if (ret)
-		return ret;
-
-	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
-	if (ret)
-		goto err_tasks;
-
-	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
-	if (ret)
-		goto err_cpus;
-
-	if (resctrl_arch_mon_capable()) {
-		ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
-		if (ret)
-			goto err_cpus_list;
-	}
-
-	ret = 0;
-	goto out;
-
-err_cpus_list:
-	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
-err_cpus:
-	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
-err_tasks:
-	rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
-out:
-	return ret;
-}
-
-/**
- * rdtgroup_locksetup_user_restore - Restore user access to group
- * @rdtgrp: resource group needing access restored
- *
- * Restore all file access previously removed using
- * rdtgroup_locksetup_user_restrict()
- *
- * Return: 0 on success, <0 on failure.  If a failure occurs during the
- * restoration of access an attempt will be made to restrict permissions
- * again but the state of the mode of these files will be uncertain when
- * a failure occurs.
- */
-static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
-{
-	int ret;
-
-	ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
-	if (ret)
-		return ret;
-
-	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
-	if (ret)
-		goto err_tasks;
-
-	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
-	if (ret)
-		goto err_cpus;
-
-	if (resctrl_arch_mon_capable()) {
-		ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
-		if (ret)
-			goto err_cpus_list;
-	}
-
-	ret = 0;
-	goto out;
-
-err_cpus_list:
-	rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
-err_cpus:
-	rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
-err_tasks:
-	rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
-out:
-	return ret;
-}
-
-/**
- * rdtgroup_locksetup_enter - Resource group enters locksetup mode
- * @rdtgrp: resource group requested to enter locksetup mode
- *
- * A resource group enters locksetup mode to reflect that it would be used
- * to represent a pseudo-locked region and is in the process of being set
- * up to do so. A resource group used for a pseudo-locked region would
- * lose the closid associated with it so we cannot allow it to have any
- * tasks or cpus assigned nor permit tasks or cpus to be assigned in the
- * future. Monitoring of a pseudo-locked region is not allowed either.
- *
- * The above and more restrictions on a pseudo-locked region are checked
- * for and enforced before the resource group enters the locksetup mode.
- *
- * Returns: 0 if the resource group successfully entered locksetup mode, <0
- * on failure. On failure the last_cmd_status buffer is updated with text to
- * communicate details of failure to the user.
- */
-int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
-{
-	int ret;
-
-	/*
-	 * The default resource group can neither be removed nor lose the
-	 * default closid associated with it.
-	 */
-	if (rdtgrp == &rdtgroup_default) {
-		rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * Cache Pseudo-locking not supported when CDP is enabled.
-	 *
-	 * Some things to consider if you would like to enable this
-	 * support (using L3 CDP as example):
-	 * - When CDP is enabled two separate resources are exposed,
-	 *   L3DATA and L3CODE, but they are actually on the same cache.
-	 *   The implication for pseudo-locking is that if a
-	 *   pseudo-locked region is created on a domain of one
-	 *   resource (eg. L3CODE), then a pseudo-locked region cannot
-	 *   be created on that same domain of the other resource
-	 *   (eg. L3DATA). This is because the creation of a
-	 *   pseudo-locked region involves a call to wbinvd that will
-	 *   affect all cache allocations on particular domain.
-	 * - Considering the previous, it may be possible to only
-	 *   expose one of the CDP resources to pseudo-locking and
-	 *   hide the other. For example, we could consider to only
-	 *   expose L3DATA and since the L3 cache is unified it is
-	 *   still possible to place instructions there are execute it.
-	 * - If only one region is exposed to pseudo-locking we should
-	 *   still keep in mind that availability of a portion of cache
-	 *   for pseudo-locking should take into account both resources.
-	 *   Similarly, if a pseudo-locked region is created in one
-	 *   resource, the portion of cache used by it should be made
-	 *   unavailable to all future allocations from both resources.
-	 */
-	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
-	    resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
-		rdt_last_cmd_puts("CDP enabled\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * Not knowing the bits to disable prefetching implies that this
-	 * platform does not support Cache Pseudo-Locking.
-	 */
-	if (resctrl_arch_get_prefetch_disable_bits() == 0) {
-		rdt_last_cmd_puts("Pseudo-locking not supported\n");
-		return -EINVAL;
-	}
-
-	if (rdtgroup_monitor_in_progress(rdtgrp)) {
-		rdt_last_cmd_puts("Monitoring in progress\n");
-		return -EINVAL;
-	}
-
-	if (rdtgroup_tasks_assigned(rdtgrp)) {
-		rdt_last_cmd_puts("Tasks assigned to resource group\n");
-		return -EINVAL;
-	}
-
-	if (!cpumask_empty(&rdtgrp->cpu_mask)) {
-		rdt_last_cmd_puts("CPUs assigned to resource group\n");
-		return -EINVAL;
-	}
-
-	if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
-		rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
-		return -EIO;
-	}
-
-	ret = pseudo_lock_init(rdtgrp);
-	if (ret) {
-		rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
-		goto out_release;
-	}
-
-	/*
-	 * If this system is capable of monitoring a rmid would have been
-	 * allocated when the control group was created. This is not needed
-	 * anymore when this group would be used for pseudo-locking. This
-	 * is safe to call on platforms not capable of monitoring.
-	 */
-	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
-
-	ret = 0;
-	goto out;
-
-out_release:
-	rdtgroup_locksetup_user_restore(rdtgrp);
-out:
-	return ret;
-}
-
-/**
- * rdtgroup_locksetup_exit - resource group exist locksetup mode
- * @rdtgrp: resource group
- *
- * When a resource group exits locksetup mode the earlier restrictions are
- * lifted.
- *
- * Return: 0 on success, <0 on failure
- */
-int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
-{
-	int ret;
-
-	if (resctrl_arch_mon_capable()) {
-		ret = alloc_rmid(rdtgrp->closid);
-		if (ret < 0) {
-			rdt_last_cmd_puts("Out of RMIDs\n");
-			return ret;
-		}
-		rdtgrp->mon.rmid = ret;
-	}
-
-	ret = rdtgroup_locksetup_user_restore(rdtgrp);
-	if (ret) {
-		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
-		return ret;
-	}
-
-	pseudo_lock_free(rdtgrp);
-	return 0;
-}
-
-/**
- * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
- * @d: RDT domain
- * @cbm: CBM to test
- *
- * @d represents a cache instance and @cbm a capacity bitmask that is
- * considered for it. Determine if @cbm overlaps with any existing
- * pseudo-locked region on @d.
- *
- * @cbm is unsigned long, even if only 32 bits are used, to make the
- * bitmap functions work correctly.
- *
- * Return: true if @cbm overlaps with pseudo-locked region on @d, false
- * otherwise.
- */
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
-{
-	unsigned int cbm_len;
-	unsigned long cbm_b;
-
-	if (d->plr) {
-		cbm_len = d->plr->s->res->cache.cbm_len;
-		cbm_b = d->plr->cbm;
-		if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
-			return true;
-	}
-	return false;
-}
-
-/**
- * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
- * @d: RDT domain under test
- *
- * The setup of a pseudo-locked region affects all cache instances within
- * the hierarchy of the region. It is thus essential to know if any
- * pseudo-locked regions exist within a cache hierarchy to prevent any
- * attempts to create new pseudo-locked regions in the same hierarchy.
- *
- * Return: true if a pseudo-locked region exists in the hierarchy of @d or
- *         if it is not possible to test due to memory allocation issue,
- *         false otherwise.
- */
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
-{
-	cpumask_var_t cpu_with_psl;
-	enum resctrl_res_level i;
-	struct rdt_resource *r;
-	struct rdt_domain *d_i;
-	bool ret = false;
-
-	/* Walking r->domains, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-	if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
-		return true;
-
-	/*
-	 * First determine which cpus have pseudo-locked regions
-	 * associated with them.
-	 */
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		r = resctrl_arch_get_resource(i);
-		if (!r->alloc_capable)
-			continue;
-
-		list_for_each_entry(d_i, &r->domains, list) {
-			if (d_i->plr)
-				cpumask_or(cpu_with_psl, cpu_with_psl,
-					   &d_i->cpu_mask);
-		}
-	}
-
-	/*
-	 * Next test if new pseudo-locked region would intersect with
-	 * existing region.
-	 */
-	if (cpumask_intersects(&d->cpu_mask, cpu_with_psl))
-		ret = true;
-
-	free_cpumask_var(cpu_with_psl);
-	return ret;
-}
-
-/**
- * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
- * @rdtgrp: Resource group to which the pseudo-locked region belongs.
- * @sel: Selector of which measurement to perform on a pseudo-locked region.
- *
- * The measurement of latency to access a pseudo-locked region should be
- * done from a cpu that is associated with that pseudo-locked region.
- * Determine which cpu is associated with this region and start a thread on
- * that cpu to perform the measurement, wait for that thread to complete.
- *
- * Return: 0 on success, <0 on failure
- */
-static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
-{
-	struct pseudo_lock_region *plr = rdtgrp->plr;
-	struct task_struct *thread;
-	unsigned int cpu;
-	int ret = -1;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	if (rdtgrp->flags & RDT_DELETED) {
-		ret = -ENODEV;
-		goto out;
-	}
-
-	if (!plr->d) {
-		ret = -ENODEV;
-		goto out;
-	}
-
-	plr->thread_done = 0;
-	cpu = cpumask_first(&plr->d->cpu_mask);
-	if (!cpu_online(cpu)) {
-		ret = -ENODEV;
-		goto out;
-	}
-
-	plr->cpu = cpu;
-
-	if (sel == 1)
-		thread = kthread_create_on_node(resctrl_arch_measure_cycles_lat_fn,
-						plr, cpu_to_node(cpu),
-						"pseudo_lock_measure/%u",
-						cpu);
-	else if (sel == 2)
-		thread = kthread_create_on_node(resctrl_arch_measure_l2_residency,
-						plr, cpu_to_node(cpu),
-						"pseudo_lock_measure/%u",
-						cpu);
-	else if (sel == 3)
-		thread = kthread_create_on_node(resctrl_arch_measure_l3_residency,
-						plr, cpu_to_node(cpu),
-						"pseudo_lock_measure/%u",
-						cpu);
-	else
-		goto out;
-
-	if (IS_ERR(thread)) {
-		ret = PTR_ERR(thread);
-		goto out;
-	}
-	kthread_bind(thread, cpu);
-	wake_up_process(thread);
-
-	ret = wait_event_interruptible(plr->lock_thread_wq,
-				       plr->thread_done == 1);
-	if (ret < 0)
-		goto out;
-
-	ret = 0;
-
-out:
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-	return ret;
-}
-
-static ssize_t pseudo_lock_measure_trigger(struct file *file,
-					   const char __user *user_buf,
-					   size_t count, loff_t *ppos)
-{
-	struct rdtgroup *rdtgrp = file->private_data;
-	size_t buf_size;
-	char buf[32];
-	int ret;
-	int sel;
-
-	buf_size = min(count, (sizeof(buf) - 1));
-	if (copy_from_user(buf, user_buf, buf_size))
-		return -EFAULT;
-
-	buf[buf_size] = '\0';
-	ret = kstrtoint(buf, 10, &sel);
-	if (ret == 0) {
-		if (sel != 1 && sel != 2 && sel != 3)
-			return -EINVAL;
-		ret = debugfs_file_get(file->f_path.dentry);
-		if (ret)
-			return ret;
-		ret = pseudo_lock_measure_cycles(rdtgrp, sel);
-		if (ret == 0)
-			ret = count;
-		debugfs_file_put(file->f_path.dentry);
-	}
-
-	return ret;
-}
-
-static const struct file_operations pseudo_measure_fops = {
-	.write = pseudo_lock_measure_trigger,
-	.open = simple_open,
-	.llseek = default_llseek,
-};
-
-/**
- * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
- * @rdtgrp: resource group to which pseudo-lock region belongs
- *
- * Called when a resource group in the pseudo-locksetup mode receives a
- * valid schemata that should be pseudo-locked. Since the resource group is
- * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
- * allocated and initialized with the essential information. If a failure
- * occurs the resource group remains in the pseudo-locksetup mode with the
- * &struct pseudo_lock_region associated with it, but cleared from all
- * information and ready for the user to re-attempt pseudo-locking by
- * writing the schemata again.
- *
- * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
- * on failure. Descriptive error will be written to last_cmd_status buffer.
- */
-int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
-{
-	struct pseudo_lock_region *plr = rdtgrp->plr;
-	struct task_struct *thread;
-	unsigned int new_minor;
-	struct device *dev;
-	int ret;
-
-	ret = pseudo_lock_region_alloc(plr);
-	if (ret < 0)
-		return ret;
-
-	ret = pseudo_lock_cstates_constrain(plr);
-	if (ret < 0) {
-		ret = -EINVAL;
-		goto out_region;
-	}
-
-	plr->thread_done = 0;
-
-	plr->closid = rdtgrp->closid;
-	thread = kthread_create_on_node(resctrl_arch_pseudo_lock_fn, plr,
-					cpu_to_node(plr->cpu),
-					"pseudo_lock/%u", plr->cpu);
-	if (IS_ERR(thread)) {
-		ret = PTR_ERR(thread);
-		rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
-		goto out_cstates;
-	}
-
-	kthread_bind(thread, plr->cpu);
-	wake_up_process(thread);
-
-	ret = wait_event_interruptible(plr->lock_thread_wq,
-				       plr->thread_done == 1);
-	if (ret < 0) {
-		/*
-		 * If the thread does not get on the CPU for whatever
-		 * reason and the process which sets up the region is
-		 * interrupted then this will leave the thread in runnable
-		 * state and once it gets on the CPU it will dereference
-		 * the cleared, but not freed, plr struct resulting in an
-		 * empty pseudo-locking loop.
-		 */
-		rdt_last_cmd_puts("Locking thread interrupted\n");
-		goto out_cstates;
-	}
-
-	ret = pseudo_lock_minor_get(&new_minor);
-	if (ret < 0) {
-		rdt_last_cmd_puts("Unable to obtain a new minor number\n");
-		goto out_cstates;
-	}
-
-	/*
-	 * Unlock access but do not release the reference. The
-	 * pseudo-locked region will still be here on return.
-	 *
-	 * The mutex has to be released temporarily to avoid a potential
-	 * deadlock with the mm->mmap_lock which is obtained in the
-	 * device_create() and debugfs_create_dir() callpath below as well as
-	 * before the mmap() callback is called.
-	 */
-	mutex_unlock(&rdtgroup_mutex);
-
-	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
-		plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name,
-						      debugfs_resctrl);
-		if (!IS_ERR_OR_NULL(plr->debugfs_dir))
-			debugfs_create_file("pseudo_lock_measure", 0200,
-					    plr->debugfs_dir, rdtgrp,
-					    &pseudo_measure_fops);
-	}
-
-	dev = device_create(&pseudo_lock_class, NULL,
-			    MKDEV(pseudo_lock_major, new_minor),
-			    rdtgrp, "%s", rdtgrp->kn->name);
-
-	mutex_lock(&rdtgroup_mutex);
-
-	if (IS_ERR(dev)) {
-		ret = PTR_ERR(dev);
-		rdt_last_cmd_printf("Failed to create character device: %d\n",
-				    ret);
-		goto out_debugfs;
-	}
-
-	/* We released the mutex - check if group was removed while we did so */
-	if (rdtgrp->flags & RDT_DELETED) {
-		ret = -ENODEV;
-		goto out_device;
-	}
-
-	plr->minor = new_minor;
-
-	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
-	closid_free(rdtgrp->closid);
-	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
-	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);
-
-	ret = 0;
-	goto out;
-
-out_device:
-	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
-out_debugfs:
-	debugfs_remove_recursive(plr->debugfs_dir);
-	pseudo_lock_minor_release(new_minor);
-out_cstates:
-	pseudo_lock_cstates_relax(plr);
-out_region:
-	pseudo_lock_region_clear(plr);
-out:
-	return ret;
-}
-
-/**
- * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
- * @rdtgrp: resource group to which the pseudo-locked region belongs
- *
- * The removal of a pseudo-locked region can be initiated when the resource
- * group is removed from user space via a "rmdir" from userspace or the
- * unmount of the resctrl filesystem. On removal the resource group does
- * not go back to pseudo-locksetup mode before it is removed, instead it is
- * removed directly. There is thus asymmetry with the creation where the
- * &struct pseudo_lock_region is removed here while it was not created in
- * rdtgroup_pseudo_lock_create().
- *
- * Return: void
- */
-void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
-{
-	struct pseudo_lock_region *plr = rdtgrp->plr;
-
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-		/*
-		 * Default group cannot be a pseudo-locked region so we can
-		 * free closid here.
-		 */
-		closid_free(rdtgrp->closid);
-		goto free;
-	}
-
-	pseudo_lock_cstates_relax(plr);
-	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
-	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
-	pseudo_lock_minor_release(plr->minor);
-
-free:
-	pseudo_lock_free(rdtgrp);
-}
-
-static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
-{
-	struct rdtgroup *rdtgrp;
-
-	mutex_lock(&rdtgroup_mutex);
-
-	rdtgrp = region_find_by_minor(iminor(inode));
-	if (!rdtgrp) {
-		mutex_unlock(&rdtgroup_mutex);
-		return -ENODEV;
-	}
-
-	filp->private_data = rdtgrp;
-	atomic_inc(&rdtgrp->waitcount);
-	/* Perform a non-seekable open - llseek is not supported */
-	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
-
-	mutex_unlock(&rdtgroup_mutex);
-
-	return 0;
-}
-
-static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
-{
-	struct rdtgroup *rdtgrp;
-
-	mutex_lock(&rdtgroup_mutex);
-	rdtgrp = filp->private_data;
-	WARN_ON(!rdtgrp);
-	if (!rdtgrp) {
-		mutex_unlock(&rdtgroup_mutex);
-		return -ENODEV;
-	}
-	filp->private_data = NULL;
-	atomic_dec(&rdtgrp->waitcount);
-	mutex_unlock(&rdtgroup_mutex);
-	return 0;
-}
-
-static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
-{
-	/* Not supported */
-	return -EINVAL;
-}
-
-static const struct vm_operations_struct pseudo_mmap_ops = {
-	.mremap = pseudo_lock_dev_mremap,
-};
-
-static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	unsigned long vsize = vma->vm_end - vma->vm_start;
-	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
-	struct pseudo_lock_region *plr;
-	struct rdtgroup *rdtgrp;
-	unsigned long physical;
-	unsigned long psize;
-
-	mutex_lock(&rdtgroup_mutex);
-
-	rdtgrp = filp->private_data;
-	WARN_ON(!rdtgrp);
-	if (!rdtgrp) {
-		mutex_unlock(&rdtgroup_mutex);
-		return -ENODEV;
-	}
-
-	plr = rdtgrp->plr;
-
-	if (!plr->d) {
-		mutex_unlock(&rdtgroup_mutex);
-		return -ENODEV;
-	}
-
-	/*
-	 * Task is required to run with affinity to the cpus associated
-	 * with the pseudo-locked region. If this is not the case the task
-	 * may be scheduled elsewhere and invalidate entries in the
-	 * pseudo-locked region.
-	 */
-	if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
-		mutex_unlock(&rdtgroup_mutex);
-		return -EINVAL;
-	}
-
-	physical = __pa(plr->kmem) >> PAGE_SHIFT;
-	psize = plr->size - off;
-
-	if (off > plr->size) {
-		mutex_unlock(&rdtgroup_mutex);
-		return -ENOSPC;
-	}
-
-	/*
-	 * Ensure changes are carried directly to the memory being mapped,
-	 * do not allow copy-on-write mapping.
-	 */
-	if (!(vma->vm_flags & VM_SHARED)) {
-		mutex_unlock(&rdtgroup_mutex);
-		return -EINVAL;
-	}
-
-	if (vsize > psize) {
-		mutex_unlock(&rdtgroup_mutex);
-		return -ENOSPC;
-	}
-
-	memset(plr->kmem + off, 0, vsize);
-
-	if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
-			    vsize, vma->vm_page_prot)) {
-		mutex_unlock(&rdtgroup_mutex);
-		return -EAGAIN;
-	}
-	vma->vm_ops = &pseudo_mmap_ops;
-	mutex_unlock(&rdtgroup_mutex);
-	return 0;
-}
-
-static const struct file_operations pseudo_lock_dev_fops = {
-	.owner =	THIS_MODULE,
-	.llseek =	no_llseek,
-	.read =		NULL,
-	.write =	NULL,
-	.open =		pseudo_lock_dev_open,
-	.release =	pseudo_lock_dev_release,
-	.mmap =		pseudo_lock_dev_mmap,
-};
-
-int rdt_pseudo_lock_init(void)
-{
-	int ret;
-
-	ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
-	if (ret < 0)
-		return ret;
-
-	pseudo_lock_major = ret;
-
-	ret = class_register(&pseudo_lock_class);
-	if (ret) {
-		unregister_chrdev(pseudo_lock_major, "pseudo_lock");
-		return ret;
-	}
-
-	return 0;
-}
-
-void rdt_pseudo_lock_release(void)
-{
-	class_unregister(&pseudo_lock_class);
-	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
-	pseudo_lock_major = 0;
-}
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
deleted file mode 100644
index 457de2adc251851a0d1286d2989aa487cc61b02f..0000000000000000000000000000000000000000
--- a/fs/resctrl/rdtgroup.c
+++ /dev/null
@@ -1,4787 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * User interface for Resource Allocation in Resource Director Technology(RDT)
- *
- * Copyright (C) 2016 Intel Corporation
- *
- * Author: Fenghua Yu <fenghua.yu@intel.com>
- *
- * More information about RDT be found in the Intel (R) x86 Architecture
- * Software Developer Manual.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/debugfs.h>
-#include <linux/fs.h>
-#include <linux/fs_parser.h>
-#include <linux/sysfs.h>
-#include <linux/kernfs.h>
-#include <linux/seq_buf.h>
-#include <linux/seq_file.h>
-#include <linux/sched/signal.h>
-#include <linux/sched/task.h>
-#include <linux/slab.h>
-#include <linux/task_work.h>
-#include <linux/user_namespace.h>
-
-#include <uapi/linux/magic.h>
-
-#include <asm/resctrl.h>
-#include "internal.h"
-
-/* Mutex to protect rdtgroup access. */
-DEFINE_MUTEX(rdtgroup_mutex);
-
-static struct kernfs_root *rdt_root;
-struct rdtgroup rdtgroup_default;
-LIST_HEAD(rdt_all_groups);
-
-/* list of entries for the schemata file */
-LIST_HEAD(resctrl_schema_all);
-
-/* The filesystem can only be mounted once. */
-bool resctrl_mounted;
-
-/* Kernel fs node for "info" directory under root */
-static struct kernfs_node *kn_info;
-
-/* Kernel fs node for "mon_groups" directory under root */
-static struct kernfs_node *kn_mongrp;
-
-/* Kernel fs node for "mon_data" directory under root */
-static struct kernfs_node *kn_mondata;
-
-/*
- * Used to store the max resource name width and max resource data width
- * to display the schemata in a tabular format
- */
-int max_name_width, max_data_width;
-
-static struct seq_buf last_cmd_status;
-static char last_cmd_status_buf[512];
-
-static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
-static void rdtgroup_destroy_root(void);
-
-struct dentry *debugfs_resctrl;
-
-static bool resctrl_debug;
-
-void rdt_last_cmd_clear(void)
-{
-	lockdep_assert_held(&rdtgroup_mutex);
-	seq_buf_clear(&last_cmd_status);
-}
-
-void rdt_last_cmd_puts(const char *s)
-{
-	lockdep_assert_held(&rdtgroup_mutex);
-	seq_buf_puts(&last_cmd_status, s);
-}
-
-void rdt_last_cmd_printf(const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-	lockdep_assert_held(&rdtgroup_mutex);
-	seq_buf_vprintf(&last_cmd_status, fmt, ap);
-	va_end(ap);
-}
-
-void rdt_staged_configs_clear(void)
-{
-	enum resctrl_res_level i;
-	struct rdt_resource *r;
-	struct rdt_domain *dom;
-
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		r = resctrl_arch_get_resource(i);
-		if (!r->alloc_capable)
-			continue;
-
-		list_for_each_entry(dom, &r->domains, list)
-			memset(dom->staged_config, 0, sizeof(dom->staged_config));
-	}
-}
-
-static bool resctrl_is_mbm_enabled(void)
-{
-	return (resctrl_arch_is_mbm_total_enabled() ||
-		resctrl_arch_is_mbm_local_enabled());
-}
-
-static bool resctrl_is_mbm_event(int e)
-{
-	return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
-		e <= QOS_L3_MBM_LOCAL_EVENT_ID);
-}
-
-/*
- * Trivial allocator for CLOSIDs. Use BITMAP APIs to manipulate a bitmap
- * of free CLOSIDs.
- *
- * Using a global CLOSID across all resources has some advantages and
- * some drawbacks:
- * + We can simply set current's closid to assign a task to a resource
- *   group.
- * + Context switch code can avoid extra memory references deciding which
- *   CLOSID to load into the PQR_ASSOC MSR
- * - We give up some options in configuring resource groups across multi-socket
- *   systems.
- * - Our choices on how to configure each resource become progressively more
- *   limited as the number of resources grows.
- */
-static unsigned long *closid_free_map;
-static int closid_free_map_len;
-
-int closids_supported(void)
-{
-	return closid_free_map_len;
-}
-
-static void closid_init(void)
-{
-	struct resctrl_schema *s;
-	u32 rdt_min_closid = ~0;
-
-	/* Compute rdt_min_closid across all resources */
-	list_for_each_entry(s, &resctrl_schema_all, list)
-		rdt_min_closid = min(rdt_min_closid, s->num_closid);
-
-	closid_free_map = bitmap_alloc(rdt_min_closid, GFP_KERNEL);
-	bitmap_fill(closid_free_map, rdt_min_closid);
-
-	/* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
-	__clear_bit(RESCTRL_RESERVED_CLOSID, closid_free_map);
-	closid_free_map_len = rdt_min_closid;
-}
-
-static int closid_alloc(void)
-{
-	int cleanest_closid;
-	u32 closid;
-
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
-	    resctrl_arch_is_llc_occupancy_enabled()) {
-		cleanest_closid = resctrl_find_cleanest_closid();
-		if (cleanest_closid < 0)
-			return cleanest_closid;
-		closid = cleanest_closid;
-	} else {
-		closid = find_first_bit(closid_free_map, closid_free_map_len);
-		if (closid == closid_free_map_len)
-			return -ENOSPC;
-	}
-
-	__clear_bit(closid, closid_free_map);
-
-	return closid;
-}
-
-void closid_free(int closid)
-{
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	__set_bit(closid, closid_free_map);
-}
-
-/**
- * closid_allocated - test if provided closid is in use
- * @closid: closid to be tested
- *
- * Return: true if @closid is currently associated with a resource group,
- * false if @closid is free
- */
-bool closid_allocated(unsigned int closid)
-{
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	return !test_bit(closid, closid_free_map);
-}
-
-/*
- * Counter bitmap for tracking the available counters.
- * ABMC feature provides set of hardware counters for enabling events.
- * Each event takes one hardware counter. Kernel needs to keep track
- * of number of available counters.
- */
-static DECLARE_BITMAP(mbm_cntrs_free_map, 64);
-
-static void mbm_cntrs_init(struct rdt_resource *r)
-{
-	bitmap_fill(mbm_cntrs_free_map, r->mon.num_mbm_cntrs);
-}
-
-int mbm_cntr_alloc(struct rdt_resource *r)
-{
-	int cntr_id;
-
-	cntr_id = find_first_bit(mbm_cntrs_free_map, r->mon.num_mbm_cntrs);
-	if (cntr_id >= r->mon.num_mbm_cntrs)
-		return -ENOSPC;
-
-	__clear_bit(cntr_id, mbm_cntrs_free_map);
-
-	return cntr_id;
-}
-
-void mbm_cntr_free(u32 cntr_id)
-{
-	__set_bit(cntr_id, mbm_cntrs_free_map);
-}
-
-bool closid_alloc_fixed(u32 closid)
-{
-	return __test_and_clear_bit(closid, closid_free_map);
-}
-
-/**
- * rdtgroup_mode_by_closid - Return mode of resource group with closid
- * @closid: closid if the resource group
- *
- * Each resource group is associated with a @closid. Here the mode
- * of a resource group can be queried by searching for it using its closid.
- *
- * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
- */
-enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
-{
-	struct rdtgroup *rdtgrp;
-
-	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
-		if (rdtgrp->closid == closid)
-			return rdtgrp->mode;
-	}
-
-	return RDT_NUM_MODES;
-}
-
-static const char * const rdt_mode_str[] = {
-	[RDT_MODE_SHAREABLE]		= "shareable",
-	[RDT_MODE_EXCLUSIVE]		= "exclusive",
-	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
-	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
-};
-
-/**
- * rdtgroup_mode_str - Return the string representation of mode
- * @mode: the resource group mode as &enum rdtgroup_mode
- *
- * Return: string representation of valid mode, "unknown" otherwise
- */
-static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
-{
-	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
-		return "unknown";
-
-	return rdt_mode_str[mode];
-}
-
-/* set uid and gid of rdtgroup dirs and files to that of the creator */
-static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
-{
-	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
-				.ia_uid = current_fsuid(),
-				.ia_gid = current_fsgid(), };
-
-	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
-	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
-		return 0;
-
-	return kernfs_setattr(kn, &iattr);
-}
-
-static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
-{
-	struct kernfs_node *kn;
-	int ret;
-
-	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
-				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
-				  0, rft->kf_ops, rft, NULL, NULL);
-	if (IS_ERR(kn))
-		return PTR_ERR(kn);
-
-	ret = rdtgroup_kn_set_ugid(kn);
-	if (ret) {
-		kernfs_remove(kn);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
-{
-	struct kernfs_open_file *of = m->private;
-	struct rftype *rft = of->kn->priv;
-
-	if (rft->seq_show)
-		return rft->seq_show(of, m, arg);
-	return 0;
-}
-
-static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
-				   size_t nbytes, loff_t off)
-{
-	struct rftype *rft = of->kn->priv;
-
-	if (rft->write)
-		return rft->write(of, buf, nbytes, off);
-
-	return -EINVAL;
-}
-
-static const struct kernfs_ops rdtgroup_kf_single_ops = {
-	.atomic_write_len	= PAGE_SIZE,
-	.write			= rdtgroup_file_write,
-	.seq_show		= rdtgroup_seqfile_show,
-};
-
-static const struct kernfs_ops kf_mondata_ops = {
-	.atomic_write_len	= PAGE_SIZE,
-	.seq_show		= rdtgroup_mondata_show,
-};
-
-static bool is_cpu_list(struct kernfs_open_file *of)
-{
-	struct rftype *rft = of->kn->priv;
-
-	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
-}
-
-static int rdtgroup_cpus_show(struct kernfs_open_file *of,
-			      struct seq_file *s, void *v)
-{
-	struct rdtgroup *rdtgrp;
-	struct cpumask *mask;
-	int ret = 0;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-
-	if (rdtgrp) {
-		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
-			if (!rdtgrp->plr->d) {
-				rdt_last_cmd_clear();
-				rdt_last_cmd_puts("Cache domain offline\n");
-				ret = -ENODEV;
-			} else {
-				mask = &rdtgrp->plr->d->cpu_mask;
-				seq_printf(s, is_cpu_list(of) ?
-					   "%*pbl\n" : "%*pb\n",
-					   cpumask_pr_args(mask));
-			}
-		} else {
-			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
-				   cpumask_pr_args(&rdtgrp->cpu_mask));
-		}
-	} else {
-		ret = -ENOENT;
-	}
-	rdtgroup_kn_unlock(of->kn);
-
-	return ret;
-}
-
-/*
- * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
- *
- * Per task closids/rmids must have been set up before calling this function.
- * @r may be NULL.
- */
-static void
-update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
-{
-	struct resctrl_cpu_sync defaults;
-	struct resctrl_cpu_sync *defaults_p = NULL;
-
-	if (r) {
-		defaults.closid = r->closid;
-		defaults.rmid = r->mon.rmid;
-		defaults_p = &defaults;
-	}
-
-	on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_defaults, defaults_p,
-			 1);
-}
-
-static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
-			  cpumask_var_t tmpmask)
-{
-	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
-	struct list_head *head;
-
-	/* Check whether cpus belong to parent ctrl group */
-	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
-	if (!cpumask_empty(tmpmask)) {
-		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
-		return -EINVAL;
-	}
-
-	/* Check whether cpus are dropped from this group */
-	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
-	if (!cpumask_empty(tmpmask)) {
-		/* Give any dropped cpus to parent rdtgroup */
-		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
-		update_closid_rmid(tmpmask, prgrp);
-	}
-
-	/*
-	 * If we added cpus, remove them from previous group that owned them
-	 * and update per-cpu rmid
-	 */
-	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
-	if (!cpumask_empty(tmpmask)) {
-		head = &prgrp->mon.crdtgrp_list;
-		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
-			if (crgrp == rdtgrp)
-				continue;
-			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
-				       tmpmask);
-		}
-		update_closid_rmid(tmpmask, rdtgrp);
-	}
-
-	/* Done pushing/pulling - update this group with new mask */
-	cpumask_copy(&rdtgrp->cpu_mask, newmask);
-
-	return 0;
-}
-
-static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
-{
-	struct rdtgroup *crgrp;
-
-	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
-	/* update the child mon group masks as well*/
-	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
-		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
-}
-
-static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
-			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
-{
-	struct rdtgroup *r, *crgrp;
-	struct list_head *head;
-
-	/* Check whether cpus are dropped from this group */
-	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
-	if (!cpumask_empty(tmpmask)) {
-		/* Can't drop from default group */
-		if (rdtgrp == &rdtgroup_default) {
-			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
-			return -EINVAL;
-		}
-
-		/* Give any dropped cpus to rdtgroup_default */
-		cpumask_or(&rdtgroup_default.cpu_mask,
-			   &rdtgroup_default.cpu_mask, tmpmask);
-		update_closid_rmid(tmpmask, &rdtgroup_default);
-	}
-
-	/*
-	 * If we added cpus, remove them from previous group and
-	 * the prev group's child groups that owned them
-	 * and update per-cpu closid/rmid.
-	 */
-	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
-	if (!cpumask_empty(tmpmask)) {
-		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
-			if (r == rdtgrp)
-				continue;
-			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
-			if (!cpumask_empty(tmpmask1))
-				cpumask_rdtgrp_clear(r, tmpmask1);
-		}
-		update_closid_rmid(tmpmask, rdtgrp);
-	}
-
-	/* Done pushing/pulling - update this group with new mask */
-	cpumask_copy(&rdtgrp->cpu_mask, newmask);
-
-	/*
-	 * Clear child mon group masks since there is a new parent mask
-	 * now and update the rmid for the cpus the child lost.
-	 */
-	head = &rdtgrp->mon.crdtgrp_list;
-	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
-		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
-		update_closid_rmid(tmpmask, rdtgrp);
-		cpumask_clear(&crgrp->cpu_mask);
-	}
-
-	return 0;
-}
-
-static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
-				   char *buf, size_t nbytes, loff_t off)
-{
-	cpumask_var_t tmpmask, newmask, tmpmask1;
-	struct rdtgroup *rdtgrp;
-	int ret;
-
-	if (!buf)
-		return -EINVAL;
-
-	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
-		return -ENOMEM;
-	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
-		free_cpumask_var(tmpmask);
-		return -ENOMEM;
-	}
-	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
-		free_cpumask_var(tmpmask);
-		free_cpumask_var(newmask);
-		return -ENOMEM;
-	}
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (!rdtgrp) {
-		ret = -ENOENT;
-		goto unlock;
-	}
-
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
-	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-		ret = -EINVAL;
-		rdt_last_cmd_puts("Pseudo-locking in progress\n");
-		goto unlock;
-	}
-
-	if (is_cpu_list(of))
-		ret = cpulist_parse(buf, newmask);
-	else
-		ret = cpumask_parse(buf, newmask);
-
-	if (ret) {
-		rdt_last_cmd_puts("Bad CPU list/mask\n");
-		goto unlock;
-	}
-
-	/* check that user didn't specify any offline cpus */
-	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
-	if (!cpumask_empty(tmpmask)) {
-		ret = -EINVAL;
-		rdt_last_cmd_puts("Can only assign online CPUs\n");
-		goto unlock;
-	}
-
-	if (rdtgrp->type == RDTCTRL_GROUP)
-		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
-	else if (rdtgrp->type == RDTMON_GROUP)
-		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
-	else
-		ret = -EINVAL;
-
-unlock:
-	rdtgroup_kn_unlock(of->kn);
-	free_cpumask_var(tmpmask);
-	free_cpumask_var(newmask);
-	free_cpumask_var(tmpmask1);
-
-	return ret ?: nbytes;
-}
-
-/**
- * rdtgroup_remove - the helper to remove resource group safely
- * @rdtgrp: resource group to remove
- *
- * On resource group creation via a mkdir, an extra kernfs_node reference is
- * taken to ensure that the rdtgroup structure remains accessible for the
- * rdtgroup_kn_unlock() calls where it is removed.
- *
- * Drop the extra reference here, then free the rdtgroup structure.
- *
- * Return: void
- */
-static void rdtgroup_remove(struct rdtgroup *rdtgrp)
-{
-	kernfs_put(rdtgrp->kn);
-	kfree(rdtgrp);
-}
-
-static void _update_task_closid_rmid(void *task)
-{
-	/*
-	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
-	 * Otherwise, the MSR is updated when the task is scheduled in.
-	 */
-	if (task == current)
-		resctrl_arch_sched_in(task);
-}
-
-static void update_task_closid_rmid(struct task_struct *t)
-{
-	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
-		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
-	else
-		_update_task_closid_rmid(t);
-}
-
-static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
-{
-	u32 closid, rmid = rdtgrp->mon.rmid;
-
-	if (rdtgrp->type == RDTCTRL_GROUP)
-		closid = rdtgrp->closid;
-	else if (rdtgrp->type == RDTMON_GROUP)
-		closid = rdtgrp->mon.parent->closid;
-	else
-		return false;
-
-	return resctrl_arch_match_closid(tsk, closid) &&
-	       resctrl_arch_match_rmid(tsk, closid, rmid);
-}
-
-static int __rdtgroup_move_task(struct task_struct *tsk,
-				struct rdtgroup *rdtgrp)
-{
-	/* If the task is already in rdtgrp, no need to move the task. */
-	if (task_in_rdtgroup(tsk, rdtgrp))
-		return 0;
-
-	/*
-	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
-	 * updated by them.
-	 *
-	 * For ctrl_mon groups, move both closid and rmid.
-	 * For monitor groups, can move the tasks only from
-	 * their parent CTRL group.
-	 */
-	if (rdtgrp->type == RDTMON_GROUP &&
-	    !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
-		rdt_last_cmd_puts("Can't move task to different control group\n");
-		return -EINVAL;
-	}
-
-	if (rdtgrp->type == RDTMON_GROUP)
-		resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
-					     rdtgrp->mon.rmid);
-	else
-		resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
-					     rdtgrp->mon.rmid);
-
-	/*
-	 * Ensure the task's closid and rmid are written before determining if
-	 * the task is current that will decide if it will be interrupted.
-	 * This pairs with the full barrier between the rq->curr update and
-	 * resctrl_arch_sched_in() during context switch.
-	 */
-	smp_mb();
-
-	/*
-	 * By now, the task's closid and rmid are set. If the task is current
-	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
-	 * group go into effect. If the task is not current, the MSR will be
-	 * updated when the task is scheduled in.
-	 */
-	update_task_closid_rmid(tsk);
-
-	return 0;
-}
-
-static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
-{
-	return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
-		resctrl_arch_match_closid(t, r->closid));
-}
-
-static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
-{
-	return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
-		resctrl_arch_match_rmid(t, r->mon.parent->closid,
-					r->mon.rmid));
-}
-
-/**
- * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
- * @r: Resource group
- *
- * Return: 1 if tasks have been assigned to @r, 0 otherwise
- */
-int rdtgroup_tasks_assigned(struct rdtgroup *r)
-{
-	struct task_struct *p, *t;
-	int ret = 0;
-
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	rcu_read_lock();
-	for_each_process_thread(p, t) {
-		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
-			ret = 1;
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	return ret;
-}
-
-static int rdtgroup_task_write_permission(struct task_struct *task,
-					  struct kernfs_open_file *of)
-{
-	const struct cred *tcred = get_task_cred(task);
-	const struct cred *cred = current_cred();
-	int ret = 0;
-
-	/*
-	 * Even if we're attaching all tasks in the thread group, we only
-	 * need to check permissions on one of them.
-	 */
-	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
-	    !uid_eq(cred->euid, tcred->uid) &&
-	    !uid_eq(cred->euid, tcred->suid)) {
-		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
-		ret = -EPERM;
-	}
-
-	put_cred(tcred);
-	return ret;
-}
-
-static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
-			      struct kernfs_open_file *of)
-{
-	struct task_struct *tsk;
-	int ret;
-
-	rcu_read_lock();
-	if (pid) {
-		tsk = find_task_by_vpid(pid);
-		if (!tsk) {
-			rcu_read_unlock();
-			rdt_last_cmd_printf("No task %d\n", pid);
-			return -ESRCH;
-		}
-	} else {
-		tsk = current;
-	}
-
-	get_task_struct(tsk);
-	rcu_read_unlock();
-
-	ret = rdtgroup_task_write_permission(tsk, of);
-	if (!ret)
-		ret = __rdtgroup_move_task(tsk, rdtgrp);
-
-	put_task_struct(tsk);
-	return ret;
-}
-
-static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
-				    char *buf, size_t nbytes, loff_t off)
-{
-	struct rdtgroup *rdtgrp;
-	char *pid_str;
-	int ret = 0;
-	pid_t pid;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (!rdtgrp) {
-		rdtgroup_kn_unlock(of->kn);
-		return -ENOENT;
-	}
-	rdt_last_cmd_clear();
-
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
-	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-		ret = -EINVAL;
-		rdt_last_cmd_puts("Pseudo-locking in progress\n");
-		goto unlock;
-	}
-
-	while (buf && buf[0] != '\0' && buf[0] != '\n') {
-		pid_str = strim(strsep(&buf, ","));
-
-		if (kstrtoint(pid_str, 0, &pid)) {
-			rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
-			ret = -EINVAL;
-			break;
-		}
-
-		if (pid < 0) {
-			rdt_last_cmd_printf("Invalid pid %d\n", pid);
-			ret = -EINVAL;
-			break;
-		}
-
-		ret = rdtgroup_move_task(pid, rdtgrp, of);
-		if (ret) {
-			rdt_last_cmd_printf("Error while processing task %d\n", pid);
-			break;
-		}
-	}
-
-unlock:
-	rdtgroup_kn_unlock(of->kn);
-
-	return ret ?: nbytes;
-}
-
-static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
-{
-	struct task_struct *p, *t;
-	pid_t pid;
-
-	rcu_read_lock();
-	for_each_process_thread(p, t) {
-		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
-			pid = task_pid_vnr(t);
-			if (pid)
-				seq_printf(s, "%d\n", pid);
-		}
-	}
-	rcu_read_unlock();
-}
-
-static int rdtgroup_tasks_show(struct kernfs_open_file *of,
-			       struct seq_file *s, void *v)
-{
-	struct rdtgroup *rdtgrp;
-	int ret = 0;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (rdtgrp)
-		show_rdt_tasks(rdtgrp, s);
-	else
-		ret = -ENOENT;
-	rdtgroup_kn_unlock(of->kn);
-
-	return ret;
-}
-
-static int rdtgroup_closid_show(struct kernfs_open_file *of,
-				struct seq_file *s, void *v)
-{
-	struct rdtgroup *rdtgrp;
-	int ret = 0;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (rdtgrp)
-		seq_printf(s, "%u\n", rdtgrp->closid);
-	else
-		ret = -ENOENT;
-	rdtgroup_kn_unlock(of->kn);
-
-	return ret;
-}
-
-static int rdtgroup_rmid_show(struct kernfs_open_file *of,
-			      struct seq_file *s, void *v)
-{
-	struct rdtgroup *rdtgrp;
-	int ret = 0;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (rdtgrp)
-		seq_printf(s, "%u\n", rdtgrp->mon.rmid);
-	else
-		ret = -ENOENT;
-	rdtgroup_kn_unlock(of->kn);
-
-	return ret;
-}
-
-static int rdtgroup_mbm_mode_show(struct kernfs_open_file *of,
-				  struct seq_file *s, void *v)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-
-	if (r->mon.mbm_cntr_assignable) {
-		if (resctrl_arch_get_abmc_enabled()) {
-			seq_puts(s, "[mbm_cntr_assign]\n");
-			seq_puts(s, "legacy\n");
-		} else {
-			seq_puts(s, "mbm_cntr_assign\n");
-			seq_puts(s, "[legacy]\n");
-		}
-	} else {
-		seq_puts(s, "[legacy]\n");
-	}
-
-	return 0;
-}
-
-static void rdtgroup_mbm_cntr_reset(struct rdt_resource *r)
-{
-	struct rdtgroup *prgrp, *crgrp;
-	struct rdt_domain *dom;
-
-	mbm_cntrs_init(r);
-
-	list_for_each_entry(dom, &r->domains, list)
-		bitmap_zero(dom->mbm_cntr_map, r->mon.num_mbm_cntrs);
-
-	/* Reset the cntr_id's for all the monitor groups */
-	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
-		prgrp->mon.cntr_id[0] = MON_CNTR_UNSET;
-		prgrp->mon.cntr_id[1] = MON_CNTR_UNSET;
-		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list,
-				    mon.crdtgrp_list) {
-			crgrp->mon.cntr_id[0] = MON_CNTR_UNSET;
-			crgrp->mon.cntr_id[1] = MON_CNTR_UNSET;
-		}
-	}
-}
-
-static ssize_t rdtgroup_mbm_mode_write(struct kernfs_open_file *of,
-				       char *buf, size_t nbytes,
-				       loff_t off)
-{
-	int mbm_cntr_assign = resctrl_arch_get_abmc_enabled();
-	struct rdt_resource *r = of->kn->parent->priv;
-	int ret = 0;
-
-	/* Valid input requires a trailing newline */
-	if (nbytes == 0 || buf[nbytes - 1] != '\n')
-		return -EINVAL;
-
-	buf[nbytes - 1] = '\0';
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	rdt_last_cmd_clear();
-
-	if (!strcmp(buf, "legacy")) {
-		if (mbm_cntr_assign)
-			resctrl_arch_mbm_cntr_assign_disable();
-	} else if (!strcmp(buf, "mbm_cntr_assign")) {
-		if (!mbm_cntr_assign) {
-			rdtgroup_mbm_cntr_reset(r);
-			ret = resctrl_arch_mbm_cntr_assign_enable();
-		}
-	} else {
-		ret = -EINVAL;
-	}
-
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-
-	return ret ?: nbytes;
-}
-
-static int rdtgroup_num_mbm_cntrs_show(struct kernfs_open_file *of,
-				       struct seq_file *s, void *v)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-
-	seq_printf(s, "%d\n", r->mon.num_mbm_cntrs);
-
-	return 0;
-}
-
-static char *rdtgroup_mon_state_to_str(struct rdtgroup *rdtgrp,
-				       struct rdt_domain *d, char *str)
-{
-	char *tmp = str;
-	int index;
-
-	/*
-	 * Query the monitor state for the domain.
-	 * Index 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
-	 * Index 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
-	 */
-	index = mon_event_config_index_get(QOS_L3_MBM_TOTAL_EVENT_ID);
-	if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET &&
-	    test_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map))
-		*tmp++ = 't';
-
-	index = mon_event_config_index_get(QOS_L3_MBM_LOCAL_EVENT_ID);
-	if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET &&
-	    test_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map))
-		*tmp++ = 'l';
-
-	if (tmp == str)
-		*tmp++ = '_';
-
-	*tmp = '\0';
-	return str;
-}
-
-static int rdtgroup_mbm_control_show(struct kernfs_open_file *of,
-				     struct seq_file *s, void *v)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-	struct rdt_domain *dom;
-	struct rdtgroup *rdtg;
-	char str[10];
-
-	if (!resctrl_arch_get_mbm_cntr_assign_enable()) {
-		rdt_last_cmd_puts("ABMC feature is not enabled\n");
-		return -EINVAL;
-	}
-
-	mutex_lock(&rdtgroup_mutex);
-
-	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
-		struct rdtgroup *crg;
-
-		seq_printf(s, "%s//", rdtg->kn->name);
-
-		list_for_each_entry(dom, &r->domains, list)
-			seq_printf(s, "%d=%s;", dom->id,
-				   rdtgroup_mon_state_to_str(rdtg, dom, str));
-		seq_putc(s, '\n');
-
-		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
-				    mon.crdtgrp_list) {
-			seq_printf(s, "%s/%s/", rdtg->kn->name, crg->kn->name);
-
-			list_for_each_entry(dom, &r->domains, list)
-				seq_printf(s, "%d=%s;", dom->id,
-					   rdtgroup_mon_state_to_str(crg, dom, str));
-			seq_putc(s, '\n');
-		}
-	}
-
-	mutex_unlock(&rdtgroup_mutex);
-	return 0;
-}
-
-/*
- * Update the assign states for the domain.
- *
- * If this is a new assignment for the group then allocate a counter and update
- * the assignment else just update the assign state
- */
-static int rdtgroup_assign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid,
-				  struct rdt_domain *d)
-{
-	int ret, index;
-
-	index = mon_event_config_index_get(evtid);
-	if (index == INVALID_CONFIG_INDEX)
-                return -EINVAL;
-
-	if (rdtgrp->mon.cntr_id[index] == MON_CNTR_UNSET) {
-		ret = rdtgroup_alloc_cntr(rdtgrp, index);
-		if (ret < 0)
-			goto out_done;
-	}
-
-	/* Update the state on all domains if d == NULL */
-	if (d == NULL) {
-		ret = rdtgroup_assign_cntr(rdtgrp, evtid);
-	} else {
-		ret = resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid,
-					       rdtgrp->mon.cntr_id[index],
-					       rdtgrp->closid, 1);
-		if (!ret)
-			set_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map);
-	}
-
-out_done:
-	return ret;
-}
-
-/*
- * Update the unassign state for the domain.
- *
- * Free the counter if it is unassigned on all the domains else just
- * update the unassign state
- */
-static int rdtgroup_unassign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid,
-				    struct rdt_domain *d)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	int ret = 0, index;
-
-	index = mon_event_config_index_get(evtid);
-	if (index == INVALID_CONFIG_INDEX)
-                return -EINVAL;
-
-	if (rdtgrp->mon.cntr_id[index] == MON_CNTR_UNSET)
-		goto out_done;
-
-	if (d == NULL) {
-		ret = rdtgroup_unassign_cntr(rdtgrp, evtid);
-	} else {
-		ret = resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid,
-					       rdtgrp->mon.cntr_id[index],
-					       rdtgrp->closid, 0);
-		if (!ret) {
-			clear_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map);
-			rdtgroup_free_cntr(r, rdtgrp, index);
-		}
-	}
-
-out_done:
-	return ret;
-}
-
-static int rdtgroup_str_to_mon_state(char *flag)
-{
-	int i, mon_state = 0;
-
-	for (i = 0; i < strlen(flag); i++) {
-		switch (*(flag + i)) {
-		case 't':
-			mon_state |= ASSIGN_TOTAL;
-			break;
-		case 'l':
-			mon_state |= ASSIGN_LOCAL;
-			break;
-		case '_':
-			mon_state = ASSIGN_NONE;
-			break;
-		default:
-			break;
-		}
-	}
-
-	return mon_state;
-}
-
-static struct rdtgroup *rdtgroup_find_grp(enum rdt_group_type rtype, char *p_grp, char *c_grp)
-{
-	struct rdtgroup *rdtg, *crg;
-
-	if (rtype == RDTCTRL_GROUP && *p_grp == '\0') {
-		return &rdtgroup_default;
-	} else if (rtype == RDTCTRL_GROUP) {
-		list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list)
-			if (!strcmp(p_grp, rdtg->kn->name))
-				return rdtg;
-	} else if (rtype == RDTMON_GROUP) {
-		list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
-			if (!strcmp(p_grp, rdtg->kn->name)) {
-				list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
-						    mon.crdtgrp_list) {
-					if (!strcmp(c_grp, crg->kn->name))
-						return crg;
-				}
-			}
-		}
-	}
-
-	return NULL;
-}
-
-static int rdtgroup_process_flags(struct rdt_resource *r,
-				  enum rdt_group_type rtype,
-				  char *p_grp, char *c_grp, char *tok)
-{
-	int op, mon_state, assign_state, unassign_state;
-	char *dom_str, *id_str, *op_str;
-	struct rdt_domain *d;
-	struct rdtgroup *rdtgrp;
-	unsigned long dom_id;
-	int ret, found = 0;
-
-	rdtgrp = rdtgroup_find_grp(rtype, p_grp, c_grp);
-
-	if (!rdtgrp) {
-		rdt_last_cmd_puts("Not a valid resctrl group\n");
-		return -EINVAL;
-	}
-
-next:
-	if (!tok || tok[0] == '\0')
-		return 0;
-
-	/* Start processing the strings for each domain */
-	dom_str = strim(strsep(&tok, ";"));
-
-	op_str = strpbrk(dom_str, "=+-");
-
-	if (op_str) {
-		op = *op_str;
-	} else {
-		rdt_last_cmd_puts("Missing operation =, +, -, _ character\n");
-		return -EINVAL;
-	}
-
-	id_str = strsep(&dom_str, "=+-");
-
-	/* Check for domain id '*' which means all domains */
-	if (id_str && *id_str == '*') {
-		d = NULL;
-		goto check_state;
-	} else if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
-		rdt_last_cmd_puts("Missing domain id\n");
-		return -EINVAL;
-	}
-
-	/* Verify if the dom_id is valid */
-	list_for_each_entry(d, &r->domains, list) {
-		if (d->id == dom_id) {
-			found = 1;
-			break;
-		}
-	}
-
-	if (!found) {
-		rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id);
-		return -EINVAL;
-	}
-
-check_state:
-	mon_state = rdtgroup_str_to_mon_state(dom_str);
-
-	assign_state = 0;
-	unassign_state = 0;
-
-	switch (op) {
-	case '+':
-		if (mon_state == ASSIGN_NONE) {
-			rdt_last_cmd_puts("Invalid assign opcode\n");
-			goto out_fail;
-		}
-		assign_state = mon_state;
-		break;
-	case '-':
-		if (mon_state == ASSIGN_NONE) {
-			rdt_last_cmd_puts("Invalid assign opcode\n");
-			goto out_fail;
-		}
-		unassign_state = mon_state;
-		break;
-	case '=':
-		assign_state = mon_state;
-		unassign_state = (ASSIGN_TOTAL | ASSIGN_LOCAL) & ~assign_state;
-		break;
-	default:
-		break;
-	}
-
-	if (assign_state & ASSIGN_TOTAL) {
-		ret = rdtgroup_assign_update(rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID, d);
-		if (ret)
-			goto out_fail;
-	}
-
-	if (assign_state & ASSIGN_LOCAL) {
-		ret = rdtgroup_assign_update(rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID, d);
-		if (ret)
-			goto out_fail;
-	}
-
-	if (unassign_state & ASSIGN_TOTAL) {
-		ret = rdtgroup_unassign_update(rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID, d);
-		if (ret)
-			goto out_fail;
-	}
-
-	if (unassign_state & ASSIGN_LOCAL) {
-		ret = rdtgroup_unassign_update(rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID, d);
-		if (ret)
-			goto out_fail;
-	}
-
-	goto next;
-
-out_fail:
-
-	return -EINVAL;
-}
-
-static ssize_t rdtgroup_mbm_control_write(struct kernfs_open_file *of,
-					  char *buf, size_t nbytes,
-					  loff_t off)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-	char *token, *cmon_grp, *mon_grp;
-	int ret;
-
-	if (!resctrl_arch_get_abmc_enabled())
-		return -EINVAL;
-
-	/* Valid input requires a trailing newline */
-	if (nbytes == 0 || buf[nbytes - 1] != '\n')
-		return -EINVAL;
-
-	buf[nbytes - 1] = '\0';
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-	rdt_last_cmd_clear();
-
-	while ((token = strsep(&buf, "\n")) != NULL) {
-		if (strstr(token, "//")) {
-			/*
-			 * The CTRL_MON group processing:
-			 * default CTRL_MON group: "//<flags>"
-			 * non-default CTRL_MON group: "<CTRL_MON group>//flags"
-			 * The CTRL_MON group will be empty string if it is a
-			 * default group.
-			 */
-			cmon_grp = strsep(&token, "//");
-
-			/*
-			 * strsep returns empty string for contiguous delimiters.
-			 * Make sure check for two consecutive delimiters and
-			 * advance the token.
-			 */
-			mon_grp = strsep(&token, "//");
-			if (*mon_grp != '\0') {
-				rdt_last_cmd_printf("Invalid CTRL_MON group format %s\n", token);
-				ret = -EINVAL;
-				break;
-			}
-
-			ret = rdtgroup_process_flags(r, RDTCTRL_GROUP, cmon_grp, mon_grp, token);
-			if (ret)
-				break;
-		} else if (strstr(token, "/")) {
-			/*
-			 * MON group processing:
-			 * MON_GROUP inside default CTRL_MON group: "/<MON group>/<flags>"
-			 * MON_GROUP within CTRL_MON group: "<CTRL_MON group>/<MON group>/<flags>"
-			 */
-			cmon_grp = strsep(&token, "/");
-
-			/* Extract the MON_GROUP. It cannot be empty string */
-			mon_grp = strsep(&token, "/");
-			if (*mon_grp == '\0') {
-				rdt_last_cmd_printf("Invalid MON_GROUP format %s\n", token);
-				ret = -EINVAL;
-				break;
-			}
-
-			ret = rdtgroup_process_flags(r, RDTMON_GROUP, cmon_grp, mon_grp, token);
-			if (ret)
-				break;
-		}
-	}
-
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-
-	return ret ?: nbytes;
-}
-
-#ifdef CONFIG_PROC_CPU_RESCTRL
-
-/*
- * A task can only be part of one resctrl control group and of one monitor
- * group which is associated to that control group.
- *
- * 1)   res:
- *      mon:
- *
- *    resctrl is not available.
- *
- * 2)   res:/
- *      mon:
- *
- *    Task is part of the root resctrl control group, and it is not associated
- *    to any monitor group.
- *
- * 3)  res:/
- *     mon:mon0
- *
- *    Task is part of the root resctrl control group and monitor group mon0.
- *
- * 4)  res:group0
- *     mon:
- *
- *    Task is part of resctrl control group group0, and it is not associated
- *    to any monitor group.
- *
- * 5) res:group0
- *    mon:mon1
- *
- *    Task is part of resctrl control group group0 and monitor group mon1.
- */
-int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
-		      struct pid *pid, struct task_struct *tsk)
-{
-	struct rdtgroup *rdtg;
-	int ret = 0;
-
-	mutex_lock(&rdtgroup_mutex);
-
-	/* Return empty if resctrl has not been mounted. */
-	if (!resctrl_mounted) {
-		seq_puts(s, "res:\nmon:\n");
-		goto unlock;
-	}
-
-	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
-		struct rdtgroup *crg;
-
-		/*
-		 * Task information is only relevant for shareable
-		 * and exclusive groups.
-		 */
-		if (rdtg->mode != RDT_MODE_SHAREABLE &&
-		    rdtg->mode != RDT_MODE_EXCLUSIVE)
-			continue;
-
-		if (!resctrl_arch_match_closid(tsk, rdtg->closid))
-			continue;
-
-		seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
-			   rdtg->kn->name);
-		seq_puts(s, "mon:");
-		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
-				    mon.crdtgrp_list) {
-			if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
-						     crg->mon.rmid))
-				continue;
-			seq_printf(s, "%s", crg->kn->name);
-			break;
-		}
-		seq_putc(s, '\n');
-		goto unlock;
-	}
-	/*
-	 * The above search should succeed. Otherwise return
-	 * with an error.
-	 */
-	ret = -ENOENT;
-unlock:
-	mutex_unlock(&rdtgroup_mutex);
-
-	return ret;
-}
-#endif
-
-static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
-				    struct seq_file *seq, void *v)
-{
-	int len;
-
-	mutex_lock(&rdtgroup_mutex);
-	len = seq_buf_used(&last_cmd_status);
-	if (len)
-		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
-	else
-		seq_puts(seq, "ok\n");
-	mutex_unlock(&rdtgroup_mutex);
-	return 0;
-}
-
-static int rdt_num_closids_show(struct kernfs_open_file *of,
-				struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-
-	seq_printf(seq, "%u\n", s->num_closid);
-	return 0;
-}
-
-static int rdt_default_ctrl_show(struct kernfs_open_file *of,
-			     struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-
-	seq_printf(seq, "%x\n", r->default_ctrl);
-	return 0;
-}
-
-static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
-			     struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-
-	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
-	return 0;
-}
-
-static int rdt_shareable_bits_show(struct kernfs_open_file *of,
-				   struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-
-	seq_printf(seq, "%x\n", r->cache.shareable_bits);
-	return 0;
-}
-
-/*
- * rdt_bit_usage_show - Display current usage of resources
- *
- * A domain is a shared resource that can now be allocated differently. Here
- * we display the current regions of the domain as an annotated bitmask.
- * For each domain of this resource its allocation bitmask
- * is annotated as below to indicate the current usage of the corresponding bit:
- *   0 - currently unused
- *   X - currently available for sharing and used by software and hardware
- *   H - currently used by hardware only but available for software use
- *   S - currently used and shareable by software only
- *   E - currently used exclusively by one resource group
- *   P - currently pseudo-locked by one resource group
- */
-static int rdt_bit_usage_show(struct kernfs_open_file *of,
-			      struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	/*
-	 * Use unsigned long even though only 32 bits are used to ensure
-	 * test_bit() is used safely.
-	 */
-	unsigned long sw_shareable = 0, hw_shareable = 0;
-	unsigned long exclusive = 0, pseudo_locked = 0;
-	struct rdt_resource *r = s->res;
-	struct rdt_domain *dom;
-	int i, hwb, swb, excl, psl;
-	enum rdtgrp_mode mode;
-	bool sep = false;
-	u32 ctrl_val;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-	hw_shareable = r->cache.shareable_bits;
-	list_for_each_entry(dom, &r->domains, list) {
-		if (sep)
-			seq_putc(seq, ';');
-		sw_shareable = 0;
-		exclusive = 0;
-		seq_printf(seq, "%d=", dom->id);
-		for (i = 0; i < closids_supported(); i++) {
-			if (!closid_allocated(i))
-				continue;
-			ctrl_val = resctrl_arch_get_config(r, dom, i,
-							   s->conf_type);
-			mode = rdtgroup_mode_by_closid(i);
-			switch (mode) {
-			case RDT_MODE_SHAREABLE:
-				sw_shareable |= ctrl_val;
-				break;
-			case RDT_MODE_EXCLUSIVE:
-				exclusive |= ctrl_val;
-				break;
-			case RDT_MODE_PSEUDO_LOCKSETUP:
-			/*
-			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
-			 * here but not included since the CBM
-			 * associated with this CLOSID in this mode
-			 * is not initialized and no task or cpu can be
-			 * assigned this CLOSID.
-			 */
-				break;
-			case RDT_MODE_PSEUDO_LOCKED:
-			case RDT_NUM_MODES:
-				WARN(1,
-				     "invalid mode for closid %d\n", i);
-				break;
-			}
-		}
-		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
-			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
-			hwb = test_bit(i, &hw_shareable);
-			swb = test_bit(i, &sw_shareable);
-			excl = test_bit(i, &exclusive);
-			psl = test_bit(i, &pseudo_locked);
-			if (hwb && swb)
-				seq_putc(seq, 'X');
-			else if (hwb && !swb)
-				seq_putc(seq, 'H');
-			else if (!hwb && swb)
-				seq_putc(seq, 'S');
-			else if (excl)
-				seq_putc(seq, 'E');
-			else if (psl)
-				seq_putc(seq, 'P');
-			else /* Unused bits remain */
-				seq_putc(seq, '0');
-		}
-		sep = true;
-	}
-	seq_putc(seq, '\n');
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-	return 0;
-}
-
-static int rdt_min_bw_show(struct kernfs_open_file *of,
-			     struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-
-	seq_printf(seq, "%u\n", r->membw.min_bw);
-	return 0;
-}
-
-static int rdt_num_rmids_show(struct kernfs_open_file *of,
-			      struct seq_file *seq, void *v)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-
-	seq_printf(seq, "%d\n", r->mon.num_rmid);
-
-	return 0;
-}
-
-static int rdt_mon_features_show(struct kernfs_open_file *of,
-				 struct seq_file *seq, void *v)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-	struct mon_evt *mevt;
-
-	list_for_each_entry(mevt, &r->mon.evt_list, list) {
-		seq_printf(seq, "%s\n", mevt->name);
-		if (mevt->configurable)
-			seq_printf(seq, "%s_config\n", mevt->name);
-	}
-
-	return 0;
-}
-
-static int rdt_bw_gran_show(struct kernfs_open_file *of,
-			     struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-
-	seq_printf(seq, "%u\n", r->membw.bw_gran);
-	return 0;
-}
-
-static int rdt_delay_linear_show(struct kernfs_open_file *of,
-			     struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-
-	seq_printf(seq, "%u\n", r->membw.delay_linear);
-	return 0;
-}
-
-static int max_threshold_occ_show(struct kernfs_open_file *of,
-				  struct seq_file *seq, void *v)
-{
-	seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
-
-	return 0;
-}
-
-static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
-					 struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-
-	if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
-		seq_puts(seq, "per-thread\n");
-	else
-		seq_puts(seq, "max\n");
-
-	return 0;
-}
-
-static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
-				       char *buf, size_t nbytes, loff_t off)
-{
-	unsigned int bytes;
-	int ret;
-
-	ret = kstrtouint(buf, 0, &bytes);
-	if (ret)
-		return ret;
-
-	if (bytes > resctrl_rmid_realloc_limit)
-		return -EINVAL;
-
-	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
-
-	return nbytes;
-}
-
-/*
- * rdtgroup_mode_show - Display mode of this resource group
- */
-static int rdtgroup_mode_show(struct kernfs_open_file *of,
-			      struct seq_file *s, void *v)
-{
-	struct rdtgroup *rdtgrp;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (!rdtgrp) {
-		rdtgroup_kn_unlock(of->kn);
-		return -ENOENT;
-	}
-
-	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
-
-	rdtgroup_kn_unlock(of->kn);
-	return 0;
-}
-
-enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
-{
-	switch (my_type) {
-	case CDP_CODE:
-		return CDP_DATA;
-	case CDP_DATA:
-		return CDP_CODE;
-	default:
-	case CDP_NONE:
-		return CDP_NONE;
-	}
-}
-
-static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
-					struct seq_file *seq, void *v)
-{
-	struct resctrl_schema *s = of->kn->parent->priv;
-	struct rdt_resource *r = s->res;
-
-	seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
-
-	return 0;
-}
-
-/**
- * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
- * @r: Resource to which domain instance @d belongs.
- * @d: The domain instance for which @closid is being tested.
- * @cbm: Capacity bitmask being tested.
- * @closid: Intended closid for @cbm.
- * @type: CDP type of @r.
- * @exclusive: Only check if overlaps with exclusive resource groups
- *
- * Checks if provided @cbm intended to be used for @closid on domain
- * @d overlaps with any other closids or other hardware usage associated
- * with this domain. If @exclusive is true then only overlaps with
- * resource groups in exclusive mode will be considered. If @exclusive
- * is false then overlaps with any resource group or hardware entities
- * will be considered.
- *
- * @cbm is unsigned long, even if only 32 bits are used, to make the
- * bitmap functions work correctly.
- *
- * Return: false if CBM does not overlap, true if it does.
- */
-static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
-				    unsigned long cbm, int closid,
-				    enum resctrl_conf_type type, bool exclusive)
-{
-	enum rdtgrp_mode mode;
-	unsigned long ctrl_b;
-	int i;
-
-	/* Check for any overlap with regions used by hardware directly */
-	if (!exclusive) {
-		ctrl_b = r->cache.shareable_bits;
-		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
-			return true;
-	}
-
-	/* Check for overlap with other resource groups */
-	for (i = 0; i < closids_supported(); i++) {
-		ctrl_b = resctrl_arch_get_config(r, d, i, type);
-		mode = rdtgroup_mode_by_closid(i);
-		if (closid_allocated(i) && i != closid &&
-		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
-			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
-				if (exclusive) {
-					if (mode == RDT_MODE_EXCLUSIVE)
-						return true;
-					continue;
-				}
-				return true;
-			}
-		}
-	}
-
-	return false;
-}
-
-/**
- * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
- * @s: Schema for the resource to which domain instance @d belongs.
- * @d: The domain instance for which @closid is being tested.
- * @cbm: Capacity bitmask being tested.
- * @closid: Intended closid for @cbm.
- * @exclusive: Only check if overlaps with exclusive resource groups
- *
- * Resources that can be allocated using a CBM can use the CBM to control
- * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
- * for overlap. Overlap test is not limited to the specific resource for
- * which the CBM is intended though - when dealing with CDP resources that
- * share the underlying hardware the overlap check should be performed on
- * the CDP resource sharing the hardware also.
- *
- * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
- * overlap test.
- *
- * Return: true if CBM overlap detected, false if there is no overlap
- */
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
-			   unsigned long cbm, int closid, bool exclusive)
-{
-	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
-	struct rdt_resource *r = s->res;
-
-	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
-				    exclusive))
-		return true;
-
-	if (!resctrl_arch_get_cdp_enabled(r->rid))
-		return false;
-	return  __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
-}
-
-/**
- * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
- * @rdtgrp: Resource group identified through its closid.
- *
- * An exclusive resource group implies that there should be no sharing of
- * its allocated resources. At the time this group is considered to be
- * exclusive this test can determine if its current schemata supports this
- * setting by testing for overlap with all other resource groups.
- *
- * Return: true if resource group can be exclusive, false if there is overlap
- * with allocations of other resource groups and thus this resource group
- * cannot be exclusive.
- */
-static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
-{
-	int closid = rdtgrp->closid;
-	struct resctrl_schema *s;
-	struct rdt_resource *r;
-	bool has_cache = false;
-	struct rdt_domain *d;
-	u32 ctrl;
-
-	/* Walking r->domains, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-	list_for_each_entry(s, &resctrl_schema_all, list) {
-		r = s->res;
-		if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
-			continue;
-		has_cache = true;
-		list_for_each_entry(d, &r->domains, list) {
-			ctrl = resctrl_arch_get_config(r, d, closid,
-						       s->conf_type);
-			if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
-				rdt_last_cmd_puts("Schemata overlaps\n");
-				return false;
-			}
-		}
-	}
-
-	if (!has_cache) {
-		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
-		return false;
-	}
-
-	return true;
-}
-
-/*
- * rdtgroup_mode_write - Modify the resource group's mode
- */
-static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
-				   char *buf, size_t nbytes, loff_t off)
-{
-	struct rdtgroup *rdtgrp;
-	enum rdtgrp_mode mode;
-	int ret = 0;
-
-	/* Valid input requires a trailing newline */
-	if (nbytes == 0 || buf[nbytes - 1] != '\n')
-		return -EINVAL;
-	buf[nbytes - 1] = '\0';
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (!rdtgrp) {
-		rdtgroup_kn_unlock(of->kn);
-		return -ENOENT;
-	}
-
-	rdt_last_cmd_clear();
-
-	mode = rdtgrp->mode;
-
-	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
-	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
-	    (!strcmp(buf, "pseudo-locksetup") &&
-	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
-	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
-		goto out;
-
-	if (mode == RDT_MODE_PSEUDO_LOCKED) {
-		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (!strcmp(buf, "shareable")) {
-		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-			ret = rdtgroup_locksetup_exit(rdtgrp);
-			if (ret)
-				goto out;
-		}
-		rdtgrp->mode = RDT_MODE_SHAREABLE;
-	} else if (!strcmp(buf, "exclusive")) {
-		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
-			ret = -EINVAL;
-			goto out;
-		}
-		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-			ret = rdtgroup_locksetup_exit(rdtgrp);
-			if (ret)
-				goto out;
-		}
-		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
-	} else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) &&
-		   !strcmp(buf, "pseudo-locksetup")) {
-		ret = rdtgroup_locksetup_enter(rdtgrp);
-		if (ret)
-			goto out;
-		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
-	} else {
-		rdt_last_cmd_puts("Unknown or unsupported mode\n");
-		ret = -EINVAL;
-	}
-
-out:
-	rdtgroup_kn_unlock(of->kn);
-	return ret ?: nbytes;
-}
-
-/**
- * rdtgroup_cbm_to_size - Translate CBM to size in bytes
- * @r: RDT resource to which @d belongs.
- * @d: RDT domain instance.
- * @cbm: bitmask for which the size should be computed.
- *
- * The bitmask provided associated with the RDT domain instance @d will be
- * translated into how many bytes it represents. The size in bytes is
- * computed by first dividing the total cache size by the CBM length to
- * determine how many bytes each bit in the bitmask represents. The result
- * is multiplied with the number of bits set in the bitmask.
- *
- * @cbm is unsigned long, even if only 32 bits are used to make the
- * bitmap functions work correctly.
- */
-unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
-				  struct rdt_domain *d, unsigned long cbm)
-{
-	struct cpu_cacheinfo *ci;
-	unsigned int size = 0;
-	int num_b, i;
-
-	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
-	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
-	for (i = 0; i < ci->num_leaves; i++) {
-		if (ci->info_list[i].level == r->cache_level) {
-			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
-			break;
-		}
-	}
-
-	return size;
-}
-
-/*
- * rdtgroup_size_show - Display size in bytes of allocated regions
- *
- * The "size" file mirrors the layout of the "schemata" file, printing the
- * size in bytes of each region instead of the capacity bitmask.
- */
-static int rdtgroup_size_show(struct kernfs_open_file *of,
-			      struct seq_file *s, void *v)
-{
-	struct resctrl_schema *schema;
-	enum resctrl_conf_type type;
-	struct rdtgroup *rdtgrp;
-	struct rdt_resource *r;
-	struct rdt_domain *d;
-	unsigned int size;
-	int ret = 0;
-	u32 closid;
-	bool sep;
-	u32 ctrl;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (!rdtgrp) {
-		rdtgroup_kn_unlock(of->kn);
-		return -ENOENT;
-	}
-
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
-		if (!rdtgrp->plr->d) {
-			rdt_last_cmd_clear();
-			rdt_last_cmd_puts("Cache domain offline\n");
-			ret = -ENODEV;
-		} else {
-			seq_printf(s, "%*s:", max_name_width,
-				   rdtgrp->plr->s->name);
-			size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
-						    rdtgrp->plr->d,
-						    rdtgrp->plr->cbm);
-			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
-		}
-		goto out;
-	}
-
-	closid = rdtgrp->closid;
-
-	list_for_each_entry(schema, &resctrl_schema_all, list) {
-		r = schema->res;
-		type = schema->conf_type;
-		sep = false;
-		seq_printf(s, "%*s:", max_name_width, schema->name);
-		list_for_each_entry(d, &r->domains, list) {
-			if (sep)
-				seq_putc(s, ';');
-			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-				size = 0;
-			} else {
-				if (is_mba_sc(r))
-					ctrl = d->mbps_val[closid];
-				else
-					ctrl = resctrl_arch_get_config(r, d,
-								       closid,
-								       type);
-				if (r->rid == RDT_RESOURCE_MBA ||
-				    r->rid == RDT_RESOURCE_SMBA)
-					size = ctrl;
-				else
-					size = rdtgroup_cbm_to_size(r, d, ctrl);
-			}
-			seq_printf(s, "%d=%u", d->id, size);
-			sep = true;
-		}
-		seq_putc(s, '\n');
-	}
-
-out:
-	rdtgroup_kn_unlock(of->kn);
-
-	return ret;
-}
-
-static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
-{
-	struct rdt_domain *dom;
-	bool sep = false;
-	u32 val;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	list_for_each_entry(dom, &r->domains, list) {
-		if (sep)
-			seq_puts(s, ";");
-
-		val = resctrl_arch_event_config_get(dom, evtid);
-		if (val == INVALID_CONFIG_VALUE)
-			break;
-
-		seq_printf(s, "%d=0x%02x", dom->id, val);
-		sep = true;
-	}
-	seq_puts(s, "\n");
-
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-
-	return 0;
-}
-
-static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
-				       struct seq_file *seq, void *v)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-
-	mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
-
-	return 0;
-}
-
-static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
-				       struct seq_file *seq, void *v)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-
-	mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
-
-	return 0;
-}
-
-static int mbm_config_write_domain(struct rdt_resource *r,
-				   struct rdt_domain *d, u32 evtid, u32 val)
-{
-	struct resctrl_mon_config_info mon_info = {0};
-	u32 config_val;
-
-	/*
-	 * Check the current config value first. If both are the same then
-	 * no need to write it again.
-	 */
-	config_val = resctrl_arch_event_config_get(d, evtid);
-	if (config_val == INVALID_CONFIG_VALUE || config_val == val)
-		return 0;
-
-	mon_info.d = d;
-	mon_info.evtid = evtid;
-	mon_info.mon_config = val;
-
-	/*
-	 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
-	 * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE
-	 * are scoped at the domain level. Writing any of these MSRs
-	 * on one CPU is observed by all the CPUs in the domain.
-	 */
-	smp_call_function_any(&d->cpu_mask, resctrl_arch_event_config_set,
-			      &mon_info, 1);
-	if (mon_info.err) {
-		rdt_last_cmd_puts("Invalid event configuration\n");
-		return mon_info.err;
-	}
-
-	/*
-	 * When an Event Configuration is changed, the bandwidth counters
-	 * for all RMIDs and Events will be cleared by the hardware. The
-	 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
-	 * every RMID on the next read to any event for every RMID.
-	 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
-	 * cleared while it is tracked by the hardware. Clear the
-	 * mbm_local and mbm_total counts for all the RMIDs.
-	 */
-	resctrl_arch_reset_rmid_all(r, d);
-
-	return 0;
-}
-
-static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
-{
-	char *dom_str = NULL, *id_str;
-	unsigned long dom_id, val;
-	struct rdt_domain *d;
-	int err;
-
-	/* Walking r->domains, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-next:
-	if (!tok || tok[0] == '\0')
-		return 0;
-
-	/* Start processing the strings for each domain */
-	dom_str = strim(strsep(&tok, ";"));
-	id_str = strsep(&dom_str, "=");
-
-	if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
-		rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
-		return -EINVAL;
-	}
-
-	if (!dom_str || kstrtoul(dom_str, 16, &val)) {
-		rdt_last_cmd_puts("Non-numeric event configuration value\n");
-		return -EINVAL;
-	}
-
-	/* Value from user cannot be more than the supported set of events */
-	if ((val & r->mbm_cfg_mask) != val) {
-		rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
-				    r->mbm_cfg_mask);
-		return -EINVAL;
-	}
-
-	list_for_each_entry(d, &r->domains, list) {
-		if (d->id == dom_id) {
-			err = mbm_config_write_domain(r, d, evtid, val);
-			if (err)
-				return err;
-			goto next;
-		}
-	}
-
-	return -EINVAL;
-}
-
-static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
-					    char *buf, size_t nbytes,
-					    loff_t off)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-	int ret;
-
-	/* Valid input requires a trailing newline */
-	if (nbytes == 0 || buf[nbytes - 1] != '\n')
-		return -EINVAL;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	rdt_last_cmd_clear();
-
-	buf[nbytes - 1] = '\0';
-
-	ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
-
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-
-	return ret ?: nbytes;
-}
-
-static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
-					    char *buf, size_t nbytes,
-					    loff_t off)
-{
-	struct rdt_resource *r = of->kn->parent->priv;
-	int ret;
-
-	/* Valid input requires a trailing newline */
-	if (nbytes == 0 || buf[nbytes - 1] != '\n')
-		return -EINVAL;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	rdt_last_cmd_clear();
-
-	buf[nbytes - 1] = '\0';
-
-	ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
-
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-
-	return ret ?: nbytes;
-}
-
-/* Allocate a new counter id if the event is unassigned */
-int rdtgroup_alloc_cntr(struct rdtgroup *rdtgrp, int index)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	int cntr_id;
-
-	/* Nothing to do if event has been assigned already */
-	if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET) {
-		rdt_last_cmd_puts("ABMC counter is assigned already\n");
-		return 0;
-	}
-
-	/*
-	 * Allocate a new counter id and update domains
-	 */
-	cntr_id = mbm_cntr_alloc(r);
-	if (cntr_id < 0) {
-		rdt_last_cmd_puts("Out of ABMC counters\n");
-		return -ENOSPC;
-	}
-
-	rdtgrp->mon.cntr_id[index] = cntr_id;
-
-	return 0;
-}
-
-/*
- * Assign a hardware counter to the group and assign the counter
- * all the domains in the group. It will try to allocate the mbm
- * counter if the counter is available.
- */
-int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	struct rdt_domain *d;
-	int index;
-
-	index = mon_event_config_index_get(evtid);
-	if (index == INVALID_CONFIG_INDEX)
-		return -EINVAL;
-
-	if (rdtgroup_alloc_cntr(rdtgrp, index))
-		return -EINVAL;
-
-	list_for_each_entry(d, &r->domains, list) {
-		resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid,
-					 rdtgrp->mon.cntr_id[index],
-					 rdtgrp->closid, true);
-		set_bit(rdtgrp->mon.cntr_id[index], d->mbm_cntr_map);
-	}
-
-	return 0;
-}
-
-static int rdtgroup_mbm_cntr_test(struct rdt_resource *r, u32 cntr_id)
-{
-	struct rdt_domain *d;
-
-	list_for_each_entry(d, &r->domains, list)
-		if (test_bit(cntr_id, d->mbm_cntr_map))
-			return 1;
-
-	return 0;
-}
-
-/* Free the counter id after the event is unassigned */
-void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp,
-			int index)
-{
-	/* Update the counter bitmap */
-	if (!rdtgroup_mbm_cntr_test(r, rdtgrp->mon.cntr_id[index])) {
-		mbm_cntr_free(rdtgrp->mon.cntr_id[index]);
-		rdtgrp->mon.cntr_id[index] = MON_CNTR_UNSET;
-	}
-}
-
-/*
- * Unassign a hardware counter from the group and update all the domains
- * in the group.
- */
-int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	struct rdt_domain *d;
-	int index;
-
-	index = mon_event_config_index_get(evtid);
-	if (index == INVALID_CONFIG_INDEX)
-		return -EINVAL;
-
-	if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET) {
-		list_for_each_entry(d, &r->domains, list) {
-			resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid,
-						 rdtgrp->mon.cntr_id[index],
-						 rdtgrp->closid, false);
-			clear_bit(rdtgrp->mon.cntr_id[index],
-				  d->mbm_cntr_map);
-		}
-
-		/* Free the counter at group level */
-		rdtgroup_free_cntr(r, rdtgrp, index);
-	}
-
-	return 0;
-}
-
-const char *rdtgroup_name_by_closid(u32 closid)
-{
-	struct rdtgroup *rdtgrp;
-
-	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
-		if (rdtgrp->closid == closid)
-			return rdtgrp->kn->name;
-	}
-
-	return NULL;
-}
-
-/* rdtgroup information files for one cache resource. */
-static struct rftype res_common_files[] = {
-	{
-		.name		= "last_cmd_status",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_last_cmd_status_show,
-		.fflags		= RFTYPE_TOP_INFO,
-	},
-	{
-		.name		= "num_closids",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_num_closids_show,
-		.fflags		= RFTYPE_CTRL_INFO,
-	},
-	{
-		.name		= "mon_features",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_mon_features_show,
-		.fflags		= RFTYPE_MON_INFO,
-	},
-	{
-		.name		= "num_rmids",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_num_rmids_show,
-		.fflags		= RFTYPE_MON_INFO,
-	},
-	{
-		.name		= "cbm_mask",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_default_ctrl_show,
-		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
-	},
-	{
-		.name		= "min_cbm_bits",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_min_cbm_bits_show,
-		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
-	},
-	{
-		.name		= "shareable_bits",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_shareable_bits_show,
-		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
-	},
-	{
-		.name		= "bit_usage",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_bit_usage_show,
-		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
-	},
-	{
-		.name		= "min_bandwidth",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_min_bw_show,
-		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
-	},
-	{
-		.name		= "bandwidth_gran",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_bw_gran_show,
-		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
-	},
-	{
-		.name		= "delay_linear",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_delay_linear_show,
-		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
-	},
-	/*
-	 * Platform specific which (if any) capabilities are provided by
-	 * thread_throttle_mode. Defer "fflags" initialization to platform
-	 * discovery.
-	 */
-	{
-		.name		= "thread_throttle_mode",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_thread_throttle_mode_show,
-	},
-#ifdef CONFIG_X86
-	{
-		.name		= "io_alloc",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= resctrl_io_alloc_show,
-		.write          = resctrl_io_alloc_write,
-	},
-	{
-		.name		= "io_alloc_cbm",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= resctrl_io_alloc_cbm_show,
-		.write		= resctrl_io_alloc_cbm_write,
-	},
-#endif
-	{
-		.name		= "max_threshold_occupancy",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= max_threshold_occ_write,
-		.seq_show	= max_threshold_occ_show,
-		.fflags		= RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
-	},
-	{
-		.name		= "mbm_total_bytes_config",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= mbm_total_bytes_config_show,
-		.write		= mbm_total_bytes_config_write,
-	},
-	{
-		.name		= "mbm_local_bytes_config",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= mbm_local_bytes_config_show,
-		.write		= mbm_local_bytes_config_write,
-	},
-	{
-		.name		= "mbm_mode",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdtgroup_mbm_mode_show,
-		.write		= rdtgroup_mbm_mode_write,
-		.fflags		= RFTYPE_MON_INFO,
-	},
-	{
-		.name		= "cpus",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= rdtgroup_cpus_write,
-		.seq_show	= rdtgroup_cpus_show,
-		.fflags		= RFTYPE_BASE,
-	},
-	{
-		.name		= "num_mbm_cntrs",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdtgroup_num_mbm_cntrs_show,
-	},
-	{
-		.name		= "mbm_control",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdtgroup_mbm_control_show,
-		.write		= rdtgroup_mbm_control_write,
-	},
-	{
-		.name		= "cpus_list",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= rdtgroup_cpus_write,
-		.seq_show	= rdtgroup_cpus_show,
-		.flags		= RFTYPE_FLAGS_CPUS_LIST,
-		.fflags		= RFTYPE_BASE,
-	},
-	{
-		.name		= "tasks",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= rdtgroup_tasks_write,
-		.seq_show	= rdtgroup_tasks_show,
-		.fflags		= RFTYPE_BASE,
-	},
-	{
-		.name		= "mon_hw_id",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdtgroup_rmid_show,
-		.fflags		= RFTYPE_MON_BASE | RFTYPE_DEBUG,
-	},
-	{
-		.name		= "schemata",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= rdtgroup_schemata_write,
-		.seq_show	= rdtgroup_schemata_show,
-		.fflags		= RFTYPE_CTRL_BASE,
-	},
-	{
-		.name		= "mode",
-		.mode		= 0644,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.write		= rdtgroup_mode_write,
-		.seq_show	= rdtgroup_mode_show,
-		.fflags		= RFTYPE_CTRL_BASE,
-	},
-	{
-		.name		= "size",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdtgroup_size_show,
-		.fflags		= RFTYPE_CTRL_BASE,
-	},
-	{
-		.name		= "sparse_masks",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdt_has_sparse_bitmasks_show,
-		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
-	},
-	{
-		.name		= "ctrl_hw_id",
-		.mode		= 0444,
-		.kf_ops		= &rdtgroup_kf_single_ops,
-		.seq_show	= rdtgroup_closid_show,
-		.fflags		= RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
-	},
-
-};
-
-static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
-{
-	struct rftype *rfts, *rft;
-	int ret, len;
-
-	rfts = res_common_files;
-	len = ARRAY_SIZE(res_common_files);
-
-	lockdep_assert_held(&rdtgroup_mutex);
-
-	if (resctrl_debug)
-		fflags |= RFTYPE_DEBUG;
-
-	for (rft = rfts; rft < rfts + len; rft++) {
-		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
-			ret = rdtgroup_add_file(kn, rft);
-			if (ret)
-				goto error;
-		}
-	}
-
-	return 0;
-error:
-	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
-	while (--rft >= rfts) {
-		if ((fflags & rft->fflags) == rft->fflags)
-			kernfs_remove_by_name(kn, rft->name);
-	}
-	return ret;
-}
-
-static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
-{
-	struct rftype *rfts, *rft;
-	int len;
-
-	rfts = res_common_files;
-	len = ARRAY_SIZE(res_common_files);
-
-	for (rft = rfts; rft < rfts + len; rft++) {
-		if (!strcmp(rft->name, name))
-			return rft;
-	}
-
-	return NULL;
-}
-
-#ifdef CONFIG_X86
-/*
- * The resctrl file "io_alloc" is added using L3 resource. However, it results
- * in this file being visible for *all* cache resources (eg. L2 cache),
- * whether it supports "io_alloc" or not.
- */
-static void io_alloc_init(void)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-
-	if (r->cache.io_alloc_capable) {
-		resctrl_file_fflags_init("io_alloc", RFTYPE_CTRL_INFO |
-					 RFTYPE_RES_CACHE);
-		resctrl_file_fflags_init("io_alloc_cbm",
-					 RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE);
-	}
-}
-#endif
-
-void resctrl_file_fflags_init(const char *config, unsigned long fflags)
-{
-	struct rftype *rft;
-
-	rft = rdtgroup_get_rftype_by_name(config);
-	if (rft)
-		rft->fflags = fflags;
-}
-
-/**
- * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
- * @r: The resource group with which the file is associated.
- * @name: Name of the file
- *
- * The permissions of named resctrl file, directory, or link are modified
- * to not allow read, write, or execute by any user.
- *
- * WARNING: This function is intended to communicate to the user that the
- * resctrl file has been locked down - that it is not relevant to the
- * particular state the system finds itself in. It should not be relied
- * on to protect from user access because after the file's permissions
- * are restricted the user can still change the permissions using chmod
- * from the command line.
- *
- * Return: 0 on success, <0 on failure.
- */
-int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
-{
-	struct iattr iattr = {.ia_valid = ATTR_MODE,};
-	struct kernfs_node *kn;
-	int ret = 0;
-
-	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
-	if (!kn)
-		return -ENOENT;
-
-	switch (kernfs_type(kn)) {
-	case KERNFS_DIR:
-		iattr.ia_mode = S_IFDIR;
-		break;
-	case KERNFS_FILE:
-		iattr.ia_mode = S_IFREG;
-		break;
-	case KERNFS_LINK:
-		iattr.ia_mode = S_IFLNK;
-		break;
-	}
-
-	ret = kernfs_setattr(kn, &iattr);
-	kernfs_put(kn);
-	return ret;
-}
-
-/**
- * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
- * @r: The resource group with which the file is associated.
- * @name: Name of the file
- * @mask: Mask of permissions that should be restored
- *
- * Restore the permissions of the named file. If @name is a directory the
- * permissions of its parent will be used.
- *
- * Return: 0 on success, <0 on failure.
- */
-int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
-			     umode_t mask)
-{
-	struct iattr iattr = {.ia_valid = ATTR_MODE,};
-	struct kernfs_node *kn, *parent;
-	struct rftype *rfts, *rft;
-	int ret, len;
-
-	rfts = res_common_files;
-	len = ARRAY_SIZE(res_common_files);
-
-	for (rft = rfts; rft < rfts + len; rft++) {
-		if (!strcmp(rft->name, name))
-			iattr.ia_mode = rft->mode & mask;
-	}
-
-	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
-	if (!kn)
-		return -ENOENT;
-
-	switch (kernfs_type(kn)) {
-	case KERNFS_DIR:
-		parent = kernfs_get_parent(kn);
-		if (parent) {
-			iattr.ia_mode |= parent->mode;
-			kernfs_put(parent);
-		}
-		iattr.ia_mode |= S_IFDIR;
-		break;
-	case KERNFS_FILE:
-		iattr.ia_mode |= S_IFREG;
-		break;
-	case KERNFS_LINK:
-		iattr.ia_mode |= S_IFLNK;
-		break;
-	}
-
-	ret = kernfs_setattr(kn, &iattr);
-	kernfs_put(kn);
-	return ret;
-}
-
-static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
-				      unsigned long fflags)
-{
-	struct kernfs_node *kn_subdir;
-	int ret;
-
-	kn_subdir = kernfs_create_dir(kn_info, name,
-				      kn_info->mode, priv);
-	if (IS_ERR(kn_subdir))
-		return PTR_ERR(kn_subdir);
-
-	ret = rdtgroup_kn_set_ugid(kn_subdir);
-	if (ret)
-		return ret;
-
-	ret = rdtgroup_add_files(kn_subdir, fflags);
-	if (!ret)
-		kernfs_activate(kn_subdir);
-
-	return ret;
-}
-
-static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
-{
-	enum resctrl_res_level i;
-	struct resctrl_schema *s;
-	struct rdt_resource *r;
-	unsigned long fflags;
-	char name[32];
-	int ret;
-
-	/* create the directory */
-	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
-	if (IS_ERR(kn_info))
-		return PTR_ERR(kn_info);
-
-	ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
-	if (ret)
-		goto out_destroy;
-
-	/* loop over enabled controls, these are all alloc_capable */
-	list_for_each_entry(s, &resctrl_schema_all, list) {
-		r = s->res;
-		fflags = r->fflags | RFTYPE_CTRL_INFO;
-		ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
-		if (ret)
-			goto out_destroy;
-	}
-
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		r = resctrl_arch_get_resource(i);
-		if (!r->mon_capable)
-			continue;
-
-		fflags =  r->fflags | RFTYPE_MON_INFO;
-		sprintf(name, "%s_MON", r->name);
-		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
-		if (ret)
-			goto out_destroy;
-	}
-
-	ret = rdtgroup_kn_set_ugid(kn_info);
-	if (ret)
-		goto out_destroy;
-
-	kernfs_activate(kn_info);
-
-	return 0;
-
-out_destroy:
-	kernfs_remove(kn_info);
-	return ret;
-}
-
-static int
-mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
-		    char *name, struct kernfs_node **dest_kn)
-{
-	struct kernfs_node *kn;
-	int ret;
-
-	/* create the directory */
-	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
-	if (IS_ERR(kn))
-		return PTR_ERR(kn);
-
-	if (dest_kn)
-		*dest_kn = kn;
-
-	ret = rdtgroup_kn_set_ugid(kn);
-	if (ret)
-		goto out_destroy;
-
-	kernfs_activate(kn);
-
-	return 0;
-
-out_destroy:
-	kernfs_remove(kn);
-	return ret;
-}
-
-static inline bool is_mba_linear(void)
-{
-	return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear;
-}
-
-static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
-{
-	u32 num_closid = resctrl_arch_get_num_closid(r);
-	int cpu = cpumask_any(&d->cpu_mask);
-	int i;
-
-	d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
-				   GFP_KERNEL, cpu_to_node(cpu));
-	if (!d->mbps_val)
-		return -ENOMEM;
-
-	for (i = 0; i < num_closid; i++)
-		d->mbps_val[i] = MBA_MAX_MBPS;
-
-	return 0;
-}
-
-static void mba_sc_domain_destroy(struct rdt_resource *r,
-				  struct rdt_domain *d)
-{
-	kfree(d->mbps_val);
-	d->mbps_val = NULL;
-}
-
-/*
- * MBA software controller is supported only if
- * MBM is supported and MBA is in linear scale.
- */
-static bool supports_mba_mbps(void)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
-
-	return (resctrl_arch_is_mbm_local_enabled() &&
-		r->alloc_capable && is_mba_linear());
-}
-
-/*
- * Enable or disable the MBA software controller
- * which helps user specify bandwidth in MBps.
- */
-static int set_mba_sc(bool mba_sc)
-{
-	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
-	u32 num_closid = resctrl_arch_get_num_closid(r);
-	struct rdt_domain *d;
-	int i;
-
-	if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
-		return -EINVAL;
-
-	r->membw.mba_sc = mba_sc;
-
-	list_for_each_entry(d, &r->domains, list) {
-		for (i = 0; i < num_closid; i++)
-			d->mbps_val[i] = MBA_MAX_MBPS;
-	}
-
-	return 0;
-}
-
-/*
- * We don't allow rdtgroup directories to be created anywhere
- * except the root directory. Thus when looking for the rdtgroup
- * structure for a kernfs node we are either looking at a directory,
- * in which case the rdtgroup structure is pointed at by the "priv"
- * field, otherwise we have a file, and need only look to the parent
- * to find the rdtgroup.
- */
-static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
-{
-	if (kernfs_type(kn) == KERNFS_DIR) {
-		/*
-		 * All the resource directories use "kn->priv"
-		 * to point to the "struct rdtgroup" for the
-		 * resource. "info" and its subdirectories don't
-		 * have rdtgroup structures, so return NULL here.
-		 */
-		if (kn == kn_info || kn->parent == kn_info)
-			return NULL;
-		else
-			return kn->priv;
-	} else {
-		return kn->parent->priv;
-	}
-}
-
-static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
-{
-	atomic_inc(&rdtgrp->waitcount);
-	kernfs_break_active_protection(kn);
-}
-
-static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
-{
-	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
-	    (rdtgrp->flags & RDT_DELETED)) {
-		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
-		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
-			rdtgroup_pseudo_lock_remove(rdtgrp);
-		kernfs_unbreak_active_protection(kn);
-		rdtgroup_remove(rdtgrp);
-	} else {
-		kernfs_unbreak_active_protection(kn);
-	}
-}
-
-struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
-{
-	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
-
-	if (!rdtgrp)
-		return NULL;
-
-	rdtgroup_kn_get(rdtgrp, kn);
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	/* Was this group deleted while we waited? */
-	if (rdtgrp->flags & RDT_DELETED)
-		return NULL;
-
-	return rdtgrp;
-}
-
-void rdtgroup_kn_unlock(struct kernfs_node *kn)
-{
-	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
-
-	if (!rdtgrp)
-		return;
-
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-
-	rdtgroup_kn_put(rdtgrp, kn);
-}
-
-static int mkdir_mondata_all(struct kernfs_node *parent_kn,
-			     struct rdtgroup *prgrp,
-			     struct kernfs_node **mon_data_kn);
-
-static void rdt_disable_ctx(void)
-{
-	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
-	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
-	set_mba_sc(false);
-	resctrl_arch_set_hwdrc_enabled(RDT_RESOURCE_MBA, false);
-
-	resctrl_debug = false;
-}
-
-static int rdt_enable_ctx(struct rdt_fs_context *ctx)
-{
-	int ret = 0;
-
-	if (ctx->enable_cdpl2) {
-		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
-		if (ret)
-			goto out_done;
-	}
-
-	if (ctx->enable_cdpl3) {
-		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
-		if (ret)
-			goto out_cdpl2;
-	}
-
-	/*
-	 * MBA and memory bandwidth HWDRC features are mutually exclusive.
-	 * So mba_MBps and hwdrc_mb options could not be set at the same time.
-	 */
-	if (ctx->enable_hwdrc_mb && ctx->enable_mba_mbps) {
-		pr_debug("Option 'mba_MBps' and 'hwdrc_mb' are mutually exclusive\n");
-		ret = -EINVAL;
-		goto out_cdpl3;
-	}
-
-	if (!ret && ctx->enable_hwdrc_mb)
-		ret = resctrl_arch_set_hwdrc_enabled(RDT_RESOURCE_MBA, true);
-
-	if (ctx->enable_mba_mbps) {
-		ret = set_mba_sc(true);
-		if (ret)
-			goto out_cdpl3;
-	}
-
-	if (ctx->enable_debug)
-		resctrl_debug = true;
-
-	return 0;
-
-out_cdpl3:
-	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
-out_cdpl2:
-	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
-out_done:
-	return ret;
-}
-
-static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
-{
-	struct resctrl_schema *s;
-	const char *suffix = "";
-	int ret, cl;
-
-	s = kzalloc(sizeof(*s), GFP_KERNEL);
-	if (!s)
-		return -ENOMEM;
-
-	s->res = r;
-	s->num_closid = resctrl_arch_get_num_closid(r);
-	if (resctrl_arch_get_cdp_enabled(r->rid))
-		s->num_closid /= 2;
-
-	s->conf_type = type;
-	switch (type) {
-	case CDP_CODE:
-		suffix = "CODE";
-		break;
-	case CDP_DATA:
-		suffix = "DATA";
-		break;
-	case CDP_NONE:
-		suffix = "";
-		break;
-	}
-
-	ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
-	if (ret >= sizeof(s->name)) {
-		kfree(s);
-		return -EINVAL;
-	}
-
-	cl = strlen(s->name);
-
-	/*
-	 * If CDP is supported by this resource, but not enabled,
-	 * include the suffix. This ensures the tabular format of the
-	 * schemata file does not change between mounts of the filesystem.
-	 */
-	if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
-		cl += 4;
-
-	if (cl > max_name_width)
-		max_name_width = cl;
-
-	/*
-	 * Choose a width for the resource data based on the resource that has
-	 * widest name and cbm.
-	 */
-	max_data_width = max(max_data_width, r->data_width);
-
-	INIT_LIST_HEAD(&s->list);
-	list_add(&s->list, &resctrl_schema_all);
-
-	return 0;
-}
-
-static int schemata_list_create(void)
-{
-	enum resctrl_res_level i;
-	struct rdt_resource *r;
-	int ret = 0;
-
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		r = resctrl_arch_get_resource(i);
-		if (!r->alloc_capable)
-			continue;
-
-		if (resctrl_arch_get_cdp_enabled(r->rid)) {
-			ret = schemata_list_add(r, CDP_CODE);
-			if (ret)
-				break;
-
-			ret = schemata_list_add(r, CDP_DATA);
-		} else {
-			ret = schemata_list_add(r, CDP_NONE);
-		}
-
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-
-static void schemata_list_destroy(void)
-{
-	struct resctrl_schema *s, *tmp;
-
-	list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
-		list_del(&s->list);
-		kfree(s);
-	}
-}
-
-/*
- * Called when new group is created. Assign the counters if ABMC is
- * already enabled. Two counters are required per group, one for total
- * event and one for local event. With limited number of counters,
- * the assignments can fail in some cases. But, it is not required to
- * fail the group creation. Users have the option to modify the
- * assignments after the group creation.
- */
-static int rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp)
-{
-	int ret = 0;
-
-	if (!resctrl_arch_get_abmc_enabled())
-		return 0;
-
-	if (resctrl_arch_is_mbm_total_enabled())
-		ret = rdtgroup_assign_cntr(rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
-
-	if (!ret && resctrl_arch_is_mbm_local_enabled())
-		ret = rdtgroup_assign_cntr(rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
-
-	return ret;
-}
-
-static int rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp)
-{
-	int ret = 0;
-
-	if (!resctrl_arch_get_abmc_enabled())
-		return 0;
-
-	if (resctrl_arch_is_mbm_total_enabled())
-		ret = rdtgroup_unassign_cntr(rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
-
-	if (!ret && resctrl_arch_is_mbm_local_enabled())
-		ret = rdtgroup_unassign_cntr(rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
-
-	return ret;
-}
-
-static int rdt_get_tree(struct fs_context *fc)
-{
-	struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	struct rdt_fs_context *ctx = rdt_fc2context(fc);
-	unsigned long flags = RFTYPE_CTRL_BASE;
-	struct rdt_domain *dom;
-	int ret;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-	/*
-	 * resctrl file system can only be mounted once.
-	 */
-	if (resctrl_mounted) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	ret = rdtgroup_setup_root(ctx);
-	if (ret)
-		goto out;
-
-	ret = rdt_enable_ctx(ctx);
-	if (ret)
-		goto out_root;
-
-	ret = schemata_list_create();
-	if (ret) {
-		schemata_list_destroy();
-		goto out_ctx;
-	}
-
-	closid_init();
-
-	mbm_cntrs_init(resctrl_arch_get_resource(RDT_RESOURCE_L3));
-
-	if (resctrl_arch_mon_capable())
-		flags |= RFTYPE_MON;
-
-	ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
-	if (ret)
-		goto out_schemata_free;
-
-	kernfs_activate(rdtgroup_default.kn);
-
-	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
-	if (ret < 0)
-		goto out_schemata_free;
-
-	if (resctrl_arch_mon_capable()) {
-		ret = mongroup_create_dir(rdtgroup_default.kn,
-					  &rdtgroup_default, "mon_groups",
-					  &kn_mongrp);
-		if (ret < 0)
-			goto out_info;
-
-		ret = mkdir_mondata_all(rdtgroup_default.kn,
-					&rdtgroup_default, &kn_mondata);
-		if (ret < 0)
-			goto out_mongrp;
-		rdtgroup_default.mon.mon_data_kn = kn_mondata;
-
-		rdtgroup_assign_cntrs(&rdtgroup_default);
-	}
-
-	ret = rdt_pseudo_lock_init();
-	if (ret)
-		goto out_mondata;
-
-	ret = kernfs_get_tree(fc);
-	if (ret < 0)
-		goto out_psl;
-
-	if (resctrl_arch_alloc_capable())
-		resctrl_arch_enable_alloc();
-	if (resctrl_arch_mon_capable())
-		resctrl_arch_enable_mon();
-
-	if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
-		resctrl_mounted = true;
-
-	if (resctrl_is_mbm_enabled()) {
-		list_for_each_entry(dom, &l3->domains, list)
-			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
-						   RESCTRL_PICK_ANY_CPU);
-	}
-
-	goto out;
-
-out_psl:
-	rdt_pseudo_lock_release();
-out_mondata:
-	rdtgroup_unassign_cntrs(&rdtgroup_default);
-	if (resctrl_arch_mon_capable())
-		kernfs_remove(kn_mondata);
-out_mongrp:
-	if (resctrl_arch_mon_capable())
-		kernfs_remove(kn_mongrp);
-out_info:
-	kernfs_remove(kn_info);
-out_schemata_free:
-	schemata_list_destroy();
-out_ctx:
-	rdt_disable_ctx();
-out_root:
-	rdtgroup_destroy_root();
-out:
-	rdt_last_cmd_clear();
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-	return ret;
-}
-
-enum rdt_param {
-	Opt_cdp,
-	Opt_cdpl2,
-	Opt_mba_mbps,
-	Opt_hwdrc_mb,
-	Opt_debug,
-	nr__rdt_params
-};
-
-static const struct fs_parameter_spec rdt_fs_parameters[] = {
-	fsparam_flag("cdp",		Opt_cdp),
-	fsparam_flag("cdpl2",		Opt_cdpl2),
-	fsparam_flag("mba_MBps",	Opt_mba_mbps),
-	fsparam_flag("hwdrc_mb",	Opt_hwdrc_mb),
-	fsparam_flag("debug",		Opt_debug),
-	{}
-};
-
-static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
-{
-	struct rdt_fs_context *ctx = rdt_fc2context(fc);
-	struct fs_parse_result result;
-	int opt;
-
-	opt = fs_parse(fc, rdt_fs_parameters, param, &result);
-	if (opt < 0)
-		return opt;
-
-	switch (opt) {
-	case Opt_cdp:
-		ctx->enable_cdpl3 = true;
-		return 0;
-	case Opt_cdpl2:
-		ctx->enable_cdpl2 = true;
-		return 0;
-	case Opt_mba_mbps:
-		if (!supports_mba_mbps())
-			return -EINVAL;
-		ctx->enable_mba_mbps = true;
-		return 0;
-	case Opt_hwdrc_mb:
-		if (!resctrl_arch_is_hwdrc_mb_capable())
-			return -EINVAL;
-		ctx->enable_hwdrc_mb = true;
-		return 0;
-	case Opt_debug:
-		ctx->enable_debug = true;
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
-static void rdt_fs_context_free(struct fs_context *fc)
-{
-	struct rdt_fs_context *ctx = rdt_fc2context(fc);
-
-	kernfs_free_fs_context(fc);
-	kfree(ctx);
-}
-
-static const struct fs_context_operations rdt_fs_context_ops = {
-	.free		= rdt_fs_context_free,
-	.parse_param	= rdt_parse_param,
-	.get_tree	= rdt_get_tree,
-};
-
-static int rdt_init_fs_context(struct fs_context *fc)
-{
-	struct rdt_fs_context *ctx;
-
-	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
-
-	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
-	fc->fs_private = &ctx->kfc;
-	fc->ops = &rdt_fs_context_ops;
-	put_user_ns(fc->user_ns);
-	fc->user_ns = get_user_ns(&init_user_ns);
-	fc->global = true;
-	return 0;
-}
-
-/*
- * Move tasks from one to the other group. If @from is NULL, then all tasks
- * in the systems are moved unconditionally (used for teardown).
- *
- * If @mask is not NULL the cpus on which moved tasks are running are set
- * in that mask so the update smp function call is restricted to affected
- * cpus.
- */
-static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
-				 struct cpumask *mask)
-{
-	struct task_struct *p, *t;
-
-	read_lock(&tasklist_lock);
-	for_each_process_thread(p, t) {
-		if (!from || is_closid_match(t, from) ||
-		    is_rmid_match(t, from)) {
-			resctrl_arch_set_closid_rmid(t, to->closid,
-						     to->mon.rmid);
-
-			/*
-			 * Order the closid/rmid stores above before the loads
-			 * in task_curr(). This pairs with the full barrier
-			 * between the rq->curr update and
-			 * resctrl_arch_sched_in() during context switch.
-			 */
-			smp_mb();
-
-			/*
-			 * If the task is on a CPU, set the CPU in the mask.
-			 * The detection is inaccurate as tasks might move or
-			 * schedule before the smp function call takes place.
-			 * In such a case the function call is pointless, but
-			 * there is no other side effect.
-			 */
-			if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
-				cpumask_set_cpu(task_cpu(t), mask);
-		}
-	}
-	read_unlock(&tasklist_lock);
-}
-
-static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
-{
-	struct rdtgroup *sentry, *stmp;
-	struct list_head *head;
-
-	head = &rdtgrp->mon.crdtgrp_list;
-	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
-		free_rmid(sentry->closid, sentry->mon.rmid);
-		list_del(&sentry->mon.crdtgrp_list);
-
-		if (atomic_read(&sentry->waitcount) != 0)
-			sentry->flags = RDT_DELETED;
-		else
-			rdtgroup_remove(sentry);
-	}
-}
-
-/*
- * Forcibly remove all of subdirectories under root.
- */
-static void rmdir_all_sub(void)
-{
-	struct rdtgroup *rdtgrp, *tmp;
-
-	/* Move all tasks to the default resource group */
-	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
-
-	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
-		/* Free any child rmids */
-		free_all_child_rdtgrp(rdtgrp);
-
-		/* Remove each rdtgroup other than root */
-		if (rdtgrp == &rdtgroup_default)
-			continue;
-
-		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
-		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
-			rdtgroup_pseudo_lock_remove(rdtgrp);
-
-		/*
-		 * Give any CPUs back to the default group. We cannot copy
-		 * cpu_online_mask because a CPU might have executed the
-		 * offline callback already, but is still marked online.
-		 */
-		cpumask_or(&rdtgroup_default.cpu_mask,
-			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
-
-		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
-
-		kernfs_remove(rdtgrp->kn);
-		list_del(&rdtgrp->rdtgroup_list);
-
-		if (atomic_read(&rdtgrp->waitcount) != 0)
-			rdtgrp->flags = RDT_DELETED;
-		else
-			rdtgroup_remove(rdtgrp);
-	}
-	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
-	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
-
-	kernfs_remove(kn_info);
-	kernfs_remove(kn_mongrp);
-	kernfs_remove(kn_mondata);
-}
-
-static void rdt_kill_sb(struct super_block *sb)
-{
-	struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	struct rdt_domain *d;
-
-	cpus_read_lock();
-	mutex_lock(&rdtgroup_mutex);
-
-	rdt_disable_ctx();
-
-	/* Put everything back to default values. */
-	resctrl_arch_reset_resources();
-
-	rmdir_all_sub();
-
-	/*
-	 * When resctrl is umounted, forcefully cancel delayed works since the
-	 * new mount option may be changed.
-	 */
-	list_for_each_entry(d, &l3->domains, list) {
-		if (resctrl_is_mbm_enabled())
-			cancel_delayed_work(&d->mbm_over);
-		if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
-			__check_limbo(d, true);
-			cancel_delayed_work(&d->cqm_limbo);
-		}
-	}
-
-	rdt_pseudo_lock_release();
-	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
-	schemata_list_destroy();
-	rdtgroup_destroy_root();
-	if (resctrl_arch_alloc_capable())
-		resctrl_arch_disable_alloc();
-	if (resctrl_arch_mon_capable())
-		resctrl_arch_disable_mon();
-
-	rdtgroup_unassign_cntrs(&rdtgroup_default);
-	resctrl_mounted = false;
-	kernfs_kill_sb(sb);
-	mutex_unlock(&rdtgroup_mutex);
-	cpus_read_unlock();
-}
-
-static struct file_system_type rdt_fs_type = {
-	.name			= "resctrl",
-	.init_fs_context	= rdt_init_fs_context,
-	.parameters		= rdt_fs_parameters,
-	.kill_sb		= rdt_kill_sb,
-};
-
-static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
-		       void *priv)
-{
-	struct kernfs_node *kn;
-	int ret = 0;
-
-	kn = __kernfs_create_file(parent_kn, name, 0444,
-				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
-				  &kf_mondata_ops, priv, NULL, NULL);
-	if (IS_ERR(kn))
-		return PTR_ERR(kn);
-
-	ret = rdtgroup_kn_set_ugid(kn);
-	if (ret) {
-		kernfs_remove(kn);
-		return ret;
-	}
-
-	return ret;
-}
-
-/*
- * Remove all subdirectories of mon_data of ctrl_mon groups
- * and monitor groups with given domain id.
- */
-static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
-					   unsigned int dom_id)
-{
-	struct rdtgroup *prgrp, *crgrp;
-	char name[32];
-
-	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
-		sprintf(name, "mon_%s_%02d", r->name, dom_id);
-		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
-
-		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
-			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
-	}
-}
-
-static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
-				struct rdt_domain *d,
-				struct rdt_resource *r, struct rdtgroup *prgrp)
-{
-	union mon_data_bits priv;
-	struct kernfs_node *kn;
-	struct mon_evt *mevt;
-	struct rmid_read rr;
-	char name[32];
-	int ret;
-
-	sprintf(name, "mon_%s_%02d", r->name, d->id);
-	/* create the directory */
-	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
-	if (IS_ERR(kn))
-		return PTR_ERR(kn);
-
-	ret = rdtgroup_kn_set_ugid(kn);
-	if (ret)
-		goto out_destroy;
-
-	if (WARN_ON(list_empty(&r->mon.evt_list))) {
-		ret = -EPERM;
-		goto out_destroy;
-	}
-
-	priv.u.rid = r->rid;
-	priv.u.domid = d->id;
-	list_for_each_entry(mevt, &r->mon.evt_list, list) {
-		priv.u.evtid = mevt->evtid;
-		ret = mon_addfile(kn, mevt->name, priv.priv);
-		if (ret)
-			goto out_destroy;
-
-		if (resctrl_is_mbm_event(mevt->evtid))
-			mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
-	}
-	kernfs_activate(kn);
-	return 0;
-
-out_destroy:
-	kernfs_remove(kn);
-	return ret;
-}
-
-/*
- * Add all subdirectories of mon_data for "ctrl_mon" groups
- * and "monitor" groups with given domain id.
- */
-static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
-					   struct rdt_domain *d)
-{
-	struct kernfs_node *parent_kn;
-	struct rdtgroup *prgrp, *crgrp;
-	struct list_head *head;
-
-	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
-		parent_kn = prgrp->mon.mon_data_kn;
-		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
-
-		head = &prgrp->mon.crdtgrp_list;
-		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
-			parent_kn = crgrp->mon.mon_data_kn;
-			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
-		}
-	}
-}
-
-static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
-				       struct rdt_resource *r,
-				       struct rdtgroup *prgrp)
-{
-	struct rdt_domain *dom;
-	int ret;
-
-	/* Walking r->domains, ensure it can't race with cpuhp */
-	lockdep_assert_cpus_held();
-
-	list_for_each_entry(dom, &r->domains, list) {
-		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-/*
- * This creates a directory mon_data which contains the monitored data.
- *
- * mon_data has one directory for each domain which are named
- * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
- * with L3 domain looks as below:
- * ./mon_data:
- * mon_L3_00
- * mon_L3_01
- * mon_L3_02
- * ...
- *
- * Each domain directory has one file per event:
- * ./mon_L3_00/:
- * llc_occupancy
- *
- */
-static int mkdir_mondata_all(struct kernfs_node *parent_kn,
-			     struct rdtgroup *prgrp,
-			     struct kernfs_node **dest_kn)
-{
-	enum resctrl_res_level i;
-	struct rdt_resource *r;
-	struct kernfs_node *kn;
-	int ret;
-
-	/*
-	 * Create the mon_data directory first.
-	 */
-	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
-	if (ret)
-		return ret;
-
-	if (dest_kn)
-		*dest_kn = kn;
-
-	/*
-	 * Create the subdirectories for each domain. Note that all events
-	 * in a domain like L3 are grouped into a resource whose domain is L3
-	 */
-	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
-		r = resctrl_arch_get_resource(i);
-		if (!r->mon_capable)
-			continue;
-
-		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
-		if (ret)
-			goto out_destroy;
-	}
-
-	return 0;
-
-out_destroy:
-	kernfs_remove(kn);
-	return ret;
-}
-
-/**
- * cbm_ensure_valid - Enforce validity on provided CBM
- * @_val:	Candidate CBM
- * @r:		RDT resource to which the CBM belongs
- *
- * The provided CBM represents all cache portions available for use. This
- * may be represented by a bitmap that does not consist of contiguous ones
- * and thus be an invalid CBM.
- * Here the provided CBM is forced to be a valid CBM by only considering
- * the first set of contiguous bits as valid and clearing all bits.
- * The intention here is to provide a valid default CBM with which a new
- * resource group is initialized. The user can follow this with a
- * modification to the CBM if the default does not satisfy the
- * requirements.
- */
-static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
-{
-	unsigned int cbm_len = r->cache.cbm_len;
-	unsigned long first_bit, zero_bit;
-	unsigned long val = _val;
-
-	if (!val)
-		return 0;
-
-	first_bit = find_first_bit(&val, cbm_len);
-	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
-
-	/* Clear any remaining bits to ensure contiguous region */
-	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
-	return (u32)val;
-}
-
-/*
- * Initialize cache resources per RDT domain
- *
- * Set the RDT domain up to start off with all usable allocations. That is,
- * all shareable and unused bits. All-zero CBM is invalid.
- */
-static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
-				 u32 closid)
-{
-	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
-	enum resctrl_conf_type t = s->conf_type;
-	struct resctrl_staged_config *cfg;
-	struct rdt_resource *r = s->res;
-	u32 used_b = 0, unused_b = 0;
-	unsigned long tmp_cbm;
-	enum rdtgrp_mode mode;
-	u32 peer_ctl, ctrl_val;
-	int i;
-
-	cfg = &d->staged_config[t];
-	cfg->have_new_ctrl = false;
-	cfg->new_ctrl = r->cache.shareable_bits;
-	used_b = r->cache.shareable_bits;
-	for (i = 0; i < closids_supported(); i++) {
-		if (closid_allocated(i) && i != closid) {
-			mode = rdtgroup_mode_by_closid(i);
-			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
-				/*
-				 * ctrl values for locksetup aren't relevant
-				 * until the schemata is written, and the mode
-				 * becomes RDT_MODE_PSEUDO_LOCKED.
-				 */
-				continue;
-			/*
-			 * If CDP is active include peer domain's
-			 * usage to ensure there is no overlap
-			 * with an exclusive group.
-			 */
-			if (resctrl_arch_get_cdp_enabled(r->rid))
-				peer_ctl = resctrl_arch_get_config(r, d, i,
-								   peer_type);
-			else
-				peer_ctl = 0;
-			ctrl_val = resctrl_arch_get_config(r, d, i,
-							   s->conf_type);
-			used_b |= ctrl_val | peer_ctl;
-			if (mode == RDT_MODE_SHAREABLE)
-				cfg->new_ctrl |= ctrl_val | peer_ctl;
-		}
-	}
-	if (d->plr && d->plr->cbm > 0)
-		used_b |= d->plr->cbm;
-	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
-	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
-	cfg->new_ctrl |= unused_b;
-	/*
-	 * Force the initial CBM to be valid, user can
-	 * modify the CBM based on system availability.
-	 */
-	cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
-	/*
-	 * Assign the u32 CBM to an unsigned long to ensure that
-	 * bitmap_weight() does not access out-of-bound memory.
-	 */
-	tmp_cbm = cfg->new_ctrl;
-	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
-		rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
-		return -ENOSPC;
-	}
-	cfg->have_new_ctrl = true;
-
-	return 0;
-}
-
-/*
- * Initialize cache resources with default values.
- *
- * A new RDT group is being created on an allocation capable (CAT)
- * supporting system. Set this group up to start off with all usable
- * allocations.
- *
- * If there are no more shareable bits available on any domain then
- * the entire allocation will fail.
- */
-int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
-{
-	struct rdt_domain *d;
-	int ret;
-
-	list_for_each_entry(d, &s->res->domains, list) {
-		ret = __init_one_rdt_domain(d, s, closid);
-		if (ret < 0)
-			return ret;
-	}
-
-	return 0;
-}
-
-/* Initialize MBA resource with default values. */
-static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
-{
-	struct resctrl_staged_config *cfg;
-	struct rdt_domain *d;
-
-	list_for_each_entry(d, &r->domains, list) {
-		if (is_mba_sc(r)) {
-			d->mbps_val[closid] = MBA_MAX_MBPS;
-			continue;
-		}
-
-		cfg = &d->staged_config[CDP_NONE];
-		cfg->new_ctrl = r->default_ctrl;
-		cfg->have_new_ctrl = true;
-	}
-}
-
-/* Initialize the RDT group's allocations. */
-static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
-{
-	struct resctrl_schema *s;
-	struct rdt_resource *r;
-	int ret = 0;
-
-	rdt_staged_configs_clear();
-
-	list_for_each_entry(s, &resctrl_schema_all, list) {
-		r = s->res;
-		if (r->rid == RDT_RESOURCE_MBA ||
-		    r->rid == RDT_RESOURCE_SMBA) {
-			rdtgroup_init_mba(r, rdtgrp->closid);
-			if (is_mba_sc(r))
-				continue;
-		} else {
-			ret = rdtgroup_init_cat(s, rdtgrp->closid);
-			if (ret < 0)
-				goto out;
-		}
-
-		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
-		if (ret < 0) {
-			rdt_last_cmd_puts("Failed to initialize allocations\n");
-			goto out;
-		}
-
-	}
-
-	rdtgrp->mode = RDT_MODE_SHAREABLE;
-
-out:
-	rdt_staged_configs_clear();
-	return ret;
-}
-
-static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
-{
-	int ret;
-
-	if (!resctrl_arch_mon_capable())
-		return 0;
-
-	ret = alloc_rmid(rdtgrp->closid);
-	if (ret < 0) {
-		rdt_last_cmd_puts("Out of RMIDs\n");
-		return ret;
-	}
-	rdtgrp->mon.rmid = ret;
-
-	rdtgrp->mon.cntr_id[0] = MON_CNTR_UNSET;
-	rdtgrp->mon.cntr_id[1] = MON_CNTR_UNSET;
-
-	ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
-	if (ret) {
-		rdt_last_cmd_puts("kernfs subdir error\n");
-		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
-		return ret;
-	}
-
-	return 0;
-}
-
-static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
-{
-	if (resctrl_arch_mon_capable())
-		free_rmid(rgrp->closid, rgrp->mon.rmid);
-}
-
-static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
-			     const char *name, umode_t mode,
-			     enum rdt_group_type rtype, struct rdtgroup **r)
-{
-	struct rdtgroup *prdtgrp, *rdtgrp;
-	unsigned long files = 0;
-	struct kernfs_node *kn;
-	int ret;
-
-	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
-	if (!prdtgrp) {
-		ret = -ENODEV;
-		goto out_unlock;
-	}
-
-	if (rtype == RDTMON_GROUP &&
-	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
-	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
-		ret = -EINVAL;
-		rdt_last_cmd_puts("Pseudo-locking in progress\n");
-		goto out_unlock;
-	}
-
-	/* allocate the rdtgroup. */
-	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
-	if (!rdtgrp) {
-		ret = -ENOSPC;
-		rdt_last_cmd_puts("Kernel out of memory\n");
-		goto out_unlock;
-	}
-	*r = rdtgrp;
-	rdtgrp->mon.parent = prdtgrp;
-	rdtgrp->type = rtype;
-	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
-
-	/* kernfs creates the directory for rdtgrp */
-	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
-	if (IS_ERR(kn)) {
-		ret = PTR_ERR(kn);
-		rdt_last_cmd_puts("kernfs create error\n");
-		goto out_free_rgrp;
-	}
-	rdtgrp->kn = kn;
-
-	/*
-	 * kernfs_remove() will drop the reference count on "kn" which
-	 * will free it. But we still need it to stick around for the
-	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
-	 * which will be dropped by kernfs_put() in rdtgroup_remove().
-	 */
-	kernfs_get(kn);
-
-	ret = rdtgroup_kn_set_ugid(kn);
-	if (ret) {
-		rdt_last_cmd_puts("kernfs perm error\n");
-		goto out_destroy;
-	}
-
-	if (rtype == RDTCTRL_GROUP) {
-		files = RFTYPE_BASE | RFTYPE_CTRL;
-		if (resctrl_arch_mon_capable())
-			files |= RFTYPE_MON;
-	} else {
-		files = RFTYPE_BASE | RFTYPE_MON;
-	}
-
-	ret = rdtgroup_add_files(kn, files);
-	if (ret) {
-		rdt_last_cmd_puts("kernfs fill error\n");
-		goto out_destroy;
-	}
-
-	/*
-	 * The caller unlocks the parent_kn upon success.
-	 */
-	return 0;
-
-out_destroy:
-	kernfs_put(rdtgrp->kn);
-	kernfs_remove(rdtgrp->kn);
-out_free_rgrp:
-	kfree(rdtgrp);
-out_unlock:
-	rdtgroup_kn_unlock(parent_kn);
-	return ret;
-}
-
-static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
-{
-	kernfs_remove(rgrp->kn);
-	rdtgroup_remove(rgrp);
-}
-
-/*
- * Create a monitor group under "mon_groups" directory of a control
- * and monitor group(ctrl_mon). This is a resource group
- * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
- */
-static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
-			      const char *name, umode_t mode)
-{
-	struct rdtgroup *rdtgrp, *prgrp;
-	int ret;
-
-	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
-	if (ret)
-		return ret;
-
-	prgrp = rdtgrp->mon.parent;
-	rdtgrp->closid = prgrp->closid;
-
-	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
-	if (ret) {
-		mkdir_rdt_prepare_clean(rdtgrp);
-		goto out_unlock;
-	}
-
-	rdtgroup_assign_cntrs(rdtgrp);
-
-	kernfs_activate(rdtgrp->kn);
-
-	/*
-	 * Add the rdtgrp to the list of rdtgrps the parent
-	 * ctrl_mon group has to track.
-	 */
-	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
-
-out_unlock:
-	rdtgroup_kn_unlock(parent_kn);
-	return ret;
-}
-
-/*
- * These are rdtgroups created under the root directory. Can be used
- * to allocate and monitor resources.
- */
-static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
-				   const char *name, umode_t mode)
-{
-	struct rdtgroup *rdtgrp;
-	struct kernfs_node *kn;
-	u32 closid;
-	int ret;
-
-	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
-	if (ret)
-		return ret;
-
-	kn = rdtgrp->kn;
-	ret = closid_alloc();
-	if (ret < 0) {
-		rdt_last_cmd_puts("Out of CLOSIDs\n");
-		goto out_common_fail;
-	}
-	closid = ret;
-	ret = 0;
-
-	rdtgrp->closid = closid;
-
-	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
-	if (ret)
-		goto out_closid_free;
-
-	rdtgroup_assign_cntrs(rdtgrp);
-
-	kernfs_activate(rdtgrp->kn);
-
-	ret = rdtgroup_init_alloc(rdtgrp);
-	if (ret < 0)
-		goto out_rmid_free;
-
-	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
-
-	if (resctrl_arch_mon_capable()) {
-		/*
-		 * Create an empty mon_groups directory to hold the subset
-		 * of tasks and cpus to monitor.
-		 */
-		ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
-		if (ret) {
-			rdt_last_cmd_puts("kernfs subdir error\n");
-			goto out_del_list;
-		}
-	}
-
-	goto out_unlock;
-
-out_del_list:
-	list_del(&rdtgrp->rdtgroup_list);
-out_rmid_free:
-	rdtgroup_unassign_cntrs(rdtgrp);
-	mkdir_rdt_prepare_rmid_free(rdtgrp);
-out_closid_free:
-	closid_free(closid);
-out_common_fail:
-	mkdir_rdt_prepare_clean(rdtgrp);
-out_unlock:
-	rdtgroup_kn_unlock(parent_kn);
-	return ret;
-}
-
-/*
- * We allow creating mon groups only with in a directory called "mon_groups"
- * which is present in every ctrl_mon group. Check if this is a valid
- * "mon_groups" directory.
- *
- * 1. The directory should be named "mon_groups".
- * 2. The mon group itself should "not" be named "mon_groups".
- *   This makes sure "mon_groups" directory always has a ctrl_mon group
- *   as parent.
- */
-static bool is_mon_groups(struct kernfs_node *kn, const char *name)
-{
-	return (!strcmp(kn->name, "mon_groups") &&
-		strcmp(name, "mon_groups"));
-}
-
-static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
-			  umode_t mode)
-{
-	/* Do not accept '\n' to avoid unparsable situation. */
-	if (strchr(name, '\n'))
-		return -EINVAL;
-
-	/*
-	 * If the parent directory is the root directory and RDT
-	 * allocation is supported, add a control and monitoring
-	 * subdirectory
-	 */
-	if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
-		return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
-
-	/*
-	 * If RDT monitoring is supported and the parent directory is a valid
-	 * "mon_groups" directory, add a monitoring subdirectory.
-	 */
-	if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name))
-		return rdtgroup_mkdir_mon(parent_kn, name, mode);
-
-	return -EPERM;
-}
-
-static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
-{
-	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
-	u32 closid, rmid;
-	int cpu;
-
-	/* Give any tasks back to the parent group */
-	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
-
-	/* Update per cpu rmid of the moved CPUs first */
-	closid = rdtgrp->closid;
-	rmid = prdtgrp->mon.rmid;
-	for_each_cpu(cpu, &rdtgrp->cpu_mask)
-		resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
-
-	/*
-	 * Update the MSR on moved CPUs and CPUs which have moved
-	 * task running on them.
-	 */
-	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
-	update_closid_rmid(tmpmask, NULL);
-
-	rdtgrp->flags = RDT_DELETED;
-
-	rdtgroup_unassign_cntrs(rdtgrp);
-
-	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
-
-	/*
-	 * Remove the rdtgrp from the parent ctrl_mon group's list
-	 */
-	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
-	list_del(&rdtgrp->mon.crdtgrp_list);
-
-	kernfs_remove(rdtgrp->kn);
-
-	return 0;
-}
-
-static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
-{
-	rdtgrp->flags = RDT_DELETED;
-	list_del(&rdtgrp->rdtgroup_list);
-
-	kernfs_remove(rdtgrp->kn);
-	return 0;
-}
-
-static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
-{
-	u32 closid, rmid;
-	int cpu;
-
-	/* Give any tasks back to the default group */
-	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
-
-	/* Give any CPUs back to the default group */
-	cpumask_or(&rdtgroup_default.cpu_mask,
-		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
-
-	/* Update per cpu closid and rmid of the moved CPUs first */
-	closid = rdtgroup_default.closid;
-	rmid = rdtgroup_default.mon.rmid;
-	for_each_cpu(cpu, &rdtgrp->cpu_mask)
-		resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
-
-	/*
-	 * Update the MSR on moved CPUs and CPUs which have moved
-	 * task running on them.
-	 */
-	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
-	update_closid_rmid(tmpmask, NULL);
-
-	rdtgroup_unassign_cntrs(rdtgrp);
-
-	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
-	closid_free(rdtgrp->closid);
-
-	rdtgroup_ctrl_remove(rdtgrp);
-
-	/*
-	 * Free all the child monitor group rmids.
-	 */
-	free_all_child_rdtgrp(rdtgrp);
-
-	return 0;
-}
-
-static int rdtgroup_rmdir(struct kernfs_node *kn)
-{
-	struct kernfs_node *parent_kn = kn->parent;
-	struct rdtgroup *rdtgrp;
-	cpumask_var_t tmpmask;
-	int ret = 0;
-
-	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
-		return -ENOMEM;
-
-	rdtgrp = rdtgroup_kn_lock_live(kn);
-	if (!rdtgrp) {
-		ret = -EPERM;
-		goto out;
-	}
-
-	/*
-	 * If the rdtgroup is a ctrl_mon group and parent directory
-	 * is the root directory, remove the ctrl_mon group.
-	 *
-	 * If the rdtgroup is a mon group and parent directory
-	 * is a valid "mon_groups" directory, remove the mon group.
-	 */
-	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
-	    rdtgrp != &rdtgroup_default) {
-		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
-		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
-			ret = rdtgroup_ctrl_remove(rdtgrp);
-		} else {
-			ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
-		}
-	} else if (rdtgrp->type == RDTMON_GROUP &&
-		 is_mon_groups(parent_kn, kn->name)) {
-		ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
-	} else {
-		ret = -EPERM;
-	}
-
-out:
-	rdtgroup_kn_unlock(kn);
-	free_cpumask_var(tmpmask);
-	return ret;
-}
-
-/**
- * mongrp_reparent() - replace parent CTRL_MON group of a MON group
- * @rdtgrp:		the MON group whose parent should be replaced
- * @new_prdtgrp:	replacement parent CTRL_MON group for @rdtgrp
- * @cpus:		cpumask provided by the caller for use during this call
- *
- * Replaces the parent CTRL_MON group for a MON group, resulting in all member
- * tasks' CLOSID immediately changing to that of the new parent group.
- * Monitoring data for the group is unaffected by this operation.
- */
-static void mongrp_reparent(struct rdtgroup *rdtgrp,
-			    struct rdtgroup *new_prdtgrp,
-			    cpumask_var_t cpus)
-{
-	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
-
-	WARN_ON(rdtgrp->type != RDTMON_GROUP);
-	WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
-
-	/* Nothing to do when simply renaming a MON group. */
-	if (prdtgrp == new_prdtgrp)
-		return;
-
-	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
-	list_move_tail(&rdtgrp->mon.crdtgrp_list,
-		       &new_prdtgrp->mon.crdtgrp_list);
-
-	rdtgrp->mon.parent = new_prdtgrp;
-	rdtgrp->closid = new_prdtgrp->closid;
-
-	/* Propagate updated closid to all tasks in this group. */
-	rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
-
-	update_closid_rmid(cpus, NULL);
-}
-
-static int rdtgroup_rename(struct kernfs_node *kn,
-			   struct kernfs_node *new_parent, const char *new_name)
-{
-	struct rdtgroup *new_prdtgrp;
-	struct rdtgroup *rdtgrp;
-	cpumask_var_t tmpmask;
-	int ret;
-
-	rdtgrp = kernfs_to_rdtgroup(kn);
-	new_prdtgrp = kernfs_to_rdtgroup(new_parent);
-	if (!rdtgrp || !new_prdtgrp)
-		return -ENOENT;
-
-	/* Release both kernfs active_refs before obtaining rdtgroup mutex. */
-	rdtgroup_kn_get(rdtgrp, kn);
-	rdtgroup_kn_get(new_prdtgrp, new_parent);
-
-	mutex_lock(&rdtgroup_mutex);
-
-	rdt_last_cmd_clear();
-
-	/*
-	 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
-	 * either kernfs_node is a file.
-	 */
-	if (kernfs_type(kn) != KERNFS_DIR ||
-	    kernfs_type(new_parent) != KERNFS_DIR) {
-		rdt_last_cmd_puts("Source and destination must be directories");
-		ret = -EPERM;
-		goto out;
-	}
-
-	if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
-		ret = -ENOENT;
-		goto out;
-	}
-
-	if (rdtgrp->type != RDTMON_GROUP || !kn->parent ||
-	    !is_mon_groups(kn->parent, kn->name)) {
-		rdt_last_cmd_puts("Source must be a MON group\n");
-		ret = -EPERM;
-		goto out;
-	}
-
-	if (!is_mon_groups(new_parent, new_name)) {
-		rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
-		ret = -EPERM;
-		goto out;
-	}
-
-	/*
-	 * If the MON group is monitoring CPUs, the CPUs must be assigned to the
-	 * current parent CTRL_MON group and therefore cannot be assigned to
-	 * the new parent, making the move illegal.
-	 */
-	if (!cpumask_empty(&rdtgrp->cpu_mask) &&
-	    rdtgrp->mon.parent != new_prdtgrp) {
-		rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
-		ret = -EPERM;
-		goto out;
-	}
-
-	/*
-	 * Allocate the cpumask for use in mongrp_reparent() to avoid the
-	 * possibility of failing to allocate it after kernfs_rename() has
-	 * succeeded.
-	 */
-	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	/*
-	 * Perform all input validation and allocations needed to ensure
-	 * mongrp_reparent() will succeed before calling kernfs_rename(),
-	 * otherwise it would be necessary to revert this call if
-	 * mongrp_reparent() failed.
-	 */
-	ret = kernfs_rename(kn, new_parent, new_name);
-	if (!ret)
-		mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
-
-	free_cpumask_var(tmpmask);
-
-out:
-	mutex_unlock(&rdtgroup_mutex);
-	rdtgroup_kn_put(rdtgrp, kn);
-	rdtgroup_kn_put(new_prdtgrp, new_parent);
-	return ret;
-}
-
-static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
-{
-	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
-		seq_puts(seq, ",cdp");
-
-	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
-		seq_puts(seq, ",cdpl2");
-
-	if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA)))
-		seq_puts(seq, ",mba_MBps");
-
-	if (is_hwdrc_enabled(resctrl_arch_get_resource(RDT_RESOURCE_MBA)))
-		seq_puts(seq, ",hwdrc_mb");
-
-	if (resctrl_debug)
-		seq_puts(seq, ",debug");
-
-	return 0;
-}
-
-static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
-	.mkdir		= rdtgroup_mkdir,
-	.rmdir		= rdtgroup_rmdir,
-	.rename		= rdtgroup_rename,
-	.show_options	= rdtgroup_show_options,
-};
-
-static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
-{
-	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
-				      KERNFS_ROOT_CREATE_DEACTIVATED |
-				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
-				      &rdtgroup_default);
-	if (IS_ERR(rdt_root))
-		return PTR_ERR(rdt_root);
-
-	ctx->kfc.root = rdt_root;
-	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
-
-	return 0;
-}
-
-static void rdtgroup_destroy_root(void)
-{
-	kernfs_destroy_root(rdt_root);
-	rdtgroup_default.kn = NULL;
-}
-
-static void rdtgroup_setup_default(void)
-{
-	mutex_lock(&rdtgroup_mutex);
-
-	rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
-	rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
-	rdtgroup_default.type = RDTCTRL_GROUP;
-	rdtgroup_default.mon.cntr_id[0] = MON_CNTR_UNSET;
-	rdtgroup_default.mon.cntr_id[1] = MON_CNTR_UNSET;
-
-	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
-
-	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
-
-	mutex_unlock(&rdtgroup_mutex);
-}
-
-static void domain_destroy_mon_state(struct rdt_domain *d)
-{
-	bitmap_free(d->mbm_cntr_map);
-	bitmap_free(d->rmid_busy_llc);
-	kfree(d->mbm_total);
-	kfree(d->mbm_local);
-}
-
-void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
-{
-	mutex_lock(&rdtgroup_mutex);
-
-	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
-		mba_sc_domain_destroy(r, d);
-
-	if (!r->mon_capable)
-		goto out_unlock;
-
-	/*
-	 * If resctrl is mounted, remove all the
-	 * per domain monitor data directories.
-	 */
-	if (resctrl_mounted && resctrl_arch_mon_capable())
-		rmdir_mondata_subdir_allrdtgrp(r, d->id);
-
-	if (resctrl_is_mbm_enabled())
-		cancel_delayed_work(&d->mbm_over);
-	if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
-		/*
-		 * When a package is going down, forcefully
-		 * decrement rmid->ebusy. There is no way to know
-		 * that the L3 was flushed and hence may lead to
-		 * incorrect counts in rare scenarios, but leaving
-		 * the RMID as busy creates RMID leaks if the
-		 * package never comes back.
-		 */
-		__check_limbo(d, true);
-		cancel_delayed_work(&d->cqm_limbo);
-	}
-
-	domain_destroy_mon_state(d);
-
-out_unlock:
-	mutex_unlock(&rdtgroup_mutex);
-}
-
-static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
-{
-	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
-	size_t tsize;
-
-	if (resctrl_arch_is_llc_occupancy_enabled()) {
-		d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
-		if (!d->rmid_busy_llc)
-			return -ENOMEM;
-	}
-	if (resctrl_arch_is_mbm_total_enabled()) {
-		tsize = sizeof(*d->mbm_total);
-		d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
-		if (!d->mbm_total) {
-			bitmap_free(d->rmid_busy_llc);
-			return -ENOMEM;
-		}
-	}
-	if (resctrl_arch_is_mbm_local_enabled()) {
-		tsize = sizeof(*d->mbm_local);
-		d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
-		if (!d->mbm_local) {
-			bitmap_free(d->rmid_busy_llc);
-			kfree(d->mbm_total);
-			return -ENOMEM;
-		}
-	}
-	if (resctrl_is_mbm_enabled()) {
-		d->mbm_cntr_map = bitmap_zalloc(r->mon.num_mbm_cntrs, GFP_KERNEL);
-		if (!d->mbm_cntr_map) {
-			bitmap_free(d->rmid_busy_llc);
-			kfree(d->mbm_total);
-			kfree(d->mbm_local);
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
-int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
-{
-	int err = 0;
-
-	mutex_lock(&rdtgroup_mutex);
-
-	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
-		/* RDT_RESOURCE_MBA is never mon_capable */
-		err = mba_sc_domain_allocate(r, d);
-		goto out_unlock;
-	}
-
-	if (!r->mon_capable)
-		goto out_unlock;
-
-	err = domain_setup_mon_state(r, d);
-	if (err)
-		goto out_unlock;
-
-	if (resctrl_is_mbm_enabled()) {
-		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
-		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
-					   RESCTRL_PICK_ANY_CPU);
-	}
-
-	if (resctrl_arch_is_llc_occupancy_enabled())
-		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
-
-	/*
-	 * If the filesystem is not mounted then only the default resource group
-	 * exists. Creation of its directories is deferred until mount time
-	 * by rdt_get_tree() calling mkdir_mondata_all().
-	 * If resctrl is mounted, add per domain monitor data directories.
-	 */
-	if (resctrl_mounted && resctrl_arch_mon_capable())
-		mkdir_mondata_subdir_allrdtgrp(r, d);
-
-out_unlock:
-	mutex_unlock(&rdtgroup_mutex);
-
-	return err;
-}
-
-void resctrl_online_cpu(unsigned int cpu)
-{
-	mutex_lock(&rdtgroup_mutex);
-	/* The CPU is set in default rdtgroup after online. */
-	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
-	mutex_unlock(&rdtgroup_mutex);
-}
-
-static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
-{
-	struct rdtgroup *cr;
-
-	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
-		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
-			break;
-	}
-}
-
-void resctrl_offline_cpu(unsigned int cpu)
-{
-	struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
-	struct rdtgroup *rdtgrp;
-	struct rdt_domain *d;
-
-	mutex_lock(&rdtgroup_mutex);
-	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
-		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
-			clear_childcpus(rdtgrp, cpu);
-			break;
-		}
-	}
-
-	if (!l3->mon_capable)
-		goto out_unlock;
-
-	d = resctrl_get_domain_from_cpu(cpu, l3);
-	if (d) {
-		if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) {
-			cancel_delayed_work(&d->mbm_over);
-			mbm_setup_overflow_handler(d, 0, cpu);
-		}
-		if (resctrl_arch_is_llc_occupancy_enabled() &&
-		    cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
-			cancel_delayed_work(&d->cqm_limbo);
-			cqm_setup_limbo_handler(d, 0, cpu);
-		}
-	}
-
-out_unlock:
-	mutex_unlock(&rdtgroup_mutex);
-}
-
-/*
- * resctrl_init - resctrl filesystem initialization
- *
- * Setup resctrl file system including set up root, create mount point,
- * register resctrl filesystem, and initialize files under root directory.
- *
- * Return: 0 on success or -errno
- */
-int resctrl_init(void)
-{
-	int ret = 0;
-
-	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
-		     sizeof(last_cmd_status_buf));
-
-	rdtgroup_setup_default();
-
-	resctrl_file_fflags_init("thread_throttle_mode",
-				 RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
-#ifdef CONFIG_X86
-	io_alloc_init();
-#endif
-
-	ret = resctrl_mon_resource_init();
-	if (ret)
-		return ret;
-
-	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
-	if (ret)
-		return ret;
-
-	ret = register_filesystem(&rdt_fs_type);
-	if (ret)
-		goto cleanup_mountpoint;
-
-	/*
-	 * Adding the resctrl debugfs directory here may not be ideal since
-	 * it would let the resctrl debugfs directory appear on the debugfs
-	 * filesystem before the resctrl filesystem is mounted.
-	 * It may also be ok since that would enable debugging of RDT before
-	 * resctrl is mounted.
-	 * The reason why the debugfs directory is created here and not in
-	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
-	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
-	 * (the lockdep class of inode->i_rwsem). Other filesystem
-	 * interactions (eg. SyS_getdents) have the lock ordering:
-	 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
-	 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
-	 * is taken, thus creating dependency:
-	 * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
-	 * issues considering the other two lock dependencies.
-	 * By creating the debugfs directory here we avoid a dependency
-	 * that may cause deadlock (even though file operations cannot
-	 * occur until the filesystem is mounted, but I do not know how to
-	 * tell lockdep that).
-	 */
-	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
-
-	return 0;
-
-cleanup_mountpoint:
-	sysfs_remove_mount_point(fs_kobj, "resctrl");
-
-	return ret;
-}
-
-void resctrl_exit(void)
-{
-	debugfs_remove_recursive(debugfs_resctrl);
-	unregister_filesystem(&rdt_fs_type);
-	sysfs_remove_mount_point(fs_kobj, "resctrl");
-
-	resctrl_mon_resource_exit();
-}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b339abbfa2a4841599976e6ab89d11e0364e5188..8ac16b6967d8d6119da7a75b1ec83534f57d06b1 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1493,11 +1493,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level);
 int find_acpi_cpu_topology_cluster(unsigned int cpu);
 int find_acpi_cpu_topology_package(unsigned int cpu);
 int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
-int find_acpi_cache_level_from_id(u32 cache_id);
-int acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus);
-int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus);
-int acpi_pptt_get_cpumask_from_cache_id_and_level(u32 cache_id, u32 cache_level,
-						  cpumask_t *cpus);
 #else
 static inline int acpi_pptt_cpu_is_thread(unsigned int cpu)
 {
@@ -1519,26 +1514,6 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
 {
 	return -EINVAL;
 }
-static inline int find_acpi_cache_level_from_id(u32 cache_id)
-{
-	return -EINVAL;
-}
-static inline int acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id,
-						    cpumask_t *cpus)
-{
-	return -EINVAL;
-}
-static inline int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id,
-						      cpumask_t *cpus)
-{
-	return -EINVAL;
-}
-static inline int acpi_pptt_get_cpumask_from_cache_id_and_level(u32 cache_id,
-								u32 cache_level,
-								cpumask_t *cpus)
-{
-	return -EINVAL;
-}
 #endif
 
 #ifdef CONFIG_ARM64
@@ -1577,8 +1552,4 @@ static inline void acpi_device_notify(struct device *dev) { }
 static inline void acpi_device_notify_remove(struct device *dev) { }
 #endif
 
-struct acpi_pptt_processor *
-acpi_pptt_find_cache_backwards(struct acpi_table_header *table_hdr,
-			       struct acpi_pptt_cache *cache);
-
 #endif	/*_LINUX_ACPI_H*/
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
deleted file mode 100644
index c7698dda09fb86d1598cf0bbb94a64d230572ab9..0000000000000000000000000000000000000000
--- a/include/linux/arm_mpam.h
+++ /dev/null
@@ -1,160 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2021 Arm Ltd.
-
-#ifndef __LINUX_ARM_MPAM_H
-#define __LINUX_ARM_MPAM_H
-
-#include <linux/acpi.h>
-#include <linux/resctrl_types.h>
-#include <linux/types.h>
-
-/*
- * The value of the MPAM1_EL1 sysreg when a task is in the default group.
- * This is used by the context switch code to use the resctrl CPU property
- * instead. The value is modified when CDP is enabled/disabled by mounting
- * the resctrl filesystem.
- */
-extern u64 mpam_resctrl_default_group;
-
-#include <asm/mpam.h>
-
-struct mpam_msc;
-
-enum mpam_msc_iface {
-	MPAM_IFACE_MMIO,	/* a real MPAM MSC */
-	MPAM_IFACE_PCC,		/* a fake MPAM MSC */
-};
-
-enum mpam_class_types {
-	MPAM_CLASS_CACHE,       /* Well known caches, e.g. L2 */
-	MPAM_CLASS_MEMORY,      /* Main memory */
-	MPAM_CLASS_UNKNOWN,     /* Everything else, e.g. SMMU */
-};
-
-enum mpam_machine_type {
-	MPAM_DEFAULT_MACHINE,
-	MPAM_YITIAN710,
-
-	MPAM_NUM_MACHINE_TYPES,
-};
-
-/* Machine identifier which can be used for vendor-specific MPAM features */
-extern enum mpam_machine_type mpam_current_machine;
-
-#ifdef CONFIG_ACPI_MPAM
-/* Parse the ACPI description of resources entries for this MSC. */
-int acpi_mpam_parse_resources(struct mpam_msc *msc,
-			      struct acpi_mpam_msc_node *tbl_msc);
-int acpi_mpam_count_msc(void);
-enum mpam_machine_type acpi_mpam_get_machine_type(void);
-#else
-static inline int acpi_mpam_parse_resources(struct mpam_msc *msc,
-					    struct acpi_mpam_msc_node *tbl_msc)
-{
-	return -EINVAL;
-}
-static inline int acpi_mpam_count_msc(void) { return -EINVAL; }
-static inline enum mpam_machine_type acpi_mpam_get_machine_type(void)
-{
-	return MPAM_DEFAULT_MACHINE;
-}
-#endif
-
-int mpam_register_requestor(u16 partid_max, u8 pmg_max);
-
-int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
-		    enum mpam_class_types type, u8 class_id, int component_id);
-
-static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
-{
-	return val;
-}
-
-/* MPAM counters requires a monitor to be allocated */
-static inline bool resctrl_arch_event_is_free_running(enum resctrl_event_id evt)
-{
-	return false;
-}
-
-bool resctrl_arch_alloc_capable(void);
-bool resctrl_arch_mon_capable(void);
-bool resctrl_arch_is_llc_occupancy_enabled(void);
-bool resctrl_arch_is_mbm_local_enabled(void);
-bool resctrl_arch_is_mbm_total_enabled(void);
-bool resctrl_arch_is_mbm_bps_enabled(void);
-
-static inline bool resctrl_arch_is_hwdrc_mb_capable(void)
-{
-	return false;
-}
-
-static inline int resctrl_arch_set_hwdrc_enabled(enum resctrl_res_level ignored, bool hwdrc_mb)
-{
-	return hwdrc_mb ? -EINVAL : 0;
-}
-
-/* reset cached configurations, then all devices */
-void resctrl_arch_reset_resources(void);
-
-bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level ignored);
-int resctrl_arch_set_cdp_enabled(enum resctrl_res_level ignored, bool enable);
-bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid);
-bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid);
-void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid);
-void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid);
-void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 pmg);
-void resctrl_arch_sched_in(struct task_struct *tsk);
-u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid);
-void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid);
-u32 resctrl_arch_system_num_rmid_idx(void);
-
-struct rdt_resource;
-void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid);
-void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid, void *ctx);
-
-/* Pseudo lock is not supported by MPAM */
-static inline int resctrl_arch_pseudo_lock_fn(void *_plr) { return 0; }
-static inline int resctrl_arch_measure_l2_residency(void *_plr) { return 0; }
-static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; }
-static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; }
-static inline u64 resctrl_arch_get_prefetch_disable_bits(void) { return 0; }
-
-/*
- * The CPU configuration for MPAM is cheap to write, and is only written if it
- * has changed. No need for fine grained enables.
- */
-static inline void resctrl_arch_enable_mon(void) { }
-static inline void resctrl_arch_disable_mon(void) { }
-static inline void resctrl_arch_enable_alloc(void) { }
-static inline void resctrl_arch_disable_alloc(void) { }
-
-static inline void resctrl_arch_mbm_cntr_assign_configure(void) { }
-
-static inline bool resctrl_arch_get_abmc_enabled(void)
-{
-	return false;
-}
-
-static inline int resctrl_arch_mbm_cntr_assign_enable(void)
-{
-	return -EINVAL;
-}
-
-static inline void resctrl_arch_mbm_cntr_assign_disable(void) { }
-
-void resctrl_arch_event_config_set(void *info);
-u32 resctrl_arch_event_config_get(void *dom,
-						enum resctrl_event_id eventid);
-
-static inline bool resctrl_arch_get_mbm_cntr_assign_enable(void)
-{
-	return false;
-}
-
-static inline int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid,
-			     u32 rmid, u32 cntr_id, u32 closid, bool assign)
-{
-	return -EINVAL;
-}
-
-#endif /* __LINUX_ARM_MPAM_H */
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 251e8b393ec7282caf889438b6999fc74a297735..d504eb4b49abec9a1620ecc3bb3ae5651fd64f16 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -47,7 +47,7 @@ extern unsigned int coherency_max_size;
  * keeping, the remaining members form the core properties of the cache
  */
 struct cacheinfo {
-	unsigned long id;
+	unsigned int id;
 	enum cache_type type;
 	unsigned int level;
 	unsigned int coherency_line_size;
@@ -111,13 +111,12 @@ int acpi_get_cache_info(unsigned int cpu,
 #endif
 
 const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf);
-unsigned long cache_of_get_id(struct device_node *np);
 
 /*
  * Get the id of the cache associated with @cpu at level @level.
  * cpuhp lock must be held.
  */
-static inline unsigned long get_cpu_cacheinfo_id(int cpu, int level)
+static inline int get_cpu_cacheinfo_id(int cpu, int level)
 {
 	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
 	int i;
@@ -126,32 +125,11 @@ static inline unsigned long get_cpu_cacheinfo_id(int cpu, int level)
 		if (ci->info_list[i].level == level) {
 			if (ci->info_list[i].attributes & CACHE_ID)
 				return ci->info_list[i].id;
-			return ~0UL;
+			return -1;
 		}
 	}
 
-	return ~0UL;
-}
-
-/*
- * Get the size of the cache associated with @cpu at level @level.
- * cpuhp lock must be held.
- */
-static inline unsigned int get_cpu_cacheinfo_size(int cpu, int level)
-{
-	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
-	int i;
-
-	if (!ci->info_list)
-		return 0;
-
-	for (i = 0; i < ci->num_leaves; i++) {
-		if (ci->info_list[i].level == level) {
-			return ci->info_list[i].size;
-		}
-	}
-
-	return 0;
+	return -1;
 }
 
 #ifdef CONFIG_ARM64
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 01bef03c0eddd4af484fe79e54a53931e2699784..a365f67131eceffe2768fe06af778d2e2360ff92 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -2,17 +2,9 @@
 #ifndef _RESCTRL_H
 #define _RESCTRL_H
 
-#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/pid.h>
-#include <linux/resctrl_types.h>
-
-#ifdef CONFIG_ARCH_HAS_CPU_RESCTRL
-#include <asm/resctrl.h>
-#endif
-
-extern struct mutex rdtgroup_mutex;
 
 /* CLOSID, RMID value used by the default control group */
 #define RESCTRL_RESERVED_CLOSID		0
@@ -32,52 +24,28 @@ int proc_resctrl_show(struct seq_file *m,
 /* max value for struct rdt_domain's mbps_val */
 #define MBA_MAX_MBPS   U32_MAX
 
-/*
- * Resctrl uses u32 to hold the user-space config. The maximum bitmap size is
- * 32.
+/**
+ * enum resctrl_conf_type - The type of configuration.
+ * @CDP_NONE:	No prioritisation, both code and data are controlled or monitored.
+ * @CDP_CODE:	Configuration applies to instruction fetches.
+ * @CDP_DATA:	Configuration applies to reads and writes.
  */
-#define RESCTRL_MAX_CBM			32
+enum resctrl_conf_type {
+	CDP_NONE,
+	CDP_CODE,
+	CDP_DATA,
+};
 
-extern unsigned int resctrl_rmid_realloc_limit;
-extern unsigned int resctrl_rmid_realloc_threshold;
+#define CDP_NUM_TYPES	(CDP_DATA + 1)
 
-/**
- * struct pseudo_lock_region - pseudo-lock region information
- * @s:			Resctrl schema for the resource to which this
- *			pseudo-locked region belongs
- * @closid:		The closid that this pseudo-locked region uses
- * @d:			RDT domain to which this pseudo-locked region
- *			belongs
- * @cbm:		bitmask of the pseudo-locked region
- * @lock_thread_wq:	waitqueue used to wait on the pseudo-locking thread
- *			completion
- * @thread_done:	variable used by waitqueue to test if pseudo-locking
- *			thread completed
- * @cpu:		core associated with the cache on which the setup code
- *			will be run
- * @line_size:		size of the cache lines
- * @size:		size of pseudo-locked region in bytes
- * @kmem:		the kernel memory associated with pseudo-locked region
- * @minor:		minor number of character device associated with this
- *			region
- * @debugfs_dir:	pointer to this region's directory in the debugfs
- *			filesystem
- * @pm_reqs:		Power management QoS requests related to this region
+/*
+ * Event IDs, the values match those used to program IA32_QM_EVTSEL before
+ * reading IA32_QM_CTR on RDT systems.
  */
-struct pseudo_lock_region {
-	struct resctrl_schema	*s;
-	u32			closid;
-	struct rdt_domain	*d;
-	u32			cbm;
-	wait_queue_head_t	lock_thread_wq;
-	int			thread_done;
-	int			cpu;
-	unsigned int		line_size;
-	unsigned int		size;
-	void			*kmem;
-	unsigned int		minor;
-	struct dentry		*debugfs_dir;
-	struct list_head	pm_reqs;
+enum resctrl_event_id {
+	QOS_L3_OCCUP_EVENT_ID		= 0x01,
+	QOS_L3_MBM_TOTAL_EVENT_ID	= 0x02,
+	QOS_L3_MBM_LOCAL_EVENT_ID	= 0x03,
 };
 
 /**
@@ -102,7 +70,6 @@ struct resctrl_staged_config {
  * @cqm_limbo:		worker to periodically read CQM h/w counters
  * @mbm_work_cpu:	worker CPU for MBM h/w counters
  * @cqm_work_cpu:	worker CPU for CQM h/w counters
- * @mbm_cntr_map:	bitmap to track domain counter assignment
  * @plr:		pseudo-locked region (if any) associated with domain
  * @staged_config:	parsed configuration to be applied
  * @mbps_val:		When mba_sc is enabled, this holds the array of user
@@ -120,7 +87,6 @@ struct rdt_domain {
 	struct delayed_work		cqm_limbo;
 	int				mbm_work_cpu;
 	int				cqm_work_cpu;
-	unsigned long			*mbm_cntr_map;
 	struct pseudo_lock_region	*plr;
 	struct resctrl_staged_config	staged_config[CDP_NUM_TYPES];
 	u32				*mbps_val;
@@ -137,8 +103,6 @@ struct rdt_domain {
  * @arch_has_sparse_bitmasks:	True if a bitmask like f00f is valid.
  * @arch_has_per_cpu_cfg:	True if QOS_CFG register for this cache
  *				level has CPU scope.
- * @io_alloc_capable:	True if portion of the cache can be configured
- *			for I/O traffic.
  */
 struct resctrl_cache {
 	unsigned int	cbm_len;
@@ -146,7 +110,6 @@ struct resctrl_cache {
 	unsigned int	shareable_bits;
 	bool		arch_has_sparse_bitmasks;
 	bool		arch_has_per_cpu_cfg;
-	bool		io_alloc_capable;
 };
 
 /**
@@ -172,7 +135,6 @@ enum membw_throttle_mode {
  * @throttle_mode:	Bandwidth throttling mode when threads request
  *			different memory bandwidths
  * @mba_sc:		True if MBA software controller(mba_sc) is enabled
- * @hwdrc_mb:		True if memory bandwidth HWDRC is enabled
  * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
  */
 struct resctrl_membw {
@@ -182,29 +144,18 @@ struct resctrl_membw {
 	bool				arch_needs_linear;
 	enum membw_throttle_mode	throttle_mode;
 	bool				mba_sc;
-	bool				hwdrc_mb;
 	u32				*mb_map;
 };
 
-/**
- * struct resctrl_mon - Monitoring related data
- * @num_rmid:		Number of RMIDs available
- * @num_mbm_cntrs:	Number of monitoring counters
- * @mbm_cntr_assignable:Is system capable of supporting monitor assignment?
- * @evt_list:		List of monitoring events
- */
-struct resctrl_mon {
-	int			num_rmid;
-	int			num_mbm_cntrs;
-	bool			mbm_cntr_assignable;
-	struct list_head	evt_list;
-};
+struct rdt_parse_data;
+struct resctrl_schema;
 
 /**
  * struct rdt_resource - attributes of a resctrl resource
  * @rid:		The index of the resource
  * @alloc_capable:	Is allocation available on this machine
  * @mon_capable:	Is monitor feature available on this machine
+ * @num_rmid:		Number of RMIDs available
  * @cache_level:	Which cache level defines scope of this resource
  * @cache:		Cache allocation related data
  * @membw:		If the component has bandwidth controls, their properties.
@@ -213,37 +164,32 @@ struct resctrl_mon {
  * @data_width:		Character width of data when displaying
  * @default_ctrl:	Specifies default cache cbm or memory B/W percent.
  * @format_str:		Per resource format string to show domain value
+ * @parse_ctrlval:	Per resource function pointer to parse control values
+ * @evt_list:		List of monitoring events
  * @fflags:		flags to choose base and info files
- * @mbm_cfg_mask:	Bandwidth sources that can be tracked when Bandwidth
- *			Monitoring Event Configuration (BMEC) is supported.
  * @cdp_capable:	Is the CDP feature available on this resource
  */
 struct rdt_resource {
 	int			rid;
 	bool			alloc_capable;
 	bool			mon_capable;
+	int			num_rmid;
 	int			cache_level;
 	struct resctrl_cache	cache;
 	struct resctrl_membw	membw;
-	struct resctrl_mon mon;
 	struct list_head	domains;
 	char			*name;
 	int			data_width;
 	u32			default_ctrl;
 	const char		*format_str;
+	int			(*parse_ctrlval)(struct rdt_parse_data *data,
+						 struct resctrl_schema *s,
+						 struct rdt_domain *d);
+	struct list_head	evt_list;
 	unsigned long		fflags;
-	unsigned int		mbm_cfg_mask;
 	bool			cdp_capable;
-	struct rdt_domain	*rdt_domain_list[NR_CPUS];
 };
 
-/*
- * Get the resource that exists at this level. If the level is not supported
- * a dummy/not-capable resource can be returned. Levels >= RDT_NUM_RESOURCES
- * will return NULL.
- */
-struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l);
-
 /**
  * struct resctrl_schema - configuration abilities of a resource presented to
  *			   user-space
@@ -264,95 +210,10 @@ struct resctrl_schema {
 	u32				num_closid;
 };
 
-struct resctrl_cpu_sync {
-	u32 closid;
-	u32 rmid;
-};
-
-struct resctrl_mon_config_info {
-	struct rdt_resource *r;
-	struct rdt_domain   *d;
-	u32                  evtid;
-	u32                  mon_config;
-	int                  err;
-};
-
-/*
- * Assignment flags for ABMC feature
- */
-#define ASSIGN_NONE			0
-#define ASSIGN_TOTAL			BIT(QOS_L3_MBM_TOTAL_EVENT_ID)
-#define ASSIGN_LOCAL			BIT(QOS_L3_MBM_LOCAL_EVENT_ID)
-
-/**
- * mon_event_config_index_get - get the hardware index for the
- *                              configurable event
- * @evtid: event id.
- *
- * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
- *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
- *         INVALID_CONFIG_INDEX for invalid evtid
- */
-static inline unsigned int mon_event_config_index_get(u32 evtid)
-{
-	switch (evtid) {
-	case QOS_L3_MBM_TOTAL_EVENT_ID:
-		return 0;
-	case QOS_L3_MBM_LOCAL_EVENT_ID:
-		return 1;
-	default:
-		/* Should never reach here */
-		return INVALID_CONFIG_INDEX;
-	}
-}
-
-/*
- * Update and re-load this CPUs defaults. Called via IPI, takes a pointer to
- * struct resctrl_cpu_sync, or NULL.
- */
-void resctrl_arch_sync_cpu_defaults(void *info);
-
 /* The number of closid supported by this resource regardless of CDP */
 u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
-
-struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id);
 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
 
-bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
-
-/* For use by arch code to remap resctrl's smaller CDP CLOSID range */
-static inline u32 resctrl_get_config_index(u32 closid,
-					   enum resctrl_conf_type type)
-{
-	switch (type) {
-	default:
-	case CDP_NONE:
-		return closid;
-	case CDP_CODE:
-			return (closid * 2) + 1;
-	case CDP_DATA:
-			return (closid * 2);
-	}
-}
-
-/*
- * Caller must hold the cpuhp read lock to prevent the struct rdt_domain being
- * freed.
- */
-static inline struct rdt_domain *
-resctrl_get_domain_from_cpu(int cpu, struct rdt_resource *r)
-{
-	struct rdt_domain *d;
-
-	list_for_each_entry_rcu(d, &r->domains, list) {
-		/* Find the domain that contains this CPU */
-		if (cpumask_test_cpu(cpu, &d->cpu_mask))
-			return d;
-	}
-
-	return NULL;
-}
-
 /*
  * Update the ctrl_val and apply this config right now.
  * Must be called on one of the domain's CPUs.
@@ -440,32 +301,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
  */
 void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d);
 
-/**
- * resctrl_arch_io_alloc_enable() - Enable/disable io_alloc feature.
- * @r:		The resctrl resource.
- * @enable:	Enable (true) or disable (false) io_alloc on resource @r.
- *
- * This can be called from any CPU.
- *
- * Return:
- * 0 on success, <0 on error.
- */
-int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable);
-
-/**
- * resctrl_arch_get_io_alloc_enabled() - Get io_alloc feature state.
- * @r:		The resctrl resource.
- *
- * Return:
- * true if io_alloc is enabled or false if disabled.
- */
-bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r);
-
 extern unsigned int resctrl_rmid_realloc_threshold;
 extern unsigned int resctrl_rmid_realloc_limit;
-void resctrl_file_fflags_init(const char *config, unsigned long fflags);
-
-int resctrl_init(void);
-void resctrl_exit(void);
 
 #endif /* _RESCTRL_H */
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
deleted file mode 100644
index 7c6dad3df4ff9baa0e3f03cc38f3803bc016ee04..0000000000000000000000000000000000000000
--- a/include/linux/resctrl_types.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2024 Arm Ltd.
- * Based on arch/x86/kernel/cpu/resctrl/internal.h
- */
-
-#ifndef __LINUX_RESCTRL_TYPES_H
-#define __LINUX_RESCTRL_TYPES_H
-
-#define INVALID_CONFIG_VALUE	U32_MAX
-#define INVALID_CONFIG_INDEX	UINT_MAX
-
-#define CQM_LIMBOCHECK_INTERVAL	1000
-
-#define MBM_CNTR_WIDTH_BASE		24
-#define MBM_OVERFLOW_INTERVAL		1000
-#define MAX_MBA_BW			100u
-#define MBA_IS_LINEAR			0x4
-
-/* rdtgroup.flags */
-#define	RDT_DELETED		1
-
-/* rftype.flags */
-#define RFTYPE_FLAGS_CPUS_LIST	1
-
-/*
- * Define the file type flags for base and info directories.
- */
-#define RFTYPE_INFO			BIT(0)
-#define RFTYPE_BASE			BIT(1)
-#define RFTYPE_CTRL			BIT(4)
-#define RFTYPE_MON			BIT(5)
-#define RFTYPE_TOP			BIT(6)
-#define RFTYPE_RES_CACHE		BIT(8)
-#define RFTYPE_RES_MB			BIT(9)
-#define RFTYPE_DEBUG			BIT(10)
-#define RFTYPE_CTRL_INFO		(RFTYPE_INFO | RFTYPE_CTRL)
-#define RFTYPE_MON_INFO			(RFTYPE_INFO | RFTYPE_MON)
-#define RFTYPE_TOP_INFO			(RFTYPE_INFO | RFTYPE_TOP)
-#define RFTYPE_CTRL_BASE		(RFTYPE_BASE | RFTYPE_CTRL)
-#define RFTYPE_MON_BASE			(RFTYPE_BASE | RFTYPE_MON)
-
-/* Max event bits supported */
-#define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
-
-/**
- * enum resctrl_conf_type - The type of configuration.
- * @CDP_NONE:	No prioritisation, both code and data are controlled or monitored.
- * @CDP_CODE:	Configuration applies to instruction fetches.
- * @CDP_DATA:	Configuration applies to reads and writes.
- */
-enum resctrl_conf_type {
-	CDP_NONE,
-	CDP_CODE,
-	CDP_DATA,
-};
-
-enum resctrl_res_level {
-	RDT_RESOURCE_L3,
-	RDT_RESOURCE_L2,
-	RDT_RESOURCE_MBA,
-	RDT_RESOURCE_SMBA,
-
-	/* Must be the last */
-	RDT_NUM_RESOURCES,
-};
-
-#define CDP_NUM_TYPES	(CDP_DATA + 1)
-
-/*
- * Event IDs, the values match those used to program IA32_QM_EVTSEL before
- * reading IA32_QM_CTR on RDT systems.
- */
-enum resctrl_event_id {
-	QOS_L3_OCCUP_EVENT_ID		= 0x01,
-	QOS_L3_MBM_TOTAL_EVENT_ID	= 0x02,
-	QOS_L3_MBM_LOCAL_EVENT_ID	= 0x03,
-	QOS_MC_MBM_BPS_EVENT_ID		= 0x04,
-};
-
-#define RESCTRL_MAX_EVENT_NUM		4
-
-#endif /* __LINUX_RESCTRL_TYPES_H */