From 5928d841409ab31ac7aa7eca9deb515f1509677e Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 9 Mar 2026 14:32:58 +0800 Subject: [PATCH 1/6] PCI: pciehp: Detect device replacement during system sleep mainline inclusion from v6.11-rc1 commit 9d573d19547b3fae0c1d4e5fce52bdad3fda3664 upstream. category: feature ------------------- Ricky reports that replacing a device in a hotplug slot during ACPI sleep state S3 does not cause re-enumeration on resume, as one would expect. Instead, the new device is treated as if it was the old one. There is no bulletproof way to detect device replacement, but as a heuristic, check whether the device identity in config space matches cached data in struct pci_dev (Vendor ID, Device ID, Class Code, Revision ID, Subsystem Vendor ID, Subsystem ID). Additionally, cache and compare the Device Serial Number (PCIe r6.2 sec 7.9.3). If a mismatch is detected, mark the old device disconnected (to prevent its driver from accessing the new device) and synthesize a Presence Detect Changed event. The device identity in config space which is compared here is the same as the one included in the signed Subject Alternative Name per PCIe r6.1 sec 6.31.3. Thus, the present commit prevents attacks where a valid device is replaced with a malicious device during system sleep and the valid device's driver obliviously accesses the malicious device. This is about as much as can be done at the PCI layer. Drivers may have additional ways to identify devices (such as reading a WWID from some register) and may trigger re-enumeration when detecting an identity change on resume. 
Link: https://lore.kernel.org/r/a1afaa12f341d146ecbea27c1743661c71683833.1716992815.git.lukas@wunner.de Reported-by: Ricky Wu Closes: https://lore.kernel.org/r/a608b5930d0a48f092f717c0e137454b@realtek.com Tested-by: Ricky Wu Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Signed-off-by: leoliu-oc --- drivers/pci/hotplug/pciehp.h | 4 +++ drivers/pci/hotplug/pciehp_core.c | 42 ++++++++++++++++++++++++++++++- drivers/pci/hotplug/pciehp_hpc.c | 5 ++++ drivers/pci/hotplug/pciehp_pci.c | 4 +++ 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index e0a614acee05..273dd8c66f4e 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -46,6 +46,9 @@ extern int pciehp_poll_time; /** * struct controller - PCIe hotplug controller * @pcie: pointer to the controller's PCIe port service device + * @dsn: cached copy of Device Serial Number of Function 0 in the hotplug slot + * (PCIe r6.2 sec 7.9.3); used to determine whether a hotplugged device + * was replaced with a different one during system sleep * @slot_cap: cached copy of the Slot Capabilities register * @inband_presence_disabled: In-Band Presence Detect Disable supported by * controller and disabled per spec recommendation (PCIe r5.0, appendix I @@ -87,6 +90,7 @@ extern int pciehp_poll_time; */ struct controller { struct pcie_device *pcie; + u64 dsn; u32 slot_cap; /* capabilities and quirks */ unsigned int inband_presence_disabled:1; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 4042d87d539d..c69e7e8d5618 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -283,6 +283,32 @@ static int pciehp_suspend(struct pcie_device *dev) return 0; } +static bool pciehp_device_replaced(struct controller *ctrl) +{ + struct pci_dev *pdev __free(pci_dev_put); + u32 reg; + + pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); + if (!pdev) + 
return true; + + if (pci_read_config_dword(pdev, PCI_VENDOR_ID, &reg) || + reg != (pdev->vendor | (pdev->device << 16)) || + pci_read_config_dword(pdev, PCI_CLASS_REVISION, &reg) || + reg != (pdev->revision | (pdev->class << 8))) + return true; + + if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, &reg) || + reg != (pdev->subsystem_vendor | (pdev->subsystem_device << 16)))) + return true; + + if (pci_get_dsn(pdev) != ctrl->dsn) + return true; + + return false; +} + static int pciehp_resume_noirq(struct pcie_device *dev) { struct controller *ctrl = get_service_data(dev); @@ -292,9 +318,23 @@ static int pciehp_resume_noirq(struct pcie_device *dev) ctrl->cmd_busy = true; /* clear spurious events from rediscovery of inserted card */ - if (ctrl->state == ON_STATE || ctrl->state == BLINKINGOFF_STATE) + if (ctrl->state == ON_STATE || ctrl->state == BLINKINGOFF_STATE) { pcie_clear_hotplug_events(ctrl); + /* + * If hotplugged device was replaced with a different one + * during system sleep, mark the old device disconnected + * (to prevent its driver from accessing the new device) + * and synthesize a Presence Detect Changed event. 
+ */ + if (pciehp_device_replaced(ctrl)) { + ctrl_dbg(ctrl, "device replaced during system sleep\n"); + pci_walk_bus(ctrl->pcie->port->subordinate, + pci_dev_set_disconnected, NULL); + pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC); + } + } + return 0; } #endif diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index b0bccc4d0da2..c5b591830406 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -1056,6 +1056,11 @@ struct controller *pcie_init(struct pcie_device *dev) } } + pdev = pci_get_slot(subordinate, PCI_DEVFN(0, 0)); + if (pdev) + ctrl->dsn = pci_get_dsn(pdev); + pci_dev_put(pdev); + return ctrl; } diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c index ad12515a4a12..65e50bee1a8c 100644 --- a/drivers/pci/hotplug/pciehp_pci.c +++ b/drivers/pci/hotplug/pciehp_pci.c @@ -72,6 +72,10 @@ int pciehp_configure_device(struct controller *ctrl) pci_bus_add_devices(parent); down_read_nested(&ctrl->reset_lock, ctrl->depth); + dev = pci_get_slot(parent, PCI_DEVFN(0, 0)); + ctrl->dsn = pci_get_dsn(dev); + pci_dev_put(dev); + out: pci_unlock_rescan_remove(); return ret; -- Gitee From 68efe212bb3b6d0ab6bd1f69e8ea0c14e0f55c3c Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 9 Mar 2026 14:32:59 +0800 Subject: [PATCH 2/6] PCI: Make pci_destroy_dev() concurrent safe mainline inclusion from v6.13-rc1 commit e3f30d563a388220a7c4e3b9a7b52ac0b0324b26 upstream. category: feature ------------------- Use an atomic flag instead of the racy check against the device's kobj parent. We shouldn't be poking into device implementation details at this level anyway. 
Link: https://lore.kernel.org/r/20241022224851.340648-3-kbusch@meta.com Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Signed-off-by: leoliu-oc --- drivers/pci/pci.h | 6 ++++++ drivers/pci/remove.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index cac029e7140e..fbe22537f756 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -382,6 +382,7 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) #define PCI_DEV_ADDED 0 #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 +#define PCI_DEV_REMOVED 3 static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) { @@ -393,6 +394,11 @@ static inline bool pci_dev_is_added(const struct pci_dev *dev) return test_bit(PCI_DEV_ADDED, &dev->priv_flags); } +static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev) +{ + return test_and_set_bit(PCI_DEV_REMOVED, &dev->priv_flags); +} + #ifdef CONFIG_PCIEAER #include <linux/aer.h> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index d749ea8250d6..2b866742516a 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -30,7 +30,7 @@ static void pci_stop_dev(struct pci_dev *dev) static void pci_destroy_dev(struct pci_dev *dev) { - if (!dev->dev.kobj.parent) + if (pci_dev_test_and_set_removed(dev)) return; device_del(&dev->dev); -- Gitee From 82a7b1175992d67c799e95780a6bfa55a1c2c8aa Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 9 Mar 2026 14:33:00 +0800 Subject: [PATCH 3/6] PCI: pciehp: Ignore Presence Detect Changed caused by DPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from v6.16-rc1 commit c3be50f7547ccb533284b22f74baf37d3379843e upstream. 
category: feature ------------------- Commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC") amended PCIe hotplug to not bring down the slot upon Data Link Layer State Changed events caused by Downstream Port Containment. However Keith reports off-list that if the slot uses in-band presence detect (i.e. Presence Detect State is derived from Data Link Layer Link Active), DPC also causes a spurious Presence Detect Changed event. This needs to be ignored as well. Unfortunately there's no register indicating that in-band presence detect is used. PCIe r5.0 sec 7.5.3.10 introduced the In-Band PD Disable bit in the Slot Control Register. The PCIe hotplug driver sets this bit on ports supporting it. But older ports may still use in-band presence detect. If in-band presence detect can be disabled, Presence Detect Changed events occurring during DPC must not be ignored because they signal device replacement. On all other ports, device replacement cannot be detected reliably because the Presence Detect Changed event could be a side effect of DPC. On those (older) ports, perform a best-effort device replacement check by comparing the Vendor ID, Device ID and other data in Config Space with the values cached in struct pci_dev. Use the existing helper pciehp_device_replaced() to accomplish this. It is currently #ifdef'ed to CONFIG_PM_SLEEP in pciehp_core.c, so move it to pciehp_hpc.c where most other functions accessing config space reside. 
Reported-by: Keith Busch Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/fa264ff71952915c4e35a53c89eb0cde8455a5c5.1744298239.git.lukas@wunner.de Signed-off-by: leoliu-oc --- drivers/pci/hotplug/pciehp.h | 1 + drivers/pci/hotplug/pciehp_core.c | 26 ---------------- drivers/pci/hotplug/pciehp_hpc.c | 49 ++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 273dd8c66f4e..debc79b0adfb 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -187,6 +187,7 @@ int pciehp_card_present(struct controller *ctrl); int pciehp_card_present_or_link_active(struct controller *ctrl); int pciehp_check_link_status(struct controller *ctrl); int pciehp_check_link_active(struct controller *ctrl); +bool pciehp_device_replaced(struct controller *ctrl); void pciehp_release_ctrl(struct controller *ctrl); int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index c69e7e8d5618..575403ef06db 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -283,32 +283,6 @@ static int pciehp_suspend(struct pcie_device *dev) return 0; } -static bool pciehp_device_replaced(struct controller *ctrl) -{ - struct pci_dev *pdev __free(pci_dev_put); - u32 reg; - - pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); - if (!pdev) - return true; - - if (pci_read_config_dword(pdev, PCI_VENDOR_ID, &reg) || - reg != (pdev->vendor | (pdev->device << 16)) || - pci_read_config_dword(pdev, PCI_CLASS_REVISION, &reg) || - reg != (pdev->revision | (pdev->class << 8))) - return true; - - if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL && - (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, &reg) || - reg != (pdev->subsystem_vendor | (pdev->subsystem_device 
<< 16)))) - return true; - - if (pci_get_dsn(pdev) != ctrl->dsn) - return true; - - return false; -} - static int pciehp_resume_noirq(struct pcie_device *dev) { struct controller *ctrl = get_service_data(dev); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index c5b591830406..04ba0fac031b 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -559,18 +559,48 @@ void pciehp_power_off_slot(struct controller *ctrl) PCI_EXP_SLTCTL_PWR_OFF); } +bool pciehp_device_replaced(struct controller *ctrl) +{ + struct pci_dev *pdev __free(pci_dev_put) = NULL; + u32 reg; + + if (pci_dev_is_disconnected(ctrl->pcie->port)) + return false; + + pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); + if (!pdev) + return true; + + if (pci_read_config_dword(pdev, PCI_VENDOR_ID, &reg) || + reg != (pdev->vendor | (pdev->device << 16)) || + pci_read_config_dword(pdev, PCI_CLASS_REVISION, &reg) || + reg != (pdev->revision | (pdev->class << 8))) + return true; + + if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, &reg) || + reg != (pdev->subsystem_vendor | (pdev->subsystem_device << 16)))) + return true; + + if (pci_get_dsn(pdev) != ctrl->dsn) + return true; + + return false; +} + static void pciehp_ignore_dpc_link_change(struct controller *ctrl, - struct pci_dev *pdev, int irq) + struct pci_dev *pdev, int irq, + u16 ignored_events) { /* * Ignore link changes which occurred while waiting for DPC recovery. * Could be several if DPC triggered multiple times consecutively. 
*/ synchronize_hardirq(irq); - atomic_and(~PCI_EXP_SLTSTA_DLLSC, &ctrl->pending_events); + atomic_and(~ignored_events, &ctrl->pending_events); if (pciehp_poll_mode) pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_DLLSC); + ignored_events); ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored (recovered by DPC)\n", slot_name(ctrl)); @@ -580,8 +610,8 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl, * Synthesize it to ensure that it is acted on. */ down_read_nested(&ctrl->reset_lock, ctrl->depth); - if (!pciehp_check_link_active(ctrl)) - pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); + if (!pciehp_check_link_active(ctrl) || pciehp_device_replaced(ctrl)) + pciehp_request(ctrl, ignored_events); up_read(&ctrl->reset_lock); } @@ -732,8 +762,13 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) */ if ((events & PCI_EXP_SLTSTA_DLLSC) && pci_dpc_recovered(pdev) && ctrl->state == ON_STATE) { - events &= ~PCI_EXP_SLTSTA_DLLSC; - pciehp_ignore_dpc_link_change(ctrl, pdev, irq); + u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; + + if (!ctrl->inband_presence_disabled) + ignored_events |= events & PCI_EXP_SLTSTA_PDC; + + events &= ~ignored_events; + pciehp_ignore_dpc_link_change(ctrl, pdev, irq, ignored_events); } /* -- Gitee From 688b986475b15d0c237b99781058a3bb19a5074f Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 9 Mar 2026 14:33:00 +0800 Subject: [PATCH 4/6] PCI: pciehp: Ignore Link Down/Up caused by Secondary Bus Reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from v6.16-rc1 commit 2af781a9edc4ef5f6684c0710cc3542d9be48b31 upstream. category: feature ------------------- When a Secondary Bus Reset is issued at a hotplug port, it causes a Data Link Layer State Changed event as a side effect. On hotplug ports using in-band presence detect, it additionally causes a Presence Detect Changed event. 
These spurious events should not result in teardown and re-enumeration of the device in the slot. Hence commit 2e35afaefe64 ("PCI: pciehp: Add reset_slot() method") masked the Presence Detect Changed Enable bit in the Slot Control register during a Secondary Bus Reset. Commit 06a8d89af551 ("PCI: pciehp: Disable link notification across slot reset") additionally masked the Data Link Layer State Changed Enable bit. However masking those bits only disables interrupt generation (PCIe r6.2 sec 6.7.3.1). The events are still visible in the Slot Status register and picked up by the IRQ handler if it runs during a Secondary Bus Reset. This can happen if the interrupt is shared or if an unmasked hotplug event occurs, e.g. Attention Button Pressed or Power Fault Detected. The likelihood of this happening used to be small, so it wasn't much of a problem in practice. That has changed with the recent introduction of bandwidth control in v6.13-rc1 with commit 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller"): Bandwidth control shares the interrupt with PCIe hotplug. A Secondary Bus Reset causes a Link Bandwidth Notification, so the hotplug IRQ handler runs, picks up the masked events and tears down the device in the slot. As a result, Joel reports VFIO passthrough failure of a GPU, which Ilpo root-caused to the incorrect handling of masked hotplug events. Clearly, a more reliable way is needed to ignore spurious hotplug events. For Downstream Port Containment, a new ignore mechanism was introduced by commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC"). It has been working reliably for the past four years. Adapt it for Secondary Bus Resets. Introduce two helpers to annotate code sections which cause spurious link changes: pci_hp_ignore_link_change() and pci_hp_unignore_link_change() Use those helpers in lieu of masking interrupts in the Slot Control register. 
Introduce a helper to check whether such a code section is executing concurrently and if so, await it: pci_hp_spurious_link_change() Invoke the helper in the hotplug IRQ thread pciehp_ist(). Re-use the IRQ thread's existing code which ignores DPC-induced link changes unless the link is unexpectedly down after reset recovery or the device was replaced during the bus reset. That code block in pciehp_ist() was previously only executed if a Data Link Layer State Changed event has occurred. Additionally execute it for Presence Detect Changed events. That's necessary for compatibility with PCIe r1.0 hotplug ports because Data Link Layer State Changed didn't exist before PCIe r1.1. DPC was added with PCIe r3.1 and thus DPC-capable hotplug ports always support Data Link Layer State Changed events. But the same cannot be assumed for Secondary Bus Reset, which already existed in PCIe r1.0. Secondary Bus Reset is only one of many causes of spurious link changes. Others include runtime suspend to D3cold, firmware updates or FPGA reconfiguration. The new pci_hp_{,un}ignore_link_change() helpers may be used by all kinds of drivers to annotate such code sections, hence their declarations are publicly visible in <linux/pci.h>. A case in point is the Mellanox Ethernet driver which disables a firmware reset feature if the Ethernet card is attached to a hotplug port, see commit 3d7a3f2612d7 ("net/mlx5: Nack sync reset request when HotPlug is enabled"). Going forward, PCIe hotplug will be able to cope gracefully with all such use cases once the code sections are properly annotated. The new helpers internally use two bits in struct pci_dev's priv_flags as well as a wait_queue. This mirrors what was done for DPC by commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC"). That may be insufficient if spurious link changes are caused by multiple sources simultaneously. An example might be a Secondary Bus Reset issued by AER during FPGA reconfiguration. 
If this turns out to happen in real life, support for it can easily be added by replacing the PCI_LINK_CHANGING flag with an atomic_t counter incremented by pci_hp_ignore_link_change() and decremented by pci_hp_unignore_link_change(). Instead of awaiting a zero PCI_LINK_CHANGING flag, the pci_hp_spurious_link_change() helper would then simply await a zero counter. Fixes: 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller") Reported-by: Joel Mathew Thomas Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219765 Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Tested-by: Joel Mathew Thomas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/d04deaf49d634a2edf42bf3c06ed81b4ca54d17b.1744298239.git.lukas@wunner.de Signed-off-by: leoliu-oc --- drivers/pci/hotplug/pci_hotplug_core.c | 69 ++++++++++++++++++++++++++ drivers/pci/hotplug/pciehp_hpc.c | 35 +++++-------- drivers/pci/pci.h | 3 ++ include/linux/pci.h | 8 +++ 4 files changed, 92 insertions(+), 23 deletions(-) diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 058d5937d8a9..5606ceaffba4 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -558,6 +558,75 @@ void pci_hp_destroy(struct hotplug_slot *slot) } EXPORT_SYMBOL_GPL(pci_hp_destroy); +static DECLARE_WAIT_QUEUE_HEAD(pci_hp_link_change_wq); + +/** + * pci_hp_ignore_link_change - begin code section causing spurious link changes + * @pdev: PCI hotplug bridge + * + * Mark the beginning of a code section causing spurious link changes on the + * Secondary Bus of @pdev, e.g. as a side effect of a Secondary Bus Reset, + * D3cold transition, firmware update or FPGA reconfiguration. + * + * Hotplug drivers can thus check whether such a code section is executing + * concurrently, await it with pci_hp_spurious_link_change() and ignore the + * resulting link change events. 
+ * + * Must be paired with pci_hp_unignore_link_change(). May be called both + * from the PCI core and from Endpoint drivers. May be called for bridges + * which are not hotplug-capable, in which case it has no effect because + * no hotplug driver is bound to the bridge. + */ +void pci_hp_ignore_link_change(struct pci_dev *pdev) +{ + set_bit(PCI_LINK_CHANGING, &pdev->priv_flags); + smp_mb__after_atomic(); /* pairs with implied barrier of wait_event() */ +} + +/** + * pci_hp_unignore_link_change - end code section causing spurious link changes + * @pdev: PCI hotplug bridge + * + * Mark the end of a code section causing spurious link changes on the + * Secondary Bus of @pdev. Must be paired with pci_hp_ignore_link_change(). + */ +void pci_hp_unignore_link_change(struct pci_dev *pdev) +{ + set_bit(PCI_LINK_CHANGED, &pdev->priv_flags); + mb(); /* ensure pci_hp_spurious_link_change() sees either bit set */ + clear_bit(PCI_LINK_CHANGING, &pdev->priv_flags); + wake_up_all(&pci_hp_link_change_wq); +} + +/** + * pci_hp_spurious_link_change - check for spurious link changes + * @pdev: PCI hotplug bridge + * + * Check whether a code section is executing concurrently which is causing + * spurious link changes on the Secondary Bus of @pdev. Await the end of the + * code section if so. + * + * May be called by hotplug drivers to check whether a link change is spurious + * and can be ignored. + * + * Because a genuine link change may have occurred in-between a spurious link + * change and the invocation of this function, hotplug drivers should perform + * sanity checks such as retrieving the current link state and bringing down + * the slot if the link is down. + * + * Return: %true if such a code section has been executing concurrently, + * otherwise %false. Also return %true if such a code section has not been + * executing concurrently, but at least once since the last invocation of this + * function. 
+ */ +bool pci_hp_spurious_link_change(struct pci_dev *pdev) +{ + wait_event(pci_hp_link_change_wq, + !test_bit(PCI_LINK_CHANGING, &pdev->priv_flags)); + + return test_and_clear_bit(PCI_LINK_CHANGED, &pdev->priv_flags); +} + static int __init pci_hotplug_init(void) { int result; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 04ba0fac031b..d1a42e888b02 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -588,21 +588,21 @@ bool pciehp_device_replaced(struct controller *ctrl) return false; } -static void pciehp_ignore_dpc_link_change(struct controller *ctrl, - struct pci_dev *pdev, int irq, - u16 ignored_events) +static void pciehp_ignore_link_change(struct controller *ctrl, + struct pci_dev *pdev, int irq, + u16 ignored_events) { /* * Ignore link changes which occurred while waiting for DPC recovery. * Could be several if DPC triggered multiple times consecutively. + * Also ignore link changes caused by Secondary Bus Reset, etc. */ synchronize_hardirq(irq); atomic_and(~ignored_events, &ctrl->pending_events); if (pciehp_poll_mode) pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, ignored_events); - ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored (recovered by DPC)\n", - slot_name(ctrl)); + ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored\n", slot_name(ctrl)); /* * If the link is unexpectedly down after successful recovery, @@ -758,9 +758,11 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) /* * Ignore Link Down/Up events caused by Downstream Port Containment - * if recovery from the error succeeded. + * if recovery succeeded, or caused by Secondary Bus Reset, + * suspend to D3cold, firmware update, FPGA reconfiguration, etc. 
*/ - if ((events & PCI_EXP_SLTSTA_DLLSC) && pci_dpc_recovered(pdev) && + if ((events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) && + (pci_dpc_recovered(pdev) || pci_hp_spurious_link_change(pdev)) && ctrl->state == ON_STATE) { u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; @@ -768,7 +770,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) ignored_events |= events & PCI_EXP_SLTSTA_PDC; events &= ~ignored_events; - pciehp_ignore_dpc_link_change(ctrl, pdev, irq, ignored_events); + pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events); } /* @@ -933,7 +935,6 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) { struct controller *ctrl = to_ctrl(hotplug_slot); struct pci_dev *pdev = ctrl_dev(ctrl); - u16 stat_mask = 0, ctrl_mask = 0; int rc; if (probe) @@ -941,23 +942,11 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) down_write_nested(&ctrl->reset_lock, ctrl->depth); - if (!ATTN_BUTTN(ctrl)) { - ctrl_mask |= PCI_EXP_SLTCTL_PDCE; - stat_mask |= PCI_EXP_SLTSTA_PDC; - } - ctrl_mask |= PCI_EXP_SLTCTL_DLLSCE; - stat_mask |= PCI_EXP_SLTSTA_DLLSC; - - pcie_write_cmd(ctrl, 0, ctrl_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, - pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0); + pci_hp_ignore_link_change(pdev); rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port); - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask); - pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, - pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask); + pci_hp_unignore_link_change(pdev); up_write(&ctrl->reset_lock); return rc; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index fbe22537f756..7312b508d1e9 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -152,6 +152,7 @@ static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; } /* Functions for PCI Hotplug drivers to use */ int pci_hp_add_bridge(struct pci_dev *dev); +bool 
pci_hp_spurious_link_change(struct pci_dev *pdev); #ifdef HAVE_PCI_LEGACY void pci_create_legacy_files(struct pci_bus *bus); @@ -383,6 +384,8 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 #define PCI_DEV_REMOVED 3 +#define PCI_LINK_CHANGED 4 +#define PCI_LINK_CHANGING 5 static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 71d3507e91f6..8854a2b24a80 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1875,6 +1875,14 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } #endif +#ifdef CONFIG_HOTPLUG_PCI +void pci_hp_ignore_link_change(struct pci_dev *pdev); +void pci_hp_unignore_link_change(struct pci_dev *pdev); +#else +static inline void pci_hp_ignore_link_change(struct pci_dev *pdev) { } +static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } +#endif + #ifdef CONFIG_PCIEAER bool pci_aer_available(void); #else -- Gitee From 5a081a297b8f4177d42f2a11dd03fb28e3477af0 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 9 Mar 2026 14:33:01 +0800 Subject: [PATCH 5/6] PCI: pciehp: Ignore belated Presence Detect Changed caused by DPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from v6.16-rc3 commit bbf10cd686835d5a4b8566dc73a3b00b4cd7932a upstream. category: feature ------------------- Commit c3be50f7547c ("PCI: pciehp: Ignore Presence Detect Changed caused by DPC") sought to ignore Presence Detect Changed events occurring as a side effect of Downstream Port Containment. The commit awaits recovery from DPC and then clears events which occurred in the meantime. However if the first event seen after DPC is Data Link Layer State Changed, only that event is cleared and not Presence Detect Changed. 
The object of the commit is thus defeated. That's because pciehp_ist() computes the events to clear based on the local "events" variable instead of "ctrl->pending_events". The former contains the events that had occurred when pciehp_ist() was entered, whereas the latter also contains events that have accumulated while awaiting DPC recovery. In practice, the order of PDC and DLLSC events is arbitrary and the delay in-between can be several milliseconds. So change the logic to always clear PDC events, even if they come after an initial DLLSC event. Fixes: c3be50f7547c ("PCI: pciehp: Ignore Presence Detect Changed caused by DPC") Reported-by: Lương Việt Hoàng Reported-by: Joel Mathew Thomas Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219765#c165 Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Tested-by: Lương Việt Hoàng Tested-by: Joel Mathew Thomas Link: https://patch.msgid.link/d9c4286a16253af7e93eaf12e076e3ef3546367a.1750257164.git.lukas@wunner.de Signed-off-by: leoliu-oc --- drivers/pci/hotplug/pciehp_hpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index d1a42e888b02..3b230b384e87 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -767,7 +767,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; if (!ctrl->inband_presence_disabled) - ignored_events |= events & PCI_EXP_SLTSTA_PDC; + ignored_events |= PCI_EXP_SLTSTA_PDC; events &= ~ignored_events; pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events); -- Gitee From 0015d56ec25b5e8f11b91bb84e6d4edaae90dcf4 Mon Sep 17 00:00:00 2001 From: leoliu-oc Date: Mon, 9 Mar 2026 14:33:02 +0800 Subject: [PATCH 6/6] PCI: dpc: Increase pciehp waiting time for DPC recovery zhaoxin inclusion category: feature -------------------- Commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC") amended PCIe hotplug to not bring down 
the slot upon Data Link Layer State Changed events caused by Downstream Port Containment. However, PCIe hotplug (pciehp) waits up to 4 seconds before assuming that DPC recovery has failed and disabling the slot. This timeout period is insufficient for some PCIe devices. For example, the E810 dual-port network card driver needs to take over 10 seconds to execute its err_detected() callback. Since this exceeds the maximum wait time allowed for DPC recovery by the hotplug IRQ threads, a race condition occurs between the hotplug thread and the dpc_handler() thread. Signed-off-by: leoliu-oc --- drivers/pci/pcie/dpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index e1923b944a02..74b8b458b7ef 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -115,10 +115,10 @@ bool pci_dpc_recovered(struct pci_dev *pdev) /* * Need a timeout in case DPC never completes due to failure of * dpc_wait_rp_inactive(). The spec doesn't mandate a time limit, - * but reports indicate that DPC completes within 4 seconds. + * but reports indicate that DPC completes within 16 seconds. */ wait_event_timeout(dpc_completed_waitqueue, dpc_completed(pdev), - msecs_to_jiffies(4000)); + msecs_to_jiffies(16000)); return test_and_clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); } -- Gitee