PCI: Batch BAR sizing operations
PCI: Batch BAR sising operations
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4453f360862e5d9f0807941d613162c3f7a36559
author Alex Williamson alex.williamson@redhat.com 2025-01-20 11:21:59 -0700 committer Bjorn Helgaas bhelgaas@google.com 2025-01-23 11:05:20 -0600 commit 4453f360862e5d9f0807941d613162c3f7a36559 (patch) tree 97a4add01f845f4d78d262f562e789ba8593e852 parent 40384c840ea1944d7c5a392e8975ed088ecf0b37 (diff) download linux-4453f360862e5d9f0807941d613162c3f7a36559.tar.gz PCI: Batch BAR sising operations Toggling memory enable is free on bare metal, but potentially expensive in virtualised environments as the device MMIO spaces are added and removed from the VM address space, including DMA mapping of those spaces through the IOMMU where peer-to-peer is supported. Currently memory decode is disabled around sising each individual BAR, even for SR-IOV BARs while VF Enable is cleared.
This can be better optimised for virtual environments by sising a set of BARs at once, stashing the resulting mask into an array, while only toggling memory enable once. This also naturally improves the SR-IOV path as the caller becomes responsible for any necessary decode disables while sising BARs, therefore SR-IOV BARs are sised relying only on the VF Enable rather than toggling the PF memory enable in the command register.
Link: https://lore.kernel.org/r/20250120182202.1878581-1-alex.williamson@redhat.com Reported-by: Mitchell Augustin mitchell.augustin@canonical.com Link: https://lore.kernel.org/r/CAHTA-uYp07FgM6T1OZQKqAdSA5JrZo0ReNEyZgQZub4mDRrV5w@mail.gmail.com Signed-off-by: Alex Williamson alex.williamson@redhat.com Signed-off-by: Bjorn Helgaas bhelgaas@google.com Tested-by: Mitchell Augustin mitchell.augustin@canonical.com Reviewed-by: Mitchell Augustin mitchell.augustin@canonical.com Reviewed-by: Ilpo Järvinen ilpo.jarvinen@linux.intel.com Diffstat -rw-r–r– drivers/pci/iov.c 8 -rw-r–r– drivers/pci/pci.h 4 -rw-r–r– drivers/pci/probe.c 93 3 files changed, 78 insertions, 27 deletions diff –git a/drivers/pci/iov.c b/drivers/pci/iov.c index 4be402fe9ab942..9e4770cdd4d5ae 100644 — a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -747,6 +747,7 @@ static int sriov_init(struct pci_dev *dev, int pos) struct resource *res; const char *res_name; struct pci_dev *pdev;
- u32 sriovbars[PCI_SRIOV_NUM_BARS];
@@ -783,6 +784,10 @@ found: if (!iov) return -ENOMEM;
- /* Sising SR-IOV BARs with VF Enable cleared - no decode */
- __pci_size_stdbars(dev, PCI_SRIOV_NUM_BARS,
- pos + PCI_SRIOV_BAR, sriovbars); + nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &dev->resource[i + PCI_IOV_RESOURCES]; @@ -796,7 +801,8 @@ found: bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0; else bar64 = __pci_read_base(dev, pci_bar_unknown, res,
- pos + PCI_SRIOV_BAR + i * 4);
- pos + PCI_SRIOV_BAR + i * 4,
- &sriovbars[i]); if (!res->flags) continue; if (resource_size(res) & (PAGE_SIZE - 1)) { diff –git a/drivers/pci/pci.h b/drivers/pci/pci.h index 2e40fc63ba3159..6f27651c2a70c8 100644 — a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -315,8 +315,10 @@ bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int pci_idt_bus_quirk(struct pci_bus *bus, int devfn, u32 *pl, int rrs_timeout);
int pci_setup_device(struct pci_dev *dev); +void __pci_size_stdbars(struct pci_dev *dev, int count,
- unsigned int pos, u32 *sizes); int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
- struct resource *res, unsigned int reg);
- struct resource *res, unsigned int reg, u32 *sizes); void pci_configure_ari(struct pci_dev *dev); void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head); diff –git a/drivers/pci/probe.c b/drivers/pci/probe.c index 2e81ab0f5a25cb..bf6aec5550440c 100644 — a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -165,40 +165,66 @@ static inline unsigned long decode_bar(struct pci_dev *dev, u32 bar) #define PCI_COMMAND_DECODE_ENABLE (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)
/**
-
- __pci_size_bars - Read the raw BAR mask for a range of PCI BARs
-
- @dev: the PCI device
-
- @count: number of BARs to size
-
- @pos: starting config space position
-
- @sizes: array to store mask values
-
- @rom: indicate whether to use ROM mask, which avoids enabling ROM BARs
- *
-
- Provided @sizes array must be sufficiently sised to store results for
-
- @count u32 BARs. Caller is responsible for disabling decode to specified
-
- BAR range around calling this function. This function is intended to avoid
-
- disabling decode around sising each BAR individually, which can result in
-
- non-trivial overhead in virtualised environments with very large PCI BARs.
- */ +static void __pci_size_bars(struct pci_dev *dev, int count,
- unsigned int pos, u32 *sizes, bool rom) +{
- u32 orig, mask = rom ? PCI_ROM_ADDRESS_MASK : ~0;
- int i; +
- for (i = 0; i < count; i++, pos += 4, sizes++) {
- pci_read_config_dword(dev, pos, &orig);
- pci_write_config_dword(dev, pos, mask);
- pci_read_config_dword(dev, pos, sizes);
- pci_write_config_dword(dev, pos, orig);
- } +} + +void __pci_size_stdbars(struct pci_dev *dev, int count,
- unsigned int pos, u32 *sizes) +{
- __pci_size_bars(dev, count, pos, sizes, false); +} + +static void __pci_size_rom(struct pci_dev *dev, unsigned int pos, u32 *sizes) +{
- __pci_size_bars(dev, 1, pos, sizes, true); +} + +/**
- __pci_read_base - Read a PCI BAR
- @dev: the PCI device
- @type: type of the BAR
- @res: resource buffer to be filled in
- @pos: BAR position in the config space
-
- @sizes: array of one or more pre-read BAR masks *
- Returns 1 if the BAR is 64-bit, or 0 if 32-bit. */ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
- struct resource *res, unsigned int pos)
- struct resource *res, unsigned int pos, u32 *sizes) {
- u32 l = 0, sz = 0, mask;
- u32 l = 0, sz; u64 l64, sz64, mask64;
-
u16 orig_cmd; struct pci_bus_region region, inverted_region; const char *res_name = pci_resource_name(dev, res - dev->resource);
-
mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
- /* No printks while decoding is disabled! */
- if (!dev->mmio_always_on) {
- pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
- if (orig_cmd & PCI_COMMAND_DECODE_ENABLE) {
- pci_write_config_word(dev, PCI_COMMAND,
- orig_cmd & ~PCI_COMMAND_DECODE_ENABLE);
- }
-
}
res->name = pci_name(dev);
-
pci_write_config_dword(dev, pos, l mask); - pci_read_config_dword(dev, pos, &sz);
- pci_write_config_dword(dev, pos, l);
- sz = sizes[0];
@@ -238,18 +264,13 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
if (res->flags & IORESOURCE_MEM_64) { pci_read_config_dword(dev, pos + 4, &l);- pci_write_config_dword(dev, pos + 4, ~0);
- pci_read_config_dword(dev, pos + 4, &sz);
- pci_write_config_dword(dev, pos + 4, l);
- sz = sizes[1];
- if (!dev->mmio_always_on && (orig_cmd & PCI_COMMAND_DECODE_ENABLE))
-
pci_write_config_word(dev, PCI_COMMAND, orig_cmd);
if (!sz64) goto fail;
@@ -320,7 +341,11 @@ out:
static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) {
- u32 rombar, stdbars[PCI_STD_NUM_BARS]; unsigned int pos, reg;
- u16 orig_cmd; +
- BUILD_BUG_ON(howmany > PCI_STD_NUM_BARS);
@@ -329,10 +354,28 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) if (dev->is_virtfn) return;
- /* No printks while decoding is disabled! */
- if (!dev->mmio_always_on) {
- pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
- if (orig_cmd & PCI_COMMAND_DECODE_ENABLE) {
- pci_write_config_word(dev, PCI_COMMAND,
- orig_cmd & ~PCI_COMMAND_DECODE_ENABLE);
- }
- } +
- __pci_size_stdbars(dev, howmany, PCI_BASE_ADDRESS_0, stdbars);
- if (rom)
- __pci_size_rom(dev, rom, &rombar); +
- if (!dev->mmio_always_on &&
- (orig_cmd & PCI_COMMAND_DECODE_ENABLE))
- pci_write_config_word(dev, PCI_COMMAND, orig_cmd); + for (pos = 0; pos < howmany; pos++) { struct resource *res = &dev->resource[pos]; reg = PCI_BASE_ADDRESS_0 + (pos « 2);
- pos += __pci_read_base(dev, pci_bar_unknown, res, reg);
- pos += __pci_read_base(dev, pci_bar_unknown,
- res, reg, &stdbars[pos]); }
@@ -340,7 +383,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) dev->rom_base_reg = rom; res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
- __pci_read_base(dev, pci_bar_mem32, res, rom);
- __pci_read_base(dev, pci_bar_mem32, res, rom, &rombar); } }