Diffstat (limited to 'roms/skiboot/core/pci.c')
-rw-r--r-- | roms/skiboot/core/pci.c | 1962
1 file changed, 1962 insertions, 0 deletions
diff --git a/roms/skiboot/core/pci.c b/roms/skiboot/core/pci.c new file mode 100644 index 000000000..e195ecbf4 --- /dev/null +++ b/roms/skiboot/core/pci.c @@ -0,0 +1,1962 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Base PCI support + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <cpu.h> +#include <pci.h> +#include <pci-cfg.h> +#include <pci-slot.h> +#include <pci-quirk.h> +#include <timebase.h> +#include <device.h> + +#define MAX_PHB_ID 256 +static struct phb *phbs[MAX_PHB_ID]; +int last_phb_id = 0; + +/* + * Generic PCI utilities + */ + +static int64_t __pci_find_cap(struct phb *phb, uint16_t bdfn, + uint8_t want, bool check_cap_indicator) +{ + int64_t rc; + uint16_t stat, cap; + uint8_t pos, next; + + rc = pci_cfg_read16(phb, bdfn, PCI_CFG_STAT, &stat); + if (rc) + return rc; + if (check_cap_indicator && !(stat & PCI_CFG_STAT_CAP)) + return OPAL_UNSUPPORTED; + rc = pci_cfg_read8(phb, bdfn, PCI_CFG_CAP, &pos); + if (rc) + return rc; + pos &= 0xfc; + while(pos) { + rc = pci_cfg_read16(phb, bdfn, pos, &cap); + if (rc) + return rc; + if ((cap & 0xff) == want) + return pos; + next = (cap >> 8) & 0xfc; + if (next == pos) { + PCIERR(phb, bdfn, "pci_find_cap hit a loop !\n"); + break; + } + pos = next; + } + return OPAL_UNSUPPORTED; +} + +/* pci_find_cap - Find a PCI capability in a device config space + * + * This will return a config space offset (positive) or a negative + * error (OPAL error codes). + * + * OPAL_UNSUPPORTED is returned if the capability doesn't exist + */ +int64_t pci_find_cap(struct phb *phb, uint16_t bdfn, uint8_t want) +{ + return __pci_find_cap(phb, bdfn, want, true); +} + +/* pci_find_ecap - Find a PCIe extended capability in a device + * config space + * + * This will return a config space offset (positive) or a negative + * error (OPAL error code). Additionally, if the "version" argument + * is non-NULL, the capability version will be returned there. + * + * OPAL_UNSUPPORTED is returned if the capability doesn't exist + */ +int64_t pci_find_ecap(struct phb *phb, uint16_t bdfn, uint16_t want, + uint8_t *version) +{ + int64_t rc; + uint32_t cap; + uint16_t off, prev = 0; + + for (off = 0x100; off && off < 0x1000; off = (cap >> 20) & 0xffc ) { + if (off == prev) { + PCIERR(phb, bdfn, "pci_find_ecap hit a loop !\n"); + break; + } + prev = off; + rc = pci_cfg_read32(phb, bdfn, off, &cap); + if (rc) + return rc; + + /* no ecaps supported */ + if (cap == 0 || (cap & 0xffff) == 0xffff) + return OPAL_UNSUPPORTED; + + if ((cap & 0xffff) == want) { + if (version) + *version = (cap >> 16) & 0xf; + return off; + } + } + return OPAL_UNSUPPORTED; +} + +static void pci_init_pcie_cap(struct phb *phb, struct pci_device *pd) +{ + int64_t ecap = 0; + uint16_t reg; + uint32_t val; + + /* On the upstream port of PLX bridge 8724 (rev ba), PCI_STATUS + * register doesn't have capability indicator though it support + * various PCI capabilities. So we need ignore that bit when + * looking for PCI capabilities on the upstream port, which is + * limited to one that seats directly under root port. 
+ */ + if (pd->vdid == 0x872410b5 && pd->parent && !pd->parent->parent) { + uint8_t rev; + + pci_cfg_read8(phb, pd->bdfn, PCI_CFG_REV_ID, &rev); + if (rev == 0xba) + ecap = __pci_find_cap(phb, pd->bdfn, + PCI_CFG_CAP_ID_EXP, false); + else + ecap = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_EXP); + } else { + ecap = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_EXP); + } + + if (ecap <= 0) { + pd->dev_type = PCIE_TYPE_LEGACY; + return; + } + + pci_set_cap(pd, PCI_CFG_CAP_ID_EXP, ecap, NULL, NULL, false); + + /* + * XXX We observe a problem on some PLX switches where one + * of the downstream ports appears as an upstream port, we + * fix that up here otherwise, other code will misbehave + */ + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_CAPABILITY_REG, ®); + pd->dev_type = GETFIELD(PCICAP_EXP_CAP_TYPE, reg); + if (pd->parent && pd->parent->dev_type == PCIE_TYPE_SWITCH_UPPORT && + pd->vdid == 0x874810b5 && pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) { + PCIDBG(phb, pd->bdfn, "Fixing up bad PLX downstream port !\n"); + pd->dev_type = PCIE_TYPE_SWITCH_DNPORT; + } + + /* XXX Handle ARI */ + if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT || + pd->dev_type == PCIE_TYPE_ROOT_PORT) + pd->scan_map = 0x1; + + /* Read MPS capability, whose maximal size is 4096 */ + pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCAP, &val); + pd->mps = (128 << GETFIELD(PCICAP_EXP_DEVCAP_MPSS, val)); + if (pd->mps > 4096) + pd->mps = 4096; +} + +static void pci_init_aer_cap(struct phb *phb, struct pci_device *pd) +{ + int64_t pos; + + if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) + return; + + pos = pci_find_ecap(phb, pd->bdfn, PCIECAP_ID_AER, NULL); + if (pos > 0) + pci_set_cap(pd, PCIECAP_ID_AER, pos, NULL, NULL, true); +} + +static void pci_init_pm_cap(struct phb *phb, struct pci_device *pd) +{ + int64_t pos; + + pos = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_PM); + if (pos > 0) + pci_set_cap(pd, PCI_CFG_CAP_ID_PM, pos, NULL, NULL, false); +} + +void pci_init_capabilities(struct phb *phb, struct pci_device *pd) +{ + pci_init_pcie_cap(phb, pd); + pci_init_aer_cap(phb, pd); + pci_init_pm_cap(phb, pd); +} + +bool pci_wait_crs(struct phb *phb, uint16_t bdfn, uint32_t *out_vdid) +{ + uint32_t retries, vdid; + int64_t rc; + bool had_crs = false; + + for (retries = 0; retries < 40; retries++) { + rc = pci_cfg_read32(phb, bdfn, PCI_CFG_VENDOR_ID, &vdid); + if (rc) + return false; + if (vdid == 0xffffffff || vdid == 0x00000000) + return false; + if (vdid != 0xffff0001) + break; + had_crs = true; + time_wait_ms(100); + } + if (vdid == 0xffff0001) { + PCIERR(phb, bdfn, "CRS timeout !\n"); + return false; + } + if (had_crs) + PCIDBG(phb, bdfn, "Probe success after %d CRS\n", retries); + + if (out_vdid) + *out_vdid = vdid; + return true; +} + +static struct pci_device *pci_scan_one(struct phb *phb, struct pci_device *parent, + uint16_t bdfn) +{ + struct pci_device *pd = NULL; + uint32_t vdid; + int64_t rc; + uint8_t htype; + + if (!pci_wait_crs(phb, bdfn, &vdid)) + return NULL; + + /* Perform a dummy write to the device in order for it to + * capture it's own bus number, so any subsequent error + * messages will be properly tagged + */ + pci_cfg_write32(phb, bdfn, PCI_CFG_VENDOR_ID, vdid); + + pd = zalloc(sizeof(struct pci_device)); + if (!pd) { + PCIERR(phb, bdfn,"Failed to allocate structure pci_device !\n"); + goto fail; + } + pd->phb = phb; + pd->bdfn = bdfn; + pd->vdid = vdid; + pci_cfg_read32(phb, bdfn, PCI_CFG_SUBSYS_VENDOR_ID, &pd->sub_vdid); + pci_cfg_read32(phb, bdfn, PCI_CFG_REV_ID, &pd->class); + pd->class >>= 8; 
+ + pd->parent = parent; + list_head_init(&pd->pcrf); + list_head_init(&pd->children); + rc = pci_cfg_read8(phb, bdfn, PCI_CFG_HDR_TYPE, &htype); + if (rc) { + PCIERR(phb, bdfn, "Failed to read header type !\n"); + goto fail; + } + pd->is_multifunction = !!(htype & 0x80); + pd->is_bridge = (htype & 0x7f) != 0; + pd->is_vf = false; + pd->scan_map = 0xffffffff; /* Default */ + pd->primary_bus = PCI_BUS_NUM(bdfn); + + pci_init_capabilities(phb, pd); + + /* If it's a bridge, sanitize the bus numbers to avoid forwarding + * + * This will help when walking down those bridges later on + */ + if (pd->is_bridge) { + pci_cfg_write8(phb, bdfn, PCI_CFG_PRIMARY_BUS, pd->primary_bus); + pci_cfg_write8(phb, bdfn, PCI_CFG_SECONDARY_BUS, 0); + pci_cfg_write8(phb, bdfn, PCI_CFG_SUBORDINATE_BUS, 0); + } + + /* XXX Need to do some basic setups, such as MPSS, MRS, + * RCB, etc... + */ + + PCIDBG(phb, bdfn, "Found VID:%04x DEV:%04x TYP:%d MF%s BR%s EX%s\n", + vdid & 0xffff, vdid >> 16, pd->dev_type, + pd->is_multifunction ? "+" : "-", + pd->is_bridge ? "+" : "-", + pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false) ? "+" : "-"); + + /* Try to get PCI slot behind the device */ + if (platform.pci_get_slot_info) + platform.pci_get_slot_info(phb, pd); + + /* Put it to the child device of list of PHB or parent */ + if (!parent) + list_add_tail(&phb->devices, &pd->link); + else + list_add_tail(&parent->children, &pd->link); + + /* + * Call PHB hook + */ + if (phb->ops->device_init) + phb->ops->device_init(phb, pd, NULL); + + return pd; + fail: + if (pd) + free(pd); + return NULL; +} + +/* pci_check_clear_freeze - Probing empty slot will result in an EEH + * freeze. Currently we have a single PE mapping + * everything (default state of our backend) so + * we just check and clear the state of PE#0 + * + * returns true if a freeze was detected + * + * NOTE: We currently only handle simple PE freeze, not PHB fencing + * (or rather our backend does) + */ +bool pci_check_clear_freeze(struct phb *phb) +{ + uint8_t freeze_state; + uint16_t pci_error_type, sev; + int64_t pe_number, rc; + + /* Retrieve the reserved PE number */ + pe_number = OPAL_PARAMETER; + if (phb->ops->get_reserved_pe_number) + pe_number = phb->ops->get_reserved_pe_number(phb); + if (pe_number < 0) + return false; + + /* Retrieve the frozen state */ + rc = phb->ops->eeh_freeze_status(phb, pe_number, &freeze_state, + &pci_error_type, &sev); + if (rc) + return true; /* phb fence? */ + + if (freeze_state == OPAL_EEH_STOPPED_NOT_FROZEN) + return false; + /* We can't handle anything worse than an ER here */ + if (sev > OPAL_EEH_SEV_NO_ERROR && + sev < OPAL_EEH_SEV_PE_ER) { + PCIERR(phb, 0, "Fatal probe in %s error !\n", __func__); + return true; + } + + phb->ops->eeh_freeze_clear(phb, pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + return true; +} + +/* + * Turn off slot's power supply if there are nothing connected for + * 2 purposes: power saving obviously and initialize the slot to + * to initial power-off state for hotplug. + * + * The power should be turned on if the downstream link of the slot + * isn't up. 
+ */ +static void pci_slot_set_power_state(struct phb *phb, + struct pci_device *pd, + uint8_t state) +{ + struct pci_slot *slot; + uint8_t cur_state; + int32_t wait = 100; + int64_t rc; + + if (!pd || !pd->slot) + return; + + slot = pd->slot; + if (!slot->pluggable || + !slot->ops.get_power_state || + !slot->ops.set_power_state) + return; + + if (state == PCI_SLOT_POWER_OFF) { + /* Bail if there're something connected */ + if (!list_empty(&pd->children)) { + PCIERR(phb, pd->bdfn, "Attempted to power off slot with attached devices!\n"); + return; + } + + pci_slot_add_flags(slot, PCI_SLOT_FLAG_BOOTUP); + rc = slot->ops.get_power_state(slot, &cur_state); + if (rc != OPAL_SUCCESS) { + PCINOTICE(phb, pd->bdfn, "Error %lld getting slot power state\n", rc); + cur_state = PCI_SLOT_POWER_OFF; + } + + pci_slot_remove_flags(slot, PCI_SLOT_FLAG_BOOTUP); + if (cur_state == PCI_SLOT_POWER_OFF) + return; + } + + pci_slot_add_flags(slot, + (PCI_SLOT_FLAG_BOOTUP | PCI_SLOT_FLAG_ENFORCE)); + rc = slot->ops.set_power_state(slot, state); + if (rc == OPAL_SUCCESS) + goto success; + if (rc != OPAL_ASYNC_COMPLETION) { + PCINOTICE(phb, pd->bdfn, "Error %lld powering %s slot\n", + rc, state == PCI_SLOT_POWER_ON ? "on" : "off"); + goto error; + } + + /* Wait until the operation is completed */ + do { + if (slot->state == PCI_SLOT_STATE_SPOWER_DONE) + break; + + check_timers(false); + time_wait_ms(10); + } while (--wait >= 0); + + if (wait < 0) { + PCINOTICE(phb, pd->bdfn, "Timeout powering %s slot\n", + state == PCI_SLOT_POWER_ON ? "on" : "off"); + goto error; + } + +success: + PCIDBG(phb, pd->bdfn, "Powering %s hotpluggable slot\n", + state == PCI_SLOT_POWER_ON ? "on" : "off"); +error: + pci_slot_remove_flags(slot, + (PCI_SLOT_FLAG_BOOTUP | PCI_SLOT_FLAG_ENFORCE)); + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); +} + +static bool pci_bridge_power_on(struct phb *phb, struct pci_device *pd) +{ + int32_t ecap; + uint16_t pcie_cap, slot_sts, slot_ctl, link_ctl; + uint32_t slot_cap; + int64_t rc; + + /* + * If there is a PCI slot associated with the bridge, to use + * the PCI slot's facality to power it on. + */ + if (pd->slot) { + struct pci_slot *slot = pd->slot; + uint8_t presence; + + /* + * We assume the presence state is OPAL_PCI_SLOT_PRESENT + * by default. In this way, we won't miss anything when + * the operation isn't supported or hitting error upon + * retrieving it. 
+ */ + if (slot->ops.get_presence_state) { + rc = slot->ops.get_presence_state(slot, &presence); + if (rc == OPAL_SUCCESS && + presence == OPAL_PCI_SLOT_EMPTY) + return false; + } + + /* To power it on */ + pci_slot_set_power_state(phb, pd, PCI_SLOT_POWER_ON); + return true; + } + + if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) + return true; + + /* Check if slot is supported */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_CAPABILITY_REG, &pcie_cap); + if (!(pcie_cap & PCICAP_EXP_CAP_SLOT)) + return true; + + /* Check presence */ + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_SLOTSTAT, &slot_sts); + if (!(slot_sts & PCICAP_EXP_SLOTSTAT_PDETECTST)) + return false; + + /* Ensure that power control is supported */ + pci_cfg_read32(phb, pd->bdfn, + ecap + PCICAP_EXP_SLOTCAP, &slot_cap); + if (!(slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL)) + return true; + + + /* Read the slot control register, check if the slot is off */ + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, &slot_ctl); + PCITRACE(phb, pd->bdfn, " SLOT_CTL=%04x\n", slot_ctl); + if (slot_ctl & PCICAP_EXP_SLOTCTL_PWRCTLR) { + PCIDBG(phb, pd->bdfn, "Bridge power is off, turning on ...\n"); + slot_ctl &= ~PCICAP_EXP_SLOTCTL_PWRCTLR; + slot_ctl |= SETFIELD(PCICAP_EXP_SLOTCTL_PWRI, 0, PCIE_INDIC_ON); + pci_cfg_write16(phb, pd->bdfn, + ecap + PCICAP_EXP_SLOTCTL, slot_ctl); + + /* Wait a couple of seconds */ + time_wait_ms(2000); + } + + /* Enable link */ + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, &link_ctl); + PCITRACE(phb, pd->bdfn, " LINK_CTL=%04x\n", link_ctl); + link_ctl &= ~PCICAP_EXP_LCTL_LINK_DIS; + pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, link_ctl); + + return true; +} + +static bool pci_bridge_wait_link(struct phb *phb, + struct pci_device *pd, + bool was_reset) +{ + int32_t ecap = 0; + uint32_t link_cap = 0, retries = 100; + uint16_t link_sts; + + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_LCAP, &link_cap); + } + + /* + * If link state reporting isn't supported, wait 1 second + * if the downstream link was ever resetted. + */ + if (!(link_cap & PCICAP_EXP_LCAP_DL_ACT_REP)) { + if (was_reset) + time_wait_ms(1000); + + return true; + } + + /* + * Link state reporting is supported, wait for the link to + * come up until timeout. + */ + PCIDBG(phb, pd->bdfn, "waiting for link... \n"); + while (retries--) { + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_LSTAT, &link_sts); + if (link_sts & PCICAP_EXP_LSTAT_DLLL_ACT) + break; + + time_wait_ms(100); + } + + if (!(link_sts & PCICAP_EXP_LSTAT_DLLL_ACT)) { + PCIERR(phb, pd->bdfn, "Timeout waiting for downstream link\n"); + return false; + } + + /* Need another 100ms before touching the config space */ + time_wait_ms(100); + PCIDBG(phb, pd->bdfn, "link is up\n"); + + return true; +} + +/* pci_enable_bridge - Called before scanning a bridge + * + * Ensures error flags are clean, disable master abort, and + * check if the subordinate bus isn't reset, the slot is enabled + * on PCIe, etc... + */ +static bool pci_enable_bridge(struct phb *phb, struct pci_device *pd) +{ + uint16_t bctl; + bool was_reset = false; + + /* Disable master aborts, clear errors */ + pci_cfg_read16(phb, pd->bdfn, PCI_CFG_BRCTL, &bctl); + bctl &= ~PCI_CFG_BRCTL_MABORT_REPORT; + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, bctl); + + + /* PCI-E bridge, check the slot state. 
We don't do that on the + * root complex as this is handled separately and not all our + * RCs implement the standard register set. + */ + if ((pd->dev_type == PCIE_TYPE_ROOT_PORT && pd->primary_bus > 0) || + pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) { + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { + int32_t ecap; + uint32_t link_cap = 0; + uint16_t link_sts = 0; + + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read32(phb, pd->bdfn, + ecap + PCICAP_EXP_LCAP, &link_cap); + + /* + * No need to touch the power supply if the PCIe link has + * been up. Further more, the slot presence bit is lost while + * the PCIe link is up on the specific PCI topology. In that + * case, we need ignore the slot presence bit and go ahead for + * probing. Otherwise, the NVMe adapter won't be probed. + * + * PHB3 root port, PLX switch 8748 (10b5:8748), PLX swich 9733 + * (10b5:9733), PMC 8546 swtich (11f8:8546), NVMe adapter + * (1c58:0023). + */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read32(phb, pd->bdfn, + ecap + PCICAP_EXP_LCAP, &link_cap); + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_LSTAT, &link_sts); + if ((link_cap & PCICAP_EXP_LCAP_DL_ACT_REP) && + (link_sts & PCICAP_EXP_LSTAT_DLLL_ACT)) + return true; + } + + /* Power on the downstream slot or link */ + if (!pci_bridge_power_on(phb, pd)) + return false; + } + + /* Clear secondary reset */ + if (bctl & PCI_CFG_BRCTL_SECONDARY_RESET) { + PCIDBG(phb, pd->bdfn, + "Bridge secondary reset is on, clearing it ...\n"); + bctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET; + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, bctl); + time_wait_ms(1000); + was_reset = true; + } + + /* PCI-E bridge, wait for link */ + if (pd->dev_type == PCIE_TYPE_ROOT_PORT || + pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) { + if (!pci_bridge_wait_link(phb, pd, was_reset)) + return false; + } + + /* Clear error status */ + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_STAT, 0xffff); + return true; +} + +/* Clear up bridge resources */ +static void pci_cleanup_bridge(struct phb *phb, struct pci_device *pd) +{ + uint16_t cmd; + + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_BASE_U16, 0xffff); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_BASE, 0xf0); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_LIMIT_U16, 0); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_LIMIT, 0); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_BASE, 0xfff0); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_LIMIT, 0); + pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE_U32, 0xffffffff); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE, 0xfff0); + pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT_U32, 0); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT, 0); + + /* Note: This is a bit fishy but since we have closed all the + * bridge windows above, it shouldn't be a problem. Basically + * we enable Memory, IO and Bus Master on the bridge because + * some versions of Linux will fail to do it themselves. + */ + pci_cfg_read16(phb, pd->bdfn, PCI_CFG_CMD, &cmd); + cmd |= PCI_CFG_CMD_IO_EN | PCI_CFG_CMD_MEM_EN; + cmd |= PCI_CFG_CMD_BUS_MASTER_EN; + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_CMD, cmd); +} + +/* Remove all subordinate PCI devices leading from the indicated + * PCI bus. 
It's used to remove all PCI devices behind one PCI + * slot at unplugging time + */ +void pci_remove_bus(struct phb *phb, struct list_head *list) +{ + struct pci_device *pd, *tmp; + + list_for_each_safe(list, pd, tmp, link) { + pci_remove_bus(phb, &pd->children); + + if (phb->ops->device_remove) + phb->ops->device_remove(phb, pd); + + /* Release device node and PCI slot */ + if (pd->dn) + dt_free(pd->dn); + if (pd->slot) + free(pd->slot); + + /* Remove from parent list and release itself */ + list_del(&pd->link); + free(pd); + } +} + +static void pci_set_power_limit(struct pci_device *pd) +{ + uint32_t offset, val; + uint16_t caps; + + offset = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + if (!offset) + return; /* legacy dev */ + + pci_cfg_read16(pd->phb, pd->bdfn, + offset + PCICAP_EXP_CAPABILITY_REG, &caps); + + if (!(caps & PCICAP_EXP_CAP_SLOT)) + return; /* bridge has no slot capabilities */ + if (!pd->slot || !pd->slot->power_limit) + return; + + pci_cfg_read32(pd->phb, pd->bdfn, offset + PCICAP_EXP_SLOTCAP, &val); + + val = SETFIELD(PCICAP_EXP_SLOTCAP_SPLSC, val, 0); /* 1W scale */ + val = SETFIELD(PCICAP_EXP_SLOTCAP_SPLVA, val, pd->slot->power_limit); + + pci_cfg_write32(pd->phb, pd->bdfn, offset + PCICAP_EXP_SLOTCAP, val); + + /* update the cached copy in the slot */ + pd->slot->slot_cap = val; + + PCIDBG(pd->phb, pd->bdfn, "Slot power limit set to %dW\n", + pd->slot->power_limit); +} + +/* Perform a recursive scan of the bus at bus_number populating + * the list passed as an argument. This also performs the bus + * numbering, so it returns the largest bus number that was + * assigned. + * + * Note: Eventually this might want to access some VPD information + * in order to know what slots to scan and what not etc.. + * + * XXX NOTE: We might want to enable ARI along the way... + * + * XXX NOTE: We might also want to setup the PCIe MPS/MRSS properly + * here as Linux may or may not do it + */ +uint8_t pci_scan_bus(struct phb *phb, uint8_t bus, uint8_t max_bus, + struct list_head *list, struct pci_device *parent, + bool scan_downstream) +{ + struct pci_device *pd = NULL, *rc = NULL; + uint8_t dev, fn, next_bus, max_sub; + uint32_t scan_map; + + /* Decide what to scan */ + scan_map = parent ? parent->scan_map : phb->scan_map; + + /* Do scan */ + for (dev = 0; dev < 32; dev++) { + if (!(scan_map & (1ul << dev))) + continue; + + /* Scan the device */ + pd = pci_scan_one(phb, parent, (bus << 8) | (dev << 3)); + pci_check_clear_freeze(phb); + if (!pd) + continue; + + /* Record RC when its downstream link is down */ + if (!scan_downstream && dev == 0 && !rc) + rc = pd; + + /* XXX Handle ARI */ + if (!pd->is_multifunction) + continue; + for (fn = 1; fn < 8; fn++) { + pd = pci_scan_one(phb, parent, + ((uint16_t)bus << 8) | (dev << 3) | fn); + pci_check_clear_freeze(phb); + } + } + + /* Reserve all possible buses if RC's downstream link is down + * if PCI hotplug is supported. + */ + if (rc && rc->slot && rc->slot->pluggable) { + next_bus = bus + 1; + rc->secondary_bus = next_bus; + rc->subordinate_bus = max_bus; + pci_cfg_write8(phb, rc->bdfn, PCI_CFG_SECONDARY_BUS, + rc->secondary_bus); + pci_cfg_write8(phb, rc->bdfn, PCI_CFG_SUBORDINATE_BUS, + rc->subordinate_bus); + } + + /* set the power limit for any downstream slots while we're here */ + list_for_each(list, pd, link) { + if (pd->is_bridge) + pci_set_power_limit(pd); + } + + /* + * We only scan downstream if instructed to do so by the + * caller. 
Typically we avoid the scan when we know the + * link is down already, which happens for the top level + * root complex, and avoids a long secondary timeout + */ + if (!scan_downstream) { + list_for_each(list, pd, link) + pci_slot_set_power_state(phb, pd, PCI_SLOT_POWER_OFF); + + return bus; + } + + next_bus = bus + 1; + max_sub = bus; + + /* Scan down bridges */ + list_for_each(list, pd, link) { + bool do_scan; + + if (!pd->is_bridge) + continue; + + /* Configure the bridge with the returned values */ + if (next_bus <= bus) { + PCIERR(phb, pd->bdfn, "Out of bus numbers !\n"); + max_bus = next_bus = 0; /* Failure case */ + } + + pd->secondary_bus = next_bus; + pd->subordinate_bus = max_bus; + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS, next_bus); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_bus); + if (!next_bus) + break; + + PCIDBG(phb, pd->bdfn, "Bus %02x..%02x scanning...\n", + next_bus, max_bus); + + /* Clear up bridge resources */ + pci_cleanup_bridge(phb, pd); + + /* Configure the bridge. This will enable power to the slot + * if it's currently disabled, lift reset, etc... + * + * Return false if we know there's nothing behind the bridge + */ + do_scan = pci_enable_bridge(phb, pd); + + /* Perform recursive scan */ + if (do_scan) { + max_sub = pci_scan_bus(phb, next_bus, max_bus, + &pd->children, pd, true); + } else { + /* Empty bridge. We leave room for hotplug + * slots if the downstream port is pluggable. + */ + if (pd->slot && !pd->slot->pluggable) + max_sub = next_bus; + else { + max_sub = next_bus + 4; + if (max_sub > max_bus) + max_sub = max_bus; + } + } + + pd->subordinate_bus = max_sub; + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_sub); + next_bus = max_sub + 1; + + /* power off the slot if there's nothing below it */ + if (list_empty(&pd->children)) + pci_slot_set_power_state(phb, pd, PCI_SLOT_POWER_OFF); + } + + return max_sub; +} + +static int pci_get_mps(struct phb *phb, + struct pci_device *pd, void *userdata) +{ + uint32_t *mps = (uint32_t *)userdata; + + /* Only check PCI device that had MPS capacity */ + if (phb && pd && pd->mps && *mps > pd->mps) + *mps = pd->mps; + + return 0; +} + +static int pci_configure_mps(struct phb *phb, + struct pci_device *pd, + void *userdata __unused) +{ + uint32_t ecap, aercap, mps; + uint16_t val; + + assert(phb); + assert(pd); + + /* If the MPS isn't acceptable one, bail immediately */ + mps = phb->mps; + if (mps < 128 || mps > 4096) + return 1; + + /* Retrieve PCIe and AER capability */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + aercap = pci_cap(pd, PCIECAP_ID_AER, true); + + /* PCIe device always has MPS capacity */ + if (pd->mps) { + mps = ilog2(mps) - 7; + + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, &val); + val = SETFIELD(PCICAP_EXP_DEVCTL_MPS, val, mps); + pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, val); + } + + /* Changing MPS on upstream PCI bridge might cause some error + * bits in PCIe and AER capability. To clear them to avoid + * confusion. 
+ */ + if (aercap) { + pci_cfg_write32(phb, pd->bdfn, aercap + PCIECAP_AER_UE_STATUS, + 0xffffffff); + pci_cfg_write32(phb, pd->bdfn, aercap + PCIECAP_AER_CE_STATUS, + 0xffffffff); + } + if (ecap) + pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVSTAT, 0xf); + + return 0; +} + +static void pci_disable_completion_timeout(struct phb *phb, struct pci_device *pd) +{ + uint32_t ecap, val; + uint16_t pcie_cap; + + /* PCIE capability required */ + if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) + return; + + /* Check PCIe capability version */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_CAPABILITY_REG, &pcie_cap); + if ((pcie_cap & PCICAP_EXP_CAP_VERSION) <= 1) + return; + + /* Check if it has capability to disable completion timeout */ + pci_cfg_read32(phb, pd->bdfn, ecap + PCIECAP_EXP_DCAP2, &val); + if (!(val & PCICAP_EXP_DCAP2_CMPTOUT_DIS)) + return; + + /* Disable completion timeout without more check */ + pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_DCTL2, &val); + val |= PCICAP_EXP_DCTL2_CMPTOUT_DIS; + pci_cfg_write32(phb, pd->bdfn, ecap + PCICAP_EXP_DCTL2, val); +} + +void pci_device_init(struct phb *phb, struct pci_device *pd) +{ + pci_configure_mps(phb, pd, NULL); + pci_disable_completion_timeout(phb, pd); +} + +static void pci_reset_phb(void *data) +{ + struct phb *phb = data; + struct pci_slot *slot = phb->slot; + int64_t rc; + + if (!slot || !slot->ops.run_sm) { + PCINOTICE(phb, 0, "Cannot issue reset\n"); + return; + } + + pci_slot_add_flags(slot, PCI_SLOT_FLAG_BOOTUP); + rc = slot->ops.run_sm(slot); + while (rc > 0) { + PCITRACE(phb, 0, "Waiting %ld ms\n", tb_to_msecs(rc)); + time_wait(rc); + rc = slot->ops.run_sm(slot); + } + pci_slot_remove_flags(slot, PCI_SLOT_FLAG_BOOTUP); + if (rc < 0) + PCIDBG(phb, 0, "Error %lld resetting\n", rc); +} + +static void pci_scan_phb(void *data) +{ + struct phb *phb = data; + struct pci_slot *slot = phb->slot; + uint8_t link; + uint32_t mps = 0xffffffff; + int64_t rc; + + if (!slot || !slot->ops.get_link_state) { + PCIERR(phb, 0, "Cannot query link status\n"); + link = 0; + } else { + rc = slot->ops.get_link_state(slot, &link); + if (rc != OPAL_SUCCESS) { + PCIERR(phb, 0, "Error %lld querying link status\n", + rc); + link = 0; + } + } + + if (!link) + PCIDBG(phb, 0, "Link down\n"); + else + PCIDBG(phb, 0, "Link up at x%d width\n", link); + + /* Scan root port and downstream ports if applicable */ + PCIDBG(phb, 0, "Scanning (upstream%s)...\n", + link ? 
"+downsteam" : " only"); + pci_scan_bus(phb, 0, 0xff, &phb->devices, NULL, link); + + /* Configure MPS (Max Payload Size) for PCIe domain */ + pci_walk_dev(phb, NULL, pci_get_mps, &mps); + phb->mps = mps; + pci_walk_dev(phb, NULL, pci_configure_mps, NULL); +} + +int64_t pci_register_phb(struct phb *phb, int opal_id) +{ + /* The user didn't specify an opal_id, allocate one */ + if (opal_id == OPAL_DYNAMIC_PHB_ID) { + /* This is called at init time in non-concurrent way, so no lock needed */ + for (opal_id = 0; opal_id < ARRAY_SIZE(phbs); opal_id++) + if (!phbs[opal_id]) + break; + if (opal_id >= ARRAY_SIZE(phbs)) { + prerror("PHB: Failed to find a free ID slot\n"); + return OPAL_RESOURCE; + } + } else { + if (opal_id >= ARRAY_SIZE(phbs)) { + prerror("PHB: ID %x out of range !\n", opal_id); + return OPAL_PARAMETER; + } + /* The user did specify an opal_id, check it's free */ + if (phbs[opal_id]) { + prerror("PHB: Duplicate registration of ID %x\n", opal_id); + return OPAL_PARAMETER; + } + } + + phbs[opal_id] = phb; + phb->opal_id = opal_id; + if (opal_id > last_phb_id) + last_phb_id = opal_id; + dt_add_property_cells(phb->dt_node, "ibm,opal-phbid", 0, phb->opal_id); + PCIDBG(phb, 0, "PCI: Registered PHB\n"); + + init_lock(&phb->lock); + list_head_init(&phb->devices); + + phb->filter_map = zalloc(BITMAP_BYTES(0x10000)); + assert(phb->filter_map); + + return OPAL_SUCCESS; +} + +int64_t pci_unregister_phb(struct phb *phb) +{ + /* XXX We want some kind of RCU or RWlock to make things + * like that happen while no OPAL callback is in progress, + * that way we avoid taking a lock in each of them. + * + * Right now we don't unregister so we are fine + */ + phbs[phb->opal_id] = phb; + + return OPAL_SUCCESS; +} + +struct phb *pci_get_phb(uint64_t phb_id) +{ + if (phb_id >= ARRAY_SIZE(phbs)) + return NULL; + + /* XXX See comment in pci_unregister_phb() about locking etc... 
*/ + return phbs[phb_id]; +} + +static const char *pci_class_name(uint32_t class_code) +{ + uint8_t class = class_code >> 16; + uint8_t sub = (class_code >> 8) & 0xff; + uint8_t pif = class_code & 0xff; + + switch(class) { + case 0x00: + switch(sub) { + case 0x00: return "device"; + case 0x01: return "vga"; + } + break; + case 0x01: + switch(sub) { + case 0x00: return "scsi"; + case 0x01: return "ide"; + case 0x02: return "fdc"; + case 0x03: return "ipi"; + case 0x04: return "raid"; + case 0x05: return "ata"; + case 0x06: return "sata"; + case 0x07: return "sas"; + default: return "mass-storage"; + } + case 0x02: + switch(sub) { + case 0x00: return "ethernet"; + case 0x01: return "token-ring"; + case 0x02: return "fddi"; + case 0x03: return "atm"; + case 0x04: return "isdn"; + case 0x05: return "worldfip"; + case 0x06: return "picmg"; + default: return "network"; + } + case 0x03: + switch(sub) { + case 0x00: return "vga"; + case 0x01: return "xga"; + case 0x02: return "3d-controller"; + default: return "display"; + } + case 0x04: + switch(sub) { + case 0x00: return "video"; + case 0x01: return "sound"; + case 0x02: return "telephony"; + default: return "multimedia-device"; + } + case 0x05: + switch(sub) { + case 0x00: return "memory"; + case 0x01: return "flash"; + default: return "memory-controller"; + } + case 0x06: + switch(sub) { + case 0x00: return "host"; + case 0x01: return "isa"; + case 0x02: return "eisa"; + case 0x03: return "mca"; + case 0x04: return "pci"; + case 0x05: return "pcmcia"; + case 0x06: return "nubus"; + case 0x07: return "cardbus"; + case 0x08: return "raceway"; + case 0x09: return "semi-transparent-pci"; + case 0x0a: return "infiniband"; + default: return "unknown-bridge"; + } + case 0x07: + switch(sub) { + case 0x00: + switch(pif) { + case 0x01: return "16450-serial"; + case 0x02: return "16550-serial"; + case 0x03: return "16650-serial"; + case 0x04: return "16750-serial"; + case 0x05: return "16850-serial"; + case 0x06: return "16950-serial"; + default: return "serial"; + } + case 0x01: + switch(pif) { + case 0x01: return "bi-directional-parallel"; + case 0x02: return "ecp-1.x-parallel"; + case 0x03: return "ieee1284-controller"; + case 0xfe: return "ieee1284-device"; + default: return "parallel"; + } + case 0x02: return "multiport-serial"; + case 0x03: + switch(pif) { + case 0x01: return "16450-modem"; + case 0x02: return "16550-modem"; + case 0x03: return "16650-modem"; + case 0x04: return "16750-modem"; + default: return "modem"; + } + case 0x04: return "gpib"; + case 0x05: return "smart-card"; + default: return "communication-controller"; + } + case 0x08: + switch(sub) { + case 0x00: + switch(pif) { + case 0x01: return "isa-pic"; + case 0x02: return "eisa-pic"; + case 0x10: return "io-apic"; + case 0x20: return "iox-apic"; + default: return "interrupt-controller"; + } + case 0x01: + switch(pif) { + case 0x01: return "isa-dma"; + case 0x02: return "eisa-dma"; + default: return "dma-controller"; + } + case 0x02: + switch(pif) { + case 0x01: return "isa-system-timer"; + case 0x02: return "eisa-system-timer"; + default: return "timer"; + } + case 0x03: + switch(pif) { + case 0x01: return "isa-rtc"; + default: return "rtc"; + } + case 0x04: return "hotplug-controller"; + case 0x05: return "sd-host-controller"; + default: return "system-peripheral"; + } + case 0x09: + switch(sub) { + case 0x00: return "keyboard"; + case 0x01: return "pen"; + case 0x02: return "mouse"; + case 0x03: return "scanner"; + case 0x04: return "gameport"; + default: return 
"input-controller"; + } + case 0x0a: + switch(sub) { + case 0x00: return "clock"; + default: return "docking-station"; + } + case 0x0b: + switch(sub) { + case 0x00: return "386"; + case 0x01: return "486"; + case 0x02: return "pentium"; + case 0x10: return "alpha"; + case 0x20: return "powerpc"; + case 0x30: return "mips"; + case 0x40: return "co-processor"; + default: return "cpu"; + } + case 0x0c: + switch(sub) { + case 0x00: return "firewire"; + case 0x01: return "access-bus"; + case 0x02: return "ssa"; + case 0x03: + switch(pif) { + case 0x00: return "usb-uhci"; + case 0x10: return "usb-ohci"; + case 0x20: return "usb-ehci"; + case 0x30: return "usb-xhci"; + case 0xfe: return "usb-device"; + default: return "usb"; + } + case 0x04: return "fibre-channel"; + case 0x05: return "smb"; + case 0x06: return "infiniband"; + case 0x07: + switch(pif) { + case 0x00: return "impi-smic"; + case 0x01: return "impi-kbrd"; + case 0x02: return "impi-bltr"; + default: return "impi"; + } + case 0x08: return "secos"; + case 0x09: return "canbus"; + default: return "serial-bus"; + } + case 0x0d: + switch(sub) { + case 0x00: return "irda"; + case 0x01: return "consumer-ir"; + case 0x10: return "rf-controller"; + case 0x11: return "bluetooth"; + case 0x12: return "broadband"; + case 0x20: return "enet-802.11a"; + case 0x21: return "enet-802.11b"; + default: return "wireless-controller"; + } + case 0x0e: return "intelligent-controller"; + case 0x0f: + switch(sub) { + case 0x01: return "satellite-tv"; + case 0x02: return "satellite-audio"; + case 0x03: return "satellite-voice"; + case 0x04: return "satellite-data"; + default: return "satellite-device"; + } + case 0x10: + switch(sub) { + case 0x00: return "network-encryption"; + case 0x01: return "entertainment-encryption"; + default: return "encryption"; + } + case 0x011: + switch(sub) { + case 0x00: return "dpio"; + case 0x01: return "counter"; + case 0x10: return "measurement"; + case 0x20: return "management-card"; + default: return "data-processing"; + } + } + return "device"; +} + +void pci_std_swizzle_irq_map(struct dt_node *np, + struct pci_device *pd, + struct pci_lsi_state *lstate, + uint8_t swizzle) +{ + __be32 *p, *map; + int dev, irq, esize, edevcount; + size_t map_size; + + /* Some emulated setups don't use standard interrupts + * representation + */ + if (lstate->int_size == 0) + return; + + /* Calculate the size of a map entry: + * + * 3 cells : PCI Address + * 1 cell : PCI IRQ + * 1 cell : PIC phandle + * n cells : PIC irq (n = lstate->int_size) + * + * Assumption: PIC address is 0-size + */ + esize = 3 + 1 + 1 + lstate->int_size; + + /* Number of map "device" entries + * + * A PCI Express root or downstream port needs only one + * entry for device 0. 
Anything else will get a full map + * for all possible 32 child device numbers + * + * If we have been passed a host bridge (pd == NULL) we also + * do a simple per-pin map + */ + if (!pd || (pd->dev_type == PCIE_TYPE_ROOT_PORT || + pd->dev_type == PCIE_TYPE_SWITCH_DNPORT)) { + edevcount = 1; + dt_add_property_cells(np, "interrupt-map-mask", 0, 0, 0, 7); + } else { + edevcount = 32; + dt_add_property_cells(np, "interrupt-map-mask", + 0xf800, 0, 0, 7); + } + map_size = esize * edevcount * 4 * sizeof(u32); + map = p = zalloc(map_size); + if (!map) { + prerror("Failed to allocate interrupt-map-mask !\n"); + return; + } + + for (dev = 0; dev < edevcount; dev++) { + for (irq = 0; irq < 4; irq++) { + /* Calculate pin */ + size_t i; + uint32_t new_irq = (irq + dev + swizzle) % 4; + + /* PCI address portion */ + *(p++) = cpu_to_be32(dev << (8 + 3)); + *(p++) = 0; + *(p++) = 0; + + /* PCI interrupt portion */ + *(p++) = cpu_to_be32(irq + 1); + + /* Parent phandle */ + *(p++) = cpu_to_be32(lstate->int_parent[new_irq]); + + /* Parent desc */ + for (i = 0; i < lstate->int_size; i++) + *(p++) = cpu_to_be32(lstate->int_val[new_irq][i]); + } + } + + dt_add_property(np, "interrupt-map", map, map_size); + free(map); +} + +static void pci_add_loc_code(struct dt_node *np) +{ + struct dt_node *p; + const char *lcode = NULL; + + for (p = np->parent; p; p = p->parent) { + /* prefer slot-label by default */ + lcode = dt_prop_get_def(p, "ibm,slot-label", NULL); + if (lcode) + break; + + /* otherwise use the fully qualified location code */ + lcode = dt_prop_get_def(p, "ibm,slot-location-code", NULL); + if (lcode) + break; + } + + if (!lcode) + lcode = dt_prop_get_def(np, "ibm,slot-location-code", NULL); + + if (!lcode) { + /* Fall back to finding a ibm,loc-code */ + for (p = np->parent; p; p = p->parent) { + lcode = dt_prop_get_def(p, "ibm,loc-code", NULL); + if (lcode) + break; + } + } + + if (!lcode) + return; + + dt_add_property_string(np, "ibm,loc-code", lcode); +} + +static void pci_print_summary_line(struct phb *phb, struct pci_device *pd, + struct dt_node *np, u32 rev_class, + const char *cname) +{ + const char *label, *dtype, *s; +#define MAX_SLOTSTR 80 + char slotstr[MAX_SLOTSTR + 1] = { 0, }; + + /* If it's a slot, it has a slot-label */ + label = dt_prop_get_def(np, "ibm,slot-label", NULL); + if (label) { + u32 lanes = dt_prop_get_u32_def(np, "ibm,slot-wired-lanes", 0); + static const char *lanestrs[] = { + "", " x1", " x2", " x4", " x8", "x16", "x32", "32b", "64b" + }; + const char *lstr = lanes > PCI_SLOT_WIRED_LANES_PCIX_64 ? "" : lanestrs[lanes]; + snprintf(slotstr, MAX_SLOTSTR, "SLOT=%3s %s", label, lstr); + /* XXX Add more slot info */ + } else { + /* + * No label, ignore downstream switch legs and root complex, + * Those would essentially be non-populated + */ + if (pd->dev_type != PCIE_TYPE_ROOT_PORT && + pd->dev_type != PCIE_TYPE_SWITCH_DNPORT) { + /* It's a mere device, get loc code */ + s = dt_prop_get_def(np, "ibm,loc-code", NULL); + if (s) + snprintf(slotstr, MAX_SLOTSTR, "LOC_CODE=%s", s); + } + } + + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { + static const char *pcie_types[] = { + "EP ", "LGCY", "????", "????", "ROOT", "SWUP", "SWDN", + "ETOX", "XTOE", "RINT", "EVTC" }; + if (pd->dev_type >= ARRAY_SIZE(pcie_types)) + dtype = "????"; + else + dtype = pcie_types[pd->dev_type]; + } else + dtype = pd->is_bridge ? 
"PCIB" : "PCID"; + + if (pd->is_bridge) + PCINOTICE(phb, pd->bdfn, + "[%s] %04x %04x R:%02x C:%06x B:%02x..%02x %s\n", + dtype, PCI_VENDOR_ID(pd->vdid), + PCI_DEVICE_ID(pd->vdid), + rev_class & 0xff, rev_class >> 8, pd->secondary_bus, + pd->subordinate_bus, slotstr); + else + PCINOTICE(phb, pd->bdfn, + "[%s] %04x %04x R:%02x C:%06x (%14s) %s\n", + dtype, PCI_VENDOR_ID(pd->vdid), + PCI_DEVICE_ID(pd->vdid), + rev_class & 0xff, rev_class >> 8, cname, slotstr); +} + +static void __noinline pci_add_one_device_node(struct phb *phb, + struct pci_device *pd, + struct dt_node *parent_node, + struct pci_lsi_state *lstate, + uint8_t swizzle) +{ + struct dt_node *np; + const char *cname; +#define MAX_NAME 256 + char name[MAX_NAME]; + char compat[MAX_NAME]; + uint32_t rev_class; + uint8_t intpin; + bool is_pcie; + + pci_cfg_read32(phb, pd->bdfn, PCI_CFG_REV_ID, &rev_class); + pci_cfg_read8(phb, pd->bdfn, PCI_CFG_INT_PIN, &intpin); + is_pcie = pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false); + + /* + * Some IBM PHBs (p7ioc?) have an invalid PCI class code. Linux + * uses prefers to read the class code from the DT rather than + * re-reading config space we can hack around it here. + */ + if (is_pcie && pd->dev_type == PCIE_TYPE_ROOT_PORT) + rev_class = (rev_class & 0xff) | 0x6040000; + cname = pci_class_name(rev_class >> 8); + + if (PCI_FUNC(pd->bdfn)) + snprintf(name, MAX_NAME - 1, "%s@%x,%x", + cname, PCI_DEV(pd->bdfn), PCI_FUNC(pd->bdfn)); + else + snprintf(name, MAX_NAME - 1, "%s@%x", + cname, PCI_DEV(pd->bdfn)); + pd->dn = np = dt_new(parent_node, name); + + /* + * NB: ibm,pci-config-space-type is the PAPR way of indicating the + * device has a 4KB config space. It's got nothing to do with the + * standard Type 0/1 config spaces defined by PCI. + */ + if (is_pcie || phb->phb_type == phb_type_npu_v2_opencapi) { + snprintf(compat, MAX_NAME, "pciex%x,%x", + PCI_VENDOR_ID(pd->vdid), PCI_DEVICE_ID(pd->vdid)); + dt_add_property_cells(np, "ibm,pci-config-space-type", 1); + } else { + snprintf(compat, MAX_NAME, "pci%x,%x", + PCI_VENDOR_ID(pd->vdid), PCI_DEVICE_ID(pd->vdid)); + dt_add_property_cells(np, "ibm,pci-config-space-type", 0); + } + dt_add_property_cells(np, "class-code", rev_class >> 8); + dt_add_property_cells(np, "revision-id", rev_class & 0xff); + dt_add_property_cells(np, "vendor-id", PCI_VENDOR_ID(pd->vdid)); + dt_add_property_cells(np, "device-id", PCI_DEVICE_ID(pd->vdid)); + if (intpin) + dt_add_property_cells(np, "interrupts", intpin); + + pci_handle_quirk(phb, pd); + + /* XXX FIXME: Add a few missing ones such as + * + * - devsel-speed (!express) + * - max-latency + * - min-grant + * - subsystem-id + * - subsystem-vendor-id + * - ... + */ + + /* Add slot properties if needed and iff this is a bridge */ + if (pd->slot) + pci_slot_add_dt_properties(pd->slot, np); + + /* + * Use the phb base location code for root ports if the platform + * doesn't provide one via slot->add_properties() operation. + */ + if (pd->dev_type == PCIE_TYPE_ROOT_PORT && phb->base_loc_code && + !dt_has_node_property(np, "ibm,slot-location-code", NULL)) + dt_add_property_string(np, "ibm,slot-location-code", + phb->base_loc_code); + + /* Make up location code */ + if (platform.pci_add_loc_code) + platform.pci_add_loc_code(np, pd); + else + pci_add_loc_code(np); + + /* XXX FIXME: We don't look for BARs, we only put the config space + * entry in the "reg" property. 
That's enough for Linux and we might + * even want to make this legit in future ePAPR + */ + dt_add_property_cells(np, "reg", pd->bdfn << 8, 0, 0, 0, 0); + + /* Print summary info about the device */ + pci_print_summary_line(phb, pd, np, rev_class, cname); + if (!pd->is_bridge) + return; + + dt_add_property_cells(np, "#address-cells", 3); + dt_add_property_cells(np, "#size-cells", 2); + dt_add_property_cells(np, "#interrupt-cells", 1); + + /* We want "device_type" for bridges */ + if (is_pcie) + dt_add_property_string(np, "device_type", "pciex"); + else + dt_add_property_string(np, "device_type", "pci"); + + /* Update the current interrupt swizzling level based on our own + * device number + */ + swizzle = (swizzle + PCI_DEV(pd->bdfn)) & 3; + + /* We generate a standard-swizzling interrupt map. This is pretty + * big, we *could* try to be smarter for things that aren't hotplug + * slots at least and only populate those entries for which there's + * an actual children (especially on PCI Express), but for now that + * will do + */ + pci_std_swizzle_irq_map(np, pd, lstate, swizzle); + + /* Parts of the OF address translation in the kernel will fail to + * correctly translate a PCI address if translating a 1:1 mapping + * (ie. an empty ranges property). + * Instead add a ranges property that explicitly translates 1:1. + */ + dt_add_property_cells(np, "ranges", + /* 64-bit direct mapping. We know the bridges + * don't cover the entire address space so + * use 0xf00... as a good compromise. */ + 0x02000000, 0x0, 0x0, + 0x02000000, 0x0, 0x0, + 0xf0000000, 0x0); +} + +void __noinline pci_add_device_nodes(struct phb *phb, + struct list_head *list, + struct dt_node *parent_node, + struct pci_lsi_state *lstate, + uint8_t swizzle) +{ + struct pci_device *pd; + + /* Add all child devices */ + list_for_each(list, pd, link) { + pci_add_one_device_node(phb, pd, parent_node, + lstate, swizzle); + if (list_empty(&pd->children)) + continue; + + pci_add_device_nodes(phb, &pd->children, + pd->dn, lstate, swizzle); + } +} + +static void pci_do_jobs(void (*fn)(void *)) +{ + struct cpu_job **jobs; + int i; + + jobs = zalloc(sizeof(struct cpu_job *) * ARRAY_SIZE(phbs)); + assert(jobs); + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (!phbs[i]) { + jobs[i] = NULL; + continue; + } + + jobs[i] = __cpu_queue_job(NULL, phbs[i]->dt_node->name, + fn, phbs[i], false); + assert(jobs[i]); + + } + + /* If no secondary CPUs, do everything sync */ + cpu_process_local_jobs(); + + /* Wait until all tasks are done */ + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (!jobs[i]) + continue; + + cpu_wait_job(jobs[i], true); + } + free(jobs); +} + +static void __pci_init_slots(void) +{ + unsigned int i; + + /* Some PHBs may need that long to debounce the presence detect + * after HW initialization. 
+ */ + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (phbs[i]) { + time_wait_ms(20); + break; + } + } + + if (platform.pre_pci_fixup) + platform.pre_pci_fixup(); + + prlog(PR_NOTICE, "PCI: Resetting PHBs and training links...\n"); + pci_do_jobs(pci_reset_phb); + + prlog(PR_NOTICE, "PCI: Probing slots...\n"); + pci_do_jobs(pci_scan_phb); + + if (platform.pci_probe_complete) + platform.pci_probe_complete(); + + prlog(PR_NOTICE, "PCI Summary:\n"); + + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (!phbs[i]) + continue; + + pci_add_device_nodes(phbs[i], &phbs[i]->devices, + phbs[i]->dt_node, &phbs[i]->lstate, 0); + } + + /* PHB final fixup */ + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (!phbs[i] || !phbs[i]->ops || !phbs[i]->ops->phb_final_fixup) + continue; + + phbs[i]->ops->phb_final_fixup(phbs[i]); + } +} + +static void __pci_reset(struct list_head *list) +{ + struct pci_device *pd; + struct pci_cfg_reg_filter *pcrf; + int i; + + while ((pd = list_pop(list, struct pci_device, link)) != NULL) { + __pci_reset(&pd->children); + dt_free(pd->dn); + free(pd->slot); + while((pcrf = list_pop(&pd->pcrf, struct pci_cfg_reg_filter, link)) != NULL) { + free(pcrf); + } + for(i=0; i < 64; i++) + if (pd->cap[i].free_func) + pd->cap[i].free_func(pd->cap[i].data); + free(pd); + } +} + +int64_t pci_reset(void) +{ + unsigned int i; + + prlog(PR_NOTICE, "PCI: Clearing all devices...\n"); + + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + struct phb *phb = phbs[i]; + if (!phb) + continue; + __pci_reset(&phb->devices); + + pci_slot_set_state(phb->slot, PCI_SLOT_STATE_CRESET_START); + } + + /* Do init and discovery of PCI slots in parallel */ + __pci_init_slots(); + + return 0; +} + +void pci_init_slots(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + struct phb *phb = phbs[i]; + if (!phb) + continue; + pci_slot_set_state(phb->slot, PCI_SLOT_STATE_FRESET_POWER_OFF); + } + __pci_init_slots(); +} + +/* + * Complete iteration on current level before switching to + * child level, which is the proper order for restoring + * PCI bus range on bridges. 
+ */ +static struct pci_device *__pci_walk_dev(struct phb *phb, + struct list_head *l, + int (*cb)(struct phb *, + struct pci_device *, + void *), + void *userdata) +{ + struct pci_device *pd, *child; + + if (list_empty(l)) + return NULL; + + list_for_each(l, pd, link) { + if (cb && cb(phb, pd, userdata)) + return pd; + } + + list_for_each(l, pd, link) { + child = __pci_walk_dev(phb, &pd->children, cb, userdata); + if (child) + return child; + } + + return NULL; +} + +struct pci_device *pci_walk_dev(struct phb *phb, + struct pci_device *pd, + int (*cb)(struct phb *, + struct pci_device *, + void *), + void *userdata) +{ + if (pd) + return __pci_walk_dev(phb, &pd->children, cb, userdata); + + return __pci_walk_dev(phb, &phb->devices, cb, userdata); +} + +static int __pci_find_dev(struct phb *phb, + struct pci_device *pd, void *userdata) +{ + uint16_t bdfn = *((uint16_t *)userdata); + + if (!phb || !pd) + return 0; + + if (pd->bdfn == bdfn) + return 1; + + return 0; +} + +struct pci_device *pci_find_dev(struct phb *phb, uint16_t bdfn) +{ + return pci_walk_dev(phb, NULL, __pci_find_dev, &bdfn); +} + +static int __pci_restore_bridge_buses(struct phb *phb, + struct pci_device *pd, + void *data __unused) +{ + uint32_t vdid; + + /* If the device is behind a switch, wait for the switch */ + if (!pd->is_vf && !(pd->bdfn & 7) && pd->parent != NULL && + pd->parent->dev_type == PCIE_TYPE_SWITCH_DNPORT) { + if (!pci_bridge_wait_link(phb, pd->parent, true)) { + PCIERR(phb, pd->bdfn, "Timeout waiting for switch\n"); + return -1; + } + } + + /* Wait for config space to stop returning CRS */ + if (!pci_wait_crs(phb, pd->bdfn, &vdid)) + return -1; + + /* Make all devices below a bridge "re-capture" the bdfn */ + pci_cfg_write32(phb, pd->bdfn, PCI_CFG_VENDOR_ID, vdid); + + if (!pd->is_bridge) + return 0; + + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_PRIMARY_BUS, + pd->primary_bus); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS, + pd->secondary_bus); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, + pd->subordinate_bus); + return 0; +} + +void pci_restore_bridge_buses(struct phb *phb, struct pci_device *pd) +{ + pci_walk_dev(phb, pd, __pci_restore_bridge_buses, NULL); +} + +void pci_restore_slot_bus_configs(struct pci_slot *slot) +{ + /* + * We might lose the bus numbers during the reset operation + * and we need to restore them. Otherwise, some adapters (e.g. + * IPR) can't be probed properly by the kernel. We don't need + * to restore bus numbers for every kind of reset, however, + * it's not harmful to always restore the bus numbers, which + * simplifies the logic. 
+ */ + pci_restore_bridge_buses(slot->phb, slot->pd); + if (slot->phb->ops->device_init) + pci_walk_dev(slot->phb, slot->pd, + slot->phb->ops->device_init, NULL); +} + +struct pci_cfg_reg_filter *pci_find_cfg_reg_filter(struct pci_device *pd, + uint32_t start, uint32_t len) +{ + struct pci_cfg_reg_filter *pcrf; + + /* Check on the cached range, which contains holes */ + if ((start + len) <= pd->pcrf_start || + pd->pcrf_end <= start) + return NULL; + + list_for_each(&pd->pcrf, pcrf, link) { + if (start >= pcrf->start && + (start + len) <= (pcrf->start + pcrf->len)) + return pcrf; + } + + return NULL; +} + +static bool pci_device_has_cfg_reg_filters(struct phb *phb, uint16_t bdfn) +{ + return bitmap_tst_bit(*phb->filter_map, bdfn); +} + +int64_t pci_handle_cfg_filters(struct phb *phb, uint32_t bdfn, + uint32_t offset, uint32_t len, + uint32_t *data, bool write) +{ + struct pci_device *pd; + struct pci_cfg_reg_filter *pcrf; + uint32_t flags; + + if (!pci_device_has_cfg_reg_filters(phb, bdfn)) + return OPAL_PARTIAL; + pd = pci_find_dev(phb, bdfn); + pcrf = pd ? pci_find_cfg_reg_filter(pd, offset, len) : NULL; + if (!pcrf || !pcrf->func) + return OPAL_PARTIAL; + + flags = write ? PCI_REG_FLAG_WRITE : PCI_REG_FLAG_READ; + if ((pcrf->flags & flags) != flags) + return OPAL_PARTIAL; + + return pcrf->func(pd, pcrf, offset, len, data, write); +} + +struct pci_cfg_reg_filter *pci_add_cfg_reg_filter(struct pci_device *pd, + uint32_t start, uint32_t len, + uint32_t flags, + pci_cfg_reg_func func) +{ + struct pci_cfg_reg_filter *pcrf; + + pcrf = pci_find_cfg_reg_filter(pd, start, len); + if (pcrf) + return pcrf; + + pcrf = zalloc(sizeof(*pcrf) + ((len + 0x4) & ~0x3)); + if (!pcrf) + return NULL; + + /* Don't validate the flags so that the private flags + * can be supported for debugging purpose. + */ + pcrf->flags = flags; + pcrf->start = start; + pcrf->len = len; + pcrf->func = func; + pcrf->data = (uint8_t *)(pcrf + 1); + + if (start < pd->pcrf_start) + pd->pcrf_start = start; + if (pd->pcrf_end < (start + len)) + pd->pcrf_end = start + len; + list_add_tail(&pd->pcrf, &pcrf->link); + bitmap_set_bit(*pd->phb->filter_map, pd->bdfn); + + return pcrf; +} |
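
Note (not part of the commit above): as a minimal sketch of how the helpers added in this file fit together, the following illustrates how a PHB backend or platform hook might combine pci_walk_dev() with pci_find_cap() and the config-space accessors to inspect the PCIe link capabilities of every device under a PHB. The walker callback signature, the breadth-first walk order, and the register/accessor names are taken from the code in this diff; the functions dump_link_caps() and dump_phb_link_caps() are hypothetical and exist only for illustration.

/* Illustrative only -- assumes pci_walk_dev(), pci_find_cap(),
 * pci_cfg_read32() and PCICAP_EXP_LCAP as defined/used above.
 */
static int dump_link_caps(struct phb *phb, struct pci_device *pd,
			  void *userdata __unused)
{
	int64_t ecap;
	uint32_t lcap;

	/* Locate the PCI Express capability; legacy devices get a
	 * negative return (OPAL_UNSUPPORTED) and are skipped.
	 */
	ecap = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_EXP);
	if (ecap <= 0)
		return 0;

	pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_LCAP, &lcap);
	PCIDBG(phb, pd->bdfn, "Link capabilities: %08x\n", lcap);

	/* Returning 0 continues the walk; a non-zero return stops it
	 * and makes pci_walk_dev() hand back the current device.
	 */
	return 0;
}

static void dump_phb_link_caps(struct phb *phb)
{
	/* Walk every device under the PHB, level by level, in the
	 * order implemented by __pci_walk_dev() above.
	 */
	pci_walk_dev(phb, NULL, dump_link_caps, NULL);
}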