aboutsummaryrefslogtreecommitdiffstats
path: root/roms/skiboot/hw/occ.c
diff options
context:
space:
mode:
Diffstat (limited to 'roms/skiboot/hw/occ.c')
-rw-r--r--roms/skiboot/hw/occ.c2339
1 files changed, 2339 insertions, 0 deletions
diff --git a/roms/skiboot/hw/occ.c b/roms/skiboot/hw/occ.c
new file mode 100644
index 000000000..8d7bcbec9
--- /dev/null
+++ b/roms/skiboot/hw/occ.c
@@ -0,0 +1,2339 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Interface with the On Chip Controller,
+ * which enforces power and thermal management
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <xscom.h>
+#include <xscom-p8-regs.h>
+#include <io.h>
+#include <cpu.h>
+#include <chip.h>
+#include <mem_region.h>
+#include <timebase.h>
+#include <errorlog.h>
+#include <opal-api.h>
+#include <opal-msg.h>
+#include <timer.h>
+#include <i2c.h>
+#include <powercap.h>
+#include <psr.h>
+#include <sensor.h>
+#include <occ.h>
+#include <psi.h>
+
+/* OCC Communication Area for PStates */
+
+#define P8_HOMER_OPAL_DATA_OFFSET 0x1F8000 /* presumably the P8 value for homer_opal_data_offset - assignment not in view, confirm */
+#define P9_HOMER_OPAL_DATA_OFFSET 0x0E2000 /* presumably the P9 (and later?) value for homer_opal_data_offset - confirm */
+
+#define OPAL_DYNAMIC_DATA_OFFSET 0x0B80 /* added to the pstate table address to locate struct occ_dynamic_data */
+/* relative to HOMER_OPAL_DATA_OFFSET */
+
+#define MAX_PSTATES 256 /* number of entries in every pstates[] array of struct occ_pstate_table */
+#define MAX_P8_CORES 12 /* size of v2.core_max[] */
+#define MAX_P9_CORES 24 /* size of v9.core_max[] */
+#define MAX_P10_CORES 32 /* size of v10.core_max[] */
+
+#define MAX_OPAL_CMD_DATA_LENGTH 4090 /* size of opal_command_buffer.data[] */
+#define MAX_OCC_RSP_DATA_LENGTH 8698 /* size of occ_response_buffer.data[] */
+
+#define P8_PIR_CORE_MASK 0xFFF8 /* NOTE(review): PIR masks are not used in this part of the file */
+#define P9_PIR_QUAD_MASK 0xFFF0
+#define P10_PIR_CHIP_MASK 0x0000
+#define FREQ_MAX_IN_DOMAIN 0 /* NOTE(review): FREQ_* users are outside this view */
+#define FREQ_MOST_RECENTLY_SET 1
+
+/**
+ * OCC-OPAL Shared Memory Region
+ *
+ * Reference document :
+ * https://github.com/open-power/docs/blob/master/occ/OCC_OpenPwr_FW_Interfaces.pdf
+ *
+ * Supported layout versions:
+ * - 0x01, 0x02 : P8
+ * https://github.com/open-power/occ/blob/master_p8/src/occ/proc/proc_pstate.h
+ *
+ * - 0x90 : P9
+ * https://github.com/open-power/occ/blob/master/src/occ_405/proc/proc_pstate.h
+ * In 0x90 the data is separated into :-
+ * -- Static Data (struct occ_pstate_table): Data is written once by OCC
+ * -- Dynamic Data (struct occ_dynamic_data): Data is updated at runtime
+ *
+ * - 0xA0 : described by the v10 member below; like 0x90 it is used
+ * together with struct occ_dynamic_data
+ *
+ * The version byte is decoded as major = version >> 4 and
+ * minor = version & 0xF; major 0 selects v2, 0x9 selects v9 and
+ * 0xA selects v10.
+ *
+ * struct occ_pstate_table - Pstate table layout
+ * @valid: Indicates if data is valid
+ * @version: Layout version [Major/Minor]
+ * @v2.throttle: Reason for limiting the max pstate
+ * @v9.occ_role: OCC role (Master/Slave)
+ * @v#.pstate_min: Minimum pstate ever allowed
+ * @v#.pstate_nom: Nominal pstate
+ * @v#.pstate_turbo: Maximum turbo pstate
+ * @v#.pstate_ultra_turbo: Maximum ultra turbo pstate and the maximum
+ * pstate ever allowed
+ * @v10.pstate_fixed_freq: Pstate OPAL reports as the nominal pstate
+ * @v10.pstate_base: Pstate OPAL reports as the turbo/base pstate
+ * @v10.pstate_fmax: Pstate OPAL uses as Pmax when the dynamic data
+ * wof_enabled byte is zero
+ * @v10.minor, @v10.pstate_bottom_throttle:
+ * Not read by the device-tree pstate parsing
+ * @v#.pstates: Pstate-id and frequency list from Pmax to Pmin
+ * @v#.pstates.id: Pstate-id
+ * @v#.pstates.flags: Pstate-flag(reserved)
+ * @v10.pstates.valid: Non-zero when the entry may be used; entries
+ * with valid == 0 are skipped while parsing
+ * @v2.pstates.vdd: Voltage Identifier
+ * @v2.pstates.vcs: Voltage Identifier
+ * @v#.pstates.freq_khz: Frequency in KHz
+ * @v#.core_max[1..N]: Max pstate with N active cores
+ * @spare/reserved/pad: Unused data
+ */
+struct occ_pstate_table {
+ u8 valid;
+ u8 version;
+ union __packed {
+ struct __packed { /* Version 0x01 and 0x02 */
+ u8 throttle;
+ s8 pstate_min; /* P8 pstate ids are signed: 0 (Pmax) down to -N (Pmin) */
+ s8 pstate_nom;
+ s8 pstate_turbo;
+ s8 pstate_ultra_turbo;
+ u8 spare;
+ u64 reserved;
+ struct __packed {
+ s8 id;
+ u8 flags;
+ u8 vdd;
+ u8 vcs;
+ __be32 freq_khz;
+ } pstates[MAX_PSTATES];
+ s8 core_max[MAX_P8_CORES];
+ u8 pad[100];
+ } v2;
+ struct __packed { /* Version 0x90 */
+ u8 occ_role;
+ u8 pstate_min; /* P9 and later pstate ids are unsigned: 0 (Pmax) up to +N (Pmin) */
+ u8 pstate_nom;
+ u8 pstate_turbo;
+ u8 pstate_ultra_turbo;
+ u8 spare;
+ u64 reserved1;
+ u64 reserved2;
+ struct __packed {
+ u8 id;
+ u8 flags;
+ u16 reserved;
+ __be32 freq_khz;
+ } pstates[MAX_PSTATES];
+ u8 core_max[MAX_P9_CORES];
+ u8 pad[56];
+ } v9;
+ struct __packed { /* Version 0xA0 */
+ u8 occ_role;
+ u8 pstate_min;
+ u8 pstate_fixed_freq;
+ u8 pstate_base;
+ u8 pstate_ultra_turbo;
+ u8 pstate_fmax;
+ u8 minor;
+ u8 pstate_bottom_throttle;
+ u8 spare;
+ u8 spare1;
+ u32 reserved_32;
+ u64 reserved_64;
+ struct __packed {
+ u8 id;
+ u8 valid;
+ u16 reserved;
+ __be32 freq_khz;
+ } pstates[MAX_PSTATES];
+ u8 core_max[MAX_P10_CORES];
+ u8 pad[48];
+ } v10;
+ };
+} __packed;
+
+/**
+ * OPAL-OCC Command Response Interface
+ *
+ * OPAL-OCC Command Buffer
+ *
+ * ---------------------------------------------------------------------
+ * | OPAL | Cmd | OPAL | | Cmd Data | Cmd Data | OPAL |
+ * | Cmd | Request | OCC | Reserved | Length | Length | Cmd |
+ * | Flags | ID | Cmd | | (MSB) | (LSB) | Data... |
+ * ---------------------------------------------------------------------
+ * | ….OPAL Command Data up to max of Cmd Data Length 4090 bytes |
+ * | |
+ * ---------------------------------------------------------------------
+ *
+ * OPAL Command Flag
+ *
+ * -----------------------------------------------------------------
+ * | Bit 7 | Bit 6 | Bit 5 | Bit 4 | Bit 3 | Bit 2 | Bit 1 | Bit 0 |
+ * | (msb) | | | | | | | (lsb) |
+ * -----------------------------------------------------------------
+ * |Cmd | | | | | | | |
+ * |Ready | | | | | | | |
+ * -----------------------------------------------------------------
+ *
+ * struct opal_command_buffer - Defines the layout of OPAL command buffer
+ * @flag: Provides general status of the command
+ * @request_id: Token to identify request
+ * @cmd: Command sent
+ * @data_size: Command data length
+ * @data: Command specific data
+ * @spare: Unused byte
+ */
+struct opal_command_buffer {
+ u8 flag; /* OPAL command flag; bit 7 (msb) is "Cmd Ready" per the table above */
+ u8 request_id; /* token used to identify the request */
+ u8 cmd; /* command code */
+ u8 spare; /* the "Reserved" byte of the diagram above */
+ u16 data_size; /* number of valid bytes in data[]; MSB first in the diagram above */
+ u8 data[MAX_OPAL_CMD_DATA_LENGTH]; /* command payload, at most 4090 bytes */
+} __packed;
+
+/**
+ * OPAL-OCC Response Buffer
+ *
+ * ---------------------------------------------------------------------
+ * | OCC | Cmd | OPAL | Response | Rsp Data | Rsp Data | OPAL |
+ * | Rsp | Request | OCC | Status | Length | Length | Rsp |
+ * | Flags | ID | Cmd | | (MSB) | (LSB) | Data... |
+ * ---------------------------------------------------------------------
+ * | ….OPAL Response Data up to max of Rsp Data Length 8698 bytes |
+ * | |
+ * ---------------------------------------------------------------------
+ *
+ * OCC Response Flag
+ *
+ * -----------------------------------------------------------------
+ * | Bit 7 | Bit 6 | Bit 5 | Bit 4 | Bit 3 | Bit 2 | Bit 1 | Bit 0 |
+ * | (msb) | | | | | | | (lsb) |
+ * -----------------------------------------------------------------
+ * | | | | | | |OCC in | Rsp |
+ * | | | | | | |progress|Ready |
+ * -----------------------------------------------------------------
+ *
+ * struct occ_response_buffer - Defines the layout of OCC response buffer
+ * @flag: Provides general status of the response
+ * @request_id: Token to identify request
+ * @cmd: Command requested
+ * @status: Indicates success/failure status of
+ * the command
+ * @data_size: Response data length
+ * @data: Response specific data
+ */
+struct occ_response_buffer {
+ u8 flag; /* OCC response flag; bit 1 is "OCC in progress", bit 0 is "Rsp Ready" per the table above */
+ u8 request_id; /* token used to identify the request */
+ u8 cmd; /* command this response belongs to */
+ u8 status; /* success/failure status of the command */
+ u16 data_size; /* number of valid bytes in data[]; MSB first in the diagram above */
+ u8 data[MAX_OCC_RSP_DATA_LENGTH]; /* response payload, at most 8698 bytes */
+} __packed;
+
+/**
+ * OCC-OPAL Shared Memory Interface Dynamic Data Vx90 and VxA0
+ *
+ * The fixed header (everything before @cmd) is 128 bytes:
+ * 4 + 1 + 5 single bytes, 4 * u16 and 110 bytes of padding. The
+ * command buffer (6 + 4090 = 4096 bytes) follows directly after it
+ * and the response buffer directly after that.
+ *
+ * struct occ_dynamic_data - Contains runtime attributes
+ * @occ_state:		Current state of OCC
+ * @major_version:	Major version number
+ * @minor_version:	Minor version number (backwards compatible)
+ *			Version 1 indicates GPU presence populated
+ * @gpus_present:	Bitmask of GPUs present (on systems where GPU
+ *			presence is detected through APSS)
+ * @v9.spare1:		Version 0x90 view of byte 4: unused
+ * @v10.wof_enabled:	Version 0xA0 view of byte 4: non-zero makes OPAL
+ *			use pstate_ultra_turbo as Pmax, zero makes it
+ *			use pstate_fmax
+ * @cpu_throttle:	Reason for limiting the max pstate
+ * @mem_throttle:	Reason for throttling memory
+ * @quick_pwr_drop:	Indicates if QPD is asserted
+ * @pwr_shifting_ratio:	Indicates the current percentage of power to
+ *			take away from the CPU vs GPU when shifting
+ *			power to maintain a power cap. Value of 100
+ *			means take all power from CPU.
+ * @pwr_cap_type:	Indicates type of power cap in effect
+ * @hard_min_pwr_cap:	Hard minimum system power cap in Watts.
+ *			Guaranteed unless hardware failure
+ * @max_pwr_cap:	Maximum allowed system power cap in Watts
+ * @cur_pwr_cap:	Current system power cap
+ * @soft_min_pwr_cap:	Soft powercap minimum. OCC may or may not be
+ *			able to maintain this
+ * @spare/reserved:	Unused data
+ * @cmd:		Opal Command Buffer
+ * @rsp:		OCC Response Buffer
+ */
+struct occ_dynamic_data {
+	u8 occ_state;
+	u8 major_version;
+	u8 minor_version;
+	u8 gpus_present;
+	/*
+	 * Byte 4 is interpreted per interface version. The two views
+	 * must overlay each other (as in struct occ_pstate_table);
+	 * declaring them as consecutive members would push every
+	 * following field, including cmd and rsp, one byte too far.
+	 */
+	union __packed {
+		struct __packed { /* Version 0x90 */
+			u8 spare1;
+		} v9;
+		struct __packed { /* Version 0xA0 */
+			u8 wof_enabled;
+		} v10;
+	};
+	u8 cpu_throttle;
+	u8 mem_throttle;
+	u8 quick_pwr_drop;
+	u8 pwr_shifting_ratio;
+	u8 pwr_cap_type;
+	u16 hard_min_pwr_cap;
+	u16 max_pwr_cap;
+	u16 cur_pwr_cap;
+	u16 soft_min_pwr_cap;
+	u8 pad[110];
+	struct opal_command_buffer cmd;
+	struct occ_response_buffer rsp;
+} __packed;
+
+static bool occ_reset; /* reported by is_occ_reset(); cleared by occ_throttle_poll() once every pstate table is valid again */
+static struct lock occ_lock = LOCK_UNLOCKED; /* taken by occ_throttle_poll() and occ_msg_consumed() */
+static unsigned long homer_opal_data_offset; /* added to chip->homer_base to locate the pstate table; set outside this view */
+
+DEFINE_LOG_ENTRY(OPAL_RC_OCC_PSTATE_INIT, OPAL_PLATFORM_ERR_EVT, OPAL_OCC,
+ OPAL_CEC_HARDWARE, OPAL_INFO,
+ OPAL_NA); /* logged by cpu_pstates_prepare_core() when an XSCOM access fails */
+
+DEFINE_LOG_ENTRY(OPAL_RC_OCC_TIMEOUT, OPAL_PLATFORM_ERR_EVT, OPAL_OCC,
+ OPAL_CEC_HARDWARE, OPAL_UNRECOVERABLE_ERR_GENERAL,
+ OPAL_NA); /* NOTE(review): users of this entry are outside this view */
+
+/*
+ * POWER9 and newer platforms have pstate values which are unsigned
+ * positive values. They are continuous set of unsigned integers
+ * [0 to +N] where Pmax is 0 and Pmin is N. The linear ordering of
+ * pstates for P9 has changed compared to P8. Where P8 has negative
+ * pstate values advertised as [0 to -N] where Pmax is 0 and
+ * Pmin is -N. The following routine helps to abstract pstate
+ * comparison with pmax and perform sanity checks on pstate limits.
+ */
+
+/**
+ * cmp_pstates: Order two pstate ids by the performance level they
+ *		stand for, hiding the P8 vs. P9-and-later numbering.
+ *
+ * @a,@b: The pstate ids of the pstates being compared.
+ *
+ * On P8 a numerically larger id is the higher pstate; on every other
+ * generation a numerically larger id is the lower pstate.
+ *
+ * Returns:	-1 : If pstate associated with @a is smaller than
+ *		     the pstate associated with @b.
+ *		 0 : If pstates associated with @a and @b are equal.
+ *		 1 : If pstate associated with @a is greater than
+ *		     the pstate associated with @b.
+ */
+static int cmp_pstates(int a, int b)
+{
+	/* Result to hand back when @a is the numerically larger id */
+	int when_a_larger = (proc_gen == proc_gen_p8) ? 1 : -1;
+
+	if (a == b)
+		return 0;
+
+	return (a > b) ? when_a_larger : -when_a_larger;
+}
+
+/*
+ * Locate the OCC pstate table of @chip: it sits homer_opal_data_offset
+ * bytes into the chip's HOMER area.
+ */
+static inline
+struct occ_pstate_table *get_occ_pstate_table(struct proc_chip *chip)
+{
+	void *table = (void *)(chip->homer_base + homer_opal_data_offset);
+
+	return table;
+}
+
+/*
+ * Locate the OCC dynamic data of @chip: it sits
+ * OPAL_DYNAMIC_DATA_OFFSET bytes past the start of the pstate table.
+ */
+static inline
+struct occ_dynamic_data *get_occ_dynamic_data(struct proc_chip *chip)
+{
+	void *dyn = (void *)(chip->homer_base + homer_opal_data_offset +
+			     OPAL_DYNAMIC_DATA_OFFSET);
+
+	return dyn;
+}
+
+/*
+ * For every chip that has at least one active EX unit, wait for its OCC
+ * to publish a known interface version and then check that the OCC is
+ * usable:
+ *  - versions 0x01 and 0x02: the pstate table valid byte must be 1
+ *  - versions 0x90 and 0xA0: occ_state in the dynamic data must be 0x3
+ *
+ * Returns false as soon as one chip fails a check. When every chip
+ * passes, "ibm,occ-functional-state" is added to each "ibm,xscom" node
+ * that does not have it yet and true is returned.
+ */
+static bool wait_for_all_occ_init(void)
+{
+	struct proc_chip *chip;
+	struct dt_node *xn;
+	uint64_t t_begin, t_end;
+	uint32_t max_wait = 0;
+
+	if (platform.occ_timeout)
+		max_wait = platform.occ_timeout();
+
+	t_begin = mftb();
+	for_each_chip(chip) {
+		struct occ_pstate_table *table;
+		struct occ_dynamic_data *dyn = NULL;
+		int remaining;
+		u8 version;
+
+		/*
+		 * A chip without any EX unit present never gets its
+		 * pstate-table updated by the OCC, so there is nothing
+		 * to check.
+		 */
+		if (!chip->ex_present) {
+			prlog(PR_DEBUG, "OCC: Chip %02x has no active EX units. Skipping check\n",
+			      chip->id);
+			continue;
+		}
+
+		/* Check for valid homer address */
+		if (!chip->homer_base) {
+			/**
+			 * @fwts-label OCCInvalidHomerBase
+			 * @fwts-advice The HOMER base address for a chip
+			 * was not valid. This means that OCC (On Chip
+			 * Controller) will be non-functional and CPU
+			 * frequency scaling will not be functional. CPU may
+			 * be set to a safe, low frequency. Power savings in
+			 * CPU idle or CPU hotplug may be impacted.
+			 */
+			prlog(PR_ERR,"OCC: Chip: %x homer_base is not valid\n",
+			      chip->id);
+			return false;
+		}
+
+		table = get_occ_pstate_table(chip);
+
+		/*
+		 * Wait for the OCC to set an appropriate version byte.
+		 * The wait is needed since on some platforms (such as P8
+		 * Tuletta), OCC is not loaded before OPAL boot. Hence
+		 * initialization can take a while.
+		 *
+		 * Polling for version == (0x01/0x02/0x90/0xA0) is ok
+		 * because all of homer_base+size is cleared before the
+		 * memory is passed to host services, which ensures
+		 * version == 0x0 before OCC load.
+		 *
+		 * The budget is ten 100ms polls per unit of timeout.
+		 */
+		for (remaining = max_wait * 10; remaining; remaining--) {
+			version = table->version;
+
+			if (version == 0x01 || version == 0x02 ||
+			    version == 0x90 || version == 0xA0)
+				break;
+
+			time_wait_ms(100);
+		}
+
+		version = table->version;
+		switch (version) {
+		case 0x01:
+		case 0x02:
+			/*
+			 * OCC-OPAL interface version 0x1 and 0x2 do not
+			 * have the dynamic data. Hence the only way to
+			 * figure out if the OCC is up or not is to check
+			 * the valid byte in the pstate table.
+			 */
+			if (table->valid == 1)
+				break;
+
+			/**
+			 * @fwts-label OCCInvalidPStateTable
+			 * @fwts-advice The pstate table for a chip
+			 * was not valid. This means that OCC (On Chip
+			 * Controller) will be non-functional and CPU
+			 * frequency scaling will not be functional. CPU may
+			 * be set to a low, safe frequency. This means
+			 * that CPU idle states and CPU frequency scaling
+			 * may not be functional.
+			 */
+			prlog(PR_ERR, "OCC: Chip: %x PState table is not valid\n",
+			      chip->id);
+			return false;
+
+		case 0x90:
+		case 0xA0:
+			/*
+			 * OCC-OPAL interface versions 0x90 and 0xA0 have
+			 * a dynamic data section. This has an occ_state
+			 * field whose values inform about the state of
+			 * the OCC.
+			 *
+			 * 0x00 = OCC not running. No communication
+			 *        allowed.
+			 *
+			 * 0x01 = Standby. No communication allowed.
+			 *
+			 * 0x02 = Observation State. Communication
+			 *        allowed and is command dependent.
+			 *
+			 * 0x03 = Active State. Communication allowed
+			 *        and is command dependent.
+			 *
+			 * 0x04 = Safe State. No communication
+			 *        allowed. Just like CPU throttle
+			 *        status, some failures will not allow
+			 *        for OCC to update state to safe.
+			 *
+			 * 0x05 = Characterization State.
+			 *        Communication allowed and is command
+			 *        dependent.
+			 *
+			 * We will error out if OCC is not in the
+			 * Active State.
+			 *
+			 * XXX : Should we error out only if no
+			 *       communication is allowed with the
+			 *       OCC ?
+			 */
+			dyn = get_occ_dynamic_data(chip);
+			if (dyn->occ_state == 0x3)
+				break;
+
+			/**
+			 * @fwts-label OCCInactive
+			 * @fwts-advice The OCC for a chip was not active.
+			 * This means that CPU frequency scaling will
+			 * not be functional. CPU may be set to a low,
+			 * safe frequency. This means that CPU idle
+			 * states and CPU frequency scaling may not be
+			 * functional.
+			 */
+			prlog(PR_ERR, "OCC: Chip: %x: OCC not active\n",
+			      chip->id);
+			return false;
+
+		default:
+			prlog(PR_ERR, "OCC: Unknown OCC-OPAL interface version.\n");
+			return false;
+		}
+
+		chip->occ_functional = true;
+
+		prlog(PR_DEBUG, "OCC: Chip %02x Data (%016llx) = %016llx\n",
+		      chip->id, (uint64_t)table, be64_to_cpu(*(__be64 *)table));
+
+		/* dyn is only set for the versions that have dynamic data */
+		if (dyn)
+			prlog(PR_DEBUG, "OCC: Chip %02x Dynamic Data (%016llx) = %016llx\n",
+			      chip->id, (uint64_t)dyn,
+			      be64_to_cpu(*(__be64 *)dyn));
+	}
+
+	t_end = mftb();
+	prlog(PR_NOTICE, "OCC: All Chip Rdy after %lu ms\n",
+	      tb_to_msecs(t_end - t_begin));
+
+	dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
+		if (dt_find_property(xn, "ibm,occ-functional-state"))
+			continue;
+
+		dt_add_property_cells(xn, "ibm,occ-functional-state", 0x1);
+	}
+	return true;
+}
+
+/*
+ * The OCC lists pstate table entries in continuous descending order.
+ * Walk the version 0x01/0x02 table, ignore ids that lie above Pmax and
+ * copy id and frequency (converted from kHz to MHz) of every other
+ * entry into @dt_id / @dt_freq as big-endian cells. The entry whose id
+ * equals Pmin is the last valid element, so the walk stops after
+ * copying it. At most @nr_pstates entries are copied; a mismatch
+ * between the copied count and @nr_pstates is reported.
+ */
+static void parse_pstates_v2(struct occ_pstate_table *data, __be32 *dt_id,
+			     __be32 *dt_freq, int nr_pstates, int pmax, int pmin)
+{
+	int pos, count = 0;
+
+	for (pos = 0; pos < MAX_PSTATES && count < nr_pstates; pos++) {
+		int id = data->v2.pstates[pos].id;
+
+		if (cmp_pstates(id, pmax) <= 0) {
+			dt_id[count] = cpu_to_be32(id);
+			dt_freq[count] = cpu_to_be32(
+				be32_to_cpu(data->v2.pstates[pos].freq_khz) / 1000);
+			count++;
+
+			if (id == pmin)
+				break;
+		}
+	}
+
+	if (count != nr_pstates)
+		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
+			nr_pstates, count);
+}
+
+/*
+ * Version 0x90 counterpart of parse_pstates_v2(): copy id and MHz
+ * frequency of every table entry that is not above Pmax into @dt_id /
+ * @dt_freq, stopping after the Pmin entry or once @nr_pstates entries
+ * have been copied, and report a count mismatch.
+ */
+static void parse_pstates_v9(struct occ_pstate_table *data, __be32 *dt_id,
+			     __be32 *dt_freq, int nr_pstates, int pmax, int pmin)
+{
+	int pos, count = 0;
+
+	for (pos = 0; pos < MAX_PSTATES && count < nr_pstates; pos++) {
+		int id = data->v9.pstates[pos].id;
+
+		if (cmp_pstates(id, pmax) <= 0) {
+			dt_id[count] = cpu_to_be32(id);
+			dt_freq[count] = cpu_to_be32(
+				be32_to_cpu(data->v9.pstates[pos].freq_khz) / 1000);
+			count++;
+
+			if (id == pmin)
+				break;
+		}
+	}
+
+	if (count != nr_pstates)
+		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
+			nr_pstates, count);
+}
+
+/*
+ * Version 0xA0 variant of the pstate table walk. In addition to
+ * ignoring ids above Pmax, entries whose valid byte is zero are
+ * skipped with a warning and counted. The walk stops after copying the
+ * Pmin entry or once @nr_pstates entries have been copied. An error is
+ * reported unless copied + skipped entries add up to @nr_pstates.
+ */
+static void parse_pstates_v10(struct occ_pstate_table *data, __be32 *dt_id,
+			      __be32 *dt_freq, int nr_pstates, int pmax, int pmin)
+{
+	int pos, count = 0, skipped = 0;
+
+	for (pos = 0; pos < MAX_PSTATES && count < nr_pstates; pos++) {
+		int id = data->v10.pstates[pos].id;
+
+		if (cmp_pstates(id, pmax) > 0)
+			continue;
+
+		if (data->v10.pstates[pos].valid) {
+			dt_id[count] = cpu_to_be32(id);
+			dt_freq[count] = cpu_to_be32(
+				be32_to_cpu(data->v10.pstates[pos].freq_khz) / 1000);
+			count++;
+
+			if (id == pmin)
+				break;
+		} else {
+			prlog(PR_WARNING, "OCC: Found Invalid pstate with index %d. Skipping it.\n", pos);
+			skipped++;
+		}
+	}
+
+	if (count + skipped != nr_pstates)
+		prerror("OCC: Expected pstates(%d) not equal to (Parsed pstates(%d) + Invalid Pstates (%d))\n",
+			nr_pstates, count, skipped);
+}
+
+/*
+ * Export the voltage identifiers of the version 0x01/0x02 pstate table.
+ *
+ * @occ_data:   pstate table to read (v2 layout)
+ * @node:       device-tree node receiving "ibm,pstate-vdds" and
+ *              "ibm,pstate-vcss"
+ * @nr_pstates: number of bytes in each exported property
+ * @pmax,@pmin: pstate limits; entries above @pmax are ignored and the
+ *              walk stops after the @pmin entry
+ *
+ * The table walk mirrors parse_pstates_v2(). If it ends before
+ * @nr_pstates entries were found, the remaining bytes are set to zero
+ * so that no uninitialised heap contents end up in the device tree.
+ */
+static void parse_vid(struct occ_pstate_table *occ_data,
+		      struct dt_node *node, u8 nr_pstates,
+		      int pmax, int pmin)
+{
+	u8 *dt_vdd, *dt_vcs;
+	int i, j;
+
+	dt_vdd = malloc(nr_pstates);
+	assert(dt_vdd);
+	dt_vcs = malloc(nr_pstates);
+	assert(dt_vcs);
+
+	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
+		if (cmp_pstates(occ_data->v2.pstates[i].id, pmax) > 0)
+			continue;
+
+		dt_vdd[j] = occ_data->v2.pstates[i].vdd;
+		dt_vcs[j] = occ_data->v2.pstates[i].vcs;
+		j++;
+
+		if (occ_data->v2.pstates[i].id == pmin)
+			break;
+	}
+
+	/* Fewer entries than expected: do not export stale heap bytes */
+	for (; j < nr_pstates; j++) {
+		dt_vdd[j] = 0;
+		dt_vcs[j] = 0;
+	}
+
+	dt_add_property(node, "ibm,pstate-vdds", dt_vdd, nr_pstates);
+	dt_add_property(node, "ibm,pstate-vcss", dt_vcs, nr_pstates);
+
+	free(dt_vdd);
+	free(dt_vcs);
+}
+
+/* Add device tree properties to describe pstates states */
+/* Return nominal pstate to set in each core
+ *
+ * @power_mgt: device-tree node that receives the "ibm,pstate-*"
+ * properties
+ * @pstate_nom: written with the (possibly clipped) nominal pstate, only
+ * when the function returns true
+ *
+ * Returns false without touching @pstate_nom when no chip has a valid
+ * pstate table, when the table version does not match the processor
+ * generation or is unknown, or when the pstate limits/range fail the
+ * sanity checks.
+ */
+static bool add_cpu_pstate_properties(struct dt_node *power_mgt,
+ int *pstate_nom)
+{
+ struct proc_chip *chip;
+ uint64_t occ_data_area;
+ struct occ_pstate_table *occ_data = NULL;
+ struct occ_dynamic_data *occ_dyn_data;
+ /* Arrays for device tree */
+ __be32 *dt_id, *dt_freq;
+ int pmax, pmin, pnom;
+ u8 nr_pstates;
+ bool ultra_turbo_supported;
+ int i, major, minor;
+
+ prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
+
+ /*
+ * Find first chip with an OCC which has as a valid
+ * pstate-table
+ *
+ * When the loop is left through the break, chip still refers to
+ * that chip; it is used further down to read the dynamic data
+ * (version 0xA0) and the number of available cores.
+ */
+ for_each_chip(chip) {
+ occ_data = get_occ_pstate_table(chip);
+
+ /* Dump first 16 bytes of PState table */
+ occ_data_area = (uint64_t)occ_data;
+ prlog(PR_DEBUG, "OCC: Chip %02d :Data (%16llx) = %16llx %16llx\n",
+ chip->id, occ_data_area,
+ be64_to_cpu(*(__be64 *)occ_data_area),
+ be64_to_cpu(*(__be64 *)(occ_data_area + 8)));
+
+ if (occ_data->valid)
+ break;
+ /*
+ * XXX : Error out if !occ_data->valid but Chip has at
+ * least one EX Unit?
+ */
+ }
+
+ assert(occ_data);
+ if (!occ_data->valid) {
+ /**
+ * @fwts-label OCCInvalidPStateTableDT
+ * @fwts-advice The pstate tables for none of the chips
+ * are valid. This means that OCC (On Chip
+ * Controller) will be non-functional. This means
+ * that CPU idle states and CPU frequency scaling
+ * will not be functional as OPAL doesn't populate
+ * the device tree with pstates in this case.
+ */
+ prlog(PR_ERR, "OCC: PState table is not valid\n");
+ return false;
+ }
+
+ /*
+ * Workload-Optimized-Frequency(WOF) or Ultra-Turbo is supported
+ * from version 0x02 onwards. If WOF is disabled then, the max
+ * ultra_turbo pstate will be equal to max turbo pstate.
+ */
+ ultra_turbo_supported = true;
+
+ major = occ_data->version >> 4;
+ minor = occ_data->version & 0xF;
+
+ /* Parse Pmax, Pmin and Pnominal
+ *
+ * major 0 -> v2 layout (Pmax is turbo for minor 1, else ultra turbo)
+ * major 0x9 -> v9 layout (Pmax is ultra turbo)
+ * major 0xA -> v10 layout (Pnominal is pstate_fixed_freq; Pmax is
+ * ultra turbo when wof_enabled is set, else pstate_fmax)
+ */
+ switch (major) {
+ case 0:
+ if (proc_gen >= proc_gen_p9) {
+ /**
+ * @fwts-label OCCInvalidVersion02
+ * @fwts-advice The PState table layout version is not
+ * supported in P9. So OPAL will not parse the PState
+ * table. CPU frequency scaling will not be functional
+ * as frequency and pstate-ids are not added to DT.
+ */
+ prerror("OCC: Version %x is not supported in P9\n",
+ occ_data->version);
+ return false;
+ }
+ if (minor == 0x1)
+ ultra_turbo_supported = false;
+ pmin = occ_data->v2.pstate_min;
+ pnom = occ_data->v2.pstate_nom;
+ if (ultra_turbo_supported)
+ pmax = occ_data->v2.pstate_ultra_turbo;
+ else
+ pmax = occ_data->v2.pstate_turbo;
+ break;
+ case 0x9:
+ if (proc_gen == proc_gen_p8) {
+ /**
+ * @fwts-label OCCInvalidVersion90
+ * @fwts-advice The PState table layout version is not
+ * supported in P8. So OPAL will not parse the PState
+ * table. CPU frequency scaling will not be functional
+ * as frequency and pstate-ids are not added to DT.
+ */
+ prerror("OCC: Version %x is not supported in P8\n",
+ occ_data->version);
+ return false;
+ }
+ pmin = occ_data->v9.pstate_min;
+ pnom = occ_data->v9.pstate_nom;
+ pmax = occ_data->v9.pstate_ultra_turbo;
+ break;
+ case 0xA:
+ pmin = occ_data->v10.pstate_min;
+ pnom = occ_data->v10.pstate_fixed_freq;
+ occ_dyn_data = get_occ_dynamic_data(chip);
+ if (occ_dyn_data->v10.wof_enabled)
+ pmax = occ_data->v10.pstate_ultra_turbo;
+ else
+ pmax = occ_data->v10.pstate_fmax;
+ break;
+ default:
+ /**
+ * @fwts-label OCCUnsupportedVersion
+ * @fwts-advice The PState table layout version is not
+ * supported. So OPAL will not parse the PState table.
+ * CPU frequency scaling will not be functional as OPAL
+ * doesn't populate the device tree with pstates.
+ */
+ prerror("OCC: Unsupported pstate table layout version %d\n",
+ occ_data->version);
+ return false;
+ }
+
+ /* Sanity check for pstate limits */
+ if (cmp_pstates(pmin, pmax) > 0) {
+ /**
+ * @fwts-label OCCInvalidPStateLimits
+ * @fwts-advice The min pstate is greater than the
+ * max pstate, this could be due to corrupted/invalid
+ * data in OCC-OPAL shared memory region. So OPAL has
+ * not added pstates to device tree. This means that
+ * CPU Frequency management will not be functional in
+ * the host.
+ */
+ prerror("OCC: Invalid pstate limits. Pmin(%d) > Pmax (%d)\n",
+ pmin, pmax);
+ return false;
+ }
+
+ if (cmp_pstates(pnom, pmax) > 0) {
+ /**
+ * @fwts-label OCCInvalidNominalPState
+ * @fwts-advice The nominal pstate is greater than the
+ * max pstate, this could be due to corrupted/invalid
+ * data in OCC-OPAL shared memory region. So OPAL has
+ * limited the nominal pstate to max pstate.
+ */
+ prerror("OCC: Clipping nominal pstate(%d) to Pmax(%d)\n",
+ pnom, pmax);
+ pnom = pmax;
+ }
+
+ nr_pstates = labs(pmax - pmin) + 1; /* nr_pstates is a u8: a span of 256 wraps to 0 and is rejected by the <= 1 check below */
+ prlog(PR_DEBUG, "OCC: Version %x Min %d Nom %d Max %d Nr States %d\n",
+ occ_data->version, pmin, pnom, pmax, nr_pstates);
+ if (((major == 0x9 || major == 0xA) && nr_pstates <= 1) ||
+ (major == 0 && (nr_pstates <= 1 || nr_pstates > 128))) {
+ /**
+ * @fwts-label OCCInvalidPStateRange
+ * @fwts-advice The number of pstates is outside the valid
+ * range (currently <=1 or > 128 on p8, >255 on P9), so OPAL
+ * has not added pstates to the device tree. This means that
+ * OCC (On Chip Controller) will be non-functional. This means
+ * that CPU idle states and CPU frequency scaling
+ * will not be functional.
+ */
+ prerror("OCC: OCC range is not valid; No of pstates = %d\n",
+ nr_pstates);
+ return false;
+ }
+
+ dt_id = malloc(nr_pstates * sizeof(__be32));
+ assert(dt_id);
+ dt_freq = malloc(nr_pstates * sizeof(__be32));
+ assert(dt_freq);
+
+ switch (major) {
+ case 0:
+ parse_pstates_v2(occ_data, dt_id, dt_freq, nr_pstates,
+ pmax, pmin);
+ break;
+ case 0x9:
+ parse_pstates_v9(occ_data, dt_id, dt_freq, nr_pstates,
+ pmax, pmin);
+ break;
+ case 0xA:
+ parse_pstates_v10(occ_data, dt_id, dt_freq, nr_pstates,
+ pmax, pmin);
+ break;
+ default:
+ return false; /* not reachable: unknown majors already returned above (would otherwise leak dt_id/dt_freq) */
+ }
+
+ /* Add the device-tree entries
+ *
+ * NOTE(review): both arrays are exported with nr_pstates cells
+ * even when the parser filled fewer of them (it only logs the
+ * mismatch; parse_pstates_v10() also skips invalid entries), so
+ * trailing cells may hold uninitialised malloc() contents.
+ */
+ dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
+ nr_pstates * sizeof(__be32));
+ dt_add_property(power_mgt, "ibm,pstate-frequencies-mhz", dt_freq,
+ nr_pstates * sizeof(__be32));
+ dt_add_property_cells(power_mgt, "ibm,pstate-min", pmin);
+ dt_add_property_cells(power_mgt, "ibm,pstate-nominal", pnom);
+ dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
+
+ free(dt_freq);
+ free(dt_id);
+
+ /*
+ * Parse and add WOF properties: turbo, ultra-turbo and core_max array.
+ * core_max[1..n] array provides the max sustainable pstate that can be
+ * achieved with i active cores in the chip.
+ *
+ * "ibm,pstate-turbo" is clipped to Pmax, while "ibm,pstate-base"
+ * is written with the unclipped turbo value (pstate_base on
+ * version 0xA0).
+ */
+ if (ultra_turbo_supported) {
+ int pturbo, pultra_turbo;
+ u8 nr_cores = get_available_nr_cores_in_chip(chip->id);
+ __be32 *dt_cmax;
+
+ dt_cmax = malloc(nr_cores * sizeof(u32));
+ assert(dt_cmax);
+ switch (major) {
+ case 0:
+ pturbo = occ_data->v2.pstate_turbo;
+ pultra_turbo = occ_data->v2.pstate_ultra_turbo;
+ for (i = 0; i < nr_cores; i++)
+ dt_cmax[i] = cpu_to_be32(occ_data->v2.core_max[i]);
+ break;
+ case 0x9:
+ pturbo = occ_data->v9.pstate_turbo;
+ pultra_turbo = occ_data->v9.pstate_ultra_turbo;
+ for (i = 0; i < nr_cores; i++)
+ dt_cmax[i] = cpu_to_be32(occ_data->v9.core_max[i]);
+ break;
+ case 0xA:
+ pturbo = occ_data->v10.pstate_base;
+ pultra_turbo = occ_data->v10.pstate_ultra_turbo;
+ for (i = 0; i < nr_cores; i++)
+ dt_cmax[i] = cpu_to_be32(occ_data->v10.core_max[i]);
+ break;
+ default:
+ return false; /* not reachable for the same reason as above (would otherwise leak dt_cmax) */
+ }
+
+ if (cmp_pstates(pturbo, pmax) > 0) {
+ prerror("OCC: Clipping turbo pstate(%d) to Pmax(%d)\n",
+ pturbo, pmax);
+ dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
+ pmax);
+ } else {
+ dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
+ pturbo);
+ }
+
+ dt_add_property_cells(power_mgt, "ibm,pstate-ultra-turbo",
+ pultra_turbo);
+ dt_add_property(power_mgt, "ibm,pstate-core-max", dt_cmax,
+ nr_cores * sizeof(u32));
+
+ dt_add_property_cells(power_mgt, "ibm,pstate-base", pturbo);
+ free(dt_cmax);
+ }
+
+ if (major == 0x9 || major == 0xA)
+ goto out; /* the per-chip occ@ nodes and VID properties below are only created for version 0x01/0x02 tables */
+
+ dt_add_property_cells(power_mgt, "#address-cells", 2);
+ dt_add_property_cells(power_mgt, "#size-cells", 1);
+
+ /* Add chip specific pstate properties */
+ for_each_chip(chip) {
+ struct dt_node *occ_node;
+
+ occ_data = get_occ_pstate_table(chip);
+ occ_node = dt_new_addr(power_mgt, "occ", (uint64_t)occ_data);
+ if (!occ_node) {
+ /**
+ * @fwts-label OCCDTFailedNodeCreation
+ * @fwts-advice Failed to create
+ * /ibm,opal/power-mgt/occ. Per-chip pstate properties
+ * are not added to Device Tree.
+ */
+ prerror("OCC: Failed to create /ibm,opal/power-mgt/occ@%llx\n",
+ (uint64_t)occ_data);
+ return false;
+ }
+
+ dt_add_property_cells(occ_node, "reg",
+ hi32((uint64_t)occ_data),
+ lo32((uint64_t)occ_data),
+ OPAL_DYNAMIC_DATA_OFFSET +
+ sizeof(struct occ_dynamic_data));
+ dt_add_property_cells(occ_node, "ibm,chip-id", chip->id);
+
+ /*
+ * Parse and add pstate Voltage Identifiers (VID) to DT which
+ * are provided by OCC in version 0x01 and 0x02
+ */
+ parse_vid(occ_data, occ_node, nr_pstates, pmax, pmin);
+ }
+out:
+ /* Return pstate to set for each core */
+ *pstate_nom = pnom;
+ return true;
+}
+
+/*
+ * Prepare one core of a chip for pstate transitions.
+ *
+ * @chip:       chip the core belongs to (its id addresses the XSCOMs)
+ * @c:          a thread of the core; the core id is derived from its PIR
+ * @pstate_nom: pstate to program into the core; only the low 8 bits
+ *              are used
+ *
+ * Sequence: set the SPR override enable bit in PM_GP1, write the
+ * pstate into the two top bytes of PM_PPMCR, wait 1ms, clear the
+ * override enable bit again and finally dump PM_PPMSR for debugging.
+ *
+ * Returns false (after logging OPAL_RC_OCC_PSTATE_INIT) as soon as one
+ * of the XSCOM accesses fails, true otherwise.
+ */
+static bool cpu_pstates_prepare_core(struct proc_chip *chip,
+				     struct cpu_thread *c,
+				     int pstate_nom)
+{
+	uint32_t core = pir_to_core_id(c->pir);
+	uint64_t tmp, pstate;
+	int rc;
+
+	/*
+	 * Currently Fastsleep init clears EX_PM_SPR_OVERRIDE_EN.
+	 * Need to ensure only relevant bits are inited
+	 */
+
+	/* Init PM GP1 for SCOM based PSTATE control to set nominal freq
+	 *
+	 * Use the OR SCOM to set the required bits in PM_GP1 register
+	 * since the OCC might be manipulating the PM_GP1 register as well.
+	 */
+	rc = xscom_write(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_SET_GP1),
+			 EX_PM_SETUP_GP1_PM_SPR_OVERRIDE_EN);
+	if (rc) {
+		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
+			"OCC: Failed to write PM_GP1 in pstates init\n");
+		return false;
+	}
+
+	/* Set new pstate to core */
+	rc = xscom_read(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_PPMCR), &tmp);
+	if (rc) {
+		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
+			"OCC: Failed to read PM_PPMCR from OCC in pstates init\n");
+		return false;
+	}
+	/* Replace the two most significant bytes with the pstate */
+	tmp = tmp & ~0xFFFF000000000000ULL;
+	pstate = ((uint64_t) pstate_nom) & 0xFF;
+	tmp = tmp | (pstate << 56) | (pstate << 48);
+	rc = xscom_write(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_PPMCR), tmp);
+	if (rc) {
+		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
+			"OCC: Failed to write PM_PPMCR in pstates init\n");
+		return false;
+	}
+	time_wait_ms(1); /* Wait for PState to change */
+	/*
+	 * Init PM GP1 for SPR based PSTATE control.
+	 * Once OCC is active EX_PM_SETUP_GP1_DPLL_FREQ_OVERRIDE_EN will be
+	 * cleared by OCC. Sapphire need not clear.
+	 * However wait for DVFS state machine to become idle after min->nominal
+	 * transition initiated above. If not switch over to SPR control could fail.
+	 *
+	 * Use the AND SCOM to clear the required bits in PM_GP1 register
+	 * since the OCC might be manipulating the PM_GP1 register as well.
+	 */
+	tmp = ~EX_PM_SETUP_GP1_PM_SPR_OVERRIDE_EN;
+	rc = xscom_write(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_CLEAR_GP1),
+			 tmp);
+	if (rc) {
+		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
+			"OCC: Failed to write PM_GP1 in pstates init\n");
+		return false;
+	}
+
+	/* Just debug */
+	rc = xscom_read(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_PPMSR), &tmp);
+	if (rc) {
+		/*
+		 * The message used to be split into two adjacent literals
+		 * without a separating blank ("...from OCCin pstates init").
+		 */
+		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
+			"OCC: Failed to read PM_PPMSR from OCC in pstates init\n");
+		return false;
+	}
+	prlog(PR_DEBUG, "OCC: Chip %x Core %x PPMSR %016llx\n",
+	      chip->id, core, tmp);
+
+	/*
+	 * If PMSR is still in transition at this point due to PState change
+	 * initiated above, then the switchover to SPR may not work.
+	 * ToDo: Check for DVFS state machine idle before change.
+	 */
+
+	return true;
+}
+
+/*
+ * True while a throttle message queued by occ_throttle_poll() has not
+ * been consumed yet. Static storage starts out as false.
+ */
+static bool occ_opal_msg_outstanding;
+
+/*
+ * Completion callback handed to _opal_queue_msg(): marks the queued
+ * throttle message as consumed so that polling may queue the next one.
+ * Both arguments are ignored.
+ */
+static void occ_msg_consumed(void *data __unused, int status __unused)
+{
+	lock(&occ_lock);
+	occ_opal_msg_outstanding = false;
+	unlock(&occ_lock);
+}
+
+/*
+ * Fetch the CPU throttle status of @chip from wherever the chip's
+ * interface version keeps it: the pstate table for major version 0,
+ * the dynamic data for major versions 0x9 and 0xA. Any other version
+ * yields 0.
+ */
+static inline u8 get_cpu_throttle(struct proc_chip *chip)
+{
+	struct occ_pstate_table *table = get_occ_pstate_table(chip);
+	int major = table->version >> 4;
+
+	if (major == 0)
+		return table->v2.throttle;
+
+	if (major == 0x9 || major == 0xA)
+		return get_occ_dynamic_data(chip)->cpu_throttle;
+
+	return 0;
+}
+
+/*
+ * Report whether an OCC reset is pending: occ_reset is set by
+ * occ_msg_queue_occ_reset() and cleared by occ_throttle_poll() once every
+ * chip's pstate table is valid again and the "all active" message is queued.
+ */
+bool is_occ_reset(void)
+{
+ return occ_reset;
+}
+
+/*
+ * Poll the OCC shared memory of every chip and notify the host through
+ * OPAL_MSG_OCC messages.
+ *
+ * While occ_reset is set: once every chip reports valid == 1, queue an
+ * OCC_THROTTLE message with chip and throttle status 0 and clear occ_reset.
+ * Otherwise: queue at most one OCC_THROTTLE message for the first chip whose
+ * throttle status changed, and wait for it to be consumed before sending
+ * another. The @data argument is unused.
+ */
+static void occ_throttle_poll(void *data __unused)
+{
+ struct proc_chip *chip;
+ struct occ_pstate_table *occ_data;
+ struct opal_occ_msg occ_msg;
+ int rc;
+
+ /* Never block here: if occ_lock is busy, skip this poll round. */
+ if (!try_lock(&occ_lock))
+ return;
+ if (occ_reset) {
+ int inactive = 0;
+
+ /* Any chip whose table is not yet valid keeps us in reset state. */
+ for_each_chip(chip) {
+ occ_data = get_occ_pstate_table(chip);
+ if (occ_data->valid != 1) {
+ inactive = 1;
+ break;
+ }
+ }
+ if (!inactive) {
+ /*
+ * Queue OCC_THROTTLE with throttle status as 0 to
+ * indicate all OCCs are active after a reset.
+ */
+ occ_msg.type = cpu_to_be64(OCC_THROTTLE);
+ occ_msg.chip = 0;
+ occ_msg.throttle_status = 0;
+ rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
+ sizeof(struct opal_occ_msg),
+ &occ_msg);
+ /* Stay in reset state if queueing failed, so a later poll retries. */
+ if (!rc)
+ occ_reset = false;
+ }
+ } else {
+ /* Only one throttle message may be in flight at a time. */
+ if (occ_opal_msg_outstanding)
+ goto done;
+ for_each_chip(chip) {
+ u8 throttle;
+
+ occ_data = get_occ_pstate_table(chip);
+ throttle = get_cpu_throttle(chip);
+ /* Report only valid, in-range values that differ from the last one sent. */
+ if ((occ_data->valid == 1) &&
+ (chip->throttle != throttle) &&
+ (throttle <= OCC_MAX_THROTTLE_STATUS)) {
+ occ_msg.type = cpu_to_be64(OCC_THROTTLE);
+ occ_msg.chip = cpu_to_be64(chip->id);
+ occ_msg.throttle_status = cpu_to_be64(throttle);
+ rc = _opal_queue_msg(OPAL_MSG_OCC, NULL,
+ occ_msg_consumed,
+ sizeof(struct opal_occ_msg),
+ &occ_msg);
+ /* On success remember what was reported and stop; other chips wait for a later poll. */
+ if (!rc) {
+ chip->throttle = throttle;
+ occ_opal_msg_outstanding = true;
+ break;
+ }
+ }
+ }
+ }
+done:
+ unlock(&occ_lock);
+}
+
+/* OPAL-OCC Command/Response Interface */
+
+/*
+ * OCC state values. opal_occ_command() turns the current state into a bit
+ * with PPC_BIT16() and tests it against a command's state_mask.
+ */
+enum occ_state {
+ OCC_STATE_NOT_RUNNING = 0x00,
+ OCC_STATE_STANDBY = 0x01,
+ OCC_STATE_OBSERVATION = 0x02,
+ OCC_STATE_ACTIVE = 0x03,
+ OCC_STATE_SAFE = 0x04,
+ OCC_STATE_CHARACTERIZATION = 0x05,
+};
+
+/*
+ * OCC role values. opal_occ_command() turns the role into a bit with
+ * PPC_BIT8() and tests it against a command's role_mask.
+ */
+enum occ_role {
+ OCC_ROLE_SLAVE = 0x0,
+ OCC_ROLE_MASTER = 0x1,
+};
+
+/* Commands OPAL can send to the OCC; the value is the index into occ_cmds[]. */
+enum occ_cmd {
+ OCC_CMD_CLEAR_SENSOR_DATA,
+ OCC_CMD_SET_POWER_CAP,
+ OCC_CMD_SET_POWER_SHIFTING_RATIO,
+ OCC_CMD_SELECT_SENSOR_GROUP,
+};
+
+/* Static description of one OPAL-to-OCC command (one entry of occ_cmds[]). */
+struct opal_occ_cmd_info {
+ enum occ_cmd cmd;
+ /* Command code written into the command buffer. */
+ u8 cmd_value;
+ /* Number of command data bytes copied into the command buffer. */
+ u16 cmd_size;
+ u16 rsp_size;
+ /* Delay in milliseconds before the timeout handler fires. */
+ int timeout_ms;
+ /* PPC_BIT16(occ_state) bits in which the command is accepted. */
+ u16 state_mask;
+ /* PPC_BIT8(occ_role) bits for which the command is accepted. */
+ u8 role_mask;
+};
+
+/*
+ * Command descriptor table, indexed by enum occ_cmd.
+ *
+ * Every command is accepted in the OBSERVATION, ACTIVE and CHARACTERIZATION
+ * states and times out after 1000 ms. Only SET_POWER_CAP is restricted to
+ * the master OCC; the others are accepted by master and slave alike.
+ */
+static struct opal_occ_cmd_info occ_cmds[] = {
+	[OCC_CMD_CLEAR_SENSOR_DATA] = {
+		.cmd		= OCC_CMD_CLEAR_SENSOR_DATA,
+		.cmd_value	= 0xD0,
+		.cmd_size	= 4,
+		.rsp_size	= 4,
+		.timeout_ms	= 1000,
+		.state_mask	= PPC_BIT16(OCC_STATE_OBSERVATION) |
+				  PPC_BIT16(OCC_STATE_ACTIVE) |
+				  PPC_BIT16(OCC_STATE_CHARACTERIZATION),
+		.role_mask	= PPC_BIT8(OCC_ROLE_MASTER) |
+				  PPC_BIT8(OCC_ROLE_SLAVE),
+	},
+	[OCC_CMD_SET_POWER_CAP] = {
+		.cmd		= OCC_CMD_SET_POWER_CAP,
+		.cmd_value	= 0xD1,
+		.cmd_size	= 2,
+		.rsp_size	= 2,
+		.timeout_ms	= 1000,
+		.state_mask	= PPC_BIT16(OCC_STATE_OBSERVATION) |
+				  PPC_BIT16(OCC_STATE_ACTIVE) |
+				  PPC_BIT16(OCC_STATE_CHARACTERIZATION),
+		.role_mask	= PPC_BIT8(OCC_ROLE_MASTER),
+	},
+	[OCC_CMD_SET_POWER_SHIFTING_RATIO] = {
+		.cmd		= OCC_CMD_SET_POWER_SHIFTING_RATIO,
+		.cmd_value	= 0xD2,
+		.cmd_size	= 1,
+		.rsp_size	= 1,
+		.timeout_ms	= 1000,
+		.state_mask	= PPC_BIT16(OCC_STATE_OBSERVATION) |
+				  PPC_BIT16(OCC_STATE_ACTIVE) |
+				  PPC_BIT16(OCC_STATE_CHARACTERIZATION),
+		.role_mask	= PPC_BIT8(OCC_ROLE_MASTER) |
+				  PPC_BIT8(OCC_ROLE_SLAVE),
+	},
+	[OCC_CMD_SELECT_SENSOR_GROUP] = {
+		.cmd		= OCC_CMD_SELECT_SENSOR_GROUP,
+		.cmd_value	= 0xD3,
+		.cmd_size	= 2,
+		.rsp_size	= 2,
+		.timeout_ms	= 1000,
+		.state_mask	= PPC_BIT16(OCC_STATE_OBSERVATION) |
+				  PPC_BIT16(OCC_STATE_ACTIVE) |
+				  PPC_BIT16(OCC_STATE_CHARACTERIZATION),
+		.role_mask	= PPC_BIT8(OCC_ROLE_MASTER) |
+				  PPC_BIT8(OCC_ROLE_SLAVE),
+	},
+};
+
+/*
+ * Status codes found in the OCC response buffer. read_occ_rsp() maps
+ * OCC_RSP_SUCCESS to OPAL_SUCCESS and everything else to
+ * OPAL_INTERNAL_ERROR.
+ */
+enum occ_response_status {
+ OCC_RSP_SUCCESS = 0x00,
+ OCC_RSP_INVALID_COMMAND = 0x11,
+ OCC_RSP_INVALID_CMD_DATA_LENGTH = 0x12,
+ OCC_RSP_INVALID_DATA = 0x13,
+ OCC_RSP_INTERNAL_ERROR = 0x15,
+};
+
+/*
+ * Flag values: the OCC_FLAG_* values are compared against the response
+ * buffer flag, OPAL_FLAG_CMD_READY is written to the command buffer flag by
+ * write_occ_cmd() after the command has been filled in.
+ */
+#define OCC_FLAG_RSP_READY 0x01
+#define OCC_FLAG_CMD_IN_PROGRESS 0x02
+#define OPAL_FLAG_CMD_READY 0x80
+
+/* A command request: which command to send and where its data bytes live. */
+struct opal_occ_cmd_data {
+ /* Source of the occ_cmds[cmd].cmd_size bytes copied by write_occ_cmd(). */
+ u8 *data;
+ enum occ_cmd cmd;
+};
+
+/*
+ * Per-OCC command/response interface state. The array 'chips' is allocated
+ * by occ_cmd_interface_init() with one entry per chip and stays NULL until
+ * then.
+ */
+static struct cmd_interface {
+ /* Taken around every update of cmd_in_progress/retry/cdata/token. */
+ struct lock queue_lock;
+ /* Armed by write_occ_cmd(); fires occ_cmd_timeout_handler(). */
+ struct timer timeout;
+ /* Request currently being sent (set by opal_occ_command()). */
+ struct opal_occ_cmd_data *cdata;
+ /* Command and response buffers inside the chip's OCC dynamic data. */
+ struct opal_command_buffer *cmd;
+ struct occ_response_buffer *rsp;
+ /* Pointers to the live OCC state byte and pstate-table valid byte. */
+ u8 *occ_state;
+ u8 *valid;
+ u32 chip_id;
+ /* Async token completed through queue_occ_rsp_msg(). */
+ u32 token;
+ /* Updated after a successful OCC_CMD_SELECT_SENSOR_GROUP. */
+ u16 enabled_sensor_mask;
+ u8 occ_role;
+ /* Incremented by write_occ_cmd() for every command written. */
+ u8 request_id;
+ bool cmd_in_progress;
+ /* True once the current command has been re-sent. */
+ bool retry;
+} *chips;
+
+/* Number of entries in 'chips'. */
+static int nr_occs;
+
+/*
+ * Look up the command interface entry belonging to @chip_id.
+ * Returns NULL when no entry matches (including when none were set up).
+ */
+static inline struct cmd_interface *get_chip_cmd_interface(int chip_id)
+{
+	int idx = 0;
+
+	while (idx < nr_occs) {
+		if (chips[idx].chip_id == chip_id)
+			return &chips[idx];
+		idx++;
+	}
+
+	return NULL;
+}
+
+/* True when the response buffer flag says the OCC is still busy with a command. */
+static inline bool occ_in_progress(struct cmd_interface *chip)
+{
+	bool busy = chip->rsp->flag == OCC_FLAG_CMD_IN_PROGRESS;
+
+	return busy;
+}
+
+/*
+ * Write the pending command (chip->cdata) into the shared command buffer
+ * and arm the per-command timeout.
+ *
+ * Unless this is a retry, the command is refused with OPAL_BUSY (and
+ * cmd_in_progress dropped) while the OCC still flags a command in progress.
+ * Otherwise both flags are cleared, the command is filled in, and only then
+ * is OPAL_FLAG_CMD_READY set. Returns OPAL_ASYNC_COMPLETION in that case.
+ */
+static int write_occ_cmd(struct cmd_interface *chip)
+{
+	const struct opal_occ_cmd_info *info = &occ_cmds[chip->cdata->cmd];
+	struct opal_command_buffer *buf = chip->cmd;
+
+	if (!chip->retry && occ_in_progress(chip)) {
+		chip->cmd_in_progress = false;
+		return OPAL_BUSY;
+	}
+
+	chip->rsp->flag = 0;
+	buf->flag = 0;
+	buf->cmd = info->cmd_value;
+	buf->request_id = chip->request_id++;
+	buf->data_size = info->cmd_size;
+	memcpy(&buf->data, chip->cdata->data, buf->data_size);
+	/* Publish the command last, after all of its fields are in place. */
+	buf->flag = OPAL_FLAG_CMD_READY;
+
+	schedule_timer(&chip->timeout, msecs_to_tb(info->timeout_ms));
+
+	return OPAL_ASYNC_COMPLETION;
+}
+
+/*
+ * Submit command @cdata to the OCC behind @chip on behalf of async @token.
+ *
+ * Returns OPAL_HARDWARE if the OCC data is not valid or the OCC is in a
+ * state that does not accept the command, OPAL_PERMISSION if the OCC's role
+ * does not accept it, OPAL_BUSY if another command is already in flight,
+ * otherwise whatever write_occ_cmd() returns.
+ */
+static int64_t opal_occ_command(struct cmd_interface *chip, int token,
+				struct opal_occ_cmd_data *cdata)
+{
+	const struct opal_occ_cmd_info *info = &occ_cmds[cdata->cmd];
+	int rc = OPAL_BUSY;
+
+	if (!(*chip->valid))
+		return OPAL_HARDWARE;
+
+	if (!(PPC_BIT16(*chip->occ_state) & info->state_mask))
+		return OPAL_HARDWARE;
+
+	if (!(PPC_BIT8(chip->occ_role) & info->role_mask))
+		return OPAL_PERMISSION;
+
+	lock(&chip->queue_lock);
+	if (!chip->cmd_in_progress) {
+		chip->cdata = cdata;
+		chip->token = token;
+		chip->cmd_in_progress = true;
+		chip->retry = false;
+		rc = write_occ_cmd(chip);
+	}
+	unlock(&chip->queue_lock);
+
+	return rc;
+}
+
+/*
+ * Check that the command buffer still holds the command OPAL last wrote:
+ * same command code, same request id and same data size.
+ *
+ * write_occ_cmd() stores chip->request_id and then post-increments it, so
+ * the id in the buffer is "current id minus one". chip->request_id is a u8
+ * that wraps from 255 to 0; the subtraction is therefore truncated back to
+ * u8, otherwise the promoted int value -1 would never equal the stored 255
+ * and every 256th command would be treated as a mismatch.
+ */
+static inline bool sanity_check_opal_cmd(struct opal_command_buffer *cmd,
+					 struct cmd_interface *chip)
+{
+	const struct opal_occ_cmd_info *info = &occ_cmds[chip->cdata->cmd];
+	u8 last_request_id = (u8)(chip->request_id - 1);
+
+	return ((cmd->cmd == info->cmd_value) &&
+		(cmd->request_id == last_request_id) &&
+		(cmd->data_size == info->cmd_size));
+}
+
+/*
+ * Check that response @rsp answers command @cmd: the command code and the
+ * request id must both match. A mismatch is logged at debug level.
+ */
+static inline bool check_occ_rsp(struct opal_command_buffer *cmd,
+				 struct occ_response_buffer *rsp)
+{
+	if (cmd->cmd != rsp->cmd) {
+		/* The literals are concatenated: keep the separating space. */
+		prlog(PR_DEBUG, "OCC: Command value mismatch in OCC response "
+		      "rsp->cmd = %d cmd->cmd = %d\n", rsp->cmd, cmd->cmd);
+		return false;
+	}
+
+	if (cmd->request_id != rsp->request_id) {
+		prlog(PR_DEBUG, "OCC: Request ID mismatch in OCC response "
+		      "rsp->request_id = %d cmd->request_id = %d\n",
+		      rsp->request_id, cmd->request_id);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Complete async @token with result @rc by queueing an OPAL_MSG_ASYNC_COMP
+ * message. A queueing failure is only logged.
+ */
+static inline void queue_occ_rsp_msg(int token, int rc)
+{
+	if (opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+			   cpu_to_be64(token),
+			   cpu_to_be64(rc)))
+		prerror("OCC: Failed to queue OCC response status message\n");
+}
+
+/*
+ * Timer callback for a command that got no response in time; @data is the
+ * struct cmd_interface of the OCC concerned.
+ *
+ * The first timeout re-sends the command once. A timeout of the re-sent
+ * command completes the request with OPAL_TIMEOUT. Nothing happens if no
+ * command is in flight any more.
+ */
+static void occ_cmd_timeout_handler(struct timer *t __unused, void *data,
+				    uint64_t now __unused)
+{
+	struct cmd_interface *chip = data;
+
+	lock(&chip->queue_lock);
+	if (chip->cmd_in_progress) {
+		if (chip->retry) {
+			chip->cmd_in_progress = false;
+			queue_occ_rsp_msg(chip->token, OPAL_TIMEOUT);
+			prlog(PR_DEBUG, "OCC: Command timeout after retry\n");
+		} else {
+			prlog(PR_DEBUG, "OCC: Command timeout, retrying\n");
+			chip->retry = true;
+			write_occ_cmd(chip);
+		}
+	}
+	unlock(&chip->queue_lock);
+}
+
+/*
+ * Translate the status of OCC response @rsp into an OPAL return code.
+ *
+ * OCC_RSP_SUCCESS yields OPAL_SUCCESS and leaves the response untouched.
+ * Every other status is logged where known, the response flag is cleared,
+ * and OPAL_INTERNAL_ERROR is returned.
+ */
+static int read_occ_rsp(struct occ_response_buffer *rsp)
+{
+	switch (rsp->status) {
+	case OCC_RSP_INTERNAL_ERROR:
+		prlog(PR_DEBUG, "OCC: Rsp status: OCC internal error\n");
+		break;
+	case OCC_RSP_INVALID_DATA:
+		prlog(PR_DEBUG, "OCC: Rsp status: Invalid command data\n");
+		break;
+	case OCC_RSP_INVALID_CMD_DATA_LENGTH:
+		prlog(PR_DEBUG, "OCC: Rsp status: Invalid command data length\n");
+		break;
+	case OCC_RSP_INVALID_COMMAND:
+		prlog(PR_DEBUG, "OCC: Rsp status: Invalid command\n");
+		break;
+	case OCC_RSP_SUCCESS:
+		return OPAL_SUCCESS;
+	default:
+		break;
+	}
+
+	/* Failure: drop the response flag before reporting the error. */
+	rsp->flag = 0;
+
+	return OPAL_INTERNAL_ERROR;
+}
+
+/*
+ * Process a response the OCC of @chip_id may have posted.
+ *
+ * Does nothing unless the response flag is OCC_FLAG_RSP_READY and a command
+ * is in flight. A response that does not match the command triggers one
+ * re-send; a second mismatch completes the request with
+ * OPAL_INTERNAL_ERROR. A matching response completes the request with the
+ * translated OCC status. A successful sensor-group selection also records
+ * the new enabled-sensor mask.
+ */
+static void handle_occ_rsp(uint32_t chip_id)
+{
+	struct cmd_interface *chip = get_chip_cmd_interface(chip_id);
+	struct opal_command_buffer *cmd;
+	struct occ_response_buffer *rsp;
+
+	if (!chip)
+		return;
+
+	cmd = chip->cmd;
+	rsp = chip->rsp;
+
+	/* Cheap unlocked check first: nothing to do without a ready response. */
+	if (rsp->flag != OCC_FLAG_RSP_READY)
+		return;
+
+	lock(&chip->queue_lock);
+	if (chip->cmd_in_progress) {
+		bool matched;
+
+		cancel_timer(&chip->timeout);
+		matched = sanity_check_opal_cmd(cmd, chip) &&
+			  check_occ_rsp(cmd, rsp);
+
+		if (!matched && !chip->retry) {
+			prlog(PR_DEBUG, "OCC: Command-response mismatch, retrying\n");
+			chip->retry = true;
+			write_occ_cmd(chip);
+		} else if (!matched) {
+			chip->cmd_in_progress = false;
+			queue_occ_rsp_msg(chip->token, OPAL_INTERNAL_ERROR);
+			prlog(PR_DEBUG, "OCC: Command-response mismatch\n");
+		} else {
+			if (rsp->cmd == occ_cmds[OCC_CMD_SELECT_SENSOR_GROUP].cmd_value &&
+			    rsp->status == OCC_RSP_SUCCESS)
+				chip->enabled_sensor_mask = *(u16 *)chip->cdata->data;
+
+			chip->cmd_in_progress = false;
+			queue_occ_rsp_msg(chip->token, read_occ_rsp(chip->rsp));
+		}
+	}
+	unlock(&chip->queue_lock);
+}
+
+/*
+ * Tell whether GPU slot @gpu_num (at most 2) of @chip holds a GPU.
+ *
+ * The presence bits are only trusted once the dynamic data reports major
+ * version 0 with minor version >= 1. The function waits for that in 100 ms
+ * steps. The retry budget (20) and the "found" result are static, so they
+ * are shared by all calls. If the version never shows up, the GPU is
+ * assumed to be present.
+ */
+bool occ_get_gpu_presence(struct proc_chip *chip, int gpu_num)
+{
+	static int max_retries = 20;
+	static bool found = false;
+	struct occ_dynamic_data *ddata;
+
+	assert(gpu_num <= 2);
+
+	for (ddata = get_occ_dynamic_data(chip);
+	     !found && max_retries;
+	     max_retries--, ddata = get_occ_dynamic_data(chip)) {
+		if (ddata->major_version == 0 && ddata->minor_version >= 1) {
+			found = true;
+			break;
+		}
+		time_wait_ms(100);
+	}
+
+	if (found)
+		return (ddata->gpus_present & (1 << gpu_num)) != 0;
+
+	prlog(PR_INFO, "OCC: No GPU slot presence, assuming GPU present\n");
+	return true;
+}
+
+static void occ_add_powercap_sensors(struct dt_node *power_mgt);
+static void occ_add_psr_sensors(struct dt_node *power_mgt);
+
+/*
+ * Set up the OPAL-OCC command/response interface.
+ *
+ * Requires valid OCC data on every chip and a pstate table layout with
+ * major version 0x9 or 0xA (taken from the first chip). Allocates one
+ * struct cmd_interface per chip in 'chips', then adds the powercap and
+ * power-shifting-ratio nodes below /ibm,opal/power-mgt.
+ */
+static void occ_cmd_interface_init(void)
+{
+	struct occ_dynamic_data *data;
+	struct occ_pstate_table *pdata;
+	struct dt_node *power_mgt;
+	struct proc_chip *chip;
+	int i = 0, major;
+
+	/* Check if the OCC data is valid */
+	for_each_chip(chip) {
+		pdata = get_occ_pstate_table(chip);
+		if (!pdata->valid)
+			return;
+	}
+
+	chip = next_chip(NULL);
+	pdata = get_occ_pstate_table(chip);
+	major = pdata->version >> 4;
+	/*
+	 * Bail out only when the layout is neither 0x9 nor 0xA. Joining the
+	 * two tests with "||" is true for every value and would disable the
+	 * interface on all systems.
+	 */
+	if (major != 0x9 && major != 0xA)
+		return;
+
+	for_each_chip(chip)
+		nr_occs++;
+
+	chips = malloc(sizeof(*chips) * nr_occs);
+	assert(chips);
+
+	for_each_chip(chip) {
+		pdata = get_occ_pstate_table(chip);
+		data = get_occ_dynamic_data(chip);
+		chips[i].chip_id = chip->id;
+		chips[i].occ_state = &data->occ_state;
+		chips[i].valid = &pdata->valid;
+		chips[i].cmd = &data->cmd;
+		chips[i].rsp = &data->rsp;
+		/* The role lives in a version-specific part of the table. */
+		switch (major) {
+		case 0x9:
+			chips[i].occ_role = pdata->v9.occ_role;
+			break;
+		case 0xA:
+			chips[i].occ_role = pdata->v10.occ_role;
+			break;
+		}
+		init_lock(&chips[i].queue_lock);
+		chips[i].cmd_in_progress = false;
+		chips[i].request_id = 0;
+		chips[i].enabled_sensor_mask = OCC_ENABLED_SENSOR_MASK;
+		init_timer(&chips[i].timeout, occ_cmd_timeout_handler,
+			   &chips[i]);
+		i++;
+	}
+
+	power_mgt = dt_find_by_path(dt_root, "/ibm,opal/power-mgt");
+	if (!power_mgt) {
+		prerror("OCC: dt node /ibm,opal/power-mgt not found\n");
+		return;
+	}
+
+	/* Add powercap sensors to DT */
+	occ_add_powercap_sensors(power_mgt);
+
+	/* Add power-shifting-ratio CPU-GPU sensors to DT */
+	occ_add_psr_sensors(power_mgt);
+}
+
+/* Powercap interface */
+/*
+ * Attribute encoded into powercap handles; occ_get_powercap() uses it to
+ * pick the matching power cap field of the OCC dynamic data.
+ */
+enum sensor_powercap_occ_attr {
+ POWERCAP_OCC_SOFT_MIN,
+ POWERCAP_OCC_MAX,
+ POWERCAP_OCC_CUR,
+ POWERCAP_OCC_HARD_MIN,
+};
+
+/*
+ * Create the "powercap" node below @power_mgt with a "system-powercap"
+ * child carrying one handle property per powercap attribute (current, min,
+ * max, hard-min, in that order).
+ */
+static void occ_add_powercap_sensors(struct dt_node *power_mgt)
+{
+	static const struct {
+		const char *prop;
+		int attr;
+	} caps[] = {
+		{ "powercap-current",	POWERCAP_OCC_CUR },
+		{ "powercap-min",	POWERCAP_OCC_SOFT_MIN },
+		{ "powercap-max",	POWERCAP_OCC_MAX },
+		{ "powercap-hard-min",	POWERCAP_OCC_HARD_MIN },
+	};
+	struct dt_node *parent, *sys;
+	unsigned int n;
+
+	parent = dt_new(power_mgt, "powercap");
+	if (!parent) {
+		prerror("OCC: Failed to create powercap node\n");
+		return;
+	}
+	dt_add_property_string(parent, "compatible", "ibm,opal-powercap");
+
+	sys = dt_new(parent, "system-powercap");
+	if (!sys) {
+		prerror("OCC: Failed to create system powercap node\n");
+		return;
+	}
+
+	for (n = 0; n < sizeof(caps) / sizeof(caps[0]); n++) {
+		u32 handle = powercap_make_handle(POWERCAP_CLASS_OCC,
+						  caps[n].attr);
+
+		dt_add_property_cells(sys, caps[n].prop, handle);
+	}
+}
+
+/*
+ * Read the power cap value selected by @handle into @pcap, using the OCC
+ * data of the first chip.
+ *
+ * Returns OPAL_HARDWARE if that chip's OCC data is not valid (@pcap is left
+ * untouched), OPAL_UNSUPPORTED for an unknown attribute (@pcap is set to
+ * 0), OPAL_SUCCESS otherwise.
+ */
+int occ_get_powercap(u32 handle, u32 *pcap)
+{
+	struct proc_chip *chip = next_chip(NULL);
+	struct occ_pstate_table *pdata = get_occ_pstate_table(chip);
+	struct occ_dynamic_data *ddata = get_occ_dynamic_data(chip);
+	u32 attr;
+
+	if (!pdata->valid)
+		return OPAL_HARDWARE;
+
+	attr = powercap_get_attr(handle);
+	if (attr == POWERCAP_OCC_SOFT_MIN) {
+		*pcap = ddata->soft_min_pwr_cap;
+	} else if (attr == POWERCAP_OCC_MAX) {
+		*pcap = ddata->max_pwr_cap;
+	} else if (attr == POWERCAP_OCC_CUR) {
+		*pcap = ddata->cur_pwr_cap;
+	} else if (attr == POWERCAP_OCC_HARD_MIN) {
+		*pcap = ddata->hard_min_pwr_cap;
+	} else {
+		*pcap = 0;
+		return OPAL_UNSUPPORTED;
+	}
+
+	return OPAL_SUCCESS;
+}
+
+/* Command data for OCC_CMD_SET_POWER_CAP: the requested cap. */
+static u16 pcap_cdata;
+static struct opal_occ_cmd_data pcap_data = {
+	.data		= (u8 *)&pcap_cdata,
+	.cmd		= OCC_CMD_SET_POWER_CAP,
+};
+
+/*
+ * Ask the master OCC to change the current power cap to @pcap.
+ *
+ * Only the "current" powercap attribute is writable (OPAL_PERMISSION
+ * otherwise). Returns OPAL_HARDWARE when the command interface is not
+ * initialised, no master OCC exists or its data is not valid;
+ * OPAL_SUCCESS when @pcap already is the current cap; OPAL_PARAMETER when a
+ * non-zero @pcap lies outside [soft min, max]; otherwise the result of
+ * opal_occ_command(), with completion reported against @token.
+ *
+ * This function reads global OCC state and queues a command, so it must not
+ * carry the GCC "const" attribute, which would allow the compiler to drop
+ * or merge calls.
+ */
+int occ_set_powercap(u32 handle, int token, u32 pcap)
+{
+	struct occ_dynamic_data *ddata;
+	struct proc_chip *chip;
+	int i;
+
+	if (powercap_get_attr(handle) != POWERCAP_OCC_CUR)
+		return OPAL_PERMISSION;
+
+	if (!chips)
+		return OPAL_HARDWARE;
+
+	for (i = 0; i < nr_occs; i++)
+		if (chips[i].occ_role == OCC_ROLE_MASTER)
+			break;
+
+	/* No master OCC found: chips[i] would be one past the array. */
+	if (i == nr_occs)
+		return OPAL_HARDWARE;
+
+	if (!(*chips[i].valid))
+		return OPAL_HARDWARE;
+
+	chip = get_chip(chips[i].chip_id);
+	ddata = get_occ_dynamic_data(chip);
+
+	if (pcap == ddata->cur_pwr_cap)
+		return OPAL_SUCCESS;
+
+	/* A cap of 0 bypasses the range check. */
+	if (pcap && (pcap > ddata->max_pwr_cap ||
+	    pcap < ddata->soft_min_pwr_cap))
+		return OPAL_PARAMETER;
+
+	pcap_cdata = pcap;
+	return opal_occ_command(&chips[i], token, &pcap_data);
+}
+
+/* Power-Shifting Ratio */
+/* Ratio type encoded into PSR handles; only CPU-to-GPU is defined. */
+enum psr_type {
+ PSR_TYPE_CPU_TO_GPU, /* 0% Cap GPU first, 100% Cap CPU first */
+};
+
+/*
+ * Read the CPU-to-GPU power shifting ratio of the OCC addressed by @handle
+ * into @ratio.
+ *
+ * Returns OPAL_UNSUPPORTED for any other ratio type or an OCC index outside
+ * the 'chips' array (which includes "interface not initialised", where
+ * nr_occs is 0), OPAL_HARDWARE if the OCC data is not valid, OPAL_SUCCESS
+ * otherwise.
+ */
+int occ_get_psr(u32 handle, u32 *ratio)
+{
+	struct occ_dynamic_data *ddata;
+	struct proc_chip *chip;
+	u8 i = psr_get_rid(handle);
+
+	if (psr_get_type(handle) != PSR_TYPE_CPU_TO_GPU)
+		return OPAL_UNSUPPORTED;
+
+	/* Valid indices are 0 .. nr_occs - 1. */
+	if (i >= nr_occs)
+		return OPAL_UNSUPPORTED;
+
+	if (!(*chips[i].valid))
+		return OPAL_HARDWARE;
+
+	chip = get_chip(chips[i].chip_id);
+	ddata = get_occ_dynamic_data(chip);
+	*ratio = ddata->pwr_shifting_ratio;
+	return OPAL_SUCCESS;
+}
+
+/* Command data for OCC_CMD_SET_POWER_SHIFTING_RATIO: the requested ratio. */
+static u8 psr_cdata;
+static struct opal_occ_cmd_data psr_data = {
+	.data		= &psr_cdata,
+	.cmd		= OCC_CMD_SET_POWER_SHIFTING_RATIO,
+};
+
+/*
+ * Ask the OCC addressed by @handle to set its CPU-to-GPU power shifting
+ * ratio to @ratio (0..100).
+ *
+ * Returns OPAL_UNSUPPORTED for any other ratio type or an OCC index outside
+ * the 'chips' array (which includes "interface not initialised"),
+ * OPAL_PARAMETER for a ratio above 100, OPAL_HARDWARE if the OCC data is
+ * not valid, OPAL_SUCCESS if the ratio is already in effect, otherwise the
+ * result of opal_occ_command() with completion reported against @token.
+ */
+int occ_set_psr(u32 handle, int token, u32 ratio)
+{
+	struct occ_dynamic_data *ddata;
+	struct proc_chip *chip;
+	u8 i = psr_get_rid(handle);
+
+	if (psr_get_type(handle) != PSR_TYPE_CPU_TO_GPU)
+		return OPAL_UNSUPPORTED;
+
+	if (ratio > 100)
+		return OPAL_PARAMETER;
+
+	/* Valid indices are 0 .. nr_occs - 1. */
+	if (i >= nr_occs)
+		return OPAL_UNSUPPORTED;
+
+	if (!(*chips[i].valid))
+		return OPAL_HARDWARE;
+
+	chip = get_chip(chips[i].chip_id);
+	ddata = get_occ_dynamic_data(chip);
+	if (ratio == ddata->pwr_shifting_ratio)
+		return OPAL_SUCCESS;
+
+	psr_cdata = ratio;
+	return opal_occ_command(&chips[i], token, &psr_data);
+}
+
+/*
+ * Create the "psr" node below @power_mgt with one "cpu-to-gpu" child per
+ * entry of 'chips'. Each child carries a label, the PSR handle and the chip
+ * id as "reg".
+ */
+static void occ_add_psr_sensors(struct dt_node *power_mgt)
+{
+	struct dt_node *psr = dt_new(power_mgt, "psr");
+	int idx = 0;
+
+	if (!psr) {
+		prerror("OCC: Failed to create power-shifting-ratio node\n");
+		return;
+	}
+
+	dt_add_property_string(psr, "compatible",
+			       "ibm,opal-power-shift-ratio");
+	dt_add_property_cells(psr, "#address-cells", 1);
+	dt_add_property_cells(psr, "#size-cells", 0);
+
+	while (idx < nr_occs) {
+		char label[20];
+		u32 handle = psr_make_handle(PSR_CLASS_OCC, idx,
+					     PSR_TYPE_CPU_TO_GPU);
+		struct dt_node *child = dt_new_addr(psr, "cpu-to-gpu", handle);
+
+		if (!child) {
+			prerror("OCC: Failed to create power-shifting-ratio node\n");
+			return;
+		}
+
+		snprintf(label, sizeof(label), "cpu_to_gpu_%d",
+			 chips[idx].chip_id);
+		dt_add_property_string(child, "label", label);
+		dt_add_property_cells(child, "handle", handle);
+		dt_add_property_cells(child, "reg", chips[idx].chip_id);
+		idx++;
+	}
+}
+
+/* OCC clear sensor limits CSM/Profiler/Job-scheduler */
+
+/*
+ * Sensor limit groups accepted by occ_sensor_group_clear(); the value is
+ * shifted into the top byte of the clear-sensor-data command word.
+ */
+enum occ_sensor_limit_group {
+ OCC_SENSOR_LIMIT_GROUP_CSM = 0x10,
+ OCC_SENSOR_LIMIT_GROUP_PROFILER = 0x20,
+ OCC_SENSOR_LIMIT_GROUP_JOB_SCHED = 0x40,
+};
+
+/* Command data for OCC_CMD_CLEAR_SENSOR_DATA: limit group in the top byte. */
+static u32 sensor_limit;
+static struct opal_occ_cmd_data slimit_data = {
+	.data		= (u8 *)&sensor_limit,
+	.cmd		= OCC_CMD_CLEAR_SENSOR_DATA,
+};
+
+/*
+ * Ask an OCC to clear the sensor limits of one limit group. The group and
+ * the OCC index are both taken from @group_hndl.
+ *
+ * Returns OPAL_UNSUPPORTED for an OCC index outside the 'chips' array
+ * (which includes "interface not initialised") or an unknown limit group,
+ * OPAL_HARDWARE if the OCC data is not valid, otherwise the result of
+ * opal_occ_command() with completion reported against @token.
+ */
+int occ_sensor_group_clear(u32 group_hndl, int token)
+{
+	u32 limit = sensor_get_rid(group_hndl);
+	u8 i = sensor_get_attr(group_hndl);
+
+	/* Valid indices are 0 .. nr_occs - 1. */
+	if (i >= nr_occs)
+		return OPAL_UNSUPPORTED;
+
+	switch (limit) {
+	case OCC_SENSOR_LIMIT_GROUP_CSM:
+	case OCC_SENSOR_LIMIT_GROUP_PROFILER:
+	case OCC_SENSOR_LIMIT_GROUP_JOB_SCHED:
+		break;
+	default:
+		return OPAL_UNSUPPORTED;
+	}
+
+	if (!(*chips[i].valid))
+		return OPAL_HARDWARE;
+
+	sensor_limit = limit << 24;
+	return opal_occ_command(&chips[i], token, &slimit_data);
+}
+
+/* Command data for OCC_CMD_SELECT_SENSOR_GROUP: the new enabled-type mask. */
+static u16 sensor_enable;
+static struct opal_occ_cmd_data sensor_mask_data = {
+	.data		= (u8 *)&sensor_enable,
+	.cmd		= OCC_CMD_SELECT_SENSOR_GROUP,
+};
+
+/*
+ * Enable or disable one sensor type on an OCC. The type and the OCC index
+ * are both taken from @group_hndl.
+ *
+ * Returns OPAL_UNSUPPORTED for an OCC index outside the 'chips' array
+ * (which includes "interface not initialised") or an unknown sensor type,
+ * OPAL_HARDWARE if the OCC data is not valid, OPAL_SUCCESS if the type
+ * already is in the requested state, otherwise the result of
+ * opal_occ_command() with completion reported against @token.
+ */
+int occ_sensor_group_enable(u32 group_hndl, int token, bool enable)
+{
+	u16 type = sensor_get_rid(group_hndl);
+	u8 i = sensor_get_attr(group_hndl);
+
+	/* Valid indices are 0 .. nr_occs - 1. */
+	if (i >= nr_occs)
+		return OPAL_UNSUPPORTED;
+
+	switch (type) {
+	case OCC_SENSOR_TYPE_GENERIC:
+	case OCC_SENSOR_TYPE_CURRENT:
+	case OCC_SENSOR_TYPE_VOLTAGE:
+	case OCC_SENSOR_TYPE_TEMPERATURE:
+	case OCC_SENSOR_TYPE_UTILIZATION:
+	case OCC_SENSOR_TYPE_TIME:
+	case OCC_SENSOR_TYPE_FREQUENCY:
+	case OCC_SENSOR_TYPE_POWER:
+	case OCC_SENSOR_TYPE_PERFORMANCE:
+		break;
+	default:
+		return OPAL_UNSUPPORTED;
+	}
+
+	if (!(*chips[i].valid))
+		return OPAL_HARDWARE;
+
+	/* Nothing to send when the type already is in the requested state. */
+	if (enable && (type & chips[i].enabled_sensor_mask))
+		return OPAL_SUCCESS;
+	else if (!enable && !(type & chips[i].enabled_sensor_mask))
+		return OPAL_SUCCESS;
+
+	sensor_enable = enable ? type | chips[i].enabled_sensor_mask :
+				~type & chips[i].enabled_sensor_mask;
+
+	return opal_occ_command(&chips[i], token, &sensor_mask_data);
+}
+
+/*
+ * Add the OCC sensor group nodes for chip @chipid below @sg.
+ *
+ * One node is created per entry of the local groups[] table. Each handle
+ * encodes the group type and the index of the chip in 'chips'. "Clear"
+ * groups list all @nr_phandles sensors in @phandles. "Enable" groups list
+ * only the sensors whose @ptype entry equals the group type.
+ *
+ * Nothing is added when the command interface is not initialised or
+ * @chipid has no entry in 'chips'.
+ */
+void occ_add_sensor_groups(struct dt_node *sg, __be32 *phandles, u32 *ptype,
+			   int nr_phandles, int chipid)
+{
+	struct group_info {
+		int type;
+		const char *str;
+		u32 ops;
+	} groups[] = {
+		{ OCC_SENSOR_LIMIT_GROUP_CSM, "csm",
+		  OPAL_SENSOR_GROUP_CLEAR
+		},
+		{ OCC_SENSOR_LIMIT_GROUP_PROFILER, "profiler",
+		  OPAL_SENSOR_GROUP_CLEAR
+		},
+		{ OCC_SENSOR_LIMIT_GROUP_JOB_SCHED, "js",
+		  OPAL_SENSOR_GROUP_CLEAR
+		},
+		{ OCC_SENSOR_TYPE_GENERIC, "generic",
+		  OPAL_SENSOR_GROUP_ENABLE
+		},
+		{ OCC_SENSOR_TYPE_CURRENT, "curr",
+		  OPAL_SENSOR_GROUP_ENABLE
+		},
+		{ OCC_SENSOR_TYPE_VOLTAGE, "in",
+		  OPAL_SENSOR_GROUP_ENABLE
+		},
+		{ OCC_SENSOR_TYPE_TEMPERATURE, "temp",
+		  OPAL_SENSOR_GROUP_ENABLE
+		},
+		{ OCC_SENSOR_TYPE_UTILIZATION, "utilization",
+		  OPAL_SENSOR_GROUP_ENABLE
+		},
+		{ OCC_SENSOR_TYPE_TIME, "time",
+		  OPAL_SENSOR_GROUP_ENABLE
+		},
+		{ OCC_SENSOR_TYPE_FREQUENCY, "frequency",
+		  OPAL_SENSOR_GROUP_ENABLE
+		},
+		{ OCC_SENSOR_TYPE_POWER, "power",
+		  OPAL_SENSOR_GROUP_ENABLE
+		},
+		{ OCC_SENSOR_TYPE_PERFORMANCE, "performance",
+		  OPAL_SENSOR_GROUP_ENABLE
+		},
+	};
+	int i, j;
+
+	/*
+	 * Don't add sensor groups if the cmd-interface is not initialized
+	 */
+	if (!chips)
+		return;
+
+	for (i = 0; i < nr_occs; i++)
+		if (chips[i].chip_id == chipid)
+			break;
+
+	/*
+	 * Unknown chip: i == nr_occs is not a valid index into 'chips', so
+	 * do not encode it into handles that later select chips[i].
+	 */
+	if (i == nr_occs) {
+		prerror("OCC: No command interface for chip %d\n", chipid);
+		return;
+	}
+
+	for (j = 0; j < ARRAY_SIZE(groups); j++) {
+		struct dt_node *node;
+		char name[20];
+		u32 handle;
+
+		snprintf(name, 20, "occ-%s", groups[j].str);
+		handle = sensor_make_handler(SENSOR_OCC, 0,
+					     groups[j].type, i);
+		node = dt_new_addr(sg, name, handle);
+		if (!node) {
+			prerror("Failed to create sensor group nodes\n");
+			return;
+		}
+
+		dt_add_property_cells(node, "sensor-group-id", handle);
+		dt_add_property_string(node, "type", groups[j].str);
+
+		/* Only these four types also get sensor-type/compatible. */
+		if (groups[j].type == OCC_SENSOR_TYPE_CURRENT ||
+		    groups[j].type == OCC_SENSOR_TYPE_VOLTAGE ||
+		    groups[j].type == OCC_SENSOR_TYPE_TEMPERATURE ||
+		    groups[j].type == OCC_SENSOR_TYPE_POWER) {
+			dt_add_property_string(node, "sensor-type",
+					       groups[j].str);
+			dt_add_property_string(node, "compatible",
+					       "ibm,opal-sensor");
+		}
+
+		dt_add_property_cells(node, "ibm,chip-id", chipid);
+		dt_add_property_cells(node, "reg", handle);
+		if (groups[j].ops == OPAL_SENSOR_GROUP_ENABLE) {
+			__be32 *_phandles;
+			int k, pcount = 0;
+
+			/* Collect only the sensors of this group's type. */
+			_phandles = malloc(sizeof(u32) * nr_phandles);
+			assert(_phandles);
+			for (k = 0; k < nr_phandles; k++)
+				if (ptype[k] == groups[j].type)
+					_phandles[pcount++] = phandles[k];
+			if (pcount)
+				dt_add_property(node, "sensors", _phandles,
+						pcount * sizeof(u32));
+			free(_phandles);
+		} else {
+			dt_add_property(node, "sensors", phandles,
+					nr_phandles * sizeof(u32));
+		}
+		dt_add_property_cells(node, "ops", groups[j].ops);
+	}
+}
+
+/* CPU-OCC PState init */
+/* Called after OCC init on P8 and P9 */
+/*
+ * Publishes the pstate properties below /ibm,opal/power-mgt, prepares the
+ * cores for host pstate control on P8, and on the first call also registers
+ * the throttle poller, initialises the OPAL-OCC command interface and adds
+ * the "freq-domain-mask"/"domain-runs-at" properties. On later calls the
+ * previously added properties and "occ*" child nodes are removed first and
+ * the function returns before the one-time setup.
+ */
+void occ_pstates_init(void)
+{
+ struct proc_chip *chip;
+ struct cpu_thread *c;
+ struct dt_node *power_mgt;
+ int pstate_nom;
+ u32 freq_domain_mask;
+ u8 domain_runs_at;
+ /* Set once the one-time setup at the end of this function has run. */
+ static bool occ_pstates_initialized;
+
+ power_mgt = dt_find_by_path(dt_root, "/ibm,opal/power-mgt");
+ if (!power_mgt) {
+ /**
+ * @fwts-label OCCDTNodeNotFound
+ * @fwts-advice Device tree node /ibm,opal/power-mgt not
+ * found. OPAL didn't add pstate information to device tree.
+ * Probably a firmware bug.
+ */
+ prlog(PR_ERR, "OCC: dt node /ibm,opal/power-mgt not found\n");
+ return;
+ }
+
+ /* Handle fast reboots */
+ if (occ_pstates_initialized) {
+ struct dt_node *child;
+ int i;
+ const char *props[] = {
+ "ibm,pstate-core-max",
+ "ibm,pstate-frequencies-mhz",
+ "ibm,pstate-ids",
+ "ibm,pstate-max",
+ "ibm,pstate-min",
+ "ibm,pstate-nominal",
+ "ibm,pstate-turbo",
+ "ibm,pstate-ultra-turbo",
+ "ibm,pstate-base",
+ "#address-cells",
+ "#size-cells",
+ };
+
+ for (i = 0; i < ARRAY_SIZE(props); i++)
+ dt_check_del_prop(power_mgt, props[i]);
+
+ /*
+ * NOTE(review): this frees children while iterating over them. Whether
+ * that is safe depends on how dt_for_each_child() advances and on what
+ * dt_free() does to the node; neither is visible here - confirm.
+ */
+ dt_for_each_child(power_mgt, child)
+ if (!strncmp(child->name, "occ", 3))
+ dt_free(child);
+ }
+
+ /* P9 and P10 share the same OPAL data offset inside HOMER. */
+ switch (proc_gen) {
+ case proc_gen_p8:
+ homer_opal_data_offset = P8_HOMER_OPAL_DATA_OFFSET;
+ break;
+ case proc_gen_p9:
+ case proc_gen_p10:
+ homer_opal_data_offset = P9_HOMER_OPAL_DATA_OFFSET;
+ break;
+ default:
+ return;
+ }
+
+ chip = next_chip(NULL);
+ if (!chip->homer_base) {
+ log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
+ "OCC: No HOMER detected, assuming no pstates\n");
+ return;
+ }
+
+ /* Wait for all OCC to boot up */
+ if(!wait_for_all_occ_init()) {
+ log_simple_error(&e_info(OPAL_RC_OCC_TIMEOUT),
+ "OCC: Initialization on all chips did not complete"
+ "(timed out)\n");
+ return;
+ }
+
+ /*
+ * Check boundary conditions and add device tree nodes
+ * and return nominal pstate to set for the core
+ */
+ if (!add_cpu_pstate_properties(power_mgt, &pstate_nom)) {
+ log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
+ "Skiping core cpufreq init due to OCC error\n");
+ } else if (proc_gen == proc_gen_p8) {
+ /*
+ * Setup host based pstates and set nominal frequency only in
+ * P8.
+ */
+ for_each_chip(chip)
+ for_each_available_core_in_chip(c, chip->id)
+ cpu_pstates_prepare_core(chip, c, pstate_nom);
+ }
+
+ /* Everything below is one-time setup; skip it on later calls. */
+ if (occ_pstates_initialized)
+ return;
+
+ /* Add opal_poller to poll OCC throttle status of each chip */
+ for_each_chip(chip)
+ chip->throttle = 0;
+ opal_add_poller(occ_throttle_poll, NULL);
+ occ_pstates_initialized = true;
+
+ /* Init OPAL-OCC command-response interface */
+ occ_cmd_interface_init();
+
+ /* TODO Firmware plumbing required so as to have two modes to set
+ * PMCR based on max in domain or most recently used. As of today,
+ * it is always max in domain for P9.
+ */
+ domain_runs_at = 0;
+ freq_domain_mask = 0;
+ if (proc_gen == proc_gen_p8) {
+ freq_domain_mask = P8_PIR_CORE_MASK;
+ domain_runs_at = FREQ_MOST_RECENTLY_SET;
+ } else if (proc_gen == proc_gen_p9) {
+ freq_domain_mask = P9_PIR_QUAD_MASK;
+ domain_runs_at = FREQ_MAX_IN_DOMAIN;
+ } else if (proc_gen == proc_gen_p10) {
+ freq_domain_mask = P10_PIR_CHIP_MASK;
+ domain_runs_at = FREQ_MAX_IN_DOMAIN;
+ } else {
+ assert(0);
+ }
+
+ dt_add_property_cells(power_mgt, "freq-domain-mask", freq_domain_mask);
+ dt_add_property_cells(power_mgt, "domain-runs-at", domain_runs_at);
+}
+
+/*
+ * Split the chip ids of the system into master and slave OCC lists.
+ *
+ * The first chip is the master (the first two on Murano). The master ids
+ * are returned in a newly allocated array *master of *nr_masters entries.
+ * If chips remain beyond the masters, their ids are returned in a newly
+ * allocated array *slave of *nr_slaves entries; otherwise *slave and
+ * *nr_slaves are left untouched. The caller owns both arrays.
+ *
+ * Returns 0 on success, or -ENOMEM on allocation failure. In that case no
+ * memory allocated here remains allocated and *master is NULL.
+ */
+int find_master_and_slave_occ(uint64_t **master, uint64_t **slave,
+			      int *nr_masters, int *nr_slaves)
+{
+	struct proc_chip *chip;
+	int nr_chips = 0, i;
+	uint64_t chipids[MAX_CHIPS];
+
+	for_each_chip(chip) {
+		chipids[nr_chips++] = chip->id;
+	}
+
+	chip = next_chip(NULL);
+	/*
+	 * Proc0 is the master OCC for Tuleta/Alpine boxes.
+	 * Hostboot expects the pair of chips for MURANO, so pass the sibling
+	 * chip id along with proc0 to hostboot.
+	 */
+	*nr_masters = (chip->type == PROC_CHIP_P8_MURANO) ? 2 : 1;
+	*master = malloc(*nr_masters * sizeof(uint64_t));
+
+	if (!*master) {
+		printf("OCC: master array alloc failure\n");
+		return -ENOMEM;
+	}
+
+	if (nr_chips - *nr_masters > 0) {
+		*nr_slaves = nr_chips - *nr_masters;
+		*slave = malloc(*nr_slaves * sizeof(uint64_t));
+		if (!*slave) {
+			printf("OCC: slave array alloc failure\n");
+			/*
+			 * Do not leak the master array. Clearing the pointer
+			 * keeps a caller that frees it on error safe.
+			 */
+			free(*master);
+			*master = NULL;
+			return -ENOMEM;
+		}
+	}
+
+	for (i = 0; i < nr_chips; i++) {
+		if (i < *nr_masters) {
+			*(*master + i) = chipids[i];
+			continue;
+		}
+		*(*slave + i - *nr_masters) = chipids[i];
+	}
+	return 0;
+}
+
+
+/*
+ * Notify the host of an OCC reset by queueing an OCC_RESET message.
+ *
+ * If the message was queued, every chip's pstate table is marked invalid,
+ * the remembered throttle status is reset to 0 and occ_reset is set. If
+ * queueing fails, nothing is changed. Returns the queueing result.
+ */
+int occ_msg_queue_occ_reset(void)
+{
+	struct opal_occ_msg occ_msg = { CPU_TO_BE64(OCC_RESET), 0, 0 };
+	struct proc_chip *chip;
+	int rc;
+
+	lock(&occ_lock);
+	rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
+			     sizeof(struct opal_occ_msg), &occ_msg);
+	if (!rc) {
+		/*
+		 * The OCC may not clear the 'valid' byte of its pstate table
+		 * on a reset, so clear it here. The OCC sets it back to 1
+		 * once it is active again.
+		 */
+		for_each_chip(chip) {
+			get_occ_pstate_table(chip)->valid = 0;
+			chip->throttle = 0;
+		}
+		occ_reset = true;
+	} else {
+		prlog(PR_INFO, "OCC: Failed to queue OCC_RESET message\n");
+	}
+	unlock(&occ_lock);
+
+	return rc;
+}
+
+/*
+ * OCC GP0 register and the two addresses occ_pnor_set_one_owner() writes
+ * to: a mask written to the _OR address when the host takes ownership, and
+ * an inverted mask written to the _AND address otherwise.
+ */
+#define PV_OCC_GP0 0x01000000
+#define PV_OCC_GP0_AND 0x01000004
+#define PV_OCC_GP0_OR 0x01000005
+#define PV_OCC_GP0_PNOR_OWNER PPC_BIT(18) /* 1 = OCC / Host, 0 = BMC */
+
+/*
+ * Record @owner as the PNOR owner in OCC GP0 of @chip_id: the owner bit is
+ * written through the OR register for the host, and its complement through
+ * the AND register for anyone else. The write result is not checked.
+ */
+static void occ_pnor_set_one_owner(uint32_t chip_id, enum pnor_owner owner)
+{
+	bool host = (owner == PNOR_OWNER_HOST);
+	uint64_t reg = host ? PV_OCC_GP0_OR : PV_OCC_GP0_AND;
+	uint64_t mask = host ? PV_OCC_GP0_PNOR_OWNER : ~PV_OCC_GP0_PNOR_OWNER;
+
+	xscom_write(chip_id, reg, mask);
+}
+
+/* Record @owner as the PNOR owner on every chip. */
+void occ_pnor_set_owner(enum pnor_owner owner)
+{
+	struct proc_chip *chip;
+
+	for_each_chip(chip) {
+		occ_pnor_set_one_owner(chip->id, owner);
+	}
+}
+
+
+/*
+ * OCCMISC interrupt status register addresses. On P8 the status bits are
+ * cleared by writing the inverted value to the _AND address; on P9 they are
+ * cleared by writing the value itself to the _CLEAR address. The _OR
+ * addresses are used to raise bits.
+ */
+#define P8_OCB_OCI_OCCMISC 0x6a020
+#define P8_OCB_OCI_OCCMISC_AND 0x6a021
+#define P8_OCB_OCI_OCCMISC_OR 0x6a022
+
+#define P9_OCB_OCI_OCCMISC 0x6c080
+#define P9_OCB_OCI_OCCMISC_CLEAR 0x6c081
+#define P9_OCB_OCI_OCCMISC_OR 0x6c082
+
+/* Interrupt trigger bit and the source bits common to P8 and P9. */
+#define OCB_OCI_OCIMISC_IRQ PPC_BIT(0)
+#define OCB_OCI_OCIMISC_IRQ_TMGT PPC_BIT(1)
+#define OCB_OCI_OCIMISC_IRQ_SLW_TMR PPC_BIT(14)
+#define OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY PPC_BIT(15)
+
+/* Source bits that make occ_p8_interrupt() re-raise the interrupt. */
+#define P8_OCB_OCI_OCIMISC_MASK (OCB_OCI_OCIMISC_IRQ_TMGT | \
+ OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY | \
+ OCB_OCI_OCIMISC_IRQ_SLW_TMR)
+
+#define OCB_OCI_OCIMISC_IRQ_I2C PPC_BIT(2)
+#define OCB_OCI_OCIMISC_IRQ_SHMEM PPC_BIT(3)
+/* Source bits that make occ_p9_interrupt() re-raise the interrupt. */
+#define P9_OCB_OCI_OCIMISC_MASK (OCB_OCI_OCIMISC_IRQ_TMGT | \
+ OCB_OCI_OCIMISC_IRQ_I2C | \
+ OCB_OCI_OCIMISC_IRQ_SHMEM | \
+ OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY)
+
+/*
+ * Raise the OPAL "dummy" OCC interrupt so that OPAL gets interrupted.
+ *
+ * Does nothing on platforms with the QUIRK_NO_OCC_IRQ quirk, when no
+ * functional PSI can be found, or on an unknown processor generation. The
+ * PSI of the current chip is preferred. P9 and P10 use the same register.
+ */
+void occ_send_dummy_interrupt(void)
+{
+	struct proc_chip *chip = get_chip(this_cpu()->chip_id);
+	struct psi *psi;
+	uint64_t occmisc_or;
+
+	/* Emulators don't do this */
+	if (chip_quirk(QUIRK_NO_OCC_IRQ))
+		return;
+
+	/*
+	 * Fall back to any functional PSI so the interrupt is delivered
+	 * even if the psihb on the current chip is not configured.
+	 */
+	psi = chip->psi ? chip->psi : psi_find_functional_chip();
+	if (!psi) {
+		prlog_once(PR_WARNING, "PSI: no functional PSI HB found, "
+				       "no self interrupts delivered\n");
+		return;
+	}
+
+	switch (proc_gen) {
+	case proc_gen_p8:
+		occmisc_or = P8_OCB_OCI_OCCMISC_OR;
+		break;
+	case proc_gen_p9:
+	case proc_gen_p10:
+		occmisc_or = P9_OCB_OCI_OCCMISC_OR;
+		break;
+	default:
+		return;
+	}
+
+	xscom_write(psi->chip_id, occmisc_or,
+		    OCB_OCI_OCIMISC_IRQ |
+		    OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY);
+}
+
+/*
+ * P8 OCC interrupt handler for @chip_id: read the OCCMISC status, clear the
+ * bits that were seen, dispatch the TMGT and SLW timer sources, then
+ * re-raise the interrupt if new source bits arrived in the meantime.
+ */
+void occ_p8_interrupt(uint32_t chip_id)
+{
+	uint64_t seen, pending;
+	int64_t rc;
+
+	/* The OCC interrupt is used to mux up to 15 different sources */
+	rc = xscom_read(chip_id, P8_OCB_OCI_OCCMISC, &seen);
+	if (rc) {
+		prerror("OCC: Failed to read interrupt status !\n");
+		/* Should we mask it in the XIVR ? */
+		return;
+	}
+	prlog(PR_TRACE, "OCC: IRQ received: %04llx\n", seen >> 48);
+
+	/* Clear exactly the bits we saw (AND with the complement). */
+	xscom_write(chip_id, P8_OCB_OCI_OCCMISC_AND, ~seen);
+
+	/* Dispatch */
+	if (seen & OCB_OCI_OCIMISC_IRQ_TMGT)
+		prd_tmgt_interrupt(chip_id);
+	if (seen & OCB_OCI_OCIMISC_IRQ_SLW_TMR)
+		check_timers(true);
+
+	/*
+	 * The clearing write above may also have dropped
+	 * OCB_OCI_OCIMISC_IRQ. If source bits are set now, trigger another
+	 * interrupt so they get handled.
+	 */
+	rc = xscom_read(chip_id, P8_OCB_OCI_OCCMISC, &pending);
+	if (rc)
+		return;
+	if (pending & P8_OCB_OCI_OCIMISC_MASK)
+		xscom_write(chip_id, P8_OCB_OCI_OCCMISC_OR,
+			    OCB_OCI_OCIMISC_IRQ);
+}
+
+/*
+ * P9 OCC interrupt handler for @chip_id: read the OCCMISC status, clear the
+ * bits that were seen, dispatch the TMGT, shared-memory and I2C sources,
+ * then re-raise the interrupt if new source bits arrived in the meantime.
+ */
+void occ_p9_interrupt(uint32_t chip_id)
+{
+	u64 seen, pending;
+	s64 rc;
+
+	/* The OCC interrupt is used to mux up to 15 different sources */
+	rc = xscom_read(chip_id, P9_OCB_OCI_OCCMISC, &seen);
+	if (rc) {
+		prerror("OCC: Failed to read interrupt status !\n");
+		return;
+	}
+	prlog(PR_TRACE, "OCC: IRQ received: %04llx\n", seen >> 48);
+
+	/* Clear exactly the bits we saw through the CLEAR register. */
+	xscom_write(chip_id, P9_OCB_OCI_OCCMISC_CLEAR, seen);
+
+	/* Dispatch */
+	if (seen & OCB_OCI_OCIMISC_IRQ_TMGT)
+		prd_tmgt_interrupt(chip_id);
+
+	/* Shared memory changed: check throttling, then command responses. */
+	if (seen & OCB_OCI_OCIMISC_IRQ_SHMEM) {
+		occ_throttle_poll(NULL);
+		handle_occ_rsp(chip_id);
+	}
+
+	if (seen & OCB_OCI_OCIMISC_IRQ_I2C)
+		p9_i2c_bus_owner_change(chip_id);
+
+	/*
+	 * The clearing write above may also have dropped
+	 * OCB_OCI_OCIMISC_IRQ. If source bits are set now, trigger another
+	 * interrupt so they get handled.
+	 */
+	rc = xscom_read(chip_id, P9_OCB_OCI_OCCMISC, &pending);
+	if (rc)
+		return;
+	if (pending & P9_OCB_OCI_OCIMISC_MASK)
+		xscom_write(chip_id, P9_OCB_OCI_OCCMISC_OR,
+			    OCB_OCI_OCIMISC_IRQ);
+}